2 from __future__
import print_function
3 from builtins
import input
5 tesshelper.py -- Utility operations to compare, report stats, and copy
6 public headers for tesseract 3.0x VS2008 Project
8 $RCSfile: tesshelper.py,v $ $Revision: 7ca575b377aa $ $Date: 2012/03/07 17:26:31 $
14 python 2.7 or greater: activestate.com
15 http://www.activestate.com/activepython/downloads
17 because it uses the argparse module and the set literal syntax (s={1, 2}).
22 Format for a .vcproj file entry:
25 RelativePath="..\src\allheaders.h"
34 Assume that tesshelper.py is in c:\buildfolder\tesseract-3.02\vs2008,
35 which is also the current directory. Then,
37 python tesshelper .. compare
39 will compare c:\buildfolder\tesseract-3.02 "library" directories to the
41 (c:\buildfolder\tesseract-3.02\vs2008\libtesseract\libtesseract.vcproj).
43 python tesshelper .. report
45 will display summary stats for c:\buildfolder\tesseract-3.02 "library"
46 directories and the libtesseract Project.
48 python tesshelper .. copy ..\..\include
50 will copy all "public" libtesseract header files to
51 c:\buildfolder\include.
53 python tesshelper .. clean
55 will clean the vs2008 folder of all build directories, and .user, .suo,
56 .ncb, and other temp files.
# Script version: a fixed "1.0" followed by the date field (second
# whitespace-separated token) of the expanded RCS "$Date$" keyword.
VERSION = "1.0 %s" % "$Date: 2012/03/07 17:26:31 $".split()[1]

# Sub-directory and file name of the libtesseract project file; they are
# joined onto the tesseract directory when that directory is validated.
PROJ_SUBDIR = r"vs2008\libtesseract"
PROJFILE = "libtesseract.vcproj"

# Output files that receive the generated entries for header and source
# files that exist on disk but are not listed in the project.
NEWHEADERS_FILENAME = "newheaders.txt"
NEWSOURCES_FILENAME = "newsources.txt"

# Project-file entry template; the single %s receives the file's relative
# path.  A raw string is used so the backslashes are taken literally
# instead of relying on unrecognised escape sequences.
# NOTE(review): the template carries no trailing newline, so entries written
# back-to-back end up on one line -- confirm this is intended.
fileNodeTemplate = r''' <ClCompile Include="..\..\%s" />'''
85 """Return sets of all, c, h, and resources files in libtesseract Project"""
91 projectFilesSet = set()
92 f = open(libProjectFile,
"r")
96 projectFiles = re.findall(r'(?i)Include="(\.[^"]+)"', data)
97 for projectFile
in projectFiles:
98 root, ext = os.path.splitext(projectFile.lower())
99 if ext ==
".c" or ext ==
".cpp":
100 projectCFiles.add(projectFile)
102 projectHFiles.add(projectFile)
104 projectRFiles.add(projectFile)
106 print(
"unknown file type: %s" % projectFile)
108 relativePath = os.path.join(libTessDir, projectFile)
109 relativePath = os.path.abspath(relativePath)
110 relativePath = relativePath[nTrimChars:].lower()
111 projectFilesSet.add(relativePath)
113 return projectFilesSet, projectHFiles, projectCFiles, projectRFiles
116 """Return set of all libtesseract files in tessDir"""
127 r"neural_networks\runtime",
134 r"vs2010\libtesseract",
139 for curDir
in libDirs:
140 baseDir = os.path.join(tessDir, curDir)
141 for filetype
in [
"*.c",
"*.cpp",
"*.h"]:
142 pattern = os.path.join(baseDir, filetype)
143 fileList = glob.glob(pattern)
144 for curFile
in fileList:
145 curFile = os.path.abspath(curFile)
146 relativePath = curFile[nTrimChars:].lower()
147 tessFiles.add(relativePath)
154 '''Compare libtesseract Project files and actual "sub-library" files.'''
156 vs2010Dir = os.path.join(tessDir,
"vs2010")
157 libTessDir = os.path.join(vs2010Dir,
"libtesseract")
158 libProjectFile = os.path.join(libTessDir,
"libtesseract.vcxproj")
159 tessAbsDir = os.path.abspath(tessDir)
160 nTrimChars = len(tessAbsDir)+1
161 print(
'Comparing VS2010 Project "%s" with\n "%s"' % (libProjectFile,
164 projectFilesSet, projectHFiles, projectCFiles, projectRFiles = \
168 extraFiles = tessFiles - projectFilesSet
169 print(
"%2d Extra files (in %s but not in Project)" % (len(extraFiles),
173 sortedList = list(extraFiles)
175 for filename
in sortedList:
176 root, ext = os.path.splitext(filename.lower())
178 headerFiles.append(filename)
180 sourceFiles.append(filename)
181 print(
" %s " % filename)
184 print(
"%2d new header file items written to %s" % (len(headerFiles),
185 NEWHEADERS_FILENAME))
187 with open(NEWHEADERS_FILENAME,
"w")
as f:
188 for filename
in headerFiles:
189 f.write(fileNodeTemplate % filename)
191 print(
"%2d new source file items written to %s" % (len(sourceFiles),
192 NEWSOURCES_FILENAME))
194 with open(NEWSOURCES_FILENAME,
"w")
as f:
195 for filename
in sourceFiles:
196 f.write(fileNodeTemplate % filename)
199 deadFiles = projectFilesSet - tessFiles
200 print(
"%2d Dead files (in Project but not in %s" % (len(deadFiles),
202 sortedList = list(deadFiles)
204 for filename
in sortedList:
205 print(
" %s " % filename)
210 """Report summary stats on "sub-library" files and libtesseract Project file."""
212 vs2010Dir = os.path.join(tessDir,
"vs2008")
213 libTessDir = os.path.join(vs2010Dir,
"libtesseract")
214 libProjectFile = os.path.join(libTessDir,
"libtesseract.vcproj")
215 tessAbsDir = os.path.abspath(tessDir)
216 nTrimChars = len(tessAbsDir)+1
218 projectFilesSet, projectHFiles, projectCFiles, projectRFiles = \
222 print(
'Summary stats for "%s" library directories' % tessAbsDir)
224 for tessFile
in tessFiles:
225 tessFile = tessFile.lower()
226 folder, head = os.path.split(tessFile)
227 file, ext = os.path.splitext(head)
228 typeCounter = folderCounters.setdefault(folder, collections.Counter())
229 typeCounter[ext[1:]] += 1
231 folders = list(folderCounters.keys())
239 print(
" total h cpp")
240 print(
" ----- --- ---")
241 for folder
in folders:
242 counters = folderCounters[folder]
243 nHFiles = counters[
'h']
244 nCPPFiles = counters[
'cpp']
246 total = nHFiles + nCPPFiles
249 totalCPP += nCPPFiles
251 print(
" %5d %3d %3d %s" % (total, nHFiles, nCPPFiles, folder))
252 print(
" ----- --- ---")
253 print(
" %5d %3d %3d" % (totalFiles, totalH, totalCPP))
256 print(
'Summary stats for VS2008 Project "%s"' % libProjectFile)
257 print(
" %5d %s" %(len(projectHFiles),
"Header files"))
258 print(
" %5d %s" % (len(projectCFiles),
"Source files"))
259 print(
" %5d %s" % (len(projectRFiles),
"Resource files"))
261 print(
" %5d" % (len(projectHFiles) + len(projectCFiles) + len(projectRFiles), ))
def copyIncludes(fileSet, description, tessDir, includeDir):
    """Copy a set of files into the specified include directory.

    Args:
        fileSet: iterable of file paths, each relative to ``tessDir``.
        description: label that appears only in the progress banner.
        tessDir: directory the relative paths are resolved against.
        includeDir: destination directory passed to ``shutil.copy2``.

    Files are processed in sorted order.  A path that is not an existing
    file is reported and collected instead of raising; the collected paths
    are listed after the summary line.
    """
    print('Copying libtesseract "%s" headers to %s' % (description, includeDir))

    count = 0
    errList = []
    for includeFile in sorted(fileSet):
        filepath = os.path.join(tessDir, includeFile)
        if os.path.isfile(filepath):
            shutil.copy2(filepath, includeDir)
            print("Copied: %s" % includeFile)
            count += 1
        else:
            print('***Error: "%s" doesn\'t exist' % filepath)
            errList.append(filepath)

    print('%d header files successfully copied to "%s"' % (count, includeDir))
    if errList:
        # The count must be supplied here; without it the literal "%d"
        # would be printed.
        print("The following %d files were not copied:" % len(errList))
        for filepath in errList:
            print(" %s" % filepath)
294 '''Copy all "public" libtesseract Project header files to include directory.
296 Preserves directory hierarchy.'''
302 r"ccstruct\publictypes.h",
303 r"ccmain\thresholder.h",
306 r"ccutil\tesscallback.h",
308 r"ccutil\platform.h",
315 r"ccutil\serialis.h",
321 resultIteratorIncludeSet = {
322 r"ccmain\ltrresultiterator.h",
323 r"ccmain\pageiterator.h",
324 r"ccmain\resultiterator.h",
325 r"ccutil\genericvector.h",
326 r"ccutil\tesscallback.h",
332 r"ccutil\unicharmap.h",
333 r"ccutil\unicharset.h",
336 genericVectorIncludeSet = {
337 r"ccutil\genericvector.h",
338 r"ccutil\tesscallback.h",
348 r"ccstruct\points.h",
349 r"ccstruct\ipoints.h",
352 r"ccutil\serialis.h",
357 r"viewer\scrollview.h",
358 r"ccstruct\vecfuncs.h",
363 r"vs2008\include\leptonica_versionnumbers.vsprops",
364 r"vs2008\include\tesseract_versionnumbers.vsprops",
367 tessIncludeDir = os.path.join(includeDir,
"tesseract")
368 if os.path.isfile(tessIncludeDir):
369 print(
'Aborting: "%s" is a file not a directory.' % tessIncludeDir)
371 if not os.path.exists(tessIncludeDir):
372 os.mkdir(tessIncludeDir)
375 fileSet = baseIncludeSet | strngIncludeSet | resultIteratorIncludeSet
377 copyIncludes(fileSet,
"public", tessDir, tessIncludeDir)
378 copyIncludes(extraFilesSet,
"extra", tessDir, includeDir)
383 '''Clean vs2008 folder of all build directories and certain temp files.'''
385 vs2010Dir = os.path.join(tessDir,
"vs2008")
386 vs2008AbsDir = os.path.abspath(vs2010Dir)
389 'Are you sure you want to clean the\n "%s" folder (Yes/No) [No]? ' %
391 if answer.lower()
not in (
"yes",):
393 answer = eval(input(
'Only list the items to be deleted (Yes/No) [Yes]? '))
394 answer = answer.strip()
395 listOnly = answer.lower()
not in (
"no",)
397 for rootDir, dirs, files
in os.walk(vs2008AbsDir):
398 for buildDir
in (
"LIB_Release",
"LIB_Debug",
"DLL_Release",
"DLL_Debug"):
400 dirs.remove(buildDir)
401 absBuildDir = os.path.join(rootDir, buildDir)
403 print(
"Would remove: %s" % absBuildDir)
405 print(
"Removing: %s" % absBuildDir)
406 shutil.rmtree(absBuildDir)
408 if rootDir == vs2008AbsDir:
410 if file.lower()
not in (
"tesseract.sln",
413 absPath = os.path.join(rootDir, file)
415 print(
"Would remove: %s" % absPath)
417 print(
"Removing: %s" % absPath)
421 root, ext = os.path.splitext(file)
422 if ext.lower()
in (
".suo",
426 len(ext)>0
and ext[-1] ==
"~"):
427 absPath = os.path.join(rootDir, file)
429 print(
"Would remove: %s" % absPath)
431 print(
"Removing: %s" % absPath)
def validateTessDir(tessDir):
    """Check that tessDir is a valid tesseract directory.

    Written to be used as an argparse ``type=`` callable.

    Args:
        tessDir: path given on the command line.

    Returns:
        ``tessDir`` unchanged, so argparse stores it as the parsed value.

    Raises:
        argparse.ArgumentTypeError: if ``tessDir`` is not a directory, or if
            the project file PROJ_SUBDIR/PROJFILE is not found beneath it.
    """
    if not os.path.isdir(tessDir):
        raise argparse.ArgumentTypeError(
            'Directory "%s" doesn\'t exist.' % tessDir)

    projFile = os.path.join(tessDir, PROJ_SUBDIR, PROJFILE)
    if not os.path.isfile(projFile):
        raise argparse.ArgumentTypeError(
            'Project file "%s" doesn\'t exist.' % projFile)

    # argparse uses the return value of a type= callable as the argument's
    # value; returning nothing would leave the parsed argument as None.
    return tessDir
def validateDir(dir):
    """Check that dir is a valid directory named include.

    Written to be used as an argparse ``type=`` callable.

    Args:
        dir: path given on the command line.  The name shadows the builtin
            but is kept so the function's interface stays as it was.

    Returns:
        ``dir`` unchanged, so argparse stores it as the parsed value.

    Raises:
        argparse.ArgumentTypeError: if ``dir`` is not an existing directory,
            or if the last component of its absolute path is not "include"
            (compared case-insensitively).
    """
    if not os.path.isdir(dir):
        raise argparse.ArgumentTypeError(
            'Directory "%s" doesn\'t exist.' % dir)

    # Resolve to an absolute path first so that inputs such as "." are
    # judged by the real directory name.
    dirpath = os.path.abspath(dir)
    tail = os.path.basename(dirpath)
    if tail.lower() != "include":
        raise argparse.ArgumentTypeError(
            'Include directory "%s" must be named "include".' % tail)

    # argparse uses the return value of a type= callable as the argument's
    # value.
    return dir
460 parser = argparse.ArgumentParser(
462 formatter_class=argparse.RawDescriptionHelpFormatter)
464 parser.add_argument(
"--version", action=
"version",
465 version=
"%(prog)s " + VERSION)
466 parser.add_argument(
'tessDir', type=validateTessDir,
467 help=
"tesseract installation directory")
469 subparsers = parser.add_subparsers(
470 dest=
"subparser_name",
472 parser_changes = subparsers.add_parser(
'compare',
473 help=
"compare libtesseract Project with tessDir")
474 parser_changes.set_defaults(func=tessCompare)
476 parser_report = subparsers.add_parser(
'report',
477 help=
"report libtesseract summary stats")
478 parser_report.set_defaults(func=tessReport)
480 parser_copy = subparsers.add_parser(
'copy',
481 help=
"copy public libtesseract header files to includeDir")
482 parser_copy.add_argument(
'includeDir', type=validateDir,
483 help=
"Directory to copy header files to.")
484 parser_copy.set_defaults(func=tessCopy)
486 parser_clean = subparsers.add_parser(
'clean',
487 help=
"clean vs2008 folder of build folders and .user files")
488 parser_clean.set_defaults(func=tessClean)
491 if (len(sys.argv) == 2):
492 sys.argv.append(
"compare")
493 args = parser.parse_args()
496 if args.func == tessCopy:
497 args.func(args.tessDir, args.includeDir)
499 args.func(args.tessDir)
501 if __name__ ==
'__main__' :