219 parser.parse_args(args=argv, namespace=ctx)
222 if not ctx.lang_code:
223 err_exit(
"Need to specify a language --lang")
224 if not ctx.langdata_dir:
225 err_exit(
"Need to specify path to language files --langdata_dir")
226 if not ctx.tessdata_dir:
227 tessdata_prefix = os.environ.get(
"TESSDATA_PREFIX",
"")
228 if not tessdata_prefix:
230 "Need to specify a --tessdata_dir or have a "
231 "TESSDATA_PREFIX variable defined in your environment"
234 ctx.tessdata_dir = tessdata_prefix
235 if not ctx.output_dir:
236 ctx.output_dir = mkdtemp(prefix=f
"trained-{ctx.lang_code}-{ctx.timestamp}")
237 log.info(f
"Output directory set to: {ctx.output_dir}")
241 ctx.training_dir = mkdtemp(prefix=f
"{ctx.lang_code}-{ctx.timestamp}")
243 ctx.training_dir = mkdtemp(prefix=f
"{ctx.lang_code}-{ctx.timestamp}", dir=ctx.tmp_dir)
245 ctx.log_file = pathlib.Path(ctx.training_dir) /
"tesstrain.log"
246 log.info(f
"Log file location: {ctx.log_file}")
def show_tmpdir_location(training_dir):
    """Report where the temporary training files are, if they still exist.

    The surrounding code registers this function with ``atexit`` and passes
    it the training directory, so it runs at interpreter shutdown.

    Args:
        training_dir: Path (string or path-like) of the training directory.

    Returns:
        None. A single line is printed to standard output when the path
        exists; nothing is printed otherwise.
    """
    # Only mention the directory when it is still on disk at call time.
    if pathlib.Path(training_dir).exists():
        print(f"Temporary files retained at: {training_dir}")
254 atexit.register(show_tmpdir_location, ctx.training_dir)
258 if not ctx.training_text:
259 ctx.training_text = (
260 pathlib.Path(ctx.langdata_dir) / ctx.lang_code / f
"{ctx.lang_code}.training_text"
262 if not ctx.wordlist_file:
263 ctx.wordlist_file = (
264 pathlib.Path(ctx.langdata_dir) / ctx.lang_code / f
"{ctx.lang_code}.wordlist"
267 ctx.word_bigrams_file = (
268 pathlib.Path(ctx.langdata_dir) / ctx.lang_code / f
"{ctx.lang_code}.word.bigrams"
271 pathlib.Path(ctx.langdata_dir) / ctx.lang_code / f
"{ctx.lang_code}.numbers"
273 ctx.punc_file = pathlib.Path(ctx.langdata_dir) / ctx.lang_code / f
"{ctx.lang_code}.punc"
274 ctx.bigram_freqs_file = pathlib.Path(ctx.training_text).with_suffix(
275 ".training_text.bigram_freqs"
277 ctx.unigram_freqs_file = pathlib.Path(ctx.training_text).with_suffix(
278 ".training_text.unigram_freqs"
280 ctx.train_ngrams_file = pathlib.Path(ctx.training_text).with_suffix(
281 ".training_text.train_ngrams"
283 ctx.generate_dawgs = 1