30 contextual_ = contextual;
34 for (
int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {
47 fprintf(stderr,
"Cube ERROR (WordSizeModel::Create): unable to allocate "
48 "new word size model object\n");
52 if (!obj->Init(data_file_path, lang)) {
59 bool WordSizeModel::Init(
const string &data_file_path,
const string &
lang) {
60 string stats_file_name;
61 stats_file_name = data_file_path +
lang;
62 stats_file_name +=
".cube.size";
72 vector<string> tokens;
74 if (tokens.size() < 1) {
75 fprintf(stderr,
"Cube ERROR (WordSizeModel::Init): invalid "
76 "file contents: %s\n", stats_file_name.c_str());
80 font_pair_size_models_.clear();
83 int token_cnt = contextual_ ?
84 (kExpectedTokenCount + 4) : kExpectedTokenCount;
89 int size_class_cnt = contextual_ ?
93 for (
int tok = 0; tok < tokens.size(); tok += token_cnt) {
95 if (tok == 0 || fnt_name != tokens[tok]) {
96 FontPairSizeInfo fnt_info;
98 fnt_info.pair_size_info =
new PairSizeInfo *[size_class_cnt];
99 if (!fnt_info.pair_size_info) {
100 fprintf(stderr,
"Cube ERROR (WordSizeModel::Init): error allcoating "
101 "memory for font pair size info\n");
105 fnt_info.pair_size_info[0] =
106 new PairSizeInfo[size_class_cnt * size_class_cnt];
107 if (!fnt_info.pair_size_info[0]) {
108 fprintf(stderr,
"Cube ERROR (WordSizeModel::Init): error allocating "
109 "memory for font pair size info\n");
113 memset(fnt_info.pair_size_info[0], 0, size_class_cnt * size_class_cnt *
114 sizeof(PairSizeInfo));
116 for (
int cls = 1; cls < size_class_cnt; cls++) {
117 fnt_info.pair_size_info[cls] =
118 fnt_info.pair_size_info[cls - 1] + size_class_cnt;
122 string stripped_font_name = tokens[tok].substr(0, tokens[tok].find(
'.'));
123 string::size_type strt_pos = stripped_font_name.find_last_of(
"/\\");
124 if (strt_pos != string::npos) {
125 fnt_info.font_name = stripped_font_name.substr(strt_pos);
127 fnt_info.font_name = stripped_font_name;
129 font_pair_size_models_.push_back(fnt_info);
156 if (sscanf(tokens[tok + 1].c_str(),
"%d", &cls_0) != 1 ||
157 sscanf(tokens[tok + 2].c_str(),
"%d", &start_0) != 1 ||
158 sscanf(tokens[tok + 3].c_str(),
"%d", &end_0) != 1 ||
159 sscanf(tokens[tok + 5].c_str(),
"%lf", &wid_0) != 1 ||
160 sscanf(tokens[tok + 6].c_str(),
"%lf", &hgt_0) != 1 ||
161 sscanf(tokens[tok + 7].c_str(),
"%d", &cls_1) != 1 ||
162 sscanf(tokens[tok + 8].c_str(),
"%d", &start_1) != 1 ||
163 sscanf(tokens[tok + 9].c_str(),
"%d", &end_1) != 1 ||
164 sscanf(tokens[tok + 11].c_str(),
"%lf", &delta_top) != 1 ||
165 sscanf(tokens[tok + 12].c_str(),
"%lf", &wid_1) != 1 ||
166 sscanf(tokens[tok + 13].c_str(),
"%lf", &hgt_1) != 1 ||
167 (start_0 != 0 && start_0 != 1) || (end_0 != 0 && end_0 != 1) ||
168 (start_1 != 0 && start_1 != 1) || (end_1 != 0 && end_1 != 1)) {
169 fprintf(stderr,
"Cube ERROR (WordSizeModel::Init): bad format at "
170 "line %d\n", 1 + (tok / token_cnt));
173 size_code_0 =
SizeCode(cls_0, start_0, end_0);
174 size_code_1 =
SizeCode(cls_1, start_1, end_1);
176 if (sscanf(tokens[tok + 1].c_str(),
"%d", &cls_0) != 1 ||
177 sscanf(tokens[tok + 3].c_str(),
"%lf", &wid_0) != 1 ||
178 sscanf(tokens[tok + 4].c_str(),
"%lf", &hgt_0) != 1 ||
179 sscanf(tokens[tok + 5].c_str(),
"%d", &cls_1) != 1 ||
180 sscanf(tokens[tok + 7].c_str(),
"%lf", &delta_top) != 1 ||
181 sscanf(tokens[tok + 8].c_str(),
"%lf", &wid_1) != 1 ||
182 sscanf(tokens[tok + 9].c_str(),
"%lf", &hgt_1) != 1) {
183 fprintf(stderr,
"Cube ERROR (WordSizeModel::Init): bad format at "
184 "line %d\n", 1 + (tok / token_cnt));
192 FontPairSizeInfo fnt_info = font_pair_size_models_.back();
193 fnt_info.pair_size_info[size_code_0][size_code_1].delta_top =
194 static_cast<int>(delta_top * kShapeModelScale);
195 fnt_info.pair_size_info[size_code_0][size_code_1].wid_0 =
196 static_cast<int>(wid_0 * kShapeModelScale);
197 fnt_info.pair_size_info[size_code_0][size_code_1].hgt_0 =
198 static_cast<int>(hgt_0 * kShapeModelScale);
199 fnt_info.pair_size_info[size_code_0][size_code_1].wid_1 =
200 static_cast<int>(wid_1 * kShapeModelScale);
201 fnt_info.pair_size_info[size_code_0][size_code_1].hgt_1 =
202 static_cast<int>(hgt_1 * kShapeModelScale);
204 fnt_name = tokens[tok];
214 double best_dist =
static_cast<double>(
WORST_COST);
216 for (
int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {
218 double mean_dist = 0;
221 for (
int smp_0 = 0; smp_0 < samp_cnt; smp_0++) {
222 int cls_0 = char_set_->
ClassID(samp_array[smp_0]->StrLabel());
230 samp_array[smp_0]->FirstChar() == 0 ? 0 : 1,
231 samp_array[smp_0]->LastChar() == 0 ? 0 : 1);
236 int char0_height = samp_array[smp_0]->
Height();
237 int char0_width = samp_array[smp_0]->
Width();
238 int char0_top = samp_array[smp_0]->
Top();
240 for (
int smp_1 = smp_0 + 1; smp_1 < samp_cnt; smp_1++) {
241 int cls_1 = char_set_->
ClassID(samp_array[smp_1]->StrLabel());
249 samp_array[smp_1]->FirstChar() == 0 ? 0 : 1,
250 samp_array[smp_1]->LastChar() == 0 ? 0 : 1);
255 char0_width, char0_height, char0_top, samp_array[smp_1]->Width(),
256 samp_array[smp_1]->Height(), samp_array[smp_1]->Top(),
267 mean_dist /= pair_cnt;
268 if (best_fnt == -1 || mean_dist < best_dist) {
269 best_dist = mean_dist;
273 if (best_fnt == -1) {
276 return static_cast<int>(best_dist);
281 int width_1,
int height_1,
int top_1,
283 double scale_factor =
static_cast<double>(pair_info.
hgt_0) /
284 static_cast<double>(height_0);
286 if (scale_factor > 0) {
287 double norm_width_0 = width_0 * scale_factor;
288 double norm_width_1 = width_1 * scale_factor;
289 double norm_height_1 = height_1 * scale_factor;
290 double norm_delta_top = (top_1 - top_0) * scale_factor;
294 dist += fabs(pair_info.
wid_0 - norm_width_0);
295 dist += fabs(pair_info.
wid_1 - norm_width_1);
296 dist += fabs(pair_info.
hgt_1 - norm_height_1);
297 dist += fabs(pair_info.
delta_top - norm_delta_top);
static int SizeCode(int cls_id, int start, int end)
static bool ReadFileToString(const string &file_name, string *str)
int Cost(CharSamp **samp_array, int samp_cnt) const
WordSizeModel(CharSet *, bool contextual)
unsigned short Width() const
int ClassID(const char_32 *str) const
static double PairCost(int width_0, int height_0, int top_0, int width_1, int height_1, int top_1, const PairSizeInfo &pair_info)
static WordSizeModel * Create(const string &data_file_path, const string &lang, CharSet *char_set, bool contextual)
unsigned short Top() const
static void SplitStringUsing(const string &str, const string &delims, vector< string > *str_vec)
PairSizeInfo ** pair_size_info
unsigned short Height() const