Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
trainingsample.h
Go to the documentation of this file.
1 // Copyright 2010 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 //
15 
16 #ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H__
17 #define TESSERACT_TRAINING_TRAININGSAMPLE_H__
18 
19 #include "elst.h"
20 #include "featdefs.h"
21 #include "intfx.h"
22 #include "intmatcher.h"
23 #include "matrix.h"
24 #include "mf.h"
25 #include "picofeat.h"
26 #include "shapetable.h"
27 #include "unicharset.h"
28 
29 struct Pix;
30 
31 namespace tesseract {
32 
33 class IntFeatureMap;
34 class IntFeatureSpace;
35 class ShapeTable;
36 
37 // Number of elements in TrainingSample::cn_feature_.
38 static const int kNumCNParams = 4;
39 // Number of ways to shift the features when randomizing.
40 static const int kSampleYShiftSize = 5;
41 // Number of ways to scale the features when randomizing.
42 static const int kSampleScaleSize = 3;
43 // Total number of different ways to manipulate the features when randomizing.
44 // The first and last combinations are removed to avoid an excessive
45 // top movement (first) and an identity transformation (last).
46 // WARNING: To avoid patterned duplication of samples, be sure to keep
47 // kSampleRandomSize prime!
48 // E.g. with current values (kSampleYShiftSize = 5 and kSampleScaleSize = 3)
49 // kSampleRandomSize is 5 * 3 - 2 = 13, which is prime.
50 static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2;
51 // ASSERT_IS_PRIME(kSampleRandomSize) !!
// NOTE: the ASSERT_IS_PRIME line above is only a comment; nothing in this
// header checks primality, so re-verify by hand whenever kSampleYShiftSize or
// kSampleScaleSize changes.
52 
// One training sample. Per the member comments below, it carries the identity
// of the sample (class/font/page ids, bounding box), its extracted features
// (int features, micro features, CN and geo features), and non-serialized
// cache data (weight, max_dist, sample index, indexed/mapped features).
// NOTE(review): this text is a rendered source listing. Each line starts with
// the listing's own line number, and several numbered lines are absent (see
// the NOTE(review) markers below). Confirm details against the original header.
53 class TrainingSample : public ELIST_LINK {
54  public:
 // NOTE(review): listing line 55 is absent from this extraction; it presumably
 // declares the default constructor that owns the initializer list below.
 // The visible initializer list sets class_id_ to INVALID_UNICHAR_ID, weight_
 // to 1.0, the ids, counts, max_dist_ and sample_index_ to 0, both feature
 // pointers to NULL and all three flags to false. It does not mention
 // bounding_box_, mapped_features_, cn_feature_ or geo_feature_.
56  : class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0),
57  num_features_(0), num_micro_features_(0),
58  features_(NULL), micro_features_(NULL), weight_(1.0),
59  max_dist_(0.0), sample_index_(0),
60  features_are_indexed_(false), features_are_mapped_(false),
61  is_error_(false) {
62  }
 // NOTE(review): listing line 63 is absent from this extraction (possibly the
 // destructor declaration) -- confirm against the original header.
64 
65  // Saves the given features into a TrainingSample. The features are copied,
66  // so may be deleted afterwards. Delete the return value after use.
 // NOTE(review): listing lines 67-68, the start of the declaration documented
 // above (it ends in "int num_features);"), are absent from this extraction.
69  int num_features);
70  // Constructs and returns a copy "randomized" by the method given by
71  // the randomizer index. If index is outside [0, kSampleRandomSize) then
72  // an exact copy is returned.
73  TrainingSample* RandomizedCopy(int index) const;
74  // Constructs and returns an exact copy.
75  TrainingSample* Copy() const;
76 
77  // WARNING! Serialize/DeSerialize do not save/restore the "cache" data
78  // members, which are mostly the mapped features and the weight.
79  // It is assumed these can all be reconstructed from what is saved.
80  // Writes to the given file. Returns false in case of error.
81  bool Serialize(FILE* fp) const;
82  // Creates from the given file. Returns NULL in case of error.
83  // If swap is true, assumes a big/little-endian swap is needed.
84  static TrainingSample* DeSerializeCreate(bool swap, FILE* fp);
85  // Reads from the given file. Returns false in case of error.
86  // If swap is true, assumes a big/little-endian swap is needed.
87  bool DeSerialize(bool swap, FILE* fp);
88 
89  // Extracts the needed information from the CHAR_DESC_STRUCT.
90  void ExtractCharDesc(int feature_type, int micro_type,
91  int cn_type, int geo_type,
92  CHAR_DESC_STRUCT* char_desc);
93 
94  // Sets the mapped_features_ from the features_ using the provided
95  // feature_space to the indexed versions of the features.
96  void IndexFeatures(const IntFeatureSpace& feature_space);
97  // Sets the mapped_features_ from the features_ using the provided
98  // feature_map.
99  void MapFeatures(const IntFeatureMap& feature_map);
100 
101  // Returns a pix representing the sample. (Int features only.)
102  Pix* RenderToPix(const UNICHARSET* unicharset) const;
103  // Displays the features in the given window with the given color.
104  void DisplayFeatures(ScrollView::Color color, ScrollView* window) const;
105 
106  // Returns a pix of the original sample image. The pix is padded all round
107  // by padding wherever possible.
108  // The returned Pix must be pixDestroyed after use.
109  // If the input page_pix is NULL, NULL is returned.
110  Pix* GetSamplePix(int padding, Pix* page_pix) const;
111 
112  // Accessors.
 // NOTE(review): listing line 113 is absent from this extraction; it is the
 // signature of the getter whose body returns class_id_ below.
114  return class_id_;
115  }
116  void set_class_id(int id) {
117  class_id_ = id;
118  }
119  int font_id() const {
120  return font_id_;
121  }
122  void set_font_id(int id) {
123  font_id_ = id;
124  }
125  int page_num() const {
126  return page_num_;
127  }
128  void set_page_num(int page) {
129  page_num_ = page;
130  }
131  const TBOX& bounding_box() const {
132  return bounding_box_;
133  }
134  void set_bounding_box(const TBOX& box) {
135  bounding_box_ = box;
136  }
137  int num_features() const {
138  return num_features_;
139  }
140  const INT_FEATURE_STRUCT* features() const {
141  return features_;
142  }
143  int num_micro_features() const {
144  return num_micro_features_;
145  }
146  const MicroFeature* micro_features() const {
147  return micro_features_;
148  }
 // No bounds check: index is used directly on cn_feature_, which has
 // kNumCNParams elements.
149  float cn_feature(int index) const {
150  return cn_feature_[index];
151  }
 // No bounds check: index is used directly on geo_feature_, which has
 // GeoCount elements.
152  int geo_feature(int index) const {
153  return geo_feature_[index];
154  }
155  double weight() const {
156  return weight_;
157  }
158  void set_weight(double value) {
159  weight_ = value;
160  }
161  double max_dist() const {
162  return max_dist_;
163  }
164  void set_max_dist(double value) {
165  max_dist_ = value;
166  }
167  int sample_index() const {
168  return sample_index_;
169  }
170  void set_sample_index(int value) {
171  sample_index_ = value;
172  }
173  bool features_are_mapped() const {
174  return features_are_mapped_;
175  }
 // NOTE(review): listing line 176 is absent from this extraction; it is the
 // signature of the accessor below, which returns mapped_features_ after
 // asserting features_are_mapped_.
177  ASSERT_HOST(features_are_mapped_);
178  return mapped_features_;
179  }
 // NOTE(review): listing line 180 is absent from this extraction; it is the
 // signature of the accessor below, which returns the same mapped_features_
 // member but asserts features_are_indexed_ instead.
181  ASSERT_HOST(features_are_indexed_);
182  return mapped_features_;
183  }
184  bool is_error() const {
185  return is_error_;
186  }
187  void set_is_error(bool value) {
188  is_error_ = value;
189  }
190 
191  private:
192  // Unichar id that this sample represents. There obviously must be a
193  // reference UNICHARSET somewhere. Usually in TrainingSampleSet.
194  UNICHAR_ID class_id_;
195  // Font id in which this sample was printed. Refers to a fontinfo_table_ in
196  // MasterTrainer.
197  int font_id_;
198  // Number of the page that the sample came from.
199  int page_num_;
200  // Bounding box of sample in original image.
201  TBOX bounding_box_;
202  // Number of INT_FEATURE_STRUCT in features_ array.
203  int num_features_;
204  // Number of MicroFeature in micro_features_ array.
205  int num_micro_features_;
206  // Array of num_features_ int features.
207  INT_FEATURE_STRUCT* features_;
208  // Array of num_micro_features_ micro features.
209  MicroFeature* micro_features_;
210  // The one and only CN feature. Indexed by NORM_PARAM_NAME enum.
211  float cn_feature_[kNumCNParams];
212  // The one and only geometric feature. (Aims at replacing cn_feature_).
213  // Indexed by GeoParams enum in picofeat.h
214  int geo_feature_[GeoCount];
215 
216  // Non-serialized cache data.
217  // Weight used for boosting training.
218  double weight_;
219  // Maximum distance to other samples of same class/font used in computing
220  // the canonical sample.
221  double max_dist_;
222  // Global index of this sample.
223  int sample_index_;
224  // Indexed/mapped features, as indicated by the bools below. Both public
 // accessors for this vector return this one member; which interpretation
 // is valid depends on which flag is set.
225  GenericVector<int> mapped_features_;
226  bool features_are_indexed_;
227  bool features_are_mapped_;
228  // True if the last classification was an error by the current definition.
229  bool is_error_;
230 
231  // Randomizing factors.
 // Array sizes match kSampleYShiftSize and kSampleScaleSize; the values are
 // defined outside this header.
232  static const int kYShiftValues[kSampleYShiftSize];
233  static const double kScaleValues[kSampleScaleSize];
234 };
235 
// NOTE(review): ELISTIZEH presumably comes from elst.h and declares the list
// and iterator types for TrainingSample (which derives from ELIST_LINK);
// confirm against elst.h.
236 ELISTIZEH(TrainingSample)
237 
238 } // namespace tesseract
239 
240 #endif // TESSERACT_TRAINING_TRAININGSAMPLE_H__