Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
werd.h
Go to the documentation of this file.
1
/**********************************************************************
2
* File: werd.h
3
* Description: Code for the WERD class.
4
* Author: Ray Smith
5
* Created: Tue Oct 08 14:32:12 BST 1991
6
*
7
* (C) Copyright 1991, Hewlett-Packard Ltd.
8
** Licensed under the Apache License, Version 2.0 (the "License");
9
** you may not use this file except in compliance with the License.
10
** You may obtain a copy of the License at
11
** http://www.apache.org/licenses/LICENSE-2.0
12
** Unless required by applicable law or agreed to in writing, software
13
** distributed under the License is distributed on an "AS IS" BASIS,
14
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
** See the License for the specific language governing permissions and
16
** limitations under the License.
17
*
18
**********************************************************************/
19
20
#ifndef WERD_H
21
#define WERD_H
22
23
#include "
params.h
"
24
#include "
bits16.h
"
25
#include "
elst2.h
"
26
#include "
strngs.h
"
27
#include "
blckerr.h
"
28
#include "
stepblob.h
"
29
30
// Bit numbers for the per-word flag set. Each enumerator is a bit index (not
// a mask value): WERD::flag() / WERD::set_flag() pass it straight to
// BITS16::bit() / BITS16::set_bit(). Enumerators take consecutive values
// starting at 0, so the order below must not be changed.
enum WERD_FLAGS {
  W_SEGMENTED,           ///< correctly segmented
  W_ITALIC,              ///< italic text
  W_BOLD,                ///< bold text
  W_BOL,                 ///< start of line
  W_EOL,                 ///< end of line
  W_NORMALIZED,          ///< flags
  W_SCRIPT_HAS_XHEIGHT,  ///< x-height concept makes sense.
  W_SCRIPT_IS_LATIN,     ///< Special case latin for y. splitting.
  W_DONT_CHOP,           ///< fixed pitch chopped
  W_REP_CHAR,            ///< repeated character
  W_FUZZY_SP,            ///< fuzzy space
  W_FUZZY_NON,           ///< fuzzy nonspace
  W_INVERSE              ///< white on black
};
46
47
// Bit numbers for the per-word display flag set. As with WERD_FLAGS these are
// bit indices, consecutive from 0, so the order below must not be changed.
enum DISPLAY_FLAGS {
  /* Display flags bit number allocations */
  DF_BOX,           ///< Bounding box
  DF_TEXT,          ///< Correct ascii
  DF_POLYGONAL,     ///< Polyg approx
  DF_EDGE_STEP,     ///< Edge steps
  DF_BN_POLYGONAL,  ///< BL normalised polyg approx
  DF_BLAMER         ///< Blamer information
};
57
58
class
ROW
;
//forward decl
59
60
class
WERD
:
public
ELIST2_LINK
{
61
public
:
62
WERD
() {}
63
// WERD constructed with:
64
// blob_list - blobs of the word (we take this list's contents)
65
// blanks - number of blanks before the word
66
// text - correct text (outlives WERD)
67
WERD
(C_BLOB_LIST *blob_list,
uinT8
blanks,
const
char
*
text
);
68
69
// WERD constructed from:
70
// blob_list - blobs in the word
71
// clone - werd to clone flags, etc from.
72
WERD
(C_BLOB_LIST *blob_list,
WERD
*clone);
73
74
// Construct a WERD from a single_blob and clone the flags from this.
75
// W_BOL and W_EOL flags are set according to the given values.
76
WERD
*
ConstructFromSingleBlob
(
bool
bol,
bool
eol,
C_BLOB
* blob);
77
78
~WERD
() {
79
}
80
81
// assignment
82
WERD
&
operator=
(
const
WERD
&source);
83
84
// This method returns a new werd constructed using the blobs in the input
85
// all_blobs list, which correspond to the blobs in this werd object. The
86
// blobs used to construct the new word are consumed and removed from the
87
// input all_blobs list.
88
// Returns NULL if the word couldn't be constructed.
89
// Returns original blobs for which no matches were found in the output list
90
// orphan_blobs (appends).
91
WERD
*
ConstructWerdWithNewBlobs
(C_BLOB_LIST *all_blobs,
92
C_BLOB_LIST *orphan_blobs);
93
94
// Accessors for reject / DUFF blobs in various formats
95
C_BLOB_LIST *
rej_cblob_list
() {
// compact format
96
return
&rej_cblobs;
97
}
98
99
// Accessors for good blobs in various formats.
100
C_BLOB_LIST *
cblob_list
() {
// get compact blobs
101
return
&cblobs;
102
}
103
104
uinT8
space
() {
// access function
105
return
blanks;
106
}
107
void
set_blanks
(
uinT8
new_blanks) {
108
blanks = new_blanks;
109
}
110
int
script_id
()
const
{
111
return
script_id_;
112
}
113
void
set_script_id
(
int
id
) {
114
script_id_ = id;
115
}
116
117
TBOX
bounding_box
();
// compute bounding box
118
119
const
char
*
text
()
const
{
return
correct.
string
(); }
120
void
set_text
(
const
char
*new_text) { correct = new_text; }
121
122
BOOL8
flag
(
WERD_FLAGS
mask)
const
{
return
flags.
bit
(mask); }
123
void
set_flag
(
WERD_FLAGS
mask,
BOOL8
value) { flags.
set_bit
(mask, value); }
124
125
BOOL8
display_flag
(
uinT8
flag
)
const
{
return
disp_flags.
bit
(flag); }
126
void
set_display_flag
(
uinT8
flag
,
BOOL8
value) {
127
disp_flags.
set_bit
(flag, value);
128
}
129
130
WERD
*
shallow_copy
();
// shallow copy word
131
132
// reposition word by vector
133
void
move
(
const
ICOORD
vec);
134
135
// join other's blobs onto this werd, emptying out other.
136
void
join_on
(
WERD
* other);
137
138
// copy other's blobs onto this word, leaving other intact.
139
void
copy_on
(
WERD
* other);
140
141
// tprintf word metadata (but not blob innards)
142
void
print
();
143
144
#ifndef GRAPHICS_DISABLED
145
// plot word on window in a uniform colour
146
void
plot
(
ScrollView
*window,
ScrollView::Color
colour);
147
148
// Get the next color in the (looping) rainbow.
149
static
ScrollView::Color
NextColor
(
ScrollView::Color
colour);
150
151
// plot word on window in a rainbow of colours
152
void
plot
(
ScrollView
*window);
153
154
// plot rejected blobs in a rainbow of colours
155
void
plot_rej_blobs
(
ScrollView
*window);
156
#endif // GRAPHICS_DISABLED
157
158
private
:
159
uinT8
blanks;
// no of blanks
160
uinT8
dummy;
// padding
161
BITS16
flags;
// flags about word
162
BITS16
disp_flags;
// display flags
163
inT16
script_id_;
// From unicharset.
164
STRING
correct;
// correct text
165
C_BLOB_LIST cblobs;
// compacted blobs
166
C_BLOB_LIST rej_cblobs;
// DUFF blobs
167
};
168
169
// Invokes the ELIST2IZEH macro for WERD. The macro is not defined in this
// file (presumably it comes from elst2.h and declares the WERD list and
// iterator types; confirm against that header). No trailing semicolon, as
// in the original.
ELIST2IZEH(WERD)
170
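#include "ocrrow.h" // placed here, after WERD is fully declared (presumably because ocrrow.h needs the complete WERD definition; TODO confirm)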
171
// compare words by increasing order of left edge, suitable for qsort(3)
172
int
word_comparator
(
const
void
*word1p,
const
void
*word2p);
173
#endif
mnt
data
src
tesseract-ocr
ccstruct
werd.h
Generated on Thu Nov 1 2012 20:19:45 for Tesseract by
1.8.1