Tesseract
3.02
Main Page
Related Pages
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
imagefind.h
Go to the documentation of this file.
1
2
// File: imagefind.h
3
// Description: Class to find image and drawing regions in an image
4
// and create a corresponding list of empty blobs.
5
// Author: Ray Smith
6
// Created: Fri Aug 01 10:50:01 PDT 2008
7
//
8
// (C) Copyright 2008, Google Inc.
9
// Licensed under the Apache License, Version 2.0 (the "License");
10
// you may not use this file except in compliance with the License.
11
// You may obtain a copy of the License at
12
// http://www.apache.org/licenses/LICENSE-2.0
13
// Unless required by applicable law or agreed to in writing, software
14
// distributed under the License is distributed on an "AS IS" BASIS,
15
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
// See the License for the specific language governing permissions and
17
// limitations under the License.
18
//
20
21
#ifndef TESSERACT_TEXTORD_IMAGEFIND_H__
22
#define TESSERACT_TEXTORD_IMAGEFIND_H__
23
24
#include "
host.h
"
25
26
struct
Boxa;
27
struct
Pix;
28
struct
Pixa;
29
class
TBOX
;
30
class
FCOORD
;
31
class
TO_BLOCK
;
32
class
BLOBNBOX_LIST;
33
34
namespace
tesseract
{
35
36
class
ColPartitionGrid;
37
class
ColPartition_LIST;
38
class
TabFind;
39
40
// The ImageFind class is a simple static function wrapper class that
41
// exposes the FindImages function and some useful helper functions.
42
class
ImageFind
{
43
public
:
44
// Finds image regions within the BINARY source pix (page image) and returns
45
// the image regions as a mask image.
46
// The returned pix may be NULL, meaning no images found.
47
// If not NULL, it must be PixDestroyed by the caller.
48
static
Pix*
FindImages
(Pix* pix);
49
50
// Generates a Boxa, Pixa pair from the input binary (image mask) pix,
51
// analogous to pixConnComp, except that connected components which are nearly
52
// rectangular are replaced with solid rectangles.
53
// The returned boxa, pixa may be NULL, meaning no images found.
54
// If not NULL, they must be destroyed by the caller.
55
// Resolution of pix should match the source image (Tesseract::pix_binary_)
56
// so the output coordinate systems match.
57
static
void
ConnCompAndRectangularize
(Pix* pix, Boxa** boxa, Pixa** pixa);
58
59
// Returns true if there is a rectangle in the source pix, such that all
60
// pixel rows and column slices outside of it have less than
61
// min_fraction of the pixels black, and within max_skew_gradient fraction
62
// of the pixels on the inside, there are at least max_fraction of the
63
// pixels black. In other words, the inside of the rectangle looks roughly
64
// rectangular, and the outside of it looks like extra bits.
65
// On return, the rectangle is defined by x_start, y_start, x_end and y_end.
66
// Note: the algorithm is iterative, allowing it to slice off pixels from
67
// one edge, allowing it to then slice off more pixels from another edge.
68
static
bool
pixNearlyRectangular
(Pix* pix,
69
double
min_fraction,
double
max_fraction,
70
double
max_skew_gradient,
71
int
* x_start,
int
* y_start,
72
int
* x_end,
int
* y_end);
73
74
// Given an input pix, and a bounding rectangle, the sides of the rectangle
75
// are shrunk inwards until they bound any black pixels found within the
76
// original rectangle. Returns false if the rectangle contains no black
77
// pixels at all.
78
static
bool
BoundsWithinRect
(Pix* pix,
int
* x_start,
int
* y_start,
79
int
* x_end,
int
* y_end);
80
81
// Given a point in 3-D (RGB) space, returns the squared Euclidean distance
82
// of the point from the given line, defined by a pair of points in the 3-D
83
// (RGB) space, line1 and line2.
84
static
double
ColorDistanceFromLine
(
const
uinT8
* line1,
const
uinT8
* line2,
85
const
uinT8
* point);
86
87
// Returns the leptonica combined code for the given RGB triplet.
88
static
uinT32
ComposeRGB
(
uinT32
r,
uinT32
g
,
uinT32
b);
89
90
// Returns the input value clipped to a uinT8.
91
static
uinT8
ClipToByte
(
double
pixel);
92
93
// Computes the light and dark extremes of color in the given rectangle of
94
// the given pix, which is factor smaller than the coordinate system in rect.
95
// The light and dark points are taken to be the upper and lower 8th-ile of
96
// the most deviant of R, G and B. The values of the other 2 channels are
97
// computed by linear fit against the most deviant.
98
// The colors of the two points are returned in color1 and color2, with the
99
// alpha channel set to a scaled mean rms of the fits.
100
// If color_map1 is not null then it and color_map2 get rect pasted in them
101
// with the two calculated colors, and rms map gets a pasted rect of the rms.
102
// color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
103
static
void
ComputeRectangleColors
(
const
TBOX
& rect, Pix* pix,
int
factor,
104
Pix* color_map1, Pix* color_map2,
105
Pix* rms_map,
106
uinT8
* color1,
uinT8
* color2);
107
108
// Returns true if there are no black pixels in between the boxes.
109
// The im_box must represent the bounding box of the pix in tesseract
110
// coordinates, which may be negative, due to rotations to make the textlines
111
// horizontal. The boxes are rotated by rotation, which should undo such
112
// rotations, before mapping them onto the pix.
113
static
bool
BlankImageInBetween
(
const
TBOX
& box1,
const
TBOX
& box2,
114
const
TBOX
& im_box,
const
FCOORD
& rotation,
115
Pix* pix);
116
117
// Returns the number of pixels in box in the pix.
118
// The im_box must represent the bounding box of the pix in tesseract
119
// coordinates, which may be negative, due to rotations to make the textlines
120
// horizontal. The boxes are rotated by rotation, which should undo such
121
// rotations, before mapping them onto the pix.
122
static
int
CountPixelsInRotatedBox
(
TBOX
box,
const
TBOX
& im_box,
123
const
FCOORD
& rotation, Pix* pix);
124
125
126
// Locates all the image partitions in the part_grid, that were found by a
127
// previous call to FindImagePartitions, marks them in the image_mask,
128
// removes them from the grid, and deletes them. This makes it possible to
129
// call FindImagePartitions again to produce less broken-up and less
130
// overlapping image partitions.
131
// rerotation specifies how to rotate the partition coords to match
132
// the image_mask, since this function is used after orientation correction.
133
static
void
TransferImagePartsToImageMask
(
const
FCOORD
& rerotation,
134
ColPartitionGrid
* part_grid,
135
Pix* image_mask);
136
137
// Runs a CC analysis on the image_pix mask image, and creates
138
// image partitions from them, cutting out strong text, and merging with
139
// nearby image regions such that they don't interfere with text.
140
// Rotation and rerotation specify how to rotate image coords to match
141
// the blob and partition coords and back again.
142
// The input/output part_grid owns all the created partitions, and
143
// the partitions own all the fake blobs that belong in the partitions.
144
// Since the other blobs in the other partitions will be owned by the block,
145
// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
146
// situation and collect the image blobs.
147
static
void
FindImagePartitions
(Pix* image_pix,
148
const
FCOORD
& rotation,
149
const
FCOORD
& rerotation,
150
TO_BLOCK
* block,
151
TabFind
* tab_grid,
152
ColPartitionGrid
* part_grid,
153
ColPartition_LIST* big_parts);
154
};
155
156
}
// namespace tesseract.
157
158
#endif // TESSERACT_TEXTORD_IMAGEFIND_H__
159
mnt
data
src
tesseract-ocr
textord
imagefind.h
Generated on Thu Nov 1 2012 20:19:50 for Tesseract by
1.8.1