// Source blob: c74058d0680dd34b3b1ea8d57213c3c100d18c14
#include <leptonica/allheaders.h>
#include <tesseract/publictypes.h>
#include <vector>
// One OCR hit: two integer coordinate pairs plus a C-string pointer.
// Judging by the member names, the pairs presumably delimit the matched
// text's bounding box and `text` presumably points at the recognized string;
// who owns that string is not visible from this declaration -- confirm in the
// implementation.
//
// NOTE: the ocr hs module keeps its own record of this struct's size and
// alignment. Update it there whenever the members below change.
struct OCRMatch {
  int startX;
  int startY;
  int endX;
  int endY;
  const char *text;
};
// OCR configuration
// Typed constants instead of #define macros, so the values obey scoping rules
// and are type-checked. Names, values and literal types (double / int) are
// unchanged, so existing uses in expressions keep working.
// How each value is applied is decided by the implementation, which is not
// visible here (presumably a confidence cutoff and a minimum text length).
constexpr double CONFIDENCE_THRESHOLD = 25.;
constexpr int MIN_CHARACTER_COUNT = 3;
// Page-iterator level selected for OCR results: tesseract::RIL_WORD.
// Presumably this makes the result loop yield one entry per word -- confirm
// against the implementation that consumes it.
constexpr tesseract::PageIteratorLevel RESULT_ITER_MODE{tesseract::RIL_WORD};
// Preprocessing configuration
// Tuning values, presumably consumed by preprocessImage(); their exact effect
// is defined by the implementation, which is not visible here.
constexpr float contrast = 0.3;
constexpr float sharpness = 0.7;
constexpr float scaleFactor = 1;

// Per-channel weights, judging by the names used for an RGB -> grayscale
// conversion.
// NOTE(review): the usual Rec. 601 luma weights are R=0.299, G=0.587,
// B=0.114, so the red and blue values look swapped here. Confirm whether
// that is intentional (e.g. a different channel order at the call site)
// before changing them.
constexpr float grayscaleWeightRed = 0.114;
constexpr float grayscaleWeightGreen = 0.587;
constexpr float grayscaleWeightBlue = 0.299;
// Entry point declared with C linkage, so its symbol name is not mangled.
//   image_path: C string naming the image to process.
//   size:       out-parameter; presumably receives the number of OCRMatch
//               entries reachable through the returned pointer -- confirm in
//               the implementation.
// Ownership and lifetime of the returned OCRMatch data are not visible from
// this declaration.
extern "C" OCRMatch *findWordCoordinates(const char *image_path,
                                         /* returns */ int *size);
// Returns, by value, a vector of OCRMatch entries for the image named by the
// C string `imagePath`.
// NOTE(review): presumably runs OCR on that image and yields one entry per
// recognized piece of text -- the implementation is not visible here.
std::vector<OCRMatch> extractTextCoordinates(const char *imagePath);
// Takes one OCRMatch by const reference and returns nothing.
// NOTE(review): presumably prints or displays the match -- confirm in the
// implementation.
void showMatch(const OCRMatch &match);
// Takes a pointer to the caller's Pix pointer, so the callee is able to
// replace the caller's image object as well as modify it; returns nothing.
// NOTE(review): presumably applies the preprocessing configuration constants
// (contrast, sharpness, scale, grayscale weights) -- confirm in the
// implementation.
void preprocessImage(Pix **image);
|