1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>
#include <tesseract/publictypes.h>
#include <vector>
#include "../include/libchelleport.h"
OCRMatch *findWordCoordinates(const char *image_path, int *size) {
auto matches = extractTextCoordinates(image_path);
static OCRMatch *ptr = new OCRMatch[matches.size()];
std::copy(matches.begin(), matches.end(), ptr);
// for (const auto &match : matches)
// showMatch(match);
printf("Count: %ld\n", matches.size());
*size = matches.size();
return ptr;
}
std::vector<OCRMatch> extractTextCoordinates(const char *imagePath) {
std::vector<OCRMatch> results;
tesseract::TessBaseAPI *tesseract = new tesseract::TessBaseAPI();
if (tesseract->Init(nullptr, "eng")) {
std::cerr << "Could not initialize tesseract." << std::endl;
return results;
}
Pix *image = pixRead(imagePath);
if (!image) {
std::cerr << "Could not load image " << imagePath << std::endl;
return results;
}
preprocessImage(&image);
// printf("imagePath: %s\n", imagePath);
// pixWrite(imagePath, image, IFF_JFIF_JPEG);
tesseract->SetImage(image);
tesseract->Recognize(0);
tesseract::ResultIterator *iterator = tesseract->GetIterator();
auto level = RESULT_ITER_MODE;
if (iterator != 0) {
do {
float conf = iterator->Confidence(level);
const char *word = iterator->GetUTF8Text(level);
if (conf > CONFIDENCE_THRESHOLD && word != nullptr &&
strlen(word) >= MIN_CHARACTER_COUNT) {
int x1, y1, x2, y2;
iterator->BoundingBox(level, &x1, &y1, &x2, &y2);
results.push_back(
OCRMatch{(int)(x1 / scaleFactor), (int)(y1 / scaleFactor),
(int)(x2 / scaleFactor), (int)(y2 / scaleFactor), word});
}
} while (iterator->Next(level));
}
delete iterator;
tesseract->End();
delete tesseract;
pixDestroy(&image);
return results;
}
void preprocessImage(Pix **image) {
Pix *temp;
// Scale
if (scaleFactor != 1) {
temp = pixScale(*image, scaleFactor, scaleFactor);
pixDestroy(image);
*image = temp;
}
// Grayscale
if (pixGetDepth(*image) > 8) {
temp = pixConvertRGBToGray(*image, grayscaleWeightRed, grayscaleWeightGreen,
grayscaleWeightBlue);
pixDestroy(image);
*image = temp;
}
// Contrast
pixContrastTRC(*image, *image, contrast);
// Sharpness
// temp = pixUnsharpMaskingGrayFast(*image, 1, sharpness, 1);
temp = pixUnsharpMasking(*image, 1, sharpness);
pixDestroy(image);
*image = temp;
}
void showMatch(const OCRMatch &match) {
std::cout << "Text: " << match.text << "; Position: (" << match.startX << ","
<< match.startY << ") -> (" << match.endX << "," << match.endY
<< ")" << "\n\n";
}
|