aboutsummaryrefslogtreecommitdiff
path: root/cpp
diff options
context:
space:
mode:
author Akshay Nair <phenax5@gmail.com> 2024-12-25 17:46:41 +0530
committer Akshay Nair <phenax5@gmail.com> 2024-12-25 18:09:18 +0530
commit 580439bf8dd33e00f6a668a4828eab01d24d7abf (patch)
tree 2f47ce1d6abbf33dfe232a13cc31a6534ca5dd5c /cpp
parent 4e74eeebbaa441cda3a6846c47d82516878f8f05 (diff)
download chelleport-580439bf8dd33e00f6a668a4828eab01d24d7abf.tar.gz
chelleport-580439bf8dd33e00f6a668a4828eab01d24d7abf.zip
Add image preprocessing before ocr
Diffstat (limited to '')
-rw-r--r-- cpp/libchelleport.cpp 41
1 files changed, 39 insertions, 2 deletions
diff --git a/cpp/libchelleport.cpp b/cpp/libchelleport.cpp
index 4ec3599..5653068 100644
--- a/cpp/libchelleport.cpp
+++ b/cpp/libchelleport.cpp
@@ -19,6 +19,8 @@ OCRMatch *findWordCoordinates(const char *image_path, int *size) {
// for (const auto &match : matches)
// showMatch(match);
+ printf("Count: %ld\n", matches.size());
+
*size = matches.size();
return ptr;
}
@@ -38,12 +40,16 @@ std::vector<OCRMatch> extractTextCoordinates(const char *imagePath) {
return results;
}
+ preprocessImage(&image);
+
+ // printf("imagePath: %s\n", imagePath);
+ // pixWrite(imagePath, image, IFF_JFIF_JPEG);
+
tesseract->SetImage(image);
tesseract->Recognize(0);
tesseract::ResultIterator *iterator = tesseract->GetIterator();
auto level = RESULT_ITER_MODE;
- int x1, y1, x2, y2;
if (iterator != 0) {
do {
@@ -52,8 +58,11 @@ std::vector<OCRMatch> extractTextCoordinates(const char *imagePath) {
if (conf > CONFIDENCE_THRESHOLD && word != nullptr &&
strlen(word) >= MIN_CHARACTER_COUNT) {
+ int x1, y1, x2, y2;
iterator->BoundingBox(level, &x1, &y1, &x2, &y2);
- results.push_back(OCRMatch{x1, y1, x2, y2, word});
+ results.push_back(
+ OCRMatch{(int)(x1 / scaleFactor), (int)(y1 / scaleFactor),
+ (int)(x2 / scaleFactor), (int)(y2 / scaleFactor), word});
}
} while (iterator->Next(level));
}
@@ -66,6 +75,34 @@ std::vector<OCRMatch> extractTextCoordinates(const char *imagePath) {
return results;
}
+/**
+ * Prepares an image for OCR by modifying it in place: optional scaling,
+ * grayscale conversion, contrast enhancement, then unsharp masking.
+ *
+ * The tuning values (scaleFactor, grayscaleWeightRed/Green/Blue, contrast,
+ * sharpness) are file-level settings defined outside this function.
+ *
+ * Failure handling:
+ *  - If scaling fails, *image is left as nullptr (the same outcome as before
+ *    this guard existed). The caller divides OCR bounding boxes by
+ *    scaleFactor, so an unscaled image must never be handed back when
+ *    scaleFactor != 1.
+ *  - If grayscale conversion or sharpening fails (Leptonica reports errors by
+ *    returning NULL, e.g. for depths the routine does not accept), the image
+ *    from the previous step is kept instead of being destroyed and replaced
+ *    by a null pointer.
+ *
+ * @param image Address of the Pix pointer to process. Whenever a step
+ *              produces a new Pix, the old one is destroyed and *image is
+ *              updated to the new one. A null `image` or null `*image` is a
+ *              no-op.
+ */
+void preprocessImage(Pix **image) {
+  if (image == nullptr || *image == nullptr) {
+    return;
+  }
+
+  // Scale
+  if (scaleFactor != 1) {
+    Pix *scaled = pixScale(*image, scaleFactor, scaleFactor);
+    pixDestroy(image);
+    *image = scaled;
+    if (scaled == nullptr) {
+      // Do not continue with (or fall back to) an unscaled image: the
+      // caller's coordinate mapping assumes the scale was applied.
+      return;
+    }
+  }
+
+  // Grayscale
+  if (pixGetDepth(*image) > 8) {
+    Pix *gray = pixConvertRGBToGray(*image, grayscaleWeightRed,
+                                    grayscaleWeightGreen, grayscaleWeightBlue);
+    // Only swap when the conversion produced an image; otherwise keep the
+    // current one rather than leaving *image null.
+    if (gray != nullptr) {
+      pixDestroy(image);
+      *image = gray;
+    }
+  }
+
+  // Contrast (destination == source, so the enhancement is applied in place)
+  pixContrastTRC(*image, *image, contrast);
+
+  // Sharpness
+  // temp = pixUnsharpMaskingGrayFast(*image, 1, sharpness, 1);
+  Pix *sharpened = pixUnsharpMasking(*image, 1, sharpness);
+  if (sharpened != nullptr) {
+    pixDestroy(image);
+    *image = sharpened;
+  }
+}
+
void showMatch(const OCRMatch &match) {
std::cout << "Text: " << match.text << "; Position: (" << match.startX << ","
<< match.startY << ") -> (" << match.endX << "," << match.endY