diff options
| author | Akshay Nair <phenax5@gmail.com> | 2024-12-25 17:46:41 +0530 |
|---|---|---|
| committer | Akshay Nair <phenax5@gmail.com> | 2024-12-25 18:09:18 +0530 |
| commit | 580439bf8dd33e00f6a668a4828eab01d24d7abf (patch) | |
| tree | 2f47ce1d6abbf33dfe232a13cc31a6534ca5dd5c /cpp | |
| parent | 4e74eeebbaa441cda3a6846c47d82516878f8f05 (diff) | |
| download | chelleport-580439bf8dd33e00f6a668a4828eab01d24d7abf.tar.gz chelleport-580439bf8dd33e00f6a668a4828eab01d24d7abf.zip | |
Add image preprocessing before OCR
Diffstat (limited to 'cpp')
| -rw-r--r-- | cpp/libchelleport.cpp | 41 |
1 file changed, 39 insertions, 2 deletions
diff --git a/cpp/libchelleport.cpp b/cpp/libchelleport.cpp
index 4ec3599..5653068 100644
--- a/cpp/libchelleport.cpp
+++ b/cpp/libchelleport.cpp
@@ -19,6 +19,8 @@ OCRMatch *findWordCoordinates(const char *image_path, int *size) {
   // for (const auto &match : matches)
   //   showMatch(match);
 
+  printf("Count: %ld\n", matches.size());
+
   *size = matches.size();
   return ptr;
 }
@@ -38,12 +40,16 @@ std::vector<OCRMatch> extractTextCoordinates(const char *imagePath) {
     return results;
   }
 
+  preprocessImage(&image);
+
+  // printf("imagePath: %s\n", imagePath);
+  // pixWrite(imagePath, image, IFF_JFIF_JPEG);
+
   tesseract->SetImage(image);
   tesseract->Recognize(0);
 
   tesseract::ResultIterator *iterator = tesseract->GetIterator();
   auto level = RESULT_ITER_MODE;
-  int x1, y1, x2, y2;
 
   if (iterator != 0) {
     do {
@@ -52,8 +58,11 @@ std::vector<OCRMatch> extractTextCoordinates(const char *imagePath) {
 
       if (conf > CONFIDENCE_THRESHOLD && word != nullptr &&
           strlen(word) >= MIN_CHARACTER_COUNT) {
+        int x1, y1, x2, y2;
         iterator->BoundingBox(level, &x1, &y1, &x2, &y2);
-        results.push_back(OCRMatch{x1, y1, x2, y2, word});
+        results.push_back(
+            OCRMatch{(int)(x1 / scaleFactor), (int)(y1 / scaleFactor),
+                     (int)(x2 / scaleFactor), (int)(y2 / scaleFactor), word});
       }
     } while (iterator->Next(level));
   }
@@ -66,6 +75,34 @@ std::vector<OCRMatch> extractTextCoordinates(const char *imagePath) {
   return results;
 }
 
+void preprocessImage(Pix **image) {
+  Pix *temp;
+
+  // Scale
+  if (scaleFactor != 1) {
+    temp = pixScale(*image, scaleFactor, scaleFactor);
+    pixDestroy(image);
+    *image = temp;
+  }
+
+  // Grayscale
+  if (pixGetDepth(*image) > 8) {
+    temp = pixConvertRGBToGray(*image, grayscaleWeightRed, grayscaleWeightGreen,
+                               grayscaleWeightBlue);
+    pixDestroy(image);
+    *image = temp;
+  }
+
+  // Contrast
+  pixContrastTRC(*image, *image, contrast);
+
+  // Sharpness
+  // temp = pixUnsharpMaskingGrayFast(*image, 1, sharpness, 1);
+  temp = pixUnsharpMasking(*image, 1, sharpness);
+  pixDestroy(image);
+  *image = temp;
+}
+
 void showMatch(const OCRMatch &match) {
   std::cout << "Text: " << match.text << "; Position: (" << match.startX << ","
             << match.startY << ") -> (" << match.endX << "," << match.endY
