diff options
| author | Akshay Nair <phenax5@gmail.com> | 2024-12-24 22:28:38 +0530 |
|---|---|---|
| committer | Akshay Nair <phenax5@gmail.com> | 2024-12-24 22:42:34 +0530 |
| commit | 459488a2e777380fcb65e3b4dd355fe525ff77ca (patch) | |
| tree | bd21b71b73fc627d37e91e7800dd514706e49942 /cpp/libchelleport.cpp | |
| parent | 70e3920556496e5fecb5fedddf1067b2522fcac7 (diff) | |
| download | chelleport-459488a2e777380fcb65e3b4dd355fe525ff77ca.tar.gz chelleport-459488a2e777380fcb65e3b4dd355fe525ff77ca.zip | |
Add search mode for text-based searching with OCR
Diffstat (limited to '')
| -rw-r--r-- | cpp/libchelleport.cpp | 10 |
1 files changed, 6 insertions, 4 deletions
```diff
diff --git a/cpp/libchelleport.cpp b/cpp/libchelleport.cpp
index 8f2e9f2..345a454 100644
--- a/cpp/libchelleport.cpp
+++ b/cpp/libchelleport.cpp
@@ -13,6 +13,8 @@ std::vector<OCRMatch> extractTextCoordinates(const char *imagePath);
 #define CONFIDENCE_THRESHOLD 30.
+#define MIN_CHARACTER_COUNT 2
+const tesseract::PageIteratorLevel RESULT_ITER_MODE = tesseract::RIL_WORD;
 OCRMatch *findWordCoordinates(const char *image_path, int *size) {
 auto boxes = extractTextCoordinates(image_path);
@@ -49,7 +51,7 @@ std::vector<OCRMatch> extractTextCoordinates(const char *imagePath) {
 tesseract->Recognize(0);
 tesseract::ResultIterator *iterator = tesseract->GetIterator();
- tesseract::PageIteratorLevel level = tesseract::RIL_TEXTLINE;
+ auto level = RESULT_ITER_MODE;
 if (iterator != 0) {
 do {
@@ -58,9 +60,9 @@ std::vector<OCRMatch> extractTextCoordinates(const char *imagePath) {
 int x1, y1, x2, y2;
 iterator->BoundingBox(level, &x1, &y1, &x2, &y2);
- if (conf > CONFIDENCE_THRESHOLD && word != nullptr && strlen(word) >= 2) {
- OCRMatch box{x1, y1, x2, y2, word};
- results.push_back(box);
+ if (conf > CONFIDENCE_THRESHOLD && word != nullptr &&
+ strlen(word) >= MIN_CHARACTER_COUNT) {
+ results.push_back(OCRMatch{x1, y1, x2, y2, word});
 }
 } while (iterator->Next(level));
 }
```
