From 83e2570d3c8da9920d66a00c4bdf5650fe1b3336 Mon Sep 17 00:00:00 2001 From: Akshay Nair Date: Wed, 25 Dec 2024 22:33:03 +0530 Subject: Parallel ocr evaluation for sections of screen + many refactorings --- cpp/libchelleport.cpp | 122 +++++++++++++++++--------------------------------- 1 file changed, 41 insertions(+), 81 deletions(-) (limited to 'cpp/libchelleport.cpp') diff --git a/cpp/libchelleport.cpp b/cpp/libchelleport.cpp index 923df6c..67abb06 100644 --- a/cpp/libchelleport.cpp +++ b/cpp/libchelleport.cpp @@ -1,19 +1,20 @@ -#include -#include -#include -#include -#include #include +#include +#include #include +#include #include +#include "../include/image.h" #include "../include/libchelleport.h" +#include "../include/recognizer.h" +extern "C" { OCRMatch *findWordCoordinates(const char *image_path, int *size) { std::vector matches; - MEASURE("OCR", { matches = extractTextCoordinates(image_path); }); + MEASURE("OCR", { matches = extractTextMatches(image_path); }); - std::cout << "Word count: " << matches.size() << std::endl; + std::cout << "Match count: " << matches.size() << std::endl; static OCRMatch *ptr = new OCRMatch[matches.size()]; std::copy(matches.begin(), matches.end(), ptr); @@ -21,101 +22,60 @@ OCRMatch *findWordCoordinates(const char *image_path, int *size) { *size = matches.size(); return ptr; } +} -std::vector extractTextCoordinates(const char *imagePath) { +std::vector extractTextMatches(const char *imagePath) { std::vector results; - auto tesseract = initializeTesseract(); - if (tesseract == nullptr) - return results; - - Pix *image = loadImage(imagePath); + Pix *image = image::loadImage(imagePath); if (image == nullptr) return results; // printf("imagePath: %s\n", imagePath); // pixWrite(imagePath, image, IFF_JFIF_JPEG); - tesseract->SetImage(image); - tesseract->Recognize(0); - - tesseract::ResultIterator *iterator = tesseract->GetIterator(); - auto level = RESULT_ITER_MODE; - - if (iterator != 0) { - do { - if (iterator->Confidence(level) > CONFIDENCE_THRESHOLD) { - const char *word = iterator->GetUTF8Text(level); - - if (word != nullptr && strlen(word) >= MIN_CHARACTER_COUNT) { - int x1, y1, x2, y2; - iterator->BoundingBox(level, &x1, &y1, &x2, &y2); - OCRMatch match({(int)(x1 / scaleFactor), (int)(y1 / scaleFactor), - (int)(x2 / scaleFactor), (int)(y2 / scaleFactor), - word}); - results.push_back(match); - } - } - } while (iterator->Next(level)); - } + int width = pixGetWidth(image); + int height = pixGetHeight(image); - delete iterator; - tesseract->End(); - delete tesseract; - pixDestroy(&image); + std::vector> recognizers; + recognizers.push_back( + std::make_unique(0, 0, width / 2, height / 2)); - return results; -} + recognizers.push_back( + std::make_unique(width / 2, 0, width / 2, height / 2)); -inline tesseract::TessBaseAPI *initializeTesseract() { - auto *tesseract = new tesseract::TessBaseAPI(); - tesseract->SetPageSegMode(tesseract::PSM_AUTO); + recognizers.push_back( + std::make_unique(0, height / 2, width / 2, height / 2)); - if (tesseract->Init(nullptr, "eng", tesseract::OEM_LSTM_ONLY)) { - std::cerr << "Could not initialize tesseract." << std::endl; - return nullptr; - } + recognizers.push_back(std::make_unique(width / 2, height / 2, + width / 2, height / 2)); - return tesseract; + return runRecognizers(recognizers, image); } -inline Pix *loadImage(const char *imagePath) { - Pix *image = pixRead(imagePath); - if (!image) { - std::cerr << "Could not load image " << imagePath << std::endl; - return nullptr; - } - - preprocessImage(&image); - - return image; -} +std::vector +runRecognizers(std::vector> &recognizers, + Pix *image) { + std::vector results; + std::shared_ptr sharedImage(image, [](Pix *p) { pixDestroy(&p); }); -void preprocessImage(Pix **image) { - Pix *temp; + std::vector workers; + workers.reserve(recognizers.size()); - // Scale - if (scaleFactor != 1) { - INLINE_IMAGE_PROC(pixScale(*image, scaleFactor, scaleFactor)); + for (auto &ext : recognizers) { + workers.push_back(std::thread( + [&ext, &sharedImage]() { ext->recognize(sharedImage.get()); })); } - // Grayscale - if (pixGetDepth(*image) > 8) { - INLINE_IMAGE_PROC(pixConvertRGBToGray( - *image, grayscaleWeightRed, grayscaleWeightGreen, grayscaleWeightBlue)); + for (std::thread &t : workers) { + if (t.joinable()) + t.join(); } - // Contrast - pixContrastTRC(*image, *image, contrast); - - // Sharpness - // INLINE_IMAGE_PROC(pixUnsharpMaskingGrayFast(*image, 1, sharpness, 1)); - INLINE_IMAGE_PROC(pixUnsharpMasking(*image, 1, sharpness)); -} + for (auto &ext : recognizers) { + for (auto &match : ext->getResults()) + results.push_back(match); + } -void printMatch(const OCRMatch &match) { - std::cout << "Text: " << match.text << "; Position: (" << match.startX << "," - << match.startY << ") -> (" << match.endX << "," << match.endY - << ")" << std::endl - << std::endl; + return results; } -- cgit v1.3.1