diff options
| author | Akshay Nair <phenax5@gmail.com> | 2024-12-25 22:33:03 +0530 |
|---|---|---|
| committer | Akshay Nair <phenax5@gmail.com> | 2024-12-25 22:42:27 +0530 |
| commit | 83e2570d3c8da9920d66a00c4bdf5650fe1b3336 (patch) | |
| tree | 5755d561acdb86422e2ce3621e217a9c05dfb640 /cpp/libchelleport.cpp | |
| parent | fb24e589290f7ffbee04972eed35fca37facdf1c (diff) | |
| download | chelleport-83e2570d3c8da9920d66a00c4bdf5650fe1b3336.tar.gz chelleport-83e2570d3c8da9920d66a00c4bdf5650fe1b3336.zip | |
Parallel ocr evaluation for sections of screen + many refactorings
Diffstat (limited to '')
| -rw-r--r-- | cpp/libchelleport.cpp | 120 |
1 files changed, 40 insertions, 80 deletions
diff --git a/cpp/libchelleport.cpp b/cpp/libchelleport.cpp index 923df6c..67abb06 100644 --- a/cpp/libchelleport.cpp +++ b/cpp/libchelleport.cpp @@ -1,19 +1,20 @@ -#include <algorithm> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include <iostream> #include <leptonica/allheaders.h> +#include <memory> +#include <ostream> #include <tesseract/baseapi.h> +#include <thread> #include <vector> +#include "../include/image.h" #include "../include/libchelleport.h" +#include "../include/recognizer.h" +extern "C" { OCRMatch *findWordCoordinates(const char *image_path, int *size) { std::vector<OCRMatch> matches; - MEASURE("OCR", { matches = extractTextCoordinates(image_path); }); + MEASURE("OCR", { matches = extractTextMatches(image_path); }); - std::cout << "Word count: " << matches.size() << std::endl; + std::cout << "Match count: " << matches.size() << std::endl; static OCRMatch *ptr = new OCRMatch[matches.size()]; std::copy(matches.begin(), matches.end(), ptr); @@ -21,101 +22,60 @@ OCRMatch *findWordCoordinates(const char *image_path, int *size) { *size = matches.size(); return ptr; } +} -std::vector<OCRMatch> extractTextCoordinates(const char *imagePath) { +std::vector<OCRMatch> extractTextMatches(const char *imagePath) { std::vector<OCRMatch> results; - auto tesseract = initializeTesseract(); - if (tesseract == nullptr) - return results; - - Pix *image = loadImage(imagePath); + Pix *image = image::loadImage(imagePath); if (image == nullptr) return results; // printf("imagePath: %s\n", imagePath); // pixWrite(imagePath, image, IFF_JFIF_JPEG); - tesseract->SetImage(image); - tesseract->Recognize(0); + int width = pixGetWidth(image); + int height = pixGetHeight(image); - tesseract::ResultIterator *iterator = tesseract->GetIterator(); - auto level = RESULT_ITER_MODE; + std::vector<std::unique_ptr<Recognizer>> recognizers; + recognizers.push_back( + std::make_unique<Recognizer>(0, 0, width / 2, height / 2)); - if (iterator != 0) { - do { - if (iterator->Confidence(level) > CONFIDENCE_THRESHOLD) { - const char *word = iterator->GetUTF8Text(level); + recognizers.push_back( + std::make_unique<Recognizer>(width / 2, 0, width / 2, height / 2)); - if (word != nullptr && strlen(word) >= MIN_CHARACTER_COUNT) { - int x1, y1, x2, y2; - iterator->BoundingBox(level, &x1, &y1, &x2, &y2); - OCRMatch match({(int)(x1 / scaleFactor), (int)(y1 / scaleFactor), - (int)(x2 / scaleFactor), (int)(y2 / scaleFactor), - word}); - results.push_back(match); - } - } - } while (iterator->Next(level)); - } + recognizers.push_back( + std::make_unique<Recognizer>(0, height / 2, width / 2, height / 2)); - delete iterator; - tesseract->End(); - delete tesseract; - pixDestroy(&image); + recognizers.push_back(std::make_unique<Recognizer>(width / 2, height / 2, + width / 2, height / 2)); - return results; + return runRecognizers(recognizers, image); } -inline tesseract::TessBaseAPI *initializeTesseract() { - auto *tesseract = new tesseract::TessBaseAPI(); - tesseract->SetPageSegMode(tesseract::PSM_AUTO); - - if (tesseract->Init(nullptr, "eng", tesseract::OEM_LSTM_ONLY)) { - std::cerr << "Could not initialize tesseract." << std::endl; - return nullptr; - } +std::vector<OCRMatch> +runRecognizers(std::vector<std::unique_ptr<Recognizer>> &recognizers, + Pix *image) { + std::vector<OCRMatch> results; + std::shared_ptr<Pix> sharedImage(image, [](Pix *p) { pixDestroy(&p); }); - return tesseract; -} + std::vector<std::thread> workers; + workers.reserve(recognizers.size()); -inline Pix *loadImage(const char *imagePath) { - Pix *image = pixRead(imagePath); - if (!image) { - std::cerr << "Could not load image " << imagePath << std::endl; - return nullptr; + for (auto &ext : recognizers) { + workers.push_back(std::thread( + [&ext, &sharedImage]() { ext->recognize(sharedImage.get()); })); } - preprocessImage(&image); - - return image; -} - -void preprocessImage(Pix **image) { - Pix *temp; - - // Scale - if (scaleFactor != 1) { - INLINE_IMAGE_PROC(pixScale(*image, scaleFactor, scaleFactor)); + for (std::thread &t : workers) { + if (t.joinable()) + t.join(); } - // Grayscale - if (pixGetDepth(*image) > 8) { - INLINE_IMAGE_PROC(pixConvertRGBToGray( - *image, grayscaleWeightRed, grayscaleWeightGreen, grayscaleWeightBlue)); + for (auto &ext : recognizers) { + for (auto &match : ext->getResults()) + results.push_back(match); } - // Contrast - pixContrastTRC(*image, *image, contrast); - - // Sharpness - // INLINE_IMAGE_PROC(pixUnsharpMaskingGrayFast(*image, 1, sharpness, 1)); - INLINE_IMAGE_PROC(pixUnsharpMasking(*image, 1, sharpness)); -} - -void printMatch(const OCRMatch &match) { - std::cout << "Text: " << match.text << "; Position: (" << match.startX << "," - << match.startY << ") -> (" << match.endX << "," << match.endY - << ")" << std::endl - << std::endl; + return results; } |
