diff options
| author | Akshay Nair <phenax5@gmail.com> | 2024-12-25 22:33:03 +0530 |
|---|---|---|
| committer | Akshay Nair <phenax5@gmail.com> | 2024-12-25 22:42:27 +0530 |
| commit | 83e2570d3c8da9920d66a00c4bdf5650fe1b3336 (patch) | |
| tree | 5755d561acdb86422e2ce3621e217a9c05dfb640 /include | |
| parent | fb24e589290f7ffbee04972eed35fca37facdf1c (diff) | |
| download | chelleport-83e2570d3c8da9920d66a00c4bdf5650fe1b3336.tar.gz chelleport-83e2570d3c8da9920d66a00c4bdf5650fe1b3336.zip | |
Parallel ocr evaluation for sections of screen + many refactorings
Diffstat (limited to 'include')
| -rw-r--r-- | include/image.h | 19 | ||||
| -rw-r--r-- | include/libchelleport.h | 52 | ||||
| -rw-r--r-- | include/recognizer.h | 50 |
3 files changed, 83 insertions, 38 deletions
diff --git a/include/image.h b/include/image.h new file mode 100644 index 0000000..ecbc255 --- /dev/null +++ b/include/image.h @@ -0,0 +1,19 @@ +#pragma once +#include <leptonica/allheaders.h> + +namespace image { +// Preprocessing configuration +static const float contrast = 0.3; +static const float sharpness = 0.7; +static const float scaleFactor = 1; +static const float grayscaleWeightRed = 0.114; +static const float grayscaleWeightGreen = 0.587; +static const float grayscaleWeightBlue = 0.299; + +Pix *loadImage(const char *imagePath); +} // namespace image + +#define INLINE_IMAGE_PROC(process) \ + temp = process; \ + pixDestroy(image); \ + *image = temp; diff --git a/include/libchelleport.h b/include/libchelleport.h index e6a074d..b69466e 100644 --- a/include/libchelleport.h +++ b/include/libchelleport.h @@ -1,46 +1,12 @@ +#pragma once #include <chrono> +#include <iostream> #include <leptonica/allheaders.h> +#include <memory> #include <tesseract/baseapi.h> #include <vector> -// NOTE: Remember to update size and alignment in ocr hs module on change -struct OCRMatch { - int startX, startY; - int endX, endY; - const char *text; -}; - -// OCR configuration -#define CONFIDENCE_THRESHOLD 25. -#define MIN_CHARACTER_COUNT 3 -const tesseract::PageIteratorLevel RESULT_ITER_MODE = tesseract::RIL_WORD; - -// Preprocessing configuration -const float contrast = 0.3; -const float sharpness = 0.7; -const float scaleFactor = 1; -const float grayscaleWeightRed = 0.114; -const float grayscaleWeightGreen = 0.587; -const float grayscaleWeightBlue = 0.299; - -extern "C" { -OCRMatch *findWordCoordinates(const char *image_path, /* returns */ int *size); -} - -tesseract::TessBaseAPI *initializeTesseract(); - -Pix *loadImage(const char *imagePath); - -std::vector<OCRMatch> extractTextCoordinates(const char *imagePath); - -void printMatch(const OCRMatch &match); - -void preprocessImage(Pix **image); - -#define INLINE_IMAGE_PROC(process) \ - temp = process; \ - pixDestroy(image); \ - *image = temp; +#include "./recognizer.h" #define MEASURE(label, stmts) \ auto start = std::chrono::high_resolution_clock::now(); \ @@ -49,3 +15,13 @@ void preprocessImage(Pix **image); auto duration = \ std::chrono::duration_cast<std::chrono::microseconds>(end - start); \ std::cout << label << ": " << duration.count() / 1000.0 << " ms" << std::endl; + +extern "C" { +OCRMatch *findWordCoordinates(const char *image_path, /* returns */ int *size); +} + +std::vector<OCRMatch> extractTextMatches(const char *imagePath); + +std::vector<OCRMatch> +runRecognizers(std::vector<std::unique_ptr<Recognizer>> &recognizers, + Pix *image); diff --git a/include/recognizer.h b/include/recognizer.h new file mode 100644 index 0000000..57747bb --- /dev/null +++ b/include/recognizer.h @@ -0,0 +1,50 @@ +#pragma once +#include <iostream> +#include <leptonica/allheaders.h> +#include <tesseract/baseapi.h> + +#include "./image.h" + +// OCR configuration +#define CONFIDENCE_THRESHOLD 25. +#define MIN_CHARACTER_COUNT 3 +const tesseract::PageIteratorLevel ITER_LEVEL = tesseract::RIL_WORD; + +// NOTE: Remember to update size and alignment in ocr hs module on change +struct OCRMatch { + int startX, startY; + int endX, endY; + const char *text; +}; + +class Recognizer { + tesseract::TessBaseAPI *tesseract; + int x, y, width, height; + bool failed = false; + +public: + Recognizer(int x, int y, int width, int height) + : x(x), y(y), width(width), height(height) { + initializeTesseract(); + } + + ~Recognizer() { tesseract->End(); } + + void fail(const char *msg); + + void recognize(Pix *image); + + OCRMatch *fetchMatch(tesseract::ResultIterator *iterator); + + std::vector<OCRMatch> getResults(); + +private: + void initializeTesseract(); +}; + +inline void printMatch(const OCRMatch &match) { + std::cout << "Text: " << match.text << "; Position: (" << match.startX << "," + << match.startY << ") -> (" << match.endX << "," << match.endY + << ")" << std::endl + << std::endl; +} |
