aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorAkshay Nair <phenax5@gmail.com>2024-12-25 22:33:03 +0530
committerAkshay Nair <phenax5@gmail.com>2024-12-25 22:42:27 +0530
commit83e2570d3c8da9920d66a00c4bdf5650fe1b3336 (patch)
tree5755d561acdb86422e2ce3621e217a9c05dfb640 /include
parentfb24e589290f7ffbee04972eed35fca37facdf1c (diff)
downloadchelleport-83e2570d3c8da9920d66a00c4bdf5650fe1b3336.tar.gz
chelleport-83e2570d3c8da9920d66a00c4bdf5650fe1b3336.zip
Parallel ocr evaluation for sections of screen + many refactorings
Diffstat (limited to '')
-rw-r--r--include/image.h19
-rw-r--r--include/libchelleport.h52
-rw-r--r--include/recognizer.h50
3 files changed, 83 insertions, 38 deletions
diff --git a/include/image.h b/include/image.h
new file mode 100644
index 0000000..ecbc255
--- /dev/null
+++ b/include/image.h
@@ -0,0 +1,19 @@
+#pragma once
+#include <leptonica/allheaders.h>
+
+// Image-related declarations: preprocessing tuning constants and the image
+// loader. The code that consumes the constants is not part of this header.
+namespace image {
+// Preprocessing configuration
+// Each constant is `static const`, so every translation unit that includes
+// this header gets its own internal-linkage copy.
+static const float contrast = 0.3;
+static const float sharpness = 0.7;
+static const float scaleFactor = 1;
+// NOTE(review): the conventional luma weights are R=0.299, G=0.587, B=0.114;
+// the red and blue values below are the other way round. That is only correct
+// if the pixels being converted are stored in BGR order -- confirm against the
+// code that uses these weights.
+static const float grayscaleWeightRed = 0.114;
+static const float grayscaleWeightGreen = 0.587;
+static const float grayscaleWeightBlue = 0.299;
+
+// Takes a path to an image file and returns a Leptonica Pix pointer.
+// NOTE(review): ownership of the returned Pix and the value returned on
+// failure are not visible here; presumably the caller releases it with
+// pixDestroy -- confirm against the definition.
+Pix *loadImage(const char *imagePath);
+} // namespace image
+
+// Replaces the image behind `image` with the result of `process`.
+//
+// The expansion site must already have two names in scope:
+//   - `image`: assigned through with `*image = temp` and passed directly to
+//     pixDestroy, so it is expected to be a `Pix **`.
+//   - `temp`: receives the value of `process`, so it is expected to be a
+//     `Pix *`.
+//
+// `process` is evaluated first, while the old image still exists, so the
+// expression may read `*image`. The old image is then destroyed and the new
+// one stored in its place.
+//
+// The macro expands to three separate statements and already ends with `;`.
+// It is therefore not safe as the unbraced body of an `if`/`else`/loop: only
+// the first statement would be controlled by the condition.
+#define INLINE_IMAGE_PROC(process) \
+ temp = process; \
+ pixDestroy(image); \
+ *image = temp;
diff --git a/include/libchelleport.h b/include/libchelleport.h
index e6a074d..b69466e 100644
--- a/include/libchelleport.h
+++ b/include/libchelleport.h
@@ -1,46 +1,12 @@
+#pragma once
#include <chrono>
+#include <iostream>
#include <leptonica/allheaders.h>
+#include <memory>
#include <tesseract/baseapi.h>
#include <vector>
-// NOTE: Remember to update size and alignment in ocr hs module on change
-struct OCRMatch {
- int startX, startY;
- int endX, endY;
- const char *text;
-};
-
-// OCR configuration
-#define CONFIDENCE_THRESHOLD 25.
-#define MIN_CHARACTER_COUNT 3
-const tesseract::PageIteratorLevel RESULT_ITER_MODE = tesseract::RIL_WORD;
-
-// Preprocessing configuration
-const float contrast = 0.3;
-const float sharpness = 0.7;
-const float scaleFactor = 1;
-const float grayscaleWeightRed = 0.114;
-const float grayscaleWeightGreen = 0.587;
-const float grayscaleWeightBlue = 0.299;
-
-extern "C" {
-OCRMatch *findWordCoordinates(const char *image_path, /* returns */ int *size);
-}
-
-tesseract::TessBaseAPI *initializeTesseract();
-
-Pix *loadImage(const char *imagePath);
-
-std::vector<OCRMatch> extractTextCoordinates(const char *imagePath);
-
-void printMatch(const OCRMatch &match);
-
-void preprocessImage(Pix **image);
-
-#define INLINE_IMAGE_PROC(process) \
- temp = process; \
- pixDestroy(image); \
- *image = temp;
+#include "./recognizer.h"
#define MEASURE(label, stmts) \
auto start = std::chrono::high_resolution_clock::now(); \
@@ -49,3 +15,13 @@ void preprocessImage(Pix **image);
auto duration = \
std::chrono::duration_cast<std::chrono::microseconds>(end - start); \
std::cout << label << ": " << duration.count() / 1000.0 << " ms" << std::endl;
+
+// C-linkage entry point, declared `extern "C"` so the symbol name is not
+// mangled and can be called from non-C++ code.
+//
+// image_path: path of the image to process.
+// size:       out-parameter (marked `/* returns */` below); presumably
+//             receives the number of elements in the returned array -- confirm
+//             against the definition.
+// Returns a pointer to OCRMatch data.
+// NOTE(review): who frees the returned array, and the `text` pointers inside
+// it, is not visible in this header -- confirm before changing either side.
+extern "C" {
+OCRMatch *findWordCoordinates(const char *image_path, /* returns */ int *size);
+}
+
+// Takes an image path and returns a vector of OCRMatch by value.
+// Presumably loads the image and runs OCR over it; the definition is not in
+// this header.
+std::vector<OCRMatch> extractTextMatches(const char *imagePath);
+
+// Takes a set of recognizers (by non-const reference, each held through a
+// unique_ptr) together with an image, and returns a vector of OCRMatch by
+// value.
+// NOTE(review): presumably calls recognize(image) on every recognizer and
+// merges their getResults(); whether this happens concurrently, and whether
+// `image` may be modified, is not visible here -- confirm against the
+// definition.
+std::vector<OCRMatch>
+runRecognizers(std::vector<std::unique_ptr<Recognizer>> &recognizers,
+ Pix *image);
diff --git a/include/recognizer.h b/include/recognizer.h
new file mode 100644
index 0000000..57747bb
--- /dev/null
+++ b/include/recognizer.h
@@ -0,0 +1,50 @@
+#pragma once
+#include <iostream>
+#include <leptonica/allheaders.h>
+#include <tesseract/baseapi.h>
+
+#include "./image.h"
+
+// OCR configuration
+// CONFIDENCE_THRESHOLD expands to the floating-point literal `25.` and
+// MIN_CHARACTER_COUNT to the integer `3`. The code that applies them is not in
+// this header; presumably results below the confidence threshold or shorter
+// than the character count are discarded -- confirm against the recognizer
+// implementation.
+#define CONFIDENCE_THRESHOLD 25.
+#define MIN_CHARACTER_COUNT 3
+// Page iterator level handed to Tesseract; RIL_WORD selects word granularity.
+const tesseract::PageIteratorLevel ITER_LEVEL = tesseract::RIL_WORD;
+
+// One recognised piece of text and the rectangle it was found in.
+// NOTE: Remember to update size and alignment in ocr hs module on change
+// (that also means field order and types must not be changed in isolation).
+struct OCRMatch {
+ // Start corner of the match; printMatch shows it as "(startX,startY)".
+ int startX, startY;
+ // End corner of the match; printMatch shows it as "(endX,endY)".
+ int endX, endY;
+ // C string with the recognised text.
+ // NOTE(review): ownership and lifetime of this buffer are not visible in
+ // this header -- confirm who frees it.
+ const char *text;
+};
+
+// Wraps one Tesseract engine together with the rectangle (x, y, width,
+// height) it is responsible for.
+//
+// The constructor stores the rectangle and calls initializeTesseract(), which
+// is expected to set up `tesseract`; its definition is not in this header.
+//
+// Instances are non-copyable: a copy would share the same engine pointer, and
+// destroying either object would shut the engine down underneath the other.
+// Hold them by pointer (for example std::unique_ptr<Recognizer>) instead.
+class Recognizer {
+  // Engine handle. Starts out null so the destructor can tell whether
+  // initializeTesseract() ever assigned one.
+  tesseract::TessBaseAPI *tesseract = nullptr;
+  // Rectangle passed to the constructor.
+  int x, y, width, height;
+  // Presumably set by fail(); the definition is not in this header.
+  bool failed = false;
+
+public:
+  Recognizer(int x, int y, int width, int height)
+      : x(x), y(y), width(width), height(height) {
+    initializeTesseract();
+  }
+
+  Recognizer(const Recognizer &) = delete;
+  Recognizer &operator=(const Recognizer &) = delete;
+
+  // Shuts the engine down if one was created.
+  // NOTE(review): End() releases the engine's resources but does not free the
+  // TessBaseAPI object itself. If initializeTesseract() allocates it with
+  // `new`, a matching `delete` is still missing -- confirm against the
+  // definition before adding one.
+  ~Recognizer() {
+    if (tesseract != nullptr) {
+      tesseract->End();
+    }
+  }
+
+  // Reports a failure described by `msg`; presumably also sets `failed`.
+  void fail(const char *msg);
+
+  // Runs recognition on `image`.
+  void recognize(Pix *image);
+
+  // Builds a match from the iterator's current position.
+  // NOTE(review): ownership of the returned pointer, and whether it can be
+  // null, is not visible here -- confirm against the definition.
+  OCRMatch *fetchMatch(tesseract::ResultIterator *iterator);
+
+  // Returns the matches produced by this recognizer, by value.
+  std::vector<OCRMatch> getResults();
+
+private:
+  // Called once from the constructor to set up `tesseract`.
+  void initializeTesseract();
+};
+
+// Writes one match to standard output in the form
+//   Text: <text>; Position: (startX,startY) -> (endX,endY)
+// followed by an empty line.
+inline void printMatch(const OCRMatch &match) {
+  // Inserting a null `const char *` into a stream is undefined behaviour, so
+  // a match without text is printed with a placeholder instead.
+  const char *text = match.text != nullptr ? match.text : "(null)";
+  std::cout << "Text: " << text << "; Position: (" << match.startX << ","
+            << match.startY << ") -> (" << match.endX << "," << match.endY
+            << ")" << std::endl
+            << std::endl;
+}