aboutsummaryrefslogtreecommitdiff
path: root/cpp
diff options
context:
space:
mode:
Diffstat (limited to 'cpp')
-rw-r--r--cpp/image.cpp41
-rw-r--r--cpp/libchelleport.cpp120
-rw-r--r--cpp/recognizer.cpp65
3 files changed, 146 insertions, 80 deletions
diff --git a/cpp/image.cpp b/cpp/image.cpp
new file mode 100644
index 0000000..63f0a08
--- /dev/null
+++ b/cpp/image.cpp
@@ -0,0 +1,41 @@
+#include <iostream>
+#include <leptonica/allheaders.h>
+#include <tesseract/baseapi.h>
+
+#include "../include/image.h"
+
+namespace image {
+// Runs the OCR preprocessing steps on *image: optional scaling, conversion to
+// grayscale, contrast enhancement and unsharp masking, in that order.
+//
+// image: address of the Pix pointer to process.
+// NOTE(review): INLINE_IMAGE_PROC is defined outside this file; presumably it
+// stores the new Pix in `temp`, destroys the old *image and repoints *image at
+// the result — confirm against include/image.h.
+void preprocessImage(Pix **image) {
+ Pix *temp;
+
+ // Scale: resize by scaleFactor on both axes, skipped when the factor is 1
+ if (scaleFactor != 1) {
+ INLINE_IMAGE_PROC(pixScale(*image, scaleFactor, scaleFactor));
+ }
+
+ // Grayscale: only images deeper than 8 bits per pixel are converted, using
+ // the configured red/green/blue weights
+ if (pixGetDepth(*image) > 8) {
+ INLINE_IMAGE_PROC(pixConvertRGBToGray(
+ *image, grayscaleWeightRed, grayscaleWeightGreen, grayscaleWeightBlue));
+ }
+
+ // Contrast: source and destination are the same Pix, so no INLINE_IMAGE_PROC
+ // swap is used for this step
+ pixContrastTRC(*image, *image, contrast);
+
+ // Sharpness: unsharp mask with halfwidth 1 and strength `sharpness`
+ // INLINE_IMAGE_PROC(pixUnsharpMaskingGrayFast(*image, 1, sharpness, 1));
+ INLINE_IMAGE_PROC(pixUnsharpMasking(*image, 1, sharpness));
+}
+
+// Reads the image stored at imagePath and passes it through preprocessImage.
+//
+// Returns the preprocessed Pix on success. When the file cannot be read, an
+// error is written to stderr and nullptr is returned.
+Pix *loadImage(const char *imagePath) {
+  Pix *loaded = pixRead(imagePath);
+
+  if (loaded != nullptr) {
+    // preprocessImage may swap the Pix behind the pointer, so hand over its
+    // address and return whatever it ends up pointing at.
+    preprocessImage(&loaded);
+    return loaded;
+  }
+
+  std::cerr << "Could not load image " << imagePath << std::endl;
+  return nullptr;
+}
+} // namespace image
diff --git a/cpp/libchelleport.cpp b/cpp/libchelleport.cpp
index 923df6c..67abb06 100644
--- a/cpp/libchelleport.cpp
+++ b/cpp/libchelleport.cpp
@@ -1,19 +1,20 @@
-#include <algorithm>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <iostream>
#include <leptonica/allheaders.h>
+#include <memory>
+#include <ostream>
#include <tesseract/baseapi.h>
+#include <thread>
#include <vector>
+#include "../include/image.h"
#include "../include/libchelleport.h"
+#include "../include/recognizer.h"
+extern "C" {
+// C entry point: runs OCR on the image at image_path and returns the matches
+// as a plain array, writing the number of elements to *size.
OCRMatch *findWordCoordinates(const char *image_path, int *size) {
std::vector<OCRMatch> matches;
- MEASURE("OCR", { matches = extractTextCoordinates(image_path); });
+ MEASURE("OCR", { matches = extractTextMatches(image_path); });
- std::cout << "Word count: " << matches.size() << std::endl;
+ std::cout << "Match count: " << matches.size() << std::endl;
+// NOTE(review): `ptr` is a function-local static, so this allocation runs only
+// on the first call while the copy below runs on every call. A later call that
+// yields more matches than the first would write past the end of the buffer.
+// Confirm how the C caller is expected to own/free this array before changing.
static OCRMatch *ptr = new OCRMatch[matches.size()];
std::copy(matches.begin(), matches.end(), ptr);
@@ -21,101 +22,60 @@ OCRMatch *findWordCoordinates(const char *image_path, int *size) {
*size = matches.size();
return ptr;
}
+}
-std::vector<OCRMatch> extractTextCoordinates(const char *imagePath) {
+// Loads and preprocesses the image at imagePath, builds four Recognizers and
+// returns whatever runRecognizers collects from them.
+//
+// Returns an empty vector when the image cannot be loaded.
+// NOTE(review): the Recognizer constructor is not visible here; the arguments
+// are presumably (x, y, width, height), i.e. one recognizer per image
+// quadrant. If so, integer halving leaves the last row/column uncovered for
+// odd dimensions — confirm.
+std::vector<OCRMatch> extractTextMatches(const char *imagePath) {
std::vector<OCRMatch> results;
- auto tesseract = initializeTesseract();
- if (tesseract == nullptr)
- return results;
-
- Pix *image = loadImage(imagePath);
+ Pix *image = image::loadImage(imagePath);
if (image == nullptr)
return results;
// printf("imagePath: %s\n", imagePath);
// pixWrite(imagePath, image, IFF_JFIF_JPEG);
- tesseract->SetImage(image);
- tesseract->Recognize(0);
+ // Dimensions are read after loadImage, i.e. from the preprocessed image
+ int width = pixGetWidth(image);
+ int height = pixGetHeight(image);
- tesseract::ResultIterator *iterator = tesseract->GetIterator();
- auto level = RESULT_ITER_MODE;
+ std::vector<std::unique_ptr<Recognizer>> recognizers;
+ recognizers.push_back(
+ std::make_unique<Recognizer>(0, 0, width / 2, height / 2));
- if (iterator != 0) {
- do {
- if (iterator->Confidence(level) > CONFIDENCE_THRESHOLD) {
- const char *word = iterator->GetUTF8Text(level);
+ recognizers.push_back(
+ std::make_unique<Recognizer>(width / 2, 0, width / 2, height / 2));
- if (word != nullptr && strlen(word) >= MIN_CHARACTER_COUNT) {
- int x1, y1, x2, y2;
- iterator->BoundingBox(level, &x1, &y1, &x2, &y2);
- OCRMatch match({(int)(x1 / scaleFactor), (int)(y1 / scaleFactor),
- (int)(x2 / scaleFactor), (int)(y2 / scaleFactor),
- word});
- results.push_back(match);
- }
- }
- } while (iterator->Next(level));
- }
+ recognizers.push_back(
+ std::make_unique<Recognizer>(0, height / 2, width / 2, height / 2));
- delete iterator;
- tesseract->End();
- delete tesseract;
- pixDestroy(&image);
+ recognizers.push_back(std::make_unique<Recognizer>(width / 2, height / 2,
+ width / 2, height / 2));
- return results;
+ // The image is passed on as a raw pointer; runRecognizers destroys it
+ return runRecognizers(recognizers, image);
}
-inline tesseract::TessBaseAPI *initializeTesseract() {
- auto *tesseract = new tesseract::TessBaseAPI();
- tesseract->SetPageSegMode(tesseract::PSM_AUTO);
-
- if (tesseract->Init(nullptr, "eng", tesseract::OEM_LSTM_ONLY)) {
- std::cerr << "Could not initialize tesseract." << std::endl;
- return nullptr;
- }
+// Runs every recognizer against the same image, one std::thread per
+// recognizer, waits for all of them and then concatenates their results in
+// recognizer order.
+//
+// recognizers: the recognizers to run; each has recognize() and getResults()
+//              called on it once.
+// image:       wrapped in a shared_ptr whose deleter calls pixDestroy, so the
+//              Pix is destroyed when this function returns.
+std::vector<OCRMatch>
+runRecognizers(std::vector<std::unique_ptr<Recognizer>> &recognizers,
+ Pix *image) {
+ std::vector<OCRMatch> results;
+ std::shared_ptr<Pix> sharedImage(image, [](Pix *p) { pixDestroy(&p); });
- return tesseract;
-}
+ std::vector<std::thread> workers;
+ workers.reserve(recognizers.size());
-inline Pix *loadImage(const char *imagePath) {
- Pix *image = pixRead(imagePath);
- if (!image) {
- std::cerr << "Could not load image " << imagePath << std::endl;
- return nullptr;
+ // The lambdas capture by reference; every worker is joined below before
+ // `recognizers` or `sharedImage` can go out of scope.
+ for (auto &ext : recognizers) {
+ workers.push_back(std::thread(
+ [&ext, &sharedImage]() { ext->recognize(sharedImage.get()); }));
}
- preprocessImage(&image);
-
- return image;
-}
-
-void preprocessImage(Pix **image) {
- Pix *temp;
-
- // Scale
- if (scaleFactor != 1) {
- INLINE_IMAGE_PROC(pixScale(*image, scaleFactor, scaleFactor));
+ for (std::thread &t : workers) {
+ if (t.joinable())
+ t.join();
}
- // Grayscale
- if (pixGetDepth(*image) > 8) {
- INLINE_IMAGE_PROC(pixConvertRGBToGray(
- *image, grayscaleWeightRed, grayscaleWeightGreen, grayscaleWeightBlue));
+ // Results are gathered only after all workers have finished
+ for (auto &ext : recognizers) {
+ for (auto &match : ext->getResults())
+ results.push_back(match);
}
- // Contrast
- pixContrastTRC(*image, *image, contrast);
-
- // Sharpness
- // INLINE_IMAGE_PROC(pixUnsharpMaskingGrayFast(*image, 1, sharpness, 1));
- INLINE_IMAGE_PROC(pixUnsharpMasking(*image, 1, sharpness));
-}
-
-void printMatch(const OCRMatch &match) {
- std::cout << "Text: " << match.text << "; Position: (" << match.startX << ","
- << match.startY << ") -> (" << match.endX << "," << match.endY
- << ")" << std::endl
- << std::endl;
+ return results;
}
diff --git a/cpp/recognizer.cpp b/cpp/recognizer.cpp
new file mode 100644
index 0000000..6f19322
--- /dev/null
+++ b/cpp/recognizer.cpp
@@ -0,0 +1,65 @@
+#include <leptonica/allheaders.h>
+#include <tesseract/baseapi.h>
+
+#include "../include/recognizer.h"
+
+// Creates the Tesseract engine for this recognizer: English language data,
+// LSTM-only engine, automatic page segmentation.
+//
+// On failure the recognizer is marked as failed (see fail()), which turns
+// recognize() and getResults() into no-ops.
+void Recognizer::initializeTesseract() {
+  tesseract = new tesseract::TessBaseAPI();
+
+  if (tesseract->Init(nullptr, "eng", tesseract::OEM_LSTM_ONLY)) {
+    fail("Could not initialize tesseract.");
+    return;
+  }
+
+  // The segmentation mode has to be set after Init(): Init() on a fresh
+  // instance rebuilds the underlying engine, so a mode set beforehand is
+  // silently discarded and Tesseract falls back to its default.
+  tesseract->SetPageSegMode(tesseract::PSM_AUTO);
+}
+
+// Runs OCR on this recognizer's rectangle (x, y, width, height) of `image`.
+//
+// Does nothing if the recognizer has already failed; marks it as failed when
+// Tesseract reports a non-zero status.
+void Recognizer::recognize(Pix *image) {
+  if (!failed) {
+    tesseract->SetImage(image);
+    tesseract->SetRectangle(x, y, width, height);
+
+    const int status = tesseract->Recognize(nullptr);
+    if (status != 0)
+      fail("tesseract recognize failed");
+  }
+}
+
+// Collects the matches found by the last recognize() call.
+//
+// Walks the Tesseract result iterator at ITER_LEVEL and keeps every element
+// that fetchMatch accepts. Returns an empty vector if the recognizer has
+// failed or Tesseract provides no iterator.
+std::vector<OCRMatch> Recognizer::getResults() {
+  std::vector<OCRMatch> results;
+
+  if (failed)
+    return results;
+
+  tesseract::ResultIterator *iterator = tesseract->GetIterator();
+  if (iterator == nullptr)
+    return results;
+
+  do {
+    OCRMatch *match = fetchMatch(iterator);
+    if (match != nullptr) {
+      results.push_back(*match);
+      // fetchMatch returns a heap-allocated OCRMatch and the vector stores its
+      // own copy, so release the temporary instead of leaking one per match.
+      delete match;
+    }
+  } while (iterator->Next(ITER_LEVEL));
+
+  delete iterator;
+
+  return results;
+}
+
+// Builds an OCRMatch for the element the iterator currently points at.
+//
+// Returns nullptr when the element's confidence is below
+// CONFIDENCE_THRESHOLD, when Tesseract yields no text, or when the text is
+// shorter than MIN_CHARACTER_COUNT. Otherwise returns a heap-allocated
+// OCRMatch that the caller must delete; its coordinates are the bounding box
+// divided by image::scaleFactor.
+// NOTE(review): the accepted match keeps the text pointer allocated by
+// Tesseract — confirm who is responsible for freeing it.
+OCRMatch *Recognizer::fetchMatch(tesseract::ResultIterator *iterator) {
+  if (iterator->Confidence(ITER_LEVEL) < CONFIDENCE_THRESHOLD)
+    return nullptr;
+
+  const char *word = iterator->GetUTF8Text(ITER_LEVEL);
+  if (word == nullptr)
+    return nullptr;
+
+  if (strlen(word) < MIN_CHARACTER_COUNT) {
+    // GetUTF8Text returns a new[]-allocated string; free it when the word is
+    // rejected, otherwise every short word leaks its buffer.
+    delete[] word;
+    return nullptr;
+  }
+
+  int x1, y1, x2, y2;
+  iterator->BoundingBox(ITER_LEVEL, &x1, &y1, &x2, &y2);
+
+  return new OCRMatch({static_cast<int>(x1 / image::scaleFactor),
+                       static_cast<int>(y1 / image::scaleFactor),
+                       static_cast<int>(x2 / image::scaleFactor),
+                       static_cast<int>(y2 / image::scaleFactor), word});
+}
+
+// Flags this recognizer as failed and reports `msg` on stderr.
+void Recognizer::fail(const char *msg) {
+  failed = true;
+  std::cerr << msg << std::endl;
+}