about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- chelleport.cabal 1
-rw-r--r-- cpp/libchelleport.cpp 89
-rw-r--r-- include/libchelleport.h 22
-rw-r--r-- src/Chelleport/OCR.hs 2
4 files changed, 72 insertions, 42 deletions
diff --git a/chelleport.cabal b/chelleport.cabal
index f273a39..2d03b63 100644
--- a/chelleport.cabal
+++ b/chelleport.cabal
@@ -51,6 +51,7 @@ common extension
extra-libraries: stdc++ Xtst X11 tesseract leptonica
include-dirs: include
c-sources: cpp/libchelleport.cpp
+ cxx-options: -O3 -ffast-math -march=native
extra-source-files:
cpp/*.cpp
include/*.h
diff --git a/cpp/libchelleport.cpp b/cpp/libchelleport.cpp
index 5653068..923df6c 100644
--- a/cpp/libchelleport.cpp
+++ b/cpp/libchelleport.cpp
@@ -5,42 +5,33 @@
#include <iostream>
#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>
-#include <tesseract/publictypes.h>
#include <vector>
#include "../include/libchelleport.h"
+/// Runs OCR on the image at `image_path` and exposes the matches as a C array.
+///
+/// @param image_path path forwarded to extractTextCoordinates.
+/// @param size       out-parameter that receives the number of matches.
+/// @return pointer to `*size` OCRMatch entries. The buffer is owned by this
+///         function; it is released and replaced on the next call.
+///         NOTE(review): this assumes the foreign caller copies the entries
+///         out before calling again and never frees the pointer - confirm
+///         against the Haskell binding.
OCRMatch *findWordCoordinates(const char *image_path, int *size) {
- auto matches = extractTextCoordinates(image_path);
+  std::vector<OCRMatch> matches;
+  MEASURE("OCR", { matches = extractTextCoordinates(image_path); });
+
+  std::cout << "Word count: " << matches.size() << std::endl;
- static OCRMatch *ptr = new OCRMatch[matches.size()];
+  // A function-local static is initialised on the first call only. Sizing the
+  // buffer in that initialiser meant every later call copied into an array
+  // sized for the first result, overflowing it whenever more words were
+  // found. Keep the pointer static (this function still owns the buffer) but
+  // reallocate it for every result.
+  static OCRMatch *ptr = nullptr;
+  delete[] ptr;
+  ptr = new OCRMatch[matches.size()];
std::copy(matches.begin(), matches.end(), ptr);
- // for (const auto &match : matches)
- // showMatch(match);
-
- printf("Count: %ld\n", matches.size());
-
*size = matches.size();
return ptr;
}
std::vector<OCRMatch> extractTextCoordinates(const char *imagePath) {
std::vector<OCRMatch> results;
- tesseract::TessBaseAPI *tesseract = new tesseract::TessBaseAPI();
- if (tesseract->Init(nullptr, "eng")) {
- std::cerr << "Could not initialize tesseract." << std::endl;
+ auto tesseract = initializeTesseract();
+ if (tesseract == nullptr)
return results;
- }
- Pix *image = pixRead(imagePath);
- if (!image) {
- std::cerr << "Could not load image " << imagePath << std::endl;
+ Pix *image = loadImage(imagePath);
+ if (image == nullptr)
return results;
- }
-
- preprocessImage(&image);
// printf("imagePath: %s\n", imagePath);
// pixWrite(imagePath, image, IFF_JFIF_JPEG);
@@ -53,16 +44,17 @@ std::vector<OCRMatch> extractTextCoordinates(const char *imagePath) {
if (iterator != 0) {
do {
- float conf = iterator->Confidence(level);
- const char *word = iterator->GetUTF8Text(level);
+ if (iterator->Confidence(level) > CONFIDENCE_THRESHOLD) {
+ const char *word = iterator->GetUTF8Text(level);
- if (conf > CONFIDENCE_THRESHOLD && word != nullptr &&
- strlen(word) >= MIN_CHARACTER_COUNT) {
- int x1, y1, x2, y2;
- iterator->BoundingBox(level, &x1, &y1, &x2, &y2);
- results.push_back(
- OCRMatch{(int)(x1 / scaleFactor), (int)(y1 / scaleFactor),
- (int)(x2 / scaleFactor), (int)(y2 / scaleFactor), word});
+ if (word != nullptr && strlen(word) >= MIN_CHARACTER_COUNT) {
+ int x1, y1, x2, y2;
+ iterator->BoundingBox(level, &x1, &y1, &x2, &y2);
+ OCRMatch match({(int)(x1 / scaleFactor), (int)(y1 / scaleFactor),
+ (int)(x2 / scaleFactor), (int)(y2 / scaleFactor),
+ word});
+ results.push_back(match);
+ }
}
} while (iterator->Next(level));
}
@@ -75,36 +67,55 @@ std::vector<OCRMatch> extractTextCoordinates(const char *imagePath) {
return results;
}
+/// Creates a Tesseract engine for English, LSTM-only recognition with
+/// automatic page segmentation.
+///
+/// @return a heap-allocated engine that the caller owns, or nullptr when
+///         Init() reports failure (a message is written to stderr).
+inline tesseract::TessBaseAPI *initializeTesseract() {
+  auto *tesseract = new tesseract::TessBaseAPI();
+
+  if (tesseract->Init(nullptr, "eng", tesseract::OEM_LSTM_ONLY)) {
+    std::cerr << "Could not initialize tesseract." << std::endl;
+    // The engine was allocated above; release it instead of leaking it on
+    // the failure path.
+    delete tesseract;
+    return nullptr;
+  }
+
+  // Choose the segmentation mode only after Init(): Init() (re)creates the
+  // engine's parameter set, so a mode selected earlier is not guaranteed to
+  // survive it.
+  tesseract->SetPageSegMode(tesseract::PSM_AUTO);
+
+  return tesseract;
+}
+
+/// Reads the image at `imagePath` and runs it through preprocessImage.
+///
+/// @return the preprocessed image, or nullptr (after logging to stderr) when
+///         the file cannot be read.
+inline Pix *loadImage(const char *imagePath) {
+  Pix *loaded = pixRead(imagePath);
+  if (loaded != nullptr) {
+    preprocessImage(&loaded);
+    return loaded;
+  }
+
+  std::cerr << "Could not load image " << imagePath << std::endl;
+  return nullptr;
+}
+
// Prepares an image for OCR, replacing *image with the processed result.
// Steps, in order: optional scaling, grayscale conversion for images deeper
// than 8 bits, contrast adjustment, unsharp masking.
// NOTE(review): INLINE_IMAGE_PROC stores each step's result without checking
// it, so a step that returns null leaves *image null - confirm that callers
// check for that.
void preprocessImage(Pix **image) {
// Scratch pointer assigned by INLINE_IMAGE_PROC, which refers to `temp` and
// `image` by name.
Pix *temp;
// Scale: skipped when scaleFactor is exactly 1
if (scaleFactor != 1) {
- temp = pixScale(*image, scaleFactor, scaleFactor);
- pixDestroy(image);
- *image = temp;
+ INLINE_IMAGE_PROC(pixScale(*image, scaleFactor, scaleFactor));
}
// Grayscale: only for images whose depth is above 8
if (pixGetDepth(*image) > 8) {
- temp = pixConvertRGBToGray(*image, grayscaleWeightRed, grayscaleWeightGreen,
- grayscaleWeightBlue);
- pixDestroy(image);
- *image = temp;
+ INLINE_IMAGE_PROC(pixConvertRGBToGray(
+ *image, grayscaleWeightRed, grayscaleWeightGreen, grayscaleWeightBlue));
}
// Contrast: the same Pix is passed as destination and source
pixContrastTRC(*image, *image, contrast);
// Sharpness: unsharp masking; the faster gray-only variant is kept below,
// commented out
- // temp = pixUnsharpMaskingGrayFast(*image, 1, sharpness, 1);
- temp = pixUnsharpMasking(*image, 1, sharpness);
- pixDestroy(image);
- *image = temp;
+ // INLINE_IMAGE_PROC(pixUnsharpMaskingGrayFast(*image, 1, sharpness, 1));
+ INLINE_IMAGE_PROC(pixUnsharpMasking(*image, 1, sharpness));
}
-void showMatch(const OCRMatch &match) {
- std::cout << "Text: " << match.text << "; Position: (" << match.startX << ","
- << match.startY << ") -> (" << match.endX << "," << match.endY
- << ")" << "\n\n";
+/// Writes one OCR match to stdout: its text and the two corners of its
+/// bounding box, followed by a blank line.
+void printMatch(const OCRMatch &match) {
+  std::cout << "Text: " << match.text;
+  std::cout << "; Position: (" << match.startX << "," << match.startY << ")";
+  std::cout << " -> (" << match.endX << "," << match.endY << ")";
+  std::cout << std::endl << std::endl;
}
diff --git a/include/libchelleport.h b/include/libchelleport.h
index c74058d..e6a074d 100644
--- a/include/libchelleport.h
+++ b/include/libchelleport.h
@@ -1,5 +1,6 @@
+#include <chrono>
#include <leptonica/allheaders.h>
-#include <tesseract/publictypes.h>
+#include <tesseract/baseapi.h>
#include <vector>
// NOTE: Remember to update size and alignment in ocr hs module on change
@@ -26,8 +27,25 @@ extern "C" {
OCRMatch *findWordCoordinates(const char *image_path, /* returns */ int *size);
}
+tesseract::TessBaseAPI *initializeTesseract();
+
+Pix *loadImage(const char *imagePath);
+
std::vector<OCRMatch> extractTextCoordinates(const char *imagePath);
-void showMatch(const OCRMatch &match);
+void printMatch(const OCRMatch &match);
void preprocessImage(Pix **image);
+
+// Replaces the image behind `image` with the result of `process`.
+//
+// Expands in a scope that provides `Pix **image` and a scratch `Pix *temp`.
+// The result is stored unchecked: if `process` yields null, the previous
+// image is still destroyed and *image becomes null.
+//
+// Wrapped in do { ... } while (0) so the expansion is a single statement and
+// stays correct under an unbraced if/else; the argument is parenthesised so
+// any expression can be passed.
+#define INLINE_IMAGE_PROC(process) \
+  do { \
+    temp = (process); \
+    pixDestroy(image); \
+    *image = temp; \
+  } while (0)
+
+// Runs `stmts` and prints "<label>: <elapsed> ms" to std::cout.
+//
+// The expansion site must have <iostream> and <chrono> available.
+// The body lives in its own do { ... } while (0) scope, so the timing
+// variables no longer leak into the caller. The macro can therefore be used
+// several times in one function and under an unbraced if/else.
+// steady_clock is used because it is monotonic; high_resolution_clock may be
+// an alias for the adjustable system clock.
+#define MEASURE(label, stmts) \
+  do { \
+    const auto measure_start_ = std::chrono::steady_clock::now(); \
+    stmts; \
+    const auto measure_end_ = std::chrono::steady_clock::now(); \
+    const auto measure_us_ = \
+        std::chrono::duration_cast<std::chrono::microseconds>( \
+            measure_end_ - measure_start_); \
+    std::cout << (label) << ": " << measure_us_.count() / 1000.0 << " ms" \
+              << std::endl; \
+  } while (0)
diff --git a/src/Chelleport/OCR.hs b/src/Chelleport/OCR.hs
index 87cad62..5ee331c 100644
--- a/src/Chelleport/OCR.hs
+++ b/src/Chelleport/OCR.hs
@@ -1,6 +1,7 @@
module Chelleport.OCR (MonadOCR (..)) where
import Chelleport.Types
+import Chelleport.Utils (benchmark)
import Control.Concurrent (threadDelay)
import Control.Monad.IO.Class (MonadIO (liftIO))
import Control.Monad.RWS (MonadReader (ask))
@@ -31,7 +32,6 @@ instance (MonadIO m) => MonadOCR (AppM m) where
pure path
getWordsInImage filePath = liftIO $ do
- print filePath
findWordCoordinates filePath <* removeFile filePath
findWordCoordinates :: String -> IO [OCRMatch]