From 70e3920556496e5fecb5fedddf1067b2522fcac7 Mon Sep 17 00:00:00 2001 From: Akshay Nair Date: Tue, 24 Dec 2024 18:51:17 +0530 Subject: Add setup for ocr with tesseract --- cpp/libchelleport.cpp | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 cpp/libchelleport.cpp (limited to 'cpp') diff --git a/cpp/libchelleport.cpp b/cpp/libchelleport.cpp new file mode 100644 index 0000000..8f2e9f2 --- /dev/null +++ b/cpp/libchelleport.cpp @@ -0,0 +1,74 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../include/libchelleport.h" + +std::vector extractTextCoordinates(const char *imagePath); + +#define CONFIDENCE_THRESHOLD 30. + +OCRMatch *findWordCoordinates(const char *image_path, int *size) { + auto boxes = extractTextCoordinates(image_path); + static OCRMatch *ptr = new OCRMatch[boxes.size()]; + std::copy(boxes.begin(), boxes.end(), ptr); + + // for (const auto &box : boxes) { + // std::cout << box.text << "\n\n"; + // std::cout << "Text: " << box.text << "\nPosition: (" << box.startX << "," + // << box.startY << ") -> (" << box.endX << "," << box.endY << ")" + // << "\n\n"; + // } + + *size = boxes.size(); + return ptr; +} + +std::vector extractTextCoordinates(const char *imagePath) { + std::vector results; + tesseract::TessBaseAPI *tesseract = new tesseract::TessBaseAPI(); + + if (tesseract->Init(nullptr, "eng")) { + std::cerr << "Could not initialize tesseract." << std::endl; + return results; + } + + Pix *image = pixRead(imagePath); + if (!image) { + std::cerr << "Could not load image " << imagePath << std::endl; + return results; + } + + tesseract->SetImage(image); + tesseract->Recognize(0); + + tesseract::ResultIterator *iterator = tesseract->GetIterator(); + tesseract::PageIteratorLevel level = tesseract::RIL_TEXTLINE; + + if (iterator != 0) { + do { + float conf = iterator->Confidence(level); + const char *word = iterator->GetUTF8Text(level); + int x1, y1, x2, y2; + iterator->BoundingBox(level, &x1, &y1, &x2, &y2); + + if (conf > CONFIDENCE_THRESHOLD && word != nullptr && strlen(word) >= 2) { + OCRMatch box{x1, y1, x2, y2, word}; + results.push_back(box); + } + } while (iterator->Next(level)); + } + + delete iterator; + tesseract->End(); + delete tesseract; + pixDestroy(&image); + + return results; +} -- cgit v1.3.1