aboutsummaryrefslogtreecommitdiff
path: root/cpp/libchelleport.cpp
blob: 8f2e9f21b1dc66bc0fca9a3ac6708e0f8f874e9e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <memory>
#include <vector>

#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>
#include <tesseract/publictypes.h>

#include "../include/libchelleport.h"

// Forward declaration: the definition appears further down in this file, but
// findWordCoordinates() calls it first.
std::vector<OCRMatch> extractTextCoordinates(const char *imagePath);

// Recognition results are kept only when their confidence is strictly greater
// than this value (see the filter in extractTextCoordinates()).
#define CONFIDENCE_THRESHOLD 30.

/// @brief Runs OCR on an image and returns the matches as a plain array.
///
/// Thin wrapper around extractTextCoordinates() that copies the resulting
/// vector into a heap array so it can be handed across a pointer + length
/// interface.
///
/// @param image_path Path of the image to analyse; forwarded unchanged to
///                   extractTextCoordinates().
/// @param size       Out-parameter receiving the number of elements in the
///                   returned array. May be null, in which case the count is
///                   not reported.
/// @return Pointer to a newly allocated array of `*size` OCRMatch elements.
///         The array is allocated with `new[]` on every call, so the caller
///         owns it and should release it with `delete[]`.
///         NOTE(review): confirm that existing callers release the array.
OCRMatch *findWordCoordinates(const char *image_path, int *size) {
  const std::vector<OCRMatch> boxes = extractTextCoordinates(image_path);

  // The buffer must be sized for *this* call's results. A function-local
  // `static` pointer would be initialised only once, so a later call that
  // produced more matches would write past the end of the first allocation.
  OCRMatch *matches = new OCRMatch[boxes.size()];
  std::copy(boxes.begin(), boxes.end(), matches);

  if (size != nullptr) {
    *size = static_cast<int>(boxes.size());
  }
  return matches;
}

std::vector<OCRMatch> extractTextCoordinates(const char *imagePath) {
  std::vector<OCRMatch> results;
  tesseract::TessBaseAPI *tesseract = new tesseract::TessBaseAPI();

  if (tesseract->Init(nullptr, "eng")) {
    std::cerr << "Could not initialize tesseract." << std::endl;
    return results;
  }

  Pix *image = pixRead(imagePath);
  if (!image) {
    std::cerr << "Could not load image " << imagePath << std::endl;
    return results;
  }

  tesseract->SetImage(image);
  tesseract->Recognize(0);

  tesseract::ResultIterator *iterator = tesseract->GetIterator();
  tesseract::PageIteratorLevel level = tesseract::RIL_TEXTLINE;

  if (iterator != 0) {
    do {
      float conf = iterator->Confidence(level);
      const char *word = iterator->GetUTF8Text(level);
      int x1, y1, x2, y2;
      iterator->BoundingBox(level, &x1, &y1, &x2, &y2);

      if (conf > CONFIDENCE_THRESHOLD && word != nullptr && strlen(word) >= 2) {
        OCRMatch box{x1, y1, x2, y2, word};
        results.push_back(box);
      }
    } while (iterator->Next(level));
  }

  delete iterator;
  tesseract->End();
  delete tesseract;
  pixDestroy(&image);

  return results;
}