aboutsummaryrefslogtreecommitdiff
path: root/cpp/recognizer.cpp
blob: 6f19322dee486b731b5abbd03d88c5a34933338a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#include <memory>

#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>

#include "../include/recognizer.h"

// Creates the Tesseract API object and initialises it for the "eng" language
// with the LSTM-only engine, then selects automatic page segmentation.
// On initialisation failure the error is recorded through fail() and the
// segmentation mode is left untouched.
void Recognizer::initializeTesseract() {
  tesseract = new tesseract::TessBaseAPI();

  // Init() returns non-zero on failure.
  if (tesseract->Init(nullptr, "eng", tesseract::OEM_LSTM_ONLY) != 0) {
    fail("Could not initialize tesseract.");
    return;
  }

  // The page segmentation mode is applied only after a successful Init():
  // Init() (re)builds the underlying engine, so a mode set beforehand may be
  // discarded and the library default used instead.
  tesseract->SetPageSegMode(tesseract::PSM_AUTO);
}

// Hands `image` to Tesseract, restricts recognition to the rectangle given by
// x, y, width and height (declared outside this file), and runs recognition.
// Does nothing if a failure has already been recorded; a non-zero result from
// Recognize() is reported through fail().
void Recognizer::recognize(Pix *image) {
  if (!failed) {
    tesseract->SetImage(image);
    tesseract->SetRectangle(x, y, width, height);

    const int status = tesseract->Recognize(nullptr);
    if (status != 0) {
      fail("tesseract recognize failed");
    }
  }
}

std::vector<OCRMatch> Recognizer::getResults() {
  std::vector<OCRMatch> results;

  if (failed)
    return results;

  tesseract::ResultIterator *iterator = tesseract->GetIterator();
  if (iterator == 0)
    return results;

  do {
    auto match = fetchMatch(iterator);
    if (match != nullptr)
      results.push_back(*match);
  } while (iterator->Next(ITER_LEVEL));

  delete iterator;

  return results;
}

// Builds an OCRMatch for the element the iterator currently points at.
//
// Returns nullptr when the element's confidence is below
// CONFIDENCE_THRESHOLD, when it has no text, when the text is shorter than
// MIN_CHARACTER_COUNT bytes, or when no bounding box is available.
// Otherwise returns a heap-allocated OCRMatch whose coordinates are the
// bounding box divided by image::scaleFactor and truncated to int; the caller
// owns the returned object.
OCRMatch *Recognizer::fetchMatch(tesseract::ResultIterator *iterator) {
  if (iterator->Confidence(ITER_LEVEL) < CONFIDENCE_THRESHOLD)
    return nullptr;

  // GetUTF8Text() returns a buffer allocated with new[] that the caller must
  // release with delete[].
  const char *word = iterator->GetUTF8Text(ITER_LEVEL);
  if (word == nullptr)
    return nullptr;

  if (strlen(word) < MIN_CHARACTER_COUNT) {
    delete[] word;  // rejected text would otherwise leak
    return nullptr;
  }

  int x1, y1, x2, y2;
  // BoundingBox() reports false when there is no box at this position; the
  // coordinates would then be uninitialised, so the match is dropped.
  if (!iterator->BoundingBox(ITER_LEVEL, &x1, &y1, &x2, &y2)) {
    delete[] word;
    return nullptr;
  }

  // NOTE(review): `word` is passed straight into OCRMatch. If OCRMatch copies
  // the text (e.g. into a std::string) the buffer should be delete[]'d after
  // construction; if it stores the raw pointer, OCRMatch's owner must free
  // it. OCRMatch is declared elsewhere — confirm and adjust.
  return new OCRMatch({static_cast<int>(x1 / image::scaleFactor),
                       static_cast<int>(y1 / image::scaleFactor),
                       static_cast<int>(x2 / image::scaleFactor),
                       static_cast<int>(y2 / image::scaleFactor), word});
}

void Recognizer::fail(const char *msg) {
  this->failed = true;
  std::cerr << msg << std::endl;
}