aboutsummaryrefslogtreecommitdiff
path: root/include/recognizer.h
blob: 260a139651ca609bc57fff9910334005781a3ece (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#pragma once
#include <iostream>
#include <vector>
#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>

#include "./image.h"

// OCR configuration
//
// None of these values is used inside this header; the notes on intent below
// are assumptions to be confirmed against the implementation file.

// Floating-point cut-off (the literal `20.` is a double). Presumably the
// minimum Tesseract confidence a result needs to be kept — TODO confirm.
#define CONFIDENCE_THRESHOLD 20.
// Presumably the minimum number of characters a recognized text must contain
// to be reported — TODO confirm.
#define MIN_CHARACTER_COUNT 3
// Tesseract page-iterator level fixed to RIL_WORD, i.e. word granularity.
const tesseract::PageIteratorLevel ITER_LEVEL = tesseract::RIL_WORD;

// A single recognized piece of text together with the two corner points that
// delimit it: (startX, startY) and (endX, endY).
//
// NOTE: Remember to update size and alignment in hs type on change
// (member order and types below are part of that layout, so keep them stable).
struct OCRMatch {
  // First corner point.
  int startX;
  int startY;
  // Second corner point.
  int endX;
  int endY;
  // Recognized text as a C string. Ownership is not expressed by this type;
  // see the code that fills the struct in.
  const char *text;
};

// Couples a Tesseract engine handle with an identifier and a rectangle
// (x, y, width, height) supplied at construction.
//
// Only the constructor and destructor are defined in this header; all other
// member functions are implemented out of line.
//
// NOTE(review): the class is implicitly copyable, so a copy shares the same
// `tesseract` pointer and each destructor calls End() on it. Consider deleting
// the copy operations if no caller relies on copying.
// NOTE(review): the destructor calls End() but never deletes the pointer. If
// initializeTesseract() allocates the engine with `new`, the object itself is
// leaked — confirm against the implementation.
class Recognizer {
  // Engine handle. Starts out null so the destructor stays well-defined even
  // if initializeTesseract() returns without assigning it.
  tesseract::TessBaseAPI *tesseract = nullptr;
  // Rectangle passed to the constructor; presumably the region to run
  // recognition on — TODO confirm.
  int x, y, width, height;
  // Failure flag, initially false; presumably set by fail() — TODO confirm.
  bool failed = false;

public:
  // Caller-supplied identifier. The pointer is stored as-is (the string is not
  // copied), so it must remain valid for as long as `id` is read.
  const char *id;

  // Stores the identifier and rectangle, then calls initializeTesseract().
  // The initializer list follows member declaration order (`id` is declared
  // last), which is the order the members are actually initialized in.
  Recognizer(const char *id, int x, int y, int width, int height)
      : x(x), y(y), width(width), height(height), id(id) {
    initializeTesseract();
  }

  // Shuts the engine down via End() if a handle was ever set.
  ~Recognizer() {
    if (tesseract != nullptr) {
      tesseract->End();
    }
  }

  // Defined out of line. Presumably reports `msg` and marks the recognizer as
  // failed — TODO confirm.
  void fail(const char *msg);

  // Defined out of line. Presumably runs OCR on `image` — TODO confirm.
  void recognize(Pix *image);

  // Defined out of line. Presumably builds an OCRMatch from the iterator's
  // current position; ownership of the returned pointer is not visible here.
  OCRMatch *fetchMatch(tesseract::ResultIterator *iterator);

  // Defined out of line. Returns the collected matches by value.
  std::vector<OCRMatch> getResults();

private:
  // Defined out of line; called once from the constructor.
  void initializeTesseract();
};

// Writes one match to std::cout in the form
//   Text: <text>; Position: (<startX>,<startY>) -> (<endX>,<endY>)
// followed by an empty line, then flushes the stream once.
//
// A null `match.text` is printed as "(null)": inserting a null `const char *`
// into an ostream is undefined behaviour and in practice can leave std::cout
// in a failed state, silencing all later output.
inline void printMatch(const OCRMatch &match) {
  const char *text = match.text != nullptr ? match.text : "(null)";
  // Emits the same bytes as two consecutive std::endl would, with one flush.
  std::cout << "Text: " << text << "; Position: (" << match.startX << ","
            << match.startY << ") -> (" << match.endX << "," << match.endY
            << ")\n"
            << std::endl;
}