-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimageToText.cpp
More file actions
116 lines (94 loc) · 3.33 KB
/
imageToText.cpp
File metadata and controls
116 lines (94 loc) · 3.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
//
// imageToText.cpp
// prahvi
//
// Created by Yang Li on 4/29/17.
// Copyright © 2017 Portable Reading Assistant Headset for the Visually Impaired. All rights reserved.
//
// Description: module that converts the image received to a string of text
// the image received is already preprocessed
// currently just passes the image to the google tesseract api
#include <tesseract/baseapi.h>
#include "imageToText.hpp"
// Function: replaceString
// Description: replace every occurrence of "toReplace" with "replaceWith" in "text"
// "text" is modified in place and a copy of the result is returned
// the scan resumes right after each inserted replacement, so a replacement
// that itself contains "toReplace" is never expanded a second time
// an empty "toReplace" leaves "text" unchanged
std::string replaceString(std::string &text, const std::string &toReplace, const std::string &replaceWith)
{
    // an empty pattern matches at every position and the loop would never end
    if (toReplace.empty())
    {
        return text;
    }

    // keep the position in the string's own unsigned type so that the
    // comparison with npos is exact and long strings are not truncated
    std::string::size_type location = 0;
    while ((location = text.find(toReplace, location)) != std::string::npos)
    {
        // replace the occurrence that was just found, not the first one in the string
        text.replace(location, toReplace.length(), replaceWith);
        // step over the inserted text so it is not scanned again
        location += replaceWith.length();
    }
    return text;
}
// Function: replaceLigatures
// Description: walk a short table of ligature spellings and substitute each one
// in "text" with its plain-letter spelling, then return the result
std::string replaceLigatures(std::string text)
{
    // A much longer table was tried first, but searching for every entry
    // made the system really slow. It is kept here for reference:
    //std::vector<std::string> ligatures = {"Ꜳ", "ꜳ", "Æ", "æ", "Ꜵ",
    // "ꜵ", "Ꜷ", "ꜷ", "Ꜹ", "ꜹ",
    // "Ꜻ", "ꜻ", "Ꜽ", "ꜽ", "ff",
    // "ffi", "ffl", "fi", "fl", "Œ",
    // "œ", "Ꝏ", "ꝏ", "ẞ", "ß",
    // "st", "ſt", "Ꜩ", "ꜩ", "ᵫ",
    // "Ꝡ", "ꝡ"};
    //std::vector<std::string> nonLigatures = {"AA", "aa", "AE", "ae", "AO",
    // "ao", "AU", "au", "AV", "av",
    // "AV", "av", "AY", "ay", "ff",
    // "ffi", "ffl", "fi", "fl", "OE",
    // "oe", "OO", "oo", "fs", "fz",
    // "st", "ft", "TZ", "tz", "ue",
    // "VY", "vy"};

    // Only the common ligatures are handled; entry k of the first table is
    // rewritten to entry k of the second table.
    // NOTE(review): as written here, the first six pairs map a string to itself,
    // so only the last pair can change the text. The search strings were
    // presumably single-glyph ligature characters -- confirm the file encoding.
    const std::vector<std::string> searchFor = {"ff", "ffi", "ffl", "fi", "fl","st", "ſt"};
    const std::vector<std::string> substitute = {"ff", "ffi", "ffl", "fi", "fl", "st", "ft"};

    for (std::vector<std::string>::size_type entry = 0; entry != searchFor.size(); ++entry)
    {
        // replaceString edits "text" in place, so its returned copy is not needed
        replaceString(text, searchFor[entry], substitute[entry]);
    }
    return text;
}
// Function: imageToText
// Description: receive a Mat and pass the Mat to OCR to detect the text
// A 5% border is cropped from every side of the image (Mat) to reduce noise
// The OCR is initialized for English ONLY.
// NOTE: "image" is passed by reference and is replaced by the cropped region,
// so the caller sees the cropped image after the call
// Returns the recognized UTF-8 text with common ligatures replaced, or an
// empty string when the image is empty or the OCR produced no text
// Terminates the program with exit(1) if tesseract cannot be initialized
std::string imageToText(cv::Mat &image)
{
    // nothing to recognize; avoid handing a null buffer to the OCR
    if (image.empty())
    {
        return std::string();
    }

    // automatic storage: the engine is destroyed on every return path,
    // so no manual delete is needed
    tesseract::TessBaseAPI api;

    // Initialize tesseract-ocr with English, without specifying tessdata path
    if (api.Init(nullptr, "eng"))
    {
        std::cerr << "ERROR: could not initialize tesseract" << std::endl;
        exit(1);
    }

    // crop the image to remove the border
    // this reduces the noise from the background
    // can use fixed pixels or with respect to width and height
    const int offsetX = static_cast<int>(image.size().width * 0.05);
    const int offsetY = static_cast<int>(image.size().height * 0.05);
    const cv::Rect roi(offsetX,
                       offsetY,
                       image.size().width - (offsetX * 2),
                       image.size().height - (offsetY * 2));

    // crop the original image to the defined ROI
    // the cropped Mat keeps the row stride of the original, which is why the
    // stride is passed explicitly below
    image = image(roi);

    // send the image to OCR
    // NOTE(review): channels() is used as bytes per pixel, which assumes
    // 8-bit samples -- confirm against the preprocessing stage
    api.SetImage(image.data,
                 image.size().width,
                 image.size().height,
                 image.channels(),
                 static_cast<int>(image.step1()));

    // get OCR result
    api.Recognize(nullptr);

    // GetUTF8Text returns a heap buffer owned by the caller (may be null when
    // recognition fails); copy it and release it with delete[]
    std::string outText;
    char *rawText = api.GetUTF8Text();
    if (rawText != nullptr)
    {
        outText = rawText;
        delete[] rawText;
    }

    // release the memory held by the engine
    api.End();

    return replaceLigatures(outText);
}