20 #define HOCRDOCUMENT 1 23 #include "HOCRTextBox.h" 24 #include "resolution.h" 75 HOCRDocument(
const QImage &image, QStringList languages=QStringList()) {
read(image,languages);};
88 bool hasError()
const {
return !_error.isEmpty();};
96 QString
error()
const {
return _error; };
111 QSet<QString>
warnings()
const {
return _warnings;};
119 QSet<QString>
system()
const {
return _OCRSystem;};
136 QList<HOCRTextBox>
pages()
const {
return _pages;};
142 bool isEmpty()
const {
return _pages.isEmpty();};
// Fragment of a member function whose signature and remaining statements are
// not visible in this listing (presumably takeFirstPage() — confirm).
// When _pages holds at least one entry, the first entry is removed from the
// list and returned to the caller.
163 if (_pages.size() > 0)
164 return _pages.takeFirst();
/**
 * Reads an HOCR document from a QIODevice.
 *
 * @param device  Device to read from.
 */
182 void read(QIODevice *device);
/**
 * Overload of read() that takes a file name instead of a device.
 *
 * NOTE(review): presumably reads the HOCR document stored in that file; the
 * implementation is not visible here — confirm.
 *
 * @param fileName  Name of the file.
 */
191 void read(
const QString& fileName);
/**
 * Overload of read() that takes an image and a list of languages. The
 * constructor taking a QImage forwards its arguments to this overload.
 *
 * NOTE(review): presumably runs the tesseract OCR engine on the image, as the
 * description of the image constructor states — confirm against the
 * implementation, which is not visible here.
 *
 * @param image      Input image.
 * @param languages  Language list; defaults to an empty QStringList.
 */
210 void read(
const QImage &image,
const QStringList& languages=QStringList());
/**
 * Export to PDF.
 *
 * @param fileName          File name — presumably the PDF file to be written; confirm.
 * @param _resolution       Resolution value used for the export.
 * @param title             Title; defaults to a default-constructed QString.
 * @param overridePageSize  Page size override; defaults to a default-constructed QPageSize.
 * @param overrideFont      Optional font override; defaults to a null pointer.
 * @returns A QString whose meaning is not visible here (possibly an error
 *          message — TODO confirm).
 */
248 QString
toPDF(
const QString& fileName,
resolution _resolution,
const QString& title=QString(),
const QPageSize& overridePageSize=QPageSize(), QFont *overrideFont=0)
const;
/**
 * Export to images.
 *
 * @param overrideFont  Optional font override; defaults to a null pointer.
 * @param format        Image format; defaults to QImage::Format_Grayscale8.
 * @returns A list of images (presumably one per page — TODO confirm).
 */
264 QList<QImage>
toImages(QFont *overrideFont=0, QImage::Format format=QImage::Format_Grayscale8)
const;
/**
 * Finds the page size for one page.
 *
 * NOTE(review): the selection logic is not visible here; presumably
 * overridePageSize takes precedence when it is valid and otherwise the size
 * is derived from the page and the resolution — confirm.
 *
 * @param pageNumber        Number of the page (index base not visible here).
 * @param _resolution       Resolution value.
 * @param overridePageSize  Page size override.
 * @returns The page size determined for that page.
 */
308 QPageSize findPageSize(
int pageNumber,
resolution _resolution,
const QPageSize &overridePageSize)
const;
// Set of strings returned by system().
315 QSet<QString> _OCRSystem;
// Set of strings describing OCR capabilities (presumably what capabilities()
// returns — confirm; that accessor's body is not visible here).
319 QSet<QString> _OCRCapabilities;
// Page text boxes of the document; returned by pages(), tested by isEmpty().
322 QList<HOCRTextBox> _pages;
// Set of warning strings returned by warnings().
325 QSet<QString> _warnings;
HOCRDocument(const QImage &image, QStringList languages=QStringList())
Constructs an HOCR document by running the tesseract OCR engine.
bool isEmpty() const
Returns true if the document contains no pages.
QFont suggestFont() const
Suggest font.
HOCRDocument(QIODevice *device)
Constructs an HOCR document from a QIODevice.
void read(QIODevice *device)
Reads an HOCR document from a QIODevice.
The resolution class stores a resolution and converts between units.
QSet< QString > warnings() const
Warning messages.
HOCRDocument(QString fileName)
Constructs an HOCR document from a file.
QList< QImage > toImages(QFont *overrideFont=0, QImage::Format format=QImage::Format_Grayscale8) const
Export to images.
bool hasWarnings() const
Warning status.
bool hasText() const
Checks whether the document contains text.
QString toText() const
Export this document as text.
QSet< QString > system() const
System(s) that generated this file.
static bool areLanguagesSupportedByTesseract(const QStringList &lingos)
Check if languages are supported by tesseract.
void clear()
Resets the document.
Text box, as defined in an HOCR file.
void append(const HOCRDocument &other)
Appends other HOCRDocument.
QList< HOCRTextBox > pages() const
Pages in the document.
bool hasError() const
Error status.
QString toPDF(const QString &fileName, resolution _resolution, const QString &title=QString(), const QPageSize &overridePageSize=QPageSize(), QFont *overrideFont=0) const
Export to PDF.
QString error() const
Error message.
HOCRTextBox takeFirstPage()
Removes the first page of the document and returns it.
Reads and interprets HOCR files, the standard output file format for Optical Character Recognition systems.
static QStringList tesseractLanguages()
List of languages supported by tesseract.
HOCRDocument()
Constructs an empty HOCR document.
QSet< QString > capabilities() const
OCR capabilities.