diff options
-rw-r--r-- | base/tessocr.cpp | 221 | ||||
-rw-r--r-- | base/tessocr.h | 75 | ||||
-rw-r--r-- | devices/gdevocr.c | 40 | ||||
-rw-r--r-- | devices/gdevpdfocr.c | 2 |
4 files changed, 168 insertions, 170 deletions
diff --git a/base/tessocr.cpp b/base/tessocr.cpp index c438fca0c..fec5979d6 100644 --- a/base/tessocr.cpp +++ b/base/tessocr.cpp @@ -29,6 +29,13 @@ extern "C" void *leptonica_calloc(size_t numelm, size_t elemsize); extern "C" void *leptonica_realloc(void *ptr, size_t blocksize); extern "C" void leptonica_free(void *ptr); +typedef struct +{ + gs_memory_t *mem; + tesseract::TessBaseAPI *api; +} wrapped_api; + + void *leptonica_malloc(size_t blocksize) { void *ret = malloc(blocksize); @@ -255,19 +262,34 @@ tess_file_reader(const char *fname, std::vector<char> *out) int ocr_init_api(gs_memory_t *mem, const char *language, int engine, void **state) { - tesseract::TessBaseAPI *api; enum tesseract::OcrEngineMode mode; + wrapped_api *wrapped; + int code = 0; + + if (mem->non_gc_memory != mem) { + dlprintf("ocr_init_api must not be called with gc controlled memory!\n"); + return_error(gs_error_unknownerror); + } + + wrapped = (wrapped_api *)(void *)gs_alloc_bytes(mem, sizeof(*wrapped), "ocr_init_api"); + if (wrapped == NULL) + return gs_error_VMerror; - leptonica_mem = mem->non_gc_memory; + leptonica_mem = mem; setPixMemoryManager(my_leptonica_malloc, my_leptonica_free); - api = new tesseract::TessBaseAPI(); + + wrapped->mem = mem; + wrapped->api = new tesseract::TessBaseAPI(); *state = NULL; - if (api == NULL) { - leptonica_mem = NULL; - setPixMemoryManager(malloc, free); - return_error(gs_error_VMerror); + if (wrapped->api == NULL) { + code = gs_error_VMerror; + goto fail; + } + + if (language == NULL || language[0] == 0) { + language = "eng"; } switch (engine) @@ -285,38 +307,48 @@ ocr_init_api(gs_memory_t *mem, const char *language, int engine, void **state) mode = tesseract::OcrEngineMode::OEM_TESSERACT_LSTM_COMBINED; break; default: - return_error(gs_error_rangecheck); + code = gs_error_rangecheck; + goto fail; } // Initialize tesseract-ocr with English, without specifying tessdata path - if (api->Init(NULL, 0, /* data, data_size */ - language, - mode, - NULL, 0, /* configs, configs_size */ - NULL, NULL, /* vars_vec */ - false, /* set_only_non_debug_params */ - &tess_file_reader)) { - delete api; - leptonica_mem = NULL; - setPixMemoryManager(malloc, free); - return_error(gs_error_unknownerror); + if (wrapped->api->Init(NULL, 0, /* data, data_size */ + language, + mode, + NULL, 0, /* configs, configs_size */ + NULL, NULL, /* vars_vec */ + false, /* set_only_non_debug_params */ + &tess_file_reader)) { + code = gs_error_unknownerror; + goto fail; } - *state = (void *)api; + *state = (void *)wrapped; return 0; +fail: + if (wrapped->api) { + delete wrapped->api; + } + leptonica_mem = NULL; + setPixMemoryManager(malloc, free); + gs_free_object(wrapped->mem, wrapped, "ocr_init_api"); + return_error(code); } void ocr_fin_api(gs_memory_t *mem, void *api_) { - tesseract::TessBaseAPI *api = (tesseract::TessBaseAPI *)api_; + wrapped_api *wrapped = (wrapped_api *)api_; - if (api == NULL) + if (wrapped == NULL) return; - api->End(); - delete api; + if (wrapped->api) { + wrapped->api->End(); + delete wrapped->api; + } + gs_free_object(wrapped->mem, wrapped, "ocr_fin_api"); leptonica_mem = NULL; setPixMemoryManager(malloc, free); } @@ -347,45 +379,37 @@ ocr_clear_image(Pix *image) } static int -do_ocr_image(gs_memory_t *mem, +do_ocr_image(wrapped_api *wrapped, int w, int h, int bpp, int raster, int xres, int yres, void *data, int restore, int hocr, int pagecount, - const char *language, int engine, char **out) + char **out) { char *outText; - tesseract::TessBaseAPI *api; int code; Pix *image; *out = NULL; - if (language == NULL || *language == 0) - language = "eng"; - code = ocr_init_api(mem, language, engine, (void **)&api); - if (code < 0) - return code; - if (bpp == 8) w = convert2pix((l_uint32 *)data, w, h, raster); - image = ocr_set_image(api, w, h, data, xres, yres); + image = ocr_set_image(wrapped->api, w, h, data, xres, yres); if (image == NULL) { if (restore && bpp == 8) convert2pix((l_uint32 *)data, w, h, raster); - ocr_fin_api(mem, api); return_error(gs_error_VMerror); } // Get OCR result //pixWrite("test.pnm", image, IFF_PNM); if (hocr) { - api->SetVariable("hocr_font_info", "true"); - api->SetVariable("hocr_char_boxes", "true"); - outText = api->GetHOCRText(pagecount); + wrapped->api->SetVariable("hocr_font_info", "true"); + wrapped->api->SetVariable("hocr_char_boxes", "true"); + outText = wrapped->api->GetHOCRText(pagecount); } else - outText = api->GetUTF8Text(); + outText = wrapped->api->GetUTF8Text(); ocr_clear_image(image); @@ -397,36 +421,34 @@ do_ocr_image(gs_memory_t *mem, if (outText) { size_t len = strlen(outText)+1; - *out = (char *)(void *)gs_alloc_bytes(mem, len, "ocr_to_utf8"); + *out = (char *)(void *)gs_alloc_bytes(wrapped->mem, len, "ocr_to_utf8"); if (*out) memcpy(*out, outText, len); } delete [] outText; - // Destroy used object and release memory - ocr_fin_api(mem, api); - return 0; } -int ocr_image_to_hocr(gs_memory_t *mem, +int ocr_image_to_hocr(void *api, int w, int h, int bpp, int raster, int xres, int yres, void *data, int restore, - int pagecount, const char *language, - int engine, char **out) + int pagecount, char **out) { - return do_ocr_image(mem, w, h, bpp, raster, xres, yres, data, - restore, 1, pagecount, language, engine, out); + return do_ocr_image((wrapped_api *)api, + w, h, bpp, raster, xres, yres, data, + restore, 1, pagecount, out); } -int ocr_image_to_utf8(gs_memory_t *mem, +int ocr_image_to_utf8(void *api, int w, int h, int bpp, int raster, int xres, int yres, void *data, int restore, - const char *language, int engine, char **out) + char **out) { - return do_ocr_image(mem, w, h, bpp, raster, xres, yres, data, - restore, 0, 0, language, engine, out); + return do_ocr_image((wrapped_api *)api, + w, h, bpp, raster, xres, yres, data, + restore, 0, 0, out); } int @@ -435,7 +457,7 @@ ocr_recognise(void *api_, int w, int h, void *data, int (*callback)(void *, const char *, const int *, const int *, const int *, int), void *arg) { - tesseract::TessBaseAPI *api = (tesseract::TessBaseAPI *)api_; + wrapped_api *wrapped = (wrapped_api *)api_; Pix *image; int code; int word_bbox[4]; @@ -445,17 +467,17 @@ ocr_recognise(void *api_, int w, int h, void *data, int pointsize, font_id; const char* font_name; - if (api == NULL) + if (wrapped == NULL || wrapped->api == NULL) return 0; - image = ocr_set_image(api, w, h, data, xres, yres); + image = ocr_set_image(wrapped->api, w, h, data, xres, yres); if (image == NULL) return_error(gs_error_VMerror); - code = api->Recognize(NULL); + code = wrapped->api->Recognize(NULL); if (code >= 0) { /* Bingo! */ - tesseract::ResultIterator *res_it = api->GetIterator(); + tesseract::ResultIterator *res_it = wrapped->api->GetIterator(); while (!res_it->Empty(tesseract::RIL_BLOCK)) { if (res_it->Empty(tesseract::RIL_WORD)) { @@ -504,7 +526,7 @@ ocr_recognise(void *api_, int w, int h, void *data, } static Pix * -ocr_set_bitmap(tesseract::TessBaseAPI *api, +ocr_set_bitmap(wrapped_api *wrapped, int w, int h, const unsigned char *data, int data_x, int raster, int xres, int yres) @@ -521,7 +543,7 @@ ocr_set_bitmap(tesseract::TessBaseAPI *api, if (image == NULL) return NULL; - pdata = gs_alloc_bytes(leptonica_mem, r * (h+BORDER_SIZE*2), "ocr_set_bitmap"); + pdata = gs_alloc_bytes(wrapped->mem, r * (h+BORDER_SIZE*2), "ocr_set_bitmap"); if (pdata == NULL) { pixDestroy(&image); return NULL; @@ -550,16 +572,16 @@ ocr_set_bitmap(tesseract::TessBaseAPI *api, d += r; } - api->SetImage(image); + wrapped->api->SetImage(image); // pixWrite("test.pnm", image, IFF_PNM); return image; } static void -ocr_clear_bitmap(Pix *image) +ocr_clear_bitmap(wrapped_api *wrapped, Pix *image) { - gs_free_object(leptonica_mem, pixGetData(image), "ocr_clear_bitmap"); + gs_free_object(wrapped->mem, pixGetData(image), "ocr_clear_bitmap"); pixSetData(image, NULL); pixDestroy(&image); } @@ -569,22 +591,22 @@ int ocr_bitmap_to_unicodes(void *state, int w, int h, int raster, int xres, int yres, int *unicode, int *char_count) { - tesseract::TessBaseAPI *api = (tesseract::TessBaseAPI *)state; + wrapped_api *wrapped = (wrapped_api *)state; Pix *image; int code, max_chars = *char_count, count = 0; - if (api == NULL) + if (wrapped == NULL || wrapped->api == NULL) return 0; - image = ocr_set_bitmap(api, w, h, (const unsigned char *)data, + image = ocr_set_bitmap(wrapped, w, h, (const unsigned char *)data, data_x, raster, xres, yres); if (image == NULL) return_error(gs_error_VMerror); - code = api->Recognize(NULL); + code = wrapped->api->Recognize(NULL); if (code >= 0) { /* Bingo! */ - tesseract::ResultIterator *res_it = api->GetIterator(); + tesseract::ResultIterator *res_it = wrapped->api->GetIterator(); while (!res_it->Empty(tesseract::RIL_BLOCK)) { if (res_it->Empty(tesseract::RIL_WORD)) { @@ -639,75 +661,12 @@ int ocr_bitmap_to_unicodes(void *state, code = code; } - ocr_clear_bitmap(image); + ocr_clear_bitmap(wrapped, image); *char_count = count; return code; } -int ocr_bitmap_to_unicode(void *state, - const void *data, int data_x, - int w, int h, int raster, - int xres, int yres, int *unicode) -{ - tesseract::TessBaseAPI *api = (tesseract::TessBaseAPI *)state; - Pix *image; - int code; - - if (api == NULL) - return 0; - - image = ocr_set_bitmap(api, w, h, (const unsigned char *)data, - data_x, raster, xres, yres); - if (image == NULL) - return_error(gs_error_VMerror); - - code = api->Recognize(NULL); - if (code >= 0) { - /* Bingo! */ - tesseract::ResultIterator *res_it = api->GetIterator(); - - while (!res_it->Empty(tesseract::RIL_BLOCK)) { - if (res_it->Empty(tesseract::RIL_WORD)) { - res_it->Next(tesseract::RIL_WORD); - continue; - } - - do { - const unsigned char *graph = (unsigned char *)res_it->GetUTF8Text(tesseract::RIL_SYMBOL); - if (graph && graph[0] != 0) { - /* Quick and nasty conversion from UTF8 to unicode. */ - if (graph[0] < 0x80) - *unicode = graph[0]; - else { - *unicode = graph[1] & 0x3f; - if (graph[0] < 0xE0) - *unicode += (graph[0] & 0x1f)<<6; - else { - *unicode = (graph[2] & 0x3f) | (*unicode << 6); - if (graph[0] < 0xF0) { - *unicode += (graph[0] & 0x0F)<<6; - } else { - *unicode = (graph[3] & 0x3f) | (*unicode<<6); - *unicode += (graph[0] & 0x7); - } - } - } - } - res_it->Next(tesseract::RIL_SYMBOL); - } while (!res_it->Empty(tesseract::RIL_BLOCK) && - !res_it->IsAtBeginningOf(tesseract::RIL_WORD)); - } - delete res_it; - code = code; - } - - ocr_clear_bitmap(image); - - return code; -} - - }; /* Currently tesseract is the only C++ lib we have. diff --git a/base/tessocr.h b/base/tessocr.h index 8beaa8915..b4603d470 100644 --- a/base/tessocr.h +++ b/base/tessocr.h @@ -29,36 +29,55 @@ enum OCR_ENGINE_BOTH = 3 }; -int ocr_image_to_utf8(gs_memory_t *mem, - int w, int h, int bpp, int raster, - int xres, int yres, - void *data, int restore_data, - const char *language, int engine, char **out); - -int ocr_image_to_hocr(gs_memory_t *mem, - int w, int h, int bpp, int raster, - int xres, int yres, void *data, int restore, - int pagecount, const char *language, - int engine, char **out); - -int ocr_init_api(gs_memory_t *mem, const char *language, int engine, void **state); - -void ocr_fin_api(gs_memory_t *mem, void *api_); - -int ocr_recognise(void *api_, int w, int h, void *data, - int xres, int yres, +int ocr_init_api(gs_memory_t *mem, + const char *language, + int engine, + void **state); + +void ocr_fin_api(gs_memory_t *mem, + void *state); + +int ocr_recognise(void *state, + int w, + int h, + void *data, + int xres, + int yres, int (*callback)(void *, const char *, const int *, const int *, const int *, int), void *arg); -int ocr_bitmap_to_unicodes(void* state, - const void* data,int data_x, - int w,int h,int raster, - int xres,int yres,int* unicode, int* char_count); - -int ocr_bitmap_to_unicode(void *state, - const void *data, int data_x, - int w, int h, int raster, - int xres, int yres, int *unicode); +int ocr_bitmap_to_unicodes(void *state, + const void *data, + int data_x, + int w, + int h, + int raster, + int xres, + int yres, + int *unicode, + int *char_count); + +int ocr_image_to_utf8(void *state, + int w, + int h, + int bpp, + int raster, + int xres, + int yres, + void *data, + int restore_data, + char **out); + +int ocr_image_to_hocr(void *state, + int w, + int h, + int bpp, + int raster, + int xres, + int yres, + void *data, + int restore, + int pagecount, + char **out); #endif - diff --git a/devices/gdevocr.c b/devices/gdevocr.c index 93d3b6551..dbfee0bec 100644 --- a/devices/gdevocr.c +++ b/devices/gdevocr.c @@ -34,7 +34,8 @@ static dev_proc_print_page(ocr_print_page); static dev_proc_print_page(hocr_print_page); static dev_proc_get_params(ocr_get_params); static dev_proc_put_params(ocr_put_params); -static dev_proc_open_device(hocr_open); +static dev_proc_open_device(ocr_open); +static dev_proc_close_device(ocr_close); static dev_proc_close_device(hocr_close); typedef struct gx_device_ocr_s gx_device_ocr; @@ -45,12 +46,13 @@ struct gx_device_ocr_s { char language[1024]; int engine; int page_count; + void *api; }; /* 8-bit gray bitmap -> UTF8 OCRd text */ static const gx_device_procs ocr_procs = -prn_color_params_procs(gdev_prn_open, gdev_prn_bg_output_page, gdev_prn_close, +prn_color_params_procs(ocr_open, gdev_prn_bg_output_page, ocr_close, gx_default_gray_map_rgb_color, gx_default_gray_map_color_rgb, ocr_get_params, ocr_put_params); @@ -67,7 +69,7 @@ const gx_device_ocr gs_ocr_device = /* 8-bit gray bitmap -> HTML OCRd text */ static const gx_device_procs hocr_procs = -prn_color_params_procs(hocr_open, gdev_prn_bg_output_page, hocr_close, +prn_color_params_procs(ocr_open, gdev_prn_bg_output_page, hocr_close, gx_default_gray_map_rgb_color, gx_default_gray_map_color_rgb, ocr_get_params, ocr_put_params); @@ -87,16 +89,33 @@ const gx_device_ocr gs_hocr_device = #define HOCR_TRAILER " </body>\n</html>\n" static int -hocr_open(gx_device *pdev) +ocr_open(gx_device *pdev) { gx_device_ocr *dev = (gx_device_ocr *)pdev; + int code; dev->page_count = 0; + code = ocr_init_api(dev->memory->non_gc_memory, + dev->language, dev->engine, &dev->api); + if (code < 0) + return code; + return gdev_prn_open(pdev); } static int +ocr_close(gx_device *pdev) +{ + gx_device_ocr *dev = (gx_device_ocr *)pdev; + gx_device_printer * const ppdev = (gx_device_printer *)pdev; + + ocr_fin_api(dev->memory->non_gc_memory, dev->api); + + return gdev_prn_close(pdev); +} + +static int hocr_close(gx_device *pdev) { gx_device_ocr *dev = (gx_device_ocr *)pdev; @@ -106,7 +125,7 @@ hocr_close(gx_device *pdev) gp_fwrite(HOCR_TRAILER, 1, sizeof(HOCR_TRAILER)-1, dev->file); } - return gdev_prn_close(pdev); + return ocr_close(pdev); } static int @@ -235,20 +254,20 @@ do_ocr_print_page(gx_device_ocr * pdev, gp_file * file, int hocr) goto done; if (hocr) - code = ocr_image_to_hocr(pdev->memory, + code = ocr_image_to_hocr(pdev->api, width, height, 8, raster, (int)pdev->HWResolution[0], (int)pdev->HWResolution[1], data, 0, pdev->page_count, - "eng", pdev->engine, &out); + &out); else - code = ocr_image_to_utf8(pdev->memory, + code = ocr_image_to_utf8(pdev->api, width, height, 8, raster, (int)pdev->HWResolution[0], (int)pdev->HWResolution[1], - data, 0, "eng", pdev->engine, &out); + data, 0, &out); if (code < 0) goto done; if (out) @@ -257,7 +276,8 @@ do_ocr_print_page(gx_device_ocr * pdev, gp_file * file, int hocr) gp_fwrite(HOCR_HEADER, 1, sizeof(HOCR_HEADER)-1, file); } gp_fwrite(out, 1, strlen(out), file); - gs_free_object(pdev->memory, out, "ocr_image_to_utf8"); + gs_free_object(pdev->memory->non_gc_memory, + out, "ocr_image_to_utf8"); } done: diff --git a/devices/gdevpdfocr.c b/devices/gdevpdfocr.c index d9cb2de87..07d60d84a 100644 --- a/devices/gdevpdfocr.c +++ b/devices/gdevpdfocr.c @@ -420,7 +420,7 @@ ocr_file_init(gx_device_pdf_image *dev) stream_write(dev->strm, funky_font6a, sizeof(funky_font6a)); stream_write(dev->strm, funky_font6b, sizeof(funky_font6b)-1); - return ocr_init_api(dev->memory, language, dev->ocr.engine, &dev->ocr.state); + return ocr_init_api(dev->memory->non_gc_memory, language, dev->ocr.engine, &dev->ocr.state); } static void |