/* out-chars.c: try to extract the real characters from the image.

Copyright (C) 1992, 93, 04 Free Software Foundation, Inc.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */

#include "config.h"

#include "tex-file.h"
#include "paths.h"
#include "bb-outline.h"
#include "gf.h"
#include "report.h"
#include "bitmap2.h"

#include "extract.h"
#include "ifi.h"
#include "image-header.h"
#include "main.h"
#include "out-strips.h"

/*xx Scratch stream and output file names used by the debugging dumps
   of intermediate bitmaps scattered through this file.  */
FILE *testfileptr;
const char *whole_bitmap_filename = "test-whole-bitmap";
const char *bb_whole_bitmap_filename = "test-bb-whole-bitmap";
const char *subbitmap_filename = "test-subbitmap";
const char *bb_subbitmap_filename = "test-bb-subbitmap";
const char *line_subbitmap_filename = "test-line-subbitmap";
const char *line_bb_subbitmap_filename = "test-line-bb-subbitmap";
const char *gf_bitmap_filename = "test-gf-bitmap";
const char *gf_bb_bitmap_filename = "test-gf-bb-bitmap";

/* A list of the image rows on which the baselines occur.  The end of
   the list is marked with an element -1.  (-baselines)  */
int *baseline_list = NULL;

/* Find about this many characters in the image, then stop.  This is
   useful only for testing, because converting the entire image takes
   a long time.  So we might in truth output a few more characters
   than specified here.  (-nchars)  */
unsigned nchars_wanted = MAX_CHARCODE + 1;

/* If set, prints diagnostics about which boxes are and aren't
   cleaned.  (-print-clean-info)  */
boolean print_clean_info = false;

/* Says whether to print the row numbers of each character as we go,
   so that the user can decide how to adjust jumping baselines.
   (-print-guidelines)  */
boolean print_guidelines = false;

/* Says which characters to output.  This is independent of the
   ordering in the font file.  (-range)  */
int starting_char = 0;
int ending_char = MAX_CHARCODE;

/* The design size of the font we're creating.  */
static real design_size;

/* Where the baseline of the current row is in the image.  The first
   row is #1, and so on.  We start over at row 1 at each image row.  */
static unsigned row_baseline;
static unsigned row_height;

/* How many boxes total the characters take up.  */
static unsigned total_boxes_expected;

static boolean bb_equal_p (bounding_box_type, bounding_box_type);
static gf_char_type bitmap_to_gf_char
  (bitmap_type, real, bounding_box_type, image_char_type);
static void clean_bitmap (bitmap_type *, bb_list_type);
static boolean do_image_line
  (bitmap_type, unsigned *, unsigned *, real, image_char_list_type);
static void image_to_bitmap_bbs (bb_list_type *);
extern void offset_bb_list (bb_list_type *l, int offset);
static unsigned output_chars
  (bb_list_type, bitmap_type, real, image_char_list_type, unsigned);

/* Analyze the input image described by IMAGE_HEADER, outputting the
   characters we find.  DS is the design size of the font being made;
   it is stashed in `design_size' so that it need not be threaded
   through every intervening routine down to the low-level output.  */

void
write_image_chars (image_header_type image_header, real ds)
{
  unsigned chars_so_far = 0;
  bitmap_type *line_bitmap;

  /* The image information tells us to which character each bounding
     box belongs, among other things.  */
  image_char_list_type image_char_list
    = read_ifi_file (&total_boxes_expected);

  design_size = ds;

  /* One iteration per line of type.  Besides running out of requested
     characters, we leave (more commonly) when the whole image has
     been read.  */
  while (chars_so_far < nchars_wanted)
    {
      /* Column numbers where the image column alternates between some
         black and all white; they give the bounding box column min
         and max for each character.  */
      unsigned *transitions;

      /* Fetch one line of characters along with its transitions.  The
         bitmap is whatever the original image has, e.g., `a...z' with
         blank columns at the left and right.  */
      line_bitmap
        = some_black_to_all_white_row (image_header.width, &transitions);
      if (line_bitmap == NULL)
        break;  /* We've read the whole image file.  */

      /* Take the next user-supplied baseline if one remains; otherwise
         fall back to zero, complaining only if a list was given but
         has run out.  */
      if (baseline_list != NULL && *baseline_list != -1)
        row_baseline = *baseline_list++;
      else
        {
          if (baseline_list != NULL)
            WARNING ("imageto: Not enough baselines specified");
          row_baseline = 0;
        }

      row_height = BITMAP_HEIGHT (*line_bitmap);

      /* Process the line.  When `do_image_line' fails, the line ended
         in the middle of a character: e.g., a line holding only an `!'
         first yields just the stem, and we want the dot too.  So we
         glue the next image row underneath and try again.  */
      for (;;)
        {
          bitmap_type *taller_bitmap;

          if (do_image_line (*line_bitmap, transitions, &chars_so_far,
                             (real) image_header.hres, image_char_list))
            break;

          taller_bitmap = append_next_image_row (*line_bitmap,
                                                 image_header.width,
                                                 &transitions);
          if (taller_bitmap == NULL)
            {
              WARNING ("imageto: Image ended in the midst of a character");
              break;
            }

          /* Tell the user the image row didn't end on a character.  */
          REPORT ("+");

          free_bitmap (line_bitmap);
          line_bitmap = taller_bitmap;

          if (baseline_list == NULL)
            row_baseline = 0;

          row_height = BITMAP_HEIGHT (*line_bitmap);
        }

      free_bitmap (line_bitmap);
    }
}

/* Analyze and output all of the bitmap IMAGE, which is one line of
   type in the original.  The resolution of the image is H_RESOLUTION,
   and we've read NCHARS characters (including .notdefs) so far.
We use IMAGE_CHAR_LIST, a list of character information from the IFI file, and the transition vector TRANSITIONS, which has in it how IMAGE breaks into characters or character groups. We return false if we need to be given another image row to complete a character. */ #define NEXT_TRANSITION() ({ \ if (*transitions == BITMAP_WIDTH (image) + 1) \ { \ WARNING ("imageto: Expected more transitions"); \ break; \ } \ *transitions++; \ }) static boolean do_image_line (bitmap_type image, unsigned *transitions, unsigned *nchars, real h_resolution, image_char_list_type image_char_list) { static unsigned box_count = 0; bounding_box_type bb; /* We always use the entire image line vertically. */ MIN_ROW (bb) = 0; MAX_ROW (bb) = BITMAP_HEIGHT (image) - 1; /* `nchars_wanted' is an option to the program, defined at the top. Go through the whole line, unless we've found as many characters as the user requested. */ while (*nchars < nchars_wanted && *transitions != BITMAP_WIDTH (image) + 1) { int bb_offset = 0; unsigned previous_box_count = box_count; bb_list_type boxes = bb_list_init (); bitmap_type *char_bitmap = XTALLOC1 (bitmap_type); bitmap_type *temp_bitmap = XTALLOC1 (bitmap_type); /* The first element of TRANSITIONS is white-to-black. Thereafter, they alternate. */ MIN_COL (bb) = NEXT_TRANSITION (); MAX_COL (bb) = NEXT_TRANSITION (); /* Get the bitmap of the first transition. `char_bitmap' is either a single character (if the character didn't overlap with a consecutive one in the font specimen) or more than one (if the characters did overlap). For example, it might contain just an `a' this time through the loop (with blank rows above and below, because `a' has neither a descender nor an ascender), then next time a `b', and so on. Or it might contain `ij' (if they overlap in the specimen) instead of `i' this time and `j' next time. 
*/ *char_bitmap = extract_subbitmap (image, bb); *temp_bitmap = copy_bitmap (*char_bitmap); /*xx*/ if ((testfileptr = fopen(line_subbitmap_filename, "w")) != NULL) print_bitmap(testfileptr, *temp_bitmap); else printf("Bad file pointer for printing subbitmap.\n"); fclose(testfileptr); if ((testfileptr = fopen(line_bb_subbitmap_filename, "w")) != NULL) print_bounded_bitmap(testfileptr, *temp_bitmap, bb); else printf("Bad file pointer for printing bb-subbitmap.\n"); fclose(testfileptr); /* Unless we've read enough boxes, find the ones in the current character(s), and exit, either: 1) to output a character if the boxes ``finish'' a character, or 2) to exit the routine with false, if they don't. If don't find all the boxes needed to finish a character (i.e., it has vertical white space in it, loop around getting more ``transitions'' until we do. */ while (true) { bb_list_type temp_boxes; bounding_box_type previous_bb; /* If we've read all the bounding boxes specified (in the IFI file), we're done. */ if (box_count == total_boxes_expected) break; /* If we've read more boxes than we expected to, trouble. */ if (box_count > total_boxes_expected) { WARNING2 ("imageto: Read box #%u but expected only %u", box_count, total_boxes_expected); /* No point in giving this message more than once. */ total_boxes_expected = INT_MAX; } /* Find the bounding boxes around all the shapes in `temp_bitmap'. (The first time through the loop, `temp_bitmap' is the same as `char_bitmap.') Continuing the above `ij' example, this would result in four bounding boxes (one for the ``dotless'' part of the `i,' one for the ``dotless'' part of the `j' and two for each of the dots). */ temp_boxes = find_outline_bbs (*temp_bitmap, false, 0, 0); /* The subimages we've created all start at column zero. But if there are overlapping characters (the `ij' example above), we want to put the images (for, e.g., `i' and `j') side-by-side instead of overlaying them. 
So we change the bounding box numbers for `temp_boxes' by adding `bb_offset.' */ offset_bb_list (&temp_boxes, bb_offset); box_count += BB_LIST_LENGTH (temp_boxes); bb_list_splice (&boxes, temp_boxes); /* Break if the number of boxes indicated for the current character(s) coincide with the end of a character. This translates to: if the white column was at the end of some character in the list, exit the loop to output what we've got. */ if (box_at_char_boundary_p (image_char_list, *nchars, box_count - previous_box_count)) break; /* If we're at the end of the image row, return to our caller that we had to quit in the middle of a character. */ if (*transitions == BITMAP_WIDTH (image) + 1) { /* Forget that we've seen these boxes. */ box_count = previous_box_count; return false; } /* If the character has white vertical space between its parts, e.g., double quotes or ellipses, have to get the rest of its bitmap. */ previous_bb = bb; MIN_COL (bb) = NEXT_TRANSITION (); MAX_COL (bb) = NEXT_TRANSITION (); /* Leave in the white space between the character parts. */ MIN_COL (bb) = MAX_COL (previous_bb); free_bitmap (temp_bitmap); *temp_bitmap = extract_subbitmap (image, bb); /*xx*/ if ((testfileptr = fopen(line_subbitmap_filename, "w")) != NULL) print_bitmap(testfileptr, *temp_bitmap); else printf("Bad file pointer for printing subbitmap.\n"); fclose(testfileptr); if ((testfileptr = fopen(line_bb_subbitmap_filename, "w")) != NULL) print_bounded_bitmap(testfileptr, *temp_bitmap, bb); else printf("Bad file pointer for printing bb-subbitmap.\n"); fclose(testfileptr); if (temp_bitmap == NULL) { WARNING1 ("imageto: Expected more bounding boxes for `%d'", IMAGE_CHARCODE (IMAGE_CHAR (image_char_list, *nchars))); break; } /* Next time through the loop, the boxes `temp_bitmap' should be just to the right of the bitmap we've accumulated in `char_bitmap'. This happens if the character has vertical white space between its parts. 
*/ bb_offset = BITMAP_WIDTH (*char_bitmap); /* When this happens, it usually means that the IFI file didn't specify enough bounding boxes for some character, and so things are out of sync. */ if (BITMAP_HEIGHT (*char_bitmap) != BITMAP_HEIGHT (*temp_bitmap)) { WARNING ("imageto: Line ended inside a character"); break; } bitmap_concat (char_bitmap, *temp_bitmap); } /* while (true) */ free_bitmap (temp_bitmap); /* Convert the bits inside those bounding boxes into one (if not overlapping with another) or more (if overlapping) characters in the GF font. */ *nchars += output_chars (boxes, *char_bitmap, h_resolution, image_char_list, *nchars); free_bitmap (char_bitmap); bb_list_free (&boxes); } /* end while (*nchars < nchars_wanted && *transitions != BITMAP_WIDTH (image) + 1) */ return true; } /* Move all the elements in BB_LIST to the right by OFFSET. */ void offset_bb_list (bb_list_type *bb_list, int offset) { unsigned this_bb; for (this_bb = 0; this_bb < BB_LIST_LENGTH (*bb_list); this_bb++) { bounding_box_type *bb = &BB_LIST_ELT (*bb_list, this_bb); MIN_COL (*bb) += offset; MAX_COL (*bb) += offset; } } /* Return true if BB1 and BB2 are equal. */ static boolean bb_equal_p (bounding_box_type bb1, bounding_box_type bb2) { return MIN_COL (bb1) == MIN_COL (bb2) && MIN_ROW (bb1) == MIN_ROW (bb2) && MAX_COL (bb1) == MAX_COL (bb2) && MAX_ROW (bb1) == MAX_ROW (bb2); } /* For each bounding box in the list BOXES, extract from IMAGE_LINE_BITMAP and turn the resulting bitmap into a single character in the font. The information in IMAGE_CHAR_LIST maps bounding boxes to character codes; consecutive bounding boxes may belong to the same character. For example, `i' will appear twice, once for the dot and once for the stem. We assume that all the bounding boxes for a given character will appear in IMAGE_LINE_BITMAP. We return the number of characters (not bounding boxes) found, including characters that were omitted. 
*/ /* Predicate to tell us if we want to actually write the character. */ #define OUTPUT_CHAR_P(code, image_char) \ ((code) >= starting_char && (code) <= ending_char \ && !IMAGE_CHAR_OMIT (image_char)) static unsigned output_chars (bb_list_type boxes, bitmap_type image_line_bitmap, real h_resolution, image_char_list_type image_char_list, unsigned current_char) { static unsigned char_count = 0; int this_box; /* Because we might have to subtract when it's zero. */ boolean done[BB_LIST_LENGTH (boxes)]; /* Since we report (a lot) more information when `print_guidelines' is true, we can fit fewer characters per line. */ unsigned nchars_per_line = print_guidelines ? 1 : 11; unsigned nchars_written = 0; for (this_box = 0; this_box < BB_LIST_LENGTH (boxes); this_box++) done[this_box] = false; for (this_box = 0; this_box < BB_LIST_LENGTH (boxes); this_box++) { bounding_box_type bb; bitmap_type bitmap; image_char_type c; charcode_type charcode; bb_list_type bb_list = bb_list_init (); /* `done[this_box]' will be set if we get to a bounding box that has already been combined with a previous one, because of an alternating combination. Since we never go backwards, we don't bother to set `done' for every box we look at. */ if (done[this_box]) continue; c = IMAGE_CHAR (image_char_list, current_char++); charcode = IMAGE_CHARCODE (c); REPORT ("["); /* Only bother to collect the character image if we're going to output it; otherwise, it just wastes a lot of time and space. */ if (OUTPUT_CHAR_P (charcode, c)) { /* A character consisting of zero bounding boxes is invisible; e.g., a space. We don't want to read any of the bitmap for such a thing. */ if (IMAGE_CHAR_BB_COUNT (c) == 0) { BITMAP_HEIGHT (bitmap) = 0; BITMAP_WIDTH (bitmap) = 0; BITMAP_BITS (bitmap) = NULL; bb = (bounding_box_type) { 0, 0, 0, 0 }; /* Since we're not eating up any bounding boxes, reconsider the current one. 
*/ this_box--; } else { bb = BB_LIST_ELT (boxes, this_box); bitmap = extract_subbitmap (image_line_bitmap, bb); /*xx*/ if ((testfileptr = fopen(subbitmap_filename, "w")) != NULL) print_bitmap(testfileptr, bitmap); else printf("Bad file pointer for printing subbitmap.\n"); fclose(testfileptr); if ((testfileptr = fopen(bb_subbitmap_filename, "w")) != NULL) print_bounded_bitmap(testfileptr, bitmap, bb); else printf("Bad file pointer for printing bb-subbitmap.\n"); fclose(testfileptr); bb_list_append (&bb_list, bb); } } while (IMAGE_CHAR_BB_COUNT (c)-- > 1) { unsigned combine_box; if (IMAGE_CHAR_BB_ALTERNATING (c)) { /* Don't increment `this_box', since it is incremented at the end of the loop, and the next box is part of another character. */ combine_box = this_box + 2; /* Don't look at the second box again in the outer loop. */ done[combine_box] = true; } else /* Increment `this_box' so can keep combining nonalternating bounding boxes until they are used up or we hit an alternating one. */ combine_box = ++this_box; /* combine_box starts at 0 and BB_LIST_LENGTH starts at 1, so... */ if (combine_box >= BB_LIST_LENGTH (boxes)) { WARNING1 ("imageto: Not enough outlines for char %u", (unsigned) charcode); break; } if (OUTPUT_CHAR_P (charcode, c)) { /* Get the shape to combine with `bitmap'. 
*/ bounding_box_type next_bb = BB_LIST_ELT (boxes, combine_box); bitmap_type next_bitmap = extract_subbitmap (image_line_bitmap, next_bb); /*xx*/ if ((testfileptr = fopen(subbitmap_filename, "w")) != NULL) print_bitmap(testfileptr, next_bitmap); else printf("Bad file pointer for printing subbitmap.\n"); fclose(testfileptr); if ((testfileptr = fopen(bb_subbitmap_filename, "w")) != NULL) print_bounded_bitmap(testfileptr, next_bitmap, next_bb); else printf("Bad file pointer for printing bb-subbitmap.\n"); fclose(testfileptr); bb_list_append (&bb_list, next_bb); combine_images (&bitmap, next_bitmap, &bb, next_bb); /*xx*/ if ((testfileptr = fopen(whole_bitmap_filename, "w")) != NULL) print_bitmap(testfileptr, bitmap); else printf("Bad file pointer for printing whole_bitmap.\n"); fclose(testfileptr); if ((testfileptr = fopen(bb_whole_bitmap_filename, "w")) != NULL) print_bounded_bitmap(testfileptr, bitmap, bb); else printf("Bad file pointer for printing bb_whole_bitmap.\n"); fclose(testfileptr); free_bitmap (&next_bitmap); } } /* while (IMAGE_CHAR_BB_COUNT (c)-- > 1) */ if (OUTPUT_CHAR_P (charcode, c)) { gf_char_type gf_char; if (BITMAP_BITS (bitmap) != NULL) clean_bitmap (&bitmap, bb_list); gf_char = bitmap_to_gf_char (bitmap, h_resolution, bb, c); gf_put_char (gf_char); /* This and the GF character's bitmap are the same, so we only need to free one of the two. */ if (BITMAP_BITS (bitmap) != NULL) free_bitmap (&bitmap); } else { REPORT (""); /* We're ignoring this character. */ /*xx this_box--;*//* Reconsider the bounding box. */ } REPORT1 ("]%c", ++char_count % nchars_per_line ? ' ' : '\n'); nchars_written++; bb_list_free (&bb_list); } /* for (this_box = 0; this_box < BB_LIST_LENGTH (boxes); this_box++) */ return nchars_written; } /* Remove bits of adjacent characters that may have crept into B because of overlapping characters in the original image. KNOWN_BOXES lists all the known parts of B; if we find other bounding boxes in B, we remove them. 
*/ static void clean_bitmap (bitmap_type *b, bb_list_type known_boxes) { unsigned test; bb_list_type test_boxes = find_outline_bbs (*b, false, BITMAP_WIDTH (*b), 0); if (print_clean_info) REPORT2 ("Cleaning %ux%u bitmap:\n", BITMAP_WIDTH (*b), BITMAP_HEIGHT (*b)); /* Convert KNOWN_BOXES to the same coordinates as `test_boxes'. */ image_to_bitmap_bbs (&known_boxes); for (test = 0; test < BB_LIST_LENGTH (test_boxes); test++) { unsigned known; unsigned known_length = BB_LIST_LENGTH (known_boxes); bounding_box_type test_bb = BB_LIST_ELT (test_boxes, test); if (print_clean_info) REPORT4 (" checking (%d,%d)-(%d,%d) ... ", MIN_COL (test_bb), MIN_ROW (test_bb), MAX_COL (test_bb), MAX_ROW (test_bb)); /* If we want to keep `test_bb', it will be one of the elements of BB_LIST. Otherwise, it is a piece of an adjacent character, and we should erase it. */ for (known = 0; known < known_length && !bb_equal_p (test_bb, BB_LIST_ELT (known_boxes, known)); known++) ; if (known == known_length) { unsigned r; int test_bb_width = BB_WIDTH (test_bb); assert (test_bb_width > 0); if (print_clean_info) REPORT ("clearing.\n"); for (r = MIN_ROW (test_bb); r <= MAX_ROW (test_bb); r++) { one_byte *row = BITMAP_ROW (*b, r); memset (row + MIN_COL (test_bb), 0, test_bb_width); } } else if (print_clean_info) REPORT ("keeping.\n"); } } /* Translate the elements of BOXES to the origin, i.e., shift each down by the minimum row and column. We use this in `clean_bitmap' to change bounding boxes in the coordinates of the entire image to the coordinates of the single character we are cleaning. */ static void image_to_bitmap_bbs (bb_list_type *boxes) { unsigned b; unsigned min_col = UINT_MAX; unsigned min_row = UINT_MAX; /* First find the minimum row and column of all the bb's in BOXES. 
*/ for (b = 0; b < BB_LIST_LENGTH (*boxes); b++) { bounding_box_type bb = BB_LIST_ELT (*boxes, b); assert (MIN_COL (bb) >= 0 && MIN_ROW (bb) >= 0); MIN_EQUALS (min_col, MIN_COL (bb)); MIN_EQUALS (min_row, MIN_ROW (bb)); } /* Now translate all the bb's by those minimums. */ for (b = 0; b < BB_LIST_LENGTH (*boxes); b++) { bounding_box_type *bb = &BB_LIST_ELT (*boxes, b); MIN_COL (*bb) -= min_col; MAX_COL (*bb) -= min_col; MIN_ROW (*bb) -= min_row; MAX_ROW (*bb) -= min_row; } } /* Derive the information necessary to output the font character from the bitmap B, and return it. The resolution of the bitmap is given in pixels per inch as H_RESOLUTION. The bounding box BB encloses the character in the image coordinates. We use BB and the static variables `row_baseline' and `row_height' to determine the positioning of the GF character. */ #define BB_TO_CARTESIAN(x) \ (row_height - 1 - (x) - row_baseline \ - IMAGE_CHAR_BASELINE_ADJUST (image_char)) static gf_char_type bitmap_to_gf_char (bitmap_type b, real h_resolution, bounding_box_type bb, image_char_type image_char) { static boolean first = true; static boolean have_tfm_file = false; gf_char_type gf_char; bounding_box_type cartesian_bb; charcode_type charcode = IMAGE_CHARCODE (image_char); boolean have_tfm_width = false; /*xx*/ if(b.bitmap){ if ((testfileptr = fopen(gf_bitmap_filename, "w")) != NULL) print_bitmap(testfileptr, b); else printf("Bad file pointer for printing gf_bitmap.\n"); fclose(testfileptr); if ((testfileptr = fopen(gf_bb_bitmap_filename, "w")) != NULL) print_bounded_bitmap(testfileptr, b, bb); else printf("Bad file pointer for printing gf_bb_bitmap.\n"); fclose(testfileptr); } MIN_ROW (cartesian_bb) = BB_TO_CARTESIAN (MAX_ROW (bb)); MAX_ROW (cartesian_bb) = BB_TO_CARTESIAN (MIN_ROW (bb)); REPORT1 ("%u", charcode); GF_CHARCODE (gf_char) = charcode; GF_BITMAP (gf_char) = b; GF_CHAR_MIN_COL (gf_char) = IMAGE_CHAR_LSB (image_char); GF_CHAR_MAX_COL (gf_char) = GF_CHAR_MIN_COL (gf_char) + BITMAP_WIDTH (b); 
GF_CHAR_MIN_ROW (gf_char) = MIN_ROW (cartesian_bb); GF_CHAR_MAX_ROW (gf_char) = MAX_ROW (cartesian_bb); GF_H_ESCAPEMENT (gf_char) = (GF_CHAR_MAX_COL (gf_char) + IMAGE_CHAR_RSB (image_char)); if (first) { string tfm_name = remove_suffix (output_name); if (tfm_name) have_tfm_file = tfm_open_input_file (tfm_name); first = false; } have_tfm_width = false; if (have_tfm_file) { tfm_char_type *tfm_char = tfm_get_char (charcode); if (tfm_char) { GF_TFM_WIDTH (gf_char) = TFM_FIX_WIDTH (*tfm_char); have_tfm_width = true; } } /* If no TFM file, or the TFM file doesn't define this character, use the pixel width for the TFM value. */ if (!have_tfm_width) { real width_in_points = GF_H_ESCAPEMENT (gf_char) * POINTS_PER_INCH / h_resolution; GF_TFM_WIDTH (gf_char) = real_to_fix (width_in_points / design_size); } if (print_guidelines) REPORT3 (" (%s) %d/%d", IMAGE_CHARNAME (image_char), MIN_ROW (cartesian_bb), MAX_ROW (cartesian_bb)); return gf_char; }