diff options
author | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-04-22 04:38:07 +0000 |
---|---|---|
committer | Lorry Tar Creator <lorry-tar-importer@lorry> | 2016-04-22 04:38:07 +0000 |
commit | 28ef1abc10cfbc2c3d2747c008eb2300858d0426 (patch) | |
tree | 41208fb8f393e6cb6cc8f939623ad47a0db17876 /src/dosbuf.c | |
download | grep-tarball-28ef1abc10cfbc2c3d2747c008eb2300858d0426.tar.gz |
Diffstat (limited to 'src/dosbuf.c')
-rw-r--r-- | src/dosbuf.c | 222 |
1 files changed, 222 insertions, 0 deletions
diff --git a/src/dosbuf.c b/src/dosbuf.c new file mode 100644 index 0000000..839cc3a --- /dev/null +++ b/src/dosbuf.c @@ -0,0 +1,222 @@ +/* dosbuf.c + Copyright (C) 1992, 1997-2002, 2004-2016 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA + 02110-1301, USA. */ + +/* Messy DOS-specific code for correctly treating binary, Unix text + and DOS text files. + + This has several aspects: + + * Guessing the file type (unless the user tells us); + * Stripping CR characters from DOS text files (otherwise regex + functions won't work correctly); + * Reporting correct byte count with -b for any kind of file. + +*/ + +#include <config.h> + +typedef enum { + UNKNOWN, DOS_BINARY, DOS_TEXT, UNIX_TEXT +} File_type; + +struct dos_map { + off_t pos; /* position in buffer passed to matcher */ + off_t add; /* how much to add when reporting char position */ +}; + +static int dos_report_unix_offset = 0; + +static File_type dos_file_type = UNKNOWN; +static File_type dos_use_file_type = UNKNOWN; +static off_t dos_stripped_crs = 0; +static struct dos_map *dos_pos_map; +static int dos_pos_map_size = 0; +static int dos_pos_map_used = 0; +static int inp_map_idx = 0, out_map_idx = 1; + +/* Set default DOS file type to binary. */ +static void +dos_binary (void) +{ + if (O_BINARY) + dos_use_file_type = DOS_BINARY; +} + +/* Tell DOS routines to report Unix offset. */ +static void +dos_unix_byte_offsets (void) +{ + if (O_BINARY) + dos_report_unix_offset = 1; +} + +/* Guess DOS file type by looking at its contents. */ +static File_type +guess_type (char *buf, size_t buflen) +{ + int crlf_seen = 0; + char *bp = buf; + + while (buflen--) + { + /* Treat a file as binary if it has a NUL character. */ + if (!*bp) + return DOS_BINARY; + + /* CR before LF means DOS text file (unless we later see + binary characters). */ + else if (*bp == '\r' && buflen && bp[1] == '\n') + crlf_seen = 1; + + bp++; + } + + return crlf_seen ? DOS_TEXT : UNIX_TEXT; +} + +/* Convert external DOS file representation to internal. + Return the count of bytes left in the buffer. + Build table to map character positions when reporting byte counts. */ +static size_t +undossify_input (char *buf, size_t buflen) +{ + if (! O_BINARY) + return buflen; + + size_t bytes_left = 0; + + if (totalcc == 0) + { + /* New file: forget everything we knew about character + position mapping table and file type. */ + inp_map_idx = 0; + out_map_idx = 1; + dos_pos_map_used = 0; + dos_stripped_crs = 0; + dos_file_type = dos_use_file_type; + } + + /* Guess if this file is binary, unless we already know that. */ + if (dos_file_type == UNKNOWN) + dos_file_type = guess_type(buf, buflen); + + /* If this file is to be treated as DOS Text, strip the CR characters + and maybe build the table for character position mapping on output. */ + if (dos_file_type == DOS_TEXT) + { + char *destp = buf; + + while (buflen--) + { + if (*buf != '\r') + { + *destp++ = *buf++; + bytes_left++; + } + else + { + buf++; + if (out_byte && !dos_report_unix_offset) + { + dos_stripped_crs++; + while (buflen && *buf == '\r') + { + dos_stripped_crs++; + buflen--; + buf++; + } + if (inp_map_idx >= dos_pos_map_size - 1) + { + dos_pos_map_size = inp_map_idx ? inp_map_idx * 2 : 1000; + dos_pos_map = xrealloc(dos_pos_map, + dos_pos_map_size * + sizeof(struct dos_map)); + } + + if (!inp_map_idx) + { + /* Add sentinel entry. */ + dos_pos_map[inp_map_idx].pos = 0; + dos_pos_map[inp_map_idx++].add = 0; + + /* Initialize first real entry. */ + dos_pos_map[inp_map_idx].add = 0; + } + + /* Put the new entry. If the stripped CR characters + precede a Newline (the usual case), pretend that + they were found *after* the Newline. This makes + displayed byte offsets more reasonable in some + cases, and fits better the intuitive notion that + the line ends *before* the CR, not *after* it. */ + inp_map_idx++; + dos_pos_map[inp_map_idx-1].pos = + (*buf == '\n' ? destp + 1 : destp ) - bufbeg + totalcc; + dos_pos_map[inp_map_idx].add = dos_stripped_crs; + dos_pos_map_used = inp_map_idx; + + /* The following will be updated on the next pass. */ + dos_pos_map[inp_map_idx].pos = destp - bufbeg + totalcc + 1; + } + } + } + + return bytes_left; + } + + return buflen; +} + +/* Convert internal byte count into external. */ +static off_t +dossified_pos (off_t byteno) +{ + if (! O_BINARY) + return byteno; + + off_t pos_lo; + off_t pos_hi; + + if (dos_file_type != DOS_TEXT || dos_report_unix_offset) + return byteno; + + /* Optimization: usually the file will be scanned sequentially. + So in most cases, this byte position will be found in the + table near the previous one, as recorded in 'out_map_idx'. */ + pos_lo = dos_pos_map[out_map_idx-1].pos; + pos_hi = dos_pos_map[out_map_idx].pos; + + /* If the initial guess failed, search up or down, as + appropriate, beginning with the previous place. */ + if (byteno >= pos_hi) + { + out_map_idx++; + while (out_map_idx < dos_pos_map_used + && byteno >= dos_pos_map[out_map_idx].pos) + out_map_idx++; + } + + else if (byteno < pos_lo) + { + out_map_idx--; + while (out_map_idx > 1 && byteno < dos_pos_map[out_map_idx-1].pos) + out_map_idx--; + } + + return byteno + dos_pos_map[out_map_idx].add; +} |