diff options
Diffstat (limited to 'pcre_newline.c')
-rw-r--r-- | pcre_newline.c | 45 |
1 files changed, 33 insertions, 12 deletions
diff --git a/pcre_newline.c b/pcre_newline.c
index 0bfcba0..3a5db61 100644
--- a/pcre_newline.c
+++ b/pcre_newline.c
@@ -38,13 +38,12 @@ POSSIBILITY OF SUCH DAMAGE.
 */
 
 
-/* This module contains internal functions for testing newlines when more than
+/* This module contains internal functions for testing newlines when more than
 one kind of newline is to be recognized. When a newline is found, its length is
-returned. In principle, we could implement several newline "types", each
-referring to a different set of newline characters. At present, PCRE supports
-only NLTYPE_FIXED, which gets handled without these functions, and NLTYPE_ALL,
-so for now the type isn't passed into the functions. It can easily be added
-later if required. The full list of Unicode newline characters is taken from
+returned. In principle, we could implement several newline "types", each
+referring to a different set of newline characters. At present, PCRE supports
+only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
+and NLTYPE_ANY. The full list of Unicode newline characters is taken from
 http://unicode.org/unicode/reports/tr18/. */
 
 
@@ -61,6 +60,7 @@ string that is being processed.
 
 Arguments:
   ptr              pointer to possible newline
+  type             the newline type
   endptr           pointer to the end of the string
   lenptr           where to return the length
   utf8             TRUE if in utf8 mode
@@ -69,12 +69,23 @@ Returns: TRUE or FALSE
 */
 
 BOOL
-_pcre_is_newline(const uschar *ptr, const uschar *endptr, int *lenptr,
-  BOOL utf8)
+_pcre_is_newline(const uschar *ptr, int type, const uschar *endptr,
+  int *lenptr, BOOL utf8)
 {
 int c;
 if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
-switch(c)
+
+if (type == NLTYPE_ANYCRLF) switch(c)
+  {
+  case 0x000a: *lenptr = 1; return TRUE; /* LF */
+  case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;
+               return TRUE; /* CR */
+  default: return FALSE;
+  }
+
+/* NLTYPE_ANY */
+
+else switch(c)
   {
   case 0x000a: /* LF */
   case 0x000b: /* VT */
@@ -99,6 +110,7 @@ the string that is being processed.
 
 Arguments:
   ptr              pointer to possible newline
+  type             the newline type
   startptr         pointer to the start of the string
   lenptr           where to return the length
   utf8             TRUE if in utf8 mode
@@ -107,8 +119,8 @@ Returns: TRUE or FALSE
 */
 
 BOOL
-_pcre_was_newline(const uschar *ptr, const uschar *startptr, int *lenptr,
-  BOOL utf8)
+_pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,
+  int *lenptr, BOOL utf8)
 {
 int c;
 ptr--;
@@ -118,7 +130,16 @@ if (utf8)
   GETCHAR(c, ptr);
   }
 else c = *ptr;
-switch(c)
+
+if (type == NLTYPE_ANYCRLF) switch(c)
+  {
+  case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
+               return TRUE; /* LF */
+  case 0x000d: *lenptr = 1; return TRUE; /* CR */
+  default: return FALSE;
+  }
+
+else switch(c)
   {
   case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;
                return TRUE; /* LF */