summary | refs | log | tree | commit | diff
path: root/pcre_exec.c
diff options
context:
space:
mode:
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-05-07 15:37:31 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2011-05-07 15:37:31 +0000
commit: f2702980263d533b1acfece8305daaf377dfc62b (patch)
tree: 8b83b076f946b4dd6a60594352e69148492660cd /pcre_exec.c
parent: 312428a6f56cd0b84811f7bcae8354c56e60dff0 (diff)
download: pcre-f2702980263d533b1acfece8305daaf377dfc62b.tar.gz
Pass back detailed info when UTF-8 check fails at runtime.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@598 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'pcre_exec.c')
-rw-r--r-- pcre_exec.c 20
1 files changed, 14 insertions, 6 deletions
diff --git a/pcre_exec.c b/pcre_exec.c
index caf5fc3..8e965ba 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -6,7 +6,7 @@
and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
- Copyright (c) 1997-2010 University of Cambridge
+ Copyright (c) 1997-2011 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -5812,16 +5812,24 @@ defined (though never set). So there's no harm in leaving this code. */
if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
return PCRE_ERROR_BADPARTIAL;
-/* Check a UTF-8 string if required. Unfortunately there's no way of passing
-back the character offset. */
+/* Check a UTF-8 string if required. Pass back the character offset and error
+code if a results vector is available. */
#ifdef SUPPORT_UTF8
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
{
- int tb;
- if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0)
- return (tb == length && md->partial > 1)?
+ int errorcode;
+ int tb = _pcre_valid_utf8((USPTR)subject, length, &errorcode);
+ if (tb >= 0)
+ {
+ if (offsetcount >= 2)
+ {
+ offsets[0] = tb;
+ offsets[1] = errorcode;
+ }
+ return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
+ }
if (start_offset > 0 && start_offset < length)
{
tb = ((USPTR)subject)[start_offset] & 0xc0;