summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2009-04-19 21:08:54 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2009-04-19 21:08:54 +0000
commit 22c922269f5f8a80267389e1c879c0b65fbba902 (patch)
tree f82f522d11c899bbdad662da74c1d01fcaa5b04d
parent c1c40e580a5498ae7804270fe20dd8023fc7a9d6 (diff)
download postgresql-22c922269f5f8a80267389e1c879c0b65fbba902.tar.gz
Fix de-escaping checks so that we will reject \000 as well as other invalidly
encoded sequences. Per discussion of a couple of days ago.
-rw-r--r-- src/backend/commands/copy.c 16
-rw-r--r-- src/backend/parser/scan.l 38
2 files changed, 28 insertions, 26 deletions
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 90ceb77bbb..9ba7c5fc03 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.307 2009/03/31 22:12:46 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.308 2009/04/19 21:08:54 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -2718,7 +2718,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
char *start_ptr;
char *end_ptr;
int input_len;
- bool saw_high_bit = false;
+ bool saw_non_ascii = false;
/* Make sure space remains in fieldvals[] */
if (fieldno >= maxfields)
@@ -2783,8 +2783,8 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
}
}
c = val & 0377;
- if (IS_HIGHBIT_SET(c))
- saw_high_bit = true;
+ if (c == '\0' || IS_HIGHBIT_SET(c))
+ saw_non_ascii = true;
}
break;
case 'x':
@@ -2808,8 +2808,8 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
}
}
c = val & 0xff;
- if (IS_HIGHBIT_SET(c))
- saw_high_bit = true;
+ if (c == '\0' || IS_HIGHBIT_SET(c))
+ saw_non_ascii = true;
}
}
break;
@@ -2847,11 +2847,11 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
*output_ptr++ = '\0';
/*
- * If we de-escaped a char with the high bit set, make sure we still
+ * If we de-escaped a non-7-bit-ASCII char, make sure we still
* have valid data for the db encoding. Avoid calling strlen here for
* the sake of efficiency.
*/
- if (saw_high_bit)
+ if (saw_non_ascii)
{
char *fld = fieldvals[fieldno];
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index a3d4d857c8..8551cd2753 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -24,7 +24,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.150 2009/04/14 22:18:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.151 2009/04/19 21:08:54 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -60,7 +60,7 @@ bool escape_string_warning = true;
bool standard_conforming_strings = false;
static bool warn_on_first_escape;
-static bool saw_high_bit = false;
+static bool saw_non_ascii = false;
/*
* literalbuf is used to accumulate literal values when multiple rules
@@ -453,7 +453,7 @@ other .
{xqstart} {
warn_on_first_escape = true;
- saw_high_bit = false;
+ saw_non_ascii = false;
SET_YYLLOC();
if (standard_conforming_strings)
BEGIN(xq);
@@ -463,7 +463,7 @@ other .
}
{xestart} {
warn_on_first_escape = false;
- saw_high_bit = false;
+ saw_non_ascii = false;
SET_YYLLOC();
BEGIN(xe);
startlit();
@@ -477,10 +477,11 @@ other .
<xq,xe>{quotefail} {
yyless(1);
BEGIN(INITIAL);
- /* check that the data remains valid if it might have been
+ /*
+ * check that the data remains valid if it might have been
* made invalid by unescaping any chars.
*/
- if (saw_high_bit)
+ if (saw_non_ascii)
pg_verifymbstr(literalbuf, literallen, false);
yylval.str = litbufdup();
return SCONST;
@@ -526,16 +527,16 @@ other .
check_escape_warning();
addlitchar(c);
- if (IS_HIGHBIT_SET(c))
- saw_high_bit = true;
+ if (c == '\0' || IS_HIGHBIT_SET(c))
+ saw_non_ascii = true;
}
<xe>{xehexesc} {
unsigned char c = strtoul(yytext+2, NULL, 16);
check_escape_warning();
addlitchar(c);
- if (IS_HIGHBIT_SET(c))
- saw_high_bit = true;
+ if (c == '\0' || IS_HIGHBIT_SET(c))
+ saw_non_ascii = true;
}
<xq,xe,xus>{quotecontinue} {
/* ignore */
@@ -1083,6 +1084,11 @@ litbuf_udeescape(unsigned char escape)
}
*out = '\0';
+ /*
+ * We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
+ * codes; but it's probably not worth the trouble, since this isn't
+ * likely to be a performance-critical path.
+ */
pg_verifymbstr(new, out - new, false);
return new;
}
@@ -1090,14 +1096,6 @@ litbuf_udeescape(unsigned char escape)
static unsigned char
unescape_single_char(unsigned char c)
{
- /* Normally we wouldn't expect to see \n where n has its high bit set
- * but we set the flag to check the string if we do get it, so
- * that this doesn't become a way of getting around the coding validity
- * checks.
- */
- if (IS_HIGHBIT_SET(c))
- saw_high_bit = true;
-
switch (c)
{
case 'b':
@@ -1111,6 +1109,10 @@ unescape_single_char(unsigned char c)
case 't':
return '\t';
default:
+ /* check for backslash followed by non-7-bit-ASCII */
+ if (c == '\0' || IS_HIGHBIT_SET(c))
+ saw_non_ascii = true;
+
return c;
}
}