summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us>, 2009-01-04 18:37:36 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us>, 2009-01-04 18:37:36 +0000
commit: 1efd5ff89b8bf841324245e75739800414cf47aa (patch)
tree: caa5d3f488d30f4c7940c9e89f3367249c21236a /src
parent: 74ef810ca6df5bfbebdd3ea4ddeee1fa24327fb2 (diff)
download: postgresql-1efd5ff89b8bf841324245e75739800414cf47aa.tar.gz
Add a pg_encoding_mbcliplen() function that is just like pg_mbcliplen()
except the caller can specify the encoding to work in; this will be needed for pg_stat_statements. In passing, do some marginal efficiency hacking and clean up some comments. Also, prevent the single-byte-encoding code path from fetching one byte past the stated length of the string (this last is a bug that might need to be back-patched at some point).
Diffstat (limited to 'src')
-rw-r--r-- src/backend/utils/mb/mbutils.c | 57
-rw-r--r-- src/include/mb/pg_wchar.h | 4
2 files changed, 38 insertions, 23 deletions
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 6465afa104..4831e4da48 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -4,7 +4,7 @@
* (currently mule internal code (mic) is used)
* Tatsuo Ishii
*
- * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.75 2008/11/11 03:01:20 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.76 2009/01/04 18:37:35 tgl Exp $
*/
#include "postgres.h"
@@ -710,14 +710,14 @@ pg_encoding_mb2wchar_with_len(int encoding,
return (*pg_wchar_table[encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
}
-/* returns the byte length of a multibyte word */
+/* returns the byte length of a multibyte character */
int
pg_mblen(const char *mbstr)
{
return ((*pg_wchar_table[DatabaseEncoding->encoding].mblen) ((const unsigned char *) mbstr));
}
-/* returns the display length of a multibyte word */
+/* returns the display length of a multibyte character */
int
pg_dsplen(const char *mbstr)
{
@@ -767,23 +767,37 @@ pg_mbstrlen_with_len(const char *mbstr, int limit)
/*
* returns the byte length of a multibyte string
- * (not necessarily NULL terminated)
+ * (not necessarily NULL terminated)
* that is no longer than limit.
- * this function does not break multibyte word boundary.
+ * this function does not break multibyte character boundary.
*/
int
pg_mbcliplen(const char *mbstr, int len, int limit)
{
+ return pg_encoding_mbcliplen(DatabaseEncoding->encoding, mbstr,
+ len, limit);
+}
+
+/*
+ * pg_mbcliplen with specified encoding
+ */
+int
+pg_encoding_mbcliplen(int encoding, const char *mbstr,
+ int len, int limit)
+{
+ mblen_converter mblen_fn;
int clen = 0;
int l;
/* optimization for single byte encoding */
- if (pg_database_encoding_max_length() == 1)
+ if (pg_encoding_max_length(encoding) == 1)
return cliplen(mbstr, len, limit);
+ mblen_fn = pg_wchar_table[encoding].mblen;
+
while (len > 0 && *mbstr)
{
- l = pg_mblen(mbstr);
+ l = (*mblen_fn) ((const unsigned char *) mbstr);
if ((clen + l) > limit)
break;
clen += l;
@@ -797,7 +811,8 @@ pg_mbcliplen(const char *mbstr, int len, int limit)
/*
* Similar to pg_mbcliplen except the limit parameter specifies the
- * character length, not the byte length. */
+ * character length, not the byte length.
+ */
int
pg_mbcharcliplen(const char *mbstr, int len, int limit)
{
@@ -822,6 +837,18 @@ pg_mbcharcliplen(const char *mbstr, int len, int limit)
return clen;
}
+/* mbcliplen for any single-byte encoding */
+static int
+cliplen(const char *str, int len, int limit)
+{
+ int l = 0;
+
+ len = Min(len, limit);
+ while (l < len && str[l])
+ l++;
+ return l;
+}
+
void
SetDatabaseEncoding(int encoding)
{
@@ -884,17 +911,3 @@ pg_client_encoding(PG_FUNCTION_ARGS)
Assert(ClientEncoding);
return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));
}
-
-static int
-cliplen(const char *str, int len, int limit)
-{
- int l = 0;
- const char *s;
-
- for (s = str; *s; s++, l++)
- {
- if (l >= len || l >= limit)
- return l;
- }
- return (s - str);
-}
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index e9a26d4843..dc89af9d50 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.81 2009/01/01 17:23:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.82 2009/01/04 18:37:36 tgl Exp $
*
* NOTES
* This is used both by the backend and by libpq, but should not be
@@ -358,6 +358,8 @@ extern int pg_mic_mblen(const unsigned char *mbstr);
extern int pg_mbstrlen(const char *mbstr);
extern int pg_mbstrlen_with_len(const char *mbstr, int len);
extern int pg_mbcliplen(const char *mbstr, int len, int limit);
+extern int pg_encoding_mbcliplen(int encoding, const char *mbstr,
+ int len, int limit);
extern int pg_mbcharcliplen(const char *mbstr, int len, int imit);
extern int pg_encoding_max_length(int encoding);
extern int pg_database_encoding_max_length(void);