diff options
Diffstat (limited to 'src/backend')
28 files changed, 1188 insertions, 441 deletions
diff --git a/src/backend/commands/conversioncmds.c b/src/backend/commands/conversioncmds.c index f7ff321de7..5fed97a2f9 100644 --- a/src/backend/commands/conversioncmds.c +++ b/src/backend/commands/conversioncmds.c @@ -45,8 +45,9 @@ CreateConversionCommand(CreateConversionStmt *stmt) const char *from_encoding_name = stmt->for_encoding_name; const char *to_encoding_name = stmt->to_encoding_name; List *func_name = stmt->func_name; - static const Oid funcargs[] = {INT4OID, INT4OID, CSTRINGOID, INTERNALOID, INT4OID}; + static const Oid funcargs[] = {INT4OID, INT4OID, CSTRINGOID, INTERNALOID, INT4OID, BOOLOID}; char result[1]; + Datum funcresult; /* Convert list of names to a name and namespace */ namespaceId = QualifiedNameGetCreationNamespace(stmt->conversion_name, @@ -92,12 +93,12 @@ CreateConversionCommand(CreateConversionStmt *stmt) funcoid = LookupFuncName(func_name, sizeof(funcargs) / sizeof(Oid), funcargs, false); - /* Check it returns VOID, else it's probably the wrong function */ - if (get_func_rettype(funcoid) != VOIDOID) + /* Check it returns int4, else it's probably the wrong function */ + if (get_func_rettype(funcoid) != INT4OID) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("encoding conversion function %s must return type %s", - NameListToString(func_name), "void"))); + NameListToString(func_name), "integer"))); /* Check we have EXECUTE rights for the function */ aclresult = pg_proc_aclcheck(funcoid, GetUserId(), ACL_EXECUTE); @@ -111,12 +112,23 @@ CreateConversionCommand(CreateConversionStmt *stmt) * string; the conversion function should throw an error if it can't * perform the requested conversion. 
*/ - OidFunctionCall5(funcoid, - Int32GetDatum(from_encoding), - Int32GetDatum(to_encoding), - CStringGetDatum(""), - CStringGetDatum(result), - Int32GetDatum(0)); + funcresult = OidFunctionCall6(funcoid, + Int32GetDatum(from_encoding), + Int32GetDatum(to_encoding), + CStringGetDatum(""), + CStringGetDatum(result), + Int32GetDatum(0), + BoolGetDatum(false)); + + /* + * The function should return 0 for empty input. Might as well check that, + * too. + */ + if (DatumGetInt32(funcresult) != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("encoding conversion function %s returned incorrect result for empty input", + NameListToString(func_name)))); /* * All seem ok, go ahead (possible failure would be a duplicate conversion diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c index 9e4ea1b345..423df2f300 100644 --- a/src/backend/utils/error/elog.c +++ b/src/backend/utils/error/elog.c @@ -2271,6 +2271,8 @@ write_console(const char *line, int len) * Conversion on non-win32 platforms is not implemented yet. It requires * non-throw version of pg_do_encoding_conversion(), that converts * unconvertable characters to '?' without errors. + * + * XXX: We have a no-throw version now. It doesn't convert to '?' though. */ #endif diff --git a/src/backend/utils/mb/conv.c b/src/backend/utils/mb/conv.c index a07b54bd3b..33e9c9a9e3 100644 --- a/src/backend/utils/mb/conv.c +++ b/src/backend/utils/mb/conv.c @@ -25,15 +25,20 @@ * tab holds conversion entries for the source charset * starting from 128 (0x80). each entry in the table holds the corresponding * code point for the target charset, or 0 if there is no equivalent code. + * + * Returns the number of input bytes consumed. If noError is true, this can + * be less than 'len'. 
*/ -void +int local2local(const unsigned char *l, unsigned char *p, int len, int src_encoding, int dest_encoding, - const unsigned char *tab) + const unsigned char *tab, + bool noError) { + const unsigned char *start = l; unsigned char c1, c2; @@ -41,7 +46,11 @@ local2local(const unsigned char *l, { c1 = *l; if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(src_encoding, (const char *) l, len); + } if (!IS_HIGHBIT_SET(c1)) *p++ = c1; else @@ -50,13 +59,19 @@ local2local(const unsigned char *l, if (c2) *p++ = c2; else + { + if (noError) + break; report_untranslatable_char(src_encoding, dest_encoding, (const char *) l, len); + } } l++; len--; } *p = '\0'; + + return l - start; } /* @@ -66,18 +81,26 @@ local2local(const unsigned char *l, * p is the output area (must be large enough!) * lc is the mule character set id for the local encoding * encoding is the PG identifier for the local encoding + * + * Returns the number of input bytes consumed. If noError is true, this can + * be less than 'len'. */ -void +int latin2mic(const unsigned char *l, unsigned char *p, int len, - int lc, int encoding) + int lc, int encoding, bool noError) { + const unsigned char *start = l; int c1; while (len > 0) { c1 = *l; if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(encoding, (const char *) l, len); + } if (IS_HIGHBIT_SET(c1)) *p++ = lc; *p++ = c1; @@ -85,6 +108,8 @@ latin2mic(const unsigned char *l, unsigned char *p, int len, len--; } *p = '\0'; + + return l - start; } /* @@ -94,18 +119,26 @@ latin2mic(const unsigned char *l, unsigned char *p, int len, * p is the output area (must be large enough!) * lc is the mule character set id for the local encoding * encoding is the PG identifier for the local encoding + * + * Returns the number of input bytes consumed. If noError is true, this can + * be less than 'len'. 
*/ -void +int mic2latin(const unsigned char *mic, unsigned char *p, int len, - int lc, int encoding) + int lc, int encoding, bool noError) { + const unsigned char *start = mic; int c1; while (len > 0) { c1 = *mic; if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); + } if (!IS_HIGHBIT_SET(c1)) { /* easy for ASCII */ @@ -118,17 +151,27 @@ mic2latin(const unsigned char *mic, unsigned char *p, int len, int l = pg_mule_mblen(mic); if (len < l) + { + if (noError) + break; report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); + } if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1])) + { + if (noError) + break; report_untranslatable_char(PG_MULE_INTERNAL, encoding, (const char *) mic, len); + } *p++ = mic[1]; mic += 2; len -= 2; } } *p = '\0'; + + return mic - start; } @@ -143,15 +186,20 @@ mic2latin(const unsigned char *mic, unsigned char *p, int len, * tab holds conversion entries for the local charset * starting from 128 (0x80). each entry in the table holds the corresponding * code point for the mule encoding, or 0 if there is no equivalent code. + * + * Returns the number of input bytes consumed. If noError is true, this can + * be less than 'len'. 
*/ -void +int latin2mic_with_table(const unsigned char *l, unsigned char *p, int len, int lc, int encoding, - const unsigned char *tab) + const unsigned char *tab, + bool noError) { + const unsigned char *start = l; unsigned char c1, c2; @@ -159,7 +207,11 @@ latin2mic_with_table(const unsigned char *l, { c1 = *l; if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(encoding, (const char *) l, len); + } if (!IS_HIGHBIT_SET(c1)) *p++ = c1; else @@ -171,13 +223,19 @@ latin2mic_with_table(const unsigned char *l, *p++ = c2; } else + { + if (noError) + break; report_untranslatable_char(encoding, PG_MULE_INTERNAL, (const char *) l, len); + } } l++; len--; } *p = '\0'; + + return l - start; } /* @@ -191,15 +249,20 @@ latin2mic_with_table(const unsigned char *l, * tab holds conversion entries for the mule internal code's second byte, * starting from 128 (0x80). each entry in the table holds the corresponding * code point for the local charset, or 0 if there is no equivalent code. + * + * Returns the number of input bytes consumed. If noError is true, this can + * be less than 'len'. 
*/ -void +int mic2latin_with_table(const unsigned char *mic, unsigned char *p, int len, int lc, int encoding, - const unsigned char *tab) + const unsigned char *tab, + bool noError) { + const unsigned char *start = mic; unsigned char c1, c2; @@ -207,7 +270,11 @@ mic2latin_with_table(const unsigned char *mic, { c1 = *mic; if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); + } if (!IS_HIGHBIT_SET(c1)) { /* easy for ASCII */ @@ -220,11 +287,17 @@ mic2latin_with_table(const unsigned char *mic, int l = pg_mule_mblen(mic); if (len < l) + { + if (noError) + break; report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); + } if (l != 2 || c1 != lc || !IS_HIGHBIT_SET(mic[1]) || (c2 = tab[mic[1] - HIGHBIT]) == 0) { + if (noError) + break; report_untranslatable_char(PG_MULE_INTERNAL, encoding, (const char *) mic, len); break; /* keep compiler quiet */ @@ -235,6 +308,8 @@ mic2latin_with_table(const unsigned char *mic, } } *p = '\0'; + + return mic - start; } /* @@ -424,18 +499,22 @@ pg_mb_radix_conv(const pg_mb_radix_tree *rt, * is applied. An error is raised if no match is found. * * See pg_wchar.h for more details about the data structures used here. + * + * Returns the number of input bytes consumed. If noError is true, this can + * be less than 'len'. 
*/ -void +int UtfToLocal(const unsigned char *utf, int len, unsigned char *iso, const pg_mb_radix_tree *map, const pg_utf_to_local_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, - int encoding) + int encoding, bool noError) { uint32 iutf; int l; const pg_utf_to_local_combined *cp; + const unsigned char *start = utf; if (!PG_VALID_ENCODING(encoding)) ereport(ERROR, @@ -505,10 +584,19 @@ UtfToLocal(const unsigned char *utf, int len, l = pg_utf_mblen(utf); if (len < l) + { + /* need more data to decide if this is a combined char */ + utf -= l_save; break; + } if (!pg_utf8_islegal(utf, l)) + { + if (!noError) + report_invalid_encoding(PG_UTF8, (const char *) utf, len); + utf -= l_save; break; + } /* We assume ASCII character cannot be in combined map */ if (l > 1) @@ -584,15 +672,20 @@ UtfToLocal(const unsigned char *utf, int len, } /* failed to translate this character */ + utf -= l; + if (noError) + break; report_untranslatable_char(PG_UTF8, encoding, - (const char *) (utf - l), len); + (const char *) utf, len); } /* if we broke out of loop early, must be invalid input */ - if (len > 0) + if (len > 0 && !noError) report_invalid_encoding(PG_UTF8, (const char *) utf, len); *iso = '\0'; + + return utf - start; } /* @@ -616,18 +709,23 @@ UtfToLocal(const unsigned char *utf, int len, * (if provided) is applied. An error is raised if no match is found. * * See pg_wchar.h for more details about the data structures used here. + * + * Returns the number of input bytes consumed. If noError is true, this can + * be less than 'len'. 
*/ -void +int LocalToUtf(const unsigned char *iso, int len, unsigned char *utf, const pg_mb_radix_tree *map, const pg_local_to_utf_combined *cmap, int cmapsize, utf_local_conversion_func conv_func, - int encoding) + int encoding, + bool noError) { uint32 iiso; int l; const pg_local_to_utf_combined *cp; + const unsigned char *start = iso; if (!PG_VALID_ENCODING(encoding)) ereport(ERROR, @@ -723,13 +821,18 @@ LocalToUtf(const unsigned char *iso, int len, } /* failed to translate this character */ + iso -= l; + if (noError) + break; report_untranslatable_char(encoding, PG_UTF8, - (const char *) (iso - l), len); + (const char *) iso, len); } /* if we broke out of loop early, must be invalid input */ - if (len > 0) + if (len > 0 && !noError) report_invalid_encoding(encoding, (const char *) iso, len); *utf = '\0'; + + return iso - start; } diff --git a/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c index 4c5b02654d..368c2deb5e 100644 --- a/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c +++ b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c @@ -44,8 +44,11 @@ PG_FUNCTION_INFO_V1(win866_to_iso); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
* ---------- */ @@ -306,12 +309,14 @@ koi8r_to_mic(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_MULE_INTERNAL); - latin2mic(src, dest, len, LC_KOI8_R, PG_KOI8R); + converted = latin2mic(src, dest, len, LC_KOI8_R, PG_KOI8R, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -320,12 +325,14 @@ mic_to_koi8r(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_KOI8R); - mic2latin(src, dest, len, LC_KOI8_R, PG_KOI8R); + converted = mic2latin(src, dest, len, LC_KOI8_R, PG_KOI8R, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -334,12 +341,14 @@ iso_to_mic(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_MULE_INTERNAL); - latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, iso2koi); + converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, iso2koi, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -348,12 +357,14 @@ mic_to_iso(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_ISO_8859_5); - mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, koi2iso); + converted = mic2latin_with_table(src, 
dest, len, LC_KOI8_R, PG_ISO_8859_5, koi2iso, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -362,12 +373,14 @@ win1251_to_mic(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_MULE_INTERNAL); - latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, win12512koi); + converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, win12512koi, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -376,12 +389,14 @@ mic_to_win1251(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1251); - mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, koi2win1251); + converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, koi2win1251, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -390,12 +405,14 @@ win866_to_mic(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_MULE_INTERNAL); - latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, win8662koi); + converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, win8662koi, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -404,12 +421,14 @@ mic_to_win866(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = 
PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN866); - mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, koi2win866); + converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, koi2win866, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -418,12 +437,14 @@ koi8r_to_win1251(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_WIN1251); - local2local(src, dest, len, PG_KOI8R, PG_WIN1251, koi2win1251); + converted = local2local(src, dest, len, PG_KOI8R, PG_WIN1251, koi2win1251, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -432,12 +453,14 @@ win1251_to_koi8r(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_KOI8R); - local2local(src, dest, len, PG_WIN1251, PG_KOI8R, win12512koi); + converted = local2local(src, dest, len, PG_WIN1251, PG_KOI8R, win12512koi, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -446,12 +469,14 @@ koi8r_to_win866(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_WIN866); - local2local(src, dest, len, PG_KOI8R, PG_WIN866, koi2win866); + converted = local2local(src, dest, len, PG_KOI8R, PG_WIN866, koi2win866, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -460,12 +485,14 @@ win866_to_koi8r(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) 
PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_KOI8R); - local2local(src, dest, len, PG_WIN866, PG_KOI8R, win8662koi); + converted = local2local(src, dest, len, PG_WIN866, PG_KOI8R, win8662koi, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -474,12 +501,14 @@ win866_to_win1251(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_WIN1251); - local2local(src, dest, len, PG_WIN866, PG_WIN1251, win8662win1251); + converted = local2local(src, dest, len, PG_WIN866, PG_WIN1251, win8662win1251, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -488,12 +517,14 @@ win1251_to_win866(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_WIN866); - local2local(src, dest, len, PG_WIN1251, PG_WIN866, win12512win866); + converted = local2local(src, dest, len, PG_WIN1251, PG_WIN866, win12512win866, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -502,12 +533,14 @@ iso_to_koi8r(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_KOI8R); - local2local(src, dest, len, PG_ISO_8859_5, PG_KOI8R, iso2koi); + converted = local2local(src, dest, len, PG_ISO_8859_5, PG_KOI8R, iso2koi, noError); - PG_RETURN_VOID(); + 
PG_RETURN_INT32(converted); } Datum @@ -516,12 +549,14 @@ koi8r_to_iso(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_ISO_8859_5); - local2local(src, dest, len, PG_KOI8R, PG_ISO_8859_5, koi2iso); + converted = local2local(src, dest, len, PG_KOI8R, PG_ISO_8859_5, koi2iso, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -530,12 +565,14 @@ iso_to_win1251(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_WIN1251); - local2local(src, dest, len, PG_ISO_8859_5, PG_WIN1251, iso2win1251); + converted = local2local(src, dest, len, PG_ISO_8859_5, PG_WIN1251, iso2win1251, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -544,12 +581,14 @@ win1251_to_iso(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_ISO_8859_5); - local2local(src, dest, len, PG_WIN1251, PG_ISO_8859_5, win12512iso); + converted = local2local(src, dest, len, PG_WIN1251, PG_ISO_8859_5, win12512iso, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -558,12 +597,14 @@ iso_to_win866(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_WIN866); - local2local(src, dest, len, 
PG_ISO_8859_5, PG_WIN866, iso2win866); + converted = local2local(src, dest, len, PG_ISO_8859_5, PG_WIN866, iso2win866, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -572,10 +613,12 @@ win866_to_iso(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_ISO_8859_5); - local2local(src, dest, len, PG_WIN866, PG_ISO_8859_5, win8662iso); + converted = local2local(src, dest, len, PG_WIN866, PG_ISO_8859_5, win8662iso, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } diff --git a/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c b/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c index 4d7fb116cf..a3fd35bd40 100644 --- a/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c +++ b/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c @@ -19,8 +19,8 @@ PG_MODULE_MAGIC; PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004); PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004); -static void euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len); -static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len); +static int euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError); +static int shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError); /* ---------- * conv_proc( @@ -28,8 +28,11 @@ static void shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + 
* BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. * ---------- */ @@ -39,12 +42,14 @@ euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004); - euc_jis_20042shift_jis_2004(src, dest, len); + converted = euc_jis_20042shift_jis_2004(src, dest, len, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -53,20 +58,23 @@ shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004); - shift_jis_20042euc_jis_2004(src, dest, len); + converted = shift_jis_20042euc_jis_2004(src, dest, len, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } /* * EUC_JIS_2004 -> SHIFT_JIS_2004 */ -static void -euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) +static int +euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError) { + const unsigned char *start = euc; int c1, ku, ten; @@ -79,8 +87,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) { /* ASCII */ if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_EUC_JIS_2004, (const char *) euc, len); + } *p++ = c1; euc++; len--; @@ -90,8 +102,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) l = pg_encoding_verifymbchar(PG_EUC_JIS_2004, (const char *) euc, len); if (l < 0) + { + if (noError) + break; report_invalid_encoding(PG_EUC_JIS_2004, (const char 
*) euc, len); + } if (c1 == SS2 && l == 2) /* JIS X 0201 kana? */ { @@ -121,8 +137,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) *p++ = (ku + 0x19b) >> 1; } else + { + if (noError) + break; report_invalid_encoding(PG_EUC_JIS_2004, (const char *) euc, len); + } } if (ku % 2) @@ -132,8 +152,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) else if (ten >= 64 && ten <= 94) *p++ = ten + 0x40; else + { + if (noError) + break; report_invalid_encoding(PG_EUC_JIS_2004, (const char *) euc, len); + } } else *p++ = ten + 0x9e; @@ -149,8 +173,12 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) else if (ku >= 63 && ku <= 94) *p++ = (ku + 0x181) >> 1; else + { + if (noError) + break; report_invalid_encoding(PG_EUC_JIS_2004, (const char *) euc, len); + } if (ku % 2) { @@ -159,20 +187,30 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) else if (ten >= 64 && ten <= 94) *p++ = ten + 0x40; else + { + if (noError) + break; report_invalid_encoding(PG_EUC_JIS_2004, (const char *) euc, len); + } } else *p++ = ten + 0x9e; } else + { + if (noError) + break; report_invalid_encoding(PG_EUC_JIS_2004, (const char *) euc, len); + } euc += l; len -= l; } *p = '\0'; + + return euc - start; } /* @@ -212,9 +250,10 @@ get_ten(int b, int *ku) * SHIFT_JIS_2004 ---> EUC_JIS_2004 */ -static void -shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len) +static int +shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError) { + const unsigned char *start = sjis; int c1; int ku, ten, @@ -230,8 +269,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len { /* ASCII */ if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_SHIFT_JIS_2004, (const char *) sjis, len); + } *p++ = c1; sjis++; len--; @@ -241,8 +284,12 @@ shift_jis_20042euc_jis_2004(const unsigned 
char *sjis, unsigned char *p, int len l = pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (const char *) sjis, len); if (l < 0 || l > len) + { + if (noError) + break; report_invalid_encoding(PG_SHIFT_JIS_2004, (const char *) sjis, len); + } if (c1 >= 0xa1 && c1 <= 0xdf && l == 1) { @@ -266,8 +313,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len ku = (c1 << 1) - 0x100; ten = get_ten(c2, &kubun); if (ten < 0) + { + if (noError) + break; report_invalid_encoding(PG_SHIFT_JIS_2004, (const char *) sjis, len); + } ku -= kubun; } else if (c1 >= 0xe0 && c1 <= 0xef) /* plane 1 62ku-94ku */ @@ -275,9 +326,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len ku = (c1 << 1) - 0x180; ten = get_ten(c2, &kubun); if (ten < 0) + { + if (noError) + break; report_invalid_encoding(PG_SHIFT_JIS_2004, - (const char *) sjis, len); + } ku -= kubun; } else if (c1 >= 0xf0 && c1 <= 0xf3) /* plane 2 @@ -286,8 +340,12 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len plane = 2; ten = get_ten(c2, &kubun); if (ten < 0) + { + if (noError) + break; report_invalid_encoding(PG_SHIFT_JIS_2004, (const char *) sjis, len); + } switch (c1) { case 0xf0: @@ -309,16 +367,24 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len plane = 2; ten = get_ten(c2, &kubun); if (ten < 0) + { + if (noError) + break; report_invalid_encoding(PG_SHIFT_JIS_2004, (const char *) sjis, len); + } if (c1 == 0xf4 && kubun == 1) ku = 15; else ku = (c1 << 1) - 0x19a - kubun; } else + { + if (noError) + break; report_invalid_encoding(PG_SHIFT_JIS_2004, (const char *) sjis, len); + } if (plane == 2) *p++ = SS3; @@ -330,4 +396,6 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len len -= l; } *p = '\0'; + + return sjis - start; } diff --git a/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c 
b/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c index e9bb896935..09b3c2e75b 100644 --- a/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c +++ b/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c @@ -26,13 +26,16 @@ PG_FUNCTION_INFO_V1(mic_to_euc_cn); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. * ---------- */ -static void euc_cn2mic(const unsigned char *euc, unsigned char *p, int len); -static void mic2euc_cn(const unsigned char *mic, unsigned char *p, int len); +static int euc_cn2mic(const unsigned char *euc, unsigned char *p, int len, bool noError); +static int mic2euc_cn(const unsigned char *mic, unsigned char *p, int len, bool noError); Datum euc_cn_to_mic(PG_FUNCTION_ARGS) @@ -40,12 +43,14 @@ euc_cn_to_mic(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_MULE_INTERNAL); - euc_cn2mic(src, dest, len); + converted = euc_cn2mic(src, dest, len, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -54,20 +59,23 @@ mic_to_euc_cn(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_CN); - mic2euc_cn(src, dest, len); + converted = mic2euc_cn(src, dest, len, noError); - PG_RETURN_VOID(); + 
PG_RETURN_INT32(converted); } /* * EUC_CN ---> MIC */ -static void -euc_cn2mic(const unsigned char *euc, unsigned char *p, int len) +static int +euc_cn2mic(const unsigned char *euc, unsigned char *p, int len, bool noError) { + const unsigned char *start = euc; int c1; while (len > 0) @@ -76,7 +84,11 @@ euc_cn2mic(const unsigned char *euc, unsigned char *p, int len) if (IS_HIGHBIT_SET(c1)) { if (len < 2 || !IS_HIGHBIT_SET(euc[1])) + { + if (noError) + break; report_invalid_encoding(PG_EUC_CN, (const char *) euc, len); + } *p++ = LC_GB2312_80; *p++ = c1; *p++ = euc[1]; @@ -86,21 +98,28 @@ euc_cn2mic(const unsigned char *euc, unsigned char *p, int len) else { /* should be ASCII */ if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_EUC_CN, (const char *) euc, len); + } *p++ = c1; euc++; len--; } } *p = '\0'; + + return euc - start; } /* * MIC ---> EUC_CN */ -static void -mic2euc_cn(const unsigned char *mic, unsigned char *p, int len) +static int +mic2euc_cn(const unsigned char *mic, unsigned char *p, int len, bool noError) { + const unsigned char *start = mic; int c1; while (len > 0) @@ -109,11 +128,19 @@ mic2euc_cn(const unsigned char *mic, unsigned char *p, int len) if (IS_HIGHBIT_SET(c1)) { if (c1 != LC_GB2312_80) + { + if (noError) + break; report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_CN, (const char *) mic, len); + } if (len < 3 || !IS_HIGHBIT_SET(mic[1]) || !IS_HIGHBIT_SET(mic[2])) + { + if (noError) + break; report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); + } mic++; *p++ = *mic++; *p++ = *mic++; @@ -122,12 +149,18 @@ mic2euc_cn(const unsigned char *mic, unsigned char *p, int len) else { /* should be ASCII */ if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); + } *p++ = c1; mic++; len--; } } *p = '\0'; + + return mic - start; } diff --git a/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c 
b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c index 5059f917a9..2e68708893 100644 --- a/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c +++ b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c @@ -42,17 +42,20 @@ PG_FUNCTION_INFO_V1(mic_to_sjis); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. * ---------- */ -static void sjis2mic(const unsigned char *sjis, unsigned char *p, int len); -static void mic2sjis(const unsigned char *mic, unsigned char *p, int len); -static void euc_jp2mic(const unsigned char *euc, unsigned char *p, int len); -static void mic2euc_jp(const unsigned char *mic, unsigned char *p, int len); -static void euc_jp2sjis(const unsigned char *mic, unsigned char *p, int len); -static void sjis2euc_jp(const unsigned char *mic, unsigned char *p, int len); +static int sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError); +static int mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError); +static int euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError); +static int mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError); +static int euc_jp2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError); +static int sjis2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError); Datum euc_jp_to_sjis(PG_FUNCTION_ARGS) @@ -60,12 +63,14 @@ euc_jp_to_sjis(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError 
= PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_SJIS); - euc_jp2sjis(src, dest, len); + converted = euc_jp2sjis(src, dest, len, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -74,12 +79,14 @@ sjis_to_euc_jp(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_EUC_JP); - sjis2euc_jp(src, dest, len); + converted = sjis2euc_jp(src, dest, len, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -88,12 +95,14 @@ euc_jp_to_mic(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_MULE_INTERNAL); - euc_jp2mic(src, dest, len); + converted = euc_jp2mic(src, dest, len, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -102,12 +111,14 @@ mic_to_euc_jp(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_JP); - mic2euc_jp(src, dest, len); + converted = mic2euc_jp(src, dest, len, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -116,12 +127,14 @@ sjis_to_mic(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_MULE_INTERNAL); - sjis2mic(src, dest, len); + converted = sjis2mic(src, dest, len, noError); - 
PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -130,20 +143,23 @@ mic_to_sjis(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_SJIS); - mic2sjis(src, dest, len); + converted = mic2sjis(src, dest, len, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } /* * SJIS ---> MIC */ -static void -sjis2mic(const unsigned char *sjis, unsigned char *p, int len) +static int +sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError) { + const unsigned char *start = sjis; int c1, c2, i, @@ -167,7 +183,11 @@ sjis2mic(const unsigned char *sjis, unsigned char *p, int len) * JIS X0208, X0212, user defined extended characters */ if (len < 2 || !ISSJISHEAD(c1) || !ISSJISTAIL(sjis[1])) + { + if (noError) + break; report_invalid_encoding(PG_SJIS, (const char *) sjis, len); + } c2 = sjis[1]; k = (c1 << 8) + c2; if (k >= 0xed40 && k < 0xf040) @@ -257,21 +277,28 @@ sjis2mic(const unsigned char *sjis, unsigned char *p, int len) else { /* should be ASCII */ if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_SJIS, (const char *) sjis, len); + } *p++ = c1; sjis++; len--; } } *p = '\0'; + + return sjis - start; } /* * MIC ---> SJIS */ -static void -mic2sjis(const unsigned char *mic, unsigned char *p, int len) +static int +mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError) { + const unsigned char *start = mic; int c1, c2, k, @@ -284,8 +311,12 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len) { /* ASCII */ if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); + } *p++ = c1; mic++; len--; @@ -293,8 +324,12 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len) } l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const 
char *) mic, len); if (l < 0) + { + if (noError) + break; report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); + } if (c1 == LC_JISX0201K) *p++ = mic[1]; else if (c1 == LC_JISX0208) @@ -350,20 +385,27 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len) } } else + { + if (noError) + break; report_untranslatable_char(PG_MULE_INTERNAL, PG_SJIS, (const char *) mic, len); + } mic += l; len -= l; } *p = '\0'; + + return mic - start; } /* * EUC_JP ---> MIC */ -static void -euc_jp2mic(const unsigned char *euc, unsigned char *p, int len) +static int +euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError) { + const unsigned char *start = euc; int c1; int l; @@ -374,8 +416,12 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len) { /* ASCII */ if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_EUC_JP, (const char *) euc, len); + } *p++ = c1; euc++; len--; @@ -383,8 +429,12 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len) } l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len); if (l < 0) + { + if (noError) + break; report_invalid_encoding(PG_EUC_JP, (const char *) euc, len); + } if (c1 == SS2) { /* 1 byte kana? 
*/ *p++ = LC_JISX0201K; @@ -406,14 +456,17 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len) len -= l; } *p = '\0'; + + return euc - start; } /* * MIC ---> EUC_JP */ -static void -mic2euc_jp(const unsigned char *mic, unsigned char *p, int len) +static int +mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError) { + const unsigned char *start = mic; int c1; int l; @@ -424,8 +477,12 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len) { /* ASCII */ if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); + } *p++ = c1; mic++; len--; @@ -433,8 +490,12 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len) } l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len); if (l < 0) + { + if (noError) + break; report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); + } if (c1 == LC_JISX0201K) { *p++ = SS2; @@ -452,20 +513,27 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len) *p++ = mic[2]; } else + { + if (noError) + break; report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_JP, (const char *) mic, len); + } mic += l; len -= l; } *p = '\0'; + + return mic - start; } /* * EUC_JP -> SJIS */ -static void -euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len) +static int +euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len, bool noError) { + const unsigned char *start = euc; int c1, c2, k; @@ -478,8 +546,12 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len) { /* ASCII */ if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_EUC_JP, (const char *) euc, len); + } *p++ = c1; euc++; len--; @@ -487,8 +559,12 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len) } l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len); if (l < 0) + { + if (noError) + break; report_invalid_encoding(PG_EUC_JP, (const char *) euc, len); + } if (c1 == SS2) { /* hankaku 
kana? */ @@ -551,14 +627,17 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len) len -= l; } *p = '\0'; + + return euc - start; } /* * SJIS ---> EUC_JP */ -static void -sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len) +static int +sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len, bool noError) { + const unsigned char *start = sjis; int c1, c2, i, @@ -573,8 +652,12 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len) { /* ASCII */ if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_SJIS, (const char *) sjis, len); + } *p++ = c1; sjis++; len--; @@ -582,8 +665,12 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len) } l = pg_encoding_verifymbchar(PG_SJIS, (const char *) sjis, len); if (l < 0) + { + if (noError) + break; report_invalid_encoding(PG_SJIS, (const char *) sjis, len); + } if (c1 >= 0xa1 && c1 <= 0xdf) { /* JIS X0201 (1 byte kana) */ @@ -680,4 +767,6 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len) len -= l; } *p = '\0'; + + return sjis - start; } diff --git a/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c b/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c index ac823d6c27..3b85f0c186 100644 --- a/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c +++ b/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c @@ -26,13 +26,16 @@ PG_FUNCTION_INFO_V1(mic_to_euc_kr); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
* ---------- */ -static void euc_kr2mic(const unsigned char *euc, unsigned char *p, int len); -static void mic2euc_kr(const unsigned char *mic, unsigned char *p, int len); +static int euc_kr2mic(const unsigned char *euc, unsigned char *p, int len, bool noError); +static int mic2euc_kr(const unsigned char *mic, unsigned char *p, int len, bool noError); Datum euc_kr_to_mic(PG_FUNCTION_ARGS) @@ -40,12 +43,14 @@ euc_kr_to_mic(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_MULE_INTERNAL); - euc_kr2mic(src, dest, len); + converted = euc_kr2mic(src, dest, len, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -54,20 +59,23 @@ mic_to_euc_kr(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_KR); - mic2euc_kr(src, dest, len); + converted = mic2euc_kr(src, dest, len, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } /* * EUC_KR ---> MIC */ -static void -euc_kr2mic(const unsigned char *euc, unsigned char *p, int len) +static int +euc_kr2mic(const unsigned char *euc, unsigned char *p, int len, bool noError) { + const unsigned char *start = euc; int c1; int l; @@ -78,8 +86,12 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len) { l = pg_encoding_verifymbchar(PG_EUC_KR, (const char *) euc, len); if (l != 2) + { + if (noError) + break; report_invalid_encoding(PG_EUC_KR, (const char *) euc, len); + } *p++ = LC_KS5601; *p++ = c1; *p++ = euc[1]; @@ -89,22 +101,29 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len) else { /* should be ASCII */ if (c1 == 0) + { + if (noError) + 
break; report_invalid_encoding(PG_EUC_KR, (const char *) euc, len); + } *p++ = c1; euc++; len--; } } *p = '\0'; + + return euc - start; } /* * MIC ---> EUC_KR */ -static void -mic2euc_kr(const unsigned char *mic, unsigned char *p, int len) +static int +mic2euc_kr(const unsigned char *mic, unsigned char *p, int len, bool noError) { + const unsigned char *start = mic; int c1; int l; @@ -115,8 +134,12 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len) { /* ASCII */ if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); + } *p++ = c1; mic++; len--; @@ -124,18 +147,28 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len) } l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len); if (l < 0) + { + if (noError) + break; report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); + } if (c1 == LC_KS5601) { *p++ = mic[1]; *p++ = mic[2]; } else + { + if (noError) + break; report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_KR, (const char *) mic, len); + } mic += l; len -= l; } *p = '\0'; + + return mic - start; } diff --git a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c index 66c242d7f3..4bf8acda99 100644 --- a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c +++ b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c @@ -32,17 +32,20 @@ PG_FUNCTION_INFO_V1(mic_to_big5); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
* ---------- */ -static void euc_tw2big5(const unsigned char *euc, unsigned char *p, int len); -static void big52euc_tw(const unsigned char *euc, unsigned char *p, int len); -static void big52mic(const unsigned char *big5, unsigned char *p, int len); -static void mic2big5(const unsigned char *mic, unsigned char *p, int len); -static void euc_tw2mic(const unsigned char *euc, unsigned char *p, int len); -static void mic2euc_tw(const unsigned char *mic, unsigned char *p, int len); +static int euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError); +static int big52euc_tw(const unsigned char *euc, unsigned char *p, int len, bool noError); +static int big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError); +static int mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError); +static int euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError); +static int mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError); Datum euc_tw_to_big5(PG_FUNCTION_ARGS) @@ -50,12 +53,14 @@ euc_tw_to_big5(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_BIG5); - euc_tw2big5(src, dest, len); + converted = euc_tw2big5(src, dest, len, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -64,12 +69,14 @@ big5_to_euc_tw(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_EUC_TW); - big52euc_tw(src, dest, len); + converted = big52euc_tw(src, dest, len, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -78,12 +85,14 @@ 
euc_tw_to_mic(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_MULE_INTERNAL); - euc_tw2mic(src, dest, len); + converted = euc_tw2mic(src, dest, len, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -92,12 +101,14 @@ mic_to_euc_tw(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_TW); - mic2euc_tw(src, dest, len); + converted = mic2euc_tw(src, dest, len, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -106,12 +117,14 @@ big5_to_mic(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_MULE_INTERNAL); - big52mic(src, dest, len); + converted = big52mic(src, dest, len, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -120,21 +133,24 @@ mic_to_big5(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_BIG5); - mic2big5(src, dest, len); + converted = mic2big5(src, dest, len, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } /* * EUC_TW ---> Big5 */ -static void -euc_tw2big5(const unsigned char *euc, unsigned char *p, int len) +static int +euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError) { + 
const unsigned char *start = euc; unsigned char c1; unsigned short big5buf, cnsBuf; @@ -149,8 +165,12 @@ euc_tw2big5(const unsigned char *euc, unsigned char *p, int len) /* Verify and decode the next EUC_TW input character */ l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len); if (l < 0) + { + if (noError) + break; report_invalid_encoding(PG_EUC_TW, (const char *) euc, len); + } if (c1 == SS2) { c1 = euc[1]; /* plane No. */ @@ -171,8 +191,12 @@ euc_tw2big5(const unsigned char *euc, unsigned char *p, int len) /* Write it out in Big5 */ big5buf = CNStoBIG5(cnsBuf, lc); if (big5buf == 0) + { + if (noError) + break; report_untranslatable_char(PG_EUC_TW, PG_BIG5, (const char *) euc, len); + } *p++ = (big5buf >> 8) & 0x00ff; *p++ = big5buf & 0x00ff; @@ -182,22 +206,29 @@ euc_tw2big5(const unsigned char *euc, unsigned char *p, int len) else { /* should be ASCII */ if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_EUC_TW, (const char *) euc, len); + } *p++ = c1; euc++; len--; } } *p = '\0'; + + return euc - start; } /* * Big5 ---> EUC_TW */ -static void -big52euc_tw(const unsigned char *big5, unsigned char *p, int len) +static int +big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError) { + const unsigned char *start = big5; unsigned short c1; unsigned short big5buf, cnsBuf; @@ -212,8 +243,12 @@ big52euc_tw(const unsigned char *big5, unsigned char *p, int len) { l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len); if (l < 0) + { + if (noError) + break; report_invalid_encoding(PG_BIG5, (const char *) big5, len); + } big5buf = (c1 << 8) | big5[1]; cnsBuf = BIG5toCNS(big5buf, &lc); @@ -237,8 +272,12 @@ big52euc_tw(const unsigned char *big5, unsigned char *p, int len) *p++ = cnsBuf & 0x00ff; } else + { + if (noError) + break; report_untranslatable_char(PG_BIG5, PG_EUC_TW, (const char *) big5, len); + } big5 += l; len -= l; @@ -256,14 +295,17 @@ big52euc_tw(const unsigned char *big5, unsigned char *p, int len) 
} } *p = '\0'; + + return big5 - start; } /* * EUC_TW ---> MIC */ -static void -euc_tw2mic(const unsigned char *euc, unsigned char *p, int len) +static int +euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError) { + const unsigned char *start = euc; int c1; int l; @@ -274,8 +316,12 @@ euc_tw2mic(const unsigned char *euc, unsigned char *p, int len) { l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len); if (l < 0) + { + if (noError) + break; report_invalid_encoding(PG_EUC_TW, (const char *) euc, len); + } if (c1 == SS2) { c1 = euc[1]; /* plane No. */ @@ -304,22 +350,29 @@ euc_tw2mic(const unsigned char *euc, unsigned char *p, int len) else { /* should be ASCII */ if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_EUC_TW, (const char *) euc, len); + } *p++ = c1; euc++; len--; } } *p = '\0'; + + return euc - start; } /* * MIC ---> EUC_TW */ -static void -mic2euc_tw(const unsigned char *mic, unsigned char *p, int len) +static int +mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError) { + const unsigned char *start = mic; int c1; int l; @@ -330,8 +383,12 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len) { /* ASCII */ if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); + } *p++ = c1; mic++; len--; @@ -339,8 +396,12 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len) } l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len); if (l < 0) + { + if (noError) + break; report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); + } if (c1 == LC_CNS11643_1) { *p++ = mic[1]; @@ -362,20 +423,27 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len) *p++ = mic[3]; } else + { + if (noError) + break; report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_TW, (const char *) mic, len); + } mic += l; len -= l; } *p = '\0'; + + return mic - start; } /* * Big5 ---> MIC */ -static void 
-big52mic(const unsigned char *big5, unsigned char *p, int len) +static int +big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError) { + const unsigned char *start = big5; unsigned short c1; unsigned short big5buf, cnsBuf; @@ -389,8 +457,12 @@ big52mic(const unsigned char *big5, unsigned char *p, int len) { /* ASCII */ if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_BIG5, (const char *) big5, len); + } *p++ = c1; big5++; len--; @@ -398,8 +470,12 @@ big52mic(const unsigned char *big5, unsigned char *p, int len) } l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len); if (l < 0) + { + if (noError) + break; report_invalid_encoding(PG_BIG5, (const char *) big5, len); + } big5buf = (c1 << 8) | big5[1]; cnsBuf = BIG5toCNS(big5buf, &lc); if (lc != 0) @@ -412,20 +488,27 @@ big52mic(const unsigned char *big5, unsigned char *p, int len) *p++ = cnsBuf & 0x00ff; } else + { + if (noError) + break; report_untranslatable_char(PG_BIG5, PG_MULE_INTERNAL, (const char *) big5, len); + } big5 += l; len -= l; } *p = '\0'; + + return big5 - start; } /* * MIC ---> Big5 */ -static void -mic2big5(const unsigned char *mic, unsigned char *p, int len) +static int +mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError) { + const unsigned char *start = mic; unsigned short c1; unsigned short big5buf, cnsBuf; @@ -438,8 +521,12 @@ mic2big5(const unsigned char *mic, unsigned char *p, int len) { /* ASCII */ if (c1 == 0) + { + if (noError) + break; report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); + } *p++ = c1; mic++; len--; @@ -447,8 +534,12 @@ mic2big5(const unsigned char *mic, unsigned char *p, int len) } l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len); if (l < 0) + { + if (noError) + break; report_invalid_encoding(PG_MULE_INTERNAL, (const char *) mic, len); + } if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == LCPRV2_B) { if (c1 == LCPRV2_B) @@ -462,16 +553,26 @@ mic2big5(const 
unsigned char *mic, unsigned char *p, int len) } big5buf = CNStoBIG5(cnsBuf, c1); if (big5buf == 0) + { + if (noError) + break; report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5, (const char *) mic, len); + } *p++ = (big5buf >> 8) & 0x00ff; *p++ = big5buf & 0x00ff; } else + { + if (noError) + break; report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5, (const char *) mic, len); + } mic += l; len -= l; } *p = '\0'; + + return mic - start; } diff --git a/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c b/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c index 2e28e6780a..8610fcb69a 100644 --- a/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c +++ b/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c @@ -30,8 +30,11 @@ PG_FUNCTION_INFO_V1(win1250_to_latin2); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
* ---------- */ @@ -82,12 +85,14 @@ latin2_to_mic(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_MULE_INTERNAL); - latin2mic(src, dest, len, LC_ISO8859_2, PG_LATIN2); + converted = latin2mic(src, dest, len, LC_ISO8859_2, PG_LATIN2, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -96,12 +101,14 @@ mic_to_latin2(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN2); - mic2latin(src, dest, len, LC_ISO8859_2, PG_LATIN2); + converted = mic2latin(src, dest, len, LC_ISO8859_2, PG_LATIN2, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -110,13 +117,15 @@ win1250_to_mic(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_MULE_INTERNAL); - latin2mic_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250, - win1250_2_iso88592); + converted = latin2mic_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250, + win1250_2_iso88592, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -125,13 +134,15 @@ mic_to_win1250(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1250); - mic2latin_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250, - 
iso88592_2_win1250); + converted = mic2latin_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250, + iso88592_2_win1250, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -140,12 +151,15 @@ latin2_to_win1250(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_WIN1250); - local2local(src, dest, len, PG_LATIN2, PG_WIN1250, iso88592_2_win1250); + converted = local2local(src, dest, len, PG_LATIN2, PG_WIN1250, + iso88592_2_win1250, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -154,10 +168,13 @@ win1250_to_latin2(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_LATIN2); - local2local(src, dest, len, PG_WIN1250, PG_LATIN2, win1250_2_iso88592); + converted = local2local(src, dest, len, PG_WIN1250, PG_LATIN2, + win1250_2_iso88592, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } diff --git a/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c b/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c index bc651410f2..bff27d1c29 100644 --- a/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c +++ b/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c @@ -30,8 +30,11 @@ PG_FUNCTION_INFO_V1(mic_to_latin4); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; 
+ * + * Returns the number of bytes successfully converted. * ---------- */ @@ -42,12 +45,14 @@ latin1_to_mic(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_MULE_INTERNAL); - latin2mic(src, dest, len, LC_ISO8859_1, PG_LATIN1); + converted = latin2mic(src, dest, len, LC_ISO8859_1, PG_LATIN1, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -56,12 +61,14 @@ mic_to_latin1(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN1); - mic2latin(src, dest, len, LC_ISO8859_1, PG_LATIN1); + converted = mic2latin(src, dest, len, LC_ISO8859_1, PG_LATIN1, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -70,12 +77,14 @@ latin3_to_mic(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN3, PG_MULE_INTERNAL); - latin2mic(src, dest, len, LC_ISO8859_3, PG_LATIN3); + converted = latin2mic(src, dest, len, LC_ISO8859_3, PG_LATIN3, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -84,12 +93,14 @@ mic_to_latin3(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN3); - mic2latin(src, dest, len, LC_ISO8859_3, PG_LATIN3); + converted = mic2latin(src, 
dest, len, LC_ISO8859_3, PG_LATIN3, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -98,12 +109,14 @@ latin4_to_mic(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN4, PG_MULE_INTERNAL); - latin2mic(src, dest, len, LC_ISO8859_4, PG_LATIN4); + converted = latin2mic(src, dest, len, LC_ISO8859_4, PG_LATIN4, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -112,10 +125,12 @@ mic_to_latin4(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN4); - mic2latin(src, dest, len, LC_ISO8859_4, PG_LATIN4); + converted = mic2latin(src, dest, len, LC_ISO8859_4, PG_LATIN4, noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c b/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c index d6067cdc24..3838b15cab 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c @@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_big5); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
* ---------- */ Datum @@ -38,16 +41,19 @@ big5_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_UTF8); - LocalToUtf(src, len, dest, - &big5_to_unicode_tree, - NULL, 0, - NULL, - PG_BIG5); + converted = LocalToUtf(src, len, dest, + &big5_to_unicode_tree, + NULL, 0, + NULL, + PG_BIG5, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -56,14 +62,17 @@ utf8_to_big5(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_BIG5); - UtfToLocal(src, len, dest, - &big5_from_unicode_tree, - NULL, 0, - NULL, - PG_BIG5); + converted = UtfToLocal(src, len, dest, + &big5_from_unicode_tree, + NULL, 0, + NULL, + PG_BIG5, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c index ed90e8e682..75719fe5f1 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c @@ -33,8 +33,11 @@ PG_FUNCTION_INFO_V1(koi8u_to_utf8); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
* ---------- */ @@ -44,16 +47,19 @@ utf8_to_koi8r(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8R); - UtfToLocal(src, len, dest, - &koi8r_from_unicode_tree, - NULL, 0, - NULL, - PG_KOI8R); + converted = UtfToLocal(src, len, dest, + &koi8r_from_unicode_tree, + NULL, 0, + NULL, + PG_KOI8R, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -62,16 +68,19 @@ koi8r_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_UTF8); - LocalToUtf(src, len, dest, - &koi8r_to_unicode_tree, - NULL, 0, - NULL, - PG_KOI8R); + converted = LocalToUtf(src, len, dest, + &koi8r_to_unicode_tree, + NULL, 0, + NULL, + PG_KOI8R, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -80,16 +89,19 @@ utf8_to_koi8u(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8U); - UtfToLocal(src, len, dest, - &koi8u_from_unicode_tree, - NULL, 0, - NULL, - PG_KOI8U); + converted = UtfToLocal(src, len, dest, + &koi8u_from_unicode_tree, + NULL, 0, + NULL, + PG_KOI8U, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -98,14 +110,17 @@ koi8u_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; 
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8U, PG_UTF8); - LocalToUtf(src, len, dest, - &koi8u_to_unicode_tree, - NULL, 0, - NULL, - PG_KOI8U); + converted = LocalToUtf(src, len, dest, + &koi8u_to_unicode_tree, + NULL, 0, + NULL, + PG_KOI8U, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c index d699affce4..5391001951 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c @@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_jis_2004); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
* ---------- */ Datum @@ -38,16 +41,19 @@ euc_jis_2004_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_UTF8); - LocalToUtf(src, len, dest, - &euc_jis_2004_to_unicode_tree, - LUmapEUC_JIS_2004_combined, lengthof(LUmapEUC_JIS_2004_combined), - NULL, - PG_EUC_JIS_2004); + converted = LocalToUtf(src, len, dest, + &euc_jis_2004_to_unicode_tree, + LUmapEUC_JIS_2004_combined, lengthof(LUmapEUC_JIS_2004_combined), + NULL, + PG_EUC_JIS_2004, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -56,14 +62,17 @@ utf8_to_euc_jis_2004(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JIS_2004); - UtfToLocal(src, len, dest, - &euc_jis_2004_from_unicode_tree, - ULmapEUC_JIS_2004_combined, lengthof(ULmapEUC_JIS_2004_combined), - NULL, - PG_EUC_JIS_2004); + converted = UtfToLocal(src, len, dest, + &euc_jis_2004_from_unicode_tree, + ULmapEUC_JIS_2004_combined, lengthof(ULmapEUC_JIS_2004_combined), + NULL, + PG_EUC_JIS_2004, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c index d7c0ba6a58..c87d1bf239 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c @@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_cn); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null 
terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. * ---------- */ Datum @@ -38,16 +41,19 @@ euc_cn_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_UTF8); - LocalToUtf(src, len, dest, - &euc_cn_to_unicode_tree, - NULL, 0, - NULL, - PG_EUC_CN); + converted = LocalToUtf(src, len, dest, + &euc_cn_to_unicode_tree, + NULL, 0, + NULL, + PG_EUC_CN, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -56,14 +62,17 @@ utf8_to_euc_cn(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_CN); - UtfToLocal(src, len, dest, - &euc_cn_from_unicode_tree, - NULL, 0, - NULL, - PG_EUC_CN); + converted = UtfToLocal(src, len, dest, + &euc_cn_from_unicode_tree, + NULL, 0, + NULL, + PG_EUC_CN, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c index 13a3a23e77..6a55134db2 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c @@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_jp); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- 
source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. * ---------- */ Datum @@ -38,16 +41,19 @@ euc_jp_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_UTF8); - LocalToUtf(src, len, dest, - &euc_jp_to_unicode_tree, - NULL, 0, - NULL, - PG_EUC_JP); + converted = LocalToUtf(src, len, dest, + &euc_jp_to_unicode_tree, + NULL, 0, + NULL, + PG_EUC_JP, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -56,14 +62,17 @@ utf8_to_euc_jp(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JP); - UtfToLocal(src, len, dest, - &euc_jp_from_unicode_tree, - NULL, 0, - NULL, - PG_EUC_JP); + converted = UtfToLocal(src, len, dest, + &euc_jp_from_unicode_tree, + NULL, 0, + NULL, + PG_EUC_JP, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c index 1bbb8aaef7..fe1924e2fe 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c @@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_kr); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; 
+ * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. * ---------- */ Datum @@ -38,16 +41,19 @@ euc_kr_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_UTF8); - LocalToUtf(src, len, dest, - &euc_kr_to_unicode_tree, - NULL, 0, - NULL, - PG_EUC_KR); + converted = LocalToUtf(src, len, dest, + &euc_kr_to_unicode_tree, + NULL, 0, + NULL, + PG_EUC_KR, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -56,14 +62,17 @@ utf8_to_euc_kr(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_KR); - UtfToLocal(src, len, dest, - &euc_kr_from_unicode_tree, - NULL, 0, - NULL, - PG_EUC_KR); + converted = UtfToLocal(src, len, dest, + &euc_kr_from_unicode_tree, + NULL, 0, + NULL, + PG_EUC_KR, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c index 9830045dcc..68215659b5 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c @@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_euc_tw); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * 
BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. * ---------- */ Datum @@ -38,16 +41,19 @@ euc_tw_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_UTF8); - LocalToUtf(src, len, dest, - &euc_tw_to_unicode_tree, - NULL, 0, - NULL, - PG_EUC_TW); + converted = LocalToUtf(src, len, dest, + &euc_tw_to_unicode_tree, + NULL, 0, + NULL, + PG_EUC_TW, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -56,14 +62,17 @@ utf8_to_euc_tw(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_TW); - UtfToLocal(src, len, dest, - &euc_tw_from_unicode_tree, - NULL, 0, - NULL, - PG_EUC_TW); + converted = UtfToLocal(src, len, dest, + &euc_tw_from_unicode_tree, + NULL, 0, + NULL, + PG_EUC_TW, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c index f86ecf2742..e1a59c39a4 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c @@ -183,8 +183,11 @@ conv_utf8_to_18030(uint32 code) * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an 
error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. * ---------- */ Datum @@ -193,16 +196,19 @@ gb18030_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_GB18030, PG_UTF8); - LocalToUtf(src, len, dest, - &gb18030_to_unicode_tree, - NULL, 0, - conv_18030_to_utf8, - PG_GB18030); + converted = LocalToUtf(src, len, dest, + &gb18030_to_unicode_tree, + NULL, 0, + conv_18030_to_utf8, + PG_GB18030, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -211,14 +217,17 @@ utf8_to_gb18030(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GB18030); - UtfToLocal(src, len, dest, - &gb18030_from_unicode_tree, - NULL, 0, - conv_utf8_to_18030, - PG_GB18030); + converted = UtfToLocal(src, len, dest, + &gb18030_from_unicode_tree, + NULL, 0, + conv_utf8_to_18030, + PG_GB18030, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c index 2ab8b16c8a..881386d534 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c @@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_gbk); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw 
an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. * ---------- */ Datum @@ -38,16 +41,19 @@ gbk_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_GBK, PG_UTF8); - LocalToUtf(src, len, dest, - &gbk_to_unicode_tree, - NULL, 0, - NULL, - PG_GBK); + converted = LocalToUtf(src, len, dest, + &gbk_to_unicode_tree, + NULL, 0, + NULL, + PG_GBK, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -56,14 +62,17 @@ utf8_to_gbk(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GBK); - UtfToLocal(src, len, dest, - &gbk_from_unicode_tree, - NULL, 0, - NULL, - PG_GBK); + converted = UtfToLocal(src, len, dest, + &gbk_from_unicode_tree, + NULL, 0, + NULL, + PG_GBK, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c index 3e49f67ea2..d93a521bad 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c @@ -52,8 +52,11 @@ PG_FUNCTION_INFO_V1(utf8_to_iso8859); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns 
the number of bytes successfully converted. * ---------- */ @@ -100,6 +103,7 @@ iso8859_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); int i; CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8); @@ -108,12 +112,15 @@ iso8859_to_utf8(PG_FUNCTION_ARGS) { if (encoding == maps[i].encoding) { - LocalToUtf(src, len, dest, - maps[i].map1, - NULL, 0, - NULL, - encoding); - PG_RETURN_VOID(); + int converted; + + converted = LocalToUtf(src, len, dest, + maps[i].map1, + NULL, 0, + NULL, + encoding, + noError); + PG_RETURN_INT32(converted); } } @@ -122,7 +129,7 @@ iso8859_to_utf8(PG_FUNCTION_ARGS) errmsg("unexpected encoding ID %d for ISO 8859 character sets", encoding))); - PG_RETURN_VOID(); + PG_RETURN_INT32(0); } Datum @@ -132,6 +139,7 @@ utf8_to_iso8859(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); int i; CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1); @@ -140,12 +148,15 @@ utf8_to_iso8859(PG_FUNCTION_ARGS) { if (encoding == maps[i].encoding) { - UtfToLocal(src, len, dest, - maps[i].map2, - NULL, 0, - NULL, - encoding); - PG_RETURN_VOID(); + int converted; + + converted = UtfToLocal(src, len, dest, + maps[i].map2, + NULL, 0, + NULL, + encoding, + noError); + PG_RETURN_INT32(converted); } } @@ -154,5 +165,5 @@ utf8_to_iso8859(PG_FUNCTION_ARGS) errmsg("unexpected encoding ID %d for ISO 8859 character sets", encoding))); - PG_RETURN_VOID(); + PG_RETURN_INT32(0); } diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c index 67e713cca1..d0dc4cca37 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c +++ 
b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c @@ -26,8 +26,11 @@ PG_FUNCTION_INFO_V1(utf8_to_iso8859_1); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. * ---------- */ @@ -37,6 +40,8 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + unsigned char *start = src; unsigned short c; CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_UTF8); @@ -45,7 +50,11 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS) { c = *src; if (c == 0) + { + if (noError) + break; report_invalid_encoding(PG_LATIN1, (const char *) src, len); + } if (!IS_HIGHBIT_SET(c)) *dest++ = c; else @@ -58,7 +67,7 @@ iso8859_1_to_utf8(PG_FUNCTION_ARGS) } *dest = '\0'; - PG_RETURN_VOID(); + PG_RETURN_INT32(src - start); } Datum @@ -67,6 +76,8 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + unsigned char *start = src; unsigned short c, c1; @@ -76,7 +87,11 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS) { c = *src; if (c == 0) + { + if (noError) + break; report_invalid_encoding(PG_UTF8, (const char *) src, len); + } /* fast path for ASCII-subset characters */ if (!IS_HIGHBIT_SET(c)) { @@ -89,10 +104,18 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS) int l = pg_utf_mblen(src); if (l > len || !pg_utf8_islegal(src, l)) + { + if (noError) + break; report_invalid_encoding(PG_UTF8, (const char *) src, len); + } if (l != 2) + { 
+ if (noError) + break; report_untranslatable_char(PG_UTF8, PG_LATIN1, (const char *) src, len); + } c1 = src[1] & 0x3f; c = ((c & 0x1f) << 6) | c1; if (c >= 0x80 && c <= 0xff) @@ -102,11 +125,15 @@ utf8_to_iso8859_1(PG_FUNCTION_ARGS) len -= 2; } else + { + if (noError) + break; report_untranslatable_char(PG_UTF8, PG_LATIN1, (const char *) src, len); + } } } *dest = '\0'; - PG_RETURN_VOID(); + PG_RETURN_INT32(src - start); } diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c b/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c index 578f5df4e7..317daa2d5e 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c @@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_johab); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
* ---------- */ Datum @@ -38,16 +41,19 @@ johab_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_JOHAB, PG_UTF8); - LocalToUtf(src, len, dest, - &johab_to_unicode_tree, - NULL, 0, - NULL, - PG_JOHAB); + converted = LocalToUtf(src, len, dest, + &johab_to_unicode_tree, + NULL, 0, + NULL, + PG_JOHAB, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -56,14 +62,17 @@ utf8_to_johab(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_JOHAB); - UtfToLocal(src, len, dest, - &johab_from_unicode_tree, - NULL, 0, - NULL, - PG_JOHAB); + converted = UtfToLocal(src, len, dest, + &johab_from_unicode_tree, + NULL, 0, + NULL, + PG_JOHAB, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c index dd9fc2975a..4c9348aba5 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c @@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_sjis); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
* ---------- */ Datum @@ -38,16 +41,19 @@ sjis_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_UTF8); - LocalToUtf(src, len, dest, - &sjis_to_unicode_tree, - NULL, 0, - NULL, - PG_SJIS); + converted = LocalToUtf(src, len, dest, + &sjis_to_unicode_tree, + NULL, 0, + NULL, + PG_SJIS, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -56,14 +62,17 @@ utf8_to_sjis(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SJIS); - UtfToLocal(src, len, dest, - &sjis_from_unicode_tree, - NULL, 0, - NULL, - PG_SJIS); + converted = UtfToLocal(src, len, dest, + &sjis_from_unicode_tree, + NULL, 0, + NULL, + PG_SJIS, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c b/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c index 4bcc886d67..1fffdc5930 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c @@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_shift_jis_2004); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
* ---------- */ Datum @@ -38,16 +41,19 @@ shift_jis_2004_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_UTF8); - LocalToUtf(src, len, dest, - &shift_jis_2004_to_unicode_tree, - LUmapSHIFT_JIS_2004_combined, lengthof(LUmapSHIFT_JIS_2004_combined), - NULL, - PG_SHIFT_JIS_2004); + converted = LocalToUtf(src, len, dest, + &shift_jis_2004_to_unicode_tree, + LUmapSHIFT_JIS_2004_combined, lengthof(LUmapSHIFT_JIS_2004_combined), + NULL, + PG_SHIFT_JIS_2004, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -56,14 +62,17 @@ utf8_to_shift_jis_2004(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SHIFT_JIS_2004); - UtfToLocal(src, len, dest, - &shift_jis_2004_from_unicode_tree, - ULmapSHIFT_JIS_2004_combined, lengthof(ULmapSHIFT_JIS_2004_combined), - NULL, - PG_SHIFT_JIS_2004); + converted = UtfToLocal(src, len, dest, + &shift_jis_2004_from_unicode_tree, + ULmapSHIFT_JIS_2004_combined, lengthof(ULmapSHIFT_JIS_2004_combined), + NULL, + PG_SHIFT_JIS_2004, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c index c8e512994a..d9471dad09 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c @@ -28,8 +28,11 @@ PG_FUNCTION_INFO_V1(utf8_to_uhc); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination 
string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. * ---------- */ Datum @@ -38,16 +41,19 @@ uhc_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_UHC, PG_UTF8); - LocalToUtf(src, len, dest, - &uhc_to_unicode_tree, - NULL, 0, - NULL, - PG_UHC); + converted = LocalToUtf(src, len, dest, + &uhc_to_unicode_tree, + NULL, 0, + NULL, + PG_UHC, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } Datum @@ -56,14 +62,17 @@ utf8_to_uhc(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_UHC); - UtfToLocal(src, len, dest, - &uhc_from_unicode_tree, - NULL, 0, - NULL, - PG_UHC); + converted = UtfToLocal(src, len, dest, + &uhc_from_unicode_tree, + NULL, 0, + NULL, + PG_UHC, + noError); - PG_RETURN_VOID(); + PG_RETURN_INT32(converted); } diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c b/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c index 0c9493dee5..110ba5677d 100644 --- a/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c +++ b/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c @@ -48,8 +48,11 @@ PG_FUNCTION_INFO_V1(utf8_to_win); * INTEGER, -- destination encoding id * CSTRING, -- source string (null terminated C string) * CSTRING, -- destination string (null terminated C string) - * INTEGER -- source string length - * ) returns VOID; + * INTEGER, 
-- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. * ---------- */ @@ -81,6 +84,7 @@ win_to_utf8(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); int i; CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8); @@ -89,12 +93,15 @@ win_to_utf8(PG_FUNCTION_ARGS) { if (encoding == maps[i].encoding) { - LocalToUtf(src, len, dest, - maps[i].map1, - NULL, 0, - NULL, - encoding); - PG_RETURN_VOID(); + int converted; + + converted = LocalToUtf(src, len, dest, + maps[i].map1, + NULL, 0, + NULL, + encoding, + noError); + PG_RETURN_INT32(converted); } } @@ -103,7 +110,7 @@ win_to_utf8(PG_FUNCTION_ARGS) errmsg("unexpected encoding ID %d for WIN character sets", encoding))); - PG_RETURN_VOID(); + PG_RETURN_INT32(0); } Datum @@ -113,6 +120,7 @@ utf8_to_win(PG_FUNCTION_ARGS) unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); int i; CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1); @@ -121,12 +129,15 @@ utf8_to_win(PG_FUNCTION_ARGS) { if (encoding == maps[i].encoding) { - UtfToLocal(src, len, dest, - maps[i].map2, - NULL, 0, - NULL, - encoding); - PG_RETURN_VOID(); + int converted; + + converted = UtfToLocal(src, len, dest, + maps[i].map2, + NULL, 0, + NULL, + encoding, + noError); + PG_RETURN_INT32(converted); } } @@ -135,5 +146,5 @@ utf8_to_win(PG_FUNCTION_ARGS) errmsg("unexpected encoding ID %d for WIN character sets", encoding))); - PG_RETURN_VOID(); + PG_RETURN_INT32(0); } diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 2578573b0a..a13c398f4a 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -406,12 +406,13 @@ 
pg_do_encoding_conversion(unsigned char *src, int len, MemoryContextAllocHuge(CurrentMemoryContext, (Size) len * MAX_CONVERSION_GROWTH + 1); - OidFunctionCall5(proc, - Int32GetDatum(src_encoding), - Int32GetDatum(dest_encoding), - CStringGetDatum(src), - CStringGetDatum(result), - Int32GetDatum(len)); + (void) OidFunctionCall6(proc, + Int32GetDatum(src_encoding), + Int32GetDatum(dest_encoding), + CStringGetDatum(src), + CStringGetDatum(result), + Int32GetDatum(len), + BoolGetDatum(false)); /* * If the result is large, it's worth repalloc'ing to release any extra @@ -436,6 +437,62 @@ pg_do_encoding_conversion(unsigned char *src, int len, } /* + * Convert src string to another encoding. + * + * This function has a different API than the other conversion functions. + * The caller should've looked up the conversion function using + * FindDefaultConversionProc(). Unlike the other functions, the converted + * result is not palloc'd. It is written to the caller-supplied buffer + * instead. + * + * src_encoding - encoding to convert from + * dest_encoding - encoding to convert to + * src, srclen - input buffer and its length in bytes + * dest, destlen - destination buffer and its size in bytes + * + * The output is null-terminated. + * + * If destlen < srclen * MAX_CONVERSION_GROWTH + 1, the converted output + * wouldn't necessarily fit in the output buffer, and the function will not + * convert the whole input. + * + * TODO: The conversion function interface is not great. Firstly, it + * would be nice to pass through the destination buffer size to the + * conversion function, so that if you pass a shorter destination buffer, it + * could still continue to fill up the whole buffer. Currently, we have to + * assume worst case expansion and stop the conversion short, even if there + * is in fact space left in the destination buffer. Secondly, it would be + * nice to return the number of bytes written to the caller, to avoid a call + * to strlen(). 
+ */ +int +pg_do_encoding_conversion_buf(Oid proc, + int src_encoding, + int dest_encoding, + unsigned char *src, int srclen, + unsigned char *dest, int destlen, + bool noError) +{ + Datum result; + + /* + * If the destination buffer is not large enough to hold the result in the + * worst case, limit the input size passed to the conversion function. + */ + if ((Size) srclen >= ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH)) + srclen = ((destlen - 1) / (Size) MAX_CONVERSION_GROWTH); + + result = OidFunctionCall6(proc, + Int32GetDatum(src_encoding), + Int32GetDatum(dest_encoding), + CStringGetDatum(src), + CStringGetDatum(dest), + Int32GetDatum(srclen), + BoolGetDatum(noError)); + return DatumGetInt32(result); +} + +/* * Convert string to encoding encoding_name. The source * encoding is the DB encoding. * @@ -762,12 +819,13 @@ perform_default_encoding_conversion(const char *src, int len, MemoryContextAllocHuge(CurrentMemoryContext, (Size) len * MAX_CONVERSION_GROWTH + 1); - FunctionCall5(flinfo, + FunctionCall6(flinfo, Int32GetDatum(src_encoding), Int32GetDatum(dest_encoding), CStringGetDatum(src), CStringGetDatum(result), - Int32GetDatum(len)); + Int32GetDatum(len), + BoolGetDatum(false)); /* * Release extra space if there might be a lot --- see comments in @@ -849,12 +907,13 @@ pg_unicode_to_server(pg_wchar c, unsigned char *s) c_as_utf8[c_as_utf8_len] = '\0'; /* Convert, or throw error if we can't */ - FunctionCall5(Utf8ToServerConvProc, + FunctionCall6(Utf8ToServerConvProc, Int32GetDatum(PG_UTF8), Int32GetDatum(server_encoding), CStringGetDatum(c_as_utf8), CStringGetDatum(s), - Int32GetDatum(c_as_utf8_len)); + Int32GetDatum(c_as_utf8_len), + BoolGetDatum(false)); } |