From 53a2b763b8dc516bd20258ea5aead80bc7ab7cf3 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Fri, 10 Mar 2023 23:32:24 +0100 Subject: xgettext: In language PO, handle non-ASCII non-UTF-8 input files correctly. * gettext-tools/src/x-po.c: Include msgl-iconv.h, msgl-ascii.h, po-charset.h. (extract): If a header charset is present, convert the messages to UTF-8; otherwise verify that they are all ASCII. * gettext-tools/tests/xgettext-po-3: New file. * gettext-tools/tests/xgettext-po-4: New file. * gettext-tools/tests/testdata/xg-po-3.po: New file. * gettext-tools/tests/testdata/xg-po-4.po: New file. * gettext-tools/tests/Makefile.am (TESTS, EXTRA_DIST): Add them. * NEWS: Mention the change. --- NEWS | 4 +++- gettext-tools/src/x-po.c | 26 +++++++++++++++++++++++- gettext-tools/tests/Makefile.am | 5 +++-- gettext-tools/tests/testdata/xg-po-3.po | 7 +++++++ gettext-tools/tests/testdata/xg-po-4.po | 2 ++ gettext-tools/tests/xgettext-po-3 | 36 +++++++++++++++++++++++++++++++++ gettext-tools/tests/xgettext-po-4 | 12 +++++++++++ 7 files changed, 88 insertions(+), 4 deletions(-) create mode 100644 gettext-tools/tests/testdata/xg-po-3.po create mode 100644 gettext-tools/tests/testdata/xg-po-4.po create mode 100755 gettext-tools/tests/xgettext-po-3 create mode 100755 gettext-tools/tests/xgettext-po-4 diff --git a/NEWS b/NEWS index 1456de2e1..828853bc3 100644 --- a/NEWS +++ b/NEWS @@ -17,7 +17,9 @@ Version 0.21.2 - February 2023 arguments. * xgettext: - The xgettext option '--sorted-output' is now deprecated. + - The xgettext option '--sorted-output' is now deprecated. + - xgettext input files of type PO that are not all ASCII and not UTF-8 + encoded are now handled correctly. * Emacs PO mode: Fix an incompatibility with Emacs version 29 or newer. 
diff --git a/gettext-tools/src/x-po.c b/gettext-tools/src/x-po.c index 9b5690f94..fd6b517a0 100644 --- a/gettext-tools/src/x-po.c +++ b/gettext-tools/src/x-po.c @@ -1,5 +1,5 @@ /* xgettext PO, JavaProperties, and NXStringTable backends. - Copyright (C) 1995-1998, 2000-2003, 2005-2006, 2008-2009, 2014, 2018, 2020 Free Software Foundation, Inc. + Copyright (C) 1995-1998, 2000-2003, 2005-2006, 2008-2009, 2014, 2018, 2020, 2023 Free Software Foundation, Inc. This file was written by Peter Miller @@ -37,6 +37,9 @@ #include "read-po.h" #include "read-properties.h" #include "read-stringtable.h" +#include "msgl-iconv.h" +#include "msgl-ascii.h" +#include "po-charset.h" #include "po-lex.h" #include "gettext.h" @@ -201,10 +204,31 @@ extract (FILE *fp, } } } + + if (!input_syntax->produces_utf8) + { + /* Convert the messages to UTF-8. + finalize_header() expects this. */ + message_list_ty *mlp = mdlp->item[0]->messages; + iconv_message_list (mlp, NULL, po_charset_utf8, logical_filename); + } } free (header_charset); } + else + { + if (!xgettext_omit_header && !input_syntax->produces_utf8) + { + /* finalize_header() expects the messages to be in UTF-8 encoding. + We don't know the encoding here; therefore we have to reject the + input if it is not entirely ASCII. 
*/ + if (!is_ascii_msgdomain_list (mdlp)) + error (EXIT_FAILURE, 0, + _("%s: input file doesn't contain a header entry with a charset specification"), + logical_filename); + } + } } diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am index ba4ebc3a9..240a88a51 100644 --- a/gettext-tools/tests/Makefile.am +++ b/gettext-tools/tests/Makefile.am @@ -132,7 +132,7 @@ TESTS = gettext-1 gettext-2 \ xgettext-php-1 xgettext-php-2 xgettext-php-3 xgettext-php-4 \ xgettext-php-stackovfl-1 xgettext-php-stackovfl-2 \ xgettext-php-stackovfl-3 xgettext-php-stackovfl-4 \ - xgettext-po-1 xgettext-po-2 \ + xgettext-po-1 xgettext-po-2 xgettext-po-3 xgettext-po-4 \ xgettext-properties-1 xgettext-properties-2 xgettext-properties-3 \ xgettext-properties-4 \ xgettext-rst-1 xgettext-rst-2 \ @@ -227,7 +227,8 @@ EXTRA_DIST += init.sh init.cfg $(TESTS) \ xgettext-1 \ xgettext-c-1 xg-c-comment-6.c xg-c-escape-3.c xg-vala-2.vala \ common/supplemental/plurals.xml \ - testdata/xg-el-so-3.el testdata/xg-el-so-4.el + testdata/xg-el-so-3.el testdata/xg-el-so-4.el \ + testdata/xg-po-3.po testdata/xg-po-4.po XGETTEXT = ../src/xgettext diff --git a/gettext-tools/tests/testdata/xg-po-3.po b/gettext-tools/tests/testdata/xg-po-3.po new file mode 100644 index 000000000..c2930feeb --- /dev/null +++ b/gettext-tools/tests/testdata/xg-po-3.po @@ -0,0 +1,7 @@ +msgid "" +msgstr "" +"Content-Type: text/plain; charset=ISO-8859-1\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "ü" +msgstr "" diff --git a/gettext-tools/tests/testdata/xg-po-4.po b/gettext-tools/tests/testdata/xg-po-4.po new file mode 100644 index 000000000..cc199ee24 --- /dev/null +++ b/gettext-tools/tests/testdata/xg-po-4.po @@ -0,0 +1,2 @@ +msgid "ü" +msgstr "" diff --git a/gettext-tools/tests/xgettext-po-3 b/gettext-tools/tests/xgettext-po-3 new file mode 100755 index 000000000..6742b1e53 --- /dev/null +++ b/gettext-tools/tests/xgettext-po-3 @@ -0,0 +1,36 @@ +#! /bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . 
../src + +# Test PO extractor with non-ASCII input file with header entry. + +: ${XGETTEXT=xgettext} +${XGETTEXT} --no-location -d xg-po-3.tmp "$wabs_srcdir"/testdata/xg-po-3.po || Exit 1 +func_filter_POT_Creation_Date xg-po-3.tmp.po xg-po-3.pot + +cat <<\EOF > xg-po-3.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "ü" +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} xg-po-3.ok xg-po-3.pot || Exit 1 + +Exit 0 diff --git a/gettext-tools/tests/xgettext-po-4 b/gettext-tools/tests/xgettext-po-4 new file mode 100755 index 000000000..6ed4e88e3 --- /dev/null +++ b/gettext-tools/tests/xgettext-po-4 @@ -0,0 +1,12 @@ +#! /bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . ../src + +# Test PO extractor with non-ASCII input file without header entry. + +: ${XGETTEXT=xgettext} +${XGETTEXT} --no-location -d xg-po-4.tmp "$wabs_srcdir"/testdata/xg-po-4.po 2>xg-po-so-4.err +result=$? +cat xg-po-so-4.err +test $result = 1 || Exit 1 + +exit 0 -- cgit v1.2.1