Imported from /home/lorry/working-area/delta_gettext-tarball/gettext-0.19.4.tar.xz.gettext-0.19.4

author: Lorry Tar Creator <lorry-tar-importer@baserock.org> 2014-12-24 07:38:37 +0000
committer: <> 2015-02-02 12:02:29 +0000
commit: 482840e61f86ca321838a91e902c41d40c098bbb (patch)
tree: 01ea2e242fd2792d19fe192476601587901db794 /gettext-tools/src/read-properties.c
download: gettext-tarball-482840e61f86ca321838a91e902c41d40c098bbb.tar.gz
1 files changed, 560 insertions, 0 deletions
diff --git a/gettext-tools/src/read-properties.c b/gettext-tools/src/read-properties.c
new file mode 100644
index 0000000..0c64730
--- /dev/null
+++ b/gettext-tools/src/read-properties.c
@@ -0,0 +1,560 @@
+/* Reading Java .properties files.
+   Copyright (C) 2003, 2005-2007, 2009 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2003.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+/* Specification.  */
+#include "read-properties.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "error.h"
+#include "error-progname.h"
+#include "message.h"
+#include "read-catalog-abstract.h"
+#include "xalloc.h"
+#include "xvasprintf.h"
+#include "po-xerror.h"
+#include "msgl-ascii.h"
+#include "unistr.h"
+#include "gettext.h"
+
+#define _(str) gettext (str)           /* Shorthand for gettext.  */
+
+/* For compiling this file in C++ mode, where 'this' is a keyword.  */
+#ifdef __cplusplus
+# define this thiss
+#endif
+
+
+/* The format of the Java .properties files is documented in the JDK
+   documentation for class java.util.Properties.  In the case of .properties
+   files for PropertyResourceBundle, each non-comment line contains a
+   key/value pair in the form "key = value" or "key : value" or "key value",
+   where the key is the msgid and the value is the msgstr.  Messages with
+   plurals are not supported in this format.  */
+
+/* Handling of comments: We copy all comments from the .properties file to
+   the PO file. This is not really needed; it's a service for translators
+   who don't like PO files and prefer to maintain the .properties file.  */
+
+/* Real filename, used in error messages; set during properties_parse.  */
+static const char *real_file_name;
+
+/* Current file name and line number; defined in another file.  */
+extern lex_pos_ty gram_pos;
+
+/* The input file stream read by phase1_getc; set during properties_parse.  */
+static FILE *fp;
+
+
+/* Phase 1: Read an ISO-8859-1 character.
+   Returns the next byte of the stream fp, or EOF when there is none.
+   A read error is reported through po_xerror as a fatal error.
+   Max. 1 pushback character.  */
+
+static int
+phase1_getc ()
+{
+  int ch = getc (fp);
+
+  if (ch != EOF)
+    return ch;
+
+  /* EOF also signals a failed read; report that case as a fatal error.  */
+  if (ferror (fp))
+    {
+      const char *reason = strerror (errno);
+      char *what = xasprintf (_("error while reading \"%s\""),
+                              real_file_name);
+
+      po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
+                 xasprintf ("%s: %s", what, reason));
+    }
+
+  return EOF;
+}
+
+static inline void
+phase1_ungetc (int c)   /* Push C back onto fp for the next phase1_getc.  */
+{
+  if (c != EOF)         /* EOF is not pushed back.  */
+    ungetc (c, fp);
+}
+
+
+/* Phase 2: Read an ISO-8859-1 character, treating CR/LF like a single LF.
+   Max. 2 pushback characters.  */
+
+static unsigned char phase2_pushback[2];   /* Pushed-back characters (a stack).  */
+static int phase2_pushback_length;         /* Number of entries in use.  */
+
+static int
+phase2_getc ()
+{
+  int ch;
+
+  if (phase2_pushback_length != 0)
+    ch = phase2_pushback[--phase2_pushback_length];   /* Last in, first out.  */
+  else
+    {
+      ch = phase1_getc ();
+      if (ch == '\r')
+        {
+          /* Deliver CR LF as a lone LF; a CR before anything else stays.  */
+          int next = phase1_getc ();
+
+          if (next != '\n')
+            phase1_ungetc (next);
+          else
+            ch = '\n';
+        }
+    }
+
+  if (ch == '\n')        /* Pushed-back newlines are counted again here.  */
+    gram_pos.line_number++;
+  return ch;
+}
+
+static void
+phase2_ungetc (int c)
+{
+  if (c == EOF)
+    return;                             /* EOF is never stored.  */
+  gram_pos.line_number -= (c == '\n');  /* A newline will be re-counted.  */
+  phase2_pushback[phase2_pushback_length++] = c;
+}
+
+
+/* Phase 3: Read an ISO-8859-1 character, treating CR/LF like a single LF.
+   A backslash-newline pair and the blanks after it are skipped, so that
+   continuation lines read as one line.  Max. 1 pushback character.  */
+
+static int
+phase3_getc ()
+{
+  int c = phase2_getc ();
+
+  while (c == '\\')
+    {
+      int next = phase2_getc ();
+
+      if (next != '\n')
+        {
+          /* A plain backslash: un-read its successor, return the '\\'.  */
+          phase2_ungetc (next);
+          break;
+        }
+
+      /* Skip the backslash-newline and all whitespace that follows it.  */
+      do
+        c = phase2_getc ();
+      while (c == ' ' || c == '\t' || c == '\r' || c == '\f');
+    }
+  return c;
+}
+
+static inline void
+phase3_ungetc (int c)   /* Push C back for the next phase3_getc.  */
+{
+  phase2_ungetc (c);    /* Phase 3 keeps no buffer of its own.  */
+}
+
+
+/* Phase 4: Read a UTF-16 code unit, treating CR/LF like a single LF,
+   with handling of continuation lines and of \uxxxx sequences.
+   Returns -1 at end of input.  \t, \n, \r and \f yield the corresponding
+   control character, \uxxxx the value of its four hexadecimal digits, and
+   a backslash before any other character yields that character.  A \u
+   not followed by four hexadecimal digits is reported and yields 'u'.  */
+
+static int
+phase4_getuc ()
+{
+  unsigned int value = 0;
+  int count;
+  int c = phase3_getc ();
+
+  if (c == EOF)
+    return -1;
+  if (c != '\\')
+    return c;
+
+  /* C is a backslash; the character after it selects the escape.  */
+  c = phase3_getc ();
+  switch (c)
+    {
+    case 't': return '\t';
+    case 'n': return '\n';
+    case 'r': return '\r';
+    case 'f': return '\f';
+    case 'u': break;
+    default:  return c;
+    }
+
+  /* \uxxxx: accumulate the four hexadecimal digits.  */
+  for (count = 0; count < 4; count++)
+    {
+      int digit = phase3_getc ();
+
+      if (digit >= '0' && digit <= '9')
+        value = (value << 4) + (digit - '0');
+      else if (digit >= 'A' && digit <= 'F')
+        value = (value << 4) + (digit - 'A' + 10);
+      else if (digit >= 'a' && digit <= 'f')
+        value = (value << 4) + (digit - 'a' + 10);
+      else
+        {
+          /* Not a digit: un-read it, report the error, and yield 'u'.  */
+          phase3_ungetc (digit);
+          po_xerror (PO_SEVERITY_ERROR, NULL,
+                     real_file_name, gram_pos.line_number, (size_t)(-1),
+                     false, _("warning: invalid \\uxxxx syntax for Unicode character"));
+          return 'u';
+        }
+    }
+  return value;
+}
+
+
+/* Converts a string from ISO-8859-1 encoding to UTF-8 encoding.  A string
+   accepted by is_ascii_string is returned as is; any other string is
+   converted into memory obtained from XNMALLOC.  */
+static char *
+conv_from_iso_8859_1 (char *string)
+{
+  size_t length;
+  size_t i;
+  unsigned char *result;
+  unsigned char *out;
+
+  if (is_ascii_string (string))
+    return string;
+
+  length = strlen (string);
+  /* Each ISO-8859-1 character needs 2 bytes at worst.  */
+  result = XNMALLOC (2 * length + 1, unsigned char);
+  out = result;
+  for (i = 0; i < length; i++)
+    {
+      int written = u8_uctomb (out, (unsigned char) string[i], 6);
+      assert (written > 0);
+      out += written;
+    }
+  *out = '\0';
+  assert (out - result <= 2 * length);
+  return (char *) result;
+}
+
+
+/* Parses the four hexadecimal digits at S.  Returns their value, or -1 if
+   one of the four characters is not a hexadecimal digit.  Reading stops at
+   the first such character, hence never goes beyond a terminating NUL.  */
+static int
+conv_java_hex4 (const char *s)
+{
+  int value = 0;
+  int i;
+
+  for (i = 0; i < 4; i++)
+    {
+      int c = (unsigned char) s[i];
+
+      if (c >= '0' && c <= '9')
+        value = (value << 4) + (c - '0');
+      else if (c >= 'A' && c <= 'F')
+        value = (value << 4) + (c - 'A' + 10);
+      else if (c >= 'a' && c <= 'f')
+        value = (value << 4) + (c - 'a' + 10);
+      else
+        return -1;
+    }
+  return value;
+}
+
+
+/* Converts a string from JAVA encoding (with \uxxxx sequences) to UTF-8
+   encoding.  May destructively modify the argument string.
+   Returns STRING itself.  A \uxxxx sequence is replaced by the character
+   it denotes; a high surrogate immediately followed by a low surrogate,
+   both written as \uxxxx, is combined into one character.  Everything
+   else is copied unchanged, including malformed \u sequences and
+   surrogates that are not part of such a pair.  */
+static char *
+conv_from_java (char *string)
+{
+  /* This conversion can only shrink the string, never increase its size.
+     So there is no need to xmalloc the result freshly.  */
+  const char *p = string;
+  unsigned char *q = (unsigned char *) string;
+
+  while (*p != '\0')
+    {
+      if (p[0] == '\\' && p[1] == 'u')
+        {
+          int n = conv_java_hex4 (p + 2);
+
+          if (n >= 0xd800 && n < 0xdc00)
+            {
+              /* A high surrogate.  p[6] is within the string, because
+                 p[2..5] are hexadecimal digits.  */
+              int m = (p[6] == '\\' && p[7] == 'u'
+                       ? conv_java_hex4 (p + 8) : -1);
+
+              if (m >= 0xdc00 && m < 0xe000)
+                {
+                  /* Combine two UTF-16 words to a character.  */
+                  unsigned int uc =
+                    0x10000 + ((n - 0xd800) << 10) + (m - 0xdc00);
+
+                  p += 12;
+                  q += u8_uctomb (q, uc, 6);
+                  continue;
+                }
+            }
+          else if (n >= 0 && !(n >= 0xdc00 && n < 0xe000))
+            {
+              /* An ordinary character, neither kind of surrogate.  */
+              p += 6;
+              q += u8_uctomb (q, n, 6);
+              continue;
+            }
+          /* Here the digits are malformed or denote a surrogate without
+             its partner.  u8_uctomb returns -1 for a surrogate, which
+             would move q backwards, so the text is copied as it stands.  */
+        }
+
+      /* Copy one byte unchanged.  */
+      *q++ = (unsigned char) *p++;
+    }
+  *q = '\0';
+  return string;
+}
+
+
+/* Reads a key or value string.
+   Returns the string in UTF-8 encoding, in memory obtained from XNMALLOC,
+   or NULL if the end of the logical line is reached before any string.
+   Parsing ends:
+     - when returning NULL, after the end of the logical line,
+     - otherwise, if in_key is true, after the whitespace and possibly the
+       separator that follows after the string,
+     - otherwise, if in_key is false, after the end of the logical line. */
+
+static char *
+read_escaped_string (bool in_key)
+{
+  static unsigned short *buffer;    /* UTF-16 words; reused across calls.  */
+  static size_t bufmax;             /* Allocated size of buffer, in words.  */
+  static size_t buflen;             /* Number of words currently stored.  */
+  int c;
+
+  /* Skip whitespace before the string.  */
+  do
+    c = phase3_getc ();
+  while (c == ' ' || c == '\t' || c == '\r' || c == '\f');
+
+  if (c == EOF || c == '\n')
+    /* Empty string.  */
+    return NULL;
+
+  /* Start accumulating the string.  We store the string in UTF-16 before
+     converting it to UTF-8.  Why not convert every character directly to
+     UTF-8? Because a string can contain surrogates like \uD800\uDF00, and
+     we must combine them to a single UTF-8 character.  */
+  buflen = 0;
+  for (;;)
+    {
+      if (in_key && (c == '=' || c == ':'
+                     || c == ' ' || c == '\t' || c == '\r' || c == '\f'))
+        {
+          /* Skip whitespace after the string.  */
+          while (c == ' ' || c == '\t' || c == '\r' || c == '\f')
+            c = phase3_getc ();
+          /* Skip '=' or ':' separator.  */
+          if (!(c == '=' || c == ':'))
+            phase3_ungetc (c);
+          break;
+        }
+
+      phase3_ungetc (c);    /* Let phase4_getuc decode C and any escape.  */
+
+      /* Read the next UTF-16 word, with escape sequences decoded.  */
+      c = phase4_getuc ();
+      if (c < 0)            /* Input ended.  */
+        break;
+      /* Append it to the buffer.  */
+      if (buflen >= bufmax)
+        {
+          bufmax += 100;    /* Grow in steps of 100 words.  */
+          buffer = xrealloc (buffer, bufmax * sizeof (unsigned short));
+        }
+      buffer[buflen++] = c;
+
+      c = phase3_getc ();   /* Look at the character after it.  */
+      if (c == EOF || c == '\n')
+        {
+          if (in_key)       /* The call that reads the value shall see it.  */
+            phase3_ungetc (c);
+          break;
+        }
+    }
+
+  /* Now convert from UTF-16 to UTF-8.  */
+  {
+    size_t pos;                     /* Read index into buffer.  */
+    unsigned char *utf8_string;     /* The result.  */
+    unsigned char *q;               /* Write position in utf8_string.  */
+
+    /* Each UTF-16 word needs 3 bytes at worst.  */
+    utf8_string = XNMALLOC (3 * buflen + 1, unsigned char);
+    for (pos = 0, q = utf8_string; pos < buflen; )
+      {
+        ucs4_t uc;
+        int n;
+
+        pos += u16_mbtouc (&uc, buffer + pos, buflen - pos);
+        n = u8_uctomb (q, uc, 6);
+        assert (n > 0);
+        q += n;
+      }
+    *q = '\0';
+    assert (q - utf8_string <= 3 * buflen);
+
+    return (char *) utf8_string;
+  }
+}
+
+
+/* Read a .properties file from the stream FILE, and dispatch to the various
+   abstract_catalog_reader_class_ty methods.  LOGICAL_FILENAME is not used.  */
+static void
+properties_parse (abstract_catalog_reader_ty *this, FILE *file,
+                  const char *real_filename, const char *logical_filename)
+{
+  fp = file;
+  real_file_name = real_filename;
+  gram_pos.file_name = xstrdup (real_file_name);    /* Not freed here.  */
+  gram_pos.line_number = 1;
+
+  for (;;)
+    {
+      int c;
+      bool comment;         /* The line is a comment line.  */
+      bool hidden;          /* The line is a key/value pair behind a '!'.  */
+
+      c = phase2_getc ();
+
+      if (c == EOF)
+        break;
+
+      comment = false;
+      hidden = false;
+      if (c == '#')
+        comment = true;
+      else if (c == '!')
+        {
+          /* For compatibility with write-properties.c, we treat '!' not
+             followed by space as a fuzzy or untranslated message.  */
+          int c2 = phase2_getc ();
+          if (c2 == ' ' || c2 == '\n' || c2 == EOF)
+            comment = true;
+          else
+            hidden = true;
+          phase2_ungetc (c2);   /* c2 starts the comment text or the key.  */
+        }
+      else
+        phase2_ungetc (c);      /* c is the first character of the key.  */
+
+      if (comment)
+        {
+          /* A comment line.  A converted copy of its text is not freed here.  */
+          static char *buffer;          /* Reused across comment lines.  */
+          static size_t bufmax;         /* Allocated size of buffer.  */
+          static size_t buflen;         /* Number of bytes stored.  */
+
+          buflen = 0;
+          for (;;)
+            {
+              c = phase2_getc ();
+
+              if (buflen >= bufmax)     /* Done first, so the NUL below fits.  */
+                {
+                  bufmax += 100;
+                  buffer = xrealloc (buffer, bufmax);
+                }
+
+              if (c == EOF || c == '\n')
+                break;
+
+              buffer[buflen++] = c;
+            }
+          buffer[buflen] = '\0';
+
+          po_callback_comment_dispatcher (conv_from_java (conv_from_iso_8859_1 (buffer)));
+        }
+      else
+        {
+          /* A key/value pair.  */
+          char *msgid;
+          lex_pos_ty msgid_pos;
+
+          msgid_pos = gram_pos;         /* Position before the key is read.  */
+          msgid = read_escaped_string (true);
+          if (msgid == NULL)
+            /* Skip blank line.  */
+            ;
+          else
+            {
+              char *msgstr;
+              lex_pos_ty msgstr_pos;
+              bool force_fuzzy;
+
+              msgstr_pos = gram_pos;    /* Position before the value is read.  */
+              msgstr = read_escaped_string (false);
+              if (msgstr == NULL)       /* No value on this line.  */
+                msgstr = xstrdup ("");
+
+              /* Be sure to make the message fuzzy if it was commented out
+                 and if it is not already header/fuzzy/untranslated.  */
+              force_fuzzy = (hidden && msgid[0] != '\0' && msgstr[0] != '\0');
+
+              po_callback_message (NULL, msgid, &msgid_pos, NULL,
+                                   msgstr, strlen (msgstr) + 1, &msgstr_pos,
+                                   NULL, NULL, NULL,
+                                   force_fuzzy, false);
+            }
+        }
+    }
+
+  fp = NULL;                    /* Reset the file-level reader state.  */
+  real_file_name = NULL;
+  gram_pos.line_number = 0;
+}
+
+const struct catalog_input_format input_format_properties =
+{
+  properties_parse,                     /* parse: the reader defined above */
+  true                                  /* produces_utf8: strings are UTF-8 */
+};
author	Lorry Tar Creator <lorry-tar-importer@baserock.org>	2014-12-24 07:38:37 +0000
committer	<>	2015-02-02 12:02:29 +0000
commit	482840e61f86ca321838a91e902c41d40c098bbb (patch)
tree	01ea2e242fd2792d19fe192476601587901db794 /gettext-tools/src/read-properties.c
download	gettext-tarball-482840e61f86ca321838a91e902c41d40c098bbb.tar.gz