1 files changed, 323 insertions, 320 deletions
diff --git a/maint/ucptest.c b/maint/ucptest.c
index e946226..49616e2 100644
--- a/maint/ucptest.c
+++ b/maint/ucptest.c
@@ -16,36 +16,58 @@
 /* This is a hacked-up program for testing the Unicode properties tables of
 PCRE2. It can also be used for finding characters with certain properties.
 I wrote it to help with debugging PCRE, and have added things that I found
-useful, in a rather haphazard way. The code has never been "tidied" or checked
-for robustness.
-
-If there are arguments, they are a list of hexadecimal code points whose
-properties are to be output. Otherwise, the program expects to read commands on
-stdin, and it writes output to stdout. There are two commands:
-
-"findprop" must be followed by a list of Unicode code points as hex numbers
-(without any prefixes). The output is one line per character, giving its
-Unicode properties followed by its other case if there is one, followed by its
-Script Extension list if it is not just the same as the base script.
-
-"find" must be followed by a list of property names and their values. This
-finds characters that have those properties. If multiple properties are listed,
-they must all be matched. Currently supported:
+useful, in a rather haphazard way. The code has never been seriously tidied or
+checked for robustness, but it shouldn't now give compiler warnings.
+
+There is only one option: "-s". If given, it applies only to the "findprop" 
+command. It causes the UTF-8 sequence of bytes that encode the character to be 
+output between angle brackets at the end of the line. On a UTF-8 terminal, this 
+will show the appropriate graphic for the code point.
+
+If the command has arguments, they are concatenated into a buffer, separated by
+spaces. If the first argument starts "U+" or consists entirely of hexadecimal
+digits, "findprop" is inserted at the start. The buffer is then processed as a
+single line file, after which the program exits. If there are no arguments, the
+program reads commands line by line on stdin and writes output to stdout. The 
+return code is always zero.
+
+There are three commands:
+
+"findprop" must be followed by a space-separated list of Unicode code points as
+hex numbers, either without any prefix or starting with "U+". The output is one
+line per character, giving its Unicode properties followed by its other case or 
+cases if one or more exist, followed by its Script Extension list if it is not
+just the same as the base script. This list is in square brackets. The
+properties are:
+
+General type        e.g. Letter
+Specific type       e.g. Upper case letter
+Script              e.g. Medefaidrin
+Grapheme break type e.g. Extend (most common is Other)
+
+"find" must be followed by a list of property names and their values. The 
+values are case-sensitive. This finds characters that have those properties. If
+multiple properties are listed, they must all be matched. Currently supported:
 
   script <name>    The character must have this script property. Only one
                      such script may be given.
   scriptx <name>   This script must be in the character's Script Extension
                      property list. If this is used many times, all the given
                      scripts must be present.
-  type <abbrev>    The character's type (e.g. Lu or Nd) must match.
+  type <abbrev>    The character's specific type (e.g. Lu or Nd) must match.
   gbreak <name>    The grapheme break property must match.
 
 If a <name> or <abbrev> is preceded by !, the value must NOT be present. For
 Script Extensions, there may be a mixture of positive and negative
 requirements. All must be satisfied.
 
-No more than 100 characters are output. If there are more, the list ends with
-... */
+Sequences of two or more characters are shown as ranges, for example
+U+0041..U+004A. No more than 100 lines are output. If there are more
+characters, the list ends with ... 
+
+"list" must be followed by a property name (script, type, or gbreak). The
+defined values for that property are listed. */
+
 
 #ifdef HAVE_CONFIG_H
 #include "../src/config.h"
@@ -91,228 +113,99 @@ No more than 100 characters are output. If there are more, the list ends with
 
 /* -------------------------------------------------------------------*/
 
-
-const unsigned char *script_names[] = {
-  US"Unknown",
-  US"Arabic",
-  US"Armenian",
-  US"Bengali",
-  US"Bopomofo",
-  US"Braille",
-  US"Buginese",
-  US"Buhid",
-  US"Canadian_Aboriginal",
-  US"Cherokee",
-  US"Common",
-  US"Coptic",
-  US"Cypriot",
-  US"Cyrillic",
-  US"Deseret",
-  US"Devanagari",
-  US"Ethiopic",
-  US"Georgian",
-  US"Glagolitic",
-  US"Gothic",
-  US"Greek",
-  US"Gujarati",
-  US"Gurmukhi",
-  US"Han",
-  US"Hangul",
-  US"Hanunoo",
-  US"Hebrew",
-  US"Hiragana",
-  US"Inherited",
-  US"Kannada",
-  US"Katakana",
-  US"Kharoshthi",
-  US"Khmer",
-  US"Lao",
-  US"Latin",
-  US"Limbu",
-  US"Linear_B",
-  US"Malayalam",
-  US"Mongolian",
-  US"Myanmar",
-  US"New_Tai_Lue",
-  US"Ogham",
-  US"Old_Italic",
-  US"Old_Persian",
-  US"Oriya",
-  US"Osmanya",
-  US"Runic",
-  US"Shavian",
-  US"Sinhala",
-  US"Syloti_Nagri",
-  US"Syriac",
-  US"Tagalog",
-  US"Tagbanwa",
-  US"Tai_Le",
-  US"Tamil",
-  US"Telugu",
-  US"Thaana",
-  US"Thai",
-  US"Tibetan",
-  US"Tifinagh",
-  US"Ugaritic",
-  US"Yi",
-  /* New for Unicode 5.0: */
-  US"Balinese",
-  US"Cuneiform",
-  US"Nko",
-  US"Phags_Pa",
-  US"Phoenician",
-  /* New for Unicode 5.1: */
-  US"Carian",
-  US"Cham",
-  US"Kayah_Li",
-  US"Lepcha",
-  US"Lycian",
-  US"Lydian",
-  US"Ol_Chiki",
-  US"Rejang",
-  US"Saurashtra",
-  US"Sundanese",
-  US"Vai",
-  /* New for Unicode 5.2: */
-  US"Avestan",
-  US"Bamum",
-  US"Egyptian_Hieroglyphs",
-  US"Imperial_Aramaic",
-  US"Inscriptional_Pahlavi",
-  US"Inscriptional_Parthian",
-  US"Javanese",
-  US"Kaithi",
-  US"Lisu",
-  US"Meetei_Mayek",
-  US"Old_South_Arabian",
-  US"Old_Turkic",
-  US"Samaritan",
-  US"Tai_Tham",
-  US"Tai_Viet",
-  /* New for Unicode 6.0.0 */
-  US"Batak",
-  US"Brahmi",
-  US"Mandaic",
-  /* New for Unicode 6.1.0 */
-  US"Chakma",
-  US"Meroitic_Cursive",
-  US"Meroitic_Hieroglyphs",
-  US"Miao",
-  US"Sharada",
-  US"Sora Sompent",
-  US"Takri",
-  /* New for Unicode 7.0.0 */
-  US"Bassa_Vah",
-  US"Caucasian_Albanian",
-  US"Duployan",
-  US"Elbasan",
-  US"Grantha",
-  US"Khojki",
-  US"Khudawadi",
-  US"Linear_A",
-  US"Mahajani",
-  US"Manichaean",
-  US"Mende_Kikakui",
-  US"Modi",
-  US"Mro",
-  US"Nabataean",
-  US"Old_North_Arabian",
-  US"Old_Permic",
-  US"Pahawh_Hmong",
-  US"Palmyrene",
-  US"Psalter_Pahlavi",
-  US"Pau_Cin_Hau",
-  US"Siddham",
-  US"Tirhuta",
-  US"Warang_Citi",
-  /* New for Unicode 8.0.0 */
-  US"Ahom",
-  US"Anatolian_Hieroglyphs",
-  US"Hatran",
-  US"Multani",
-  US"Old_Hungarian",
-  US"SignWriting",
-  /* New for Unicode 10.0.0 (no update since 8.0.0) */
-  US"Adlam",
-  US"Bhaiksuki",
-  US"Marchen",
-  US"Newa",
-  US"Osage",
-  US"Tangut",
-  US"Masaram_Gondi",
-  US"Nushu",
-  US"Soyombo",
-  US"Zanabazar_Square",
-  /* New for Unicode 11.0.0 */
-  US"Dogra",
-  US"Gunjala_Gondi",
-  US"Hanifi_Rohingya",
-  US"Makasar",
-  US"Medefaidrin",
-  US"Old_Sogdian",
-  US"Sogdian",
-  /* New for Unicode 12.0.0 */
-  US"Elymaic",
-  US"Nandinagari",
-  US"Nyiakeng_Puachue_Hmong",
-  US"Wancho",
-  /* New for Unicode 13.0.0 */
-  US"Chorasmian",
-  US"Dives_Akuru",
-  US"Khitan_Small_Script",
-  US"Yezidi"
+static BOOL show_character = FALSE;  /* set TRUE when "-s" is the first argument */
+
+static const unsigned char *type_names[] = {  /* abbreviation, description pairs */
+  US"Cc", US"Control",  /* "find type" records the pair index (i/2) as the type */
+  US"Cf", US"Format",
+  US"Cn", US"Unassigned",
+  US"Co", US"Private use",
+  US"Cs", US"Surrogate",
+  US"Ll", US"Lower case letter",
+  US"Lm", US"Modifier letter",
+  US"Lo", US"Other letter",
+  US"Lt", US"Title case letter",
+  US"Lu", US"Upper case letter",
+  US"Mc", US"Spacing mark",
+  US"Me", US"Enclosing mark",
+  US"Mn", US"Non-spacing mark",
+  US"Nd", US"Decimal number",
+  US"Nl", US"Letter number",
+  US"No", US"Other number",
+  US"Pc", US"Connector punctuation",
+  US"Pd", US"Dash punctuation",
+  US"Pe", US"Close punctuation",
+  US"Pf", US"Final punctuation",
+  US"Pi", US"Initial punctuation",
+  US"Po", US"Other punctuation",
+  US"Ps", US"Open punctuation",
+  US"Sc", US"Currency symbol",
+  US"Sk", US"Modifier symbol",
+  US"Sm", US"Mathematical symbol",
+  US"So", US"Other symbol",
+  US"Zl", US"Line separator",
+  US"Zp", US"Paragraph separator",
+  US"Zs", US"Space separator" 
 };
 
-const unsigned char *type_names[] = {
-  US"Cc",
-  US"Cf",
-  US"Cn",
-  US"Co",
-  US"Cs",
-  US"Ll",
-  US"Lm",
-  US"Lo",
-  US"Lt",
-  US"Lu",
-  US"Mc",
-  US"Me",
-  US"Mn",
-  US"Nd",
-  US"Nl",
-  US"No",
-  US"Pc",
-  US"Pd",
-  US"Pe",
-  US"Pf",
-  US"Pi",
-  US"Po",
-  US"Ps",
-  US"Sc",
-  US"Sk",
-  US"Sm",
-  US"So",
-  US"Zl",
-  US"Zp",
-  US"Zs"
+static const unsigned char *gb_names[] = {  /* name, description pairs */
+  US"CR",                    US"carriage return",
+  US"LF",                    US"linefeed",
+  US"Control",               US"",  /* "" = no description; "list gbreak" prints the name alone */
+  US"Extend",                US"",
+  US"Prepend",               US"",
+  US"SpacingMark",           US"",
+  US"L",                     US"Hangul syllable type L",
+  US"V",                     US"Hangul syllable type V",
+  US"T",                     US"Hangul syllable type T",
+  US"LV",                    US"Hangul syllable type LV",
+  US"LVT",                   US"Hangul syllable type LVT",
+  US"RegionalIndicator",     US"",
+  US"Other",                 US"",
+  US"ZWJ",                   US"zero width joiner",
+  US"Extended_Pictographic", US""
 };
 
-const unsigned char *gb_names[] = {
-  US"CR",
-  US"LF",
-  US"Control",
-  US"Extend",
-  US"Prepend",
-  US"SpacingMark",
-  US"L",
-  US"V",
-  US"T",
-  US"LV",
-  US"LVT",
-  US"RegionalIndicator",
-  US"Other",
-  US"ZWJ",
-  US"Extended_Pictographic"
-};
+
+static const unsigned int utf8_table1[] = {  /* [i] = largest value ord2utf8 encodes in i+1 bytes */
+  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
+
+static const int utf8_table2[] = {  /* [i] = bits ORed into the first byte of an (i+1)-byte sequence */
+  0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
+
+
+/*************************************************
+*       Convert character value to UTF-8         *
+*************************************************/
+
+/* This function takes an unsigned integer value in the range 0 -
+0x7fffffff and encodes it as a UTF-8 character in 1 to 6 bytes.
+
+Arguments:
+  cvalue     the character value
+  buffer     pointer to buffer for result - at least 6 bytes long
+
+Returns:     number of bytes placed in the buffer
+             0 if input code point is too big
+*/
+
+static size_t
+ord2utf8(unsigned int cvalue, unsigned char *buffer)
+{
+size_t i, j;
+for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  /* i ends as the count of extra bytes */
+  if (cvalue <= utf8_table1[i]) break;
+if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  /* larger than every table entry */
+buffer += i;  /* the bytes are stored last-to-first */
+for (j = i; j > 0; j--)
+ {
+ *buffer-- = 0x80 | (cvalue & 0x3f);  /* low six bits with the top bit set */
+ cvalue >>= 6;
+ }
+*buffer = utf8_table2[i] | cvalue;  /* first byte: table marker plus the remaining high bits */
+return i + 1;
+}
+
 
 
 /*************************************************
@@ -331,27 +224,46 @@ return isatty(fileno(stdin));
 
 
 /*************************************************
+*      Get script name from ucp ident            *
+*************************************************/
+
+static const char *
+get_scriptname(int script)  /* returns the table name for a script value, or "??" */
+{
+size_t i;
+const ucp_type_table *u;
+
+for (i = 0; i < PRIV(utt_size); i++)  /* linear scan of the PRIV(utt) table */
+  {
+  u = PRIV(utt) + i; 
+  if (u->type == PT_SC && u->value == script) break;  /* first PT_SC entry with this value */
+  }
+if (i < PRIV(utt_size))  /* loop left via break, so u is the matching entry */
+  return PRIV(utt_names) + u->name_offset;
+  
+return "??";  /* no PT_SC entry has this value */
+}  
+
+
+/*************************************************
 *      Print Unicode property info for a char    *
 *************************************************/
 
 static void
-print_prop(int c)
+print_prop(unsigned int c, BOOL is_just_one)
 {
 int type = UCD_CATEGORY(c);
 int fulltype = UCD_CHARTYPE(c);
 int script = UCD_SCRIPT(c);
 int scriptx = UCD_SCRIPTX(c);
 int gbprop = UCD_GRAPHBREAK(c);
-int othercase = UCD_OTHERCASE(c);
+unsigned int othercase = UCD_OTHERCASE(c);
 int caseset = UCD_CASESET(c);
 
 const unsigned char *fulltypename = US"??";
 const unsigned char *typename = US"??";
-const unsigned char *scriptname = US"??";
 const unsigned char *graphbreak = US"??";
-
-if (script < sizeof(script_names)/sizeof(char *))
-  scriptname = script_names[script];
+const unsigned char *scriptname = CUS get_scriptname(script);
 
 switch (type)
   {
@@ -420,15 +332,18 @@ switch(gbprop)
   default:                 graphbreak = US"Unknown"; break;
   }
 
-printf("%04x %s: %s, %s, %s", c, typename, fulltypename, scriptname, graphbreak);
-if (othercase != c)
+printf("U+%04X %s: %s, %s, %s", c, typename, fulltypename, scriptname, graphbreak);
+if (is_just_one && othercase != c)
   {
-  printf(", %04x", othercase);
+  printf(", U+%04X", othercase);
   if (caseset != 0)
     {
     const uint32_t *p = PRIV(ucd_caseless_sets) + caseset - 1;
     while (*(++p) < NOTACHAR)
-      if (*p != othercase && *p != c) printf(", %04x", *p);
+      {
+      unsigned int d = *p;  
+      if (d != othercase && d != c) printf(", U+%04X", d);
+      } 
     }
   }
 
@@ -436,25 +351,26 @@ if (scriptx != script)
   {
   printf(", [");
   if (scriptx >= 0)
-    {
-    scriptname = (scriptx >= sizeof(script_names)/sizeof(char *))?
-      US"??" : script_names[scriptx];
-    printf("%s", scriptname);
-    }
+    printf("%s", get_scriptname(scriptx));
   else
     {
-    char *sep = "";
+    const char *sep = "";
     const uint8_t *p = PRIV(ucd_script_sets) - scriptx;
     while (*p != 0)
       {
-      scriptname = (*p >= sizeof(script_names)/sizeof(char *))?
-        US"??" : script_names[*p++];
-      printf("%s%s", sep, scriptname);
+      printf("%s%s", sep, get_scriptname(*p++));
       sep = ", ";
       }
     }
   printf("]");
   }
+  
+if (show_character && is_just_one)
+  {
+  unsigned char buffer[8];
+  size_t len = ord2utf8(c, buffer);
+  printf(", >%.*s<", (int)len, buffer);  
+  }  
 
 printf("\n");
 }
@@ -483,7 +399,7 @@ BOOL type_not = FALSE;
 BOOL gbreak_not = FALSE;
 BOOL hadrange = FALSE;
 const ucd_record *ucd, *next_ucd;
-const char *pad = "      ";
+const char *pad = "        ";
 
 while (*s != 0)
   {
@@ -508,17 +424,20 @@ while (*s != 0)
       offset = 1;
       }
 
-    for (i = 0; i < sizeof(script_names)/sizeof(char *); i++)
+    for (i = 0; i < PRIV(utt_size); i++)
       {
-      if (strcmp(CS value + offset, script_names[i]) == 0)
+      const ucp_type_table *u = PRIV(utt) + i; 
+      if (u->type == PT_SC && strcmp(CS(value + offset), 
+            PRIV(utt_names) + u->name_offset) == 0)
         {
+        c = u->value; 
         if (name[6] == 'x')
           {
-          scriptx_list[scriptx_count++] = scriptx_not? (-i):i;
+          scriptx_list[scriptx_count++] = scriptx_not? (-c):c;
           }
         else
           {
-          if (script < 0) script = i; else
+          if (script < 0) script = c; else
             {
             printf("** Only 1 script value allowed\n");
             return;
@@ -528,9 +447,9 @@ while (*s != 0)
         }
       }
 
-    if (i >= sizeof(script_names)/sizeof(char *))
+    if (i >= PRIV(utt_size))
       {
-      printf("** Unrecognized script name '%s'\n", value);
+      printf("** Unrecognized script name \"%s\"\n", value);
       return;
       }
     }
@@ -550,17 +469,17 @@ while (*s != 0)
         offset = 1;
         }
 
-      for (i = 0; i < sizeof(type_names)/sizeof(char *); i++)
+      for (i = 0; i < sizeof(type_names)/sizeof(char *); i += 2)
         {
-        if (strcmp(CS (value + offset), type_names[i]) == 0)
+        if (strcmp(CS (value + offset), CS type_names[i]) == 0)
           {
-          type = i;
+          type = i/2;
           break;
           }
         }
       if (i >= sizeof(type_names)/sizeof(char *))
         {
-        printf("** Unrecognized type name '%s'\n", value);
+        printf("** Unrecognized type name \"%s\"\n", value);
         return;
         }
       }
@@ -581,17 +500,17 @@ while (*s != 0)
         offset = 1;
         }
 
-      for (i = 0; i < sizeof(gb_names)/sizeof(char *); i++)
+      for (i = 0; i < sizeof(gb_names)/sizeof(char *); i += 2)
         {
-        if (strcmp(CS (value + offset), gb_names[i]) == 0)
+        if (strcmp(CS (value + offset), CS gb_names[i]) == 0)
           {
-          gbreak = i;
+          gbreak = i/2;
           break;
           }
         }
       if (i >= sizeof(gb_names)/sizeof(char *))
         {
-        printf("** Unrecognized gbreak name '%s'\n", value);
+        printf("** Unrecognized gbreak name \"%s\"\n", value);
         return;
         }
       }
@@ -599,7 +518,7 @@ while (*s != 0)
 
   else
     {
-    printf("** Unrecognized property name '%s'\n", name);
+    printf("** Unrecognized property name \"%s\"\n", name);
     return;
     }
   }
@@ -617,7 +536,7 @@ for (c = 0; c <= 0x10ffff; c++)
   if (scriptx_count > 0)
     {
     const uint8_t *char_scriptx = NULL;
-    int found = 0;
+    unsigned int found = 0;
     int scriptx = UCD_SCRIPTX(c);
 
     if (scriptx < 0) char_scriptx = PRIV(ucd_script_sets) - scriptx;
@@ -701,13 +620,13 @@ for (c = 0; c <= 0x10ffff; c++)
 
   if (--i > c)
     {
-    printf("%04x..", c);
+    printf("U+%04X..", c);
     c = i;
     hadrange = TRUE;
     }
   else if (hadrange) printf("%s", pad);
 
-  print_prop(c);
+  print_prop(c, FALSE);
   if (c >= 0x100000) pad = "        ";
     else if (c >= 0x10000) pad = "       ";
   count++;
@@ -723,6 +642,101 @@ if (count == 0) printf("No characters found\n");
 
 
 /*************************************************
+*        Process command line                    *
+*************************************************/
+
+static void
+process_command_line(unsigned char *buffer)
+{
+unsigned char *s, *t;
+unsigned char name[24];
+/* Run the one "findprop", "find" or "list" command held in buffer. */
+s = buffer;
+while (isspace(*s)) s++;
+if (*s == 0) return;
+for (t = name; *s != 0 && !isspace(*s); s++)  /* an over-long word is cut */
+  if (t < name + sizeof(name) - 1) *t++ = *s;
+*t = 0;
+while (isspace(*s)) s++;
+
+if (strcmp(CS name, "findprop") == 0)
+  {
+  while (*s != 0)
+    {
+    unsigned long c;  /* full strtoul width, so huge values cannot wrap */
+    unsigned char *endptr;
+    t = s;
+    if (strncmp(CS t, "U+", 2) == 0) t += 2;
+    c = strtoul(CS t, CSS(&endptr), 16);
+    if (endptr == t || (*endptr != 0 && !isspace(*endptr)))  /* no digits, or junk */
+      {
+      while (*endptr != 0 && !isspace(*endptr)) endptr++;
+      printf("** Invalid hex number: ignored \"%.*s\"\n", (int)(endptr-s), s);
+      }
+    else
+      {
+      if (c > 0x10ffff)
+        printf("** U+%lx is too big for a Unicode code point\n", c);
+      else
+        print_prop((unsigned int)c, TRUE);
+      }
+    s = endptr;
+    while (isspace(*s)) s++;
+    }
+  }
+
+else if (strcmp(CS name, "find") == 0)
+  {
+  find_chars(s);
+  }
+
+else if (strcmp(CS name, "list") == 0)
+  {
+  while (*s != 0)
+    {
+    size_t i;
+    for (t = name; *s != 0 && !isspace(*s); s++)  /* same bounded copy as above */
+      if (t < name + sizeof(name) - 1) *t++ = *s;
+    *t = 0;
+    while (isspace(*s)) s++;
+    if (strcmp(CS name, "script") == 0 || strcmp(CS name, "scripts") == 0)
+      {
+      for (i = 0; i < PRIV(utt_size); i++)
+        if (PRIV(utt)[i].type == PT_SC)
+          printf("%s\n", PRIV(utt_names) + PRIV(utt)[i].name_offset);
+      }
+
+    else if (strcmp(CS name, "type") == 0 || strcmp(CS name, "types") == 0)
+      {
+      for (i = 0; i < sizeof(type_names)/sizeof(char *); i += 2)
+        printf("%s %s\n", type_names[i], type_names[i+1]);
+      }
+
+    else if (strcmp(CS name, "gbreak") == 0 || strcmp(CS name, "gbreaks") == 0)
+      {
+      for (i = 0; i < sizeof(gb_names)/sizeof(char *); i += 2)
+        {
+        if (gb_names[i+1][0] != 0)
+          printf("%-3s (%s)\n", gb_names[i], gb_names[i+1]);
+        else
+          printf("%s\n", gb_names[i]);
+        }
+      }
+
+    else
+      {
+      printf("** Unknown property \"%s\"\n", name);
+      break;  /* give up on the rest of the line */
+      }
+    }
+  }
+
+else printf("** Unknown test command \"%s\"\n", name);
+}
+
+
+
+/*************************************************
 *               Main program                     *
 *************************************************/
 
@@ -730,19 +744,42 @@ int
 main(int argc, char **argv)
 {
 BOOL interactive;
+int first_arg = 1;
 unsigned char buffer[1024];
 
-if (argc > 1)
+if (argc > 1 && strcmp(argv[1], "-s") == 0)
+  {
+  show_character = TRUE;
+  first_arg++;
+  }   
+
+if (argc > first_arg)
   {
   int i;
-  for (i = 1; i < argc; i++)
+  BOOL hexfirst = TRUE; 
+  char *arg = argv[first_arg]; 
+  unsigned char *s = buffer;
+  
+  if (strncmp(arg, "U+", 2) != 0 && !isdigit(*arg)) 
     {
-    unsigned char *endptr;
-    int c = strtoul(argv[i], CSS(&endptr), 16);
-    if (*endptr != 0)
-      printf("** Hex number expected; ignored '%s'\n", argv[i]);
-    else print_prop(c);
+    while (*arg != 0) 
+      {
+      if (!isxdigit(*arg++)) { hexfirst = FALSE; break; }  
+      } 
+    } 
+     
+  if (hexfirst)
+    {
+    strcpy(CS s, "findprop ");
+    s += 9;
+    }
+    
+  for (i = first_arg; i < argc; i++)
+    {
+    s += sprintf(CS s, "%s ", argv[i]);       
     }
+
+  process_command_line(buffer);
   return 0;
   }
 
@@ -754,17 +791,14 @@ if (interactive) using_history();
 
 for(;;)
   {
-  unsigned char name[24];
-  unsigned char *s, *t;
-
 #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
   if (interactive)
     {
     size_t len;
-    s = readline("> ");
+    unsigned char *s = US readline("> ");
     if (s == NULL) break;
-    len = strlen(s);
-    if (len > 0) add_history(s);
+    len = strlen(CS s);
+    if (len > 0) add_history(CS s);
     memcpy(buffer, s, len);
     buffer[len] = '\n';
     buffer[len+1] = 0;
@@ -778,39 +812,8 @@ for(;;)
     if (fgets(CS buffer, sizeof(buffer), stdin) == NULL) break;
     if (!interactive) printf("%s", buffer);
     }
-
-  s = buffer;
-  while (isspace(*s)) s++;
-  if (*s == 0) continue;
-
-  for (t = name; *s != 0 && !isspace(*s); s++) *t++ = *s;
-  *t = 0;
-  while (isspace(*s)) s++;
-
-  if (strcmp(CS name, "findprop") == 0)
-    {
-    while (*s != 0)
-      {
-      unsigned char *endptr;
-      int c = strtoul(CS s, CSS(&endptr), 16);
-
-      if (*endptr != 0 && !isspace(*endptr))
-        {
-        while (*endptr != 0 && !isspace(*endptr)) endptr++;
-        printf("** Hex number expected; ignored '%.*s'\n", endptr-s, s);
-        }
-      else  print_prop(c);
-      s = endptr;
-      while (isspace(*s)) s++;
-      }
-    }
-
-  else if (strcmp(CS name, "find") == 0)
-    {
-    find_chars(s);
-    }
-
-  else printf("** Unknown test command %s\n", name);
+    
+  process_command_line(buffer);
   }
 
 if (interactive) printf("\n");