summaryrefslogtreecommitdiff
path: root/libguile/print.c
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2010-09-14 16:10:52 +0200
committerLudovic Courtès <ludo@gnu.org>2010-09-14 16:11:19 +0200
commit07f49ac786e0f1c007eb336e2fb7a572e8405316 (patch)
treeab04b1efe4ee3062959b659456b3abc53b9d1b6f /libguile/print.c
parent4ff2b9f4b6fab00e0e982ce6d1b2594c19704d6e (diff)
downloadguile-07f49ac786e0f1c007eb336e2fb7a572e8405316.tar.gz
Factorize and optimize `write' for strings and characters.
According to `write.bm', this makes `write' 2.6 times faster for strings. * libguile/print.c (iprin1): Use `write_character' when `SCM_WRITINGP (pstate)' and `SCM_CHARP (exp)' or `scm_is_string (exp)'. (scm_i_charprint): Remove. (display_character, write_character): New functions. (scm_write_char): Use `display_character' instead of `scm_i_charprint'. * libguile/print.h (scm_i_charprint): Remove declaration. * benchmark-suite/benchmarks/write.bm: New file. * benchmark-suite/Makefile.am (SCM_BENCHMARKS): Add `benchmarks/write.bm'.
Diffstat (limited to 'libguile/print.c')
-rw-r--r--libguile/print.c401
1 files changed, 199 insertions, 202 deletions
diff --git a/libguile/print.c b/libguile/print.c
index 212b70d2b..5acb06b3e 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -54,6 +54,14 @@
+/* Character printers. */
+
+static int display_character (scm_t_wchar, SCM,
+ scm_t_string_failed_conversion_handler);
+static void write_character (scm_t_wchar, SCM, int);
+
+
+
/* {Names of immediate symbols}
*
* This table must agree with the declarations in scm.h: {Immediate Symbols}.
@@ -461,79 +469,17 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
case scm_tc3_imm24:
if (SCM_CHARP (exp))
{
- scm_t_wchar i = SCM_CHAR (exp);
- const char *name;
-
if (SCM_WRITINGP (pstate))
+ write_character (SCM_CHAR (exp), port, 0);
+ else
{
- scm_puts ("#\\", port);
- name = scm_i_charname (exp);
- if (name != NULL)
- scm_puts (name, port);
- else if (uc_is_general_category_withtable (i, UC_CATEGORY_MASK_L
- | UC_CATEGORY_MASK_M
- | UC_CATEGORY_MASK_N
- | UC_CATEGORY_MASK_P
- | UC_CATEGORY_MASK_S))
- /* Print the character if is graphic character. */
- {
- scm_t_wchar *wbuf;
- SCM wstr;
- char *buf;
- size_t len;
- const char *enc;
-
- enc = scm_i_get_port_encoding (port);
- if (uc_combining_class (i) == UC_CCC_NR)
- {
- wstr = scm_i_make_wide_string (1, &wbuf);
- wbuf[0] = i;
- }
- else
- {
- /* Character is a combining character: print it connected
- to a dotted circle instead of connecting it to the
- backslash in '#\' */
- wstr = scm_i_make_wide_string (2, &wbuf);
- wbuf[0] = SCM_CODEPOINT_DOTTED_CIRCLE;
- wbuf[1] = i;
- }
- if (enc == NULL)
- {
- if (i <= 0xFF)
- /* Character is graphic and Latin-1. Print it */
- scm_lfwrite_str (wstr, port);
- else
- /* Character is graphic but unrepresentable in
- this port's encoding. */
- PRINT_CHAR_ESCAPE (i, port);
- }
- else
- {
- buf = u32_conv_to_encoding (enc,
- iconveh_error,
- (scm_t_uint32 *) wbuf,
- 1,
- NULL,
- NULL, &len);
- if (buf != NULL)
- {
- /* Character is graphic. Print it. */
- scm_lfwrite_str (wstr, port);
- free (buf);
- }
- else
- /* Character is graphic but unrepresentable in
- this port's encoding. */
- PRINT_CHAR_ESCAPE (i, port);
- }
- }
- else
- /* Character is a non-graphical character. */
- PRINT_CHAR_ESCAPE (i, port);
+ if (!display_character (SCM_CHAR (exp), port,
+ scm_i_get_conversion_strategy (port)))
+ scm_encoding_error (__func__, errno,
+ "cannot convert to output locale",
+ "UTF-32", scm_i_get_port_encoding (port),
+ scm_string (scm_list_1 (exp)));
}
- else
- scm_i_charprint (i, port);
}
else if (SCM_IFLAGP (exp)
&& ((size_t) SCM_IFLAGNUM (exp) < (sizeof iflagnames / sizeof (char *))))
@@ -597,132 +543,13 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
case scm_tc7_string:
if (SCM_WRITINGP (pstate))
{
- size_t i, len;
- static char const hex[] = "0123456789abcdef";
- char buf[9];
-
+ size_t len, i;
scm_putc ('"', port);
len = scm_i_string_length (exp);
for (i = 0; i < len; ++i)
- {
- scm_t_wchar ch = scm_i_string_ref (exp, i);
- int printed = 0;
-
- if (ch == ' ' || ch == '\n')
- {
- scm_putc (ch, port);
- printed = 1;
- }
- else if (ch == '"' || ch == '\\')
- {
- scm_putc ('\\', port);
- scm_i_charprint (ch, port);
- printed = 1;
- }
- else
- if (uc_is_general_category_withtable
- (ch,
- UC_CATEGORY_MASK_L | UC_CATEGORY_MASK_M |
- UC_CATEGORY_MASK_N | UC_CATEGORY_MASK_P |
- UC_CATEGORY_MASK_S))
- {
- /* Print the character since it is a graphic
- character. */
- scm_t_wchar *wbuf;
- SCM wstr = scm_i_make_wide_string (1, &wbuf);
- char *buf;
- size_t len;
-
- if (scm_i_get_port_encoding (port))
- {
- wstr = scm_i_make_wide_string (1, &wbuf);
- wbuf[0] = ch;
- buf = u32_conv_to_encoding (scm_i_get_port_encoding (port),
- iconveh_error,
- (scm_t_uint32 *) wbuf,
- 1 ,
- NULL,
- NULL, &len);
- if (buf != NULL)
- {
- /* Character is graphic and representable in
- this encoding. Print it. */
- scm_lfwrite_str (wstr, port);
- free (buf);
- printed = 1;
- }
- }
- else
- if (ch <= 0xFF)
- {
- scm_putc (ch, port);
- printed = 1;
- }
- }
-
- if (!printed)
- {
- /* Character is graphic but unrepresentable in
- this port's encoding or is not graphic. */
- if (!SCM_R6RS_ESCAPES_P)
- {
- if (ch <= 0xFF)
- {
- buf[0] = '\\';
- buf[1] = 'x';
- buf[2] = hex[ch / 16];
- buf[3] = hex[ch % 16];
- scm_lfwrite (buf, 4, port);
- }
- else if (ch <= 0xFFFF)
- {
- buf[0] = '\\';
- buf[1] = 'u';
- buf[2] = hex[(ch & 0xF000) >> 12];
- buf[3] = hex[(ch & 0xF00) >> 8];
- buf[4] = hex[(ch & 0xF0) >> 4];
- buf[5] = hex[(ch & 0xF)];
- scm_lfwrite (buf, 6, port);
- }
- else if (ch > 0xFFFF)
- {
- buf[0] = '\\';
- buf[1] = 'U';
- buf[2] = hex[(ch & 0xF00000) >> 20];
- buf[3] = hex[(ch & 0xF0000) >> 16];
- buf[4] = hex[(ch & 0xF000) >> 12];
- buf[5] = hex[(ch & 0xF00) >> 8];
- buf[6] = hex[(ch & 0xF0) >> 4];
- buf[7] = hex[(ch & 0xF)];
- scm_lfwrite (buf, 8, port);
- }
- }
- else
- {
- scm_t_wchar ch2 = ch;
-
- /* Print an R6RS variable-length hex escape: "\xNNNN;"
- */
- int i = 8;
- buf[i] = ';';
- i --;
- if (ch == 0)
- buf[i--] = '0';
- else
- while (ch2 > 0)
- {
- buf[i] = hex[ch2 & 0xF];
- ch2 >>= 4;
- i --;
- }
- buf[i] = 'x';
- i --;
- buf[i] = '\\';
- scm_lfwrite (buf + i, 9 - i, port);
- }
- }
- }
+ write_character (scm_i_string_ref (exp, i), port, 1);
+
scm_putc ('"', port);
scm_remember_upto_here_1 (exp);
}
@@ -917,16 +744,179 @@ scm_prin1 (SCM exp, SCM port, int writingp)
}
}
-/* Print a character.
- */
-void
-scm_i_charprint (scm_t_wchar ch, SCM port)
+/* Attempt to display CH to PORT according to STRATEGY. Return non-zero
+ if CH was successfully displayed, zero otherwise (e.g., if it was not
+ representable in PORT's encoding.) */
+static int
+display_character (scm_t_wchar ch, SCM port,
+ scm_t_string_failed_conversion_handler strategy)
{
- scm_t_wchar *wbuf;
- SCM wstr = scm_i_make_wide_string (1, &wbuf);
+ int printed;
+ const char *encoding;
+
+ encoding = scm_i_get_port_encoding (port);
+ if (encoding == NULL)
+ {
+ if (ch <= 0xff)
+ {
+ scm_putc (ch, port);
+ printed = 1;
+ }
+ else
+ printed = 0;
+ }
+ else
+ {
+ size_t len;
+ char locale_encoded[sizeof (ch)], *result;
+
+ len = sizeof (locale_encoded);
+ result = u32_conv_to_encoding (encoding, strategy,
+ (scm_t_uint32 *) &ch, 1,
+ NULL, locale_encoded, &len);
+ if (result != NULL)
+ {
+ /* CH is graphic; print it. */
+
+ if (strategy == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
+ {
+ /* Apply the same escaping syntax as in `write_character'. */
+ if (SCM_R6RS_ESCAPES_P)
+ scm_i_unistring_escapes_to_r6rs_escapes (result, &len);
+ else
+ scm_i_unistring_escapes_to_guile_escapes (result, &len);
+ }
- wbuf[0] = ch;
- scm_lfwrite_str (wstr, port);
+ scm_lfwrite (result, len, port);
+ printed = 1;
+
+ if (SCM_UNLIKELY (result != locale_encoded))
+ free (result);
+ }
+ else
+ printed = 0;
+ }
+
+ return printed;
+}
+
+/* Write CH to PORT, escaping it if it's non-graphic or not
+ representable in PORT's encoding. If STRING_ESCAPES_P is true and CH
+ needs to be escaped, it is escaped using the in-string escape syntax;
+ otherwise the character escape syntax is used. */
+static void
+write_character (scm_t_wchar ch, SCM port, int string_escapes_p)
+{
+ int printed = 0;
+
+ if (string_escapes_p)
+ {
+ /* Check if CH deserves special treatment. */
+ if (ch == '"' || ch == '\\')
+ {
+ scm_putc ('\\', port);
+ scm_putc (ch, port);
+ printed = 1;
+ }
+ else if (ch == ' ' || ch == '\n')
+ {
+ scm_putc (ch, port);
+ printed = 1;
+ }
+ }
+ else
+ scm_puts ("#\\", port);
+
+ if (!printed
+ && uc_is_general_category_withtable (ch,
+ UC_CATEGORY_MASK_L |
+ UC_CATEGORY_MASK_M |
+ UC_CATEGORY_MASK_N |
+ UC_CATEGORY_MASK_P |
+ UC_CATEGORY_MASK_S))
+ /* CH is graphic; attempt to display it. */
+ printed = display_character (ch, port, iconveh_error);
+
+ if (!printed)
+ {
+ /* CH isn't graphic or cannot be represented in PORT's
+ encoding. */
+
+ if (string_escapes_p)
+ {
+ /* Represent CH using the in-string escape syntax. */
+
+ static const char hex[] = "0123456789abcdef";
+ char buf[9];
+
+ if (!SCM_R6RS_ESCAPES_P)
+ {
+ if (ch <= 0xFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'x';
+ buf[2] = hex[ch / 16];
+ buf[3] = hex[ch % 16];
+ scm_lfwrite (buf, 4, port);
+ }
+ else if (ch <= 0xFFFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'u';
+ buf[2] = hex[(ch & 0xF000) >> 12];
+ buf[3] = hex[(ch & 0xF00) >> 8];
+ buf[4] = hex[(ch & 0xF0) >> 4];
+ buf[5] = hex[(ch & 0xF)];
+ scm_lfwrite (buf, 6, port);
+ }
+ else if (ch > 0xFFFF)
+ {
+ buf[0] = '\\';
+ buf[1] = 'U';
+ buf[2] = hex[(ch & 0xF00000) >> 20];
+ buf[3] = hex[(ch & 0xF0000) >> 16];
+ buf[4] = hex[(ch & 0xF000) >> 12];
+ buf[5] = hex[(ch & 0xF00) >> 8];
+ buf[6] = hex[(ch & 0xF0) >> 4];
+ buf[7] = hex[(ch & 0xF)];
+ scm_lfwrite (buf, 8, port);
+ }
+ }
+ else
+ {
+ /* Print an R6RS variable-length hex escape: "\xNNNN;". */
+ scm_t_wchar ch2 = ch;
+
+ int i = 8;
+ buf[i] = ';';
+ i --;
+ if (ch == 0)
+ buf[i--] = '0';
+ else
+ while (ch2 > 0)
+ {
+ buf[i] = hex[ch2 & 0xF];
+ ch2 >>= 4;
+ i --;
+ }
+ buf[i] = 'x';
+ i --;
+ buf[i] = '\\';
+ scm_lfwrite (buf + i, 9 - i, port);
+ }
+ }
+ else
+ {
+ /* Represent CH using the character escape syntax. */
+ const char *name;
+
+ name = scm_i_charname (SCM_MAKE_CHAR (ch));
+ if (name != NULL)
+ scm_puts (name, port);
+ else
+ PRINT_CHAR_ESCAPE (ch, port);
+ }
+ }
}
/* Print an integer.
@@ -1248,8 +1238,15 @@ SCM_DEFINE (scm_write_char, "write-char", 1, 1, 0,
SCM_VALIDATE_CHAR (1, chr);
SCM_VALIDATE_OPORT_VALUE (2, port);
-
- scm_i_charprint (SCM_CHAR (chr), SCM_COERCE_OUTPORT (port));
+
+ port = SCM_COERCE_OUTPORT (port);
+ if (!display_character (SCM_CHAR (chr), port,
+ scm_i_get_conversion_strategy (port)))
+ scm_encoding_error (__func__, errno,
+ "cannot convert to output locale",
+ "UTF-32", scm_i_get_port_encoding (port),
+ scm_string (scm_list_1 (chr)));
+
#if 0
#ifdef HAVE_PIPE
# ifdef EPIPE