diff options
author | Eric Blake <ebb9@byu.net> | 2008-12-09 20:56:37 -0700 |
---|---|---|
committer | Eric Blake <ebb9@byu.net> | 2008-12-15 05:57:01 -0700 |
commit | 789d06f131983794a089ad6c09e79e3613ac132f (patch) | |
tree | 5861ded21521b2e75adada9a93167838956408b4 | |
parent | 41d0c77062c8046730101d82b56f682edc70957e (diff) | |
download | m4-789d06f131983794a089ad6c09e79e3613ac132f.tar.gz |
Avoid quadratic behavior for some cases of divert/undivert.
* m4/output.c (struct m4_diversion): Improve comments.
(m4_tmpname, m4_make_diversion): Strengthen preconditions.
(m4_tmprename): New function.
(m4_output_init, m4_output_exit): Move after internal functions.
(make_room_for): Don't bother copying uninitialized bytes.
(insert_diversion_helper): Transfer metadata, rather than copying
contents, when undiverting into a previously unused diversion.
* tests/builtins.at (divert): Add a check to the test.
* doc/m4.texinfo (Undivert): Enhance test.
* NEWS: Document the speedup.
Signed-off-by: Eric Blake <ebb9@byu.net>
-rw-r--r-- | ChangeLog | 14 | ||||
-rw-r--r-- | NEWS | 2 | ||||
-rw-r--r-- | doc/m4.texinfo | 8 | ||||
-rw-r--r-- | m4/output.c | 170 | ||||
-rw-r--r-- | tests/builtins.at | 14 |
5 files changed, 155 insertions, 53 deletions
@@ -1,3 +1,17 @@ +2008-12-15 Eric Blake <ebb9@byu.net> + + Avoid quadratic behavior for some cases of divert/undivert. + * m4/output.c (struct m4_diversion): Improve comments. + (m4_tmpname, m4_make_diversion): Strengthen preconditions. + (m4_tmprename): New function. + (m4_output_init, m4_output_exit): Move after internal functions. + (make_room_for): Don't bother copying uninitialized bytes. + (insert_diversion_helper): Transfer metadata, rather than copying + contents, when undiverting into a previously unused diversion. + * tests/builtins.at (divert): Add a check to the test. + * doc/m4.texinfo (Undivert): Enhance test. + * NEWS: Document the speedup. + 2008-12-02 Eric Blake <ebb9@byu.net> Stage 27: Allow embedded NUL in text processing macros. @@ -244,7 +244,7 @@ promoted to 2.0. ** The `divert' builtin now accepts an optional second argument of text that is immediately placed in the new diversion, regardless of whether the current expansion is nested within argument collection of another - macro. + macro. It has also been optimized for faster performance. ** The `-d'/`--debug' command-line option now understands `-' and `+' modifiers, the way the builtin `debugmode' has always done; this allows diff --git a/doc/m4.texinfo b/doc/m4.texinfo index bee9aec9..173921cc 100644 --- a/doc/m4.texinfo +++ b/doc/m4.texinfo @@ -6267,6 +6267,14 @@ undivert(`0') undivert @result{}diverted text @result{} +divert(`1')more +divert(`2')undivert(`1')diverted text`'divert +@result{} +undivert(`1') +@result{} +undivert(`2') +@result{}more +@result{}diverted text @end example When a diversion has been undiverted, the diverted text is discarded, diff --git a/m4/output.c b/m4/output.c index 7f6f9c12..6e810e5c 100644 --- a/m4/output.c +++ b/m4/output.c @@ -65,13 +65,13 @@ struct m4_diversion { union { - FILE *file; /* diversion file on disk */ - char *buffer; /* in-memory diversion buffer */ - m4_diversion *next; /* free-list pointer */ + FILE *file; /* Diversion file on disk. */ + char *buffer; /* Malloc'd diversion buffer. */ + m4_diversion *next; /* Free-list pointer */ } u; - int divnum; /* which diversion this represents */ - size_t size; /* usable size before reallocation */ - size_t used; /* used length in characters */ + int divnum; /* Which diversion this represents. */ + size_t size; /* Usable size before reallocation. */ + size_t used; /* Used buffer length, or tmp file exists. */ }; /* Sorted set of diversions 1 through INT_MAX. */ @@ -89,20 +89,32 @@ static m4_obstack diversion_storage; /* Total size of all in-memory buffer sizes. */ static size_t total_buffer_size; -/* Current output diversion, NULL if output is being currently discarded. */ +/* Current output diversion, NULL if output is being currently + discarded. output_diversion->u is guaranteed non-NULL except when + the diversion has never been used; use size to determine if it is a + malloc'd buffer or a FILE. output_diversion->used is 0 if u.file + is stdout, and non-zero if this is a malloc'd buffer or a temporary + diversion file. */ static m4_diversion *output_diversion; -/* Values of some output_diversion fields, cached out for speed. */ -static FILE *output_file; /* current value of (file) */ -static char *output_cursor; /* current value of (buffer + used) */ -static size_t output_unused; /* current value of (size - used) */ +/* Cache of output_diversion->u.file, only valid when + output_diversion->size is 0. */ +static FILE *output_file; + +/* Cache of output_diversion->u.buffer + output_diversion->used, only + valid when output_diversion->size is non-zero. */ +static char *output_cursor; + +/* Cache of output_diversion->size - output_diversion->used, only + valid when output_diversion->size is non-zero. */ +static size_t output_unused; /* Temporary directory holding all spilled diversion files. */ static m4_temp_dir *output_temp_dir; -/* --- OUTPUT INITIALIZATION --- */ +/* Internal routines. */ /* Callback for comparing list elements ELT1 and ELT2 for order in diversion_table. */ @@ -126,32 +138,6 @@ threshold_diversion_CB (const void *elt, const void *threshold) return diversion->divnum >= *(const int *) threshold; } -/* Initialize the output engine. */ -void -m4_output_init (m4 *context) -{ - diversion_table = gl_oset_create_empty (GL_AVLTREE_OSET, cmp_diversion_CB, - NULL); - div0.u.file = stdout; - m4_set_current_diversion (context, 0); - output_diversion = &div0; - output_file = stdout; - obstack_init (&diversion_storage); -} - -/* Clean up memory allocated during use. */ -void -m4_output_exit (void) -{ - /* Order is important, since we may have registered cleanup_tmpfile - as an atexit handler, and it must not traverse stale memory. */ - gl_oset_t table = diversion_table; - assert (gl_oset_size (diversion_table) == 0); - diversion_table = NULL; - gl_oset_free (table); - obstack_free (&diversion_storage, NULL); -} - /* Clean up any temporary directory. Designed for use as an atexit handler, where it is not safe to call exit() recursively; so this calls _exit if a problem is encountered. */ @@ -199,6 +185,7 @@ m4_tmpname (int divnum) buffer = (char *) obstack_alloc (&diversion_storage, INT_BUFSIZE_BOUND (divnum)); } + assert (0 < divnum); if (snprintf (&buffer[offset], INT_BUFSIZE_BOUND (divnum), "%d", divnum) < 0) abort (); return buffer; @@ -207,9 +194,10 @@ m4_tmpname (int divnum) /* Create a temporary file for diversion DIVNUM open for reading and writing in a secure temp directory. The file will be automatically closed and deleted on a fatal signal. The file can be closed and - reopened with m4_tmpclose and m4_tmpopen; when finally done with - the file, close it and use m4_tmpremove. Exits on failure, so the - return value is always an open file. */ + reopened with m4_tmpclose and m4_tmpopen, or moved with + m4_tmprename; when finally done with the file, close it with + m4_tmpremove. Exits on failure, so the return value is always an + open file. */ static FILE * m4_tmpfile (m4 *context, int divnum) { @@ -275,6 +263,53 @@ m4_tmpremove (int divnum) return cleanup_temp_file (output_temp_dir, m4_tmpname (divnum)); } +/* Transfer the temporary file for diversion OLDNUM to the previously + unused diversion NEWNUM. Return an open stream visiting the new + temporary file, exiting on failure. */ +static FILE* +m4_tmprename (m4 *context, int oldnum, int newnum) +{ + /* m4_tmpname reuses its return buffer. */ + char *oldname = xstrdup (m4_tmpname (oldnum)); + const char *newname = m4_tmpname (newnum); + register_temp_file (output_temp_dir, newname); + if (rename (oldname, newname)) + m4_error (context, EXIT_FAILURE, errno, NULL, + _("cannot create temporary file for diversion")); + unregister_temp_file (output_temp_dir, oldname); + free (oldname); + return m4_tmpopen (context, newnum); +} + + +/* --- OUTPUT INITIALIZATION --- */ + +/* Initialize the output engine. */ +void +m4_output_init (m4 *context) +{ + diversion_table = gl_oset_create_empty (GL_AVLTREE_OSET, cmp_diversion_CB, + NULL); + div0.u.file = stdout; + m4_set_current_diversion (context, 0); + output_diversion = &div0; + output_file = stdout; + obstack_init (&diversion_storage); +} + +/* Clean up memory allocated during use. */ +void +m4_output_exit (void) +{ + /* Order is important, since we may have registered cleanup_tmpfile + as an atexit handler, and it must not traverse stale memory. */ + gl_oset_t table = diversion_table; + assert (gl_oset_size (diversion_table) == 0); + diversion_table = NULL; + gl_oset_free (table); + obstack_free (&diversion_storage, NULL); +} + /* Reorganize in-memory diversion buffers so the current diversion can accomodate LENGTH more characters without further reorganization. The current diversion buffer is made bigger if possible. But to make room @@ -385,11 +420,14 @@ make_room_for (m4 *context, size_t length) _("cannot close temporary file for diversion")); } - /* The buffer may be safely reallocated. */ - + /* The current buffer may be safely reallocated. */ assert (wanted_size >= length); - output_diversion->u.buffer = xrealloc (output_diversion->u.buffer, - wanted_size); + { + char *buffer = output_diversion->u.buffer; + output_diversion->u.buffer = xcharalloc ((size_t) wanted_size); + memcpy (output_diversion->u.buffer, buffer, output_diversion->used); + free (buffer); + } total_buffer_size += wanted_size - output_diversion->size; output_diversion->size = wanted_size; @@ -664,10 +702,10 @@ m4_make_diversion (m4 *context, int divnum) assert (output_diversion->divnum != divnum); if (!output_diversion->size && !output_diversion->u.file) { + assert (!output_diversion->used); if (!gl_oset_remove (diversion_table, output_diversion)) assert (false); output_diversion->u.next = free_list; - output_diversion->used = 0; free_list = output_diversion; } else if (output_diversion->size) @@ -802,18 +840,46 @@ insert_diversion_helper (m4 *context, m4_diversion *diversion, bool escaped) { if (diversion->size) { - char *str = diversion->u.buffer; - size_t len = diversion->used; - if (escaped) - str = quotearg_style_mem (escape_quoting_style, str, len); - m4_output_text (context, str, escaped ? strlen (str) : len); + if (!output_diversion->u.file) + { + /* Transferring diversion metadata is faster than + copying contents. */ + assert (!output_diversion->used && output_diversion != &div0 + && !output_file); + output_diversion->u.buffer = diversion->u.buffer; + output_diversion->size = diversion->size; + output_cursor = diversion->u.buffer + diversion->used; + output_unused = diversion->size - diversion->used; + diversion->u.buffer = NULL; + } + else + { + char *str = diversion->u.buffer; + size_t len = diversion->used; + if (escaped) + str = quotearg_style_mem (escape_quoting_style, str, len); + m4_output_text (context, str, escaped ? strlen (str) : len); + } + } + else if (!output_diversion->u.file) + { + /* Transferring diversion metadata is faster than copying + contents. */ + assert (!output_diversion->used && output_diversion != &div0 + && !output_file); + output_diversion->u.file = m4_tmprename (context, diversion->divnum, + output_diversion->divnum); + output_diversion->used = 1; + output_file = output_diversion->u.file; + diversion->u.file = NULL; + diversion->size = 1; } else { assert (diversion->used); if (!diversion->u.file) diversion->u.file = m4_tmpopen (context, diversion->divnum); - m4_insert_file (context, diversion->u.file); + insert_file (context, diversion->u.file, escaped); } m4_set_output_line (context, -1); diff --git a/tests/builtins.at b/tests/builtins.at index 83810fa2..f819403b 100644 --- a/tests/builtins.at +++ b/tests/builtins.at @@ -353,6 +353,20 @@ rm expout2 AT_CHECK_M4([in.m4], [0], [expout]) +dnl Avoid quadratic copying time when transferring diversions; test +dnl both in-memory and diversions spilled to a file. +AT_DATA([in.m4], [[include(`forloop2.m4')dnl +divert(`1')format(`%10000s', `')dnl +forloop(`i', `1', `10000', + `divert(incr(i))undivert(i)')dnl +divert(`9001')format(`%1000000s', `')dnl +forloop(`i', `9001', `10000', + `divert(incr(i))undivert(i)')dnl +divert(`-1')undivert +]]) + +AT_CHECK_M4([-I "$top_srcdir/examples" in.m4]) + AT_CLEANUP |