From 9f5d9416ed81e64796ca9bc5d566638a0f2c2a6b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 9 Oct 2015 01:53:21 +0200 Subject: Issue #25318: Move _PyBytesWriter to bytesobject.c Declare also the private API in bytesobject.h. --- Include/bytesobject.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'Include/bytesobject.h') diff --git a/Include/bytesobject.h b/Include/bytesobject.h index e379bace37..eafcdea4bb 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -123,6 +123,58 @@ PyAPI_FUNC(Py_ssize_t) _PyBytes_InsertThousandsGrouping(char *buffer, #define F_ALT (1<<3) #define F_ZERO (1<<4) +#ifndef Py_LIMITED_API +/* The _PyBytesWriter structure is big: it contains an embedded "stack buffer". + A _PyBytesWriter variable must be declared at the end of variables in a + function to optimize the memory allocation on the stack. */ +typedef struct { + /* bytes object */ + PyObject *buffer; + + /* Number of allocated size */ + Py_ssize_t allocated; + + /* Current size of the buffer (can be smaller than the allocated size) */ + Py_ssize_t size; + + /* If non-zero, overallocate the buffer (default: 0). */ + int overallocate; + + /* Stack buffer */ + int use_stack_buffer; + char stack_buffer[512]; +} _PyBytesWriter; + +/* Initialize a bytes writer + + By default, the overallocation is disabled. Set the overallocate attribute + to control the allocation of the buffer. */ +PyAPI_FUNC(void) _PyBytesWriter_Init(_PyBytesWriter *writer); + +/* Get the buffer content and reset the writer. + Return a bytes object. + Raise an exception and return NULL on error. */ +PyAPI_FUNC(PyObject *) _PyBytesWriter_Finish(_PyBytesWriter *writer, + char *str); + +/* Deallocate memory of a writer (clear its internal buffer). */ +PyAPI_FUNC(void) _PyBytesWriter_Dealloc(_PyBytesWriter *writer); + +/* Allocate the buffer to write size bytes. + Return the pointer to the beginning of buffer data. 
+ Raise an exception and return NULL on error. */ +PyAPI_FUNC(char*) _PyBytesWriter_Alloc(_PyBytesWriter *writer, + Py_ssize_t size); + +/* Add *size* bytes to the buffer. + str is the current pointer inside the buffer. + Return the updated current pointer inside the buffer. + Raise an exception and return NULL on error. */ +PyAPI_FUNC(char*) _PyBytesWriter_Prepare(_PyBytesWriter *writer, + char *str, + Py_ssize_t size); +#endif /* Py_LIMITED_API */ + #ifdef __cplusplus } #endif -- cgit v1.2.1 From 5c141ee6fd49d7412b4ff6fc08d6d602a6a3f4d7 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 9 Oct 2015 03:38:24 +0200 Subject: Issue #25318: cleanup code _PyBytesWriter Rename "stack buffer" to "small buffer". Add also an assertion in _PyBytesWriter_GetPos(). --- Include/bytesobject.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Include/bytesobject.h') diff --git a/Include/bytesobject.h b/Include/bytesobject.h index eafcdea4bb..ffa529b862 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -141,8 +141,8 @@ typedef struct { int overallocate; /* Stack buffer */ - int use_stack_buffer; - char stack_buffer[512]; + int use_small_buffer; + char small_buffer[512]; } _PyBytesWriter; /* Initialize a bytes writer -- cgit v1.2.1 From 51d8f337b55dfe65d2c878ce62d35b1e30bf3cd4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 9 Oct 2015 12:37:03 +0200 Subject: _PyBytesWriter: rename size attribute to min_size --- Include/bytesobject.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'Include/bytesobject.h') diff --git a/Include/bytesobject.h b/Include/bytesobject.h index ffa529b862..6cd5a340a5 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -134,8 +134,9 @@ typedef struct { /* Number of allocated size */ Py_ssize_t allocated; - /* Current size of the buffer (can be smaller than the allocated size) */ - Py_ssize_t size; + /* Minimum number of allocated bytes, + incremented by 
_PyBytesWriter_Prepare() */ + Py_ssize_t min_size; /* If non-zero, overallocate the buffer (default: 0). */ int overallocate; -- cgit v1.2.1 From 3d3208933919b23ac4bd27f72698b69b1620542d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 9 Oct 2015 12:57:22 +0200 Subject: Add _PyBytesWriter_WriteBytes() to factorize the code --- Include/bytesobject.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'Include/bytesobject.h') diff --git a/Include/bytesobject.h b/Include/bytesobject.h index 6cd5a340a5..2c4c4c4fd4 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -174,6 +174,13 @@ PyAPI_FUNC(char*) _PyBytesWriter_Alloc(_PyBytesWriter *writer, PyAPI_FUNC(char*) _PyBytesWriter_Prepare(_PyBytesWriter *writer, char *str, Py_ssize_t size); + +/* Write bytes. + Raise an exception and return NULL on error. */ +PyAPI_FUNC(char*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, + char *str, + char *bytes, + Py_ssize_t size); #endif /* Py_LIMITED_API */ #ifdef __cplusplus -- cgit v1.2.1 From 26797c6212e515467a5185297a92e7c4ae19ce94 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 12 Oct 2015 13:12:54 +0200 Subject: Relax _PyBytesWriter API Don't require _PyBytesWriter pointer to be a "char *". Same change for _PyBytesWriter_WriteBytes() parameter. For example, binascii uses "unsigned char*". --- Include/bytesobject.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'Include/bytesobject.h') diff --git a/Include/bytesobject.h b/Include/bytesobject.h index 2c4c4c4fd4..b7a7c36bcb 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -156,7 +156,7 @@ PyAPI_FUNC(void) _PyBytesWriter_Init(_PyBytesWriter *writer); Return a bytes object. Raise an exception and return NULL on error. */ PyAPI_FUNC(PyObject *) _PyBytesWriter_Finish(_PyBytesWriter *writer, - char *str); + void *str); /* Deallocate memory of a writer (clear its internal buffer). 
*/ PyAPI_FUNC(void) _PyBytesWriter_Dealloc(_PyBytesWriter *writer); @@ -164,22 +164,22 @@ PyAPI_FUNC(void) _PyBytesWriter_Dealloc(_PyBytesWriter *writer); /* Allocate the buffer to write size bytes. Return the pointer to the beginning of buffer data. Raise an exception and return NULL on error. */ -PyAPI_FUNC(char*) _PyBytesWriter_Alloc(_PyBytesWriter *writer, +PyAPI_FUNC(void*) _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size); /* Add *size* bytes to the buffer. str is the current pointer inside the buffer. Return the updated current pointer inside the buffer. Raise an exception and return NULL on error. */ -PyAPI_FUNC(char*) _PyBytesWriter_Prepare(_PyBytesWriter *writer, - char *str, +PyAPI_FUNC(void*) _PyBytesWriter_Prepare(_PyBytesWriter *writer, + void *str, Py_ssize_t size); /* Write bytes. Raise an exception and return NULL on error. */ -PyAPI_FUNC(char*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, - char *str, - char *bytes, +PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, + void *str, + const void *bytes, Py_ssize_t size); #endif /* Py_LIMITED_API */ -- cgit v1.2.1 From a09df79b583368bf3e5c830dfa20fe17c7ce85cf Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 14 Oct 2015 09:41:48 +0200 Subject: Add use_bytearray attribute to _PyBytesWriter Issue #25399: Add a new use_bytearray attribute to _PyBytesWriter to use a bytearray buffer, instead of using a bytes object. --- Include/bytesobject.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'Include/bytesobject.h') diff --git a/Include/bytesobject.h b/Include/bytesobject.h index b7a7c36bcb..fbb63226f6 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -128,17 +128,21 @@ PyAPI_FUNC(Py_ssize_t) _PyBytes_InsertThousandsGrouping(char *buffer, A _PyBytesWriter variable must be declared at the end of variables in a function to optimize the memory allocation on the stack. 
*/ typedef struct { - /* bytes object */ + /* bytes, bytearray or NULL (when the small buffer is used) */ PyObject *buffer; - /* Number of allocated size */ + /* Number of allocated size. */ Py_ssize_t allocated; /* Minimum number of allocated bytes, incremented by _PyBytesWriter_Prepare() */ Py_ssize_t min_size; - /* If non-zero, overallocate the buffer (default: 0). */ + /* If non-zero, use a bytearray instead of a bytes object for buffer. */ + int use_bytearray; + + /* If non-zero, overallocate the buffer (default: 0). + This flag must be zero if use_bytearray is non-zero. */ int overallocate; /* Stack buffer */ @@ -153,7 +157,7 @@ typedef struct { PyAPI_FUNC(void) _PyBytesWriter_Init(_PyBytesWriter *writer); /* Get the buffer content and reset the writer. - Return a bytes object. + Return a bytes object, or a bytearray object if use_bytearray is non-zero. Raise an exception and return NULL on error. */ PyAPI_FUNC(PyObject *) _PyBytesWriter_Finish(_PyBytesWriter *writer, void *str); -- cgit v1.2.1 From c61468316bcc26ec56eaef5bf7a0f010acc715a2 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 14 Oct 2015 09:56:53 +0200 Subject: Optimize bytearray % args Issue #25399: Don't create temporary bytes objects: modify _PyBytes_Format() to work directly on bytearray objects. * Rename _PyBytes_Format() to _PyBytes_FormatEx() just in case something outside CPython uses it * _PyBytes_FormatEx() now uses (char*, Py_ssize_t) for the input string, so bytearray_format() doesn't need to create a temporary input bytes object * Add use_bytearray parameter to _PyBytes_FormatEx() which is passed to _PyBytesWriter, to create a bytearray buffer instead of a bytes buffer Most formatting operations are now between 2.5 and 5 times faster. 
--- Include/bytesobject.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'Include/bytesobject.h') diff --git a/Include/bytesobject.h b/Include/bytesobject.h index fbb63226f6..b5b37efd25 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -62,7 +62,11 @@ PyAPI_FUNC(void) PyBytes_Concat(PyObject **, PyObject *); PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *); #ifndef Py_LIMITED_API PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t); -PyAPI_FUNC(PyObject *) _PyBytes_Format(PyObject *, PyObject *); +PyAPI_FUNC(PyObject*) _PyBytes_FormatEx( + const char *format, + Py_ssize_t format_len, + PyObject *args, + int use_bytearray); #endif PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t, const char *, Py_ssize_t, -- cgit v1.2.1 From ebccea7d65ef26d6bf89900a22a82bb9247c4ab4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 14 Oct 2015 11:25:33 +0200 Subject: Optimize bytes.fromhex() and bytearray.fromhex() Issue #25401: Optimize bytes.fromhex() and bytearray.fromhex(): they are now between 2x and 3.5x faster. 
Changes: * Use a fast-path working on a char* string for ASCII strings * Use a slow-path for non-ASCII strings * Replace slow hex_digit_to_int() function with an O(1) lookup in _PyLong_DigitValue precomputed table * Use _PyBytesWriter API to handle the buffer * Add unit tests to check the error position in error messages --- Include/bytesobject.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Include/bytesobject.h') diff --git a/Include/bytesobject.h b/Include/bytesobject.h index b5b37efd25..4046c1cf85 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -67,6 +67,9 @@ PyAPI_FUNC(PyObject*) _PyBytes_FormatEx( Py_ssize_t format_len, PyObject *args, int use_bytearray); +PyAPI_FUNC(PyObject*) _PyBytes_FromHex( + PyObject *string, + int use_bytearray); #endif PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t, const char *, Py_ssize_t, -- cgit v1.2.1 From 2af47689c3bfd20fb0419de0321bfb8bd440c0f9 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 14 Oct 2015 13:56:47 +0200 Subject: Add _PyBytesWriter_Resize() function This function gives control over the buffer size without using min_size. --- Include/bytesobject.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'Include/bytesobject.h') diff --git a/Include/bytesobject.h b/Include/bytesobject.h index 4046c1cf85..8469112959 100644 --- a/Include/bytesobject.h +++ b/Include/bytesobject.h @@ -178,7 +178,9 @@ PyAPI_FUNC(void) _PyBytesWriter_Dealloc(_PyBytesWriter *writer); PyAPI_FUNC(void*) _PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size); -/* Add *size* bytes to the buffer. +/* Ensure that the buffer is large enough to write *size* bytes. + Add size to the writer minimum size (min_size attribute). + str is the current pointer inside the buffer. Return the updated current pointer inside the buffer. Raise an exception and return NULL on error. 
*/ @@ -186,6 +188,21 @@ PyAPI_FUNC(void*) _PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size); +/* Resize the buffer to make it larger. + The new buffer may be larger than size bytes because of overallocation. + Return the updated current pointer inside the buffer. + Raise an exception and return NULL on error. + + Note: size must be greater than the number of allocated bytes in the writer. + + This function doesn't use the writer minimum size (min_size attribute). + + See also _PyBytesWriter_Prepare(). + */ +PyAPI_FUNC(void*) _PyBytesWriter_Resize(_PyBytesWriter *writer, + void *str, + Py_ssize_t size); + /* Write bytes. Raise an exception and return NULL on error. */ PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, -- cgit v1.2.1