diff options
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/src/multiarray/textreading/stream.h | 5 | ||||
-rw-r--r-- | numpy/core/src/multiarray/textreading/tokenize.c.src | 16 |
2 files changed, 13 insertions, 8 deletions
diff --git a/numpy/core/src/multiarray/textreading/stream.h b/numpy/core/src/multiarray/textreading/stream.h index 0c4567329..b2fb1e1bf 100644 --- a/numpy/core/src/multiarray/textreading/stream.h +++ b/numpy/core/src/multiarray/textreading/stream.h @@ -9,9 +9,8 @@ * we definitely expect to get line-by-line buffers. */ #define BUFFER_MAY_CONTAIN_NEWLINE 0 -#define BUFFER_IS_PARTIAL_LINE 1 -#define BUFFER_IS_LINEND 2 -#define BUFFER_IS_FILEEND 3 +#define BUFFER_IS_LINEND 1 +#define BUFFER_IS_FILEEND 2 typedef struct _stream { void *stream_data; diff --git a/numpy/core/src/multiarray/textreading/tokenize.c.src b/numpy/core/src/multiarray/textreading/tokenize.c.src index ed68749d1..10475b921 100644 --- a/numpy/core/src/multiarray/textreading/tokenize.c.src +++ b/numpy/core/src/multiarray/textreading/tokenize.c.src @@ -249,6 +249,8 @@ tokenizer_core_@type@(tokenizer_state *ts, parser_config *const config) if (ts->buf_state != BUFFER_MAY_CONTAIN_NEWLINE) { pos = stop; /* advance to next buffer */ ts->state = TOKENIZE_LINE_END; + /* Ensure we don't think we have an empty line left to parse: */ + ts->buf_state = BUFFER_MAY_CONTAIN_NEWLINE; break; } for (; pos < stop; pos++) { @@ -322,16 +324,20 @@ tokenize(stream *s, tokenizer_state *ts, parser_config *const config) if (NPY_UNLIKELY(ts->pos >= ts->end)) { if (ts->buf_state == BUFFER_IS_LINEND && - ts->state != TOKENIZE_QUOTED && - ts->state != TOKENIZE_CHECK_QUOTED) { + ts->state != TOKENIZE_QUOTED) { /* * Finished line, do not read anymore (also do not eat \n). * If we are in a quoted field and the "line" does not end with * a newline, the quoted field will be missing it right now. - * TODO: We should probably just insert a "\n" character here, - * which is also closer to what the python code did - * (either by setting pos/end or manually). + * (i.e. 
`np.loadtxt(['"a', 'b"'], dtype="S2")` reads "ab") + * TODO: We should possibly insert a '\n' character when inside + * a quoted field and the '\n' character is not included + * in the string. `FileLike.readline()` does ensure it + * is included. + * + * Ensure we don't think we have an empty line left to parse: */ + ts->buf_state = BUFFER_MAY_CONTAIN_NEWLINE; goto finish; } /* fetch new data */ |