From 6f3857f6a7b3cd6bd7e62e4efdbb1b841544e053 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Fri, 18 Jun 2021 16:05:15 -0700 Subject: Support Encoding::Converter newline: :lf and :lf_newline options Previously, newline: :lf was accepted but ignored. Where it should have been used was commented out code that didn't work, but unlike all other invalid values, using newline: :lf did not raise an error. This adds support for newline: :lf and :lf_newline, for consistency with newline: :cr and :cr_newline. This is basically the same as universal_newline, except that it only affects writing and not reading due to RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK. Add tests for the File.open :newline option while here. Fixes [Bug #12436] --- transcode.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) (limited to 'transcode.c') diff --git a/transcode.c b/transcode.c index 5fafad398f..535e436b03 100644 --- a/transcode.c +++ b/transcode.c @@ -47,6 +47,7 @@ static VALUE sym_xml, sym_text, sym_attr; static VALUE sym_universal_newline; static VALUE sym_crlf_newline; static VALUE sym_cr_newline; +static VALUE sym_lf_newline; #ifdef ENABLE_ECONV_NEWLINE_OPTION static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf; #endif @@ -1039,6 +1040,7 @@ decorator_names(int ecflags, const char **decorators_ret) case ECONV_UNIVERSAL_NEWLINE_DECORATOR: case ECONV_CRLF_NEWLINE_DECORATOR: case ECONV_CR_NEWLINE_DECORATOR: + case ECONV_LF_NEWLINE_DECORATOR: case 0: break; default: @@ -1062,6 +1064,8 @@ decorator_names(int ecflags, const char **decorators_ret) decorators_ret[num_decorators++] = "crlf_newline"; if (ecflags & ECONV_CR_NEWLINE_DECORATOR) decorators_ret[num_decorators++] = "cr_newline"; + if (ecflags & ECONV_LF_NEWLINE_DECORATOR) + decorators_ret[num_decorators++] = "lf_newline"; if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR) decorators_ret[num_decorators++] = "universal_newline"; @@ -1982,6 +1986,9 @@ rb_econv_binmode(rb_econv_t *ec) case ECONV_CR_NEWLINE_DECORATOR: dname = "cr_newline"; break; + case ECONV_LF_NEWLINE_DECORATOR: + dname = "lf_newline"; + break; } if (dname) { @@ -2040,6 +2047,10 @@ econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg) rb_str_cat2(mesg, pre); pre = ","; rb_str_cat2(mesg, "cr_newline"); } + if (ecflags & ECONV_LF_NEWLINE_DECORATOR) { + rb_str_cat2(mesg, pre); pre = ","; + rb_str_cat2(mesg, "lf_newline"); + } if (ecflags & ECONV_XML_TEXT_DECORATOR) { rb_str_cat2(mesg, pre); pre = ","; rb_str_cat2(mesg, "xml_text"); @@ -2515,7 +2526,7 @@ econv_opts(VALUE opt, int ecflags) ecflags |= ECONV_CR_NEWLINE_DECORATOR; } else if (v == sym_lf) { - /* ecflags |= ECONV_LF_NEWLINE_DECORATOR; */ + ecflags |= ECONV_LF_NEWLINE_DECORATOR; } else if (SYMBOL_P(v)) { rb_raise(rb_eArgError, "unexpected value for newline option: %"PRIsVALUE, @@ -2544,6 +2555,11 @@ econv_opts(VALUE opt, int ecflags) setflags |= ECONV_CR_NEWLINE_DECORATOR; newlineflag |= !NIL_P(v); + v = rb_hash_aref(opt, sym_lf_newline); + if (RTEST(v)) + setflags |= ECONV_LF_NEWLINE_DECORATOR; + newlineflag |= !NIL_P(v); + switch (newlineflag) { case 1: ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK; @@ -3281,11 +3297,13 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * :undef => :replace # replace undefined conversion * :replace => string # replacement string ("?" or "\uFFFD" if not specified) * :newline => :universal # decorator for converting CRLF and CR to LF + * :newline => :lf # decorator for converting CRLF and CR to LF when writing * :newline => :crlf # decorator for converting LF to CRLF * :newline => :cr # decorator for converting LF to CR * :universal_newline => true # decorator for converting CRLF and CR to LF * :crlf_newline => true # decorator for converting LF to CRLF * :cr_newline => true # decorator for converting LF to CR + * :lf_newline => true # decorator for converting CRLF and CR to LF when writing * :xml => :text # escape as XML CharData. * :xml => :attr # escape as XML AttValue * integer form: @@ -3293,6 +3311,7 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * Encoding::Converter::UNDEF_REPLACE * Encoding::Converter::UNDEF_HEX_CHARREF * Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR + * Encoding::Converter::LF_NEWLINE_DECORATOR * Encoding::Converter::CRLF_NEWLINE_DECORATOR * Encoding::Converter::CR_NEWLINE_DECORATOR * Encoding::Converter::XML_TEXT_DECORATOR @@ -3335,6 +3354,8 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * Convert LF to CRLF. * [:cr_newline => true] * Convert LF to CR. + * [:lf_newline => true] + * Convert CRLF and CR to LF (when writing). * [:xml => :text] * Escape as XML CharData. * This form can be used as an HTML 4.0 #PCDATA. @@ -4437,6 +4458,7 @@ Init_transcode(void) sym_universal_newline = ID2SYM(rb_intern_const("universal_newline")); sym_crlf_newline = ID2SYM(rb_intern_const("crlf_newline")); sym_cr_newline = ID2SYM(rb_intern_const("cr_newline")); + sym_lf_newline = ID2SYM(rb_intern("lf_newline")); sym_partial_input = ID2SYM(rb_intern_const("partial_input")); #ifdef ENABLE_ECONV_NEWLINE_OPTION @@ -4533,6 +4555,12 @@ InitVM_transcode(void) */ rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECORATOR", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECORATOR)); + /* Document-const: LF_NEWLINE_DECORATOR + * + * Decorator for converting CRLF and CR to LF when writing + */ + rb_define_const(rb_cEncodingConverter, "LF_NEWLINE_DECORATOR", INT2FIX(ECONV_LF_NEWLINE_DECORATOR)); + /* Document-const: CRLF_NEWLINE_DECORATOR * * Decorator for converting LF to CRLF -- cgit v1.2.1