summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2021-02-18 16:08:19 -0700
committerKarl Williamson <khw@cpan.org>2022-08-22 06:52:08 -0600
commit7b12ce912aaddd1bd2f0dfc4cef735d45600d0fa (patch)
treee9cc3ecc92cb91fcc4b2432c24d6b0247dd9d5b4
parent692d08c5148904dec25634369d9e81ede7ff18ee (diff)
downloadperl-7b12ce912aaddd1bd2f0dfc4cef735d45600d0fa.tar.gz
Add my_strftime8()
This is like plain my_strftime(), but additionally returns an indication of the UTF-8ness of the returned string
-rw-r--r--embed.fnc7
-rw-r--r--embed.h1
-rw-r--r--ext/POSIX/POSIX.xs16
-rw-r--r--locale.c33
-rw-r--r--pod/perldelta.pod7
-rw-r--r--proto.h5
-rw-r--r--util.c15
7 files changed, 66 insertions, 18 deletions
diff --git a/embed.fnc b/embed.fnc
index ac55565e05..a13820c68e 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1449,7 +1449,12 @@ Cpd |PerlIO*|my_popen_list |NN const char* mode|int n|NN SV ** args
Apd |void |my_setenv |NULLOK const char* nam|NULLOK const char* val
m |I32 |my_stat
pX |I32 |my_stat_flags |NULLOK const U32 flags
-Adfp |char * |my_strftime |NN const char *fmt|int sec|int min|int hour|int mday|int mon|int year|int wday|int yday|int isdst
+Adfp |char * |my_strftime |NN const char *fmt|int sec|int min|int hour \
+ |int mday|int mon|int year|int wday|int yday \
+ |int isdst
+Adfp |char * |my_strftime8 |NN const char *fmt|int sec|int min|int hour \
+ |int mday|int mon|int year|int wday|int yday \
+ |int isdst|NULLOK utf8ness_t * utf8ness
: Used in pp_ctl.c
p |void |my_unexec
ApR |OP* |newANONLIST |NULLOK OP* o
diff --git a/embed.h b/embed.h
index ae37387b29..46b743a86c 100644
--- a/embed.h
+++ b/embed.h
@@ -327,6 +327,7 @@
#define my_setenv(a,b) Perl_my_setenv(aTHX_ a,b)
#define my_socketpair Perl_my_socketpair
#define my_strftime(a,b,c,d,e,f,g,h,i,j) Perl_my_strftime(aTHX_ a,b,c,d,e,f,g,h,i,j)
+#define my_strftime8(a,b,c,d,e,f,g,h,i,j,k) Perl_my_strftime8(aTHX_ a,b,c,d,e,f,g,h,i,j,k)
#define my_strtod Perl_my_strtod
#define newANONATTRSUB(a,b,c,d) Perl_newANONATTRSUB(aTHX_ a,b,c,d)
#define newANONHASH(a) Perl_newANONHASH(aTHX_ a)
diff --git a/ext/POSIX/POSIX.xs b/ext/POSIX/POSIX.xs
index 062fa309b4..69250a8408 100644
--- a/ext/POSIX/POSIX.xs
+++ b/ext/POSIX/POSIX.xs
@@ -3577,29 +3577,19 @@ strftime(fmt, sec, min, hour, mday, mon, year, wday = -1, yday = -1, isdst = -1)
{
char *buf;
SV *sv;
+ utf8ness_t is_utf8;
/* allowing user-supplied (rather than literal) formats
* is normally frowned upon as a potential security risk;
* but this is part of the API so we have to allow it */
GCC_DIAG_IGNORE_STMT(-Wformat-nonliteral);
- buf = my_strftime(SvPV_nolen(fmt), sec, min, hour, mday, mon, year, wday, yday, isdst);
+ buf = my_strftime8(SvPV_nolen(fmt), sec, min, hour, mday, mon, year, wday, yday, isdst, &is_utf8);
GCC_DIAG_RESTORE_STMT;
sv = sv_newmortal();
if (buf) {
STRLEN len = strlen(buf);
sv_usepvn_flags(sv, buf, len, SV_HAS_TRAILING_NUL);
- if ( SvUTF8(fmt)
- || ( is_utf8_non_invariant_string((U8*) buf, len)
-#ifdef USE_LOCALE_TIME
- && _is_cur_LC_category_utf8(LC_TIME)
-#else /* If can't check directly, at least can see if script is consistent,
- under UTF-8, which gives us an extra measure of confidence. */
-
- && isSCRIPT_RUN((const U8 *) buf,
- (const U8 *) buf + len,
- TRUE) /* Means assume UTF-8 */
-#endif
- )) {
+ if (SvUTF8(fmt) || is_utf8 == UTF8NESS_YES) {
SvUTF8_on(sv);
}
}
diff --git a/locale.c b/locale.c
index 41f9ef1bdb..05f26e3814 100644
--- a/locale.c
+++ b/locale.c
@@ -4240,6 +4240,39 @@ S_my_langinfo_i(pTHX_
#endif /* USE_LOCALE */
+char *
+Perl_my_strftime8(pTHX_ const char *fmt, int sec, int min, int hour, int mday,
+ int mon, int year, int wday, int yday, int isdst,
+ utf8ness_t * utf8ness)
+{ /* Documented in util.c */
+ char * retval = my_strftime(fmt, sec, min, hour, mday, mon, year, wday,
+ yday, isdst);
+
+ PERL_ARGS_ASSERT_MY_STRFTIME8;
+
+ if (utf8ness) {
+
+#ifdef USE_LOCALE_TIME
+ *utf8ness = get_locale_string_utf8ness_i(NULL, LC_TIME_INDEX_,
+ retval, LOCALE_UTF8NESS_UNKNOWN);
+#else
+ *utf8ness = UTF8NESS_IMMATERIAL;
+#endif
+
+ }
+
+ DEBUG_Lv(PerlIO_printf(Perl_debug_log, "fmt=%s, retval=%s", fmt,
+ ((is_utf8_string((U8 *) retval, 0))
+ ? retval
+ :_byte_dump_string((U8 *) retval, strlen(retval), 0)));
+ if (utf8ness) PerlIO_printf(Perl_debug_log, "; utf8ness=%d",
+ (int) *utf8ness);
+ PerlIO_printf(Perl_debug_log, "\n");
+ );
+
+ return retval;
+}
+
/*
* Initialize locale awareness.
*/
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index ceadb9f66e..82db679dd9 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -352,6 +352,13 @@ A new API function L<perlapi/C<Perl_localeconv>> is added. This is the
same as L<C<POSIX::localeconv>|POSIX/localeconv> (returning a hash of
the C<localeconv()>> fields), but directly callable from XS code.
+=item *
+
+A new API function L<perlapi/C<my_strftime8>> is added. This is the
+same as plain L<perlapi/C<my_strftime>>, but with an extra parameter
+that allows the caller to simply and reliably know if the returned
+string is UTF-8.
+
=back
=head1 Selected Bug Fixes
diff --git a/proto.h b/proto.h
index b0560d7e77..be82a7f480 100644
--- a/proto.h
+++ b/proto.h
@@ -2554,6 +2554,11 @@ PERL_CALLCONV char * Perl_my_strftime(pTHX_ const char *fmt, int sec, int min, i
#define PERL_ARGS_ASSERT_MY_STRFTIME \
assert(fmt)
+PERL_CALLCONV char * Perl_my_strftime8(pTHX_ const char *fmt, int sec, int min, int hour, int mday, int mon, int year, int wday, int yday, int isdst, utf8ness_t * utf8ness)
+ __attribute__format__(__strftime__,pTHX_1,0);
+#define PERL_ARGS_ASSERT_MY_STRFTIME8 \
+ assert(fmt)
+
PERL_CALLCONV NV Perl_my_strtod(const char * const s, char ** e)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_MY_STRTOD \
diff --git a/util.c b/util.c
index 231daf06da..930ae8ea60 100644
--- a/util.c
+++ b/util.c
@@ -4189,16 +4189,23 @@ Perl_my_strftime(pTHX_ const char *fmt, int sec, int min, int hour, int mday, in
/*
=for apidoc_section $time
-=for apidoc my_strftime
+=for apidoc my_strftime
+=for apidoc_item my_strftime8
+
strftime(), but with a different API so that the return value is a pointer
to the formatted result (which MUST be arranged to be FREED BY THE
-CALLER). This allows this function to increase the buffer size as needed,
+CALLER). This allows these functions to increase the buffer size as needed,
so that the caller doesn't have to worry about that.
-Note that yday and wday effectively are ignored by this function, as
+C<my_strftime8> is the same as plain C<my_strftime>, but has an extra
+parameter, a pointer to a variable declared as L</C<utf8ness_t>>.
+Upon return, its variable will be set to indicate how the resultant string
+should be treated with regards to its UTF-8ness.
+
+Note that yday and wday effectively are ignored by these functions, as
mini_mktime() overwrites them
-Also note that this is always executed in the underlying locale of the program,
+Also note that they are always executed in the underlying locale of the program,
giving localized results.
=cut