summaryrefslogtreecommitdiff
path: root/proto.h
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2016-01-18 14:25:02 -0700
committer Karl Williamson <khw@cpan.org> 2016-01-19 15:08:59 -0700
commit 6b659339f976d014a1a53731d86cedd01f5921ec (patch)
tree 852b02830e8b19bf700af95791214485d1f4e2e8 /proto.h
parent ca8226cfa2cc0ddcc50f60505c42078df8e3b766 (diff)
download perl-6b659339f976d014a1a53731d86cedd01f5921ec.tar.gz
Add qr/\b{lb}/
This adds the final Unicode boundary type previously missing from core Perl: the LineBreak one. This feature is already available in the Unicode::LineBreak module, but I've been told that there are portability and some other issues with that module. What's added here is a lightweight version that lacks the customizable features of the module. This implements the default Line Breaking algorithm, but with the customizations that Unicode expects everybody to add, as their test file tests for them. In other words, this passes Unicode's fairly extensive furnished tests, but wouldn't if it didn't include certain customizations specified by Unicode beyond the basic algorithm. The implementation uses a lookup table of the characters surrounding a boundary to see if it is a suitable place to break a line. In a few cases, context needs to be taken into account, so there is code in addition to the lookup table to handle those. This should meet the line-breaking needs of many applications, without having to load the module. The algorithm is somewhat independent of the Unicode version, just like the other boundary types. Only if new rules are added, or existing ones modified, is there a need to go in and change this code. Otherwise, running regen/mk_invlists.pl should be sufficient to keep it up-to-date when a new Unicode release is done, again like the other Unicode boundary types.
Diffstat (limited to 'proto.h')
-rw-r--r-- proto.h 15
1 files changed, 15 insertions, 0 deletions
diff --git a/proto.h b/proto.h
index 1bbdaced80..6a45b134b1 100644
--- a/proto.h
+++ b/proto.h
@@ -4967,6 +4967,11 @@ PERL_CALLCONV SV* Perl__swash_to_invlist(pTHX_ SV* const swash)
#endif
#if defined(PERL_IN_REGEXEC_C)
+STATIC LB_enum S_advance_one_LB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target)
+ __attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_ADVANCE_ONE_LB \
+ assert(curpos); assert(strend)
+
STATIC SB_enum S_advance_one_SB(pTHX_ U8 ** curpos, const U8 * const strend, const bool utf8_target)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_ADVANCE_ONE_SB \
@@ -4977,6 +4982,11 @@ STATIC WB_enum S_advance_one_WB(pTHX_ U8 ** curpos, const U8 * const strend, con
#define PERL_ARGS_ASSERT_ADVANCE_ONE_WB \
assert(curpos); assert(strend)
+STATIC LB_enum S_backup_one_LB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
+ __attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_BACKUP_ONE_LB \
+ assert(strbeg); assert(curpos)
+
STATIC SB_enum S_backup_one_SB(pTHX_ const U8 * const strbeg, U8 ** curpos, const bool utf8_target)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_BACKUP_ONE_SB \
@@ -5003,6 +5013,11 @@ STATIC bool S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character)
STATIC bool S_isGCB(const GCB_enum before, const GCB_enum after)
__attribute__warn_unused_result__;
+STATIC bool S_isLB(pTHX_ LB_enum before, LB_enum after, const U8 * const strbeg, const U8 * const curpos, const U8 * const strend, const bool utf8_target)
+ __attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_ISLB \
+ assert(strbeg); assert(curpos); assert(strend)
+
STATIC bool S_isSB(pTHX_ SB_enum before, SB_enum after, const U8 * const strbeg, const U8 * const curpos, const U8 * const strend, const bool utf8_target)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_ISSB \