diff options
29 files changed, 122 insertions, 288 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index a1c085d7c3f..ce5b45aaae1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -350,7 +350,7 @@ IF(NOT HAVE_CXX_NEW) ENDIF() # Find header files from the bundled libraries -# (wolfssl, readline, pcre, etc) +# (wolfssl, readline, pcre2, etc) # before the ones installed in the system SET(CMAKE_INCLUDE_DIRECTORIES_PROJECT_BEFORE ON) diff --git a/client/CMakeLists.txt b/client/CMakeLists.txt index 164424a87ff..a9a122317dc 100644 --- a/client/CMakeLists.txt +++ b/client/CMakeLists.txt @@ -46,7 +46,7 @@ ENDIF(UNIX) MYSQL_ADD_EXECUTABLE(mysqltest mysqltest.cc COMPONENT Test) SET_SOURCE_FILES_PROPERTIES(mysqltest.cc PROPERTIES COMPILE_FLAGS "-DTHREADS") -TARGET_LINK_LIBRARIES(mysqltest ${CLIENT_LIB} pcreposix pcre) +TARGET_LINK_LIBRARIES(mysqltest ${CLIENT_LIB} pcre2-posix pcre2-8) SET_TARGET_PROPERTIES(mysqltest PROPERTIES ENABLE_EXPORTS TRUE) diff --git a/client/mysqltest.cc b/client/mysqltest.cc index 2e3f0aa79fa..69bd0ec3309 100644 --- a/client/mysqltest.cc +++ b/client/mysqltest.cc @@ -44,8 +44,8 @@ #include <hash.h> #include <stdarg.h> #include <violite.h> -#define PCRE_STATIC 1 /* Important on Windows */ -#include "pcreposix.h" /* pcreposix regex library */ +#define PCRE2_STATIC 1 /* Important on Windows */ +#include "pcre2posix.h" /* pcreposix regex library */ #ifdef HAVE_SYS_WAIT_H #include <sys/wait.h> #endif diff --git a/cmake/pcre.cmake b/cmake/pcre.cmake index 4c113929866..a3a845bf30f 100644 --- a/cmake/pcre.cmake +++ b/cmake/pcre.cmake @@ -5,24 +5,17 @@ SET(WITH_PCRE "auto" CACHE STRING MACRO (CHECK_PCRE) IF(WITH_PCRE STREQUAL "system" OR WITH_PCRE STREQUAL "auto") - CHECK_LIBRARY_EXISTS(pcre pcre_stack_guard "" HAVE_PCRE_STACK_GUARD) - IF(NOT CMAKE_CROSSCOMPILING) - SET(CMAKE_REQUIRED_LIBRARIES "pcre") - CHECK_C_SOURCE_RUNS(" - #include <pcre.h> - int main() { - return -pcre_exec(NULL, NULL, NULL, -999, -999, 0, NULL, 0) < 256; - }" PCRE_STACK_SIZE_OK) - SET(CMAKE_REQUIRED_LIBRARIES) - ENDIF() + CHECK_LIBRARY_EXISTS(pcre2-8 pcre2_match_8 "" HAVE_PCRE2) ENDIF() - IF(NOT HAVE_PCRE_STACK_GUARD OR NOT PCRE_STACK_SIZE_OK OR - WITH_PCRE STREQUAL "bundled") + IF(NOT HAVE_PCRE2 OR WITH_PCRE STREQUAL "bundled") IF (WITH_PCRE STREQUAL "system") - MESSAGE(FATAL_ERROR "system pcre is not found or unusable") + MESSAGE(FATAL_ERROR "system pcre2-8 library is not found or unusable") ENDIF() - SET(PCRE_INCLUDES ${CMAKE_BINARY_DIR}/pcre ${CMAKE_SOURCE_DIR}/pcre) - ADD_SUBDIRECTORY(pcre) + SET(PCRE_INCLUDES ${CMAKE_BINARY_DIR}/pcre2 ${CMAKE_SOURCE_DIR}/pcre2 + ${CMAKE_BINARY_DIR}/pcre2/src ${CMAKE_SOURCE_DIR}/pcre2/src) + SET(PCRE2_BUILD_TESTS OFF CACHE BOOL "Disable tests.") + SET(PCRE2_BUILD_PCRE2GREP OFF CACHE BOOL "Disable pcre2grep") + ADD_SUBDIRECTORY(pcre2) ENDIF() ENDMACRO() diff --git a/config.h.cmake b/config.h.cmake index afb10348d40..3f8634519af 100644 --- a/config.h.cmake +++ b/config.h.cmake @@ -549,6 +549,7 @@ #define PACKAGE_VERSION "@VERSION@" #define VERSION "@VERSION@" #define PROTOCOL_VERSION 10 +#define PCRE2_CODE_UNIT_WIDTH 8 #define MALLOC_LIBRARY "@MALLOC_LIBRARY@" diff --git a/extra/mariabackup/CMakeLists.txt b/extra/mariabackup/CMakeLists.txt index 71d97886b3f..1649676e468 100644 --- a/extra/mariabackup/CMakeLists.txt +++ b/extra/mariabackup/CMakeLists.txt @@ -37,7 +37,7 @@ INCLUDE_DIRECTORIES( ) IF(NOT HAVE_SYSTEM_REGEX) - INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/pcre) + INCLUDE_DIRECTORIES(${PCRE_INCLUDES}) ENDIF() @@ -91,7 +91,7 @@ ADD_SUBDIRECTORY(crc) TARGET_LINK_LIBRARIES(mariabackup sql sql_builtins crc) IF(NOT HAVE_SYSTEM_REGEX) - TARGET_LINK_LIBRARIES(mariabackup pcreposix) + TARGET_LINK_LIBRARIES(mariabackup pcre2-posix) ENDIF() diff --git a/extra/mariabackup/xb_regex.h b/extra/mariabackup/xb_regex.h index 9b8f5789eeb..8f2f0908658 100644 --- a/extra/mariabackup/xb_regex.h +++ b/extra/mariabackup/xb_regex.h @@ -25,7 +25,8 @@ my_regex is used on Windows and native calls are used on POSIX platforms. */ #ifdef HAVE_SYSTEM_REGEX #include <regex.h> #else -#include <pcreposix.h> +#define PCRE2_STATIC 1 /* Important on Windows */ +#include <pcre2posix.h> #endif typedef regex_t* xb_regex_t; diff --git a/libmysqld/CMakeLists.txt b/libmysqld/CMakeLists.txt index d910d354631..73e089af4d0 100644 --- a/libmysqld/CMakeLists.txt +++ b/libmysqld/CMakeLists.txt @@ -154,7 +154,7 @@ ENDIF() SET(LIBS - dbug strings mysys mysys_ssl pcre vio + dbug strings mysys mysys_ssl pcre2-8 vio ${ZLIB_LIBRARY} ${SSL_LIBRARIES} ${LIBWRAP} ${LIBCRYPT} ${LIBDL} ${EMBEDDED_PLUGIN_LIBS} diff --git a/libmysqld/examples/CMakeLists.txt b/libmysqld/examples/CMakeLists.txt index 52fbe42aaa2..f94bc4ba969 100644 --- a/libmysqld/examples/CMakeLists.txt +++ b/libmysqld/examples/CMakeLists.txt @@ -34,7 +34,7 @@ ENDIF(UNIX) MYSQL_ADD_EXECUTABLE(mysqltest_embedded ../../client/mysqltest.cc COMPONENT Test) -TARGET_LINK_LIBRARIES(mysqltest_embedded mysqlserver pcreposix pcre) +TARGET_LINK_LIBRARIES(mysqltest_embedded mysqlserver pcre2-8 pcre2-posix) IF(CMAKE_GENERATOR MATCHES "Xcode") # It does not seem possible to tell Xcode the resulting target might need diff --git a/mysql-test/main/func_regexp_pcre.result b/mysql-test/main/func_regexp_pcre.result index e030df99756..0187831aff6 100644 --- a/mysql-test/main/func_regexp_pcre.result +++ b/mysql-test/main/func_regexp_pcre.result @@ -793,7 +793,7 @@ SELECT 'a\nb' RLIKE '(?-s)a.b'; 0 SET default_regex_flags=DEFAULT; SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$'); -ERROR 42000: Got error 'two named subpatterns have the same name at offset 29' from regexp +ERROR 42000: Got error 'two named subpatterns have the same name (PCRE2_DUPNAMES not set' from regexp SET default_regex_flags='DUPNAMES'; SELECT REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$'); REGEXP_SUBSTR('Monday Mon','^((?<DN>Mon|Fri|Sun)day|(?<DN>Tue)sday).*(?P=DN)$') @@ -817,8 +817,7 @@ SELECT 'AB' RLIKE 'A# this is a comment\nB'; 1 SET default_regex_flags=DEFAULT; SELECT 'Aq' RLIKE 'A\\q'; -'Aq' RLIKE 'A\\q' -1 +ERROR 42000: Got error 'unrecognized character follows \ at offset 2' from regexp SET default_regex_flags='EXTRA'; SELECT 'Aq' RLIKE 'A\\q'; ERROR 42000: Got error 'unrecognized character follows \ at offset 2' from regexp @@ -861,7 +860,7 @@ SELECT 0xE001 REGEXP @regCheck; 0xE001 REGEXP @regCheck 0 Warnings: -Warning 1139 Got error 'pcre_exec: Invalid utf8 byte sequence in the subject string' from regexp +Warning 1139 Got error 'UTF-8 error: 1 byte missing at end' from regexp # Testing workaround N1: This makes the pattern to be a binary string: SET NAMES latin1; SET @regCheck= X'E001'; @@ -883,40 +882,31 @@ CAST(0xE001 AS BINARY) REGEXP @regCheck # MDEV-12420: Testing recursion overflow SELECT 1 FROM dual WHERE ('Alpha,Bravo,Charlie,Delta,Echo,Foxtrot,StrataCentral,Golf,Hotel,India,Juliet,Kilo,Lima,Mike,StrataL3,November,Oscar,StrataL2,Sand,P3,P4SwitchTest,Arsys,Poppa,ExtensionMgr,Arp,Quebec,Romeo,StrataApiV2,PtReyes,Sierra,SandAcl,Arrow,Artools,BridgeTest,Tango,SandT,PAlaska,Namespace,Agent,Qos,PatchPanel,ProjectReport,Ark,Gimp,Agent,SliceAgent,Arnet,Bgp,Ale,Tommy,Central,AsicPktTestLib,Hsc,SandL3,Abuild,Pca9555,Standby,ControllerDut,CalSys,SandLib,Sb820,PointV2,BfnLib,Evpn,BfnSdk,Sflow,ManagementActive,AutoTest,GatedTest,Bgp,Sand,xinetd,BfnAgentLib,bf-utils,Hello,BfnState,Eos,Artest,Qos,Scd,ThermoMgr,Uniform,EosUtils,Eb,FanController,Central,BfnL3,BfnL2,tcp_wrappers,Victor,Environment,Route,Failover,Whiskey,Xray,Gimp,BfnFixed,Strata,SoCal,XApi,Msrp,XpProfile,tcpdump,PatchPanel,ArosTest,FhTest,Arbus,XpAcl,MacConc,XpApi,telnet,QosTest,Alpha2,BfnVlan,Stp,VxlanControllerTest,MplsAgent,Bravo2,Lanz,BfnMbb,Intf,XCtrl,Unicast,SandTunnel,L3Unicast,Ipsec,MplsTest,Rsvp,EthIntf,StageMgr,Sol,MplsUtils,Nat,Ira,P4NamespaceDut,Counters,Charlie2,Aqlc,Mlag,Power,OpenFlow,Lag,RestApi,BfdTest,strongs,Sfa,CEosUtils,Adt746,MaintenanceMode,MlagDut,EosImage,IpEth,MultiProtocol,Launcher,Max3179,Snmp,Acl,IpEthTest,PhyEee,bf-syslibs,tacc,XpL2,p4-ar-switch,p4-bf-switch,LdpTest,BfnPhy,Mirroring,Phy6,Ptp' REGEXP '^((?!\b(Strata|StrataApi|StrataApiV2)\b).)*$'); 1 -Warnings: -Warning 1139 Got error 'pcre_exec: recursion limit of NUM exceeded' from regexp +1 SELECT CONCAT(REPEAT('100,',60),'101') RLIKE '^(([1-9][0-9]*),)*[1-9][0-9]*$'; CONCAT(REPEAT('100,',60),'101') RLIKE '^(([1-9][0-9]*),)*[1-9][0-9]*$' 1 SELECT CONCAT(REPEAT('100,',200),'101') RLIKE '^(([1-9][0-9]*),)*[1-9][0-9]*$'; CONCAT(REPEAT('100,',200),'101') RLIKE '^(([1-9][0-9]*),)*[1-9][0-9]*$' -0 -Warnings: -Warning 1139 Got error 'pcre_exec: recursion limit of NUM exceeded' from regexp +1 SELECT REGEXP_INSTR(CONCAT(REPEAT('100,',60),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$'); REGEXP_INSTR(CONCAT(REPEAT('100,',60),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$') 1 SELECT REGEXP_INSTR(CONCAT(REPEAT('100,',200),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$'); REGEXP_INSTR(CONCAT(REPEAT('100,',200),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$') -0 -Warnings: -Warning 1139 Got error 'pcre_exec: recursion limit of NUM exceeded' from regexp +1 SELECT LENGTH(REGEXP_SUBSTR(CONCAT(REPEAT('100,',60),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$')); LENGTH(REGEXP_SUBSTR(CONCAT(REPEAT('100,',60),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$')) 243 SELECT LENGTH(REGEXP_SUBSTR(CONCAT(REPEAT('100,',200),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$')); LENGTH(REGEXP_SUBSTR(CONCAT(REPEAT('100,',200),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$')) -0 -Warnings: -Warning 1139 Got error 'pcre_exec: recursion limit of NUM exceeded' from regexp +803 SELECT LENGTH(REGEXP_REPLACE(CONCAT(REPEAT('100,',60),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$', '')); LENGTH(REGEXP_REPLACE(CONCAT(REPEAT('100,',60),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$', '')) 0 SELECT LENGTH(REGEXP_REPLACE(CONCAT(REPEAT('100,',200),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$', '')); LENGTH(REGEXP_REPLACE(CONCAT(REPEAT('100,',200),'101'), '^(([1-9][0-9]*),)*[1-9][0-9]*$', '')) -803 -Warnings: -Warning 1139 Got error 'pcre_exec: recursion limit of NUM exceeded' from regexp +0 SELECT REGEXP_INSTR('a_kollision', 'oll'); REGEXP_INSTR('a_kollision', 'oll') 4 diff --git a/mysql-test/main/func_regexp_pcre.test b/mysql-test/main/func_regexp_pcre.test index 21600390bb2..30969b3e9ae 100644 --- a/mysql-test/main/func_regexp_pcre.test +++ b/mysql-test/main/func_regexp_pcre.test @@ -382,6 +382,7 @@ SELECT 'AB' RLIKE 'A B'; SELECT 'AB' RLIKE 'A# this is a comment\nB'; SET default_regex_flags=DEFAULT; +--error ER_REGEXP_ERROR SELECT 'Aq' RLIKE 'A\\q'; SET default_regex_flags='EXTRA'; --error ER_REGEXP_ERROR diff --git a/mysql-test/main/mysqld--help.result b/mysql-test/main/mysqld--help.result index c173f435964..b0694a5c82b 100644 --- a/mysql-test/main/mysqld--help.result +++ b/mysql-test/main/mysqld--help.result @@ -202,7 +202,8 @@ The following specify which files/extra groups are read (specified before remain using the password expiration options in ALTER USER. --default-regex-flags=name Default flags for the regex library. Any combination of: - DOTALL, DUPNAMES, EXTENDED, EXTRA, MULTILINE, UNGREEDY + DOTALL, DUPNAMES, EXTENDED, EXTENDED_MORE, EXTRA, + MULTILINE, UNGREEDY --default-storage-engine=name The default storage engine for new tables --default-time-zone=name diff --git a/mysql-test/suite/sys_vars/r/default_regex_flags_basic.result b/mysql-test/suite/sys_vars/r/default_regex_flags_basic.result index d25f3ca1c88..e384943cd2f 100644 --- a/mysql-test/suite/sys_vars/r/default_regex_flags_basic.result +++ b/mysql-test/suite/sys_vars/r/default_regex_flags_basic.result @@ -13,8 +13,8 @@ SELECT @@default_regex_flags; SET default_regex_flags='UNKNOWN'; ERROR 42000: Variable 'default_regex_flags' can't be set to the value of 'UNKNOWN' -SET default_regex_flags=123; -ERROR 42000: Variable 'default_regex_flags' can't be set to the value of '123' +SET default_regex_flags=325; +ERROR 42000: Variable 'default_regex_flags' can't be set to the value of '325' SET default_regex_flags=123.0; ERROR 42000: Incorrect argument type to variable 'default_regex_flags' SET default_regex_flags=123e0; @@ -31,7 +31,7 @@ DOTALL SET @@default_regex_flags=63; SELECT @@default_regex_flags; @@default_regex_flags -DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY +DOTALL,DUPNAMES,EXTENDED,EXTENDED_MORE,EXTRA,MULTILINE SET @@default_regex_flags='DOTALL'; SELECT @@default_regex_flags; @@default_regex_flags diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result index 9ed579fb632..55e6bb0dea1 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_embedded.result @@ -740,7 +740,7 @@ VARIABLE_COMMENT Default flags for the regex library NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL -ENUM_VALUE_LIST DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY +ENUM_VALUE_LIST DOTALL,DUPNAMES,EXTENDED,EXTENDED_MORE,EXTRA,MULTILINE,UNGREEDY READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME DEFAULT_STORAGE_ENGINE diff --git a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result index caf1654a2c1..bf52f23c630 100644 --- a/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result +++ b/mysql-test/suite/sys_vars/r/sysvars_server_notembedded.result @@ -750,7 +750,7 @@ VARIABLE_COMMENT Default flags for the regex library NUMERIC_MIN_VALUE NULL NUMERIC_MAX_VALUE NULL NUMERIC_BLOCK_SIZE NULL -ENUM_VALUE_LIST DOTALL,DUPNAMES,EXTENDED,EXTRA,MULTILINE,UNGREEDY +ENUM_VALUE_LIST DOTALL,DUPNAMES,EXTENDED,EXTENDED_MORE,EXTRA,MULTILINE,UNGREEDY READ_ONLY NO COMMAND_LINE_ARGUMENT REQUIRED VARIABLE_NAME DEFAULT_STORAGE_ENGINE diff --git a/mysql-test/suite/sys_vars/t/default_regex_flags_basic.test b/mysql-test/suite/sys_vars/t/default_regex_flags_basic.test index 94607432fd9..ca72c5ceafc 100644 --- a/mysql-test/suite/sys_vars/t/default_regex_flags_basic.test +++ b/mysql-test/suite/sys_vars/t/default_regex_flags_basic.test @@ -9,7 +9,7 @@ SELECT @@default_regex_flags; --error ER_WRONG_VALUE_FOR_VAR SET default_regex_flags='UNKNOWN'; --error ER_WRONG_VALUE_FOR_VAR -SET default_regex_flags=123; +SET default_regex_flags=325; --error ER_WRONG_TYPE_FOR_VAR SET default_regex_flags=123.0; --error ER_WRONG_TYPE_FOR_VAR diff --git a/sql/CMakeLists.txt b/sql/CMakeLists.txt index 4545bca5768..98df1988176 100644 --- a/sql/CMakeLists.txt +++ b/sql/CMakeLists.txt @@ -193,7 +193,7 @@ RECOMPILE_FOR_EMBEDDED) ADD_LIBRARY(sql STATIC ${SQL_SOURCE}) DTRACE_INSTRUMENT(sql) TARGET_LINK_LIBRARIES(sql - mysys mysys_ssl dbug strings vio pcre + mysys mysys_ssl dbug strings vio pcre2-8 ${LIBWRAP} ${LIBCRYPT} ${LIBDL} ${CMAKE_THREAD_LIBS_INIT} ${SSL_LIBRARIES} ${LIBSYSTEMD}) diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index e9571381dc8..2e892fb36e8 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -5827,15 +5827,6 @@ int Regexp_processor_pcre::default_regex_flags() return default_regex_flags_pcre(current_thd); } -void Regexp_processor_pcre::set_recursion_limit(THD *thd) -{ - long stack_used; - DBUG_ASSERT(thd == current_thd); - stack_used= available_stack_size(thd->thread_stack, &stack_used); - m_pcre_extra.match_limit_recursion= - (ulong)((my_thread_stack_size - STACK_MIN_SIZE - stack_used)/my_pcre_frame_size); -} - /** Convert string to lib_charset, if needed. @@ -5869,8 +5860,8 @@ String *Regexp_processor_pcre::convert_if_needed(String *str, String *converter) bool Regexp_processor_pcre::compile(String *pattern, bool send_error) { - const char *pcreErrorStr; - int pcreErrorOffset; + int pcreErrorNumber; + PCRE2_SIZE pcreErrorOffset; if (is_compiled()) { @@ -5883,19 +5874,30 @@ bool Regexp_processor_pcre::compile(String *pattern, bool send_error) if (!(pattern= convert_if_needed(pattern, &pattern_converter))) return true; - m_pcre= pcre_compile(pattern->c_ptr_safe(), m_library_flags, - &pcreErrorStr, &pcreErrorOffset, NULL); + m_pcre= pcre2_compile((PCRE2_SPTR8) pattern->ptr(), pattern->length(), + m_library_flags, + &pcreErrorNumber, &pcreErrorOffset, NULL); if (unlikely(m_pcre == NULL)) { if (send_error) { char buff[MAX_FIELD_WIDTH]; - my_snprintf(buff, sizeof(buff), "%s at offset %d", pcreErrorStr, pcreErrorOffset); + int lmsg= pcre2_get_error_message(pcreErrorNumber, + (PCRE2_UCHAR8 *)buff, sizeof(buff)); + if (lmsg >= 0) + my_snprintf(buff+lmsg, sizeof(buff)-lmsg, + " at offset %d", pcreErrorOffset); my_error(ER_REGEXP_ERROR, MYF(0), buff); } return true; } + m_pcre_match_data= pcre2_match_data_create_from_pattern(m_pcre, NULL); + if (m_pcre_match_data == NULL) + { + my_error(ER_OUT_OF_RESOURCES, MYF(0)); + return true; + } return false; } @@ -5916,124 +5918,46 @@ bool Regexp_processor_pcre::compile(Item *item, bool send_error) */ void Regexp_processor_pcre::pcre_exec_warn(int rc) const { - char buf[64]; - const char *errmsg= NULL; + PCRE2_UCHAR8 buf[128]; THD *thd= current_thd; - /* - Make a descriptive message only for those pcre_exec() error codes - that can actually happen in MariaDB. - */ - switch (rc) + int errlen= pcre2_get_error_message(rc, buf, sizeof(buf)); + if (errlen <= 0) { - case PCRE_ERROR_NULL: - errmsg= "pcre_exec: null argument passed"; - break; - case PCRE_ERROR_BADOPTION: - errmsg= "pcre_exec: bad option"; - break; - case PCRE_ERROR_BADMAGIC: - errmsg= "pcre_exec: bad magic - not a compiled regex"; - break; - case PCRE_ERROR_UNKNOWN_OPCODE: - errmsg= "pcre_exec: error in compiled regex"; - break; - case PCRE_ERROR_NOMEMORY: - errmsg= "pcre_exec: Out of memory"; - break; - case PCRE_ERROR_NOSUBSTRING: - errmsg= "pcre_exec: no substring"; - break; - case PCRE_ERROR_MATCHLIMIT: - errmsg= "pcre_exec: match limit exceeded"; - break; - case PCRE_ERROR_CALLOUT: - errmsg= "pcre_exec: callout error"; - break; - case PCRE_ERROR_BADUTF8: - errmsg= "pcre_exec: Invalid utf8 byte sequence in the subject string"; - break; - case PCRE_ERROR_BADUTF8_OFFSET: - errmsg= "pcre_exec: Started at invalid location within utf8 byte sequence"; - break; - case PCRE_ERROR_PARTIAL: - errmsg= "pcre_exec: partial match"; - break; - case PCRE_ERROR_INTERNAL: - errmsg= "pcre_exec: internal error"; - break; - case PCRE_ERROR_BADCOUNT: - errmsg= "pcre_exec: ovesize is negative"; - break; - case PCRE_ERROR_RECURSIONLIMIT: - my_snprintf(buf, sizeof(buf), "pcre_exec: recursion limit of %ld exceeded", - m_pcre_extra.match_limit_recursion); - errmsg= buf; - break; - case PCRE_ERROR_BADNEWLINE: - errmsg= "pcre_exec: bad newline options"; - break; - case PCRE_ERROR_BADOFFSET: - errmsg= "pcre_exec: start offset negative or greater than string length"; - break; - case PCRE_ERROR_SHORTUTF8: - errmsg= "pcre_exec: ended in middle of utf8 sequence"; - break; - case PCRE_ERROR_JIT_STACKLIMIT: - errmsg= "pcre_exec: insufficient stack memory for JIT compile"; - break; - case PCRE_ERROR_RECURSELOOP: - errmsg= "pcre_exec: Recursion loop detected"; - break; - case PCRE_ERROR_BADMODE: - errmsg= "pcre_exec: compiled pattern passed to wrong bit library function"; - break; - case PCRE_ERROR_BADENDIANNESS: - errmsg= "pcre_exec: compiled pattern passed to wrong endianness processor"; - break; - case PCRE_ERROR_JIT_BADOPTION: - errmsg= "pcre_exec: bad jit option"; - break; - case PCRE_ERROR_BADLENGTH: - errmsg= "pcre_exec: negative length"; - break; - default: - /* - As other error codes should normally not happen, - we just report the error code without textual description - of the code. - */ - my_snprintf(buf, sizeof(buf), "pcre_exec: Internal error (%d)", rc); - errmsg= buf; + my_snprintf((char *)buf, sizeof(buf), "pcre_exec: Internal error (%d)", rc); } push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, - ER_REGEXP_ERROR, ER_THD(thd, ER_REGEXP_ERROR), errmsg); + ER_REGEXP_ERROR, ER_THD(thd, ER_REGEXP_ERROR), buf); } /** Call pcre_exec() and send a warning if pcre_exec() returned with an error. */ -int Regexp_processor_pcre::pcre_exec_with_warn(const pcre *code, - const pcre_extra *extra, +int Regexp_processor_pcre::pcre_exec_with_warn(const pcre2_code *code, + pcre2_match_data *data, const char *subject, int length, int startoffset, - int options, int *ovector, - int ovecsize) + int options) { - int rc= pcre_exec(code, extra, subject, length, - startoffset, options, ovector, ovecsize); + int rc= pcre2_match(code, (PCRE2_SPTR8) subject, (PCRE2_SIZE) length, + (PCRE2_SIZE) startoffset, options, data, NULL); DBUG_EXECUTE_IF("pcre_exec_error_123", rc= -123;); - if (unlikely(rc < PCRE_ERROR_NOMATCH)) + if (unlikely(rc < PCRE2_ERROR_NOMATCH)) + { + m_SubStrVec= NULL; pcre_exec_warn(rc); + } + else + m_SubStrVec= pcre2_get_ovector_pointer(data); return rc; } bool Regexp_processor_pcre::exec(const char *str, size_t length, size_t offset) { - m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, &m_pcre_extra, str, (int)length, (int)offset, 0, - m_SubStrVec, array_elements(m_SubStrVec)); + m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, m_pcre_match_data, + str, (int)length, (int)offset, 0); return false; } @@ -6043,10 +5967,8 @@ bool Regexp_processor_pcre::exec(String *str, int offset, { if (!(str= convert_if_needed(str, &subject_converter))) return true; - m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, &m_pcre_extra, - str->c_ptr_safe(), str->length(), - offset, 0, - m_SubStrVec, array_elements(m_SubStrVec)); + m_pcre_exec_rc= pcre_exec_with_warn(m_pcre, m_pcre_match_data, + str->ptr(), str->length(), offset, 0); if (m_pcre_exec_rc > 0) { uint i; @@ -6096,12 +6018,6 @@ void Regexp_processor_pcre::fix_owner(Item_func *owner, } -bool Item_func_regex::fix_fields(THD *thd, Item **ref) -{ - re.set_recursion_limit(thd); - return Item_bool_func::fix_fields(thd, ref); -} - bool Item_func_regex::fix_length_and_dec() { @@ -6128,13 +6044,6 @@ longlong Item_func_regex::val_int() } -bool Item_func_regexp_instr::fix_fields(THD *thd, Item **ref) -{ - re.set_recursion_limit(thd); - return Item_int_func::fix_fields(thd, ref); -} - - bool Item_func_regexp_instr::fix_length_and_dec() { @@ -6157,7 +6066,7 @@ longlong Item_func_regexp_instr::val_int() if ((null_value= re.exec(args[0], 0, 1))) return 0; - return re.match() ? re.subpattern_start(0) + 1 : 0; + return re.match() ? (longlong) (re.subpattern_start(0) + 1) : 0; } diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index cb1a467c357..c6ce2f1792a 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -24,8 +24,8 @@ #endif #include "item_func.h" /* Item_int_func, Item_bool_func */ -#define PCRE_STATIC 1 /* Important on Windows */ -#include "pcre.h" /* pcre header file */ +#define PCRE2_STATIC 1 /* Important on Windows */ +#include "pcre2.h" /* pcre2 header file */ #include "item.h" extern Item_result item_cmp_type(Item_result a,Item_result b); @@ -2804,41 +2804,39 @@ public: class Regexp_processor_pcre { - pcre *m_pcre; - pcre_extra m_pcre_extra; + pcre2_code *m_pcre; + pcre2_match_data *m_pcre_match_data; bool m_conversion_is_needed; bool m_is_const; int m_library_flags; CHARSET_INFO *m_library_charset; String m_prev_pattern; int m_pcre_exec_rc; - int m_SubStrVec[30]; + PCRE2_SIZE *m_SubStrVec; void pcre_exec_warn(int rc) const; - int pcre_exec_with_warn(const pcre *code, const pcre_extra *extra, + int pcre_exec_with_warn(const pcre2_code *code, + pcre2_match_data *data, const char *subject, int length, int startoffset, - int options, int *ovector, int ovecsize); + int options); public: String *convert_if_needed(String *src, String *converter); String subject_converter; String pattern_converter; String replace_converter; Regexp_processor_pcre() : - m_pcre(NULL), m_conversion_is_needed(true), m_is_const(0), + m_pcre(NULL), m_pcre_match_data(NULL), + m_conversion_is_needed(true), m_is_const(0), m_library_flags(0), m_library_charset(&my_charset_utf8mb3_general_ci) - { - m_pcre_extra.flags= PCRE_EXTRA_MATCH_LIMIT_RECURSION; - m_pcre_extra.match_limit_recursion= 100L; - } + {} int default_regex_flags(); - void set_recursion_limit(THD *); void init(CHARSET_INFO *data_charset, int extra_flags) { m_library_flags= default_regex_flags() | extra_flags | (data_charset != &my_charset_bin ? - (PCRE_UTF8 | PCRE_UCP) : 0) | + (PCRE2_UTF | PCRE2_UCP) : 0) | ((data_charset->state & - (MY_CS_BINSORT | MY_CS_CSSORT)) ? 0 : PCRE_CASELESS); + (MY_CS_BINSORT | MY_CS_CSSORT)) ? 0 : PCRE2_CASELESS); // Convert text data to utf-8. m_library_charset= data_charset == &my_charset_bin ? @@ -2859,26 +2857,28 @@ public: bool exec(Item *item, int offset, uint n_result_offsets_to_convert); bool match() const { return m_pcre_exec_rc < 0 ? 0 : 1; } int nsubpatterns() const { return m_pcre_exec_rc <= 0 ? 0 : m_pcre_exec_rc; } - int subpattern_start(int n) const + size_t subpattern_start(int n) const { return m_pcre_exec_rc <= 0 ? 0 : m_SubStrVec[n * 2]; } - int subpattern_end(int n) const + size_t subpattern_end(int n) const { return m_pcre_exec_rc <= 0 ? 0 : m_SubStrVec[n * 2 + 1]; } - int subpattern_length(int n) const + size_t subpattern_length(int n) const { return subpattern_end(n) - subpattern_start(n); } void reset() { m_pcre= NULL; + m_pcre_match_data= NULL; m_prev_pattern.length(0); } void cleanup() { - pcre_free(m_pcre); + pcre2_match_data_free(m_pcre_match_data); + pcre2_code_free(m_pcre); reset(); } bool is_compiled() const { return m_pcre != NULL; } @@ -2903,7 +2903,6 @@ public: DBUG_VOID_RETURN; } longlong val_int(); - bool fix_fields(THD *thd, Item **ref); bool fix_length_and_dec(); const char *func_name() const { return "regexp"; } enum precedence precedence() const { return CMP_PRECEDENCE; } @@ -2944,7 +2943,6 @@ public: DBUG_VOID_RETURN; } longlong val_int(); - bool fix_fields(THD *thd, Item **ref); bool fix_length_and_dec(); const char *func_name() const { return "regexp_instr"; } Item *get_copy(THD *thd) { return 0; } diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index 5342a9080b6..5082dfd119a 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -1302,13 +1302,6 @@ bool Item_func_replace::fix_length_and_dec() /*********************************************************************/ -bool Item_func_regexp_replace::fix_fields(THD *thd, Item **ref) -{ - re.set_recursion_limit(thd); - return Item_str_func::fix_fields(thd, ref); -} - - bool Item_func_regexp_replace::fix_length_and_dec() { if (agg_arg_charsets_for_string_result_with_comparison(collation, args, 3)) @@ -1360,7 +1353,7 @@ bool Item_func_regexp_replace::append_replacement(String *str, if (n < re.nsubpatterns()) { /* A valid sub-pattern reference found */ - int pbeg= re.subpattern_start(n), plength= re.subpattern_end(n) - pbeg; + size_t pbeg= re.subpattern_start(n), plength= re.subpattern_end(n) - pbeg; if (str->append(source->str + pbeg, plength, cs)) return true; } @@ -1389,7 +1382,7 @@ String *Item_func_regexp_replace::val_str(String *str) String *source= args[0]->val_str(&tmp0); String *replace= args[2]->val_str(&tmp2); LEX_CSTRING src, rpl; - int startoffset= 0; + size_t startoffset= 0; if ((null_value= (args[0]->null_value || args[2]->null_value || re.recompile(args[1])))) @@ -1418,7 +1411,8 @@ String *Item_func_regexp_replace::val_str(String *str) Append the rest of the source string starting from startoffset until the end of the source. */ - if (str->append(src.str + startoffset, src.length - startoffset, re.library_charset())) + if (str->append(src.str + startoffset, src.length - startoffset, + re.library_charset())) goto err; return str; } @@ -1427,7 +1421,8 @@ String *Item_func_regexp_replace::val_str(String *str) Append prefix, the part before the matching pattern. starting from startoffset until the next match */ - if (str->append(src.str + startoffset, re.subpattern_start(0) - startoffset, re.library_charset())) + if (str->append(src.str + startoffset, + re.subpattern_start(0) - startoffset, re.library_charset())) goto err; // Append replacement @@ -1445,13 +1440,6 @@ err: } -bool Item_func_regexp_substr::fix_fields(THD *thd, Item **ref) -{ - re.set_recursion_limit(thd); - return Item_str_func::fix_fields(thd, ref); -} - - bool Item_func_regexp_substr::fix_length_and_dec() { if (agg_arg_charsets_for_string_result_with_comparison(collation, args, 2)) @@ -1486,8 +1474,7 @@ String *Item_func_regexp_substr::val_str(String *str) return str; if (str->append(source->ptr() + re.subpattern_start(0), - re.subpattern_end(0) - re.subpattern_start(0), - re.library_charset())) + re.subpattern_length(0), re.library_charset())) goto err; return str; diff --git a/sql/item_strfunc.h b/sql/item_strfunc.h index e84696fe22c..2d0a096b618 100644 --- a/sql/item_strfunc.h +++ b/sql/item_strfunc.h @@ -374,7 +374,6 @@ public: DBUG_VOID_RETURN; } String *val_str(String *str); - bool fix_fields(THD *thd, Item **ref); bool fix_length_and_dec(); const char *func_name() const { return "regexp_replace"; } Item *get_copy(THD *thd) { return 0;} @@ -396,7 +395,6 @@ public: DBUG_VOID_RETURN; } String *val_str(String *str); - bool fix_fields(THD *thd, Item **ref); bool fix_length_and_dec(); const char *func_name() const { return "regexp_substr"; } Item *get_copy(THD *thd) { return 0; } diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 8c2b63e27fe..b7f7615636b 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -113,7 +113,6 @@ #include "sp_rcontext.h" #include "sp_cache.h" #include "sql_reload.h" // reload_acl_and_cache -#include "pcre.h" #ifdef HAVE_POLL_H #include <poll.h> @@ -3260,20 +3259,6 @@ static void init_libstrings() #endif } -ulonglong my_pcre_frame_size; - -static void init_pcre() -{ - pcre_malloc= pcre_stack_malloc= my_str_malloc_mysqld; - pcre_free= pcre_stack_free= my_free; - pcre_stack_guard= check_enough_stack_size_slow; - /* See http://pcre.org/original/doc/html/pcrestack.html */ - my_pcre_frame_size= -pcre_exec(NULL, NULL, NULL, -999, -999, 0, NULL, 0); - // pcre can underestimate its stack usage. Use a safe value, as in the manual - set_if_bigger(my_pcre_frame_size, 500); - my_pcre_frame_size += 16; // Again, safety margin, see the manual -} - /** Initialize one of the global date/time format variables. @@ -4130,7 +4115,6 @@ static int init_common_variables() if (item_create_init()) return 1; item_init(); - init_pcre(); /* Process a comma-separated character set list and choose the first available character set. This is mostly for diff --git a/sql/mysqld.h b/sql/mysqld.h index eb92b5999ca..c643f3786ef 100644 --- a/sql/mysqld.h +++ b/sql/mysqld.h @@ -575,8 +575,6 @@ extern pthread_t signal_thread; extern struct st_VioSSLFd * ssl_acceptor_fd; #endif /* HAVE_OPENSSL */ -extern ulonglong my_pcre_frame_size; - /* The following variables were under INNODB_COMPABILITY_HOOKS */ diff --git a/sql/set_var.h b/sql/set_var.h index e1e4d6ea0b9..c3147c1e3ca 100644 --- a/sql/set_var.h +++ b/sql/set_var.h @@ -443,7 +443,7 @@ sql_mode_t expand_sql_mode(sql_mode_t sql_mode); const char *sql_mode_string_representation(uint bit_number); bool sql_mode_string_representation(THD *thd, sql_mode_t sql_mode, LEX_CSTRING *ls); -int default_regex_flags_pcre(const THD *thd); +int default_regex_flags_pcre(THD *thd); extern sys_var *Sys_autocommit_ptr, *Sys_last_gtid_ptr, *Sys_character_set_client_ptr, *Sys_character_set_connection_ptr, diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index ab53f339b2b..f5b915ec916 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -5994,29 +5994,40 @@ static const char *default_regex_flags_names[]= "DOTALL", // (?s) . matches anything including NL "DUPNAMES", // (?J) Allow duplicate names for subpatterns "EXTENDED", // (?x) Ignore white space and # comments - "EXTRA", // (?X) extra features (e.g. error on unknown escape character) + "EXTENDED_MORE",//(?xx) Ignore white space and # comments inside cheracter + "EXTRA", // means nothing since PCRE2 "MULTILINE", // (?m) ^ and $ match newlines within data "UNGREEDY", // (?U) Invert greediness of quantifiers 0 }; static const int default_regex_flags_to_pcre[]= { - PCRE_DOTALL, - PCRE_DUPNAMES, - PCRE_EXTENDED, - PCRE_EXTRA, - PCRE_MULTILINE, - PCRE_UNGREEDY, + PCRE2_DOTALL, + PCRE2_DUPNAMES, + PCRE2_EXTENDED, + PCRE2_EXTENDED_MORE, + -1, /* EXTRA flag not available since PCRE2 */ + PCRE2_MULTILINE, + PCRE2_UNGREEDY, 0 }; -int default_regex_flags_pcre(const THD *thd) +int default_regex_flags_pcre(THD *thd) { ulonglong src= thd->variables.default_regex_flags; int i, res; for (i= res= 0; default_regex_flags_to_pcre[i]; i++) { if (src & (1ULL << i)) + { + if (default_regex_flags_to_pcre[i] < 0) + { + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_UNKNOWN_ERROR, + "PCRE2 doens't support the EXTRA flag. Ignored."); + continue; + } res|= default_regex_flags_to_pcre[i]; + } } return res; } diff --git a/storage/mroonga/CMakeLists.txt b/storage/mroonga/CMakeLists.txt index 35e0783595f..6ea264ce84c 100644 --- a/storage/mroonga/CMakeLists.txt +++ b/storage/mroonga/CMakeLists.txt @@ -189,11 +189,7 @@ else() set(MYSQL_VARIANT "MySQL") endif() -if(EXISTS "${MYSQL_SOURCE_DIR}/pcre") - set(MYSQL_REGEX_INCLUDE_DIR "${MYSQL_SOURCE_DIR}/pcre") -else() - set(MYSQL_REGEX_INCLUDE_DIR "${MYSQL_SOURCE_DIR}/regex") -endif() +set(MYSQL_REGEX_INCLUDE_DIR "${MYSQL_SOURCE_DIR}/regex") if(EXISTS "${MYSQL_SOURCE_DIR}/extra/rapidjson") set(MYSQL_RAPIDJSON_INCLUDE_DIR "${MYSQL_SOURCE_DIR}/extra/rapidjson/include") diff --git a/storage/mroonga/configure.ac b/storage/mroonga/configure.ac index b1e66904f75..3ef31bdc32e 100644 --- a/storage/mroonga/configure.ac +++ b/storage/mroonga/configure.ac @@ -186,11 +186,7 @@ AC_DEFUN([CONFIG_OPTION_MYSQL],[ mysql_regex_include_dir="$ac_mysql_source_dir/extra/regex" MYSQL_INCLUDES="$MYSQL_INCLUDES -I$mysql_regex_include_dir" else - if test -d "$ac_mysql_source_dir/pcre"; then - mysql_regex_include_dir="$ac_mysql_source_dir/pcre" - else - mysql_regex_include_dir="$ac_mysql_source_dir/regex" - fi + mysql_regex_include_dir="$ac_mysql_source_dir/regex" MYSQL_INCLUDES="$MYSQL_INCLUDES -I$mysql_regex_include_dir" fi if test -d "$ac_mysql_source_dir/libbinlogevents"; then diff --git a/storage/mroonga/vendor/groonga/configure.ac b/storage/mroonga/vendor/groonga/configure.ac index 414876c6a26..cab122ad3a5 100644 --- a/storage/mroonga/vendor/groonga/configure.ac +++ b/storage/mroonga/vendor/groonga/configure.ac @@ -1613,30 +1613,6 @@ AC_SUBST(ONIGMO_CFLAGS) AC_SUBST(ONIGMO_LIBS) AM_CONDITIONAL(WITH_BUNDLED_ONIGMO, test "$with_onigmo" != "no" -a "x$have_onigmo" != "xyes") -# PCRE -GRN_WITH_PCRE=no -AC_ARG_WITH(pcre, - [AS_HELP_STRING([--without-pcre], - [Don't use PCRE for groonga-httpd. [default=auto-detect]])], - [with_pcre="$withval"], - [with_pcre="auto"]) -if test "x$with_pcre" != "xno"; then - m4_ifdef([PKG_CHECK_MODULES], [ - PKG_CHECK_MODULES([PCRE], [libpcre], - [_PKG_CONFIG(PCRE_LIBS_ONLY_L, [libs-only-L], [libpcre]) - PCRE_LIBS_ONLY_L="$pkg_cv_PCRE_LIBS_ONLY_L" - GRN_WITH_PCRE=yes], - [GRN_WITH_PCRE=no]) - ], - [GRN_WITH_PCRE=no]) - if test "x$with_pcre" = "xyes" -a "$GRN_WITH_PCRE" != "yes"; then - AC_MSG_ERROR("No PCRE found") - fi -fi -AC_SUBST(GRN_WITH_PCRE) -AC_SUBST(PCRE_CFLAGS) -AC_SUBST(PCRE_LIBS_ONLY_L) - # SSL GRN_WITH_SSL=no AC_ARG_WITH(ssl, @@ -1788,11 +1764,6 @@ echo "groonga-httpd:" echo " enable: $enable_groonga_httpd" if test "$enable_groonga_httpd" = "yes"; then echo " default database path: $GROONGA_HTTPD_DEFAULT_DATABASE_PATH" - echo " PCRE: $GRN_WITH_PCRE" - if test "$GRN_WITH_PCRE" = "yes"; then - echo " CFLAGS: $PCRE_CFLAGS" - echo " LIBS only -L: $PCRE_LIBS_ONLY_L" - fi echo " SSL: $GRN_WITH_SSL" if test "$GRN_WITH_SSL" = "yes"; then echo " CFLAGS: $SSL_CFLAGS" diff --git a/storage/mroonga/vendor/groonga/tools/travis-install.sh b/storage/mroonga/vendor/groonga/tools/travis-install.sh index 72240ec1580..d7ac400c1a9 100755 --- a/storage/mroonga/vendor/groonga/tools/travis-install.sh +++ b/storage/mroonga/vendor/groonga/tools/travis-install.sh @@ -23,7 +23,6 @@ case "${TRAVIS_OS_NAME}" in brew outdated pkg-config || brew upgrade pkg-config brew reinstall libtool brew outdated libevent || brew upgrade libevent - brew outdated pcre || brew upgrade pcre brew install \ autoconf-archive \ msgpack \ |