diff options
author | Gustavo André dos Santos Lopes <cataphract@php.net> | 2010-10-06 18:53:27 +0000 |
---|---|---|
committer | Gustavo André dos Santos Lopes <cataphract@php.net> | 2010-10-06 18:53:27 +0000 |
commit | e283f7a7fed6c6b953f64371a981fe1c95dd8f6c (patch) | |
tree | 92cdbb955e31ac1441232269f458a03da2f8a613 /ext/intl/transliterator | |
parent | da6366e74a131a975ec7af1d797c025e1379cefb (diff) | |
download | php-git-e283f7a7fed6c6b953f64371a981fe1c95dd8f6c.tar.gz |
- Added support for ICU Transformations (Transliterator).
- Changes request #52986 to "to be documented".
Diffstat (limited to 'ext/intl/transliterator')
-rw-r--r-- | ext/intl/transliterator/transliterator.c | 138 | ||||
-rw-r--r-- | ext/intl/transliterator/transliterator.h | 29 | ||||
-rw-r--r-- | ext/intl/transliterator/transliterator_class.c | 434 | ||||
-rw-r--r-- | ext/intl/transliterator/transliterator_class.h | 65 | ||||
-rw-r--r-- | ext/intl/transliterator/transliterator_methods.c | 538 | ||||
-rw-r--r-- | ext/intl/transliterator/transliterator_methods.h | 38 |
6 files changed, 1242 insertions, 0 deletions
diff --git a/ext/intl/transliterator/transliterator.c b/ext/intl/transliterator/transliterator.c new file mode 100644 index 0000000000..75c9eaabda --- /dev/null +++ b/ext/intl/transliterator/transliterator.c @@ -0,0 +1,138 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@php.net> | + +----------------------------------------------------------------------+ + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "transliterator_class.h" +#include "transliterator.h" +#include "intl_convert.h" + +#include <unicode/ustring.h> + +/* {{{ transliterator_register_constants + * Register constants common for both (OO and procedural) APIs. 
+ */ +void transliterator_register_constants( INIT_FUNC_ARGS ) +{ + if( !Transliterator_ce_ptr ) + { + zend_error( E_ERROR, "Transliterator class not defined" ); + return; + } + + #define TRANSLITERATOR_EXPOSE_CONST( x ) REGISTER_LONG_CONSTANT( #x, x, CONST_CS ) + #define TRANSLITERATOR_EXPOSE_CLASS_CONST( x ) zend_declare_class_constant_long( Transliterator_ce_ptr, ZEND_STRS( #x ) - 1, TRANSLITERATOR_##x TSRMLS_CC ); + #define TRANSLITERATOR_EXPOSE_CUSTOM_CLASS_CONST( name, value ) zend_declare_class_constant_long( Transliterator_ce_ptr, ZEND_STRS( name ) - 1, value TSRMLS_CC );*/ + + /* Transliteration direction constants */ + TRANSLITERATOR_EXPOSE_CLASS_CONST( FORWARD ); + TRANSLITERATOR_EXPOSE_CLASS_CONST( REVERSE ); + + #undef NORMALIZER_EXPOSE_CUSTOM_CLASS_CONST + #undef NORMALIZER_EXPOSE_CLASS_CONST + #undef NORMALIZER_EXPOSE_CONST +} +/* }}} */ + +/* {{{ transliterator_parse_error_to_string + * Converts a parse error into a human-readable string. + */ +smart_str transliterator_parse_error_to_string( UParseError* pe ) +{ + smart_str ret = {0}; + char *buf; + int u8len; + UErrorCode status; + int any = 0; + + assert( pe != NULL ); + + smart_str_appends( &ret, "parse error " ); + if( pe->line > 0 ) + { + smart_str_appends( &ret, "on line " ); + smart_str_append_long( &ret, (long ) pe->line ); + any = 1; + } + if( pe->offset >= 0 ) { + if( any ) + smart_str_appends( &ret, ", " ); + else + smart_str_appends( &ret, "at " ); + + smart_str_appends( &ret, "offset " ); + smart_str_append_long( &ret, (long ) pe->offset ); + any = 1; + } + + if (pe->preContext[0] != 0 ) { + if( any ) + smart_str_appends( &ret, ", " ); + + smart_str_appends( &ret, "after \"" ); + intl_convert_utf16_to_utf8( &buf, &u8len, pe->preContext, -1, &status ); + if( U_FAILURE( status ) ) + { + smart_str_appends( &ret, "(could not convert parser error pre-context to UTF-8)" ); + } + else { + smart_str_appendl( &ret, buf, u8len ); + efree( buf ); + } + smart_str_appends( &ret, "\"" ); + any = 1; + } + + if( 
pe->postContext[0] != 0 ) + { + if( any ) + smart_str_appends( &ret, ", " ); + + smart_str_appends( &ret, "before or at \"" ); + intl_convert_utf16_to_utf8( &buf, &u8len, pe->postContext, -1, &status ); + if( U_FAILURE( status ) ) + { + smart_str_appends( &ret, "(could not convert parser error post-context to UTF-8)" ); + } + else + { + smart_str_appendl( &ret, buf, u8len ); + efree( buf ); + } + smart_str_appends( &ret, "\"" ); + any = 1; + } + + if( !any ) + { + smart_str_free( &ret ); + smart_str_appends( &ret, "no parse error" ); + } + + smart_str_0( &ret ); + return ret; +} + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ diff --git a/ext/intl/transliterator/transliterator.h b/ext/intl/transliterator/transliterator.h new file mode 100644 index 0000000000..cfd5d38dbd --- /dev/null +++ b/ext/intl/transliterator/transliterator.h @@ -0,0 +1,29 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@netcabo.pt> | + +----------------------------------------------------------------------+ + */ + +#ifndef TRANSLITERATOR_TRANSLITERATOR_H +#define TRANSLITERATOR_TRANSLITERATOR_H + +#include <php.h> +#include <unicode/utypes.h> +#include <unicode/utrans.h> + +#include "ext/standard/php_smart_str.h" + +void transliterator_register_constants( INIT_FUNC_ARGS ); +smart_str transliterator_parse_error_to_string( UParseError* pe ); + +#endif /* #ifndef TRANSLITERATOR_TRANSLITERATOR_H */ diff --git a/ext/intl/transliterator/transliterator_class.c b/ext/intl/transliterator/transliterator_class.c new file mode 100644 index 0000000000..727cf1e485 --- /dev/null +++ b/ext/intl/transliterator/transliterator_class.c @@ -0,0 +1,434 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@php.net> | + +----------------------------------------------------------------------+ + */ + +#include "transliterator_class.h" +#include "php_intl.h" +#include "transliterator_methods.h" +#include "intl_error.h" +#include "intl_convert.h" +#include "intl_data.h" + +#include <unicode/utrans.h> + +zend_class_entry *Transliterator_ce_ptr = NULL; + +zend_object_handlers Transliterator_handlers; + +/* {{{ int transliterator_object_construct( zval *object, UTransliterator *utrans, UErrorCode *status TSRMLS_DC ) + * Initialize internals of Transliterator_object. + */ +int transliterator_object_construct( zval *object, + UTransliterator *utrans, + UErrorCode *status TSRMLS_DC ) +{ + const UChar *ustr_id; + int32_t ustr_id_len; + char *str_id; + int str_id_len; + Transliterator_object *to; + + TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; + + assert( to->utrans == NULL ); + /* this assignment must happen before any return with failure because the + * caller relies on it always being made (so it can just destroy the object + * to close the transliterator) */ + to->utrans = utrans; + + ustr_id = utrans_getUnicodeID( utrans, &ustr_id_len ); + intl_convert_utf16_to_utf8( &str_id, &str_id_len, ustr_id, (int ) ustr_id_len, status ); + if( U_FAILURE( *status ) ) + { + return FAILURE; + } + + zend_update_property_stringl( Transliterator_ce_ptr, object, + "id", sizeof( "id" ) - 1, str_id, str_id_len TSRMLS_CC ); + efree( str_id ); + return SUCCESS; +} +/* }}} */ + +/* + * Auxiliary functions needed by objects of 'Transliterator' class + */ + +/* {{{ void transliterator_object_init( Transliterator_object* to ) + * Initialize internals of Transliterator_object. 
+ */ +static void transliterator_object_init( Transliterator_object* to TSRMLS_DC ) +{ + if( !to ) + return; + + intl_error_init( TRANSLITERATOR_ERROR_P( to ) TSRMLS_CC ); +} +/* }}} */ + +/* {{{ void transliterator_object_destroy( Transliterator_object* to ) + * Clean up memory allocated by internals of Transliterator_object + */ +static void transliterator_object_destroy( Transliterator_object* to TSRMLS_DC ) +{ + if( !to ) + return; + + if( to->utrans ) + { + utrans_close( to->utrans ); + to->utrans = NULL; + } + + intl_error_reset( TRANSLITERATOR_ERROR_P( to ) TSRMLS_CC ); +} +/* }}} */ + +/* {{{ Transliterator_objects_dtor */ +static void Transliterator_objects_dtor( + void *object, + zend_object_handle handle TSRMLS_DC ) +{ + zend_objects_destroy_object( object, handle TSRMLS_CC ); +} +/* }}} */ + +/* {{{ Transliterator_objects_free */ +static void Transliterator_objects_free( zend_object *object TSRMLS_DC ) +{ + Transliterator_object* to = (Transliterator_object*) object; + + zend_object_std_dtor( &to->zo TSRMLS_CC ); + + transliterator_object_destroy( to TSRMLS_CC ); + + efree( to ); +} +/* }}} */ + +/* {{{ Transliterator_object_create */ +static zend_object_value Transliterator_object_create( + zend_class_entry *ce TSRMLS_DC ) +{ + zend_object_value retval; + Transliterator_object* intern; + + intern = ecalloc( 1, sizeof( Transliterator_object ) ); + + zend_object_std_init( &intern->zo, ce TSRMLS_CC ); +#if PHP_VERSION_ID < 50399 + zend_hash_copy( intern->zo.properties, &(ce->default_properties ), + (copy_ctor_func_t) zval_add_ref, NULL, sizeof( zval* ) ); +#else + object_properties_init( (zend_object*) intern, ce ); +#endif + transliterator_object_init( intern TSRMLS_CC ); + + retval.handle = zend_objects_store_put( + intern, + Transliterator_objects_dtor, + (zend_objects_free_object_storage_t) Transliterator_objects_free, + NULL TSRMLS_CC ); + + retval.handlers = &Transliterator_handlers; + + return retval; +} +/* }}} */ + +/* + * Object handlers for 
Transliterator class (and subclasses) + */ + +/* {{{ clone handler for Transliterator */ +static zend_object_value Transliterator_clone_obj( zval *object TSRMLS_DC ) +{ + Transliterator_object *to_orig, + *to_new; + zend_object_value ret_val; + intl_error_reset( NULL TSRMLS_CC ); + + to_orig = zend_object_store_get_object( object TSRMLS_CC ); + intl_error_reset( INTL_DATA_ERROR_P( to_orig ) TSRMLS_CC ); + ret_val = Transliterator_ce_ptr->create_object( Transliterator_ce_ptr TSRMLS_CC ); + to_new = zend_object_store_get_object_by_handle( ret_val.handle TSRMLS_CC ); + + zend_objects_clone_members( &to_new->zo, ret_val, + &to_orig->zo, Z_OBJ_HANDLE_P( object ) TSRMLS_CC ); + + if( to_orig->utrans != NULL ) + { + UTransliterator *utrans = NULL; + zval tempz; /* dummy zval to pass to transliterator_object_construct */ + + /* guaranteed to return NULL if it fails */ + utrans = utrans_clone( to_orig->utrans, TRANSLITERATOR_ERROR_CODE_P( to_orig ) ); + + if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to_orig ) ) ) + goto err; + + Z_OBJVAL( tempz ) = ret_val; + transliterator_object_construct( &tempz, utrans, + TRANSLITERATOR_ERROR_CODE_P( to_orig ) TSRMLS_CC ); + + if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to_orig ) ) ) + { + char *err_msg; +err: + + if( utrans != NULL ) + transliterator_object_destroy( to_new TSRMLS_CC ); + + /* set the error anyway, in case in the future we decide not to + * throw an error. 
It also helps build the error message */ + intl_error_set_code( NULL, INTL_DATA_ERROR_CODE( to_orig ) TSRMLS_CC ); + intl_errors_set_custom_msg( TRANSLITERATOR_ERROR_P( to_orig ), + "Could not clone transliterator", 0 TSRMLS_CC ); + + err_msg = intl_error_get_message( TRANSLITERATOR_ERROR_P( to_orig ) TSRMLS_CC ); + php_error_docref( NULL TSRMLS_CC, E_ERROR, "%s", err_msg ); + efree( err_msg ); /* if it's changed into a warning */ + /* do not destroy tempz; we need to return something */ + } + } + else + { + /* We shouldn't have unconstructed objects in the first place */ + php_error_docref( NULL TSRMLS_CC, E_WARNING, + "Cloning unconstructed transliterator." ); + } + + return ret_val; +} +/* }}} */ + +#if PHP_VERSION_ID >= 50399 +# define TRANSLITERATOR_PROPERTY_HANDLER_PROLOG \ + zval tmp_member; \ + if( Z_TYPE_P( member ) != IS_STRING ) \ + { \ + tmp_member = *member; \ + zval_copy_ctor( &tmp_member ); \ + convert_to_string( &tmp_member ); \ + member = &tmp_member; \ + key = NULL; \ + } +#else +# define TRANSLITERATOR_PROPERTY_HANDLER_PROLOG \ + zval tmp_member; \ + if( Z_TYPE_P( member ) != IS_STRING ) \ + { \ + tmp_member = *member; \ + zval_copy_ctor( &tmp_member ); \ + convert_to_string( &tmp_member ); \ + member = &tmp_member; \ + } +#endif + +#define TRANSLITERATOR_PROPERTY_HANDLER_EPILOG \ + if( member == &tmp_member ) \ + { \ + zval_dtor( &tmp_member ); \ + } + +/* {{{ get_property_ptr_ptr handler */ +#if PHP_VERSION_ID < 50399 +static zval **Transliterator_get_property_ptr_ptr( zval *object, zval *member TSRMLS_DC ) +#else +static zval **Transliterator_get_property_ptr_ptr( zval *object, zval *member, + const struct _zend_literal *key TSRMLS_DC ) +#endif +{ + zval **retval; + + TRANSLITERATOR_PROPERTY_HANDLER_PROLOG; + + if(zend_binary_strcmp( "id", sizeof( "id" ) - 1, + Z_STRVAL_P( member ), Z_STRLEN_P( member ) ) == 0 ) + { + retval = NULL; /* fallback to read_property */ + } + else + { +#if PHP_VERSION_ID < 50399 + retval = 
std_object_handlers.get_property_ptr_ptr( object, member TSRMLS_CC ); +#else + retval = std_object_handlers.get_property_ptr_ptr( object, member, key TSRMLS_CC ); +#endif + } + + TRANSLITERATOR_PROPERTY_HANDLER_EPILOG; + + return retval; +} +/* }}} */ + +/* {{{ read_property handler */ +#if PHP_VERSION_ID < 50399 +static zval *Transliterator_read_property( zval *object, zval *member, int type TSRMLS_DC ) /* {{{ */ +#else +static zval *Transliterator_read_property( zval *object, zval *member, int type, + const struct _zend_literal *key TSRMLS_DC ) /* {{{ */ +#endif +{ + zval *retval; + + TRANSLITERATOR_PROPERTY_HANDLER_PROLOG; + + if( ( type != BP_VAR_R && type != BP_VAR_IS ) && + ( zend_binary_strcmp( "id", sizeof( "id" ) - 1, + Z_STRVAL_P( member ), Z_STRLEN_P( member ) ) == 0 ) ) + { + php_error_docref0( NULL TSRMLS_CC, E_WARNING, "The property \"id\" is read-only" ); + retval = &EG( uninitialized_zval ); + } + else + { +#if PHP_VERSION_ID < 50399 + retval = std_object_handlers.read_property( object, member, type TSRMLS_CC ); +#else + retval = std_object_handlers.read_property( object, member, type, key TSRMLS_CC ); +#endif + } + + TRANSLITERATOR_PROPERTY_HANDLER_EPILOG; + + return retval; +} + +/* }}} */ + +/* {{{ write_property handler */ +#if PHP_VERSION_ID < 50399 +static void Transliterator_write_property( zval *object, zval *member, zval *value TSRMLS_DC ) +#else +static void Transliterator_write_property( zval *object, zval *member, zval *value, + const struct _zend_literal *key TSRMLS_DC ) +#endif +{ + TRANSLITERATOR_PROPERTY_HANDLER_PROLOG; + + if( ( EG( scope ) != Transliterator_ce_ptr ) && + ( zend_binary_strcmp( "id", sizeof( "id" ) - 1, + Z_STRVAL_P( member ), Z_STRLEN_P( member ) ) == 0 ) ) + { + php_error_docref0( NULL TSRMLS_CC, E_WARNING, "The property \"id\" is read-only" ); + } + else + { +#if PHP_VERSION_ID < 50399 + std_object_handlers.write_property( object, member, value TSRMLS_CC ); +#else + std_object_handlers.write_property( object, 
member, value, key TSRMLS_CC ); +#endif + } + + TRANSLITERATOR_PROPERTY_HANDLER_EPILOG; +} +/* }}} */ + +/* + * 'Transliterator' class registration structures & functions + */ + +/* {{{ Transliterator methods arguments info */ + +ZEND_BEGIN_ARG_INFO_EX( ainfo_trans_void, 0, 0, 0 ) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX( ainfo_trans_create, 0, 0, 1 ) + ZEND_ARG_INFO( 0, id ) + ZEND_ARG_INFO( 0, direction ) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX( ainfo_trans_create_from_rules, 0, 0, 1 ) + ZEND_ARG_INFO( 0, rules ) + ZEND_ARG_INFO( 0, direction ) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX( ainfo_trans_create_inverse, 0, 0, 1 ) + ZEND_ARG_OBJ_INFO( 0, orig_trans, Transliterator, 0 ) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX( ainfo_trans_me_transliterate, 0, 0, 1 ) + ZEND_ARG_INFO( 0, subject ) + ZEND_ARG_INFO( 0, start ) + ZEND_ARG_INFO( 0, end ) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX( ainfo_trans_error, 0, 0, 1 ) + ZEND_ARG_OBJ_INFO( 0, trans, Transliterator, 0 ) +ZEND_END_ARG_INFO() + +/* }}} */ + +/* {{{ Transliterator_class_functions + * Every 'Transliterator' class method has an entry in this table + */ +zend_function_entry Transliterator_class_functions[] = { + PHP_ME( Transliterator, __construct, ainfo_trans_void, ZEND_ACC_PRIVATE | ZEND_ACC_CTOR | ZEND_ACC_FINAL ) + PHP_ME_MAPPING( create, transliterator_create, ainfo_trans_create, ZEND_ACC_STATIC |ZEND_ACC_PUBLIC ) + PHP_ME_MAPPING( createFromRules,transliterator_create_from_rules, ainfo_trans_create_from_rules, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC ) + PHP_ME_MAPPING( createInverse, transliterator_create_inverse, ainfo_trans_void, ZEND_ACC_PUBLIC ) + PHP_ME_MAPPING( listIDs, transliterator_list_ids, ainfo_trans_void, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC ) + PHP_ME_MAPPING( transliterate, transliterator_transliterate, ainfo_trans_me_transliterate, ZEND_ACC_PUBLIC ) + PHP_ME_MAPPING( getErrorCode, transliterator_get_error_code, ainfo_trans_void, ZEND_ACC_PUBLIC ) + PHP_ME_MAPPING( 
getErrorMessage,transliterator_get_error_message, ainfo_trans_void, ZEND_ACC_PUBLIC ) + { NULL, NULL, NULL } +}; +/* }}} */ + +/* {{{ transliterator_register_Transliterator_class + * Initialize 'Transliterator' class + */ +void transliterator_register_Transliterator_class( TSRMLS_D ) +{ + zend_class_entry ce; + + /* Create and register 'Transliterator' class. */ + INIT_CLASS_ENTRY( ce, "Transliterator", Transliterator_class_functions ); + ce.create_object = Transliterator_object_create; + Transliterator_ce_ptr = zend_register_internal_class( &ce TSRMLS_CC ); + memcpy( &Transliterator_handlers, zend_get_std_object_handlers(), + sizeof Transliterator_handlers ); + Transliterator_handlers.clone_obj = Transliterator_clone_obj; + Transliterator_handlers.get_property_ptr_ptr = Transliterator_get_property_ptr_ptr; + Transliterator_handlers.read_property = Transliterator_read_property; + Transliterator_handlers.write_property = Transliterator_write_property; + + /* Declare 'Transliterator' class properties */ + if( !Transliterator_ce_ptr ) + { + zend_error( E_ERROR, + "Transliterator: attempt to create properties " + "on a non-registered class." 
); + return; + } + zend_declare_property_null( Transliterator_ce_ptr, + "id", sizeof( "id" ) - 1, ZEND_ACC_PUBLIC TSRMLS_CC ); + + /* constants are declared in transliterator_register_constants, called from MINIT */ + +} +/* }}} */ + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ diff --git a/ext/intl/transliterator/transliterator_class.h b/ext/intl/transliterator/transliterator_class.h new file mode 100644 index 0000000000..5ca50ed2f4 --- /dev/null +++ b/ext/intl/transliterator/transliterator_class.h @@ -0,0 +1,65 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@netcabo.pt> | + +----------------------------------------------------------------------+ + */ + +#ifndef TRANSLITERATOR_CLASS_H +#define TRANSLITERATOR_CLASS_H + +#include <php.h> + +#include "intl_common.h" +#include "intl_error.h" + +#include <unicode/utrans.h> + +typedef struct { + zend_object zo; + + // error handling + intl_error err; + + // ICU transliterator + UTransliterator* utrans; +} Transliterator_object; + +#define TRANSLITERATOR_FORWARD UTRANS_FORWARD +#define TRANSLITERATOR_REVERSE UTRANS_REVERSE + +#define TRANSLITERATOR_ERROR( co ) (co)->err +#define TRANSLITERATOR_ERROR_P( co ) &(TRANSLITERATOR_ERROR( co )) + +#define TRANSLITERATOR_ERROR_CODE( co ) INTL_ERROR_CODE(TRANSLITERATOR_ERROR( co )) +#define TRANSLITERATOR_ERROR_CODE_P( co ) &(INTL_ERROR_CODE(TRANSLITERATOR_ERROR( co ))) + +#define TRANSLITERATOR_METHOD_INIT_VARS INTL_METHOD_INIT_VARS( Transliterator, to ) +#define TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK INTL_METHOD_FETCH_OBJECT( Transliterator, to ) +#define TRANSLITERATOR_METHOD_FETCH_OBJECT\ + TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; \ + if( to->utrans == NULL ) \ + { \ + intl_errors_set( &to->err, U_ILLEGAL_ARGUMENT_ERROR, "Found unconstructed transliterator", 0 TSRMLS_CC ); \ + RETURN_FALSE; \ + } + +int transliterator_object_construct( zval *object, + UTransliterator *utrans, + UErrorCode *status TSRMLS_DC ); + +void transliterator_register_Transliterator_class( TSRMLS_D ); + +extern zend_class_entry *Transliterator_ce_ptr; +extern zend_object_handlers Transliterator_handlers; + +#endif /* #ifndef TRANSLITERATOR_CLASS_H */ diff --git a/ext/intl/transliterator/transliterator_methods.c b/ext/intl/transliterator/transliterator_methods.c new file mode 100644 index 0000000000..fbe2cf1984 --- /dev/null +++ b/ext/intl/transliterator/transliterator_methods.c @@ -0,0 +1,538 @@ +/* + 
+----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@php.net> | + +----------------------------------------------------------------------+ + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "php_intl.h" +#include "transliterator.h" +#include "transliterator_class.h" +#include "transliterator_methods.h" +#include "intl_data.h" +#include "intl_convert.h" + +#include <zend_exceptions.h> + +static int create_transliterator( char *str_id, int str_id_len, long direction, zval *object TSRMLS_DC ) +{ + Transliterator_object *to; + UChar *ustr_id = NULL; + int32_t ustr_id_len = 0; + UTransliterator *utrans; + UParseError parse_error = {0, -1}; + + intl_error_reset( NULL TSRMLS_CC ); + + if( ( direction != TRANSLITERATOR_FORWARD ) && (direction != TRANSLITERATOR_REVERSE ) ) + { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "transliterator_create: invalid direction", 0 TSRMLS_CC ); + return FAILURE; + } + + object_init_ex( object, Transliterator_ce_ptr ); + TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; /* fetch zend object from zval "object" into "to" */ + + /* Convert transliterator id to UTF-16 */ + intl_convert_utf8_to_utf16( &ustr_id, &ustr_id_len, str_id, str_id_len, TRANSLITERATOR_ERROR_CODE_P( to ) ); + if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) ) + { + intl_error_set_code( 
NULL, TRANSLITERATOR_ERROR_CODE( to ) TSRMLS_CC ); + intl_error_set_custom_msg( NULL, "String conversion of id to UTF-16 failed", 0 TSRMLS_CC ); + zval_dtor( object ); + return FAILURE; + } + + /* Open ICU Transliterator. */ + utrans = utrans_openU( ustr_id, ustr_id_len, (UTransDirection ) direction, + NULL, -1, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) ); + efree( ustr_id ); + + if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) ) + { + char *buf = NULL; + intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) TSRMLS_CC ); + spprintf( &buf, 0, "transliterator_create: unable to open ICU transliterator" + " with id \"%s\"", str_id ); + if( buf == NULL ) { + intl_error_set_custom_msg( NULL, + "transliterator_create: unable to open ICU transliterator", 0 TSRMLS_CC ); + } + else + { + intl_error_set_custom_msg( NULL, buf, /* copy message */ 1 TSRMLS_CC ); + efree( buf ); + } + zval_dtor( object ); + return FAILURE; + } + + transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) TSRMLS_CC ); + /* no need to close the transliterator manually on construction error */ + if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) ) + { + intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) TSRMLS_CC ); + intl_error_set_custom_msg( NULL, + "transliterator_create: internal constructor call failed", 0 TSRMLS_CC ); + zval_dtor( object ); + return FAILURE; + } + + return SUCCESS; +} + +/* {{{ proto Transliterator transliterator_create( string id [, int direction ] ) + * proto Transliterator Transliterator::create( string id [, int direction ] ) + * Opens a transliterator by id. 
+ */ +PHP_FUNCTION( transliterator_create ) +{ + char *str_id; + int str_id_len; + long direction = TRANSLITERATOR_FORWARD; + int res; + + TRANSLITERATOR_METHOD_INIT_VARS; + + (void) to; /* unused */ + + if( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|l", + &str_id, &str_id_len, &direction ) == FAILURE ) + { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "transliterator_create: bad arguments", 0 TSRMLS_CC ); + RETURN_NULL(); + } + + object = return_value; + res = create_transliterator( str_id, str_id_len, direction, object TSRMLS_CC ); + if( res == FAILURE ) + RETURN_NULL(); + + /* success, leave return_value as it is (set by create_transliterator) */ +} +/* }}} */ + +/* {{{ proto Transliterator transliterator_create_from_rules( string rules [, int direction ] ) + * proto Transliterator Transliterator::createFromRules( string rules [, int direction ] ) + * Opens a transliterator from a set of rules. + */ +PHP_FUNCTION( transliterator_create_from_rules ) +{ + char *str_rules; + int str_rules_len; + UChar *ustr_rules = NULL; + int32_t ustr_rules_len = 0; + long direction = TRANSLITERATOR_FORWARD; + UParseError parse_error = {0, -1}; + UTransliterator *utrans; + UChar id[] = {0x52, 0x75, 0x6C, 0x65, 0x73, 0x54, 0x72, + 0x61, 0x6E, 0x73, 0x50, 0x48, 0x50, 0}; /* RulesTransPHP */ + TRANSLITERATOR_METHOD_INIT_VARS; + + if( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|l", + &str_rules, &str_rules_len, &direction ) == FAILURE ) + { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "transliterator_create_from_rules: bad arguments", 0 TSRMLS_CC ); + RETURN_NULL(); + } + + if( ( direction != TRANSLITERATOR_FORWARD ) && (direction != TRANSLITERATOR_REVERSE ) ) + { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "transliterator_create_from_rules: invalid direction", 0 TSRMLS_CC ); + RETURN_NULL(); + } + + object = return_value; + object_init_ex( object, Transliterator_ce_ptr ); + TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; + + intl_convert_utf8_to_utf16( 
&ustr_rules, &ustr_rules_len, + str_rules, str_rules_len, TRANSLITERATOR_ERROR_CODE_P( to ) ); + /* (I'm not a big fan of non-obvious flow control macros ). + * This one checks the error value, destroys object and returns false */ + INTL_CTOR_CHECK_STATUS( to, "String conversion of rules to UTF-16 failed" ); + + /* Open ICU Transliterator. */ + utrans = utrans_openU( id, ( sizeof( id ) - 1 ) / ( sizeof( *id ) ), (UTransDirection ) direction, + ustr_rules, ustr_rules_len, &parse_error, TRANSLITERATOR_ERROR_CODE_P( to ) ); + efree( ustr_rules ); + + intl_error_set_code( NULL, INTL_DATA_ERROR_CODE( to ) TSRMLS_CC ); + if( U_FAILURE( INTL_DATA_ERROR_CODE( to ) ) ) + { + char *msg = NULL; + smart_str parse_error_str; + parse_error_str = transliterator_parse_error_to_string( &parse_error ); + spprintf( &msg, 0, "transliterator_create_from_rules: unable to " + "create ICU transliterator from rules (%s)", parse_error_str.c ); + smart_str_free( &parse_error_str ); + if( msg != NULL ) + { + intl_errors_set_custom_msg( INTL_DATA_ERROR_P( to ), msg, 1 TSRMLS_CC ); + efree( msg ); + } + zval_dtor( return_value ); + RETURN_NULL(); + } + transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) TSRMLS_CC ); + /* no need to close the transliterator manually on construction error */ + INTL_CTOR_CHECK_STATUS( to, "transliterator_create_from_rules: internal constructor call failed" ); +} +/* }}} */ + +/* {{{ proto Transliterator transliterator_create_inverse( Transliterator orig_trans ) + * proto Transliterator Transliterator::createInverse() + * Opens the inverse of the given transliterator. 
+ */ +PHP_FUNCTION( transliterator_create_inverse ) +{ + Transliterator_object *to_orig; + UTransliterator *utrans; + TRANSLITERATOR_METHOD_INIT_VARS; + + if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "O", + &object, Transliterator_ce_ptr ) == FAILURE ) + { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "transliterator_create_inverse: bad arguments", 0 TSRMLS_CC ); + RETURN_NULL(); + } + + TRANSLITERATOR_METHOD_FETCH_OBJECT; + to_orig = to; + + object = return_value; + object_init_ex( object, Transliterator_ce_ptr ); + TRANSLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; /* change "to" into new object (from "object" ) */ + + utrans = utrans_openInverse( to_orig->utrans, TRANSLITERATOR_ERROR_CODE_P( to ) ); + INTL_CTOR_CHECK_STATUS( to, "transliterator_create_inverse: could not create " + "inverse ICU transliterator" ); + transliterator_object_construct( object, utrans, TRANSLITERATOR_ERROR_CODE_P( to ) TSRMLS_CC ); + /* no need to close the transliterator manually on construction error */ + INTL_CTOR_CHECK_STATUS( to, "transliterator_create: internal constructor call failed" ); +} +/* }}} */ + +/* {{{ proto array transliterator_list_ids() + * proto array Transliterator::listIDs() + * Return an array with the registered transliterator IDs. 
+ */ +PHP_FUNCTION( transliterator_list_ids ) +{ + UEnumeration *en; + const UChar *elem; + int32_t elem_len; + UErrorCode status = U_ZERO_ERROR; + + intl_error_reset( NULL TSRMLS_CC ); + + if( zend_parse_parameters_none() == FAILURE ) + { + /* seems to be the convention in this lib to return false instead of + * null on bad parameter types, except on constructors and factory + * methods */ + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "transliterator_list_ids: bad arguments", 0 TSRMLS_CC ); + RETURN_FALSE; + } + + en = utrans_openIDs( &status ); + INTL_CHECK_STATUS( status, + "transliterator_list_ids: Failed to obtain registered transliterators" ); + + array_init( return_value ); + while( elem = uenum_unext( en, &elem_len, &status ) ) + { + char *el_char = NULL; + int el_len = 0; + + intl_convert_utf16_to_utf8( &el_char, &el_len, elem, elem_len, &status ); + + if( U_FAILURE( status ) ) + { + efree( el_char ); + break; + } + else + { + add_next_index_stringl( return_value, el_char, el_len, 0 ); + } + } + uenum_close( en ); + + intl_error_set_code( NULL, status TSRMLS_CC ); + if( U_FAILURE( status ) ) + { + zval_dtor( return_value ); + RETVAL_FALSE; + intl_error_set_custom_msg( NULL, "transliterator_list_ids: " + "Failed to build array of registered transliterators", 0 TSRMLS_CC ); + } +} +/* }}} */ + +/* {{{ proto string transliterator_transliterate( Transliterator trans, string subject [, int start = 0 [, int end = -1 ]] ) + * proto string Transliterator::transliterate( string subject [, int start = 0 [, int end = -1 ]] ) + * Transliterate a string. 
*/ +PHP_FUNCTION( transliterator_transliterate ) +{ + char *str; + UChar *ustr = NULL, + *uresult = NULL; + int str_len; + int32_t ustr_len = 0, + capacity, + uresult_len; + long start = 0, + limit = -1; + int success = 0, + temp_trans = 0; + TRANSLITERATOR_METHOD_INIT_VARS; + + object = getThis(); + + if( object == NULL ) + { + /* in non-OOP version, accept both a transliterator and a string */ + zval **arg1; + if( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "Zs|ll", + &arg1, &str, &str_len, &start, &limit ) == FAILURE ) + { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "transliterator_transliterate: bad arguments", 0 TSRMLS_CC ); + RETURN_FALSE; + } + + if( Z_TYPE_PP( arg1 ) == IS_OBJECT && + instanceof_function( Z_OBJCE_PP( arg1 ), Transliterator_ce_ptr TSRMLS_CC ) ) + { + object = *arg1; + } + else + { /* not a transliterator object as first argument */ + int res; + if(Z_TYPE_PP( arg1 ) != IS_STRING ) + { + SEPARATE_ZVAL( arg1 ); + convert_to_string( *arg1 ); + } + ALLOC_INIT_ZVAL( object ); + temp_trans = 1; + res = create_transliterator( Z_STRVAL_PP( arg1 ), Z_STRLEN_PP( arg1 ), + TRANSLITERATOR_FORWARD, object TSRMLS_CC ); + if( res == FAILURE ) + { + char *message = intl_error_get_message( NULL TSRMLS_CC ); + php_error_docref0( NULL TSRMLS_CC, E_WARNING, "Could not create " + "transliterator with ID \"%s\" (%s)", Z_STRVAL_PP( arg1 ), message ); + efree( message ); + /* don't set U_ILLEGAL_ARGUMENT_ERROR to allow fetching of inner error */ + goto cleanup; + } + } + } + else if( zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|ll", + &str, &str_len, &start, &limit ) == FAILURE ) + { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "transliterator_transliterate: bad arguments", 0 TSRMLS_CC ); + RETURN_FALSE; + } + + if( limit < -1 ) + { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "transliterator_transliterate: \"end\" argument should be " + "either non-negative or -1", 0 TSRMLS_CC ); + RETURN_FALSE; + } + + if( start < 0 || (limit != 
-1 ) && (start > limit ) ) + { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "transliterator_transliterate: \"start\" argument should be " + "non-negative and not bigger than \"end\" (if defined)", 0 TSRMLS_CC ); + RETURN_FALSE; + } + + /* end argument parsing/validation */ + + TRANSLITERATOR_METHOD_FETCH_OBJECT; + + intl_convert_utf8_to_utf16( &ustr, &ustr_len, str, str_len, + TRANSLITERATOR_ERROR_CODE_P( to ) ); + INTL_METHOD_CHECK_STATUS( to, "String conversion of string to UTF-16 failed" ); + + /* we've started allocating resources, goto from now on */ + + if( ( start > ustr_len ) || (( limit != -1 ) && (limit > ustr_len ) ) ) + { + char *msg; + spprintf( &msg, 0, + "transliterator_transliterate: Neither \"start\" nor the \"end\" " + "arguments can exceed the number of UTF-16 code units " + "(in this case, %d)", (int) ustr_len ); + if(msg != NULL ) + { + intl_errors_set( TRANSLITERATOR_ERROR_P( to ), U_ILLEGAL_ARGUMENT_ERROR, + msg, 1 TSRMLS_CC ); + efree( msg ); + } + RETVAL_FALSE; + goto cleanup; + } + + uresult = safe_emalloc( ustr_len, sizeof( UChar ), 1 * sizeof( UChar ) ); + capacity = ustr_len + 1; + + while( 1 ) + { + int32_t temp_limit = ( limit == -1 ? 
ustr_len : (int32_t) limit ); + memcpy( uresult, ustr, ustr_len * sizeof( UChar ) ); + uresult_len = ustr_len; + + utrans_transUChars( to->utrans, uresult, &uresult_len, capacity, (int32_t) start, + &temp_limit, TRANSLITERATOR_ERROR_CODE_P( to ) ); + if( TRANSLITERATOR_ERROR_CODE( to ) == U_BUFFER_OVERFLOW_ERROR ) + { + efree( uresult ); + + uresult = safe_emalloc( uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) ); + capacity = uresult_len + 1; + + intl_error_reset( TRANSLITERATOR_ERROR_P( to ) TSRMLS_CC ); + } + else if(TRANSLITERATOR_ERROR_CODE( to ) == U_STRING_NOT_TERMINATED_WARNING ) + { + uresult = safe_erealloc( uresult, uresult_len, sizeof( UChar ), 1 * sizeof( UChar ) ); + + intl_error_reset( TRANSLITERATOR_ERROR_P( to ) TSRMLS_CC ); + break; + } + else if( U_FAILURE( TRANSLITERATOR_ERROR_CODE( to ) ) ) + { + intl_error_set_code( NULL, TRANSLITERATOR_ERROR_CODE( to ) TSRMLS_CC ); + intl_errors_set_custom_msg( TRANSLITERATOR_ERROR_P( to ), + "transliterator_transliterate: transliteration failed", 0 TSRMLS_CC ); + goto cleanup; + } + else + break; + } + + uresult[uresult_len] = (UChar) 0; + + success = 1; + +cleanup: + if( ustr ) + efree( ustr ); + + if( success ) { + /* frees uresult even on error */ + INTL_METHOD_RETVAL_UTF8( to, uresult, uresult_len, 1 ); + } + else + { + if( uresult ) + efree( uresult ); + RETVAL_FALSE; + } + + if (temp_trans ) + zval_ptr_dtor( &object ); +} +/* }}} */ + +PHP_METHOD( Transliterator, __construct ) +{ + /* this constructor shouldn't be called as it's private */ + zend_throw_exception( NULL, + "An object of this type cannot be created with the new operator.", + 0 TSRMLS_CC ); +} + +/* {{{ proto int transliterator_get_error_code( Transliterator trans ) + * proto int Transliterator::getErrorCode() + * Get the last error code for this transliterator. 
+ */ +PHP_FUNCTION( transliterator_get_error_code ) +{ + TRANSLITERATOR_METHOD_INIT_VARS + + if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "O", + &object, Transliterator_ce_ptr ) == FAILURE ) + { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "transliterator_get_error_code: unable to parse input params", 0 TSRMLS_CC ); + + RETURN_FALSE; + } + + /* Fetch the object (without resetting its last error code ). */ + to = zend_object_store_get_object( object TSRMLS_CC ); + if (to == NULL ) + RETURN_FALSE; + + RETURN_LONG( (long) TRANSLITERATOR_ERROR_CODE( to ) ); +} +/* }}} */ + + +/* {{{ proto string transliterator_get_error_message( Transliterator trans ) + * proto string Transliterator::getErrorMessage() + * Get the last error message for this transliterator. + */ +PHP_FUNCTION( transliterator_get_error_message ) +{ + const char* message = NULL; + TRANSLITERATOR_METHOD_INIT_VARS + + if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "O", + &object, Transliterator_ce_ptr ) == FAILURE ) + { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "transliterator_get_error_message: unable to parse input params", 0 TSRMLS_CC ); + + RETURN_FALSE; + } + + + /* Fetch the object (without resetting its last error code ). */ + to = zend_object_store_get_object( object TSRMLS_CC ); + if (to == NULL ) + RETURN_FALSE; + + /* Return last error message. 
*/ + message = intl_error_get_message( TRANSLITERATOR_ERROR_P( to ) TSRMLS_CC ); + RETURN_STRING( message, 0 ); +} +/* }}} */ + + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ diff --git a/ext/intl/transliterator/transliterator_methods.h b/ext/intl/transliterator/transliterator_methods.h new file mode 100644 index 0000000000..b806de84fb --- /dev/null +++ b/ext/intl/transliterator/transliterator_methods.h @@ -0,0 +1,38 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@php.net> | + +----------------------------------------------------------------------+ + */ + +#ifndef TRANSLITERATOR_METHODS_H +#define TRANSLITERATOR_METHODS_H + +#include <php.h> + +PHP_FUNCTION( transliterator_create ); + +PHP_FUNCTION( transliterator_create_from_rules ); + +PHP_FUNCTION( transliterator_list_ids ); + +PHP_FUNCTION( transliterator_create_inverse ); + +PHP_FUNCTION( transliterator_transliterate ); + +PHP_METHOD( Transliterator, __construct ); + +PHP_FUNCTION( transliterator_get_error_code ); + +PHP_FUNCTION( transliterator_get_error_message ); + +#endif /* #ifndef TRANSLITERATOR_METHODS_H */ |