diff options
author | John Coggeshall <john@php.net> | 2003-08-01 00:22:43 +0000 |
---|---|---|
committer | John Coggeshall <john@php.net> | 2003-08-01 00:22:43 +0000 |
commit | 2fb97cdf951eddd1f6376ba5a42a101e2728b298 (patch) | |
tree | cd3ba01f62f315da1aa883190be045b637d5578f /ext/tidy | |
parent | de7f7658cfabda9b8fa780fc78e3f6d3749e039e (diff) | |
download | php-git-2fb97cdf951eddd1f6376ba5a42a101e2728b298.tar.gz |
Adding the tidy extension to PECL
Diffstat (limited to 'ext/tidy')
-rw-r--r-- | ext/tidy/CREDITS | 2 | ||||
-rw-r--r-- | ext/tidy/README_TIDY | 154 | ||||
-rw-r--r-- | ext/tidy/TODO | 4 | ||||
-rw-r--r-- | ext/tidy/config.m4 | 35 | ||||
-rw-r--r-- | ext/tidy/examples/cleanhtml.php | 40 | ||||
-rw-r--r-- | ext/tidy/examples/dumpit.php | 94 | ||||
-rw-r--r-- | ext/tidy/examples/urlgrab.php | 63 | ||||
-rw-r--r-- | ext/tidy/package.xml | 55 | ||||
-rw-r--r-- | ext/tidy/php_tidy.h | 200 | ||||
-rw-r--r-- | ext/tidy/tests/001.phpt | 24 | ||||
-rw-r--r-- | ext/tidy/tests/002.phpt | 25 | ||||
-rw-r--r-- | ext/tidy/tests/003.phpt | 27 | ||||
-rw-r--r-- | ext/tidy/tests/004.phpt | 24 | ||||
-rw-r--r-- | ext/tidy/tests/005.html | 1 | ||||
-rw-r--r-- | ext/tidy/tests/005.phpt | 25 | ||||
-rw-r--r-- | ext/tidy/tests/006.phpt | 23 | ||||
-rw-r--r-- | ext/tidy/tests/007.phpt | 37 | ||||
-rw-r--r-- | ext/tidy/tidy.c | 1874 |
18 files changed, 2707 insertions, 0 deletions
diff --git a/ext/tidy/CREDITS b/ext/tidy/CREDITS new file mode 100644 index 0000000000..5d590d14f6 --- /dev/null +++ b/ext/tidy/CREDITS @@ -0,0 +1,2 @@ +Tidy +John Coggeshall
\ No newline at end of file diff --git a/ext/tidy/README_TIDY b/ext/tidy/README_TIDY new file mode 100644 index 0000000000..9b15dcd102 --- /dev/null +++ b/ext/tidy/README_TIDY @@ -0,0 +1,154 @@ + +README FOR ext/tidy by John Coggeshall <john@php.net> + +Tidy Version: 0.5b + +Tidy is an extension based on Libtidy (http://tidy.sf.net/) and allows a PHP developer +to clean, repair, and traverse HTML, XHTML, and XML documents -- including ones with +embedded scripting languages such as PHP or ASP within them using OO constructs. + +The Tidy extension has two separate APIs, one for general parsing, cleaning, and +repairing and another for document traversal. The general API is provided below: + + tidy_create() Initialize and return a tidy document resource + tidy_parse_file($tidy, $file) Parse the document stored in $file + tidy_parse_string($tidy, $str) Parse the string stored in $str + + tidy_clean_repair($tidy) Clean and repair the document + tidy_diagnose($tidy) Diagnose a parsed document + + tidy_setopt($tidy, $opt, $val) Set a configuration option $opt to $val + tidy_getopt($tidy, $opt) Retrieve a configuration option + + ** note: $opt is a string representing the option. Right now the only + source of these options is the LibTidy source.. 
eventually I'll document + them officially -- see the src/config.c file in the tidy source ** + + tidy_get_output($tidy) Return the cleaned tidy HTML as a string + tidy_get_error_buffer($tidy) Return a log of the errors and warnings + returned by tidy + + tidy_get_release() Return the Libtidy release date + tidy_get_status($tidy) Return the status of the document + tidy_get_html_ver($tidy) Return the major HTML version detected for + the document + + tidy_is_xhtml($tidy) Determines if the document is XHTML + tidy_is_xml($tidy) Determines if the document is generic XML + + tidy_error_count($tidy) Returns the number of errors in the document + tidy_warning_count($tidy) Returns the number of warnings in the document + tidy_access_count($tidy) Returns the number of accessibility-related + warnings in the document. + tidy_config_count($tidy) Returns the number of configuration errors found + + tidy_load_config($tidy, $file) Loads the specified configuration file + tidy_load_config_enc($tidy, + $file, + $enc) Loads the specified config file using the specified + character encoding + tidy_set_encoding($tidy, $enc) Sets the current character encoding for the document + tidy_save_config($tidy, $file) Saves the current config to $file + + +Beyond these general-purpose API functions, Tidy also supports the following +functions which are used to retrieve an object for document traversal: + + tidy_get_root($tidy) Returns an object starting at the root of the + document + tidy_get_head($tidy) Returns an object starting at the <HEAD> tag + tidy_get_html($tidy) Returns an object starting at the <HTML> tag + tidy_get_body($tidy) Returns an object starting at the <BODY> tag + +All navigation of the specified document is done via the PHP5 object constructs. +There are two types of objects which Tidy can create. The first is TidyNode, which +represents HTML Tags, Text, and more (see the TIDY_NODETYPE_* constants).
The second +is TidyAttr, which represents an attribute within an HTML tag (TidyNode). The +functionality of these objects is represented by the following schema: + +class TidyNode { + + public $name; // name of node (e.g. HEAD) + public $value; // value of node (everything between tags) + public $type; // type of node (text, php, asp, etc.) + public $id; // id of node (e.g. TIDY_TAG_HEAD) + + public $line; // line # of node in source + public $column; // column # of node in source + + public $html_ver; // HTML version (0,1,2,3,4) + + public $attribs; // an array of attributes (see TidyAttr) + public $children; // an array of child nodes + + function has_siblings(); // any sibling nodes? + function has_children(); // any child nodes? + function has_parent(); // have a parent? + + function is_comment(); // is node a comment? + function is_xhtml(); // is document XHTML? + function is_xml(); // is document generic XML (not HTML/XHTML)? + function is_text(); // is node text? + function is_html(); // is node an HTML tag? + + function is_jste(); // is jste block? + function is_asp(); // is Microsoft ASP block? + function is_php(); // is PHP block? + + function next(); // returns next node + function prev(); // returns prev node + function parent(); // returns parent node + function child(); // returns first child node + + /* Searches for a particular attribute in the current node based + on attribute ID.
If found, returns a TidyAttr object for it */ + function get_attr_type($attr_id); + + /* + + NOT YET IMPLEMENTED + + Recursively traverses the tree from the current node and returns + an array of attributes matching the node ID/attr ID pair + + Useful for pulling out things like links: + foreach($body->fetch_attrs(TIDY_TAG_A, TIDY_ATTR_HREF) as $link) { + echo "Link : {$link->value}\n"; + } + */ + + function fetch_attrs($node_id, $attr_id); + + /* + + NOT YET IMPLEMENTED + + Recursively traverses the tree from the current node and returns + an array of nodes matching the node ID + + Useful for pulling out tables, etc (echoes the HTML for every + <TABLE> block) + + foreach($body->fetch_nodes(TIDY_TAG_TABLE) as $table) { + + echo $table->value; + + } + */ + function fetch_nodes($node_id); +} + +class TidyAttr { + + public $name; // attribute name e.g. HREF + public $value; // attribute value + public $id; // attribute id e.g. TIDY_ATTR_HREF + + function next(); // returns next attribute in tag + function tag(); // returns the tag node associated with attribute +} + +Examples of using these objects to navigate the tree can be found in the examples/ +directory (I suggest looking at urlgrab.php and dumpit.php) + +E-mail thoughts, suggestions, patches, etc. to <john@php.net>
\ No newline at end of file diff --git a/ext/tidy/TODO b/ext/tidy/TODO new file mode 100644 index 0000000000..19151b14d7 --- /dev/null +++ b/ext/tidy/TODO @@ -0,0 +1,4 @@ +TODO + + - Implement fetch_attrs(), fetch_nodes() methods + - Fix any memory leaks (some may be purely ZE2 related)
\ No newline at end of file diff --git a/ext/tidy/config.m4 b/ext/tidy/config.m4 new file mode 100644 index 0000000000..069b3ee6b4 --- /dev/null +++ b/ext/tidy/config.m4 @@ -0,0 +1,35 @@ +dnl +dnl $Id$ +dnl + +PHP_ARG_WITH(tidy,for TIDY support, +[ --with-tidy[=DIR] Include TIDY support]) + +if test "$PHP_TIDY" != "no"; then + PHP_NEW_EXTENSION(tidy, tidy.c, $ext_shared) + if test "$PHP_TIDY" != "yes"; then + TIDY_SEARCH_DIRS=$PHP_TIDY + else + TIDY_SEARCH_DIRS="/usr/local /usr" + fi + for i in $TIDY_SEARCH_DIRS; do + if test -f $i/include/tidy/tidy.h; then + TIDY_DIR=$i + TIDY_INCDIR=$i/include/tidy + elif test -f $i/include/tidy.h; then + TIDY_DIR=$i + TIDY_INCDIR=$i/include + fi + done + + if test -z "$TIDY_DIR"; then + AC_MSG_ERROR(Cannot find libtidy) + fi + + TIDY_LIBDIR=$TIDY_DIR/lib + + AC_DEFINE(HAVE_TIDY,1,[ ]) + PHP_SUBST(TIDY_SHARED_LIBADD) + PHP_ADD_LIBRARY_WITH_PATH(tidy, $TIDY_LIBDIR, TIDY_SHARED_LIBADD) + PHP_ADD_INCLUDE($TIDY_INCDIR) +fi diff --git a/ext/tidy/examples/cleanhtml.php b/ext/tidy/examples/cleanhtml.php new file mode 100644 index 0000000000..c949a0cfc2 --- /dev/null +++ b/ext/tidy/examples/cleanhtml.php @@ -0,0 +1,40 @@ +<?php + + /* + * cleanhtml.php + * + * A simple script to clean and repair HTML,XHTML,PHP,ASP,etc. documents + * if no file is provided, it reads from standard input. + * + * By: John Coggeshall <john@php.net> + * + * Usage: php cleanhtml.php [filename] + * + */ + + $tidy = tidy_create(); + + if(!isset($_SERVER['argv'][1])) { + $data = file_get_contents("php://stdin"); + tidy_parse_string($tidy, $data); + } else { + tidy_parse_file($tidy, $_SERVER['argv'][1]); + } + + tidy_clean_repair($tidy); + + if(tidy_warning_count($tidy) || + tidy_error_count($tidy)) { + + echo "\n\nThe following errors or warnings occured:\n"; + echo tidy_get_error_buffer($tidy); + echo "\n"; + } + + echo tidy_get_output($tidy); + +?> + + + +
\ No newline at end of file diff --git a/ext/tidy/examples/dumpit.php b/ext/tidy/examples/dumpit.php new file mode 100644 index 0000000000..46d307d704 --- /dev/null +++ b/ext/tidy/examples/dumpit.php @@ -0,0 +1,94 @@ +<?php + /* + * dumpit.php + * + * a command-line script which dumps the given HTML, PHP, ASP, XHTML, etc. + * file as it is represented in the document model. + * + * By: John Coggeshall <john@php.net> + * + * Usage; php dumpit.php <filename> + */ + + + $tidy = tidy_create(); + tidy_parse_file($tidy, $_SERVER['argv'][1]); + + /* Optionally you can do this here if you want to fix up the document */ + + /* tidy_clean_repair($tidy); */ + + $tree = tidy_get_root($tidy); + dump_tree($tree); + echo "\n"; + + function node_type($type) { + + switch($type) { + + case TIDY_NODETYPE_ROOT: return "Root Node"; + case TIDY_NODETYPE_DOCTYPE: return "DocType Node"; + case TIDY_NODETYPE_COMMENT: return "Comment Node"; + case TIDY_NODETYPE_PROCINS: return "ProcIns Node"; + case TIDY_NODETYPE_TEXT: return "Text Node"; + case TIDY_NODETYPE_START: return "Start Node"; + case TIDY_NODETYPE_END: return "End Node"; + case TIDY_NODETYPE_STARTEND: return "Start/End Node"; + case TIDY_NODETYPE_CDATA: return "CDATA Node"; + case TIDY_NODETYPE_SECTION: return "Section Node"; + case TIDY_NODETYPE_ASP: return "ASP Source Code Node"; + case TIDY_NODETYPE_PHP: return "PHP Source Code Node"; + case TIDY_NODETYPE_JSTE: return "JSTE Source Code"; + case TIDY_NODETYPE_XMLDECL: return "XML Declaration Node"; + default: return "Unknown Node"; + } + } + + function do_leaf($string, $indent) { + for($i = 0; $i < $indent; $i++) { + echo " "; + } + echo $string; + } + + function dump_tree($node, $indent = 0) { + if($node) { + /* Put something there if the node name is empty */ + $nodename = trim(strtoupper($node->name)); + $nodename = (empty($nodename)) ? 
"[EMPTY]" : $nodename; + + /* Generate the Node, and a pretty name for it */ + do_leaf(" + $nodename (".node_type($node->type).")\n", $indent); + + /* Check to see if this node is a text node. Text nodes are + generated by start/end tags and contain the text in between. + i.e. <B>foo</B> will create a text node with $node->value + equal to 'foo' */ + if($node->type == TIDY_NODETYPE_TEXT) { + do_leaf(" |\n", $indent); + do_leaf(" +---- Value: '{$node->value}'\n", $indent); + } + + /* Any attributes on this node? */ + if(count($node->attribs)) { + do_leaf(" |\n", $indent); + do_leaf(" +---- Attributes\n", $indent); + + /* Cycle through the attributes and display them and their values. */ + foreach($node->attribs as $attrib) { + do_leaf(" +--{$attrib->name}\n", $indent); + do_leaf(" | +-- Value: {$attrib->value}\n", $indent); + } + } + + /* Recurse along the children to generate the remaining nodes */ + if($node->has_children()) { + foreach($node->children as $child) { + dump_tree($child, $indent + 3); + } + } + } + } + + +?>
\ No newline at end of file diff --git a/ext/tidy/examples/urlgrab.php b/ext/tidy/examples/urlgrab.php new file mode 100644 index 0000000000..63a2875a79 --- /dev/null +++ b/ext/tidy/examples/urlgrab.php @@ -0,0 +1,63 @@ +<?php + + /* + * urlgrab.php + * + * A simple command-line utility to extract all of the URLS contained + * within <A HREF> tags from a document. + * + * By: John Coggeshall <john@php.net> + * + * Usage: php urlgrab.php <file> + * + */ + + /* Create a Tidy Resource */ + $tidy = tidy_create(); + + /* Parse the document */ + tidy_parse_file($tidy, $_SERVER['argv'][1]); + + /* Fix up the document */ + tidy_clean_repair($tidy); + + /* Get an object representing everything from the <HTML> tag in */ + $html = tidy_get_html($tidy); + + /* Traverse the document tree */ + print_r(get_links($html)); + + function get_links($node) { + $urls = array(); + + /* Check to see if we are on an <A> tag or not */ + if($node->id == TIDY_TAG_A) { + /* If we are, find the HREF attribute */ + $attrib = $node->get_attr_type(TIDY_ATTR_HREF); + if($attrib) { + /* Add the value of the HREF attrib to $urls */ + $urls[] = $attrib->value; + } + + } + + /* Are there any children? */ + if($node->has_children()) { + + /* Traverse down each child recursively */ + foreach($node->children as $child) { + + /* Append the results from recursion to $urls */ + foreach(get_links($child) as $url) { + + $urls[] = $url; + + } + + } + } + + return $urls; + } + +?>
\ No newline at end of file diff --git a/ext/tidy/package.xml b/ext/tidy/package.xml new file mode 100644 index 0000000000..e9458c1873 --- /dev/null +++ b/ext/tidy/package.xml @@ -0,0 +1,55 @@ +<?xml version="1.0" encoding="ISO-8859-1" ?> +<!DOCTYPE package SYSTEM "../pear/package.dtd"> +<package> + <name>Tidy</name> + <summary>Tidy HTML Repairing and Parsing</summary> + <maintainers> + <maintainer> + <user>john</user> + <name>John Coggeshall</name> + <email>john@php.net</email> + <role>lead</role> + </maintainer> + </maintainers> + <description> +Tidy is a binding for the Tidy HTML clean and repair utility which +allows you to not only clean and otherwise manipluate HTML documents, +but also traverse the document tree using the Zend Engine 2 OO semantics. + + </description> + <license>PHP</license> + <release> + <state>stable</state> + <version>0.5</version> + <date>TBA</date> + <notes> + </notes> + <filelist> + <file role="src" name="config.m4"/> + <file role="src" name="tidy.c"/> + <file role="src" name="php_tidy.h"/> + + <file role="doc" name="CREDITS"/> + <file role="doc" name="README_TIDY"/> + <file role="doc" name="TODO"/> + <file role="doc" name="examples/cleanhtml.php"/> + <file role="doc" name="examples/dumpit.php"/> + <file role="doc" name="examples/urlgrab.php"/> + + <file role="test" name="tests/001.phpt"/> + <file role="test" name="tests/002.phpt"/> + <file role="test" name="tests/003.phpt"/> + <file role="test" name="tests/004.phpt"/> + <file role="test" name="tests/005.phpt"/> + <file role="test" name="tests/005.html"/> + <file role="test" name="tests/006.phpt"/> + <file role="test" name="tests/007.phpt"/> + </filelist> + <deps> + <dep type="php" rel="ge">5.0.0</dep> + </deps> + </release> +</package> +<!-- +vim:et:ts=1:sw=1 +--> diff --git a/ext/tidy/php_tidy.h b/ext/tidy/php_tidy.h new file mode 100644 index 0000000000..67d7c57679 --- /dev/null +++ b/ext/tidy/php_tidy.h @@ -0,0 +1,200 @@ +/* + 
+----------------------------------------------------------------------+ + | PHP Version 4 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2003 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.0 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_0.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Author: John Coggeshall <john@php.net> | + +----------------------------------------------------------------------+ +*/ + +/* $Id$ */ + +#ifndef PHP_TIDY_H +#define PHP_TIDY_H + +extern zend_module_entry tidy_module_entry; +#define phpext_tidy_ptr &tidy_module_entry + +#ifdef PHP_WIN32 +#define PHP_TIDY_API __declspec(dllexport) +#else +#define PHP_TIDY_API +#endif + +#ifdef ZTS +#include "TSRM.h" +#endif + +#include "tidyenum.h" +#include "tidy.h" +#include "buffio.h" + +#ifdef ZTS +#define TIDY_G(v) TSRMG(tidy_globals_id, zend_tidy_globals *, v) +#else +#define TIDY_G(v) (tidy_globals.v) +#endif + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif + +#define TIDY_RV_FALSE(__t) __t->type = IS_BOOL; __t->value.lval = FALSE +#define TIDY_RV_TRUE(__t) __t->type = IS_BOOL; __t->value.lval = TRUE + +#define TIDY_IS_TN_PROP(_p) zend_hash_exists(TIDY_G(tn_prop), #_p, strlen(#_p)) +#define TIDY_IS_TA_PROP(_p) zend_hash_exists(TIDY_G(ta_prop), #_p, strlen(#_p)) + +#define REMOVE_NEWLINE(_z) _z->value.str.val[_z->value.str.len-1] = '\0'; + +#define TIDY_TAG_CONST(tag) REGISTER_LONG_CONSTANT("TIDY_TAG_" #tag, TidyTag_##tag, CONST_CS | CONST_PERSISTENT) +#define 
TIDY_ATTR_CONST(attr) REGISTER_LONG_CONSTANT("TIDY_ATTR_" #attr, TidyAttr_##attr, CONST_CS | CONST_PERSISTENT) +#define TIDY_NODE_CONST(name, type) REGISTER_LONG_CONSTANT("TIDY_NODETYPE_" #name, TidyNode_##type, CONST_CS | CONST_PERSISTENT) + +#define PHP_IS_TIDYUNDEF 0 +#define PHP_IS_TIDYNODE 1 +#define PHP_IS_TIDYATTR 2 + + +struct _PHPTidyDoc { + + TidyDoc doc; + TidyBuffer *errbuf; + zend_bool parsed; +}; + +typedef struct _PHPTidyDoc PHPTidyDoc; +typedef struct _PHPTidyObj PHPTidyObj; + +struct _PHPTidyObj { + zend_object obj; + TidyNode node; + TidyAttr attr; + PHPTidyDoc *tdoc; + unsigned int type; +}; + + +PHP_MINIT_FUNCTION(tidy); +PHP_MSHUTDOWN_FUNCTION(tidy); +PHP_RINIT_FUNCTION(tidy); +PHP_RSHUTDOWN_FUNCTION(tidy); +PHP_MINFO_FUNCTION(tidy); + +PHP_FUNCTION(tidy_create); +PHP_FUNCTION(tidy_setopt); +PHP_FUNCTION(tidy_getopt); +PHP_FUNCTION(tidy_parse_string); +PHP_FUNCTION(tidy_parse_file); +PHP_FUNCTION(tidy_clean_repair); +PHP_FUNCTION(tidy_diagnose); +PHP_FUNCTION(tidy_get_output); +PHP_FUNCTION(tidy_get_error_buffer); +PHP_FUNCTION(tidy_get_release); +PHP_FUNCTION(tidy_get_status); +PHP_FUNCTION(tidy_get_html_ver); +PHP_FUNCTION(tidy_is_xhtml); +PHP_FUNCTION(tidy_is_xml); +PHP_FUNCTION(tidy_error_count); +PHP_FUNCTION(tidy_warning_count); +PHP_FUNCTION(tidy_access_count); +PHP_FUNCTION(tidy_config_count); +PHP_FUNCTION(tidy_load_config); +PHP_FUNCTION(tidy_load_config_enc); +PHP_FUNCTION(tidy_set_encoding); +PHP_FUNCTION(tidy_save_config); + +PHP_FUNCTION(tidy_get_root); +PHP_FUNCTION(tidy_get_html); +PHP_FUNCTION(tidy_get_head); +PHP_FUNCTION(tidy_get_body); + +static void php_tidy_obj_clone(void *, void ** TSRMLS_DC); +static void php_tidy_obj_dtor(void *, zend_object_handle TSRMLS_DC); + +zend_object_value php_tidy_create_obj(zend_class_entry * TSRMLS_DC); + +/* object handlers */ +zval * tidy_property_read(zval *object, zval *member, zend_bool silent TSRMLS_DC); +void tidy_property_write(zval *obj, zval *member, zval *value TSRMLS_DC); +zval ** 
tidy_property_get_ptr(zval *obj, zval *member TSRMLS_DC); +zval * tidy_object_get(zval *property TSRMLS_DC); +void tidy_object_set(zval **property, zval *value TSRMLS_DC); +int tidy_property_exists(zval *object, zval *member, int check_empty TSRMLS_DC); +void tidy_property_delete(zval *obj, zval *member TSRMLS_DC); +HashTable * tidy_get_properties(zval *object TSRMLS_DC); +union _zend_function * tidy_get_method(zval *obj, char *method, int method_len TSRMLS_DC); +int tidy_call_method(char *method, INTERNAL_FUNCTION_PARAMETERS); +union _zend_function * tidy_get_constructor(zval *obj TSRMLS_DC); +zend_class_entry * tidy_get_class_entry(zval *obj TSRMLS_DC); +int tidy_get_class_name(zval *obj, char **class_name, zend_uint *name_len, int parent TSRMLS_DC); +int tidy_objects_compare(zval *obj_one, zval *obj_two TSRMLS_DC); +void tidy_object_cast(zval *readobj, zval *writeobj, int type, int should_free TSRMLS_DC); + +zend_bool _php_tidy_attr_call_method(char *method, INTERNAL_FUNCTION_PARAMETERS); +zend_bool _php_tidy_node_call_method(char *method, INTERNAL_FUNCTION_PARAMETERS); +void _php_tidy_init_prop_hashtables(); + +/* resource dtor */ +void dtor_TidyDoc(zend_rsrc_list_entry * TSRMLS_DC); + +/* constant register helpers */ +void _php_tidy_register_nodetypes(INIT_FUNC_ARGS); +void _php_tidy_register_tags(INIT_FUNC_ARGS); +void _php_tidy_register_attributes(INIT_FUNC_ARGS); + +/* Callbacks for hooking Tidy Memory alloc into e*alloc */ +void * _php_tidy_mem_alloc(size_t size); +void * _php_tidy_mem_realloc(void *mem, size_t newsize); +void _php_tidy_mem_free(void *mem); +void _php_tidy_mem_panic(ctmbstr errmsg); + +ZEND_BEGIN_MODULE_GLOBALS(tidy) +ZEND_END_MODULE_GLOBALS(tidy) + +static zend_object_handlers php_tidy_object_handlers = { + ZEND_OBJECTS_STORE_HANDLERS, + tidy_property_read, + tidy_property_write, + NULL, + NULL, + tidy_property_get_ptr, + tidy_property_get_ptr, + tidy_object_get, + tidy_object_set, + tidy_property_exists, + tidy_property_delete, + 
tidy_get_properties, + tidy_get_method, + tidy_call_method, + tidy_get_constructor, + tidy_get_class_entry, + tidy_get_class_name, + tidy_objects_compare, + tidy_object_cast +}; + + + + +#endif + + +/* + * Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ diff --git a/ext/tidy/tests/001.phpt b/ext/tidy/tests/001.phpt new file mode 100644 index 0000000000..4be56d0403 --- /dev/null +++ b/ext/tidy/tests/001.phpt @@ -0,0 +1,24 @@ +--TEST-- +Check for tidy presence +--SKIPIF-- +<?php if (!extension_loaded("Tidy")) print "skip"; ?> +--POST-- +--GET-- +--INI-- +--FILE-- +<?php +echo "tidy extension is available"; +/* + you can add regression tests for your extension here + + the output of your test code has to be equal to the + text in the --EXPECT-- section below for the tests + to pass, differences between the output and the + expected text are interpreted as failure + + see php4/README.TESTING for further information on + writing regression tests +*/ +?> +--EXPECT-- +tidy extension is available diff --git a/ext/tidy/tests/002.phpt b/ext/tidy/tests/002.phpt new file mode 100644 index 0000000000..b3fccffb3f --- /dev/null +++ b/ext/tidy/tests/002.phpt @@ -0,0 +1,25 @@ +--TEST-- +tidy_parse_string() +--SKIPIF-- +<?php if (!extension_loaded("Tidy")) print "skip"; ?> +--POST-- +--GET-- +--INI-- +--FILE-- +<?php + + $tidy = tidy_create(); + + tidy_parse_string($tidy, "<HTML></HTML>"); + + echo tidy_get_output($tidy); + +?> +--EXPECT-- +<html> +<head> +<title></title> +</head> +<body> +</body> +</html>
\ No newline at end of file diff --git a/ext/tidy/tests/003.phpt b/ext/tidy/tests/003.phpt new file mode 100644 index 0000000000..fe0fefc688 --- /dev/null +++ b/ext/tidy/tests/003.phpt @@ -0,0 +1,27 @@ +--TEST-- +tidy_clean_repair() +--SKIPIF-- +<?php if (!extension_loaded("Tidy")) print "skip"; ?> +--POST-- +--GET-- +--INI-- +--FILE-- +<?php + + $tidy = tidy_create(); + + tidy_parse_string($tidy, "<HTML></HTML>"); + tidy_clean_repair($tidy); + + echo tidy_get_output($tidy); + +?> +--EXPECT-- +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN"> +<html> +<head> +<title></title> +</head> +<body> +</body> +</html> diff --git a/ext/tidy/tests/004.phpt b/ext/tidy/tests/004.phpt new file mode 100644 index 0000000000..930a2093c7 --- /dev/null +++ b/ext/tidy/tests/004.phpt @@ -0,0 +1,24 @@ +--TEST-- +tidy_diagnose() +--SKIPIF-- +<?php if (!extension_loaded("Tidy")) print "skip"; ?> +--POST-- +--GET-- +--INI-- +--FILE-- +<?php + + $tidy = tidy_create(); + + tidy_parse_string($tidy, "<HTML></HTML>"); + tidy_diagnose($tidy); + echo tidy_get_error_buffer($tidy); + +?> +--EXPECT-- + +line 1 column 1 - Warning: missing <!DOCTYPE> declaration +line 1 column 7 - Warning: discarding unexpected </html> +line 1 column 14 - Warning: inserting missing 'title' element +Info: Document content looks like HTML 3.2 +3 warnings, 0 errors were found!
\ No newline at end of file diff --git a/ext/tidy/tests/005.html b/ext/tidy/tests/005.html new file mode 100644 index 0000000000..8c17451f91 --- /dev/null +++ b/ext/tidy/tests/005.html @@ -0,0 +1 @@ +<HTML></HTML> diff --git a/ext/tidy/tests/005.phpt b/ext/tidy/tests/005.phpt new file mode 100644 index 0000000000..828bc148ea --- /dev/null +++ b/ext/tidy/tests/005.phpt @@ -0,0 +1,25 @@ +--TEST-- +tidy_parse_file() +--SKIPIF-- +<?php if (!extension_loaded("Tidy")) print "skip"; ?> +--POST-- +--GET-- +--INI-- +--FILE-- +<?php + + $tidy = tidy_create(); + + tidy_parse_file($tidy, "ext/tidy/tests/005.html"); + + echo tidy_get_output($tidy); + +?> +--EXPECT-- +<html> +<head> +<title></title> +</head> +<body> +</body> +</html>
\ No newline at end of file diff --git a/ext/tidy/tests/006.phpt b/ext/tidy/tests/006.phpt new file mode 100644 index 0000000000..2ea27d38f4 --- /dev/null +++ b/ext/tidy/tests/006.phpt @@ -0,0 +1,23 @@ +--TEST-- +Verbose tidy_get_error_buffer() +--SKIPIF-- +<?php if (!extension_loaded("Tidy")) print "skip"; ?> +--POST-- +--GET-- +--INI-- +--FILE-- +<?php + + $tidy = tidy_create(); + + tidy_parse_string($tidy, "<HTML><asd asdf></HTML>"); + + echo tidy_get_error_buffer($tidy, true); + +?> +--EXPECT-- +line 1 column 1 - Warning: missing <!DOCTYPE> declaration +line 1 column 7 - Error: <asd> is not recognized! +line 1 column 7 - Warning: discarding unexpected <asd> +line 1 column 17 - Warning: discarding unexpected </html> +line 1 column 7 - Warning: inserting missing 'title' element
\ No newline at end of file diff --git a/ext/tidy/tests/007.phpt b/ext/tidy/tests/007.phpt new file mode 100644 index 0000000000..7fcbb8f39c --- /dev/null +++ b/ext/tidy/tests/007.phpt @@ -0,0 +1,37 @@ +--TEST-- +Verbose tidy_setopt() / tidy_getopt() +--SKIPIF-- +<?php if (!extension_loaded("Tidy")) print "skip"; ?> +--POST-- +--GET-- +--INI-- +--FILE-- +<?php + + $tidy = tidy_create(); + echo "Current Value of 'tidy-mark': "; + var_dump(tidy_getopt($tidy, "tidy-mark")); + tidy_setopt($tidy, "tidy-mark", true); + echo "\nNew Value of 'tidy-mark': "; + var_dump(tidy_getopt($tidy, "tidy-mark")); + echo "Current Value of 'error-file': "; + var_dump(tidy_getopt($tidy, "error-file")); + tidy_setopt($tidy, "error-file", "foobar"); + echo "\nNew Value of 'error-file': "; + var_dump(tidy_getopt($tidy, "error-file")); + echo "Current Value of 'tab-size': "; + var_dump(tidy_getopt($tidy, "tab-size")); + tidy_setopt($tidy, "tab-size", 10); + echo "\nNew Value of 'tab-size': "; + var_dump(tidy_getopt($tidy, "tab-size")); +?> +--EXPECT-- +Current Value of 'tidy-mark': bool(false) + +New Value of 'tidy-mark': bool(true) +Current Value of 'error-file': string(0) "" + +New Value of 'error-file': string(6) "foobar" +Current Value of 'tab-size': int(8) + +New Value of 'tab-size': int(10)
\ No newline at end of file diff --git a/ext/tidy/tidy.c b/ext/tidy/tidy.c new file mode 100644 index 0000000000..21e000eba6 --- /dev/null +++ b/ext/tidy/tidy.c @@ -0,0 +1,1874 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 4 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2003 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.0 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_0.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Author: John Coggeshall <john@php.net> | + +----------------------------------------------------------------------+ +*/ + +/* $Id$ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "php.h" +#include "php_ini.h" +#include "ext/standard/info.h" +#include "php_tidy.h" +#include "Zend/zend_API.h" +#include "Zend/zend_hash.h" +#include "safe_mode.h" + +ZEND_DECLARE_MODULE_GLOBALS(tidy); + +static int le_tidydoc; +#define le_tidydoc_name "Tidy Document" + +zend_class_entry *php_tidy_ce; + +function_entry tidy_functions[] = { + PHP_FE(tidy_create, NULL) + PHP_FE(tidy_setopt, NULL) + PHP_FE(tidy_getopt, NULL) + PHP_FE(tidy_parse_string, NULL) + PHP_FE(tidy_parse_file, NULL) + PHP_FE(tidy_get_output, NULL) + PHP_FE(tidy_get_error_buffer, NULL) + PHP_FE(tidy_clean_repair, NULL) + PHP_FE(tidy_diagnose, NULL) + PHP_FE(tidy_get_release, NULL) + PHP_FE(tidy_get_status, NULL) + PHP_FE(tidy_get_html_ver, NULL) + PHP_FE(tidy_is_xhtml, NULL) + PHP_FE(tidy_is_xml, NULL) + 
PHP_FE(tidy_error_count, NULL) + PHP_FE(tidy_warning_count, NULL) + PHP_FE(tidy_access_count, NULL) + PHP_FE(tidy_config_count, NULL) + PHP_FE(tidy_load_config, NULL) + PHP_FE(tidy_load_config_enc, NULL) + PHP_FE(tidy_set_encoding, NULL) + PHP_FE(tidy_save_config, NULL) + PHP_FE(tidy_get_root, NULL) + PHP_FE(tidy_get_head, NULL) + PHP_FE(tidy_get_html, NULL) + PHP_FE(tidy_get_body, NULL) + {NULL, NULL, NULL} +}; + + +zend_module_entry tidy_module_entry = { +#if ZEND_MODULE_API_NO >= 20010901 + STANDARD_MODULE_HEADER, +#endif + "Tidy", + tidy_functions, + PHP_MINIT(tidy), + PHP_MSHUTDOWN(tidy), + NULL, + NULL, + PHP_MINFO(tidy), +#if ZEND_MODULE_API_NO >= 20010901 + "0.5b", +#endif + STANDARD_MODULE_PROPERTIES +}; + + +#ifdef COMPILE_DL_TIDY +ZEND_GET_MODULE(tidy) +#endif + +static inline PHPTidyObj *php_tidy_fetch_object(zval *object TSRMLS_DC) { + return (PHPTidyObj *) zend_object_store_get_object(object TSRMLS_CC); +} + +void * _php_tidy_mem_alloc(size_t size) { + return emalloc(size); +} + +void * _php_tidy_mem_realloc(void *mem, size_t newsize) { + return erealloc(mem, newsize); +} + +void _php_tidy_mem_free(void *mem) { + efree(mem); +} + +void _php_tidy_mem_panic(ctmbstr errmsg) { + + php_error_docref(NULL TSRMLS_CC, E_ERROR, "Could not allocate memory for Tidy: %s", (char *)errmsg); +} + + +PHPTidyObj *php_tidy_new(TSRMLS_DC) { + + PHPTidyObj *intern; + + intern = emalloc(sizeof(PHPTidyObj)); + intern->obj.ce = php_tidy_ce; + intern->obj.in_get = 0; + intern->obj.in_set = 0; + intern->node = NULL; + intern->attr = NULL; + intern->type = PHP_IS_TIDYUNDEF; + + return intern; + +} + +static zend_object_value php_tidy_register_object(PHPTidyObj *intern TSRMLS_DC) { + + zend_object_value retval; + + retval.handle = zend_objects_store_put(intern, + php_tidy_obj_dtor, + php_tidy_obj_clone TSRMLS_CC); + retval.handlers = (zend_object_handlers *) &php_tidy_object_handlers; + + return retval; +} + +static void php_tidy_obj_dtor(void *object, zend_object_handle handle 
TSRMLS_DC) { + + PHPTidyObj *o = (PHPTidyObj *)object; + efree(o); +} + +static void php_tidy_obj_clone(void *object, void **object_clone TSRMLS_DC) { + + PHPTidyObj *intern = (PHPTidyObj *) object; + PHPTidyObj **intern_clone = (PHPTidyObj **) object_clone; + + *intern_clone = emalloc(sizeof(PHPTidyObj)); + (*intern_clone)->obj.ce = intern->obj.ce; + (*intern_clone)->obj.in_get = 0; + (*intern_clone)->obj.in_set = 0; + ALLOC_HASHTABLE((*intern_clone)->obj.properties); + + /* memcopy these.. */ + memcpy((*intern_clone)->node, intern->node, sizeof(TidyNode)); + memcpy((*intern_clone)->attr, intern->attr, sizeof(TidyAttr)); + memcpy((*intern_clone)->tdoc, intern->tdoc, sizeof(PHPTidyDoc)); + (*intern_clone)->type = intern->type; + +} + +void dtor_TidyDoc(zend_rsrc_list_entry *rsrc TSRMLS_DC) { + + PHPTidyDoc *tdoc = (PHPTidyDoc *)rsrc->ptr; + tidyRelease(tdoc->doc); + efree(tdoc); + +} + +static void php_tidy_init_globals(zend_tidy_globals *tidy_globals) { + + /* No globals for now */ +} + +PHP_MINIT_FUNCTION(tidy) { + + zend_class_entry _tidy_entry; + + INIT_CLASS_ENTRY(_tidy_entry, "TidyObject", NULL); + php_tidy_ce = zend_register_internal_class(&_tidy_entry TSRMLS_CC); + + ZEND_INIT_MODULE_GLOBALS(tidy, php_tidy_init_globals, NULL); + le_tidydoc = zend_register_list_destructors_ex(dtor_TidyDoc, NULL, le_tidydoc_name, module_number); + + _php_tidy_register_tags(INIT_FUNC_ARGS_PASSTHRU); + _php_tidy_register_attributes(INIT_FUNC_ARGS_PASSTHRU); + _php_tidy_register_nodetypes(INIT_FUNC_ARGS_PASSTHRU); + + tidySetMallocCall(_php_tidy_mem_alloc); + tidySetReallocCall(_php_tidy_mem_realloc); + tidySetFreeCall(_php_tidy_mem_free); + tidySetPanicCall(_php_tidy_mem_panic); + + return SUCCESS; + +} + + +PHP_MSHUTDOWN_FUNCTION(tidy) { + + + return SUCCESS; + +} + + + +PHP_MINFO_FUNCTION(tidy) { + + php_info_print_table_start(); + php_info_print_table_header(2, "Tidy support", "enabled"); + php_info_print_table_row(2, "Tidy Build Date", (char *)tidyReleaseDate()); + 
php_info_print_table_end(); + +} + +/* {{{ proto resource tidy_create() + Initialize a new tidy document */ +PHP_FUNCTION(tidy_create) { + + PHPTidyDoc *tdoc; + tdoc = emalloc(sizeof(PHPTidyDoc)); + tdoc->doc = tidyCreate(); + tdoc->parsed = 0; + + tdoc->errbuf = emalloc(sizeof(TidyBuffer)); + tidyBufInit(tdoc->errbuf); + + if(tidySetErrorBuffer(tdoc->doc, tdoc->errbuf) != 0) + php_error_docref(NULL TSRMLS_CC, E_ERROR, "Could not set Tidy error buffer"); + + tidyOptSetBool(tdoc->doc, TidyForceOutput, yes); + tidyOptSetBool(tdoc->doc, TidyMark, no); + + ZEND_REGISTER_RESOURCE(return_value, tdoc, le_tidydoc); + +} +/* }}} */ + +/* {{{ proto void tidy_parse_string(resource tidy, string input) + Parse a document stored in a string */ +PHP_FUNCTION(tidy_parse_string) { + + char *input; + zval *res; + PHPTidyDoc *tdoc; + + if(ZEND_NUM_ARGS() != 2) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "rs", &res, &input) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(tidyParseString(tdoc->doc, input) < 0) { + php_error_docref(NULL TSRMLS_CC, E_ERROR, "[Tidy error] %s", tdoc->errbuf->bp); + } + + tdoc->parsed = 1; + +} +/* }}} */ + +/* {{{ proto string tidy_get_error_buffer(resource tidy [, boolean detailed]) + Return warnings and errors which occured parsing the specified document*/ +PHP_FUNCTION(tidy_get_error_buffer) { + + zval *res; + PHPTidyDoc *tdoc; + zend_bool detailed; + + if((ZEND_NUM_ARGS() == 0) || (ZEND_NUM_ARGS() > 2)) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "r|b", &res, &detailed) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(!tdoc->parsed) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "A document must be parsed before executing this function."); + RETURN_FALSE; + } + + if(detailed) { + tidyErrorSummary(tdoc->doc); + } + + 
RETVAL_STRING(tdoc->errbuf->bp, 1); + + tidyBufClear(tdoc->errbuf); + +} +/* }}} */ + +/* {{{ proto string tidy_get_output(resource tidy) + Return a string representing the parsed tidy markup */ +PHP_FUNCTION(tidy_get_output) { + + zval *res; + PHPTidyDoc *tdoc; + TidyBuffer output = {0}; + + if(ZEND_NUM_ARGS() != 1) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "r", &res) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(!tdoc->parsed) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "A document must be parsed before executing this function."); + RETURN_FALSE; + } + + tidySaveBuffer( tdoc->doc, &output ); + + RETVAL_STRING(output.bp, 1); + + tidyBufFree(&output); + +} +/* }}} */ + +/* {{{ proto boolean tidy_parse_file(resource tidy, string file) + Parse markup in file or URI */ +PHP_FUNCTION(tidy_parse_file) { + + + char *inputfile; + zval *res; + PHPTidyDoc *tdoc; + php_stream *stream; + char *contents; + + if(ZEND_NUM_ARGS() != 2) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "rs", &res, &inputfile) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + stream = php_stream_open_wrapper(inputfile, "rb", + ENFORCE_SAFE_MODE | REPORT_ERRORS, + NULL); + + if(!stream) { + RETURN_FALSE; + } + + if (php_stream_copy_to_mem(stream, &contents, PHP_STREAM_COPY_ALL, 0) > 0) { + + if(tidyParseString(tdoc->doc, contents) < 0) { + php_error_docref(NULL TSRMLS_CC, E_ERROR, "[Tidy error] %s", tdoc->errbuf->bp); + } + tdoc->parsed = TRUE; + efree(contents); + } + + php_stream_close(stream); + + RETURN_TRUE; +} +/* }}} */ + +/* {{{ proto boolean tidy_clean_repair(resource tidy) + Execute configured cleanup and repair operations on parsed markup */ +PHP_FUNCTION(tidy_clean_repair) { + + zval *res; + PHPTidyDoc *tdoc; + + if(ZEND_NUM_ARGS() != 1) { + WRONG_PARAM_COUNT; + } + + 
if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "r", &res) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(!tdoc->parsed) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "A document must be parsed before executing this function."); + RETURN_FALSE; + } + + if(tidyCleanAndRepair(tdoc->doc) >= 0) RETURN_TRUE; + + RETURN_FALSE; + +} +/* }}} */ + +/* {{{ proto boolean tidy_diagnose(resource tidy) + Run configured diagnostics on parsed and repaired markup. */ +PHP_FUNCTION(tidy_diagnose) { + + zval *res; + PHPTidyDoc *tdoc; + + if(ZEND_NUM_ARGS() != 1) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "r", &res) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(!tdoc->parsed) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "A document must be parsed before executing this function."); + RETURN_FALSE; + } + + if(tidyRunDiagnostics(tdoc->doc) >= 0) RETURN_TRUE; + + RETURN_FALSE; +} + +/* }}} */ + +/* {{{ proto string tidy_get_release() + Get release date (version) for Tidy library */ +PHP_FUNCTION(tidy_get_release) { + + RETURN_STRING((char *)tidyReleaseDate(), 1); +} +/* }}} */ + +/* {{{ proto int tidy_get_status(resource tidy) + Get status of specfied document. */ +PHP_FUNCTION(tidy_get_status) { + zval *res; + PHPTidyDoc *tdoc; + + if(ZEND_NUM_ARGS() != 1) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "r", &res) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + RETURN_LONG(tidyStatus(tdoc->doc)); + +} +/* }}} */ + +/* {{{ proto int tidy_get_html_ver(resource tidy) + Get the Detected HTML version for the specified document. 
*/ +PHP_FUNCTION(tidy_get_html_ver) { + zval *res; + PHPTidyDoc *tdoc; + + if(ZEND_NUM_ARGS() != 1) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "r", &res) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(!tdoc->parsed) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "A document must be parsed before executing this function."); + RETURN_FALSE; + } + + + RETURN_LONG(tidyDetectedHtmlVersion(tdoc->doc)); +} +/* }}} */ + +/* {{{ proto boolean tidy_is_xhtml(resource tidy) + Indicates if the document is a XHTML document. */ +PHP_FUNCTION(tidy_is_xhtml) { + zval *res; + PHPTidyDoc *tdoc; + + if(ZEND_NUM_ARGS() != 1) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "r", &res) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(!tdoc->parsed) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "A document must be parsed before executing this function."); + RETURN_FALSE; + } + + RETURN_BOOL(tidyDetectedXhtml(tdoc->doc)); + +} +/* }}} */ + +/* {{{ proto boolean tidy_is_xhtml(resource tidy) + Indicates if the document is a generic (non HTML/XHTML) XML document. */ +PHP_FUNCTION(tidy_is_xml) { + zval *res; + PHPTidyDoc *tdoc; + + if(ZEND_NUM_ARGS() != 1) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "r", &res) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(!tdoc->parsed) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "A document must be parsed before executing this function."); + RETURN_FALSE; + } + + RETURN_BOOL(tidyDetectedGenericXml(tdoc->doc)); +} +/* }}} */ + +/* {{{ proto int tidy_error_count(resource tidy) + Returns the Number of Tidy errors encountered for specified document. 
*/ +PHP_FUNCTION(tidy_error_count) { + zval *res; + PHPTidyDoc *tdoc; + + if(ZEND_NUM_ARGS() != 1) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "r", &res) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(!tdoc->parsed) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "A document must be parsed before executing this function."); + RETURN_FALSE; + } + + RETURN_LONG(tidyErrorCount(tdoc->doc)); +} +/* }}} */ + +/* {{{ proto int tidy_warning_count(resource tidy) + Returns the Number of Tidy warnings encountered for specified document. */ +PHP_FUNCTION(tidy_warning_count) { + zval *res; + PHPTidyDoc *tdoc; + + if(ZEND_NUM_ARGS() != 1) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "r", &res) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(!tdoc->parsed) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "A document must be parsed before executing this function."); + RETURN_FALSE; + } + + RETURN_LONG(tidyWarningCount(tdoc->doc)); +} +/* }}} */ + +/* {{{ proto int tidy_access_count(resource tidy) + Returns the Number of Tidy accessibility warnings encountered for specified document. */ +PHP_FUNCTION(tidy_access_count) { + zval *res; + PHPTidyDoc *tdoc; + + if(ZEND_NUM_ARGS() != 1) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "r", &res) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(!tdoc->parsed) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "A document must be parsed before executing this function."); + RETURN_FALSE; + } + + RETURN_LONG(tidyAccessWarningCount(tdoc->doc)); + +} +/* }}} */ + +/* {{{ proto int tidy_config_count(resource tidy) + Returns the Number of Tidy configuration errors encountered for specified document. 
*/ +PHP_FUNCTION(tidy_config_count) { + zval *res; + PHPTidyDoc *tdoc; + + if(ZEND_NUM_ARGS() != 1) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "r", &res) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + + RETURN_LONG(tidyConfigErrorCount(tdoc->doc)); + +} +/* }}} */ + +/* {{{ proto void tidy_load_config(resource tidy, string filename) + Load an ASCII Tidy configuration file */ +PHP_FUNCTION(tidy_load_config) { + zval *res; + char *filename; + PHPTidyDoc *tdoc; + + if(ZEND_NUM_ARGS() != 2) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "rs", &res, &filename) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + if(tidyLoadConfig(tdoc->doc, filename) < 0) { + php_error_docref(NULL TSRMLS_CC, E_ERROR, "Could not load configuration file '%s'", filename); + } + +} +/* }}} */ + +/* {{{ proto void tidy_load_config(resource tidy, string filename, string encoding) + Load an ASCII Tidy configuration file with the specified encoding */ +PHP_FUNCTION(tidy_load_config_enc) { + zval *res; + char *filename; + char *encoding; + PHPTidyDoc *tdoc; + + if(ZEND_NUM_ARGS() != 2) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "rss", &res, &filename, &encoding) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + if(tidyLoadConfigEnc(tdoc->doc, filename, encoding) < 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Could not load configuration file '%s' using encoding '%s'", filename, encoding); + RETURN_FALSE; + } + + RETURN_TRUE; +} +/* }}} */ + +/* {{{ proto boolean tidy_set_encoding(resource tidy, string encoding) + Set the input/output character encoding for parsing markup. 
+ Values include: ascii, latin1, raw, utf8, iso2022, mac, win1252, utf16le, + utf16be, utf16, big5 and shiftjis. */ +PHP_FUNCTION(tidy_set_encoding) { + zval *res; + char *encoding; + PHPTidyDoc *tdoc; + + if(ZEND_NUM_ARGS() != 2) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "rs", &res, &encoding) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + if(tidySetCharEncoding(tdoc->doc, encoding) < 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Could not set encoding '%s'", encoding); + RETURN_FALSE; + } + + RETURN_TRUE; +} +/* }}} */ + +/* {{{ proto boolean tidy_save_config(resource tidy, string filename) + Save current settings to named file. Only non-default values are written. */ +PHP_FUNCTION(tidy_save_config) { + zval *res; + char *filename; + PHPTidyDoc *tdoc; + + if(ZEND_NUM_ARGS() != 2) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "rs", &res, &filename) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + if(tidyOptSaveFile(tdoc->doc, filename) < 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Could not write tidy configuration file '%s'", filename); + RETURN_FALSE; + } + + RETURN_TRUE; +} +/* }}} */ + +/* {{{ proto boolean tidy_setopt(resource tidy, string option, mixed newvalue) + Updates the configuration settings for the specified tidy document. 
*/ +PHP_FUNCTION(tidy_setopt) { + + zval *res, *value; + char *optname; + int optname_len; + PHPTidyDoc *tdoc; + TidyOption opt; + + + if(ZEND_NUM_ARGS() != 3) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "rsz", &res, &optname, &optname_len, &value) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(!tdoc->doc) { + php_error_docref(NULL TSRMLS_CC,E_ERROR, "Invalid Tidy Resource Specified"); + RETURN_FALSE; + } + + opt = tidyGetOptionByName(tdoc->doc, optname); + if(!opt) { + php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown Tidy Configuration Option '%s'", optname); + RETURN_FALSE; + } + + switch(tidyOptGetType(opt)) { + + case TidyString: + if(tidyOptSetValue(tdoc->doc, tidyOptGetId(opt), Z_STRVAL_P(value))) { + RETURN_TRUE; + } + break; + + case TidyInteger: + if(tidyOptSetInt(tdoc->doc, tidyOptGetId(opt), Z_LVAL_P(value))) { + RETURN_TRUE; + } + break; + + case TidyBoolean: + if(tidyOptSetBool(tdoc->doc, tidyOptGetId(opt), Z_LVAL_P(value))) { + RETURN_TRUE; + } + break; + + default: + + php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unable to determine type of Tidy configuration constant to set"); + + } + RETURN_FALSE; +} +/* }}} */ + +/* {{{ proto mixed tidy_getopt(resource tidy, string option) + Returns the value of the specified configuration option for the tidy document. 
*/ +PHP_FUNCTION(tidy_getopt) { + + zval *res; + char *optname, *strval; + int optname_len; + PHPTidyDoc *tdoc; + TidyOption opt; + + if(ZEND_NUM_ARGS() != 2) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "rs", &res, &optname, &optname_len) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(!tdoc->doc) { + php_error_docref(NULL TSRMLS_CC,E_ERROR, "Invalid Tidy Resource Specified"); + RETURN_FALSE; + } + + opt = tidyGetOptionByName(tdoc->doc, optname); + if(!opt) { + php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown Tidy Configuration Option '%s'", optname); + RETURN_FALSE; + } + + switch(tidyOptGetType(opt)) { + + case TidyString: + strval = (char *)tidyOptGetValue(tdoc->doc, tidyOptGetId(opt)); + + if(strval) { + RETURN_STRING(strval, 0); + } else { + RETURN_EMPTY_STRING(); + } + + break; + case TidyInteger: + RETURN_LONG(tidyOptGetInt(tdoc->doc, tidyOptGetId(opt))) + break; + case TidyBoolean: + + if(tidyOptGetBool(tdoc->doc, tidyOptGetId(opt))) { + RETURN_TRUE; + } + + break; + + default: + + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to determine type of Tidy configuration constant to get"); + + } + + RETURN_FALSE; + +} +/* }}} */ + +/* {{{ proto TidyNode tidy_get_root(resource tidy) + Returns a TidyNode Object representing the root of the tidy parse tree */ +PHP_FUNCTION(tidy_get_root) { + + zval *res; + PHPTidyDoc *tdoc; + PHPTidyObj *obj; + + if(ZEND_NUM_ARGS() != 1) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "r", &res) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(!tdoc->parsed) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "A document must be parsed before executing this function."); + RETURN_FALSE; + } + + obj = php_tidy_new(); + obj->node = tidyGetRoot(tdoc->doc); + obj->tdoc = tdoc; + obj->attr = NULL; + obj->type = 
PHP_IS_TIDYNODE; + + return_value->type = IS_OBJECT; + return_value->value.obj = php_tidy_register_object(obj TSRMLS_CC); + +} +/* }}} */ + +/* {{{ proto TidyNode tidy_get_html(resource tidy) + Returns a TidyNode Object starting from the <HTML> tag of the tidy parse tree */ +PHP_FUNCTION(tidy_get_html) { + + zval *res; + PHPTidyDoc *tdoc; + PHPTidyObj *obj; + + if(ZEND_NUM_ARGS() != 1) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "r", &res) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(!tdoc->parsed) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "A document must be parsed before executing this function."); + RETURN_FALSE; + } + + obj = php_tidy_new(); + obj->node = tidyGetHtml(tdoc->doc); + obj->tdoc = tdoc; + obj->attr = NULL; + obj->type = PHP_IS_TIDYNODE; + + return_value->type = IS_OBJECT; + return_value->value.obj = php_tidy_register_object(obj TSRMLS_CC); + +} +/* }}} */ + +/* {{{ proto TidyNode tidy_get_head(resource tidy) + Returns a TidyNode Object starting from the <HEAD> tag of the tidy parse tree */ +PHP_FUNCTION(tidy_get_head) { + + zval *res; + PHPTidyDoc *tdoc; + PHPTidyObj *obj; + + if(ZEND_NUM_ARGS() != 1) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "r", &res) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(!tdoc->parsed) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "A document must be parsed before executing this function."); + RETURN_FALSE; + } + + obj = php_tidy_new(); + obj->node = tidyGetHead(tdoc->doc); + obj->tdoc = tdoc; + obj->attr = NULL; + obj->type = PHP_IS_TIDYNODE; + + return_value->type = IS_OBJECT; + return_value->value.obj = php_tidy_register_object(obj TSRMLS_CC); + +} +/* }}} */ + +/* {{{ proto TidyNode tidy_get_body(resource tidy) + Returns a TidyNode Object starting from the <BODY> tag of the tidy 
parse tree */ +PHP_FUNCTION(tidy_get_body) { + + zval *res; + PHPTidyDoc *tdoc; + PHPTidyObj *obj; + + if(ZEND_NUM_ARGS() != 1) { + WRONG_PARAM_COUNT; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "r", &res) == FAILURE) { + return; + } + + ZEND_FETCH_RESOURCE(tdoc, PHPTidyDoc *, &res, -1, le_tidydoc_name, le_tidydoc); + + if(!tdoc->parsed) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "A document must be parsed before executing this function."); + RETURN_FALSE; + } + + obj = php_tidy_new(); + obj->node = tidyGetBody(tdoc->doc); + obj->tdoc = tdoc; + obj->attr = NULL; + obj->type = PHP_IS_TIDYNODE; + + return_value->type = IS_OBJECT; + return_value->value.obj = php_tidy_register_object(obj TSRMLS_CC); + +} +/* }}} */ + +void tidy_property_delete(zval *obj, zval *member TSRMLS_DC) {} +void tidy_property_write(zval *obj, zval *member, zval *value TSRMLS_DC) {} +void tidy_object_cast(zval *readobj, zval *writeobj, int type, int should_free TSRMLS_DC) {} +void tidy_object_set(zval **property, zval *value TSRMLS_DC) {} +union _zend_function * tidy_get_constructor(zval *obj TSRMLS_DC) { + return NULL; +} + +zend_class_entry * tidy_get_class_entry(zval *obj TSRMLS_DC) { + return php_tidy_ce; +} + +zval * tidy_object_get(zval *property TSRMLS_DC) { + return NULL; +} + +zval ** tidy_property_get_ptr(zval *obj, zval *member TSRMLS_DC) { + zval **p_ptr; + zval *p; + + p_ptr = emalloc(sizeof(zval **)); + + p = tidy_property_read(obj, member, 0 TSRMLS_CC); + + *p_ptr = p; + + return p_ptr; + +} + +zval * tidy_property_read(zval *object, zval *member, zend_bool silent TSRMLS_DC) { + + PHPTidyObj *obj = php_tidy_fetch_object(object); + PHPTidyObj *newobj; + zval *return_value, *temp; + TidyBuffer buf; + TidyNode tempnode; + TidyAttr tempattr; + char *temp_str; + + char *name = Z_STRVAL_P(member); + + MAKE_STD_ZVAL(return_value); + ZVAL_NULL(return_value); + + switch(obj->type) { + + case PHP_IS_TIDYNODE: + + if(!strcmp(name, "name")) { + temp_str = (char 
*)tidyNodeGetName(obj->node); + if(temp_str) { + ZVAL_STRING(return_value, temp_str, 1); + } + + } else if(!strcmp(name, "value")) { + + memset(&buf, 0, sizeof(buf)); + tidyNodeGetText(obj->tdoc->doc, obj->node, &buf); + ZVAL_STRING(return_value, (char *)buf.bp, 1); + + /* The buffer adds a newline at the end of the string */ + REMOVE_NEWLINE(return_value); + + tidyBufFree(&buf); + + } else if(!strcmp(name, "type")) { + + ZVAL_LONG(return_value, tidyNodeGetType(obj->node)); + + } else if(!strcmp(name, "id")) { + + if(tidyNodeGetName(obj->node)) { + ZVAL_LONG(return_value, tidyNodeGetId(obj->node)); + } + + } else if(!strcmp(name, "attribs")) { + + array_init(return_value); + + tempattr = tidyAttrFirst(obj->node); + + if(tempattr) { + + newobj = php_tidy_new(); + newobj->node = obj->node; + newobj->tdoc = obj->tdoc; + newobj->attr = tempattr; + newobj->type = PHP_IS_TIDYATTR; + + MAKE_STD_ZVAL(temp); + + temp->type = IS_OBJECT; + temp->value.obj = php_tidy_register_object(newobj TSRMLS_CC); + //zend_objects_store_add_ref(object); + add_next_index_zval(return_value, temp); + + while((tempattr = tidyAttrNext(tempattr))) { + + newobj = php_tidy_new(); + newobj->node = obj->node; + newobj->tdoc = obj->tdoc; + newobj->attr = tempattr; + newobj->type = PHP_IS_TIDYATTR; + + MAKE_STD_ZVAL(temp); + + temp->type = IS_OBJECT; + temp->value.obj = php_tidy_register_object(newobj TSRMLS_CC); + //zend_objects_store_add_ref(object); + add_next_index_zval(return_value, temp); + + } + } + + } else if(!strcmp(name, "children")) { + + array_init(return_value); + tempnode = tidyGetChild(obj->node); + if(tempnode) { + + newobj = php_tidy_new(); + newobj->node = tempnode; + newobj->tdoc = obj->tdoc; + newobj->attr = NULL; + newobj->type = PHP_IS_TIDYNODE; + + MAKE_STD_ZVAL(temp); + + temp->type = IS_OBJECT; + temp->value.obj = php_tidy_register_object(newobj TSRMLS_CC); + //zend_objects_store_add_ref(object); + add_next_index_zval(return_value, temp); + + while((tempnode = 
tidyGetNext(tempnode))) { + + newobj = php_tidy_new(); + newobj->node = tempnode; + newobj->tdoc = obj->tdoc; + newobj->attr = NULL; + newobj->type = PHP_IS_TIDYNODE; + + MAKE_STD_ZVAL(temp); + + temp->type = IS_OBJECT; + temp->value.obj = php_tidy_register_object(newobj TSRMLS_CC); + //zend_objects_store_add_ref(object); + add_next_index_zval(return_value, temp); + + } + } + + } else if(!strcmp(name, "line")) { + ZVAL_LONG(return_value, tidyNodeLine(obj->node)); + } else if(!strcmp(name, "column")) { + ZVAL_LONG(return_value, tidyNodeColumn(obj->node)); + } else if(!strcmp(name, "html_ver")) { + ZVAL_LONG(return_value, tidyDetectedHtmlVersion(obj->tdoc->doc)); + } + + break; + case PHP_IS_TIDYATTR: + + if(!strcmp(name, "name")) { + temp_str = (char *)tidyAttrName(obj->attr); + if(temp_str) { + ZVAL_STRING(return_value, temp_str , 1); + } + } else if(!strcmp(name, "value")) { + temp_str = (char *)tidyAttrValue(obj->attr); + if(temp_str) { + ZVAL_STRING(return_value, temp_str , 1); + efree(temp_str); + } + } else if(!strcmp(name, "id")) { + ZVAL_LONG(return_value, tidyAttrGetId(obj->attr)); + } + + break; + default: + php_error_docref(NULL TSRMLS_CC,E_ERROR, "Something is wrong -- undefined object type."); + break; + } + + return return_value; + +} + +int tidy_property_exists(zval *object, zval *member, int check_empty TSRMLS_DC) { + + return TRUE; +} + +HashTable * tidy_get_properties(zval *object TSRMLS_DC) { + + return NULL; + +} + +union _zend_function * tidy_get_method(zval *obj, char *method, int method_len TSRMLS_DC) { + + zend_internal_function *f; + + f = emalloc(sizeof(zend_internal_function)); + f->type = ZEND_OVERLOADED_FUNCTION; + f->arg_types = NULL; + f->scope = php_tidy_ce; + f->fn_flags = 0; + f->function_name = estrndup(method, method_len); + + return (union _zend_function *) f; +} + +zend_bool _php_tidy_node_call_method(char *method, INTERNAL_FUNCTION_PARAMETERS) { + + PHPTidyObj *obj = php_tidy_fetch_object(getThis()); + PHPTidyObj *newobj; + 
TidyNode tempnode; + TidyAttr tempattr; + + int param; + + if(strstr(method, "has_")) { + + if(!strcmp(method, "has_siblings")) { + + if(tidyGetNext(obj->node) || tidyGetPrev(obj->node)) { + TIDY_RV_TRUE(return_value); + } else { + TIDY_RV_FALSE(return_value); + } + + } else if(!strcmp(method, "has_children")) { + + if(tidyGetChild(obj->node)) { + TIDY_RV_TRUE(return_value); + } else { + TIDY_RV_FALSE(return_value); + } + + } else if(!strcmp(method, "has_parent")) { + + if(tidyGetParent(obj->node)) { + TIDY_RV_TRUE(return_value); + } else { + TIDY_RV_FALSE(return_value); + } + + } + + } else if(strstr(method, "is_")) { + + if(!strcmp(method, "is_comment")) { + + if(tidyNodeGetType(obj->node) == TidyNode_Comment) { + TIDY_RV_TRUE(return_value); + } else { + TIDY_RV_FALSE(return_value); + } + + } else if(!strcmp(method, "is_xhtml")) { + + if(tidyDetectedXhtml(obj->tdoc->doc)) { + TIDY_RV_TRUE(return_value); + } else { + TIDY_RV_FALSE(return_value); + } + + } else if(!strcmp(method, "is_xml")) { + + if(tidyDetectedGenericXml(obj->tdoc->doc)) { + TIDY_RV_TRUE(return_value); + } else { + TIDY_RV_FALSE(return_value); + } + + } else if(!strcmp(method, "is_text")) { + + if(tidyNodeGetType(obj->node) == TidyNode_Text) { + TIDY_RV_TRUE(return_value); + } else { + TIDY_RV_FALSE(return_value); + } + + } else if(!strcmp(method, "is_jste")) { + + if(tidyNodeGetType(obj->node) == TidyNode_Jste) { + TIDY_RV_TRUE(return_value); + } else { + TIDY_RV_FALSE(return_value); + } + + } else if(!strcmp(method, "is_asp")) { + + if(tidyNodeGetType(obj->node) == TidyNode_Asp) { + TIDY_RV_TRUE(return_value); + } else { + TIDY_RV_FALSE(return_value); + } + + } else if(!strcmp(method, "is_php")) { + + if(tidyNodeGetType(obj->node) == TidyNode_Php) { + TIDY_RV_TRUE(return_value); + } else { + TIDY_RV_FALSE(return_value); + } + + } else if(!strcmp(method, "is_html")) { + + switch(tidyNodeGetType(obj->node)) { + + case TidyNode_Start: + case TidyNode_End: + case TidyNode_StartEnd: + 
TIDY_RV_TRUE(return_value); + break; + default: + TIDY_RV_FALSE(return_value); + break; + } + + } + + } else { + + if(!strcmp(method, "next")) { + + tempnode = tidyGetNext(obj->node); + if(tempnode) { + newobj = php_tidy_new(); + newobj->tdoc = obj->tdoc; + newobj->node = tempnode; + newobj->type = PHP_IS_TIDYNODE; + return_value->type = IS_OBJECT; + return_value->value.obj = php_tidy_register_object(newobj TSRMLS_CC); + //zend_objects_store_add_ref(getThis()); + } + + } else if(!strcmp(method, "prev")) { + + tempnode = tidyGetPrev(obj->node); + if(tempnode) { + newobj = php_tidy_new(); + newobj->tdoc = obj->tdoc; + newobj->node = tempnode; + newobj->type = PHP_IS_TIDYNODE; + return_value->type = IS_OBJECT; + return_value->value.obj = php_tidy_register_object(newobj TSRMLS_CC); + //zend_objects_store_add_ref(getThis()); + } + + } else if(!strcmp(method, "parent")) { + + tempnode = tidyGetParent(obj->node); + if(tempnode) { + newobj = php_tidy_new(); + newobj->tdoc = obj->tdoc; + newobj->node = tempnode; + newobj->type = PHP_IS_TIDYNODE; + return_value->type = IS_OBJECT; + return_value->value.obj = php_tidy_register_object(newobj TSRMLS_CC); + //zend_objects_store_add_ref(getThis()); + } + + } else if(!strcmp(method, "child")) { + + tempnode = tidyGetChild(obj->node); + if(tempnode) { + newobj = php_tidy_new(); + newobj->tdoc = obj->tdoc; + newobj->node = tempnode; + newobj->type = PHP_IS_TIDYNODE; + return_value->type = IS_OBJECT; + return_value->value.obj = php_tidy_register_object(newobj TSRMLS_CC); + //zend_objects_store_add_ref(getThis()); + } + + } else if(!strcmp(method, "get_attr_type")) { + + if(ZEND_NUM_ARGS() != 1) { + zend_wrong_param_count(TSRMLS_CC); + return TRUE; + } + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, + "l", ¶m) == FAILURE) { + return FALSE; + } + + newobj = php_tidy_new(); + newobj->tdoc = obj->tdoc; + + for(tempattr = tidyAttrFirst(obj->node); + tempattr; + tempattr = tidyAttrNext(tempattr)) { + + if(tidyAttrGetId(tempattr) == 
param) { + + newobj->attr = tempattr; + newobj->type = PHP_IS_TIDYATTR; + + return_value->type = IS_OBJECT; + return_value->value.obj = php_tidy_register_object(newobj TSRMLS_CC); + //zend_objects_store_add_ref(getThis()); + break; + + } + + } + + } else { + + return FALSE; + + } + } + + return TRUE; + +} + +zend_bool _php_tidy_attr_call_method(char *method, INTERNAL_FUNCTION_PARAMETERS) { + + PHPTidyObj *obj = php_tidy_fetch_object(getThis()); + PHPTidyObj *newobj; + TidyAttr tempattr; + + if(!strcmp(method, "next")) { + + tempattr = tidyAttrNext(obj->attr); + + if(tempattr) { + + newobj = php_tidy_new(); + newobj->tdoc = obj->tdoc; + newobj->node = obj->node; + newobj->attr = tempattr; + newobj->type = PHP_IS_TIDYATTR; + return_value->type = IS_OBJECT; + return_value->value.obj = php_tidy_register_object(newobj TSRMLS_CC); + zend_objects_store_add_ref(getThis()); + + } else { + + TIDY_RV_FALSE(return_value); + + } + + } else if(!strcmp(method, "tag")) { + + newobj = php_tidy_new(); + newobj->tdoc = obj->tdoc; + newobj->node = obj->node; + newobj->attr = NULL; + newobj->type = PHP_IS_TIDYNODE; + return_value->type = IS_OBJECT; + return_value->value.obj = php_tidy_register_object(newobj TSRMLS_CC); + zend_objects_store_add_ref(getThis()); + + } else { + + return FALSE; + + } + + return TRUE; +} + +int tidy_call_method(char *method, INTERNAL_FUNCTION_PARAMETERS) { + + PHPTidyObj *obj = php_tidy_fetch_object(getThis()); + + switch(obj->type) { + + case PHP_IS_TIDYNODE: + return _php_tidy_node_call_method(method, INTERNAL_FUNCTION_PARAM_PASSTHRU); + break; + case PHP_IS_TIDYATTR: + return _php_tidy_attr_call_method(method, INTERNAL_FUNCTION_PARAM_PASSTHRU); + break; + default: + php_error_docref(NULL TSRMLS_CC, E_ERROR, "Undefined Tidy object type."); + break; + } + + return FALSE; +} + +int tidy_get_class_name(zval *obj, char **class_name, zend_uint *name_len, int parent TSRMLS_DC) { + + PHPTidyObj *object = php_tidy_fetch_object(obj); + + switch(object->type) { + + 
case PHP_IS_TIDYNODE: + + *class_name = estrdup("Tidy_Node"); + *name_len = sizeof("Tidy_Node"); + + break; + case PHP_IS_TIDYATTR: + + *class_name = estrdup("Tidy_Attribute"); + *name_len = sizeof("Tidy_Attribute"); + + break; + default: + + *class_name = estrdup("Tidy_Unknown"); + *name_len = sizeof("Tidy_Unknown"); + break; + } + + return TRUE; +} + +int tidy_objects_compare(zval *obj_one, zval *obj_two TSRMLS_DC) { + PHPTidyObj *obj1, *obj2; + + obj1 = php_tidy_fetch_object(obj_one); + obj2 = php_tidy_fetch_object(obj_two); + + if( (obj1->tdoc == obj2->tdoc) && + (obj1->node == obj2->node) && + (obj1->attr == obj2->attr) && + (obj1->type == obj2->type)) { + + return TRUE; + + } + + return FALSE; + +} + + +void _php_tidy_register_nodetypes(INIT_FUNC_ARGS) { + + TIDY_NODE_CONST(ROOT, Root); + TIDY_NODE_CONST(DOCTYPE, DocType); + TIDY_NODE_CONST(COMMENT, Comment); + TIDY_NODE_CONST(PROCINS, ProcIns); + TIDY_NODE_CONST(TEXT, Text); + TIDY_NODE_CONST(START, Start); + TIDY_NODE_CONST(END, End); + TIDY_NODE_CONST(STARTEND, StartEnd); + TIDY_NODE_CONST(CDATA, CDATA); + TIDY_NODE_CONST(SECTION, Section); + TIDY_NODE_CONST(ASP, Asp); + TIDY_NODE_CONST(JSTE, Jste); + TIDY_NODE_CONST(PHP, Php); + TIDY_NODE_CONST(XMLDECL, XmlDecl); + +} + +void _php_tidy_register_tags(INIT_FUNC_ARGS) { + + TIDY_TAG_CONST(UNKNOWN); + TIDY_TAG_CONST(A); + TIDY_TAG_CONST(ABBR); + TIDY_TAG_CONST(ACRONYM); + TIDY_TAG_CONST(ADDRESS); + TIDY_TAG_CONST(ALIGN); + TIDY_TAG_CONST(APPLET); + TIDY_TAG_CONST(AREA); + TIDY_TAG_CONST(B); + TIDY_TAG_CONST(BASE); + TIDY_TAG_CONST(BASEFONT); + TIDY_TAG_CONST(BDO); + TIDY_TAG_CONST(BGSOUND); + TIDY_TAG_CONST(BIG); + TIDY_TAG_CONST(BLINK); + TIDY_TAG_CONST(BLOCKQUOTE); + TIDY_TAG_CONST(BODY); + TIDY_TAG_CONST(BR); + TIDY_TAG_CONST(BUTTON); + TIDY_TAG_CONST(CAPTION); + TIDY_TAG_CONST(CENTER); + TIDY_TAG_CONST(CITE); + TIDY_TAG_CONST(CODE); + TIDY_TAG_CONST(COL); + TIDY_TAG_CONST(COLGROUP); + TIDY_TAG_CONST(COMMENT); + TIDY_TAG_CONST(DD); + TIDY_TAG_CONST(DEL); + 
TIDY_TAG_CONST(DFN); + TIDY_TAG_CONST(DIR); + TIDY_TAG_CONST(DIV); + TIDY_TAG_CONST(DL); + TIDY_TAG_CONST(DT); + TIDY_TAG_CONST(EM); + TIDY_TAG_CONST(EMBED); + TIDY_TAG_CONST(FIELDSET); + TIDY_TAG_CONST(FONT); + TIDY_TAG_CONST(FORM); + TIDY_TAG_CONST(FRAME); + TIDY_TAG_CONST(FRAMESET); + TIDY_TAG_CONST(H1); + TIDY_TAG_CONST(H2); + TIDY_TAG_CONST(H3); + TIDY_TAG_CONST(H4); + TIDY_TAG_CONST(H5); + TIDY_TAG_CONST(H6); + TIDY_TAG_CONST(HEAD); + TIDY_TAG_CONST(HR); + TIDY_TAG_CONST(HTML); + TIDY_TAG_CONST(I); + TIDY_TAG_CONST(IFRAME); + TIDY_TAG_CONST(ILAYER); + TIDY_TAG_CONST(IMG); + TIDY_TAG_CONST(INPUT); + TIDY_TAG_CONST(INS); + TIDY_TAG_CONST(ISINDEX); + TIDY_TAG_CONST(KBD); + TIDY_TAG_CONST(KEYGEN); + TIDY_TAG_CONST(LABEL); + TIDY_TAG_CONST(LAYER); + TIDY_TAG_CONST(LEGEND); + TIDY_TAG_CONST(LI); + TIDY_TAG_CONST(LINK); + TIDY_TAG_CONST(LISTING); + TIDY_TAG_CONST(MAP); + TIDY_TAG_CONST(MARQUEE); + TIDY_TAG_CONST(MENU); + TIDY_TAG_CONST(META); + TIDY_TAG_CONST(MULTICOL); + TIDY_TAG_CONST(NOBR); + TIDY_TAG_CONST(NOEMBED); + TIDY_TAG_CONST(NOFRAMES); + TIDY_TAG_CONST(NOLAYER); + TIDY_TAG_CONST(NOSAVE); + TIDY_TAG_CONST(NOSCRIPT); + TIDY_TAG_CONST(OBJECT); + TIDY_TAG_CONST(OL); + TIDY_TAG_CONST(OPTGROUP); + TIDY_TAG_CONST(OPTION); + TIDY_TAG_CONST(P); + TIDY_TAG_CONST(PARAM); + TIDY_TAG_CONST(PLAINTEXT); + TIDY_TAG_CONST(PRE); + TIDY_TAG_CONST(Q); + TIDY_TAG_CONST(RB); + TIDY_TAG_CONST(RBC); + TIDY_TAG_CONST(RP); + TIDY_TAG_CONST(RT); + TIDY_TAG_CONST(RTC); + TIDY_TAG_CONST(RUBY); + TIDY_TAG_CONST(S); + TIDY_TAG_CONST(SAMP); + TIDY_TAG_CONST(SCRIPT); + TIDY_TAG_CONST(SELECT); + TIDY_TAG_CONST(SERVER); + TIDY_TAG_CONST(SERVLET); + TIDY_TAG_CONST(SMALL); + TIDY_TAG_CONST(SPACER); + TIDY_TAG_CONST(SPAN); + TIDY_TAG_CONST(STRIKE); + TIDY_TAG_CONST(STRONG); + TIDY_TAG_CONST(STYLE); + TIDY_TAG_CONST(SUB); + TIDY_TAG_CONST(SUP); + TIDY_TAG_CONST(TABLE); + TIDY_TAG_CONST(TBODY); + TIDY_TAG_CONST(TD); + TIDY_TAG_CONST(TEXTAREA); + TIDY_TAG_CONST(TFOOT); + TIDY_TAG_CONST(TH); + 
TIDY_TAG_CONST(THEAD); + TIDY_TAG_CONST(TITLE); + TIDY_TAG_CONST(TR); + TIDY_TAG_CONST(TT); + TIDY_TAG_CONST(U); + TIDY_TAG_CONST(UL); + TIDY_TAG_CONST(VAR); + TIDY_TAG_CONST(WBR); + TIDY_TAG_CONST(XMP); + +} + +void _php_tidy_register_attributes(INIT_FUNC_ARGS) { + + TIDY_ATTR_CONST(UNKNOWN); + TIDY_ATTR_CONST(ABBR); + TIDY_ATTR_CONST(ACCEPT); + TIDY_ATTR_CONST(ACCEPT_CHARSET); + TIDY_ATTR_CONST(ACCESSKEY); + TIDY_ATTR_CONST(ACTION); + TIDY_ATTR_CONST(ADD_DATE); + TIDY_ATTR_CONST(ALIGN); + TIDY_ATTR_CONST(ALINK); + TIDY_ATTR_CONST(ALT); + TIDY_ATTR_CONST(ARCHIVE); + TIDY_ATTR_CONST(AXIS); + TIDY_ATTR_CONST(BACKGROUND); + TIDY_ATTR_CONST(BGCOLOR); + TIDY_ATTR_CONST(BGPROPERTIES); + TIDY_ATTR_CONST(BORDER); + TIDY_ATTR_CONST(BORDERCOLOR); + TIDY_ATTR_CONST(BOTTOMMARGIN); + TIDY_ATTR_CONST(CELLPADDING); + TIDY_ATTR_CONST(CELLSPACING); + TIDY_ATTR_CONST(CHAR); + TIDY_ATTR_CONST(CHAROFF); + TIDY_ATTR_CONST(CHARSET); + TIDY_ATTR_CONST(CHECKED); + TIDY_ATTR_CONST(CITE); + TIDY_ATTR_CONST(CLASS); + TIDY_ATTR_CONST(CLASSID); + TIDY_ATTR_CONST(CLEAR); + TIDY_ATTR_CONST(CODE); + TIDY_ATTR_CONST(CODEBASE); + TIDY_ATTR_CONST(CODETYPE); + TIDY_ATTR_CONST(COLOR); + TIDY_ATTR_CONST(COLS); + TIDY_ATTR_CONST(COLSPAN); + TIDY_ATTR_CONST(COMPACT); + TIDY_ATTR_CONST(CONTENT); + TIDY_ATTR_CONST(COORDS); + TIDY_ATTR_CONST(DATA); + TIDY_ATTR_CONST(DATAFLD); + //TIDY_ATTR_CONST(DATAFORMATSAS); + TIDY_ATTR_CONST(DATAPAGESIZE); + TIDY_ATTR_CONST(DATASRC); + TIDY_ATTR_CONST(DATETIME); + TIDY_ATTR_CONST(DECLARE); + TIDY_ATTR_CONST(DEFER); + TIDY_ATTR_CONST(DIR); + TIDY_ATTR_CONST(DISABLED); + TIDY_ATTR_CONST(ENCODING); + TIDY_ATTR_CONST(ENCTYPE); + TIDY_ATTR_CONST(FACE); + TIDY_ATTR_CONST(FOR); + TIDY_ATTR_CONST(FRAME); + TIDY_ATTR_CONST(FRAMEBORDER); + TIDY_ATTR_CONST(FRAMESPACING); + TIDY_ATTR_CONST(GRIDX); + TIDY_ATTR_CONST(GRIDY); + TIDY_ATTR_CONST(HEADERS); + TIDY_ATTR_CONST(HEIGHT); + TIDY_ATTR_CONST(HREF); + TIDY_ATTR_CONST(HREFLANG); + TIDY_ATTR_CONST(HSPACE); + 
TIDY_ATTR_CONST(HTTP_EQUIV); + TIDY_ATTR_CONST(ID); + TIDY_ATTR_CONST(ISMAP); + TIDY_ATTR_CONST(LABEL); + TIDY_ATTR_CONST(LANG); + TIDY_ATTR_CONST(LANGUAGE); + TIDY_ATTR_CONST(LAST_MODIFIED); + TIDY_ATTR_CONST(LAST_VISIT); + TIDY_ATTR_CONST(LEFTMARGIN); + TIDY_ATTR_CONST(LINK); + TIDY_ATTR_CONST(LONGDESC); + TIDY_ATTR_CONST(LOWSRC); + TIDY_ATTR_CONST(MARGINHEIGHT); + TIDY_ATTR_CONST(MARGINWIDTH); + TIDY_ATTR_CONST(MAXLENGTH); + TIDY_ATTR_CONST(MEDIA); + TIDY_ATTR_CONST(METHOD); + TIDY_ATTR_CONST(MULTIPLE); + TIDY_ATTR_CONST(NAME); + TIDY_ATTR_CONST(NOHREF); + TIDY_ATTR_CONST(NORESIZE); + TIDY_ATTR_CONST(NOSHADE); + TIDY_ATTR_CONST(NOWRAP); + TIDY_ATTR_CONST(OBJECT); + TIDY_ATTR_CONST(OnAFTERUPDATE); + TIDY_ATTR_CONST(OnBEFOREUNLOAD); + TIDY_ATTR_CONST(OnBEFOREUPDATE); + TIDY_ATTR_CONST(OnBLUR); + TIDY_ATTR_CONST(OnCHANGE); + TIDY_ATTR_CONST(OnCLICK); + TIDY_ATTR_CONST(OnDATAAVAILABLE); + TIDY_ATTR_CONST(OnDATASETCHANGED); + TIDY_ATTR_CONST(OnDATASETCOMPLETE); + TIDY_ATTR_CONST(OnDBLCLICK); + TIDY_ATTR_CONST(OnERRORUPDATE); + TIDY_ATTR_CONST(OnFOCUS); + TIDY_ATTR_CONST(OnKEYDOWN); + TIDY_ATTR_CONST(OnKEYPRESS); + TIDY_ATTR_CONST(OnKEYUP); + TIDY_ATTR_CONST(OnLOAD); + TIDY_ATTR_CONST(OnMOUSEDOWN); + TIDY_ATTR_CONST(OnMOUSEMOVE); + TIDY_ATTR_CONST(OnMOUSEOUT); + TIDY_ATTR_CONST(OnMOUSEOVER); + TIDY_ATTR_CONST(OnMOUSEUP); + TIDY_ATTR_CONST(OnRESET); + TIDY_ATTR_CONST(OnROWENTER); + TIDY_ATTR_CONST(OnROWEXIT); + TIDY_ATTR_CONST(OnSELECT); + TIDY_ATTR_CONST(OnSUBMIT); + TIDY_ATTR_CONST(OnUNLOAD); + TIDY_ATTR_CONST(PROFILE); + TIDY_ATTR_CONST(PROMPT); + TIDY_ATTR_CONST(RBSPAN); + TIDY_ATTR_CONST(READONLY); + TIDY_ATTR_CONST(REL); + TIDY_ATTR_CONST(REV); + TIDY_ATTR_CONST(RIGHTMARGIN); + TIDY_ATTR_CONST(ROWS); + TIDY_ATTR_CONST(ROWSPAN); + TIDY_ATTR_CONST(RULES); + TIDY_ATTR_CONST(SCHEME); + TIDY_ATTR_CONST(SCOPE); + TIDY_ATTR_CONST(SCROLLING); + TIDY_ATTR_CONST(SELECTED); + TIDY_ATTR_CONST(SHAPE); + TIDY_ATTR_CONST(SHOWGRID); + TIDY_ATTR_CONST(SHOWGRIDX); + 
TIDY_ATTR_CONST(SHOWGRIDY); + TIDY_ATTR_CONST(SIZE); + TIDY_ATTR_CONST(SPAN); + TIDY_ATTR_CONST(SRC); + TIDY_ATTR_CONST(STANDBY); + TIDY_ATTR_CONST(START); + TIDY_ATTR_CONST(STYLE); + TIDY_ATTR_CONST(SUMMARY); + TIDY_ATTR_CONST(TABINDEX); + TIDY_ATTR_CONST(TARGET); + TIDY_ATTR_CONST(TEXT); + TIDY_ATTR_CONST(TITLE); + TIDY_ATTR_CONST(TOPMARGIN); + TIDY_ATTR_CONST(TYPE); + TIDY_ATTR_CONST(USEMAP); + TIDY_ATTR_CONST(VALIGN); + TIDY_ATTR_CONST(VALUE); + TIDY_ATTR_CONST(VALUETYPE); + TIDY_ATTR_CONST(VERSION); + TIDY_ATTR_CONST(VLINK); + TIDY_ATTR_CONST(VSPACE); + TIDY_ATTR_CONST(WIDTH); + TIDY_ATTR_CONST(WRAP); + TIDY_ATTR_CONST(XML_LANG); + TIDY_ATTR_CONST(XML_SPACE); + TIDY_ATTR_CONST(XMLNS); + +} |