diff options
Diffstat (limited to 'ext/tidy/README_TIDY')
-rw-r--r-- | ext/tidy/README_TIDY | 154 |
1 files changed, 0 insertions, 154 deletions
diff --git a/ext/tidy/README_TIDY b/ext/tidy/README_TIDY deleted file mode 100644 index 9b15dcd102..0000000000 --- a/ext/tidy/README_TIDY +++ /dev/null @@ -1,154 +0,0 @@ - -README FOR ext/tidy by John Coggeshall <john@php.net> - -Tidy Version: 0.5b - -Tidy is an extension based on Libtidy (http://tidy.sf.net/) and allows a PHP developer -to clean, repair, and traverse HTML, XHTML, and XML documents -- including ones with -embedded scripting languages such as PHP or ASP within them using OO constructs. - -The Tidy extension has two separate APIs, one for general parsing, cleaning, and -repairing and another for document traversal. The general API is provided below: - - tidy_create() Initialize and return a tidy document resource - tidy_parse_file($tidy, $file) Parse the document stored in $file - tidy_parse_string($tidy, $str) Parse the string stored in $str - - tidy_clean_repair($tidy) Clean and repair the document - tidy_diagnose($tidy) Diagnose a parsed document - - tidy_setopt($tidy, $opt, $val) Set a configuration option $opt to $val - tidy_getopt($tidy, $opt) Retrieve a configuration option - - ** note: $opt is a string representing the option. Right now the only - source of these options is the LibTidy source.. 
eventually I'll document - them officially -- see the src/config.c file in the tidy source ** - - tidy_get_output($tidy) Return the cleaned tidy HTML as a string - tidy_get_error_buffer($tidy) Return a log of the errors and warnings - returned by tidy - - tidy_get_release() Return the Libtidy release date - tidy_get_status($tidy) Return the status of the document - tidy_get_html_ver($tidy) Return the major HTML version detected for - the document; - - tidy_is_xhtml($tidy) Determines if the document is XHTML - tidy_is_xml($tidy) Determines if the document is a generic XML - - tidy_error_count($tidy) Returns the number of errors in the document - tidy_warning_count($tidy) Returns the number of warnings in the document - tidy_access_count($tidy) Returns the number of accessibility-related - warnings in the document. - tidy_config_count($tidy) Returns the number of configuration errors found - - tidy_load_config($tidy, $file) Loads the specified configuration file - tidy_load_config_enc($tidy, - $file, - $enc) Loads the specified config file using the specified - character encoding - tidy_set_encoding($tidy, $enc) Sets the current character encoding for the document - tidy_save_config($tidy, $file) Saves the current config to $file - - -Beyond these general-purpose API functions, Tidy also supports the following -functions which are used to retrieve an object for document traversal: - - tidy_get_root($tidy) Returns an object starting at the root of the - document - tidy_get_head($tidy) Returns an object starting at the <HEAD> tag - tidy_get_html($tidy) Returns an object starting at the <HTML> tag - tidy_get_body($tidy) Returns an object starting at the <BODY> tag - -All Navigation of the specified document is done via the PHP5 object constructs. -There are two types of objects which Tidy can create. The first is TidyNode, which -represents HTML Tags, Text, and more (see the TidyNode_Type Constants). 
The second -is TidyAttr, which represents an attribute within an HTML tag (TidyNode). The -functionality of these objects is represented by the following schema: - -class TidyNode { - - public $name; // name of node (i.e. HEAD) - public $value; // value of node (everything between tags) - public $type; // type of node (text, php, asp, etc.) - public $id; // id of node (i.e. TIDY_TAG_HEAD) - - public $line; // line # of node in source - public $column; // column # of node in source - - public $html_ver; // HTML version (0,1,2,3,4) - - public $attribs; // an array of attributes (see TidyAttr) - public $children; // an array of child nodes - - function has_siblings(); // any sibling nodes? - function has_children(); // any child nodes? - function has_parent(); // have a parent? - - function is_comment(); // is node a comment? - function is_xhtml(); // is document XHTML? - function is_xml(); // is document generic XML (not HTML/XHTML) - function is_text(); // is node text? - function is_html(); // is node an HTML tag? - - function is_jste(); // is jste block? - function is_asp(); // is Microsoft ASP block? - function is_php(); // is PHP block? - - function next(); // returns next node - function prev(); // returns prev node - function parent(); // returns parent node - function child(); // returns first child node - - /* Searches for a particular attribute in the current node based - on node ID. 
If found, returns a TidyAttr object for it */ - function get_attr_type($attr_id); - - /* - - NOT YET IMPLEMENTED - - Recursively traverses the tree from the current node and returns - an array of attributes matching the node ID/attr ID pair - - Useful for pulling out things like links: - foreach($body->fetch_attrs(TIDY_TAG_A, TIDY_ATTR_HREF) as $link) { - echo "Link : {$link->value}\n"; - } - */ - - function fetch_attrs($node_id, $attr_id); - - /* - - NOT YET IMPLEMENTED - - Recursively traverses the tree from the current node and returns - an array of nodes matching the node ID - - Useful for pulling out tables, etc. (echoes the HTML for every - <TABLE> block) - - foreach($body->fetch_nodes(TIDY_TAG_TABLE) as $table) { - - echo $table->value; - - } - */ - function fetch_nodes($node_id); -} - -class TidyAttr { - - public $name; // attribute name i.e. HREF - public $value; // attribute value - public $id; // attribute id i.e. TIDY_ATTR_HREF - - function next(); // returns next attribute in tag - function tag(); // returns the tag node associated with attribute -} - -Examples of using these objects to navigate the tree can be found in the examples/ -directory (I suggest looking at urlgrab.php and dumpit.php) - -E-mail thoughts, suggestions, patches, etc. to <john@php.net>
\ No newline at end of file |