summaryrefslogtreecommitdiff
path: root/ext/tidy/README_TIDY
diff options
context:
space:
mode:
Diffstat (limited to 'ext/tidy/README_TIDY')
-rw-r--r--ext/tidy/README_TIDY154
1 files changed, 154 insertions, 0 deletions
diff --git a/ext/tidy/README_TIDY b/ext/tidy/README_TIDY
new file mode 100644
index 0000000000..9b15dcd102
--- /dev/null
+++ b/ext/tidy/README_TIDY
@@ -0,0 +1,154 @@
+
+README FOR ext/tidy by John Coggeshall <john@php.net>
+
+Tidy Version: 0.5b
+
+Tidy is an extension based on Libtidy (http://tidy.sf.net/) and allows a PHP developer
+to clean, repair, and traverse HTML, XHTML, and XML documents -- including ones with
+embedded scripting languages such as PHP or ASP -- using OO constructs.
+
+The Tidy extension has two separate APIs, one for general parsing, cleaning, and
+repairing and another for document traversal. The general API is provided below:
+
+ tidy_create() Initialize and return a tidy document resource
+ tidy_parse_file($tidy, $file) Parse the document stored in $file
+ tidy_parse_string($tidy, $str) Parse the string stored in $str
+
+ tidy_clean_repair($tidy) Clean and repair the document
+ tidy_diagnose($tidy) Diagnose a parsed document
+
+ tidy_setopt($tidy, $opt, $val) Set a configuration option $opt to $val
+ tidy_getopt($tidy, $opt) Retrieve a configuration option
+
+ ** note: $opt is a string representing the option. Right now the only
+ source of these options is the LibTidy source; eventually I'll document
+ them officially -- see the src/config.c file in the tidy source **
+
+ tidy_get_output($tidy) Return the cleaned tidy HTML as a string
+ tidy_get_error_buffer($tidy) Return a log of the errors and warnings
+ returned by tidy
+
+ tidy_get_release() Return the Libtidy release date
+ tidy_get_status($tidy) Return the status of the document
+ tidy_get_html_ver($tidy) Return the major HTML version detected for
+ the document
+
+ tidy_is_xhtml($tidy) Determines if the document is XHTML
+ tidy_is_xml($tidy) Determines if the document is a generic XML document
+
+ tidy_error_count($tidy) Returns the number of errors in the document
+ tidy_warning_count($tidy) Returns the number of warnings in the document
+ tidy_access_count($tidy) Returns the number of accessibility-related
+ warnings in the document.
+ tidy_config_count($tidy) Returns the number of configuration errors found
+
+ tidy_load_config($tidy, $file) Loads the specified configuration file
+ tidy_load_config_enc($tidy,
+ $file,
+ $enc) Loads the specified config file using the specified
+ character encoding
+ tidy_set_encoding($tidy, $enc) Sets the current character encoding for the document
+ tidy_save_config($tidy, $file) Saves the current config to $file
+
+
+Beyond these general-purpose API functions, Tidy also supports the following
+functions which are used to retrieve an object for document traversal:
+
+ tidy_get_root($tidy) Returns an object starting at the root of the
+ document
+ tidy_get_head($tidy) Returns an object starting at the <HEAD> tag
+ tidy_get_html($tidy) Returns an object starting at the <HTML> tag
+ tidy_get_body($tidy) Returns an object starting at the <BODY> tag
+
+All navigation of the specified document is done via the PHP5 object constructs.
+There are two types of objects which Tidy can create. The first is TidyNode, which
+represents HTML Tags, Text, and more (see the TidyNode_Type Constants). The second
+is TidyAttr, which represents an attribute within an HTML tag (TidyNode). The
+functionality of these objects is represented by the following schema:
+
+class TidyNode {
+
+ public $name; // name of node (i.e. HEAD)
+ public $value; // value of node (everything between tags)
+ public $type; // type of node (text, php, asp, etc.)
+ public $id; // id of node (i.e. TIDY_TAG_HEAD)
+
+ public $line; // line # of node in source
+ public $column; // column # of node in source
+
+ public $html_ver; // HTML version (0,1,2,3,4)
+
+ public $attribs; // an array of attributes (see TidyAttr)
+ public $children; // an array of child nodes
+
+ function has_siblings(); // any sibling nodes?
+ function has_children(); // any child nodes?
+ function has_parent(); // have a parent?
+
+ function is_comment(); // is node a comment?
+ function is_xhtml(); // is document XHTML?
+ function is_xml(); // is document generic XML (not HTML/XHTML)
+ function is_text(); // is node text?
+ function is_html(); // is node an HTML tag?
+
+ function is_jste(); // is jste block?
+ function is_asp(); // is Microsoft ASP block?
+ function is_php(); // is PHP block?
+
+ function next(); // returns next node
+ function prev(); // returns prev node
+ function parent(); // returns parent node
+ function child(); // returns first child node
+
+ /* Searches for a particular attribute in the current node based
+ on attribute ID. If found returns a TidyAttr object for it */
+ function get_attr_type($attr_id);
+
+ /*
+
+ NOT YET IMPLEMENTED
+
+ Recursively traverses the tree from the current node and returns
+ an array of attributes matching the node ID/attr ID pair
+
+ Useful for pulling out things like links:
+ foreach($body->fetch_attrs(TIDY_TAG_A, TIDY_ATTR_HREF) as $link) {
+ echo "Link : {$link->value}\n";
+ }
+ */
+
+ function fetch_attrs($node_id, $attr_id);
+
+ /*
+
+ NOT YET IMPLEMENTED
+
+ Recursively traverses the tree from the current node and returns
+ an array of nodes matching the node ID
+
+ Useful for pulling out tables, etc (echos the HTML for every
+ <TABLE> block)
+
+ foreach($body->fetch_nodes(TIDY_TAG_TABLE) as $table) {
+
+ echo $table->value;
+
+ }
+ */
+ function fetch_nodes($node_id);
+}
+
+class TidyAttr {
+
+ public $name; // attribute name i.e. HREF
+ public $value; // attribute value
+ public $id; // attribute id i.e. TIDY_ATTR_HREF
+
+ function next(); // returns next attribute in tag
+ function tag(); // returns the tag node associated with attribute
+}
+
+Examples of using these objects to navigate the tree can be found in the examples/
+directory (I suggest looking at urlgrab.php and dumpit.php)
+
+E-mail thoughts, suggestions, patches, etc. to <john@php.net> \ No newline at end of file