1 files changed, 154 insertions, 0 deletions
diff --git a/ext/tidy/README_TIDY b/ext/tidy/README_TIDY
new file mode 100644
index 0000000000..9b15dcd102
--- /dev/null
+++ b/ext/tidy/README_TIDY
@@ -0,0 +1,154 @@
+
+README FOR ext/tidy by John Coggeshall <john@php.net>
+
+Tidy Version: 0.5b
+
+Tidy is an extension based on Libtidy (http://tidy.sf.net/) that allows a PHP developer
+to clean, repair, and traverse HTML, XHTML, and XML documents -- including ones with
+embedded scripting languages such as PHP or ASP -- using OO constructs.
+
+The Tidy extension has two separate APIs, one for general parsing, cleaning, and
+repairing and another for document traversal. The general API is provided below:
+
+  tidy_create()                     Initialize and return a tidy document resource
+  tidy_parse_file($tidy, $file)     Parse the document stored in $file
+  tidy_parse_string($tidy, $str)    Parse the string stored in $str
+  
+  tidy_clean_repair($tidy)          Clean and repair the document
+  tidy_diagnose($tidy)              Diagnose a parsed document
+  
+  tidy_setopt($tidy, $opt, $val)    Set a configuration option $opt to $val
+  tidy_getopt($tidy, $opt)          Retrieve a configuration option
+  
+    ** note: $opt is a string representing the option. Right now the only
+    source of these options is the Libtidy source. Eventually I'll document
+    them officially -- until then, see the src/config.c file in the tidy source **
+  
+  tidy_get_output($tidy)            Return the cleaned tidy HTML as a string
+  tidy_get_error_buffer($tidy)      Return a log of the errors and warnings
+                                    returned by tidy
+  
+  tidy_get_release()                Return the Libtidy release date
+  tidy_get_status($tidy)            Return the status of the document
+  tidy_get_html_ver($tidy)          Return the major HTML version detected for
+                                    the document
+                                    
+  tidy_is_xhtml($tidy)              Determines if the document is XHTML
+  tidy_is_xml($tidy)                Determines if the document is a generic XML document
+  
+  tidy_error_count($tidy)           Returns the number of errors in the document
+  tidy_warning_count($tidy)         Returns the number of warnings in the document
+  tidy_access_count($tidy)          Returns the number of accessibility-related
+                                    warnings in the document.
+  tidy_config_count($tidy)          Returns the number of configuration errors found
+  
+  tidy_load_config($tidy, $file)    Loads the specified configuration file
+  tidy_load_config_enc($tidy,
+                       $file,
+                       $enc)        Loads the specified config file using the specified
+                                    character encoding
+  tidy_set_encoding($tidy, $enc)    Sets the current character encoding for the document
+  tidy_save_config($tidy, $file)    Saves the current config to $file
+  
+  
+Beyond these general-purpose API functions, Tidy also supports the following
+functions which are used to retrieve an object for document traversal:
+  
+  tidy_get_root($tidy)              Returns an object starting at the root of the
+                                    document
+  tidy_get_head($tidy)              Returns an object starting at the <HEAD> tag
+  tidy_get_html($tidy)              Returns an object starting at the <HTML> tag
+  tidy_get_body($tidy)              Returns an object starting at the <BODY> tag
+  
+All navigation of the specified document is done via the PHP5 object constructs.
+There are two types of objects which Tidy can create. The first is TidyNode, which
+represents HTML Tags, Text, and more (see the TidyNode_Type Constants). The second
+is TidyAttr, which represents an attribute within an HTML tag (TidyNode). The
+functionality of these objects is represented by the following schema:
+
+class TidyNode {
+
+    public $name;               // name of node (i.e. HEAD)
+    public $value;              // value of node (everything between tags)
+    public $type;               // type of node (text, php, asp, etc.)
+    public $id;                 // id of node (i.e. TIDY_TAG_HEAD)
+    
+    public $line;               // line # of node in source
+    public $column;             // column # of node in source
+    
+    public $html_ver;           // HTML version (0,1,2,3,4)
+    
+    public $attribs;            // an array of attributes (see TidyAttr)
+    public $children;           // an array of child nodes
+    
+    function has_siblings();    // any sibling nodes?
+    function has_children();    // any child nodes?
+    function has_parent();      // have a parent?
+    
+    function is_comment();      // is node a comment?
+    function is_xhtml();        // is document XHTML?
+    function is_xml();          // is document generic XML (not HTML/XHTML)?
+    function is_text();         // is node text?
+    function is_html();         // is node an HTML tag?
+    
+    function is_jste();         // is jste block?
+    function is_asp();          // is Microsoft ASP block?
+    function is_php();          // is PHP block?
+    
+    function next();            // returns next node
+    function prev();            // returns prev node
+    function parent();          // returns parent node
+    function child();           // returns first child node
+    
+    /* Searches for a particular attribute in the current node based
+       on attribute ID. If found returns a TidyAttr object for it */
+    function get_attr_type($attr_id);
+
+    /*
+
+    NOT YET IMPLEMENTED
+
+    Recursively traverses the tree from the current node and returns
+    an array of attributes matching the node ID/attr ID pair
+
+    Useful for pulling out things like links:
+        foreach($body->fetch_attrs(TIDY_TAG_A, TIDY_ATTR_HREF) as $link) {
+            echo "Link : {$link->value}\n";
+        }
+    */
+      
+    function fetch_attrs($node_id, $attr_id);
+    
+    /*
+
+    NOT YET IMPLEMENTED
+
+    Recursively traverses the tree from the current node and returns
+    an array of nodes matching the node ID
+
+    Useful for pulling out tables, etc (echoes the HTML for every
+    <TABLE> block)
+
+        foreach($body->fetch_nodes(TIDY_TAG_TABLE) as $table) {
+
+            echo $table->value;
+
+        }
+    */
+    function fetch_nodes($node_id);
+}
+
+class TidyAttr {
+
+    public $name;           // attribute name i.e. HREF
+    public $value;          // attribute value
+    public $id;             // attribute id i.e. TIDY_ATTR_HREF
+
+    function next();        // returns next attribute in tag
+    function tag();         // returns the tag node associated with attribute
+}
+
+Examples of using these objects to navigate the tree can be found in the examples/
+directory (I suggest looking at urlgrab.php and dumpit.php)
+
+E-mail thoughts, suggestions, patches, etc. to <john@php.net>
+\ No newline at end of file