3 files changed, 197 insertions, 0 deletions
diff --git a/ext/tidy/examples/cleanhtml.php b/ext/tidy/examples/cleanhtml.php
new file mode 100644
index 0000000000..c949a0cfc2
--- /dev/null
+++ b/ext/tidy/examples/cleanhtml.php
@@ -0,0 +1,40 @@
+<?php
+
+    /*
+     * cleanhtml.php
+     *
+     * A simple script to clean and repair HTML, XHTML, PHP, ASP, etc. documents.
+     * If no file is provided, it reads from standard input.
+     *
+     * By: John Coggeshall <john@php.net>
+     *
+     * Usage: php cleanhtml.php [filename]
+     *
+     */
+    
+    /* Create the Tidy resource used for parsing, repair and output */
+    $tidy = tidy_create();
+
+    /* Parse the file named on the command line, or stdin when none is given */
+    if(isset($_SERVER['argv'][1])) {
+        tidy_parse_file($tidy, $_SERVER['argv'][1]);
+    } else {
+        tidy_parse_string($tidy, file_get_contents("php://stdin"));
+    }
+
+    tidy_clean_repair($tidy);
+
+    /* Report diagnostics (if any) ahead of the repaired document */
+    if(tidy_warning_count($tidy) || tidy_error_count($tidy)) {
+        echo "\n\nThe following errors or warnings occurred:\n";
+        echo tidy_get_error_buffer($tidy);
+        echo "\n";
+    }
+
+    echo tidy_get_output($tidy);
+    
+?>
+    
+        
+     
+     
+\ No newline at end of file
diff --git a/ext/tidy/examples/dumpit.php b/ext/tidy/examples/dumpit.php
new file mode 100644
index 0000000000..46d307d704
--- /dev/null
+++ b/ext/tidy/examples/dumpit.php
@@ -0,0 +1,94 @@
+<?php
+    /*
+     * dumpit.php
+     *
+     * a command-line script which dumps the given HTML, PHP, ASP, XHTML, etc.
+     * file as it is represented in the document model.
+     *
+     * By: John Coggeshall <john@php.net>
+     *
+     * Usage: php dumpit.php <filename>
+     */
+     
+    
+    /* The filename argument is required; without it there is nothing to dump */
+    if(!isset($_SERVER['argv'][1])) {
+        fwrite(STDERR, "Usage: php dumpit.php <filename>\n");
+        exit(1);
+    }
+    $tidy = tidy_create();
+    tidy_parse_file($tidy, $_SERVER['argv'][1]);
+    /* Optionally call tidy_clean_repair($tidy) here to fix up the document */
+    dump_tree(tidy_get_root($tidy));
+    echo "\n";
+    
+    function node_type($type) {
+        /* Label per TIDY_NODETYPE_* value; matched with loose ==, first hit wins */
+        $labels = array(
+            TIDY_NODETYPE_ROOT     => "Root Node",
+            TIDY_NODETYPE_DOCTYPE  => "DocType Node",
+            TIDY_NODETYPE_COMMENT  => "Comment Node",
+            TIDY_NODETYPE_PROCINS  => "ProcIns Node",
+            TIDY_NODETYPE_TEXT     => "Text Node",
+            TIDY_NODETYPE_START    => "Start Node",
+            TIDY_NODETYPE_END      => "End Node",
+            TIDY_NODETYPE_STARTEND => "Start/End Node",
+            TIDY_NODETYPE_CDATA    => "CDATA Node",
+            TIDY_NODETYPE_SECTION  => "Section Node",
+            TIDY_NODETYPE_ASP      => "ASP Source Code Node",
+            TIDY_NODETYPE_PHP      => "PHP Source Code Node",
+            TIDY_NODETYPE_JSTE     => "JSTE Source Code",
+            TIDY_NODETYPE_XMLDECL  => "XML Declaration Node",
+        );
+        foreach($labels as $known => $label) { if($type == $known) { return $label; } }
+        return "Unknown Node";
+    }
+    
+    function do_leaf($string, $indent) {
+        /* Prints $string preceded by $indent spaces. The count is clamped
+           at zero because str_repeat() rejects a negative repeat count, so
+           a negative indent simply prints no padding. */
+        echo str_repeat(" ", max(0, (int) $indent)), $string;
+    }
+    
+    function dump_tree($node, $indent = 0) {
+        /* Prints $node and, recursively, its children as an indented tree.
+           $indent is the number of leading spaces for this level; each
+           level of children is printed three columns further right. */
+        if(!$node) {
+            return;
+        }
+        /* Upper-case the node name, substituting a placeholder when blank */
+        $label = trim(strtoupper($node->name));
+        if(empty($label)) {
+            $label = "[EMPTY]";
+        }
+        do_leaf(" + " . $label . " (" . node_type($node->type) . ")\n", $indent);
+
+        /* Text nodes additionally show the text they carry in $node->value */
+        if($node->type == TIDY_NODETYPE_TEXT) {
+            do_leaf("     |\n", $indent);
+            do_leaf("     +---- Value: '" . $node->value . "'\n", $indent);
+        }
+
+        /* List each attribute name followed by its value */
+        if(count($node->attribs) > 0) {
+            do_leaf(" |\n", $indent);
+            do_leaf(" +---- Attributes\n", $indent);
+            foreach($node->attribs as $attr) {
+                do_leaf("             +--" . $attr->name . "\n", $indent);
+                do_leaf("             |    +-- Value: " . $attr->value . "\n", $indent);
+            }
+        }
+
+        /* Descend into the children, shifted three columns to the right */
+        if(!$node->has_children()) {
+            return;
+        }
+        foreach($node->children as $child) {
+            dump_tree($child, $indent + 3);
+        }
+    }
+    
+
+?>  
+\ No newline at end of file
diff --git a/ext/tidy/examples/urlgrab.php b/ext/tidy/examples/urlgrab.php
new file mode 100644
index 0000000000..63a2875a79
--- /dev/null
+++ b/ext/tidy/examples/urlgrab.php
@@ -0,0 +1,63 @@
+<?php
+
+    /*
+     * urlgrab.php
+     *
+     * A simple command-line utility to extract all of the URLs contained
+     * within <A HREF> tags from a document.
+     *
+     * By: John Coggeshall <john@php.net>
+     *
+     * Usage: php urlgrab.php <file>
+     *
+     */
+     
+    /* The file argument is required; show the usage line when it is missing */
+    if(!isset($_SERVER['argv'][1])) {
+        fwrite(STDERR, "Usage: php urlgrab.php <file>\n");
+        exit(1);
+    }
+
+    /* Create a Tidy resource, then parse and fix up the document */
+    $tidy = tidy_create();
+    tidy_parse_file($tidy, $_SERVER['argv'][1]);
+    tidy_clean_repair($tidy);
+
+    /* Fetch the node for the <HTML> tag and print every link found below it */
+    $html = tidy_get_html($tidy);
+    print_r(get_links($html));
+    
+    function get_links($node) {
+        /* Collects the HREF value of every <A> tag at or below $node.
+           The node itself is examined first, then each child subtree in
+           the order given by $node->children. Returns an array of the
+           values found, which is empty when there are none. */
+        $found = array();
+
+        /* Only an <A> tag is asked for its HREF attribute */
+        $href = ($node->id == TIDY_TAG_A)
+              ? $node->get_attr_type(TIDY_ATTR_HREF)
+              : null;
+
+        /* Record the value only when an HREF attribute was returned */
+        if($href) {
+            $found[] = $href->value;
+        }
+
+        /* A node without children has nothing more to contribute */
+        if(!$node->has_children()) {
+            return $found;
+        }
+
+        /* Recurse into every child and append whatever it reports */
+        foreach($node->children as $kid) {
+            $nested = get_links($kid);
+            foreach($nested as $link) {
+                $found[] = $link;
+            }
+        }
+
+        return $found;
+    }
+    
+?>  
+\ No newline at end of file