diff options
Diffstat (limited to 'ext/tidy/examples')
| -rw-r--r-- | ext/tidy/examples/cleanhtml.php | 2 | ||||
| -rw-r--r-- | ext/tidy/examples/cleanhtml5.php | 39 | ||||
| -rw-r--r-- | ext/tidy/examples/dumpit5.php | 92 | ||||
| -rw-r--r-- | ext/tidy/examples/urlgrab.php | 2 | ||||
| -rw-r--r-- | ext/tidy/examples/urlgrab5.php | 41 |
5 files changed, 176 insertions, 0 deletions
diff --git a/ext/tidy/examples/cleanhtml.php b/ext/tidy/examples/cleanhtml.php index 9d054cda4f..9a6713dc55 100644 --- a/ext/tidy/examples/cleanhtml.php +++ b/ext/tidy/examples/cleanhtml.php @@ -6,6 +6,8 @@ * A simple script to clean and repair HTML,XHTML,PHP,ASP,etc. documents * if no file is provided, it reads from standard input. * + * NOTE: Works only with tidy for PHP 4.3.x, for tidy in PHP 5 see cleanhtml5.php + * * By: John Coggeshall <john@php.net> * * Usage: php cleanhtml.php [filename] diff --git a/ext/tidy/examples/cleanhtml5.php b/ext/tidy/examples/cleanhtml5.php new file mode 100644 index 0000000000..5e4095e8ac --- /dev/null +++ b/ext/tidy/examples/cleanhtml5.php @@ -0,0 +1,39 @@ +<?php + + /* + * cleanhtml5.php + * + * A simple script to clean and repair HTML,XHTML,PHP,ASP,etc. documents + * if no file is provided, it reads from standard input. + * + * NOTE: Works only with tidy for PHP 5, for tidy in PHP 4.3.x see cleanhtml.php + * + * By: John Coggeshall <john@php.net> + * + * Usage: php cleanhtml5.php [filename] + * + */ + + if(!isset($_SERVER['argv'][1])) { + $data = file_get_contents("php://stdin"); + $tidy = tidy_parse_string($data); + } else { + $tidy = tidy_parse_file($_SERVER['argv'][1]); + } + + $tidy->clean_repair(); + + if(!empty($tidy->error_buf)) { + + echo "\n\nThe following errors or warnings occured:\n"; + echo "{$tidy->error_buf}\n"; + + } + + echo $tidy; + +?> + + + +
\ No newline at end of file diff --git a/ext/tidy/examples/dumpit5.php b/ext/tidy/examples/dumpit5.php new file mode 100644 index 0000000000..ad9157f6ad --- /dev/null +++ b/ext/tidy/examples/dumpit5.php @@ -0,0 +1,92 @@ +<?php + /* + * dumpit5.php + * + * a command-line script which dumps the given HTML, PHP, ASP, XHTML, etc. + * file as it is represented in the document model. + * + * NOTE: Only works with tidy for PHP 5+, for tidy in 4.3.x, see dumpit.php + * + * By: John Coggeshall <john@php.net> + * + * Usage: php dumpit5.php <filename> + */ + + $tidy = tidy_parse_file($_SERVER['argv'][1]); + + /* Optionally you can do this here if you want to fix up the document */ + + /* $tidy->clean_repair() */ + + $tree = $tidy->root(); + dump_tree($tree); + echo "\n"; + + function node_type($type) { + + switch($type) { + + case TIDY_NODETYPE_ROOT: return "Root Node"; + case TIDY_NODETYPE_DOCTYPE: return "DocType Node"; + case TIDY_NODETYPE_COMMENT: return "Comment Node"; + case TIDY_NODETYPE_PROCINS: return "ProcIns Node"; + case TIDY_NODETYPE_TEXT: return "Text Node"; + case TIDY_NODETYPE_START: return "Start Node"; + case TIDY_NODETYPE_END: return "End Node"; + case TIDY_NODETYPE_STARTEND: return "Start/End Node"; + case TIDY_NODETYPE_CDATA: return "CDATA Node"; + case TIDY_NODETYPE_SECTION: return "Section Node"; + case TIDY_NODETYPE_ASP: return "ASP Source Code Node"; + case TIDY_NODETYPE_PHP: return "PHP Source Code Node"; + case TIDY_NODETYPE_JSTE: return "JSTE Source Code"; + case TIDY_NODETYPE_XMLDECL: return "XML Declaration Node"; + default: return "Unknown Node"; + } + } + + function do_leaf($string, $indent) { + for($i = 0; $i < $indent; $i++) { + echo " "; + } + echo $string; + } + + function dump_tree(tidy_node $node, $indent = 0) { + + /* Put something there if the node name is empty */ + $nodename = trim(strtoupper($node->name)); + $nodename = (empty($nodename)) ? 
"[EMPTY]" : $nodename; + + /* Generate the Node, and a pretty name for it */ + do_leaf(" + $nodename (".node_type($node->type).")\n", $indent); + + /* Check to see if this node is a text node. Text nodes are + generated by start/end tags and contain the text in between. + i.e. <B>foo</B> will create a text node with $node->value + equal to 'foo' */ + if($node->type == TIDY_NODETYPE_TEXT) { + do_leaf(" |\n", $indent); + do_leaf(" +---- Value: '{$node->value}'\n", $indent); + } + + if(count($node->attribute)) { + do_leaf(" |\n", $indent); + do_leaf(" +---- Attributes\n", $indent); + + foreach($node->attribute as $name=>$value) { + @do_leaf(" +-- $name\n", $indent); + do_leaf(" | +-- Value: $value\n", $indent); + } + } + + /* Recurse along the children to generate the remaining nodes */ + if($node->has_children()) { + foreach($node->child as $child) { + dump_tree($child, $indent + 3); + } + } + + } + + +?>
\ No newline at end of file diff --git a/ext/tidy/examples/urlgrab.php b/ext/tidy/examples/urlgrab.php index 7896792ea5..9ec4c42bab 100644 --- a/ext/tidy/examples/urlgrab.php +++ b/ext/tidy/examples/urlgrab.php @@ -6,6 +6,8 @@ * A simple command-line utility to extract all of the URLS contained * within <A HREF> tags from a document. * + * NOTE: Only works with tidy for PHP 4.3.x, please see urlgrab5.php for tidy for PHP 5 + * * By: John Coggeshall <john@php.net> * * Usage: php urlgrab.php <file> diff --git a/ext/tidy/examples/urlgrab5.php b/ext/tidy/examples/urlgrab5.php new file mode 100644 index 0000000000..d9f9f7f065 --- /dev/null +++ b/ext/tidy/examples/urlgrab5.php @@ -0,0 +1,41 @@ +<?php + /* + * urlgrab5.php + * + * A simple command-line utility to extract all of the URLS contained + * within <A HREF> tags from a document. + * + * NOTE: Only works with tidy for PHP 5, please see urlgrab.php for tidy for PHP 4.3.x + * + * By: John Coggeshall <john@php.net> + * + * Usage: php urlgrab5.php <file> + * + */ + function dump_nodes(tidy_node $node, &$urls = NULL) { + + $urls = (is_array($urls)) ? $urls : array(); + + if(isset($node->id)) { + if($node->id == TIDY_TAG_A) { + $urls[] = $node->attribute['href']; + } + } + + if($node->has_children()) { + + foreach($node->child as $c) { + + dump_nodes($c, $urls); + + } + + } + + return $urls; + } + + $a = tidy_parse_file($_SERVER['argv'][1]); + $a->clean_repair(); + print_r(dump_nodes($a->html())); +?>
\ No newline at end of file |
