diff options
| author | John Coggeshall <john@php.net> | 2004-01-03 05:23:24 +0000 |
|---|---|---|
| committer | John Coggeshall <john@php.net> | 2004-01-03 05:23:24 +0000 |
| commit | 4ddb3d194d2780d7477d4bb4ce65f242cce46df4 (patch) | |
| tree | df2d80bd0a3dea3cd9b28431cf80d5bacb19c804 /ext/tidy/examples/urlgrab5.php | |
| parent | f37c9932ba822106070abd86184c2e303c06e987 (diff) | |
| download | php-git-4ddb3d194d2780d7477d4bb4ce65f242cce46df4.tar.gz | |
Added examples for tidy v2.0 (in PHP5) and a few more tests.
Diffstat (limited to 'ext/tidy/examples/urlgrab5.php')
| -rw-r--r-- | ext/tidy/examples/urlgrab5.php | 41 |
1 files changed, 41 insertions, 0 deletions
diff --git a/ext/tidy/examples/urlgrab5.php b/ext/tidy/examples/urlgrab5.php
new file mode 100644
index 0000000000..d9f9f7f065
--- /dev/null
+++ b/ext/tidy/examples/urlgrab5.php
@@ -0,0 +1,41 @@
+<?php
+    /*
+     * urlgrab5.php
+     *
+     * A simple command-line utility to extract all of the URLS contained
+     * within <A HREF> tags from a document.
+     *
+     * NOTE: Only works with tidy for PHP 5, please see urlgrab.php for tidy for PHP 4.3.x
+     *
+     * By: John Coggeshall <john@php.net>
+     *
+     * Usage: php urlgrab5.php <file>
+     *
+     */
+    function dump_nodes(tidy_node $node, &$urls = NULL) {
+
+        $urls = (is_array($urls)) ? $urls : array();
+
+        if(isset($node->id)) {
+            if($node->id == TIDY_TAG_A) {
+                $urls[] = $node->attribute['href'];
+            }
+        }
+
+        if($node->has_children()) {
+
+            foreach($node->child as $c) {
+
+                dump_nodes($c, $urls);
+
+            }
+
+        }
+
+        return $urls;
+    }
+
+    $a = tidy_parse_file($_SERVER['argv'][1]);
+    $a->clean_repair();
+    print_r(dump_nodes($a->html()));
+?>
\ No newline at end of file
