summaryrefslogtreecommitdiff
path: root/ext/tidy/examples/urlgrab5.php
diff options
context:
space:
mode:
authorJohn Coggeshall <john@php.net>2004-01-03 05:23:24 +0000
committerJohn Coggeshall <john@php.net>2004-01-03 05:23:24 +0000
commit4ddb3d194d2780d7477d4bb4ce65f242cce46df4 (patch)
treedf2d80bd0a3dea3cd9b28431cf80d5bacb19c804 /ext/tidy/examples/urlgrab5.php
parentf37c9932ba822106070abd86184c2e303c06e987 (diff)
downloadphp-git-4ddb3d194d2780d7477d4bb4ce65f242cce46df4.tar.gz
Added examples for tidy v2.0 (in PHP5) and a few more tests..
Diffstat (limited to 'ext/tidy/examples/urlgrab5.php')
-rw-r--r--ext/tidy/examples/urlgrab5.php41
1 files changed, 41 insertions, 0 deletions
diff --git a/ext/tidy/examples/urlgrab5.php b/ext/tidy/examples/urlgrab5.php
new file mode 100644
index 0000000000..d9f9f7f065
--- /dev/null
+++ b/ext/tidy/examples/urlgrab5.php
@@ -0,0 +1,41 @@
+<?php
+ /*
+ * urlgrab5.php
+ *
+ * A simple command-line utility to extract all of the URLs contained
+ * within <A HREF> tags from a document.
+ *
+ * NOTE: Only works with tidy for PHP 5, please see urlgrab.php for tidy for PHP 4.3.x
+ *
+ * By: John Coggeshall <john@php.net>
+ *
+ * Usage: php urlgrab5.php <file>
+ *
+ */
+ /**
+  * Recursively collect the href attribute of every <a> element found in
+  * $node and its descendants.
+  *
+  * @param tidy_node  $node Node whose subtree is scanned (the node itself first, then its children).
+  * @param array|null $urls Accumulator passed by reference; any non-array value is reset to array().
+  * @return array The accumulated href values, in traversal order.
+  */
+ function dump_nodes(tidy_node $node, &$urls = NULL) {
+     $urls = (is_array($urls)) ? $urls : array();
+
+     // Anchors that carry no href (e.g. <a name="...">) are skipped instead of
+     // triggering an undefined-index notice and recording a NULL entry.
+     if(isset($node->id) && $node->id == TIDY_TAG_A && isset($node->attribute['href'])) {
+         $urls[] = $node->attribute['href'];
+     }
+
+     if($node->has_children()) {
+         foreach($node->child as $c) {
+             // $urls is shared by reference, so the recursive call appends in place.
+             dump_nodes($c, $urls);
+         }
+     }
+
+     return $urls;
+ }
+
+ // Command-line entry point: parse the file named by the first argument,
+ // repair its markup, and print every anchor URL found in it.
+ if(!isset($_SERVER['argv'][1])) {
+     fwrite(STDERR, "Usage: php urlgrab5.php <file>\n");
+     exit(1);
+ }
+
+ $a = tidy_parse_file($_SERVER['argv'][1]);
+
+ // Guard against a failed parse: calling methods on a non-object would be a fatal error.
+ if(!is_object($a)) {
+     fwrite(STDERR, "Unable to parse " . $_SERVER['argv'][1] . "\n");
+     exit(1);
+ }
+
+ $a->clean_repair();
+ print_r(dump_nodes($a->html()));
+?> \ No newline at end of file