summaryrefslogtreecommitdiff
path: root/docs/raptor-tutorial-parsing.xml
diff options
context:
space:
mode:
authorDave Beckett <dave@dajobe.org>2006-08-19 22:20:55 +0000
committerDave Beckett <dave@dajobe.org>2006-08-19 22:20:55 +0000
commitc6ac0dd24ea8174873133948e0285e9f5fb8a13b (patch)
tree1b14dfff53a441db2c79f119a878d0cc62a2b695 /docs/raptor-tutorial-parsing.xml
parentc3865d501e183df5b5ff90e0c9e99df50680dade (diff)
downloadraptor-c6ac0dd24ea8174873133948e0285e9f5fb8a13b.tar.gz
Add parser URI filtering examples to tutorial
Diffstat (limited to 'docs/raptor-tutorial-parsing.xml')
-rw-r--r--docs/raptor-tutorial-parsing.xml113
1 files changed, 113 insertions, 0 deletions
diff --git a/docs/raptor-tutorial-parsing.xml b/docs/raptor-tutorial-parsing.xml
index 107d888c..1b1f94dd 100644
--- a/docs/raptor-tutorial-parsing.xml
+++ b/docs/raptor-tutorial-parsing.xml
@@ -392,6 +392,119 @@ buffer_len or NULL buffer can be used.</para>
</section>
+<section id="restrict-parser-network-access">
+<title>Restrict parser network access</title>
+
+<para>
+Parsing can cause network requests to be performed, especially
+if a URI is given as an argument, such as with
+<link linkend="raptor-parse-uri"><function>raptor_parse_uri()</function></link>.
+However, there may also be indirect requests, such as with the
+GRDDL parser, which retrieves URIs depending on the results of
+initial parse requests. Some of the requested URIs may be unwanted
+and need to be blocked or filtered; this can be done in
+three ways.
+</para>
+
+<section id="tutorial-filter-network-with-feature">
+<title>Filtering parser network requests with feature <link linkend="RAPTOR-FEATURE-NO-NET:CAPS"><literal>RAPTOR_FEATURE_NO_NET</literal></link></title>
+<para>
+The parser feature
+<link linkend="RAPTOR-FEATURE-NO-NET:CAPS"><literal>RAPTOR_FEATURE_NO_NET</literal></link>
+can be set with
+<link linkend="raptor-set-feature"><function>raptor_set_feature()</function></link>
+and forbids all network requests. There is no customisation of
+this approach.
+</para>
+
+<programlisting>
+ rdf_parser = raptor_new_parser("rdfxml");
+ raptor_set_feature(rdf_parser, RAPTOR_FEATURE_NO_NET, 1);
+</programlisting>
+
+</section>
+
+
+<section id="tutorial-filter-network-www-uri-filter">
+<title>Filtering parser network requests with <link linkend="raptor-www-set-uri-filter"><function>raptor_www_set_uri_filter()</function></link></title>
+<para>
+The
+<link linkend="raptor-www-set-uri-filter"><function>raptor_www_set_uri_filter()</function></link>
+
+allows a filtering function to be set that operates on all URIs
+retrieved by a WWW connection. Such a connection can be used to
+feed content to a parser by hand, as in the following example.
+</para>
+
+<programlisting>
+void write_bytes_handler(raptor_www* www, void *user_data,
+ const void *ptr, size_t size, size_t nmemb)
+{
+ raptor_parser* rdf_parser=(raptor_parser*)user_data;
+ raptor_parse_chunk(rdf_parser, (unsigned char*)ptr, size*nmemb, 0);
+}
+
+int uri_filter(raptor_www* www, void* filter_user_data, raptor_uri* uri) {
+ /* return non-0 to forbid the request */
+}
+
+int main(int argc, char *argv[]) {
+ ...
+
+ rdf_parser = raptor_new_parser("rdfxml");
+ www = raptor_new_www();
+
+ /* filter all URI requests */
+ raptor_www_set_uri_filter(www, uri_filter, filter_user_data);
+
+ /* make WWW write bytes to parser */
+ raptor_www_set_write_bytes_handler(www, write_bytes_handler, rdf_parser);
+
+ raptor_start_parse(rdf_parser, uri);
+ raptor_www_fetch(www, uri);
+ /* tell the parser that we are done */
+ raptor_parse_chunk(rdf_parser, NULL, 0, 1);
+
+ raptor_www_free(www);
+ raptor_free_parser(rdf_parser);
+
+ ...
+}
+
+</programlisting>
+
+</section>
+
+
+<section id="tutorial-filter-network-parser-uri-filter">
+<title>Filtering parser network requests with <link linkend="raptor-parser-set-uri-filter"><function>raptor_parser_set_uri_filter()</function></link></title>
+
+<para>
+The
+<link linkend="raptor-parser-set-uri-filter"><function>raptor_parser_set_uri_filter()</function></link>
+allows setting of a filtering function to operate on all URIs that
+the parser sees. This operates on the internal raptor_www object
+used inside parsing to retrieve URIs, similar to that described in
+the <link linkend="tutorial-filter-network-www-uri-filter">previous section</link>.
+</para>
+
+<programlisting>
+ int uri_filter(raptor_www* www, void* filter_user_data, raptor_uri* uri) {
+ /* return non-0 to forbid the request */
+ }
+
+ rdf_parser = raptor_new_parser("rdfxml");
+ raptor_parser_set_uri_filter(rdf_parser, uri_filter, filter_user_data);
+
+ /* parse content as normal */
+ raptor_parse_uri(rdf_parser, uri, base_uri);
+</programlisting>
+
+</section>
+
+</section>
+
+
<section id="tutorial-parser-static-info">
<title>Querying parser static information</title>