diff options
author | Sam Thursfield <sam@afuera.me.uk> | 2022-04-05 10:13:58 +0000 |
---|---|---|
committer | Sam Thursfield <sam@afuera.me.uk> | 2022-04-05 10:13:58 +0000 |
commit | 5ac6e360432d2786699b3c405765c3e58c93eeae (patch) | |
tree | 53ab8eba7fde5e4a2192e49b98a940ad5aa4e144 | |
parent | ea852d502f0dbd57bd54780adb811ee5c64870df (diff) | |
parent | 3566f7ca1471ae52d32d8429dc1aaedf428051b6 (diff) | |
download | tracker-5ac6e360432d2786699b3c405765c3e58c93eeae.tar.gz |
Merge branch 'wip/carlosg/doc-updates' into 'master'
Documentation improvements
Closes #349
See merge request GNOME/tracker!497
16 files changed, 493 insertions, 58 deletions
diff --git a/docs/manpages/tracker3-endpoint.1.txt b/docs/manpages/tracker3-endpoint.1.txt index 6671ca8c4..6fb89f586 100644 --- a/docs/manpages/tracker3-endpoint.1.txt +++ b/docs/manpages/tracker3-endpoint.1.txt @@ -31,6 +31,9 @@ _ontology_path_) must be provided in order to generate the database. If _ontology_name_ is used, the ontology must exist in _$datadir/tracker/ontologies_ +Users should be careful about the --http-port option, consider using +it only combined with the --loopback option. + The database itself will be stored according to _database_path_. == OPTIONS diff --git a/docs/reference/libtracker-sparql/embed-files.py b/docs/reference/libtracker-sparql/embed-files.py deleted file mode 100755 index df993b24f..000000000 --- a/docs/reference/libtracker-sparql/embed-files.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/python3 - -import os, sys, re - -f = open(sys.argv[1]) -content = f.read() -f.close() - -dirname = os.path.dirname(sys.argv[1]) - -regex = re.compile('{{(.*)}}') -matches = regex.findall(content) -replacements = {} - -for m in matches: - f = open(os.path.join(dirname, m)) - embedded = f.read() - escaped = embedded.replace('\\', '\\\\') - replace = re.compile('{{' + m + '}}') - content = replace.sub(escaped, content) - f.close() - -f = open(sys.argv[2], 'w') -f.write(content) -f.close() diff --git a/docs/reference/libtracker-sparql/examples.md.in b/docs/reference/libtracker-sparql/examples.md index 6a60320a7..474ac46d8 100644 --- a/docs/reference/libtracker-sparql/examples.md.in +++ b/docs/reference/libtracker-sparql/examples.md @@ -34,9 +34,21 @@ main loop is not blocked while these operations are executed. Once you end up with the query, remember to call [](tracker_sparql_cursor_close). The same applies to [](tracker_sparql_connection_close) when no longer needed. 
-```c -{{examples/connection-example.c}} -``` +<div class="gi-lang-c"> + +{{ examples/connection-example.c }} + +</div> +<div class="gi-lang-python"> + +{{ examples/connection-example.py }} + +</div> +<div class="gi-lang-javascript"> + +{{ examples/connection-example.js }} + +</div> ## Creating a private database @@ -58,9 +70,21 @@ main loop is not blocked while these operations are executed. Once you no longer need the connection, remember to call [](tracker_sparql_connection_close) on the [](TrackerSparqlConnection). -```c -{{examples/private-store-example.c}} -``` +<div class="gi-lang-c"> + +{{ examples/private-store-example.c }} + +</div> +<div class="gi-lang-python"> + +{{ examples/private-store-example.py }} + +</div> +<div class="gi-lang-javascript"> + +{{ examples/private-store-example.js }} + +</div> ## Creating a SPARQL endpoint @@ -73,9 +97,21 @@ concretely the creation of a D-Bus endpoint, that other applications may query e.g. through a connection created with [](tracker_sparql_connection_bus_new). -```c -{{examples/endpoint-example.c}} -``` +<div class="gi-lang-c"> + +{{ examples/endpoint-example.c }} + +</div> +<div class="gi-lang-python"> + +{{ examples/endpoint-example.py }} + +</div> +<div class="gi-lang-javascript"> + +{{ examples/endpoint-example.js }} + +</div> ## Receiving notification on changes @@ -88,6 +124,18 @@ on changes of certain RDF classes (Those with the This example demonstrates the use of [](TrackerNotifier) to receive notifications on database updates. 
-```c -{{examples/notifier-example.c}} -``` +<div class="gi-lang-c"> + +{{ examples/notifier-example.c }} + +</div> +<div class="gi-lang-python"> + +{{ examples/notifier-example.py }} + +</div> +<div class="gi-lang-javascript"> + +{{ examples/notifier-example.js }} + +</div> diff --git a/docs/reference/libtracker-sparql/examples/connection-example.js b/docs/reference/libtracker-sparql/examples/connection-example.js new file mode 100755 index 000000000..ce26026c5 --- /dev/null +++ b/docs/reference/libtracker-sparql/examples/connection-example.js @@ -0,0 +1,32 @@ +#!/usr/bin/gjs + +const { GLib, Gio, Tracker } = imports.gi + +try { + let connection = Tracker.SparqlConnection.bus_new( + 'org.freedesktop.Tracker3.Miner.Files', + null, null); + + let stmt = connection.query_statement ( + 'SELECT DISTINCT nie:url(?u) WHERE { ' + + ' ?u a nfo:FileDataObject ; ' + + ' nfo:fileName ~name ' + + '}', null); + + stmt.bind_string('name', ARGV[0]); + + let cursor = stmt.execute(null); + let i = 0; + + while (cursor.next(null)) { + i++; + print(`Result ${i}: ${cursor.get_string(0)[0]}`); + } + + print(`A total of ${i} results were found`); + + cursor.close(); + connection.close(); +} catch (e) { + printerr(`Error: ${e.message}`) +} diff --git a/docs/reference/libtracker-sparql/examples/connection-example.py b/docs/reference/libtracker-sparql/examples/connection-example.py new file mode 100755 index 000000000..d8def7f77 --- /dev/null +++ b/docs/reference/libtracker-sparql/examples/connection-example.py @@ -0,0 +1,33 @@ +#!/usr/bin/python3 + +import gi, sys +from gi.repository import GLib, Gio, Tracker + +try: + connection = Tracker.SparqlConnection.bus_new( + 'org.freedesktop.Tracker3.Miner.Files', + None, None) + + stmt = connection.query_statement ( + 'SELECT DISTINCT nie:url(?u) WHERE { ' + + ' ?u a nfo:FileDataObject ; ' + + ' nfo:fileName ~name ' + + '}', None) + + stmt.bind_string('name', sys.argv[1]) + + cursor = stmt.execute() + i = 0; + + while cursor.next(): + i += 1 + 
print('Result {0}: {1}'.format(i, cursor.get_string(0)[0])) + + print('A total of {0} results were found\n'.format(i)) + + cursor.close() + connection.close() + +except Exception as e: + print('Error: {0}'.format(e)) + sys.exit(-1) diff --git a/docs/reference/libtracker-sparql/examples/endpoint-example.js b/docs/reference/libtracker-sparql/examples/endpoint-example.js new file mode 100755 index 000000000..7304743f7 --- /dev/null +++ b/docs/reference/libtracker-sparql/examples/endpoint-example.js @@ -0,0 +1,23 @@ +#!/usr/bin/gjs + +const { GLib, Gio, Tracker } = imports.gi + +try { + let connection = Tracker.SparqlConnection.new( + Tracker.SparqlConnectionFlags.NONE, + null, // Database location, None creates it in-memory + Tracker.sparql_get_ontology_nepomuk(), // Ontology location + null); + + let bus = Gio.bus_get_sync(Gio.BusType.SESSION, null) + + let endpoint = Tracker.EndpointDBus.new( + connection, bus, null, null); + + let loop = GLib.MainLoop.new(null, false); + loop.run(); + + connection.close(); +} catch (e) { + printerr(`Error: ${e.message}`) +} diff --git a/docs/reference/libtracker-sparql/examples/endpoint-example.py b/docs/reference/libtracker-sparql/examples/endpoint-example.py new file mode 100755 index 000000000..654482511 --- /dev/null +++ b/docs/reference/libtracker-sparql/examples/endpoint-example.py @@ -0,0 +1,25 @@ +#!/usr/bin/python3 + +import gi, sys +from gi.repository import GLib, Gio, Tracker + +try: + connection = Tracker.SparqlConnection.new( + Tracker.SparqlConnectionFlags.NONE, + None, # Database location, None creates it in-memory + Tracker.sparql_get_ontology_nepomuk(), # Ontology location + None) + + bus = Gio.bus_get_sync(Gio.BusType.SESSION, None) + + endpoint = Tracker.EndpointDBus.new( + connection, bus, None, None) + + loop = GLib.MainLoop.new(None, False) + loop.run() + + connection.close() + +except Exception as e: + print('Error: {0}'.format(e)) + sys.exit(-1) diff --git 
a/docs/reference/libtracker-sparql/examples/notifier-example.js b/docs/reference/libtracker-sparql/examples/notifier-example.js new file mode 100755 index 000000000..ea34fdb3f --- /dev/null +++ b/docs/reference/libtracker-sparql/examples/notifier-example.js @@ -0,0 +1,22 @@ +#!/usr/bin/gjs + +const { GLib, Gio, Tracker } = imports.gi + +try { + let connection = Tracker.SparqlConnection.bus_new( + 'org.freedesktop.Tracker3.Miner.Files', + null, null); + + let notifier = connection.create_notifier(); + notifier.connect('events', (service, graph, events) => { + for (let event in events) + print (`Event ${event.get_event_type()} on ${event.get_urn()}`); + }); + + let loop = GLib.MainLoop.new(null, false); + loop.run(); + + connection.close(); +} catch (e) { + printerr(`Error: ${e.message}`) +} diff --git a/docs/reference/libtracker-sparql/examples/notifier-example.py b/docs/reference/libtracker-sparql/examples/notifier-example.py new file mode 100755 index 000000000..5cc9cbeea --- /dev/null +++ b/docs/reference/libtracker-sparql/examples/notifier-example.py @@ -0,0 +1,26 @@ +#!/usr/bin/python3 + +import gi, sys +from gi.repository import GLib, Gio, Tracker + +def callback(service, graph, events): + for event in events: + print('Event {0} on {1}\n'.format( + event.get_event_type(), event.get_urn())) + +try: + connection = Tracker.SparqlConnection.bus_new( + 'org.freedesktop.Tracker3.Miner.Files', + None, None) + + notifier = connection.create_notifier() + notifier.connect('events', callback) + + loop = GLib.MainLoop.new(None, False) + loop.run() + + connection.close() + +except Exception as e: + print('Error: {0}'.format(e)) + sys.exit(-1) diff --git a/docs/reference/libtracker-sparql/examples/private-store-example.js b/docs/reference/libtracker-sparql/examples/private-store-example.js new file mode 100755 index 000000000..ff950edb3 --- /dev/null +++ b/docs/reference/libtracker-sparql/examples/private-store-example.js @@ -0,0 +1,26 @@ +#!/usr/bin/gjs + +const { GLib, 
Gio, Tracker } = imports.gi + +try { + let connection = Tracker.SparqlConnection.new( + Tracker.SparqlConnectionFlags.NONE, + null, // Database location, None creates it in-memory + Tracker.sparql_get_ontology_nepomuk(), // Ontology location + null); + + // Create a resource containing RDF data + let resource = Tracker.Resource.new(null) + resource.set_uri('rdf:type', 'nmm:MusicPiece') + + // Create a batch, and add the resource to it + let batch = connection.create_batch() + batch.add_resource(null, resource) + + // Execute the batch to insert the data + batch.execute(null) + + connection.close(); +} catch (e) { + printerr(`Error: ${e.message}`) +} diff --git a/docs/reference/libtracker-sparql/examples/private-store-example.py b/docs/reference/libtracker-sparql/examples/private-store-example.py new file mode 100755 index 000000000..61c1adbe5 --- /dev/null +++ b/docs/reference/libtracker-sparql/examples/private-store-example.py @@ -0,0 +1,28 @@ +#!/usr/bin/python3 + +import gi, sys +from gi.repository import GLib, Gio, Tracker + +try: + connection = Tracker.SparqlConnection.new( + Tracker.SparqlConnectionFlags.NONE, + None, # Database location, None creates it in-memory + Tracker.sparql_get_ontology_nepomuk(), # Ontology location + None) + + # Create a resource containing RDF data + resource = Tracker.Resource.new(None) + resource.set_uri('rdf:type', 'nmm:MusicPiece') + + # Create a batch, and add the resource to it + batch = connection.create_batch() + batch.add_resource(None, resource) + + # Execute the batch to insert the data + batch.execute() + + connection.close() + +except Exception as e: + print('Error: {0}'.format(e)) + sys.exit(-1) diff --git a/docs/reference/libtracker-sparql/meson.build b/docs/reference/libtracker-sparql/meson.build index 64254ddf3..e52cd5235 100644 --- a/docs/reference/libtracker-sparql/meson.build +++ b/docs/reference/libtracker-sparql/meson.build @@ -9,25 +9,6 @@ endif hotdoc = import('hotdoc') -documents_with_examples = [ - [ 
'examples.md.in', 'examples.md' ], -] -examples = [] - -foreach doc : documents_with_examples - examples += custom_target( - 'examples-generate', - input: doc[0], - output: doc[1], - command: [ - 'embed-files.py', - '@INPUT@', - '@OUTPUT@', - ], - depend_files: [ doc[0] ], - build_by_default: true) -endforeach - base_ontology_docs = custom_target('ontology-docgen', output: ['dc-ontology.md'], command: [tracker_docgen, @@ -53,6 +34,7 @@ nepomuk_ontology_docs = custom_target('nepomuk-docgen', content = [ 'overview.md', + 'examples.md', 'ontologies.md', 'limits.md', 'performance.md', @@ -60,6 +42,7 @@ content = [ 'sparql-functions.md', 'migrating-2to3.md', 'tutorial.md', + 'security.md', ] required_hotdoc_extensions = [ diff --git a/docs/reference/libtracker-sparql/security.md b/docs/reference/libtracker-sparql/security.md new file mode 100644 index 000000000..6ea2fa3a2 --- /dev/null +++ b/docs/reference/libtracker-sparql/security.md @@ -0,0 +1,200 @@ +--- +title: Security +short-description: Security considerations +... + +# Security considerations + +The SPARQL 1.1 specifications have a number of informative `Security +considerations` sections. This section describes how those possibly +apply to the implementation of Tracker. + +Note that most of these considerations derive from situations where +a SPARQL store is exposed through a public endpoint, while Tracker +does not do that by default. Users should be careful about creating +endpoints. For D-Bus endpoints, access through the portal is encouraged. + +## Queries + +(From [https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#security](https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#security)) +``` +SPARQL queries using FROM, FROM NAMED, or GRAPH may cause the specified URI to +be dereferenced. This may cause additional use of network, disk or CPU resources +along with associated secondary issues such as denial of service. 
The security +issues of Uniform Resource Identifier (URI): Generic Syntax [RFC3986] Section 7 +should be considered. In addition, the contents of file: URIs can in some cases +be accessed, processed and returned as results, providing unintended access to +local resources. + +SPARQL requests may cause additional requests to be issued from the SPARQL +endpoint, such as FROM NAMED. The endpoint is potentially within an +organisations firewall or DMZ, and so such queries may be a source of +indirection attacks. +``` + +Graph URIs are virtual in Tracker and do not cause any access outside of +database resources. The only SPARQL syntax capable of dereferencing or accessing +external resources are the `SERVICE <uri>` and `LOAD <rdf-file>` features. + + +(From [https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#security](https://www.w3.org/TR/2013/REC-sparql11-query-20130321/#security)) +``` +The SPARQL language permits extensions, which will have their own security +implications. +``` + +Tracker SPARQL extensions have no special security considerations, other than +being code that runs on silicon. + +## Federated queries + +(From [https://www.w3.org/TR/2013/REC-sparql11-federated-query-20130321/#security](https://www.w3.org/TR/2013/REC-sparql11-federated-query-20130321/#security)) +``` +SPARQL queries using SERVICE imply that a URI will be dereferenced, and that the +result will be incorporated into a working data set. +``` + +(From [https://www.w3.org/TR/sparql11-protocol/#policy-security](https://www.w3.org/TR/sparql11-protocol/#policy-security)) +``` +Since a SPARQL protocol service may make HTTP requests of other origin servers +on behalf of its clients, it may be used as a vector of attacks against other +sites or services. Thus, SPARQL protocol services may effectively act as proxies +for third-party clients. Such services may place restrictions on the resources +that they retrieve or on the rate at which external resources can be retrieved. 
+SPARQL protocol services may log client requests in such a way as to facilitate +tracing them with regard to third-party origin servers or services. + +SPARQL protocol services may choose to detect these and other costly, or +otherwise unsafe, queries, impose time or memory limits on queries, or impose +other restrictions to reduce the service's (and other service's) vulnerability +to denial-of-service attacks. They also may refuse to process such query +requests. +``` + +Tracker offers 2 types of endpoint that are susceptible to this vector: + +- D-Bus endpoints accessed outside a sandbox. +- HTTP endpoints + +Particularly, requests on a D-Bus endpoint happening through the portal from a +sandboxed process have all SERVICE access restricted. + +Tracker developers encourage that all access to endpoints created on D-Bus +happen through the portal, and that all HTTP endpoints validate the provenance +of the requests through the [](TrackerEndpointHttp::block-remote-address) +signal to limit access to resources. + +(From [https://www.w3.org/TR/sparql11-protocol/#policy-security](https://www.w3.org/TR/sparql11-protocol/#policy-security)) +``` +There are at least two possible sources of denial-of-service attacks against +SPARQL protocol services. First, under-constrained queries can result in very +large numbers of results, which may require large expenditures of computing +resources to process, assemble, or return. Another possible source are queries +containing very complex — either because of resource size, the number of +resources to be retrieved, or a combination of size and number — RDF Dataset +descriptions, which the service may be unable to assemble without significant +expenditure of resources, including bandwidth, CPU, or secondary storage. In +some cases such expenditures may effectively constitute a denial-of-service +attack. 
A SPARQL protocol service may place restrictions on the resources that +it retrieves or on the rate at which external resources are retrieved. There +may be other sources of denial-of-service attacks against SPARQL query +processing services. +``` + +Tracker does not enforce any time or rate limits on queries. HTTP +endpoints may implement the latter through the +[](TrackerEndpointHttp::block-remote-address) signal. + +## Updates + +(From [https://www.w3.org/TR/2013/REC-sparql11-update-20130321/#security](https://www.w3.org/TR/2013/REC-sparql11-update-20130321/#security)) +``` +Write access to data makes it inherently vulnerable to malicious access. +Standard access and authentication techniques should be used in any networked +environment. In particular, HTTPS should be used, especially when implementing +the SPARQL HTTP-based protocols. (i.e., encryption with challenge/response based +password presentation, encrypted session tokens, etc). Some of the weak points +addressed by HTTPS are: authentication, active session integrity between client +and server, preventing replays, preventing continuation of defunct sessions. +``` + +(From [https://www.w3.org/TR/sparql11-protocol/#policy-security](https://www.w3.org/TR/sparql11-protocol/#policy-security)) +``` +SPARQL protocol services may remove, insert, and change underlying data via the +update operation. To protect against malicious or destructive updates, +implementations may choose not to implement the update operation. Alternatively, +implementations may choose to use HTTP authentication mechanisms or other +implementation-defined mechanisms to prevent unauthorized invocations of the +update operation. +``` + +Tracker HTTP endpoints do not implement any update mechanisms. D-Bus endpoints +accessed through the portal from inside a sandbox are likewise read-only. 
+ + +(From [https://www.w3.org/TR/2013/REC-sparql11-update-20130321/#security](https://www.w3.org/TR/2013/REC-sparql11-update-20130321/#security)) +``` +Systems that provide both read-only and writable interfaces can be subject to +injection attacks in the read-only interface. In particular, a SPARQL endpoint +with a Query service should be careful of injection attacks aimed at interacting +with an Update service on the same SPARQL endpoint. Like any client code, +interaction between the query service and the update service should ensure +correct escaping of strings provided by the user. + +While SPARQL Update and SPARQL Query are separate languages, some +implementations may choose to offer both at the same SPARQL endpoint. In this +case, it is important to consider that an Update operation may be obscured to +masquerade as a query. For instance, a string of unicode escapes in a PREFIX +clause could be used to hide an Update Operation. Therefore, simple syntactic +tests are inadequate to determine if a string describes a query or an update. +``` + +Following the SPARQL 1.1 spec, Tracker implements updates and queries as two +different languages with different parser entry points; this separation extends +all the way to the public API. As an additional layer of security, readonly +queries happen on readonly database connections. It is essentially not possible +to perform any data change from the query APIs. + + +# API user considerations + +Users of the Tracker API and SPARQL interface are encouraged to make some +considerations and take some precautions: + + * Do not expose any endpoints that do not need exposing. + * For local D-Bus endpoints, consider using a graph partitioning scheme that + makes it easy to police access to the data when accessed through the + portal. + * Avoid the possibility of injection attacks. Use [](TrackerSparqlStatement) + and avoid string-based approaches to build SPARQL queries from user input. 
+ * Consider that IRIs are susceptible to homograph attacks. Quoting + https://www.w3.org/TR/sparql11-protocol/#policy-security: + + ``` + Different IRIs may have the same appearance. Characters in different scripts + may look similar (a Cyrillic "о" may appear similar to a Latin "o"). A + character followed by combining characters may have the same visual + representation as another character (LATIN SMALL LETTER E followed by + COMBINING ACUTE ACCENT has the same visual representation as LATIN SMALL + LETTER E WITH ACUTE). Users of SPARQL must take care to construct queries + with IRIs that match the IRIs in the data. Further information about matching + of similar characters can be found in Unicode Security Considerations + [UNISEC] and Internationalized Resource Identifiers (IRIs) [RFC3987] + Section 8. + ``` + + The situations where this might be a source of confusion or mischief, or even + be possible, depend on how those IRIs are created, used, displayed or + inserted. + + +# Feature grid + +This is a quick reference of the features offered by the different types of +endpoint. 
+ +| Endpoint | Query | Update | Graph Constraints | Service Constraints | +|----------------|-------|--------|-------------------|---------------------| +| D-Bus (portal) | ✓ | ✗ | ✓ | ✓ | +| D-Bus | ✓ | ✓ | ✗ | ✗ | +| HTTP | ✓ | ✗ | ✗ | ✗ | diff --git a/docs/reference/libtracker-sparql/sitemap.txt b/docs/reference/libtracker-sparql/sitemap.txt index 750975a03..a28c62b35 100644 --- a/docs/reference/libtracker-sparql/sitemap.txt +++ b/docs/reference/libtracker-sparql/sitemap.txt @@ -26,4 +26,5 @@ index.md limits.md performance.md sparql-and-tracker.md + security.md migrating-2to3.md diff --git a/src/libtracker-sparql/tracker-batch.c b/src/libtracker-sparql/tracker-batch.c index 883d822a7..29c236851 100644 --- a/src/libtracker-sparql/tracker-batch.c +++ b/src/libtracker-sparql/tracker-batch.c @@ -189,7 +189,7 @@ tracker_batch_add_sparql (TrackerBatch *batch, /** * tracker_batch_add_resource: * @batch: a #TrackerBatch - * @graph: RDF graph to insert the resource to + * @graph: (nullable): RDF graph to insert the resource to * @resource: a #TrackerResource * * Adds the RDF represented by @resource to @batch. diff --git a/src/libtracker-sparql/tracker-endpoint-http.c b/src/libtracker-sparql/tracker-endpoint-http.c index 03a7eb4ea..6248284e9 100644 --- a/src/libtracker-sparql/tracker-endpoint-http.c +++ b/src/libtracker-sparql/tracker-endpoint-http.c @@ -33,7 +33,17 @@ * Access to these endpoints may be managed via the * #TrackerEndpointHttp::block-remote-address signal, the boolean * return value expressing whether the connection is blocked or not. - * Inspection of the requester address is left up to the user. + * Inspection of the requester address is left up to the user. The + * default value allows all requests independently of their provenance, + * users are encouraged to add a handler. + * + * If the provided #GTlsCertificate is %NULL, the endpoint will allow + * plain HTTP connections. Users are encouraged to provide a certificate + * in order to use HTTPS. 
+ * + * As a security measure, and in compliance with the SPARQL 1.1 specifications, + * the HTTP endpoint does not support database updates or modifications in any + * way. The database is completely owned by the host. * * A #TrackerEndpointHttp may be created on a different thread/main * context than the one creating the #TrackerSparqlConnection. |