diff options
author | Dave Beckett <dave@dajobe.org> | 2000-11-22 16:48:25 +0000 |
---|---|---|
committer | Dave Beckett <dave@dajobe.org> | 2000-11-22 16:48:25 +0000 |
commit | 80f494419565b06c3a2a0fea483c39fd6f172183 (patch) | |
tree | 7127b2f469f36afc882677182e17646baf578103 | |
parent | 5cc40fcbe46c5087b0db1210bcd1f4dfea1a8b15 (diff) | |
download | raptor-80f494419565b06c3a2a0fea483c39fd6f172183.tar.gz |
Initial revision
-rw-r--r-- | .cvsignore | 31 | ||||
-rw-r--r-- | AUTHORS | 1 | ||||
-rw-r--r-- | INSTALL.html | 22 | ||||
-rw-r--r-- | LICENSE.html | 91 | ||||
-rw-r--r-- | MPL.html | 790 | ||||
-rw-r--r-- | Makefile.am | 134 | ||||
-rw-r--r-- | NEWS.html | 22 | ||||
-rw-r--r-- | README.html | 22 | ||||
-rw-r--r-- | acconfig.h | 16 | ||||
-rwxr-xr-x | autogen.sh | 120 | ||||
-rw-r--r-- | config.h.in | 56 | ||||
-rw-r--r-- | configure.in | 287 | ||||
-rw-r--r-- | docs/Makefile.am | 134 | ||||
-rw-r--r-- | src/.cvsignore | 31 | ||||
-rw-r--r-- | src/Makefile.am | 134 | ||||
-rw-r--r-- | src/raptor.h | 134 | ||||
-rw-r--r-- | src/raptor_general.c | 2009 | ||||
-rw-r--r-- | src/raptor_parse.c | 2009 | ||||
-rw-r--r-- | src/raptor_rdfxml.c | 2009 | ||||
-rw-r--r-- | utils/.cvsignore | 31 | ||||
-rw-r--r-- | utils/Makefile.am | 134 | ||||
-rw-r--r-- | utils/rapper.c | 203 |
22 files changed, 8420 insertions, 0 deletions
diff --git a/.cvsignore b/.cvsignore new file mode 100644 index 00000000..703e31de --- /dev/null +++ b/.cvsignore @@ -0,0 +1,31 @@ +*.rdf +*.rdfs +*.log +*test +.deps* +ANNOUNCE* +ChangeLog +Makefile +Makefile.in +NEWS +README +TODO +aclocal.m4 +config.cache +config.guess +config.log +config.status +config.sub +configure +dmalloc* +example? +install-sh +missing +mkinstalldirs +not-used +old* +config.h +stamp-h* +test* +rdfdump +*.txt diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 00000000..9658bf1d --- /dev/null +++ b/AUTHORS @@ -0,0 +1 @@ +Dave Beckett <Dave.Beckett@bristol.ac.uk> diff --git a/INSTALL.html b/INSTALL.html new file mode 100644 index 00000000..ef04f363 --- /dev/null +++ b/INSTALL.html @@ -0,0 +1,22 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "blah"> +<html> +<head> + <title>Rapier RDF Parser Installation</title> +</head> +<body bgcolor="#ffffff" text="#000085"> + +<h1 align="center">Rapier RDF Parser Installation</h1> +<h2 align="center"><a href="http://purl.org/net/dajobe/">Dave Beckett</a><br /> +<a href="http://www.ilrt.bristol.ac.uk/">Institute for Learning and Research Technology</a><br /><a href="http://www.bristol.ac.uk/">University of Bristol</a></h2> + + +<h2>OVERVIEW</h2> + +<hr /> + +<p>Copyright 2000 <a href="http://purl.org/net/dajobe/">Dave Beckett</a>, <a href="http://www.ilrt.bristol.ac.uk/">Institute for Learning and Research Technology</a>, <a href="http://www.bristol.ac.uk/">University of Bristol</a></p> + +<p><a href="http://www.mirror.ac.uk/services/validator/check/referer">Validate me</a></p> + +</body> +</html> diff --git a/LICENSE.html b/LICENSE.html new file mode 100644 index 00000000..9d41fada --- /dev/null +++ b/LICENSE.html @@ -0,0 +1,91 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "blah"> +<html> +<head> + <title>Rapier - RDF Parser - License</title> + <link rel="stylesheet" title="Default" href="docs/stylesheet.css" /> +</head> +<body bgcolor="#ffffff" 
text="#000085"> + + <h1 align="center">Rapier - RDF Parser - License</h1> + + <p>This package is Free Software available under either of two licenses (see +<a href="FAQS.html">FAQS.html</a> to see why):</p> + + <h2>1. The GNU Lesser General Public License (LGPL)</h2> + + <p>See <a href="http://www.gnu.org/copyleft/lesser.html">http://www.gnu.org/copyleft/lesser.html</a> or <a href="COPYING.LIB">COPYING.LIB</a> for the full license text.</p> + + <hr /> + <blockquote> + <p>Copyright (C) 2000 <a href="http://purl.org/net/dajobe/">David Beckett</a>, +<a href="http://www.ilrt.bristol.ac.uk/">Institute for Learning and Research Technology</a>, <a href="http://www.bristol.ac.uk/">University of Bristol</a>. +All Rights Reserved.</p> + + <p>This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version.</p> + + <p>This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.</p> + + <p>You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA</p> + + </blockquote> + + <hr /> + <p><b>NOTE</b> - under Term 3 of the LGPL, you may choose to license + the entire library under the GPL. See <a href="COPYING">COPYING</a> for the full license text.</p> + + + <h2>2. The Mozilla Public License</h2> + +<p>See <a href="http://www.mozilla.org/MPL/MPL-1.1.html">http://www.mozilla.org/MPL/MPL-1.1.html</a> or <a href="MPL.html">MPL.html</a> for the full license text.</p> + +<p>Under MPL section 13. 
I declare that all of the Covered Code is +Multiple Licensed:</p> + + <hr /> + <blockquote> + +<p>The contents of this file are subject to the Mozilla Public +License version 1.1 (the "License"); you may not use this file +except in compliance with the License. You may obtain a copy of +the License at <a href="http://www.mozilla.org/MPL/">http://www.mozilla.org/MPL/</a></p> + +<p>Software distributed under the License is distributed on an "AS +IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or +implied. See the License for the specific language governing +rights and limitations under the License.</p> + +<p>The Initial Developer of the Original Code is <a href="http://purl.org/net/dajobe/">David Beckett</a>. +Portions created by <a href="http://purl.org/net/dajobe/">David Beckett</a> +are Copyright (C) 2000 <a href="http://purl.org/net/dajobe/">David Beckett</a>, +<a href="http://www.ilrt.bristol.ac.uk/">Institute for Learning and Research Technology</a>, <a href="http://www.bristol.ac.uk/">University of Bristol</a>. +All Rights Reserved.</p> + +<p>Alternatively, the contents of this file may be used under the +terms of the GNU Lesser General Public License, in which case the +provisions of the LGPL License are applicable instead of those above. +If you wish to allow use of your version of this file only under the +terms of the LGPL License and not to allow others to use your version +of this file under the MPL, indicate your decision by deleting the +provisions above and replace them with the notice and other +provisions required by the LGPL License. 
If you do not delete the +provisions above, a recipient may use your version of this file under +either the MPL or the LGPL License.</p> + + </blockquote> + +<hr /> + +<p>Copyright 2000 <a href="http://purl.org/net/dajobe/">Dave Beckett</a>, <a href="http://www.ilrt.bristol.ac.uk/">Institute for Learning and Research Technology</a>, <a href="http://www.bristol.ac.uk/">University of Bristol</a></p> + +<p><a href="http://www.mirror.ac.uk/services/validator/check/referer">Validate me</a></p> + +</body> +</html> diff --git a/MPL.html b/MPL.html new file mode 100644 index 00000000..8881e16a --- /dev/null +++ b/MPL.html @@ -0,0 +1,790 @@ +<!doctype html public "-//w3c//dtd html 4.0 transitional//en"> + +<html> + +<head> + <title>Mozilla Public License version 1.1</title> +</head> + +<body text="#000000" bgcolor="#FFFFFF" link="#0000EE" vlink="#551A8B" alink="#FF0000"> + + + +<center><b><font size=+2>MOZILLA PUBLIC LICENSE</font></b> + +<br><b>Version 1.1</b> + +<p> + +<hr WIDTH="20%"></center> + + + +<p><b>1. Definitions.</b> + +<ul><b>1.0.1. "Commercial Use" </b>means distribution or otherwise making + +the Covered Code available to a third party. + +<p><b>1.1. ''Contributor''</b> means each entity that creates or contributes + +to the creation of Modifications. + +<p><b>1.2. ''Contributor Version''</b> means the combination of the Original + +Code, prior Modifications used by a Contributor, and the Modifications + +made by that particular Contributor. + +<p><b>1.3. ''Covered Code''</b> means the Original Code or Modifications + +or the combination of the Original Code and Modifications, in each case + +including portions thereof<b>.</b> + +<p><b>1.4. ''Electronic Distribution Mechanism''</b> means a mechanism + +generally accepted in the software development community for the electronic + +transfer of data. + +<p><b>1.5. ''Executable''</b> means Covered Code in any form other than + +Source Code. + +<p><b>1.6. 
''Initial Developer''</b> means the individual or entity identified + +as the Initial Developer in the Source Code notice required by <b>Exhibit + +A</b>. + +<p><b>1.7. ''Larger Work''</b> means a work which combines Covered Code + +or portions thereof with code not governed by the terms of this License. + +<p><b>1.8. ''License''</b> means this document. + +<p><b>1.8.1. "Licensable"</b> means having the right to grant, to the maximum + +extent possible, whether at the time of the initial grant or subsequently + +acquired, any and all of the rights conveyed herein. + +<p><b>1.9. ''Modifications''</b> means any addition to or deletion from + +the substance or structure of either the Original Code or any previous + +Modifications. When Covered Code is released as a series of files, a Modification + +is: + +<ul><b>A.</b> Any addition to or deletion from the contents of a file containing + +Original Code or previous Modifications. + +<p><b>B.</b> Any new file that contains any part of the Original Code or + +previous Modifications. + +<br> </ul> + +<b>1.10. ''Original Code''</b> means Source Code of computer software code + +which is described in the Source Code notice required by <b>Exhibit A</b> + +as Original Code, and which, at the time of its release under this License + +is not already Covered Code governed by this License. + +<p><b>1.10.1. "Patent Claims"</b> means any patent claim(s), now owned + +or hereafter acquired, including without limitation, method, process, + +and apparatus claims, in any patent Licensable by grantor. + +<p><b>1.11. ''Source Code''</b> means the preferred form of the Covered + +Code for making modifications to it, including all modules it contains, + +plus any associated interface definition files, scripts used to control + +compilation and installation of an Executable, or source code differential + +comparisons against either the Original Code or another well known, available + +Covered Code of the Contributor's choice. 
The Source Code can be in a compressed + +or archival form, provided the appropriate decompression or de-archiving + +software is widely available for no charge. + +<p><b>1.12. "You'' (or "Your") </b> means an individual or a legal + +entity exercising rights under, and complying with all of the terms of, + +this License or a future version of this License issued under Section 6.1. + +For legal entities, "You'' includes any entity which controls, is controlled + +by, or is under common control with You. For purposes of this definition, + +"control'' means (a) the power, direct or indirect, to cause the direction + +or management of such entity, whether by contract or otherwise, or (b) + +ownership of more than fifty percent (50%) of the outstanding shares or + +beneficial ownership of such entity.</ul> + +<b>2. Source Code License.</b> + +<ul><b>2.1. The Initial Developer Grant.</b> + +<br>The Initial Developer hereby grants You a world-wide, royalty-free, + +non-exclusive license, subject to third party intellectual property claims: + +<ul><b>(a)</b> <b> </b>under intellectual property rights (other than + +patent or trademark) Licensable by Initial Developer to use, reproduce, + +modify, display, perform, sublicense and distribute the Original Code (or + +portions thereof) with or without Modifications, and/or as part of a Larger + +Work; and + +<p><b>(b)</b> under Patents Claims infringed by the making, using or selling + +of Original Code, to make, have made, use, practice, sell, and offer for + +sale, and/or otherwise dispose of the Original Code (or portions thereof). + +<ul> + +<ul> </ul> + +</ul> + +<b>(c) </b>the licenses granted in this Section 2.1(a) and (b) are effective + +on the date Initial Developer first distributes Original Code under the + +terms of this License. 
+ +<p><b>(d) </b>Notwithstanding Section 2.1(b) above, no patent license is + +granted: 1) for code that You delete from the Original Code; 2) separate + +from the Original Code; or 3) for infringements caused by: i) the + +modification of the Original Code or ii) the combination of the Original + +Code with other software or devices. + +<br> </ul> + +<b>2.2. Contributor Grant.</b> + +<br>Subject to third party intellectual property claims, each Contributor + +hereby grants You a world-wide, royalty-free, non-exclusive license + +<ul> + +<br><b>(a)</b> <b> </b>under intellectual property rights (other than + +patent or trademark) Licensable by Contributor, to use, reproduce, modify, + +display, perform, sublicense and distribute the Modifications created by + +such Contributor (or portions thereof) either on an unmodified basis, with + +other Modifications, as Covered Code and/or as part of a Larger Work; and + +<p><b>(b)</b> under Patent Claims infringed by the making, using, or selling + +of Modifications made by that Contributor either alone and/or in<font color="#000000"> + +combination with its Contributor Version (or portions of such combination), + +to make, use, sell, offer for sale, have made, and/or otherwise dispose + +of: 1) Modifications made by that Contributor (or portions thereof); and + +2) the combination of Modifications made by that Contributor with + +its Contributor Version (or portions of such combination).</font> + +<p><b>(c) </b>the licenses granted in Sections 2.2(a) and 2.2(b) are effective + +on the date Contributor first makes Commercial Use of the Covered Code. 
+ +<p><b>(d) </b> Notwithstanding Section 2.2(b) above, no + +patent license is granted: 1) for any code that Contributor has deleted + +from the Contributor Version; 2) separate from the Contributor Version; + +3) for infringements caused by: i) third party modifications of Contributor + +Version or ii) the combination of Modifications made by that Contributor + +with other software (except as part of the Contributor Version) or + +other devices; or 4) under Patent Claims infringed by Covered Code in the + +absence of Modifications made by that Contributor.</ul> + +</ul> + + + +<p><br><b>3. Distribution Obligations.</b> + +<ul><b>3.1. Application of License.</b> + +<br>The Modifications which You create or to which You contribute are governed + +by the terms of this License, including without limitation Section <b>2.2</b>. + +The Source Code version of Covered Code may be distributed only under the + +terms of this License or a future version of this License released under + +Section <b>6.1</b>, and You must include a copy of this License with every + +copy of the Source Code You distribute. You may not offer or impose any + +terms on any Source Code version that alters or restricts the applicable + +version of this License or the recipients' rights hereunder. However, You + +may include an additional document offering the additional rights described + +in Section <b>3.5</b>. + +<p><b>3.2. 
Availability of Source Code.</b> + +<br>Any Modification which You create or to which You contribute must be + +made available in Source Code form under the terms of this License either + +on the same media as an Executable version or via an accepted Electronic + +Distribution Mechanism to anyone to whom you made an Executable version + +available; and if made available via Electronic Distribution Mechanism, + +must remain available for at least twelve (12) months after the date it + +initially became available, or at least six (6) months after a subsequent + +version of that particular Modification has been made available to such + +recipients. You are responsible for ensuring that the Source Code version + +remains available even if the Electronic Distribution Mechanism is maintained + +by a third party. + +<p><b>3.3. Description of Modifications.</b> + +<br>You must cause all Covered Code to which You contribute to contain + +a file documenting the changes You made to create that Covered Code and + +the date of any change. You must include a prominent statement that the + +Modification is derived, directly or indirectly, from Original Code provided + +by the Initial Developer and including the name of the Initial Developer + +in (a) the Source Code, and (b) in any notice in an Executable version + +or related documentation in which You describe the origin or ownership + +of the Covered Code. + +<p><b>3.4. Intellectual Property Matters</b> + +<ul><b>(a) Third Party Claims</b>. + +<br>If Contributor has knowledge that a license under a third party's intellectual + +property rights is required to exercise the rights granted by such Contributor + +under Sections 2.1 or 2.2, Contributor must include a text file with the + +Source Code distribution titled "LEGAL'' which describes the claim and + +the party making the claim in sufficient detail that a recipient will know + +whom to contact. 
If Contributor obtains such knowledge after the Modification + +is made available as described in Section 3.2, Contributor shall promptly + +modify the LEGAL file in all copies Contributor makes available thereafter + +and shall take other steps (such as notifying appropriate mailing lists + +or newsgroups) reasonably calculated to inform those who received the Covered + +Code that new knowledge has been obtained. + +<p><b>(b) Contributor APIs</b>. + +<br>If Contributor's Modifications include an application programming interface + +and Contributor has knowledge of patent licenses which are reasonably necessary + +to implement that API, Contributor must also include this information in + +the LEGAL file. + +<br> </ul> + + <b>(c) + +Representations.</b> + +<ul>Contributor represents that, except as disclosed pursuant to Section + +3.4(a) above, Contributor believes that Contributor's Modifications are + +Contributor's original creation(s) and/or Contributor has sufficient rights + +to grant the rights conveyed by this License.</ul> + + + +<p><br><b>3.5. Required Notices.</b> + +<br>You must duplicate the notice in <b>Exhibit A</b> in each file of the + +Source Code. If it is not possible to put such notice in a particular + +Source Code file due to its structure, then You must include such notice + +in a location (such as a relevant directory) where a user would be likely + +to look for such a notice. If You created one or more Modification(s) + +You may add your name as a Contributor to the notice described in <b>Exhibit + +A</b>. You must also duplicate this License in any documentation + +for the Source Code where You describe recipients' rights or ownership + +rights relating to Covered Code. You may choose to offer, and to + +charge a fee for, warranty, support, indemnity or liability obligations + +to one or more recipients of Covered Code. However, You may do so only + +on Your own behalf, and not on behalf of the Initial Developer or any Contributor. 
+ +You must make it absolutely clear than any such warranty, support, indemnity + +or liability obligation is offered by You alone, and You hereby agree to + +indemnify the Initial Developer and every Contributor for any liability + +incurred by the Initial Developer or such Contributor as a result of warranty, + +support, indemnity or liability terms You offer. + +<p><b>3.6. Distribution of Executable Versions.</b> + +<br>You may distribute Covered Code in Executable form only if the requirements + +of Section <b>3.1-3.5</b> have been met for that Covered Code, and if You + +include a notice stating that the Source Code version of the Covered Code + +is available under the terms of this License, including a description of + +how and where You have fulfilled the obligations of Section <b>3.2</b>. + +The notice must be conspicuously included in any notice in an Executable + +version, related documentation or collateral in which You describe recipients' + +rights relating to the Covered Code. You may distribute the Executable + +version of Covered Code or ownership rights under a license of Your choice, + +which may contain terms different from this License, provided that You + +are in compliance with the terms of this License and that the license for + +the Executable version does not attempt to limit or alter the recipient's + +rights in the Source Code version from the rights set forth in this License. + +If You distribute the Executable version under a different license You + +must make it absolutely clear that any terms which differ from this License + +are offered by You alone, not by the Initial Developer or any Contributor. + +You hereby agree to indemnify the Initial Developer and every Contributor + +for any liability incurred by the Initial Developer or such Contributor + +as a result of any such terms You offer. + +<p><b>3.7. 
Larger Works.</b> + +<br>You may create a Larger Work by combining Covered Code with other code + +not governed by the terms of this License and distribute the Larger Work + +as a single product. In such a case, You must make sure the requirements + +of this License are fulfilled for the Covered Code.</ul> + +<b>4. Inability to Comply Due to Statute or Regulation.</b> + +<ul>If it is impossible for You to comply with any of the terms of this + +License with respect to some or all of the Covered Code due to statute, + +judicial order, or regulation then You must: (a) comply with the terms + +of this License to the maximum extent possible; and (b) describe the limitations + +and the code they affect. Such description must be included in the LEGAL + +file described in Section <b>3.4</b> and must be included with all distributions + +of the Source Code. Except to the extent prohibited by statute or regulation, + +such description must be sufficiently detailed for a recipient of ordinary + +skill to be able to understand it.</ul> + +<b>5. Application of this License.</b> + +<ul>This License applies to code to which the Initial Developer has attached + +the notice in <b>Exhibit A</b> and to related Covered Code.</ul> + +<b>6. Versions of the License.</b> + +<ul><b>6.1. New Versions</b>. + +<br>Netscape Communications Corporation (''Netscape'') may publish revised + +and/or new versions of the License from time to time. Each version will + +be given a distinguishing version number. + +<p><b>6.2. Effect of New Versions</b>. + +<br>Once Covered Code has been published under a particular version of + +the License, You may always continue to use it under the terms of that + +version. You may also choose to use such Covered Code under the terms of + +any subsequent version of the License published by Netscape. No one other + +than Netscape has the right to modify the terms applicable to Covered Code + +created under this License. + +<p><b>6.3. Derivative Works</b>. 
+ +<br>If You create or use a modified version of this License (which you + +may only do in order to apply it to code which is not already Covered Code + +governed by this License), You must (a) rename Your license so that the + +phrases ''Mozilla'', ''MOZILLAPL'', ''MOZPL'', ''Netscape'', "MPL", ''NPL'' + +or any confusingly similar phrase do not appear in your license (except + +to note that your license differs from this License) and (b) otherwise + +make it clear that Your version of the license contains terms which differ + +from the Mozilla Public License and Netscape Public License. (Filling in + +the name of the Initial Developer, Original Code or Contributor in the + +notice described in <b>Exhibit A</b> shall not of themselves be deemed + +to be modifications of this License.)</ul> + +<b>7. DISCLAIMER OF WARRANTY.</b> + +<ul>COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS'' BASIS, WITHOUT + +WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, WITHOUT LIMITATION, + +WARRANTIES THAT THE COVERED CODE IS FREE OF DEFECTS, MERCHANTABLE, FIT + +FOR A PARTICULAR PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY + +AND PERFORMANCE OF THE COVERED CODE IS WITH YOU. SHOULD ANY COVERED CODE + +PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER + +CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR CORRECTION. + +THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. + +NO USE OF ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS DISCLAIMER.</ul> + +<b>8. TERMINATION.</b> + +<ul><b>8.1. </b>This License and the rights granted hereunder will + +terminate automatically if You fail to comply with terms herein and fail + +to cure such breach within 30 days of becoming aware of the breach. All + +sublicenses to the Covered Code which are properly granted shall survive + +any termination of this License. 
Provisions which, by their nature, must + +remain in effect beyond the termination of this License shall survive. + +<p><b>8.2. </b>If You initiate litigation by asserting a patent infringement + +claim (excluding declatory judgment actions) against Initial Developer + +or a Contributor (the Initial Developer or Contributor against whom You + +file such action is referred to as "Participant") alleging that: + +<p><b>(a) </b>such Participant's Contributor Version directly or + +indirectly infringes any patent, then any and all rights granted by such + +Participant to You under Sections 2.1 and/or 2.2 of this License shall, + +upon 60 days notice from Participant terminate prospectively, unless if + +within 60 days after receipt of notice You either: (i) agree in writing + +to pay Participant a mutually agreeable reasonable royalty for Your past + +and future use of Modifications made by such Participant, or (ii) withdraw + +Your litigation claim with respect to the Contributor Version against such + +Participant. If within 60 days of notice, a reasonable royalty and + +payment arrangement are not mutually agreed upon in writing by the parties + +or the litigation claim is not withdrawn, the rights granted by Participant + +to You under Sections 2.1 and/or 2.2 automatically terminate at the expiration + +of the 60 day notice period specified above. + +<p><b>(b)</b> any software, hardware, or device, other than such + +Participant's Contributor Version, directly or indirectly infringes any + +patent, then any rights granted to You by such Participant under Sections + +2.1(b) and 2.2(b) are revoked effective as of the date You first made, + +used, sold, distributed, or had made, Modifications made by that Participant. + +<p><b>8.3. 
</b>If You assert a patent infringement claim against + +Participant alleging that such Participant's Contributor Version directly + +or indirectly infringes any patent where such claim is resolved (such as + +by license or settlement) prior to the initiation of patent infringement + +litigation, then the reasonable value of the licenses granted by such Participant + +under Sections 2.1 or 2.2 shall be taken into account in determining the + +amount or value of any payment or license. + +<p><b>8.4.</b> In the event of termination under Sections 8.1 or + +8.2 above, all end user license agreements (excluding distributors + +and resellers) which have been validly granted by You or any distributor + +hereunder prior to termination shall survive termination.</ul> + +<b>9. LIMITATION OF LIABILITY.</b> + +<ul>UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT (INCLUDING + +NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE INITIAL DEVELOPER, + +ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF COVERED CODE, OR ANY SUPPLIER + +OF ANY OF SUCH PARTIES, BE LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, + +INCIDENTAL, OR CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT + +LIMITATION, DAMAGES FOR LOSS OF GOODWILL, WORK STOPPAGE, COMPUTER FAILURE + +OR MALFUNCTION, OR ANY AND ALL OTHER COMMERCIAL DAMAGES OR LOSSES, EVEN + +IF SUCH PARTY SHALL HAVE BEEN INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. + +THIS LIMITATION OF LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR + +PERSONAL INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT APPLICABLE + +LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION + +OR LIMITATION OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION + +AND LIMITATION MAY NOT APPLY TO YOU.</ul> + +<b>10. U.S. GOVERNMENT END USERS.</b> + +<ul>The Covered Code is a ''commercial item,'' as that term is defined + +in 48 C.F.R. 2.101 (Oct. 
1995), consisting of ''commercial computer software'' + +and ''commercial computer software documentation,'' as such terms are used + +in 48 C.F.R. 12.212 (Sept. 1995). Consistent with 48 C.F.R. 12.212 and + +48 C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all U.S. Government + +End Users acquire Covered Code with only those rights set forth herein.</ul> + +<b>11. MISCELLANEOUS.</b> + +<ul>This License represents the complete agreement concerning subject matter + +hereof. If any provision of this License is held to be unenforceable, such + +provision shall be reformed only to the extent necessary to make it enforceable. + +This License shall be governed by California law provisions (except to + +the extent applicable law, if any, provides otherwise), excluding its conflict-of-law + +provisions. With respect to disputes in which at least one party is a citizen + +of, or an entity chartered or registered to do business in the United States + +of America, any litigation relating to this License shall be subject to + +the jurisdiction of the Federal Courts of the Northern District of California, + +with venue lying in Santa Clara County, California, with the losing party + +responsible for costs, including without limitation, court costs and reasonable + +attorneys' fees and expenses. The application of the United Nations Convention + +on Contracts for the International Sale of Goods is expressly excluded. + +Any law or regulation which provides that the language of a contract shall + +be construed against the drafter shall not apply to this License.</ul> + +<b>12. RESPONSIBILITY FOR CLAIMS.</b> + +<ul>As between Initial Developer and the Contributors, each party is responsible + +for claims and damages arising, directly or indirectly, out of its utilization + +of rights under this License and You agree to work with Initial Developer + +and Contributors to distribute such responsibility on an equitable basis. 
+ +Nothing herein is intended or shall be deemed to constitute any admission + +of liability.</ul> + +<b>13. MULTIPLE-LICENSED CODE.</b> + +<ul>Initial Developer may designate portions of the Covered Code as “Multiple-Licensed”. + +“Multiple-Licensed” means that the Initial Developer permits you to utilize + +portions of the Covered Code under Your choice of the NPL or the alternative + +licenses, if any, specified by the Initial Developer in the file described + +in Exhibit A.</ul> + + + +<p><br><b>EXHIBIT A -Mozilla Public License.</b> + +<ul>``The contents of this file are subject to the Mozilla Public License + +Version 1.1 (the "License"); you may not use this file except in compliance + +with the License. You may obtain a copy of the License at + +<br>http://www.mozilla.org/MPL/ + +<p>Software distributed under the License is distributed on an "AS IS" + +basis, WITHOUT WARRANTY OF + +<br>ANY KIND, either express or implied. See the License for the specific + +language governing rights and + +<br>limitations under the License. + +<p>The Original Code is ______________________________________. + +<p>The Initial Developer of the Original Code is ________________________. + +Portions created by + +<br> ______________________ are Copyright (C) ______ _______________________. + +All Rights + +<br>Reserved. + +<p>Contributor(s): ______________________________________. + +<p>Alternatively, the contents of this file may be used under the terms + +of the _____ license (the “[___] License”), in which case the provisions + +of [______] License are applicable instead of those above. + +If you wish to allow use of your version of this file only under the terms + +of the [____] License and not to allow others to use your version of this + +file under the MPL, indicate your decision by deleting the provisions + +above and replace them with the notice and other provisions required + +by the [___] License. 
If you do not delete the provisions above, + +a recipient may use your version of this file under either the MPL or the + +[___] License." + +<p>[NOTE: The text of this Exhibit A may differ slightly from the text + +of the notices in the Source Code files of the Original Code. You should + +use the text of this Exhibit A rather than the text found in the Original + +Code Source Code for Your Modifications.] + +<p> + +</body> + +</html> + diff --git a/Makefile.am b/Makefile.am new file mode 100644 index 00000000..cc1c6c08 --- /dev/null +++ b/Makefile.am @@ -0,0 +1,134 @@ +# -*- Mode: Makefile -*- +# +# Makefile.am - automake file for Rapier +# +# $Id$ +# +# Copyright (C) 2000 David Beckett - http://purl.org/net/dajobe/ +# Institute for Learning and Research Technology, University of Bristol. +# +# This package is Free Software available under either of two licenses +# (see FAQS.html to see why): +# +# 1. The GNU Lesser General Public License (LGPL) +# +# See http://www.gnu.org/copyleft/lesser.html or COPYING.LIB for the +# full license text. +# _________________________________________________________________ +# +# Copyright (C) 2000 David Beckett, Institute for Learning and +# Research Technology, University of Bristol. All Rights Reserved. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public License +# as published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA +# _________________________________________________________________ +# +# NOTE - under Term 3 of the LGPL, you may choose to license the entire +# library under the GPL. See COPYING for the full license text. +# +# 2. The Mozilla Public License +# +# See http://www.mozilla.org/MPL/MPL-1.1.html or MPL.html for the full +# license text. +# +# Under MPL section 13. I declare that all of the Covered Code is +# Multiple Licensed: +# _________________________________________________________________ +# +# The contents of this file are subject to the Mozilla Public License +# version 1.1 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License +# at http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and +# limitations under the License. +# +# The Initial Developer of the Original Code is David Beckett. +# Portions created by David Beckett are Copyright (C) 2000 David +# Beckett, Institute for Learning and Research Technology, University +# of Bristol. All Rights Reserved. +# +# Alternatively, the contents of this file may be used under the +# terms of the GNU Lesser General Public License, in which case the +# provisions of the LGPL License are applicable instead of those +# above. If you wish to allow use of your version of this file only +# under the terms of the LGPL License and not to allow others to use +# your version of this file under the MPL, indicate your decision by +# deleting the provisions above and replace them with the notice and +# other provisions required by the LGPL License. 
If you do not delete +# the provisions above, a recipient may use your version of this file +# under either the MPL or the LGPL License. + + +bin_PROGRAMS = rdfdump +lib_LIBRARIES = librapier.a + +include_HEADERS = rapier.h + +rdfdump_LDADD = librapier.a + +librapier_a_SOURCES = rapier_parse.c + +librapier_a_LIBADD = @XML_OBJS@ +librapier_a_DEPENDENCIES = @XML_OBJS@ + +EXTRA_librapier_a_SOURCES = rdfdump.c + +EXTRA_DIST=ChangeLog \ +README NEWS LICENSE.txt \ +README.html NEWS.html LICENSE.html \ +INSTALL.html MPL.html \ +autogen.sh + +# Why is this not in the default makefile? +CC=@CC@ + +CFLAGS=@CFLAGS@ $(MEM) +STANDARD_CFLAGS=@STANDARD_CFLAGS@ $(MEM) +LIBS=@LIBS@ $(MEM_LIBS) + +# Memory debugging alternatives +MEM=@MEM@ +MEM_LIBS=@MEM_LIBS@ + +# 1) None (use standard functions directly) +#MEM= +#MEM_LIBS= + +# 2) Use dmalloc library +#MEM=-DRAPIER_MEMORY_DEBUG_DMALLOC=1 +#MEM_LIBS=-ldmalloc + +# Create some text files from HTML sources +LYNX=lynx +LYNXARGS=-dump -nolist + +SUFFIXES = .html .txt + +.html.txt: + $(LYNX) $(LYNXARGS) $< > $@ + +README: README.html + $(LYNX) $(LYNXARGS) $< > $@ + +NEWS: NEWS.html + $(LYNX) $(LYNXARGS) $< > $@ + +@SET_MAKE@ + +$(top_builddir)/expat/xmlparse/xmlparse.o $(top_builddir)/expat/xmlparse/hashtable.o $(top_builddir)/expat/xmltok/xmlrole.o $(top_builddir)/expat/xmltok/xmltok.o: + cd expat && $(MAKE) $(AM_MAKEFLAGS) all diff --git a/NEWS.html b/NEWS.html new file mode 100644 index 00000000..4477e96a --- /dev/null +++ b/NEWS.html @@ -0,0 +1,22 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "blah"> +<html> +<head> + <title>Rapier RDF Parser News</title> +</head> +<body bgcolor="#ffffff" text="#000085"> + +<h1 align="center">Rapier RDF Parser News</h1> +<h2 align="center"><a href="http://purl.org/net/dajobe/">Dave Beckett</a><br /> +<a href="http://www.ilrt.bristol.ac.uk/">Institute for Learning and Research Technology</a><br /><a href="http://www.bristol.ac.uk/">University of Bristol</a></h2> + + 
+<h2>OVERVIEW</h2> + +<hr /> + +<p>Copyright 2000 <a href="http://purl.org/net/dajobe/">Dave Beckett</a>, <a href="http://www.ilrt.bristol.ac.uk/">Institute for Learning and Research Technology</a>, <a href="http://www.bristol.ac.uk/">University of Bristol</a></p> + +<p><a href="http://www.mirror.ac.uk/services/validator/check/referer">Validate me</a></p> + +</body> +</html> diff --git a/README.html b/README.html new file mode 100644 index 00000000..24934974 --- /dev/null +++ b/README.html @@ -0,0 +1,22 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "blah"> +<html> +<head> + <title>Rapier RDF Parser</title> +</head> +<body bgcolor="#ffffff" text="#000085"> + +<h1 align="center">Rapier RDF Parser</h1> +<h2 align="center"><a href="http://purl.org/net/dajobe/">Dave Beckett</a><br /> +<a href="http://www.ilrt.bristol.ac.uk/">Institute for Learning and Research Technology</a><br /><a href="http://www.bristol.ac.uk/">University of Bristol</a></h2> + + +<h2>OVERVIEW</h2> + +<hr /> + +<p>Copyright 2000 <a href="http://purl.org/net/dajobe/">Dave Beckett</a>, <a href="http://www.ilrt.bristol.ac.uk/">Institute for Learning and Research Technology</a>, <a href="http://www.bristol.ac.uk/">University of Bristol</a></p> + +<p><a href="http://www.mirror.ac.uk/services/validator/check/referer">Validate me</a></p> + +</body> +</html> diff --git a/acconfig.h b/acconfig.h new file mode 100644 index 00000000..fa63088d --- /dev/null +++ b/acconfig.h @@ -0,0 +1,16 @@ +/* package name */ +#define PACKAGE + +/* package version */ +#define VERSION + +#undef RAPIER_VERSION_MAJOR +#undef RAPIER_VERSION_MINOR +#undef RAPIER_VERSION_RELEASE + +/* XML parsers */ +#undef NEED_EXPAT +#undef NEED_LIBXML + +/* expat compiled with namespaces? 
*/ +#undef HAVE_XML_SetNamespaceDeclHandler diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 00000000..7cffa39d --- /dev/null +++ b/autogen.sh @@ -0,0 +1,120 @@ +#!/bin/sh +# +# autogen.sh - Generates the initial makefiles from a pristine CVS tree +# +# $Id$ +# +# USAGE: autogen.sh [configure options] +# +# If environment variable DRYRUN is set, no configuring will be done - +# (e.g. in bash) DRYRUN=1 ./autogen.sh +# will not do any configuring but will emit the programs that would be run. +# +# This script is based on similar scripts used in various tools +# commonly made available via CVS and used with GNU automake. +# Try 'locate autogen.sh' on your system and see what you get. +# + +PACKAGE=rapier +DIE= + +if test "X$DRYRUN" != X; then + DRYRUN=echo +fi + +autoconf_vers=2.13 +automake_vers=1.4 +aclocal_vers=1.4 + +program=`basename $0` + +if (autoconf --version) < /dev/null > /dev/null 2>&1 ; then + if (autoconf --version | awk 'NR==1 { if( $3 >= '$autoconf_vers') \ + exit 1; exit 0; }'); + then + echo "$program: ERROR: \`autoconf' is too old." + echo " (version $autoconf_vers or newer is required)" + DIE="yes" + fi +else + echo + echo "$program: ERROR: You must have \`autoconf' installed to compile $PACKAGE." + echo " (version $autoconf_vers or newer is required)" + DIE="yes" +fi + + +if (automake --version) < /dev/null > /dev/null 2>&1 ; then + if (automake --version | awk 'NR==1 { if( $4 >= '$automake_vers') \ + exit 1; exit 0; }'); + then + echo "$program: ERROR: \`automake' is too old." + echo " (version $automake_vers or newer is required)" + DIE="yes" + fi + if (aclocal --version) < /dev/null > /dev/null 2>&1; then + if (aclocal --version | awk 'NR==1 { if( $4 >= '$aclocal_vers' ) \ + exit 1; exit 0; }' ); + then + echo "$program: ERROR: \`aclocal' is too old." 
+ echo " (version $aclocal_vers or newer is required)" + DIE="yes" + fi + else + echo + echo "$program: ERROR: Missing \`aclocal'" + echo " The version of automake installed doesn't appear recent enough." + DIE="yes" + fi +else + echo + echo "$program: ERROR: You must have \`automake' installed to compile $PACKAGE." + echo " (version $automake_vers or newer is required)" + DIE="yes" +fi + + +if test "X$DIE" != X; then + exit 1 +fi + +if test -z "$*"; then + echo "$program: WARNING: Running \`configure' with no arguments." + echo "If you wish to pass any to it, please specify them on the" + echo "\`$0' command line." +fi + +am_opt= + +for coin in `find ${srcdir:-.} -name configure.in -print` +do + dir=`dirname $coin` + if test -f $dir/NO-AUTO-GEN; then + echo $program: Skipping $dir -- flagged as no auto-gen + else + echo $program: Processing directory $dir + ( cd $dir + aclocalinclude="$ACLOCAL_FLAGS" + echo "$program: Running aclocal $aclocalinclude" + $DRYRUN aclocal $aclocalinclude + if grep "^AM_CONFIG_HEADER" configure.in >/dev/null; then + echo "$program: Running autoheader" + $DRYRUN autoheader + fi + echo "$program: Running automake $am_opt" + $DRYRUN automake --add-missing $am_opt + echo "$program: Running autoconf" + $DRYRUN autoconf + ) + fi +done + +conf_flags= + +echo "$program: Running ./configure $conf_flags $@" +if test "X$DRYRUN" = X; then + $DRYRUN ./configure $conf_flags "$@" \ + && echo "$program: Now type \`make' to compile $PACKAGE" || exit 1 +else + $DRYRUN ./configure $conf_flags "$@" +fi diff --git a/config.h.in b/config.h.in new file mode 100644 index 00000000..ef79ebbc --- /dev/null +++ b/config.h.in @@ -0,0 +1,56 @@ +/* config.h.in. Generated automatically from configure.in by autoheader. */ + +/* Define to empty if the keyword does not work. */ +#undef const + +/* Define if you have the ANSI C header files. 
*/ +#undef STDC_HEADERS + +/* Define if your processor stores words with the most significant + byte first (like Motorola and SPARC, unlike Intel and VAX). */ +#undef WORDS_BIGENDIAN + +#undef RAPIER_VERSION_MAJOR +#undef RAPIER_VERSION_MINOR +#undef RAPIER_VERSION_RELEASE + +/* XML parsers */ +#undef NEED_EXPAT +#undef NEED_LIBXML + +/* expat compiled with namespaces? */ +#undef HAVE_XML_SetNamespaceDeclHandler + +/* Define if you have the getopt_long function. */ +#undef HAVE_GETOPT_LONG + +/* Define if you have the <dmalloc.h> header file. */ +#undef HAVE_DMALLOC_H + +/* Define if you have the <gnome-xml/parser.h> header file. */ +#undef HAVE_GNOME_XML_PARSER_H + +/* Define if you have the <stdarg.h> header file. */ +#undef HAVE_STDARG_H + +/* Define if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define if you have the xml library (-lxml). */ +#undef HAVE_LIBXML + +/* Define if you have the xmlparse library (-lxmlparse). */ +#undef HAVE_LIBXMLPARSE + +/* Name of package */ +#undef PACKAGE + +/* Version number of package */ +#undef VERSION + diff --git a/configure.in b/configure.in new file mode 100644 index 00000000..94f280bf --- /dev/null +++ b/configure.in @@ -0,0 +1,287 @@ +dnl -*- Mode: sh -*- +dnl +dnl configure.in - autoconf file for Rapier +dnl (Process this file with autoconf to produce a configure script.) +dnl +dnl $Id$ +dnl +dnl Copyright (C) 2000 David Beckett - http://purl.org/net/dajobe/ +dnl Institute for Learning and Research Technology, University of Bristol. +dnl +dnl This package is Free Software available under either of two licenses +dnl (see FAQS.html to see why): +dnl +dnl 1. The GNU Lesser General Public License (LGPL) +dnl +dnl See http://www.gnu.org/copyleft/lesser.html or COPYING.LIB for the +dnl full license text. 
+dnl _________________________________________________________________ +dnl +dnl Copyright (C) 2000 David Beckett, Institute for Learning and +dnl Research Technology, University of Bristol. All Rights Reserved. +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public License +dnl as published by the Free Software Foundation; either version 2 of +dnl the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +dnl USA +dnl _________________________________________________________________ +dnl +dnl NOTE - under Term 3 of the LGPL, you may choose to license the entire +dnl library under the GPL. See COPYING for the full license text. +dnl +dnl 2. The Mozilla Public License +dnl +dnl See http://www.mozilla.org/MPL/MPL-1.1.html or MPL.html for the full +dnl license text. +dnl +dnl Under MPL section 13. I declare that all of the Covered Code is +dnl Multiple Licensed: +dnl _________________________________________________________________ +dnl +dnl The contents of this file are subject to the Mozilla Public License +dnl version 1.1 (the "License"); you may not use this file except in +dnl compliance with the License. You may obtain a copy of the License +dnl at http://www.mozilla.org/MPL/ +dnl +dnl Software distributed under the License is distributed on an "AS IS" +dnl basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See +dnl the License for the specific language governing rights and +dnl limitations under the License. +dnl +dnl The Initial Developer of the Original Code is David Beckett. +dnl Portions created by David Beckett are Copyright (C) 2000 David +dnl Beckett, Institute for Learning and Research Technology, University +dnl of Bristol. All Rights Reserved. +dnl +dnl Alternatively, the contents of this file may be used under the +dnl terms of the GNU Lesser General Public License, in which case the +dnl provisions of the LGPL License are applicable instead of those +dnl above. If you wish to allow use of your version of this file only +dnl under the terms of the LGPL License and not to allow others to use +dnl your version of this file under the MPL, indicate your decision by +dnl deleting the provisions above and replace them with the notice and +dnl other provisions required by the LGPL License. If you do not delete +dnl the provisions above, a recipient may use your version of this file +dnl under either the MPL or the LGPL License. + + +AC_REVISION($Revision$)dnl + +AC_PREREQ(2.13) +AC_INIT(rapier_parse.c) +AM_CONFIG_HEADER(config.h) + +AM_INIT_AUTOMAKE(rapier, 0.0.1) + +AM_MAINTAINER_MODE + +dnl Checks for programs. +AC_CANONICAL_HOST +AC_ARG_PROGRAM +AM_SANITY_CHECK +AM_PROG_CC_STDC +AC_PROG_INSTALL +AC_PROG_LN_S +AC_PROG_MAKE_SET +AC_PROG_RANLIB + +AM_MISSING_PROG(ACLOCAL, aclocal, $missing_dir) +AM_MISSING_PROG(AUTOCONF, autoconf, $missing_dir) +AM_MISSING_PROG(AUTOMAKE, automake, $missing_dir) +AM_MISSING_PROG(AUTOHEADER, autoheader, $missing_dir) + + + +dnl compiler checks +# if using gcc... +if test "$ac_cv_prog_gcc" = yes; then + STANDARD_CFLAGS= + MAINTAINER_CFLAGS="-Wall -Wshadow -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wredundant-decls" +else + STANDARD_CFLAGS= + MAINTAINER_CFLAGS= +fi + + + +dnl Checks for header files. 
+AC_HEADER_STDC +AC_CHECK_HEADERS(stdlib.h unistd.h string.h stdarg.h dmalloc.h) + + +dnl Checks for typedefs, structures, and compiler characteristics. +AC_C_CONST +AC_C_BIGENDIAN + +dnl need to change quotes to allow square brackets +changequote(<<, >>)dnl +version_major=`echo $VERSION | sed -e 's/^\([^\.]*\)\.\([^\.]*\)\.\(.*\)$/\1/'` +version_minor=`echo $VERSION | sed -e 's/^\([^\.]*\)\.\([^\.]*\)\.\(.*\)$/\2/'` +version_release=`echo $VERSION | sed -e 's/^\([^\.]*\)\.\([^\.]*\)\.\(.*\)$/\3/'` +changequote([, ])dnl + +AC_DEFINE_UNQUOTED(RAPIER_VERSION_MAJOR, $version_major) +AC_DEFINE_UNQUOTED(RAPIER_VERSION_MINOR, $version_minor) +AC_DEFINE_UNQUOTED(RAPIER_VERSION_RELEASE, $version_release) + + +dnl Checks for library functions. +AC_CHECK_FUNCS(getopt_long) + +dnl Checks for XML parsers +have_expat=0 +have_expat_lib=0 +have_expat_source=0 +need_expat=0 +need_expat_source=0 +AC_MSG_CHECKING(for expat sources) +if test -d $srcdir/expat; then + have_expat_source=1 + have_expat=1 + AC_MSG_RESULT(yes) +else + AC_MSG_RESULT(no) +fi + +oLIBS="$LIBS" +AC_CHECK_LIB(xmltok, main, xmlt=1, xmlt=0) + +AC_CHECK_LIB(xmlparse, XML_ParserCreate, xmlp=1, xmlp=0, -lxmltok) +AC_CHECK_LIB(xmlparse, XML_SetNamespaceDeclHandler) +if test "X$ac_cv_lib_xmlparse_XML_SetNamespaceDeclHandler" = "Xyes"; then + AC_DEFINE(HAVE_XML_SetNamespaceDeclHandler) +fi + +LIBS="$oLIBS" +AC_MSG_CHECKING(for system expat library) +if test $xmlp = 1 -a $xmlt = 1; then + have_expat_lib=1 + have_expat=1 + AC_MSG_RESULT(yes) +else + AC_MSG_RESULT(no) +fi + +AC_CHECK_PROG(XML_CONFIG, xml-config, xml-config) + + +have_libxml=0 +have_libxml_lib=0 +have_libxml_source=0 +need_libxml=0 +need_libxml_source=0 +AC_MSG_CHECKING(for libxml sources) +if test -d $srcdir/libxml -a -r $srcdir/libxml/libxml.spec ; then + have_libxml_source=1 + AC_MSG_RESULT(yes) +else + AC_MSG_RESULT(no) +fi + +if test "X$XML_CONFIG" != X; then + oLIBS="$LIBS" + LIBS="$LIBS `$XML_CONFIG --libs`" + AC_CHECK_LIB(xml, xmlParseFile) + 
LIBS="$oLIBS" + AC_MSG_CHECKING(for system (GNOME) libxml library) + if test $ac_cv_lib_xml_xmlParseFile = yes; then + have_libxml_lib=1 + have_libxml=1 + AC_MSG_RESULT(yes) + else + AC_MSG_RESULT(no) + fi +fi +AC_CHECK_HEADERS(gnome-xml/parser.h) + + +if test $have_expat_lib = 1; then + need_expat=1 + AC_DEFINE(NEED_EXPAT) +elif test $have_expat_source = 1; then + need_expat=1 + need_expat_source=1 + AC_DEFINE(NEED_EXPAT) +elif test $have_libxml_lib = 1; then + need_libxml=1 + AC_DEFINE(NEED_LIBXML) +elif test $have_libxml_source = 1; then + need_libxml=1 + need_libxml_source=1 + AC_DEFINE(NEED_LIBXML) +fi + + +AC_MSG_CHECKING(XML parsers required) +result= +if test $need_libxml = 1; then + if test $need_libxml_source = 1; then + result="$result libxml(source)" + else + result="$result libxml(system)" + fi +fi +if test $need_expat = 1; then + if test $need_expat_source = 1; then + result="$result expat(source)" + else + result="$result expat(system)" + fi +fi +AC_MSG_RESULT($result) + + +XML_OBJS= + +if test $have_libxml = 1 -a $need_libxml = 1; then + if test $need_libxml_source = 1; then + SD="$SD libxml" + (cd libxml && ./configure --cache=../config.cache --enable-shared=no) + CFLAGS="$CFLAGS -I$srcdir/libxml" + XML_OBJS="libxml/libxml.a" + else + LIBS="$LIBS `$XML_CONFIG --libs`" + CFLAGS="$CFLAGS `$XML_CONFIG --cflags`" + fi +fi + +if test $need_expat = 1; then + # Only build local copy if it is needed + if test $need_expat_source = 1; then + SD="$SD expat" + CFLAGS="-I\$(top_srcdir)/expat/xmlparse $CFLAGS" + XML_OBJS="\$(top_builddir)/expat/xmlparse/xmlparse.o \$(top_builddir)/expat/xmlparse/hashtable.o \$(top_builddir)/expat/xmltok/xmlrole.o \$(top_builddir)/expat/xmltok/xmltok.o" + else + LIBS="$LIBS -lxmlparse -lxmltok" + fi +fi +AC_SUBST(XML_OBJS) + +# Make final changes to cflags +MEM= +MEM_LIBS= +if test "$USE_MAINTAINER_MODE" = yes; then + MEM=-DRAPIER_MEMORY_DEBUG_DMALLOC=1 + MEM_LIBS=-ldmalloc + CFLAGS="-DRAPIER_DEBUG=1 $CFLAGS" +fi 
+STANDARD_CFLAGS="$STANDARD_CFLAGS $CFLAGS" +if test "$USE_MAINTAINER_MODE" = yes; then + CFLAGS="$MAINTAINER_CFLAGS $CFLAGS" +fi +AC_SUBST(MEM) +AC_SUBST(MEM_LIBS) +AC_SUBST(STANDARD_CFLAGS) + + +AC_OUTPUT(Makefile) diff --git a/docs/Makefile.am b/docs/Makefile.am new file mode 100644 index 00000000..cc1c6c08 --- /dev/null +++ b/docs/Makefile.am @@ -0,0 +1,134 @@ +# -*- Mode: Makefile -*- +# +# Makefile.am - automake file for Rapier +# +# $Id$ +# +# Copyright (C) 2000 David Beckett - http://purl.org/net/dajobe/ +# Institute for Learning and Research Technology, University of Bristol. +# +# This package is Free Software available under either of two licenses +# (see FAQS.html to see why): +# +# 1. The GNU Lesser General Public License (LGPL) +# +# See http://www.gnu.org/copyleft/lesser.html or COPYING.LIB for the +# full license text. +# _________________________________________________________________ +# +# Copyright (C) 2000 David Beckett, Institute for Learning and +# Research Technology, University of Bristol. All Rights Reserved. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public License +# as published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA +# _________________________________________________________________ +# +# NOTE - under Term 3 of the LGPL, you may choose to license the entire +# library under the GPL. See COPYING for the full license text. 
+# +# 2. The Mozilla Public License +# +# See http://www.mozilla.org/MPL/MPL-1.1.html or MPL.html for the full +# license text. +# +# Under MPL section 13. I declare that all of the Covered Code is +# Multiple Licensed: +# _________________________________________________________________ +# +# The contents of this file are subject to the Mozilla Public License +# version 1.1 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License +# at http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and +# limitations under the License. +# +# The Initial Developer of the Original Code is David Beckett. +# Portions created by David Beckett are Copyright (C) 2000 David +# Beckett, Institute for Learning and Research Technology, University +# of Bristol. All Rights Reserved. +# +# Alternatively, the contents of this file may be used under the +# terms of the GNU Lesser General Public License, in which case the +# provisions of the LGPL License are applicable instead of those +# above. If you wish to allow use of your version of this file only +# under the terms of the LGPL License and not to allow others to use +# your version of this file under the MPL, indicate your decision by +# deleting the provisions above and replace them with the notice and +# other provisions required by the LGPL License. If you do not delete +# the provisions above, a recipient may use your version of this file +# under either the MPL or the LGPL License. 
+ + +bin_PROGRAMS = rdfdump +lib_LIBRARIES = librapier.a + +include_HEADERS = rapier.h + +rdfdump_LDADD = librapier.a + +librapier_a_SOURCES = rapier_parse.c + +librapier_a_LIBADD = @XML_OBJS@ +librapier_a_DEPENDENCIES = @XML_OBJS@ + +EXTRA_librapier_a_SOURCES = rdfdump.c + +EXTRA_DIST=ChangeLog \ +README NEWS LICENSE.txt \ +README.html NEWS.html LICENSE.html \ +INSTALL.html MPL.html \ +autogen.sh + +# Why is this not in the default makefile? +CC=@CC@ + +CFLAGS=@CFLAGS@ $(MEM) +STANDARD_CFLAGS=@STANDARD_CFLAGS@ $(MEM) +LIBS=@LIBS@ $(MEM_LIBS) + +# Memory debugging alternatives +MEM=@MEM@ +MEM_LIBS=@MEM_LIBS@ + +# 1) None (use standard functions directly) +#MEM= +#MEM_LIBS= + +# 2) Use dmalloc library +#MEM=-DRAPIER_MEMORY_DEBUG_DMALLOC=1 +#MEM_LIBS=-ldmalloc + +# Create some text files from HTML sources +LYNX=lynx +LYNXARGS=-dump -nolist + +SUFFIXES = .html .txt + +.html.txt: + $(LYNX) $(LYNXARGS) $< > $@ + +README: README.html + $(LYNX) $(LYNXARGS) $< > $@ + +NEWS: NEWS.html + $(LYNX) $(LYNXARGS) $< > $@ + +@SET_MAKE@ + +$(top_builddir)/expat/xmlparse/xmlparse.o $(top_builddir)/expat/xmlparse/hashtable.o $(top_builddir)/expat/xmltok/xmlrole.o $(top_builddir)/expat/xmltok/xmltok.o: + cd expat && $(MAKE) $(AM_MAKEFLAGS) all diff --git a/src/.cvsignore b/src/.cvsignore new file mode 100644 index 00000000..703e31de --- /dev/null +++ b/src/.cvsignore @@ -0,0 +1,31 @@ +*.rdf +*.rdfs +*.log +*test +.deps* +ANNOUNCE* +ChangeLog +Makefile +Makefile.in +NEWS +README +TODO +aclocal.m4 +config.cache +config.guess +config.log +config.status +config.sub +configure +dmalloc* +example? 
+install-sh +missing +mkinstalldirs +not-used +old* +config.h +stamp-h* +test* +rdfdump +*.txt diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 00000000..cc1c6c08 --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,134 @@ +# -*- Mode: Makefile -*- +# +# Makefile.am - automake file for Rapier +# +# $Id$ +# +# Copyright (C) 2000 David Beckett - http://purl.org/net/dajobe/ +# Institute for Learning and Research Technology, University of Bristol. +# +# This package is Free Software available under either of two licenses +# (see FAQS.html to see why): +# +# 1. The GNU Lesser General Public License (LGPL) +# +# See http://www.gnu.org/copyleft/lesser.html or COPYING.LIB for the +# full license text. +# _________________________________________________________________ +# +# Copyright (C) 2000 David Beckett, Institute for Learning and +# Research Technology, University of Bristol. All Rights Reserved. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public License +# as published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA +# _________________________________________________________________ +# +# NOTE - under Term 3 of the LGPL, you may choose to license the entire +# library under the GPL. See COPYING for the full license text. +# +# 2. 
The Mozilla Public License +# +# See http://www.mozilla.org/MPL/MPL-1.1.html or MPL.html for the full +# license text. +# +# Under MPL section 13. I declare that all of the Covered Code is +# Multiple Licensed: +# _________________________________________________________________ +# +# The contents of this file are subject to the Mozilla Public License +# version 1.1 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License +# at http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and +# limitations under the License. +# +# The Initial Developer of the Original Code is David Beckett. +# Portions created by David Beckett are Copyright (C) 2000 David +# Beckett, Institute for Learning and Research Technology, University +# of Bristol. All Rights Reserved. +# +# Alternatively, the contents of this file may be used under the +# terms of the GNU Lesser General Public License, in which case the +# provisions of the LGPL License are applicable instead of those +# above. If you wish to allow use of your version of this file only +# under the terms of the LGPL License and not to allow others to use +# your version of this file under the MPL, indicate your decision by +# deleting the provisions above and replace them with the notice and +# other provisions required by the LGPL License. If you do not delete +# the provisions above, a recipient may use your version of this file +# under either the MPL or the LGPL License. 
+ + +bin_PROGRAMS = rdfdump +lib_LIBRARIES = librapier.a + +include_HEADERS = rapier.h + +rdfdump_LDADD = librapier.a + +librapier_a_SOURCES = rapier_parse.c + +librapier_a_LIBADD = @XML_OBJS@ +librapier_a_DEPENDENCIES = @XML_OBJS@ + +EXTRA_librapier_a_SOURCES = rdfdump.c + +EXTRA_DIST=ChangeLog \ +README NEWS LICENSE.txt \ +README.html NEWS.html LICENSE.html \ +INSTALL.html MPL.html \ +autogen.sh + +# Why is this not in the default makefile? +CC=@CC@ + +CFLAGS=@CFLAGS@ $(MEM) +STANDARD_CFLAGS=@STANDARD_CFLAGS@ $(MEM) +LIBS=@LIBS@ $(MEM_LIBS) + +# Memory debugging alternatives +MEM=@MEM@ +MEM_LIBS=@MEM_LIBS@ + +# 1) None (use standard functions directly) +#MEM= +#MEM_LIBS= + +# 2) Use dmalloc library +#MEM=-DRAPIER_MEMORY_DEBUG_DMALLOC=1 +#MEM_LIBS=-ldmalloc + +# Create some text files from HTML sources +LYNX=lynx +LYNXARGS=-dump -nolist + +SUFFIXES = .html .txt + +.html.txt: + $(LYNX) $(LYNXARGS) $< > $@ + +README: README.html + $(LYNX) $(LYNXARGS) $< > $@ + +NEWS: NEWS.html + $(LYNX) $(LYNXARGS) $< > $@ + +@SET_MAKE@ + +$(top_builddir)/expat/xmlparse/xmlparse.o $(top_builddir)/expat/xmlparse/hashtable.o $(top_builddir)/expat/xmltok/xmlrole.o $(top_builddir)/expat/xmltok/xmltok.o: + cd expat && $(MAKE) $(AM_MAKEFLAGS) all diff --git a/src/raptor.h b/src/raptor.h new file mode 100644 index 00000000..70d2e169 --- /dev/null +++ b/src/raptor.h @@ -0,0 +1,134 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * rapier.h - Redland Parser for RDF (Rapier) interfaces and definition + * + * $Id$ + * + * Copyright (C) 2000 David Beckett - http://purl.org/net/dajobe/ + * Institute for Learning and Research Technology, University of Bristol. + * + * This package is Free Software available under either of two licenses + * (see FAQS.html to see why): + * + * 1. The GNU Lesser General Public License (LGPL) + * + * See http://www.gnu.org/copyleft/lesser.html or COPYING.LIB for the + * full license text. 
+ * _________________________________________________________________ + * + * Copyright (C) 2000 David Beckett, Institute for Learning and + * Research Technology, University of Bristol. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * _________________________________________________________________ + * + * NOTE - under Term 3 of the LGPL, you may choose to license the entire + * library under the GPL. See COPYING for the full license text. + * + * 2. The Mozilla Public License + * + * See http://www.mozilla.org/MPL/MPL-1.1.html or MPL.html for the full + * license text. + * + * Under MPL section 13. I declare that all of the Covered Code is + * Multiple Licensed: + * _________________________________________________________________ + * + * The contents of this file are subject to the Mozilla Public License + * version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License + * at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and + * limitations under the License. 
+ * + * The Initial Developer of the Original Code is David Beckett. + * Portions created by David Beckett are Copyright (C) 2000 David + * Beckett, Institute for Learning and Research Technology, University + * of Bristol. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU Lesser General Public License, in which case the + * provisions of the LGPL License are applicable instead of those + * above. If you wish to allow use of your version of this file only + * under the terms of the LGPL License and not to allow others to use + * your version of this file under the MPL, indicate your decision by + * deleting the provisions above and replace them with the notice and + * other provisions required by the LGPL License. If you do not delete + * the provisions above, a recipient may use your version of this file + * under either the MPL or the LGPL License. + */ + + + +#ifndef RAPIER_H +#define RAPIER_H + + +#ifdef __cplusplus +extern "C" { +#endif + +/* Public structure */ +typedef struct rapier_parser_s rapier_parser; + +typedef enum { RAPIER_SUBJECT_TYPE_RESOURCE } rapier_subject_type; +typedef enum { RAPIER_PREDICATE_TYPE_PREDICATE } rapier_predicate_type; +typedef enum { RAPIER_OBJECT_TYPE_RESOURCE } rapier_object_type; + + +typedef struct { + const char *uri; + const char *file; + int line; + int column; + int byte; +} rapier_locator; + + +typedef enum { + RAPIER_FEATURE_SCANNING, + RAPIER_FEATURE_RDF_NON_NS_ATTRIBUTES +} rapier_feature; + + +/* Public functions */ + +/* Create */ +rapier_parser* rapier_new(void); +/* Destroy */ +void rapier_free(rapier_parser *rdf_parser); + +void rapier_parser_set_fatal_error(rapier_parser* parser, void *user_data, void (*fatal_error_fn)(void *user_data, rapier_locator *locator, const char *msg, ...)); +void rapier_parser_set_error(rapier_parser* parser, void *user_data, void (*error_fn)(void *user_data, rapier_locator *locator, const char *msg, ...)); +void 
rapier_parser_set_warning(rapier_parser* parser, void *user_data, void (*warning_fn)(void *user_data, rapier_locator *locator, const char *msg, ...)); + +void rapier_set_triple_handler(rapier_parser* parser, void *userData, void (*triple_handler)(void *userData, const char *subject, rapier_subject_type subject_type, const char *predicate, rapier_predicate_type predicate_type, const char *object, rapier_object_type object_type)); +int rapier_parse_file(rapier_parser* rdf_parser, const char *uri, const char *base_uri); +void rapier_print_locator(FILE *stream, rapier_locator* locator); + +void rapier_set_feature(rapier_parser *parser, rapier_feature feature, int value); + +#define RAPIER_RDF_MS_URI "http://www.w3.org/1999/02/22-rdf-syntax-ns#" +#define RAPIER_RDF_SCHEMA_URI "http://www.w3.org/2000/01/rdf-schema#" + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/raptor_general.c b/src/raptor_general.c new file mode 100644 index 00000000..9b4be7af --- /dev/null +++ b/src/raptor_general.c @@ -0,0 +1,2009 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * rapier_parse.c - Redland Parser for RDF (RAPIER) + * + * $Id$ + * + * Copyright (C) 2000 David Beckett - http://purl.org/net/dajobe/ + * Institute for Learning and Research Technology, University of Bristol. + * + * This package is Free Software available under either of two licenses + * (see FAQS.html to see why): + * + * 1. The GNU Lesser General Public License (LGPL) + * + * See http://www.gnu.org/copyleft/lesser.html or COPYING.LIB for the + * full license text. + * _________________________________________________________________ + * + * Copyright (C) 2000 David Beckett, Institute for Learning and + * Research Technology, University of Bristol. All Rights Reserved. 
+ * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * _________________________________________________________________ + * + * NOTE - under Term 3 of the LGPL, you may choose to license the entire + * library under the GPL. See COPYING for the full license text. + * + * 2. The Mozilla Public License + * + * See http://www.mozilla.org/MPL/MPL-1.1.html or MPL.html for the full + * license text. + * + * Under MPL section 13. I declare that all of the Covered Code is + * Multiple Licensed: + * _________________________________________________________________ + * + * The contents of this file are subject to the Mozilla Public License + * version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License + * at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and + * limitations under the License. + * + * The Initial Developer of the Original Code is David Beckett. + * Portions created by David Beckett are Copyright (C) 2000 David + * Beckett, Institute for Learning and Research Technology, University + * of Bristol. 
All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU Lesser General Public License, in which case the + * provisions of the LGPL License are applicable instead of those + * above. If you wish to allow use of your version of this file only + * under the terms of the LGPL License and not to allow others to use + * your version of this file under the MPL, indicate your decision by + * deleting the provisions above and replace them with the notice and + * other provisions required by the LGPL License. If you do not delete + * the provisions above, a recipient may use your version of this file + * under either the MPL or the LGPL License. + */ + + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#ifdef HAVE_STDARG_H +#include <stdarg.h> +#endif + +extern int errno; + +#define RAPIER_INTERNAL + +#ifdef LIBRDF_INTERNAL +/* if inside Redland */ +#include <librdf.h> + +#include <rdf_parser.h> +#include <rdf_node.h> +#include <rdf_stream.h> +#include <rdf_statement.h> + +#else +/* else standalone */ + +#define LIBRDF_MALLOC(type, size) malloc(size) +#define LIBRDF_CALLOC(type, size, count) calloc(size, count) +#define LIBRDF_FREE(type, ptr) free((void*)ptr) + +#ifdef RAPIER_DEBUG +/* Debugging messages */ +#define LIBRDF_DEBUG1(function, msg) do {fprintf(stderr, "%s:%d:%s: " msg, __FILE__, __LINE__, #function); } while(0) +#define LIBRDF_DEBUG2(function, msg, arg1) do {fprintf(stderr, "%s:%d:%s: " msg, __FILE__, __LINE__, #function, arg1);} while(0) +#define LIBRDF_DEBUG3(function, msg, arg1, arg2) do {fprintf(stderr, "%s:%d:%s: " msg, __FILE__, __LINE__, #function, arg1, arg2);} while(0) +#define LIBRDF_DEBUG4(function, msg, arg1, arg2, arg3) do {fprintf(stderr, "%s:%d:%s: " msg, __FILE__, __LINE__, #function, arg1, arg2, arg3);} while(0) + +#else +/* DEBUGGING TURNED OFF */ + +/* No debugging messages */ +#define LIBRDF_DEBUG1(function, msg) 
+#define LIBRDF_DEBUG2(function, msg, arg1) +#define LIBRDF_DEBUG3(function, msg, arg1, arg2) +#define LIBRDF_DEBUG4(function, msg, arg1, arg2, arg3) + +#endif + +#endif + + +/* for the memory allocation functions */ +#if defined(HAVE_DMALLOC_H) && defined(RAPIER_MEMORY_DEBUG_DMALLOC) +#include <dmalloc.h> +#undef HAVE_STDLIB_H +#endif + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#undef HAVE_STDLIB_H +#endif + + +/* XML parser includes */ +#ifdef NEED_EXPAT +#include <xmlparse.h> +#endif + +#ifdef NEED_LIBXML +#ifdef HAVE_GNOME_XML_PARSER_H +#include <gnome-xml/parser.h> +/* translate names from expat to libxml */ +#define XML_Char xmlChar +#else +#include <parser.h> +#endif +#endif + + +/* Rapier includes */ +#include <rapier.h> + +/* Rapier structures */ +/* namespace stack node */ +typedef struct rapier_ns_map_s rapier_ns_map; + +typedef enum { + /* Not in RDF grammar yet - searching for a start element. + * This can be <rdf:RDF> (goto 6.1) but since it is optional, + * the start element can also be <Description> (goto 6.3), + * <rdf:Seq> (goto 6.25) <rdf:Bag> (goto 6.26) or <rdf:Alt> (goto 6.27) + * OR from 6.3 can have ANY other element matching + * typedNode (6.13) - goto 6.3 + * CHOICE: Search for <rdf:RDF> node before starting match + * OR assume RDF content, hence go straight to production + */ + RAPIER_STATE_UNKNOWN = 0, + + /* Met production 6.1 (RDF) <rdf:RDF> element seen and can now + * expect <rdf:Description> (goto 6.3), <rdf:Seq> (goto 6.25) + * <rdf:Bag> (goto 6.26) or <rdf:Alt> (goto 6.27) OR from 6.3 can + * have ANY other element matching typedNode (6.13) - goto 6.3 + */ + RAPIER_STATE_IN_RDF = 6010, + + /* No need for 6.2 - already chose 6.3, 6.25, 6.26 or 6.27 */ + + /* Met production 6.3 (description) <rdf:Description> element + * OR 6.13 (typedNode) [pretty much anything else] + * CHOICE: Create a bag here (always? even if no bagId given) + * CHOICES: Match rdf:resource/resource, ID/rdf:ID attributes etc. 
+ */ + RAPIER_STATE_DESCRIPTION = 6030, + + /* Matching part of 6.3 (description) inside a <Description> + * where either a list of propertyElt elements is expected or + * an empty element + */ + RAPIER_STATE_IN_DESCRIPTION = 6031, + + /* met production 6.12 (propertyElt) + */ + RAPIER_STATE_PROPERTYELT = 6120, + + /* met production 6.13 (typedNode) + */ + RAPIER_STATE_TYPED_NODE = 6130, + + /* Met production 6.25 (sequence) <rdf:Seq> element seen. Goto 6.28 */ + RAPIER_STATE_SEQ = 6250, + + /* Met production 6.26 (bag) <rdf:Bag> element seen. Goto 6.28 */ + RAPIER_STATE_BAG = 6260, + + /* Met production 6.27 (alternative) <rdf:Alt> element seen. Goto 6.28 */ + RAPIER_STATE_ALT = 6270, + + /* Met production 6.28 (member) + * Now expect <rdf:li> element and if it empty, with resource attribute + * goto 6.29 otherwise goto 6.30 + * CHOICE: Match rdf:resource/resource + */ + RAPIER_STATE_MEMBER = 6280, + + /* met production 6.29 (referencedItem) + * Found a container item with reference - <rdf:li (rdf:)resource=".."/> */ + RAPIER_STATE_REFERENCEDITEM = 6290, + + /* met production 6.30 (inlineItem) + * Found a container item with content - <rdf:li> */ + RAPIER_STATE_INLINEITEM = 6300, + +} rapier_state; + + +/* Forms: + * 1) prefix=NULL uri=<URI> - default namespace defined + * 2) prefix=NULL, uri=NULL - no default namespace + * 3) prefix=<prefix>, uri=<URI> - regular pair defined <prefix>:<URI> + */ +struct rapier_ns_map_s { + struct rapier_ns_map_s* next; /* next down the stack, NULL at bottom */ + char *prefix; /* NULL means is the default namespace */ + char *uri; + int uri_length; + int depth; /* parse depth that this was added, delete when parser leaves this */ + int is_rdf_ms; /* Non 0 if is RDF M&S Namespace */ + int is_rdf_schema; /* Non 0 if is RDF Schema Namespace */ +}; + + +/* + * Rapier XML-namespaced name, for elements or attributes + */ + +/* There are three forms + * namespace=NULL - un-namespaced name + * namespace=defined, namespace->prefix=NULL - 
(default ns) name + * namespace=defined, namespace->prefix=defined - ns:name + */ +typedef struct { + const rapier_ns_map *namespace; + const char *qname; + const char *uri; /* URI of namespace+qname or NULL if not defined */ + const char *value; /* optional value - used when name is an attribute */ +} rapier_ns_name; + + +typedef enum { + RDF_ATTR_about = 0, /* value of rdf:about attribute */ + RDF_ATTR_aboutEach = 1, /* " rdf:aboutEach */ + RDF_ATTR_aboutEachPrefix = 2, /* " rdf:aboutEachPrefix */ + RDF_ATTR_ID = 3, /* " rdf:ID */ + RDF_ATTR_bagID = 4, /* " rdf:bagID */ + RDF_ATTR_reference = 5, /* " rdf:reference */ + RDF_ATTR_type = 6, /* " rdf:type */ + RDF_ATTR_parseType = 7, /* " rdf:parseType */ + + RDF_ATTR_LAST = RDF_ATTR_parseType +} rdf_attr; + +static const char *rdf_attr_names[]={ + "about", + "aboutEach", + "aboutEachPrefix", + "ID", + "bagID", + "reference", + "type", + "parseType", +}; + + +/* + * Rapier Element/attributes on stack + */ +struct rapier_element_s { + struct rapier_element_s *parent; /* NULL at bottom of stack */ + rapier_ns_name *name; + rapier_ns_name **attributes; + int attribute_count; + const char * rdf_attr[8]; /* attributes declared in M&S */ + + rapier_state state; /* state that this production matches */ + + /* CDATA content of element and checks for mixed content */ + char *content_cdata; + int content_element_seen; + int content_cdata_seen; + int content_cdata_length; + +}; + +typedef struct rapier_element_s rapier_element; + + +/* + * Rapier parser object + */ +struct rapier_parser_s { + /* XML parser specific stuff */ +#ifdef NEED_EXPAT + XML_Parser xp; +#endif +#ifdef NEED_LIBXML + /* structure holding sax event handlers */ + xmlSAXHandler sax; + /* parser context */ + /* xmlParserCtxtPtr xc; */ +#endif + + /* element depth */ + int depth; + + /* stack of namespaces, most recently added at top */ + rapier_ns_map *namespaces; + + /* can be filled with error location information */ + rapier_locator locator; + + /* stack of 
elements - elements add after current_element */ + rapier_element *root_element; + rapier_element *current_element; + + /* non 0 if parser had fatal error and cannot continue */ + int failed; + + /* non 0 if scanning for <rdf:RDF> element, else assume doc is RDF */ + int scanning_for_rdf_RDF; + + /* non 0 to forbid non-namespaced resource, ID etc attributes + * on RDF namespaced-elements + */ + int forbid_rdf_non_ns_attributes; + + /* stuff for our user */ + void *user_data; + + void *fatal_error_user_data; + void *error_user_data; + void *warning_user_data; + void (*fatal_error_fn)(void *user_data, rapier_locator *locator, const char *msg, ...); + void (*error_fn)(void *user_data, rapier_locator *locator, const char *msg, ...); + void (*warning_fn)(void *user_data, rapier_locator *locator, const char *msg, ...); + + /* parser callbacks */ + void (*triple_handler)(void *userData, const char *subject, rapier_subject_type subject_type, const char *predicate, rapier_predicate_type predicate_type, const char *object, rapier_object_type object_type); +}; + + + + +/* static variables */ +static const char *rapier_rdf_ms_uri=RAPIER_RDF_MS_URI; +static const char *rapier_rdf_schema_uri=RAPIER_RDF_SCHEMA_URI; + + + +/* Prototypes for common expat/libxml parsing event-handling functions */ +static void rapier_xml_start_element_handler(void *userData, + const XML_Char *name, + const XML_Char **atts); + +static void rapier_xml_end_element_handler(void *userData, + const XML_Char *name); + + +/* s is not 0 terminated. 
*/ +static void rapier_xml_cdata_handler(void *userData, + const XML_Char *s, + int len); +#ifdef HAVE_XML_SetNamespaceDeclHandler +static void rapier_start_namespace_decl_handler(void *userData, + const XML_Char *prefix, + const XML_Char *uri); + +static void rapier_end_namespace_decl_handler(void *userData, + const XML_Char *prefix); +#endif + +/* libxml-only prototypes */ +#ifdef NEED_LIBXML +static void rapier_xml_warning(void *context, rapier_locator *locator, const char *msg, ...); +static void rapier_xml_error(void *context, rapier_locator *locator, const char *msg, ...); +static void rapier_xml_fatal_error(void *context, rapier_locator *locator, const char *msg, ...); +#endif + + +/* Prototypes for local functions */ +static char * rapier_file_uri_to_filename(const char *uri); +static void rapier_parser_fatal_error(rapier_parser* parser, const char *message, ...); +static void rapier_parser_error(rapier_parser* parser, const char *message, ...); +static void rapier_parser_warning(rapier_parser* parser, const char *message, ...); + + + +/* prototypes for namespace and name/qname functions */ +static void rapier_init_namespaces(rapier_parser *rdf_parser); +static void rapier_start_namespace(rapier_parser *rdf_parser, const char *prefix, const char *namespace, int depth); +static void rapier_free_namespace(rapier_parser *rdf_parser, rapier_ns_map* namespace); +static void rapier_end_namespace(rapier_parser *rdf_parser, const char *prefix, const char *namespace); +static void rapier_end_namespaces_for_depth(rapier_parser *rdf_parser); +static rapier_ns_name* rapier_make_namespaced_name(rapier_parser *rdf_parser, const char *name, const char *value, int is_element); +static void rapier_print_ns_name(FILE *stream, rapier_ns_name* name); +static void rapier_free_ns_name(rapier_ns_name* name); +static int rapier_ns_names_equal(rapier_ns_name *name1, rapier_ns_name *name2); + + +/* prototypes for element functions */ +static rapier_element* 
rapier_element_pop(rapier_parser *rdf_parser); +static void rapier_element_push(rapier_parser *rdf_parser, rapier_element* element); +static void rapier_free_element(rapier_element *element); +#ifdef RAPIER_DEBUG +static void rapier_print_element(rapier_element *element, FILE* stream); +#endif + + + +/* + * Namespaces in XML + * http://www.w3.org/TR/1999/REC-xml-names-19990114/#nsc-NSDeclared + * (section 4) says: + * + * -------------------------------------------------------------------- + * The prefix xml is by definition bound to the namespace name + * http://www.w3.org/XML/1998/namespace + * -------------------------------------------------------------------- + * + * Thus should define it in the table of namespaces before we start. + * + * We *can* also define others, but let's not. + * + */ +static void +rapier_init_namespaces(rapier_parser *rdf_parser) { + rapier_start_namespace(rdf_parser, "xml", "http://www.w3.org/XML/1998/namespace", -1); +} + + +static void +rapier_start_namespace(rapier_parser *rdf_parser, + const char *prefix, const char *namespace, + int depth) +{ + int uri_length=0; + int len; + rapier_ns_map *map; + void *p; + + LIBRDF_DEBUG4(rapier_start_namespace, + "namespace prefix %s uri %s depth %d\n", + prefix ? 
prefix : "(default)", namespace, depth); + + /* Convert an empty namespace string "" to a NULL pointer */ + if(!*namespace) + namespace=NULL; + + len=sizeof(rapier_ns_map); + if(namespace) { + uri_length=strlen(namespace); + len+=uri_length+1; + } + if(prefix) + len+=strlen(prefix)+1; + + /* Just one malloc for map structure + namespace (maybe) + prefix (maybe)*/ + map=(rapier_ns_map*)LIBRDF_CALLOC(rapier_ns_map, len, 1); + if(!map) + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + + p=(void*)map+sizeof(rapier_ns_map); + if(namespace) { + map->uri=strcpy((char*)p, namespace); + map->uri_length=uri_length; + p+= uri_length+1; + } + if(prefix) + map->prefix=strcpy((char*)p, prefix); + map->depth=depth; + + /* set convienience flags when there is a defined namespace URI */ + if(namespace) { + if(!strncmp(namespace, rapier_rdf_ms_uri, uri_length)) + map->is_rdf_ms=1; + else if(!strncmp(namespace, rapier_rdf_schema_uri, uri_length)) + map->is_rdf_schema=1; + } + + if(rdf_parser->namespaces) + map->next=rdf_parser->namespaces; + rdf_parser->namespaces=map; +} + + +static void +rapier_free_namespace(rapier_parser *rdf_parser, rapier_ns_map* namespace) +{ + LIBRDF_FREE(rapier_ns_map, namespace); +} + + +static void +rapier_end_namespace(rapier_parser *rdf_parser, + const char *prefix, const char *namespace) +{ + LIBRDF_DEBUG3(rapier_end_namespace, "prefix %s uri \"%s\"\n", + prefix ? 
prefix : "(default)", namespace); +} + + +static void +rapier_end_namespaces_for_depth(rapier_parser *rdf_parser) +{ + while(rdf_parser->namespaces && + rdf_parser->namespaces->depth == rdf_parser->depth) { + rapier_ns_map* ns=rdf_parser->namespaces; + rapier_ns_map* next=ns->next; + + rapier_end_namespace(rdf_parser, ns->prefix, ns->uri); + rapier_free_namespace(rdf_parser, ns); + + rdf_parser->namespaces=next; + } + +} + + + +/* + * Namespaces in XML + * http://www.w3.org/TR/1999/REC-xml-names-19990114/#defaulting + * says: + * + * -------------------------------------------------------------------- + * 5.2 Namespace Defaulting + * + * A default namespace is considered to apply to the element where it + * is declared (if that element has no namespace prefix), and to all + * elements with no prefix within the content of that element. + * + * If the URI reference in a default namespace declaration is empty, + * then unprefixed elements in the scope of the declaration are not + * considered to be in any namespace. + * + * Note that default namespaces do not apply directly to attributes. + * + * [...] + * + * 5.3 Uniqueness of Attributes + * + * In XML documents conforming to this specification, no tag may + * contain two attributes which: + * + * 1. have identical names, or + * + * 2. have qualified names with the same local part and with + * prefixes which have been bound to namespace names that are + * identical. 
+ * -------------------------------------------------------------------- + */ + +static rapier_ns_name* +rapier_make_namespaced_name(rapier_parser *rdf_parser, const char *name, + const char *value, int is_element) +{ + rapier_ns_name* ns_name; + const char *p; + char *new_value=NULL; + rapier_ns_map* ns; + char* new_name; + int prefix_length; + int qname_length=0; + +#if RAPIER_DEBUG > 1 + LIBRDF_DEBUG2(rapier_make_namespaced_name, + "name %s\n", name); +#endif + + ns_name=(rapier_ns_name*)LIBRDF_CALLOC(rapier_ns_name, sizeof(rapier_ns_name), 1); + if(!ns_name) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + return NULL; + } + + if(value) { + new_value=(char*)LIBRDF_MALLOC(cstring, strlen(value)+1); + if(!new_value) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + LIBRDF_FREE(rapier_ns_name, ns_name); + return NULL; + } + strcpy(new_value, value); + ns_name->value=new_value; + } + + /* Find : */ + for(p=name; *p && *p != ':'; p++) + ; + + /* No : - pick up default namespace, if there is one */ + if(!*p) { + new_name=(char*)LIBRDF_MALLOC(cstring, strlen(name)+1); + if(!new_name) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + rapier_free_ns_name(ns_name); + return NULL; + } + strcpy(new_name, name); + ns_name->qname=new_name; + + /* Find a default namespace */ + for(ns=rdf_parser->namespaces; ns && ns->prefix; ns=ns->next) + ; + + if(ns) { + ns_name->namespace=ns; +#if RAPIER_DEBUG > 1 + LIBRDF_DEBUG2(rapier_make_namespaced_name, + "Found default namespace %s\n", ns->uri); +#endif + } else { + /* failed to find namespace - now what? 
FIXME */ + /* rapier_parser_warning(rdf_parser, "No default namespace defined - cannot expand %s", name); */ +#if RAPIER_DEBUG > 1 + LIBRDF_DEBUG1(rapier_make_namespaced_name, + "No default namespace defined\n"); +#endif + } + + } else { + prefix_length=p-name; + p++; /* move to start of qname */ + qname_length=strlen(p); + new_name=(char*)LIBRDF_MALLOC(cstring, qname_length+1); + if(!new_name) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + rapier_free_ns_name(ns_name); + return NULL; + } + strcpy(new_name, p); + ns_name->qname=new_name; + + /* Find the namespace */ + for(ns=rdf_parser->namespaces; ns ; ns=ns->next) + if(ns->prefix && !strncmp(name, ns->prefix, prefix_length)) + break; + + if(!ns) { + /* failed to find namespace - now what? */ + rapier_parser_error(rdf_parser, "Failed to find namespace in %s", name); + rapier_free_ns_name(ns_name); + return NULL; + } + +#if RAPIER_DEBUG > 1 + LIBRDF_DEBUG3(rapier_make_namespaced_name, + "Found namespace prefix %s URI %s\n", ns->prefix, ns->uri); +#endif + ns_name->namespace=ns; + } + + /* If namespace has a URI and a qname is defined, create the URI + * for this element + */ + if(ns_name->namespace && ns_name->namespace->uri && qname_length) { + char *uri_string=(char*)LIBRDF_MALLOC(cstring, + ns_name->namespace->uri_length + + qname_length + 1); + if(!uri_string) { + rapier_free_ns_name(ns_name); + return NULL; + } + ns_name->uri=uri_string; + } + + + return ns_name; +} + + +static void +rapier_print_ns_name(FILE *stream, rapier_ns_name* name) +{ + if(name->namespace) { + if(name->namespace->prefix) + fprintf(stream, "%s:%s", name->namespace->prefix, name->qname); + else + fprintf(stream, "(default):%s", name->qname); + } else + fputs(name->qname, stream); +} + + +static void +rapier_free_ns_name(rapier_ns_name* name) +{ + if(name->qname) + LIBRDF_FREE(cstring, name->qname); + + if(name->uri) + LIBRDF_FREE(cstring, name->uri); + + if(name->value) + LIBRDF_FREE(cstring, name->value); + 
LIBRDF_FREE(rapier_ns_name, name); +} + + +static int +rapier_ns_names_equal(rapier_ns_name *name1, rapier_ns_name *name2) +{ + if(strcmp(name1->qname, name2->qname)) + return 0; + if(name1->namespace != name2->namespace) + return 0; + return 1; +} + + +static rapier_element* +rapier_element_pop(rapier_parser *rdf_parser) +{ + rapier_element *element=rdf_parser->current_element; + + if(!element) + return NULL; + + rdf_parser->current_element=element->parent; + if(rdf_parser->root_element == element) /* just deleted root */ + rdf_parser->root_element=NULL; + + return element; +} + + +static void +rapier_element_push(rapier_parser *rdf_parser, rapier_element* element) +{ + element->parent=rdf_parser->current_element; + rdf_parser->current_element=element; + if(!rdf_parser->root_element) + rdf_parser->root_element=element; +} + + +static void +rapier_free_element(rapier_element *element) +{ + int i; + + for (i=0; i < element->attribute_count; i++) + if(element->attributes[i]) + rapier_free_ns_name(element->attributes[i]); + + if(element->attributes) + LIBRDF_FREE(rapier_ns_name_array, element->attributes); + + /* Free special RDF M&S attributes */ + for(i=0; i<= RDF_ATTR_LAST; i++) + if(element->rdf_attr[i]) + LIBRDF_FREE(cstring, element->rdf_attr[i]); + + if(element->content_cdata_length) + LIBRDF_FREE(rapier_ns_name_array, element->content_cdata); + + rapier_free_ns_name(element->name); + LIBRDF_FREE(rapier_element, element); +} + + + +#ifdef RAPIER_DEBUG +static void +rapier_print_element(rapier_element *element, FILE* stream) +{ + int i; + + rapier_print_ns_name(stream, element->name); + fputc('\n', stream); + + if(element->attribute_count) { + fputs(" attributes: ", stream); + for (i = 0; i < element->attribute_count; i++) { + if(i) + fputc(' ', stream); + rapier_print_ns_name(stream, element->attributes[i]); + fprintf(stream, "='%s'", element->attributes[i]->value); + } + fputc('\n', stream); + } +} +#endif + + +static void 
+rapier_xml_start_element_handler(void *userData, + const XML_Char *name, const XML_Char **atts) +{ + rapier_parser* rdf_parser=(rapier_parser*)userData; + int all_atts_count=0; + int ns_attributes_count=0; + rapier_ns_name** named_attrs=NULL; + int i; + rapier_ns_name* element_name; + rapier_element* element=NULL; + int finished; + rapier_state state; +#ifdef NEED_EXPAT + rapier_locator *locator=&rdf_parser->locator; /* for storing error info */ +#endif + +#ifdef NEED_EXPAT + locator->line=XML_GetCurrentLineNumber(rdf_parser->xp); + locator->column=XML_GetCurrentColumnNumber(rdf_parser->xp); + locator->byte=XML_GetCurrentByteIndex(rdf_parser->xp); +#endif + + rdf_parser->depth++; + + if (atts != NULL) { + /* Round 1 - find special attributes, at present just namespaces */ + for (i = 0;(atts[i] != NULL);i+=2) { + all_atts_count++; + + /* synthesise the XML NS events */ + if(!strncmp(atts[i], "xmlns", 5)) { + /* there is more i.e. xmlns:foo */ + const char *prefix=atts[i][5] ? &atts[i][6] : NULL; + + rapier_start_namespace(userData, prefix, atts[i+1], rdf_parser->depth); + atts[i]=NULL; /* Is it allowed to zap XML parser array things? 
FIXME */ + continue; + } + + ns_attributes_count++; + } + } + + + /* Now can recode element name with a namespace */ + + element_name=rapier_make_namespaced_name(rdf_parser, name, NULL, 1); + if(!element_name) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + return; + } + + + /* Create new element structure */ + element=(rapier_element*)LIBRDF_CALLOC(rapier_element, + sizeof(rapier_element), 1); + if(!element) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + rapier_free_ns_name(element_name); + return; + } + + + element->name=element_name; + + /* Prepare for possible element content */ + element->content_element_seen=0; + element->content_cdata_seen=0; + element->content_cdata_length=0; + + + + + if(ns_attributes_count) { + int offset = 0; + + /* Round 2 - turn attributes into namespaced-attributes */ + + /* Allocate new array to hold namespaced-attributes */ + named_attrs=(rapier_ns_name**)LIBRDF_CALLOC(rapier_ns_name-array, sizeof(rapier_ns_name*), ns_attributes_count); + if(!named_attrs) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + LIBRDF_FREE(rapier_element, element); + rapier_free_ns_name(element_name); + return; + } + + for (i = 0; i < all_atts_count; i++) { + rapier_ns_name* attribute; + + /* Skip previously processed attributes */ + if(!atts[i<<1]) + continue; + + /* namespace-name[i] stored in named_attrs[i] */ + attribute=rapier_make_namespaced_name(rdf_parser, atts[i<<1], + atts[(i<<1)+1], 0); + if(!attribute) { /* failed - tidy up and return */ + int j; + + for (j=0; j < i; j++) + LIBRDF_FREE(rapier_ns_name, named_attrs[j]); + LIBRDF_FREE(rapier_ns_name_array, named_attrs); + LIBRDF_FREE(rapier_element, element); + rapier_free_ns_name(element_name); + return; + } + + /* Save pointers to some RDF M&S attributes */ + + /* If RDF M&S namespace-prefixed attributes */ + if(attribute->namespace && attribute->namespace->is_rdf_ms) { + const char *attr_name=attribute->qname; + int j; + + for(j=0; j<= RDF_ATTR_LAST; 
j++) + if(!strcmp(attr_name, rdf_attr_names[j])) { + element->rdf_attr[j]=attribute->value; + /* Delete it if it was stored elsewhere */ +#if RAPIER_DEBUG + LIBRDF_DEBUG3(rapier_xml_start_element_handler, + "Found RDF M&S attribute %s URI %s\n", + attr_name, attribute->value); +#endif + /* make sure value isn't deleted from ns_name structure */ + attribute->value=NULL; + rapier_free_ns_name(attribute); + attribute=NULL; + } + } /* end if RDF M&S namespaced-prefixed attributes */ + + + /* If non namespace-prefixed RDF M&S attributes found on + * rdf namespace-prefixed element + */ + if(!rdf_parser->forbid_rdf_non_ns_attributes && + attribute && !attribute->namespace && + element_name->namespace && element_name->namespace->is_rdf_ms) { + const char *attr_name=attribute->qname; + int j; + + for(j=0; j<= RDF_ATTR_LAST; j++) + if(!strcmp(attr_name, rdf_attr_names[j])) { + element->rdf_attr[j]=attribute->value; + /* Delete it if it was stored elsewhere */ +#if RAPIER_DEBUG + LIBRDF_DEBUG3(rapier_xml_start_element_handler, + "Found non-namespaced RDF M&S attribute %s URI %s\n", + attr_name, attribute->value); +#endif + /* make sure value isn't deleted from ns_name structure */ + attribute->value=NULL; + rapier_free_ns_name(attribute); + attribute=NULL; + } + } /* end if non-namespace prefixed RDF M&S attributes */ + + + if(attribute) + named_attrs[offset++]=attribute; + } + + /* set actual count from attributes that haven't been skipped */ + ns_attributes_count=offset; + if(!offset && named_attrs) { + /* all attributes were RDF M&S or other specials and deleted + * so delete array and don't store pointer */ + LIBRDF_FREE(rapier_ns_name_array, named_attrs); + named_attrs=NULL; + } + + } /* end if ns_attributes_count */ + + element->attributes=named_attrs; + element->attribute_count=ns_attributes_count; + + + rapier_element_push(rdf_parser, element); + + + if(element->parent) { + if(++element->parent->content_element_seen == 1 && + element->parent->content_cdata_seen == 1) 
{ + /* Uh oh - mixed content, the parent element has cdata too */ + rapier_parser_warning(rdf_parser, "element %s has mixed content.", + element->parent->name->qname); + } + } + + +#ifdef RAPIER_DEBUG + fprintf(stderr, "rapier_xml_start_element_handler: Start of namespaced-element: "); + rapier_print_element(element, stderr); +#endif + + + /* Right, now ready to enter the grammar */ + + finished= 0; + if(element->parent) + state=element->parent->state; + else + state=RAPIER_STATE_UNKNOWN; + + while(!finished) { + const char *el_name=element->name->qname; + int element_in_rdf_ns=(element->name->namespace && + element->name->namespace->is_rdf_ms); + + switch(state) { + case RAPIER_STATE_UNKNOWN: + if(element_in_rdf_ns && !strcmp(el_name, "RDF")) { + state=RAPIER_STATE_IN_RDF; + /* need more content before can continue */ + finished=1; + break; + } + /* If scanning for element, can continue */ + if(rdf_parser->scanning_for_rdf_RDF) { + finished=1; + break; + } + /* Otherwise choice of next state can be made from the current + * element by IN_RDF state */ + + state=RAPIER_STATE_IN_RDF; + break; + + case RAPIER_STATE_IN_RDF: + if(element_in_rdf_ns) { + if(!strcmp(el_name, "Description")) { + state=RAPIER_STATE_DESCRIPTION; + break; + } else if(!strcmp(el_name, "Seq")) { + state=RAPIER_STATE_SEQ; + break; + } else if(!strcmp(el_name, "Bag")) { + state=RAPIER_STATE_BAG; + break; + } else if(!strcmp(el_name, "Alt")) { + state=RAPIER_STATE_ALT; + break; + } + + /* Unexpected rdf: element at outer layer */ + rapier_parser_error(rdf_parser, "Unexpected RDF M&S element %s in <rdf:RDF> - from productions 6.2, 6.3 and 6.4 expected rdf:Description, rdf:Seq, rdf:Bag or rdf:Alt only.", el_name); + finished=1; + } + + /* Hmm, must be a typedNode, handled by the description state + * so that ID, BagID are handled in one place. 
+ */ + state=RAPIER_STATE_DESCRIPTION; + break; + + + /* No need for 6.2 - already chose 6.3, 6.25, 6.26 or 6.27 */ + + + case RAPIER_STATE_DESCRIPTION: + /* choices here from production 6.3 (description) + * <rdf:Description idAboutAttr? bagIdAttr? propAttr* > + * Attributes: (ID|about|aboutEach|aboutEachPrefix)? bagID? propAttr* + * <typeName idAboutAttr? bagIdAttr? propAttr*> + * Attributes: (ID|about|aboutEach|aboutEachPrefix)? bagID? propAttr* + * (either may have no content, that is tested in the end element code) + */ + + /* lets add booleans - isn't C wonderful! */ + if((element->rdf_attr[RDF_ATTR_ID] != NULL) + + (element->rdf_attr[RDF_ATTR_about] != NULL) + + (element->rdf_attr[RDF_ATTR_aboutEach] != NULL) + + (element->rdf_attr[RDF_ATTR_aboutEachPrefix] != NULL) > 1) { + rapier_parser_warning(rdf_parser, "More than one of RDF ID, about, aboutEach or aboutEachPrefix attributes on element %s - from productions 6.5, 6.6, 6.7 and 6.8 expect at most one.", el_name); + } + + + /* has to be rdf:Description OR typedNode - checked above */ + if(element_in_rdf_ns) + state=RAPIER_STATE_IN_DESCRIPTION; + else + /* otherwise must match the typedNode production - checked below */ + state=RAPIER_STATE_TYPED_NODE; + + finished=1; + break; + + + /* Inside a <rdf:Description> so expecting a list of + * propertyElt elements + */ + case RAPIER_STATE_IN_DESCRIPTION: + state=RAPIER_STATE_PROPERTYELT; + finished=1; + break; + + + /* Expect to meet the typedNode production having + * fallen through and not met other productions - + * 6.3, 6.25, 6.26, 6.27. This is the last choice. + * + * choices here from production 6.13 (typedNode) + * <typeName idAboutAttr? bagIdAttr? propAttr* /> + * Attributes: (ID|about|aboutEach|aboutEachPrefix)? bagID? + * <typeName idAboutAttr? bagIdAttr? propAttr* > propertyElt* </typeName> + * Attributes: (ID|about|aboutEach|aboutEachPrefix)? bagID? 
propAttr* + */ + case RAPIER_STATE_TYPED_NODE: + finished=1; + break; + + case RAPIER_STATE_SEQ: + finished=1; + break; + + case RAPIER_STATE_BAG: + finished=1; + break; + + case RAPIER_STATE_ALT: + finished=1; + break; + + case RAPIER_STATE_MEMBER: + finished=1; + break; + + case RAPIER_STATE_REFERENCEDITEM: + finished=1; + break; + + case RAPIER_STATE_INLINEITEM: + finished=1; + break; + + /* choices here from production 6.12 (propertyElt) + * <propName idAttr?> value </propName> + * Attributes: ID? + * <propName idAttr? parseLiteral> literal </propName> + * Attributes: ID? parseType="literal" + * <propName idAttr? parseResource> propertyElt* </propName> + * Attributes: ID? parseType="resource" + * <propName idRefAttr? bagIdAttr? propAttr* /> + * Attributes: (ID|resource)? bagIdAttr? propAttr* + */ + case RAPIER_STATE_PROPERTYELT: + finished=1; + break; + + default: + rapier_parser_fatal_error(rdf_parser, "Unexpected parser state %d.", + state); + finished=1; + + } /* end switch */ + + if(state != element->state) { + element->state=state; + fprintf(stderr, "rapier_xml_start_element_handler: moved to state %d\n", state); + } + + } /* end while */ + + /* store final state that matched */ + element->state=state; +} + + + +static void +rapier_xml_end_element_handler(void *userData, const XML_Char *name) +{ + rapier_parser* rdf_parser=(rapier_parser*)userData; + rapier_element* element; + int finished; + rapier_state state; + rapier_ns_name *element_name; +#ifdef NEED_EXPAT + rapier_locator *locator=&rdf_parser->locator; /* for storing error info */ +#endif + +#ifdef NEED_EXPAT + locator->line=XML_GetCurrentLineNumber(rdf_parser->xp); + locator->column=XML_GetCurrentColumnNumber(rdf_parser->xp); + locator->byte=XML_GetCurrentByteIndex(rdf_parser->xp); +#endif + + /* recode element name */ + + element_name=rapier_make_namespaced_name(rdf_parser, name, NULL, 1); + if(!element_name) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + return; + } + + +#ifdef 
RAPIER_DEBUG + fprintf(stderr, "rapier_xml_end_element_handler: End of namespaced-element: "); + rapier_print_ns_name(stderr, element_name); + fputc('\n', stderr); +#endif + + element=rapier_element_pop(rdf_parser); + if(!rapier_ns_names_equal(element->name, element_name)) { + /* Hmm, unexpected name - FIXME, should do something! */ + rapier_parser_warning(rdf_parser, "Element %s ended, expected end of element %s\n", name, element->name->qname); + return; + } + + + state=element->state; + finished= 0; + while(!finished) { + const char *el_name=element->name->qname; + int element_in_rdf_ns=(element->name->namespace && + element->name->namespace->is_rdf_ms); + + switch(state) { + case RAPIER_STATE_UNKNOWN: + finished=1; + break; + + case RAPIER_STATE_IN_RDF: + if(element_in_rdf_ns && !strcmp(el_name, "RDF")) { + /* end of RDF - boo hoo */ + state=RAPIER_STATE_UNKNOWN; + finished=1; + break; + } + /* When scanning, another element ending is outside the RDF + * world so this can happen without further work + */ + if(rdf_parser->scanning_for_rdf_RDF) { + state=RAPIER_STATE_UNKNOWN; + finished=1; + break; + } + /* otherwise found some junk after RDF content in an RDF-only + * document (probably never get here since this would be + * a mismatched XML tag and cause an error earlier) + */ + rapier_parser_warning(rdf_parser, "Element %s ended, expected end of RDF element\n", el_name); + state=RAPIER_STATE_UNKNOWN; + finished=1; + break; + + /* No need for 6.2 - already chose 6.3, 6.25, 6.26 or 6.27 */ + + case RAPIER_STATE_DESCRIPTION: + /* Never reached in any code outside start element + * since immediately moves on to RAPIER_STATE_IN_DESCRIPTION + * or RAPIER_STATE_TYPED_NODE + */ + abort(); + break; + + case RAPIER_STATE_IN_DESCRIPTION: + /* Must be end of description production </rdf:Description> */ + state=RAPIER_STATE_IN_RDF; + finished=1; + break; + + case RAPIER_STATE_TYPED_NODE: + /* Must be end of typedNode production element <typeName> */ + 
state=RAPIER_STATE_IN_RDF; + finished=1; + break; + + case RAPIER_STATE_SEQ: + finished=1; + break; + + case RAPIER_STATE_BAG: + finished=1; + break; + + case RAPIER_STATE_ALT: + finished=1; + break; + + case RAPIER_STATE_MEMBER: + finished=1; + break; + + case RAPIER_STATE_REFERENCEDITEM: + finished=1; + break; + + case RAPIER_STATE_INLINEITEM: + finished=1; + break; + + case RAPIER_STATE_PROPERTYELT: + finished=1; + break; + + default: + rapier_parser_fatal_error(rdf_parser, "Unexpected parser state %d.", + state); + finished=1; + + } /* end switch */ + + if(state != element->state) { + element->state=state; + fprintf(stderr, "rapier_xml_end_element_handler: moved to state %d\n", state); + } + + } /* end while */ + + + rapier_free_ns_name(element_name); + + rapier_end_namespaces_for_depth(rdf_parser); + rapier_free_element(element); + + rdf_parser->depth--; +} + + + +/* cdata (and ignorable whitespace for libxml). + * s is not 0 terminated for expat, is for libxml - grrrr. + */ +static void +rapier_xml_cdata_handler(void *userData, const XML_Char *s, int len) +{ + rapier_parser* rdf_parser=(rapier_parser*)userData; + rapier_element* element; + rapier_state state; + char *buffer; + char *ptr; + int all_whitespace=1; + int ignore_all_whitespace=0; + int i; + + for(i=0; i<len; i++) + if(!isspace(s[i])) { + all_whitespace=0; + break; + } + + element=rdf_parser->current_element; + + state=element->state; + switch(state) { + case RAPIER_STATE_UNKNOWN: + /* Ignore all cdata if still looking for RDF */ + if(rdf_parser->scanning_for_rdf_RDF) + return; + + /* Ignore all whitespace cdata before first element */ + if(all_whitespace) + return; + + /* This probably will never happen since that would make the + * XML not be well-formed + */ + rapier_parser_warning(rdf_parser, "Found cdata before RDF element."); + break; + + case RAPIER_STATE_IN_RDF: + case RAPIER_STATE_IN_DESCRIPTION: + /* Ignore all whitespace cdata inside <RDF> or <Description> + * when it occurs although 
note it was seen + */ + ignore_all_whitespace=1; + break; + + case RAPIER_STATE_DESCRIPTION: + /* Never reached in any code outside start element + * since immediately moves on to RAPIER_STATE_IN_DESCRIPTION + * or RAPIER_STATE_TYPED_NODE + */ + abort(); + break; + + case RAPIER_STATE_TYPED_NODE: + case RAPIER_STATE_SEQ: + case RAPIER_STATE_BAG: + case RAPIER_STATE_ALT: + case RAPIER_STATE_MEMBER: + case RAPIER_STATE_REFERENCEDITEM: + case RAPIER_STATE_INLINEITEM: + case RAPIER_STATE_PROPERTYELT: + break; + + default: + rapier_parser_fatal_error(rdf_parser, "Unexpected parser state %d.", + state); + } /* end switch */ + + + + if(++element->content_cdata_seen == 1 && + element->content_element_seen == 1) { + /* Uh oh - mixed content, this element has elements too */ + rapier_parser_warning(rdf_parser, "element %s has mixed content.", + element->name->qname); + } + + if(all_whitespace && ignore_all_whitespace) { + LIBRDF_DEBUG2(rapier_xml_end_element_handler, "Ignoring whitespace cdata inside element %s\n", element->name->qname); + return; + } + + /* +1 here is for \0 at end */ + buffer=(char*)LIBRDF_MALLOC(cstring, element->content_cdata_length + len + 1); + /* FIXME - no error return possible */ + if(!buffer) + return; + + if(element->content_cdata_length) { + strncpy(buffer, element->content_cdata, element->content_cdata_length); + LIBRDF_FREE(cstring, element->content_cdata); + } + element->content_cdata=buffer; + + ptr=buffer+element->content_cdata_length; /* append */ + + /* adjust stored length */ + element->content_cdata_length += len; + + /* now write new stuff at end of cdata buffer */ + strncpy(ptr, s, len); + ptr += len; + *ptr = '\0'; + + LIBRDF_DEBUG3(rapier_xml_cdata_handler, + "content cdata now: '%s' (%d bytes)\n", + buffer, element->content_cdata_length); +} + + +#ifdef HAVE_XML_SetNamespaceDeclHandler +static void +rapier_start_namespace_decl_handler(void *userData, + const XML_Char *prefix, const XML_Char *uri) +{ + rapier_parser* 
rdf_parser=(rapier_parser*)userData; + +#ifdef RAPIER_DEBUG + fprintf(stderr_parser->locator, "saw namespace %s URI %s\n", prefix, uri); +#endif +} + + +static void +rapier_end_namespace_decl_handler(void *userData, const XML_Char *prefix) +{ + rapier_parser* rdf_parser=(rapier_parser*)userData; + +#ifdef RAPIER_DEBUG + fprintf(stderr_parser->locator, "saw end namespace prefix %s\n", prefix); +#endif +} +#endif + + +#ifdef NEED_LIBXML +#include <stdarg.h> + +static const char* xml_warning_prefix="XML parser warning - "; +static const char* xml_error_prefix="XML parser error - "; +static const char* xml_fatal_error_prefix="XML parser fatal error - "; + +static void +rapier_xml_warning(void *ctx, const char *msg, ...) +{ + va_list args; + int length; + char *nmsg; + + va_start(args, msg); + length=strlen(xml_warning_prefix)+strlen(msg)+1; + msg=(char*)LIBRDF_MALLOC(cstring, length); + if(!msg) { + /* just pass on, might be out of memory error */ + rapier_parser_warning(parser, msg, args); + } else { + strcpy(nmsg, xml_warning_prefix); + strcat(nmsg, msg); + rapier_parser_warning(parser, nmsg, args); + LIBRDF_FREE(cstring,nmsg); + } + va_end(args); +} + + +static void +rapier_xml_error(void *ctx, const char *msg, ...) +{ + va_list args; + int length; + char *nmsg; + + va_start(args, msg); + length=strlen(xml_error_prefix)+strlen(msg)+1; + msg=(char*)LIBRDF_MALLOC(cstring, length); + if(!msg) { + /* just pass on, might be out of memory error */ + rapier_parser_error(parser, msg, args); + } else { + strcpy(nmsg, xml_error_prefix); + strcat(nmsg, msg); + rapier_parser_error(parser, nmsg, args); + LIBRDF_FREE(cstring,nmsg); + } + va_end(args); +} + + +static void +rapier_xml_fatal_error(void *ctx, const char *msg, ...) 
+{ + va_list args; + int length; + char *nmsg; + + va_start(args, msg); + length=strlen(xml_fatal_error_prefix)+strlen(msg)+1; + msg=(char*)LIBRDF_MALLOC(cstring, length); + if(!msg) { + /* just pass on, might be out of memory error */ + rapier_parser_fatal_error(parser, msg, args); + } else { + strcpy(nmsg, xml_error_prefix); + strcat(nmsg, msg); + rapier_parser_fatal_error(parser, nmsg, args); + LIBRDF_FREE(cstring,nmsg); + } + va_end(args); +} + +#endif + + + +/** + * rapier_file_uri_to_filename - Convert a URI representing a file (starting file:) to a filename + * @uri: URI of string + * + * Return value: the filename or NULL on failure + **/ +static char * +rapier_file_uri_to_filename(const char *uri) +{ + int length; + char *filename; + + if (strncmp(uri, "file:", 5)) + return NULL; + + /* FIXME: unix version of URI -> filename conversion */ + length=strlen(uri) -5 +1; + filename=LIBRDF_MALLOC(cstring, length); + if(!filename) + return NULL; + + strcpy(filename, uri+5); + return filename; +} + + +/* + * rapier_parser_fatal_error - Error from a parser - Internal + **/ +static void +rapier_parser_fatal_error(rapier_parser* parser, const char *message, ...) +{ + va_list arguments; + + parser->failed=1; + + if(parser->fatal_error_fn) { + parser->fatal_error_fn(parser->fatal_error_user_data, + &parser->locator, message); + abort(); + } + + va_start(arguments, message); + + rapier_print_locator(stderr, &parser->locator); + fprintf(stderr, " rapier fatal error - "); + vfprintf(stderr, message, arguments); + fputc('\n', stderr); + + va_end(arguments); + + abort(); +} + + +/* + * rapier_parser_error - Error from a parser - Internal + **/ +static void +rapier_parser_error(rapier_parser* parser, const char *message, ...) 
+{ + va_list arguments; + + if(parser->error_fn) { + parser->error_fn(parser->error_user_data, &parser->locator, message); + return; + } + + va_start(arguments, message); + + rapier_print_locator(stderr, &parser->locator); + fprintf(stderr, " rapier error - "); + vfprintf(stderr, message, arguments); + fputc('\n', stderr); + + va_end(arguments); +} + + +/* + * rapier_parser_warning - Warning from a parser - Internal + **/ +static void +rapier_parser_warning(rapier_parser* parser, const char *message, ...) +{ + va_list arguments; + + if(parser->warning_fn) { + parser->warning_fn(parser->warning_user_data, &parser->locator, message); + return; + } + + va_start(arguments, message); + + rapier_print_locator(stderr, &parser->locator); + fprintf(stderr, " rapier warning - "); + vfprintf(stderr, message, arguments); + fputc('\n', stderr); + + va_end(arguments); +} + + +#ifdef NEED_LIBXML +/* from http://www.daa.com.au/~james/gnome/xml-sax/implementing.html */ +#include <parserInternals.h> + +static int myXmlSAXParseFile(xmlSAXHandlerPtr sax, void *user_data, const char *filename); + +static int +myXmlSAXParseFile(xmlSAXHandlerPtr sax, void *user_data, const char *filename) +{ + int ret = 0; + xmlParserCtxtPtr ctxt; + + ctxt = xmlCreateFileParserCtxt(filename); + if (ctxt == NULL) return -1; + ctxt->sax = sax; + ctxt->userData = user_data; + + xmlParseDocument(ctxt); + + if (ctxt->wellFormed) + ret = 0; + else + ret = -1; + if (sax != NULL) + ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + + return ret; +} +#endif + + + + +/* PUBLIC FUNCTIONS */ + +/** + * rapier_new - Initialise the Rapier RDF parser + * + * Return value: non 0 on failure + **/ +rapier_parser* +rapier_new(void) +{ + rapier_parser* rdf_parser; +#ifdef NEED_EXPAT + XML_Parser xp; +#endif + + rdf_parser=LIBRDF_CALLOC(rapier_parser, 1, sizeof(rapier_parser)); + + if(!rdf_parser) + return NULL; + +#ifdef NEED_EXPAT + xp=XML_ParserCreate(NULL); + + /* create a new parser in the specified encoding */ + 
XML_SetUserData(xp, rdf_parser); + + /* XML_SetEncoding(xp, "..."); */ + + XML_SetElementHandler(xp, rapier_xml_start_element_handler, + rapier_xml_end_element_handler); + XML_SetCharacterDataHandler(xp, rapier_xml_cdata_handler); +#ifdef HAVE_XML_SetNamespaceDeclHandler + XML_SetNamespaceDeclHandler(xp, + rapier_start_namespace_decl_handler, + rapier_end_namespace_decl_handler); +#endif + rdf_parser->xp=xp; +#endif + +#ifdef NEED_LIBXML + xmlDefaultSAXHandlerInit(); + rdf_parser->sax.startElement=rapier_xml_start_element_handler; + rdf_parser->sax.endElement=rapier_xml_end_element_handler; + + rdf_parser->sax.characters=rapier_xml_cdata_handler; + rdf_parser->sax.ignorableWhitespace=rapier_xml_cdata_handler; + + rdf_parser->sax.warning=rapier_xml_warning; + rdf_parser->sax.error=rapier_xml_error; + rdf_parser->sax.fatalError=rapier_xml_fatal_error; + + /* xmlInitParserCtxt(&rdf_parser->xc); */ +#endif + + rapier_init_namespaces(rdf_parser); + + return rdf_parser; +} + + + + +/** + * rapier_free - Free the Rapier RDF parser + * @rdf_parser: parser object + * + **/ +void +rapier_free(rapier_parser *rdf_parser) +{ + rapier_element* element; + rapier_ns_map* ns; + + ns=rdf_parser->namespaces; + while(ns) { + rapier_ns_map* next_ns=ns->next; + + rapier_free_namespace(rdf_parser, ns); + ns=next_ns; + } + + while((element=rapier_element_pop(rdf_parser))) { + rapier_free_element(element); + } + + LIBRDF_FREE(rapier_parser, rdf_parser); +} + + +/** + * rapier_parser_set_fatal_error - Set the parser error handling function + * @parser: the parser + * @user_data: user data to pass to function + * @fatal_error_fn: pointer to the function + * + * The function will receive callbacks when the parser fails. 
+ * + **/ +void +rapier_parser_set_fatal_error(rapier_parser* parser, void *user_data, + void (*fatal_error_fn)(void *user_data, rapier_locator* locator, const char *msg, ...)) +{ + parser->fatal_error_user_data=user_data; + parser->fatal_error_fn=fatal_error_fn; +} + + +/** + * rapier_parser_set_error - Set the parser error handling function + * @parser: the parser + * @user_data: user data to pass to function + * @error_fn: pointer to the function + * + * The function will receive callbacks when the parser fails. + * + **/ +void +rapier_parser_set_error(rapier_parser* parser, void *user_data, + void (*error_fn)(void *user_data, rapier_locator* locator, const char *msg, ...)) +{ + parser->error_user_data=user_data; + parser->error_fn=error_fn; +} + + +/** + * rapier_parser_set_warning - Set the parser warning handling function + * @parser: the parser + * @user_data: user data to pass to function + * @warning_fn: pointer to the function + * + * The function will receive callbacks when the parser gives a warning. 
+ * + **/ +void +rapier_parser_set_warning(rapier_parser* parser, void *user_data, + void (*warning_fn)(void *user_data, rapier_locator* locator, const char *msg, ...)) +{ + parser->warning_user_data=user_data; + parser->warning_fn=warning_fn; +} + + +void +rapier_set_triple_handler(rapier_parser* parser, + void *userData, + void (*triple_handler)(void *userData, + const char *subject, rapier_subject_type subject_type, + const char *predicate, rapier_predicate_type predicate_type, + const char *object, rapier_object_type object_type)) +{ + parser->triple_handler=triple_handler; +} + + + + + +/** + * rapier_parse_file - Retrieve the RDF/XML content at URI + * @rdf_parser: parser + * @uri: URI of RDF content + * @base_uri: the base URI to use (or NULL if the same) + * + * Return value: non 0 on failure + **/ +int +rapier_parse_file(rapier_parser* rdf_parser, const char *uri, + const char *base_uri) +{ +#ifdef NEED_EXPAT + XML_Parser xp; +#endif +#ifdef NEED_LIBXML + /* parser context */ + xmlParserCtxtPtr xc; +#endif +#define RBS 1024 + FILE *fh; + char buffer[RBS]; + int rc=1; + int len; + const char *filename; + rapier_locator *locator=&rdf_parser->locator; /* for storing error info */ + + /* initialise fields */ + rdf_parser->depth=0; + rdf_parser->root_element= rdf_parser->current_element=NULL; + rdf_parser->failed=0; + + + +#ifdef NEED_EXPAT + xp=rdf_parser->xp; + + XML_SetBase(xp, base_uri); +#endif + + + filename=rapier_file_uri_to_filename(uri); + if(!filename) + return 1; + + locator->file=filename; + locator->uri=base_uri; + + fh=fopen(filename, "r"); + if(!fh) { + rapier_parser_error(rdf_parser, "file open failed - %s", strerror(errno)); +#ifdef NEED_EXPAT + XML_ParserFree(xp); +#endif /* EXPAT */ + LIBRDF_FREE(cstring, filename); + return 1; + } + +#ifdef NEED_LIBXML + /* libxml needs at least 4 bytes from the XML content to allow + * content encoding detection to work */ + len=fread(buffer, 1, 4, fh); + if(len>0) { + xc = 
xmlCreatePushParserCtxt(&rdf_parser->sax, rdf_parser, + buffer, len, filename); + } else { + fclose(fh); + fh=NULL; + } + +#endif + + while(fh && !feof(fh)) { + len=fread(buffer, 1, RBS, fh); + if(len <= 0) { +#ifdef NEED_EXPAT + XML_Parse(xp, buffer, 0, 1); +#endif +#ifdef NEED_LIBXML + xmlParseChunk(xc, buffer, 0, 1); +#endif + break; + } +#ifdef NEED_EXPAT + rc=XML_Parse(xp, buffer, len, (len < RBS)); + if(len < RBS) + break; + if(!rc) /* expat: 0 is failure */ + break; +#endif +#ifdef NEED_LIBXML + rc=xmlParseChunk(xc, buffer, len, 0); + if(len < RBS) + break; + if(rc) /* libxml: non 0 is failure */ + break; +#endif + } + fclose(fh); + +#ifdef NEED_EXPAT + if(!rc) { + int xe=XML_GetErrorCode(xp); + + locator->line=XML_GetCurrentLineNumber(xp); + locator->column=XML_GetCurrentColumnNumber(xp); + locator->byte=XML_GetCurrentByteIndex(xp); + + rapier_parser_error(rdf_parser, "XML Parsing failed - %s", + XML_ErrorString(xe)); + rc=1; + } else + rc=0; + + XML_ParserFree(xp); +#endif /* EXPAT */ +#ifdef NEED_LIBXML + if(rc) { + rapier_parser_error(parser, "XML Parsing failed"); +#endif + + LIBRDF_FREE(cstring, filename); + + return (rc != 0); +} + + +void +rapier_print_locator(FILE *stream, rapier_locator* locator) +{ + if(!locator) + return; + + if(locator->uri) + fprintf(stream, "URI %s", locator->uri); + else if (locator->file) + fprintf(stream, "file %s", locator->file); + else + return; + if(locator->line) { + fprintf(stream, ":%d", locator->line); + if(locator->column) + fprintf(stream, " column %d", locator->column); + } +} + + + +void +rapier_set_feature(rapier_parser *parser, rapier_feature feature, int value) { + switch(feature) { + case RAPIER_FEATURE_SCANNING: + parser->scanning_for_rdf_RDF=value; + break; + + case RAPIER_FEATURE_RDF_NON_NS_ATTRIBUTES: + parser->forbid_rdf_non_ns_attributes=!value; /* negative logic - FIXME */ + break; + + default: + break; + } +} diff --git a/src/raptor_parse.c b/src/raptor_parse.c new file mode 100644 index 
00000000..9b4be7af --- /dev/null +++ b/src/raptor_parse.c @@ -0,0 +1,2009 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * rapier_parse.c - Redland Parser for RDF (RAPIER) + * + * $Id$ + * + * Copyright (C) 2000 David Beckett - http://purl.org/net/dajobe/ + * Institute for Learning and Research Technology, University of Bristol. + * + * This package is Free Software available under either of two licenses + * (see FAQS.html to see why): + * + * 1. The GNU Lesser General Public License (LGPL) + * + * See http://www.gnu.org/copyleft/lesser.html or COPYING.LIB for the + * full license text. + * _________________________________________________________________ + * + * Copyright (C) 2000 David Beckett, Institute for Learning and + * Research Technology, University of Bristol. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * _________________________________________________________________ + * + * NOTE - under Term 3 of the LGPL, you may choose to license the entire + * library under the GPL. See COPYING for the full license text. + * + * 2. The Mozilla Public License + * + * See http://www.mozilla.org/MPL/MPL-1.1.html or MPL.html for the full + * license text. + * + * Under MPL section 13. 
I declare that all of the Covered Code is + * Multiple Licensed: + * _________________________________________________________________ + * + * The contents of this file are subject to the Mozilla Public License + * version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License + * at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and + * limitations under the License. + * + * The Initial Developer of the Original Code is David Beckett. + * Portions created by David Beckett are Copyright (C) 2000 David + * Beckett, Institute for Learning and Research Technology, University + * of Bristol. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU Lesser General Public License, in which case the + * provisions of the LGPL License are applicable instead of those + * above. If you wish to allow use of your version of this file only + * under the terms of the LGPL License and not to allow others to use + * your version of this file under the MPL, indicate your decision by + * deleting the provisions above and replace them with the notice and + * other provisions required by the LGPL License. If you do not delete + * the provisions above, a recipient may use your version of this file + * under either the MPL or the LGPL License. 
+ */ + + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#ifdef HAVE_STDARG_H +#include <stdarg.h> +#endif + +extern int errno; + +#define RAPIER_INTERNAL + +#ifdef LIBRDF_INTERNAL +/* if inside Redland */ +#include <librdf.h> + +#include <rdf_parser.h> +#include <rdf_node.h> +#include <rdf_stream.h> +#include <rdf_statement.h> + +#else +/* else standalone */ + +#define LIBRDF_MALLOC(type, size) malloc(size) +#define LIBRDF_CALLOC(type, size, count) calloc(size, count) +#define LIBRDF_FREE(type, ptr) free((void*)ptr) + +#ifdef RAPIER_DEBUG +/* Debugging messages */ +#define LIBRDF_DEBUG1(function, msg) do {fprintf(stderr, "%s:%d:%s: " msg, __FILE__, __LINE__, #function); } while(0) +#define LIBRDF_DEBUG2(function, msg, arg1) do {fprintf(stderr, "%s:%d:%s: " msg, __FILE__, __LINE__, #function, arg1);} while(0) +#define LIBRDF_DEBUG3(function, msg, arg1, arg2) do {fprintf(stderr, "%s:%d:%s: " msg, __FILE__, __LINE__, #function, arg1, arg2);} while(0) +#define LIBRDF_DEBUG4(function, msg, arg1, arg2, arg3) do {fprintf(stderr, "%s:%d:%s: " msg, __FILE__, __LINE__, #function, arg1, arg2, arg3);} while(0) + +#else +/* DEBUGGING TURNED OFF */ + +/* No debugging messages */ +#define LIBRDF_DEBUG1(function, msg) +#define LIBRDF_DEBUG2(function, msg, arg1) +#define LIBRDF_DEBUG3(function, msg, arg1, arg2) +#define LIBRDF_DEBUG4(function, msg, arg1, arg2, arg3) + +#endif + +#endif + + +/* for the memory allocation functions */ +#if defined(HAVE_DMALLOC_H) && defined(RAPIER_MEMORY_DEBUG_DMALLOC) +#include <dmalloc.h> +#undef HAVE_STDLIB_H +#endif + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#undef HAVE_STDLIB_H +#endif + + +/* XML parser includes */ +#ifdef NEED_EXPAT +#include <xmlparse.h> +#endif + +#ifdef NEED_LIBXML +#ifdef HAVE_GNOME_XML_PARSER_H +#include <gnome-xml/parser.h> +/* translate names from expat to libxml */ +#define XML_Char xmlChar +#else +#include <parser.h> +#endif +#endif + + +/* Rapier 
includes */ +#include <rapier.h> + +/* Rapier structures */ +/* namespace stack node */ +typedef struct rapier_ns_map_s rapier_ns_map; + +typedef enum { + /* Not in RDF grammar yet - searching for a start element. + * This can be <rdf:RDF> (goto 6.1) but since it is optional, + * the start element can also be <Description> (goto 6.3), + * <rdf:Seq> (goto 6.25) <rdf:Bag> (goto 6.26) or <rdf:Alt> (goto 6.27) + * OR from 6.3 can have ANY other element matching + * typedNode (6.13) - goto 6.3 + * CHOICE: Search for <rdf:RDF> node before starting match + * OR assume RDF content, hence go straight to production + */ + RAPIER_STATE_UNKNOWN = 0, + + /* Met production 6.1 (RDF) <rdf:RDF> element seen and can now + * expect <rdf:Description> (goto 6.3), <rdf:Seq> (goto 6.25) + * <rdf:Bag> (goto 6.26) or <rdf:Alt> (goto 6.27) OR from 6.3 can + * have ANY other element matching typedNode (6.13) - goto 6.3 + */ + RAPIER_STATE_IN_RDF = 6010, + + /* No need for 6.2 - already chose 6.3, 6.25, 6.26 or 6.27 */ + + /* Met production 6.3 (description) <rdf:Description> element + * OR 6.13 (typedNode) [pretty much anything else] + * CHOICE: Create a bag here (always? even if no bagId given) + * CHOICES: Match rdf:resource/resource, ID/rdf:ID attributes etc. + */ + RAPIER_STATE_DESCRIPTION = 6030, + + /* Matching part of 6.3 (description) inside a <Description> + * where either a list of propertyElt elements is expected or + * an empty element + */ + RAPIER_STATE_IN_DESCRIPTION = 6031, + + /* met production 6.12 (propertyElt) + */ + RAPIER_STATE_PROPERTYELT = 6120, + + /* met production 6.13 (typedNode) + */ + RAPIER_STATE_TYPED_NODE = 6130, + + /* Met production 6.25 (sequence) <rdf:Seq> element seen. Goto 6.28 */ + RAPIER_STATE_SEQ = 6250, + + /* Met production 6.26 (bag) <rdf:Bag> element seen. Goto 6.28 */ + RAPIER_STATE_BAG = 6260, + + /* Met production 6.27 (alternative) <rdf:Alt> element seen. 
Goto 6.28 */ + RAPIER_STATE_ALT = 6270, + + /* Met production 6.28 (member) + * Now expect <rdf:li> element and if it empty, with resource attribute + * goto 6.29 otherwise goto 6.30 + * CHOICE: Match rdf:resource/resource + */ + RAPIER_STATE_MEMBER = 6280, + + /* met production 6.29 (referencedItem) + * Found a container item with reference - <rdf:li (rdf:)resource=".."/> */ + RAPIER_STATE_REFERENCEDITEM = 6290, + + /* met production 6.30 (inlineItem) + * Found a container item with content - <rdf:li> */ + RAPIER_STATE_INLINEITEM = 6300, + +} rapier_state; + + +/* Forms: + * 1) prefix=NULL uri=<URI> - default namespace defined + * 2) prefix=NULL, uri=NULL - no default namespace + * 3) prefix=<prefix>, uri=<URI> - regular pair defined <prefix>:<URI> + */ +struct rapier_ns_map_s { + struct rapier_ns_map_s* next; /* next down the stack, NULL at bottom */ + char *prefix; /* NULL means is the default namespace */ + char *uri; + int uri_length; + int depth; /* parse depth that this was added, delete when parser leaves this */ + int is_rdf_ms; /* Non 0 if is RDF M&S Namespace */ + int is_rdf_schema; /* Non 0 if is RDF Schema Namespace */ +}; + + +/* + * Rapier XML-namespaced name, for elements or attributes + */ + +/* There are three forms + * namespace=NULL - un-namespaced name + * namespace=defined, namespace->prefix=NULL - (default ns) name + * namespace=defined, namespace->prefix=defined - ns:name + */ +typedef struct { + const rapier_ns_map *namespace; + const char *qname; + const char *uri; /* URI of namespace+qname or NULL if not defined */ + const char *value; /* optional value - used when name is an attribute */ +} rapier_ns_name; + + +typedef enum { + RDF_ATTR_about = 0, /* value of rdf:about attribute */ + RDF_ATTR_aboutEach = 1, /* " rdf:aboutEach */ + RDF_ATTR_aboutEachPrefix = 2, /* " rdf:aboutEachPrefix */ + RDF_ATTR_ID = 3, /* " rdf:ID */ + RDF_ATTR_bagID = 4, /* " rdf:bagID */ + RDF_ATTR_reference = 5, /* " rdf:reference */ + RDF_ATTR_type = 6, /* " 
rdf:type */ + RDF_ATTR_parseType = 7, /* " rdf:parseType */ + + RDF_ATTR_LAST = RDF_ATTR_parseType +} rdf_attr; + +static const char *rdf_attr_names[]={ + "about", + "aboutEach", + "aboutEachPrefix", + "ID", + "bagID", + "reference", + "type", + "parseType", +}; + + +/* + * Rapier Element/attributes on stack + */ +struct rapier_element_s { + struct rapier_element_s *parent; /* NULL at bottom of stack */ + rapier_ns_name *name; + rapier_ns_name **attributes; + int attribute_count; + const char * rdf_attr[8]; /* attributes declared in M&S */ + + rapier_state state; /* state that this production matches */ + + /* CDATA content of element and checks for mixed content */ + char *content_cdata; + int content_element_seen; + int content_cdata_seen; + int content_cdata_length; + +}; + +typedef struct rapier_element_s rapier_element; + + +/* + * Rapier parser object + */ +struct rapier_parser_s { + /* XML parser specific stuff */ +#ifdef NEED_EXPAT + XML_Parser xp; +#endif +#ifdef NEED_LIBXML + /* structure holding sax event handlers */ + xmlSAXHandler sax; + /* parser context */ + /* xmlParserCtxtPtr xc; */ +#endif + + /* element depth */ + int depth; + + /* stack of namespaces, most recently added at top */ + rapier_ns_map *namespaces; + + /* can be filled with error location information */ + rapier_locator locator; + + /* stack of elements - elements add after current_element */ + rapier_element *root_element; + rapier_element *current_element; + + /* non 0 if parser had fatal error and cannot continue */ + int failed; + + /* non 0 if scanning for <rdf:RDF> element, else assume doc is RDF */ + int scanning_for_rdf_RDF; + + /* non 0 to forbid non-namespaced resource, ID etc attributes + * on RDF namespaced-elements + */ + int forbid_rdf_non_ns_attributes; + + /* stuff for our user */ + void *user_data; + + void *fatal_error_user_data; + void *error_user_data; + void *warning_user_data; + void (*fatal_error_fn)(void *user_data, rapier_locator *locator, const char *msg, 
...); + void (*error_fn)(void *user_data, rapier_locator *locator, const char *msg, ...); + void (*warning_fn)(void *user_data, rapier_locator *locator, const char *msg, ...); + + /* parser callbacks */ + void (*triple_handler)(void *userData, const char *subject, rapier_subject_type subject_type, const char *predicate, rapier_predicate_type predicate_type, const char *object, rapier_object_type object_type); +}; + + + + +/* static variables */ +static const char *rapier_rdf_ms_uri=RAPIER_RDF_MS_URI; +static const char *rapier_rdf_schema_uri=RAPIER_RDF_SCHEMA_URI; + + + +/* Prototypes for common expat/libxml parsing event-handling functions */ +static void rapier_xml_start_element_handler(void *userData, + const XML_Char *name, + const XML_Char **atts); + +static void rapier_xml_end_element_handler(void *userData, + const XML_Char *name); + + +/* s is not 0 terminated. */ +static void rapier_xml_cdata_handler(void *userData, + const XML_Char *s, + int len); +#ifdef HAVE_XML_SetNamespaceDeclHandler +static void rapier_start_namespace_decl_handler(void *userData, + const XML_Char *prefix, + const XML_Char *uri); + +static void rapier_end_namespace_decl_handler(void *userData, + const XML_Char *prefix); +#endif + +/* libxml-only prototypes */ +#ifdef NEED_LIBXML +static void rapier_xml_warning(void *context, rapier_locator *locator, const char *msg, ...); +static void rapier_xml_error(void *context, rapier_locator *locator, const char *msg, ...); +static void rapier_xml_fatal_error(void *context, rapier_locator *locator, const char *msg, ...); +#endif + + +/* Prototypes for local functions */ +static char * rapier_file_uri_to_filename(const char *uri); +static void rapier_parser_fatal_error(rapier_parser* parser, const char *message, ...); +static void rapier_parser_error(rapier_parser* parser, const char *message, ...); +static void rapier_parser_warning(rapier_parser* parser, const char *message, ...); + + + +/* prototypes for namespace and name/qname functions */ 
+static void rapier_init_namespaces(rapier_parser *rdf_parser); +static void rapier_start_namespace(rapier_parser *rdf_parser, const char *prefix, const char *namespace, int depth); +static void rapier_free_namespace(rapier_parser *rdf_parser, rapier_ns_map* namespace); +static void rapier_end_namespace(rapier_parser *rdf_parser, const char *prefix, const char *namespace); +static void rapier_end_namespaces_for_depth(rapier_parser *rdf_parser); +static rapier_ns_name* rapier_make_namespaced_name(rapier_parser *rdf_parser, const char *name, const char *value, int is_element); +static void rapier_print_ns_name(FILE *stream, rapier_ns_name* name); +static void rapier_free_ns_name(rapier_ns_name* name); +static int rapier_ns_names_equal(rapier_ns_name *name1, rapier_ns_name *name2); + + +/* prototypes for element functions */ +static rapier_element* rapier_element_pop(rapier_parser *rdf_parser); +static void rapier_element_push(rapier_parser *rdf_parser, rapier_element* element); +static void rapier_free_element(rapier_element *element); +#ifdef RAPIER_DEBUG +static void rapier_print_element(rapier_element *element, FILE* stream); +#endif + + + +/* + * Namespaces in XML + * http://www.w3.org/TR/1999/REC-xml-names-19990114/#nsc-NSDeclared + * (section 4) says: + * + * -------------------------------------------------------------------- + * The prefix xml is by definition bound to the namespace name + * http://www.w3.org/XML/1998/namespace + * -------------------------------------------------------------------- + * + * Thus should define it in the table of namespaces before we start. + * + * We *can* also define others, but let's not. 
+ * + */ +static void +rapier_init_namespaces(rapier_parser *rdf_parser) { + rapier_start_namespace(rdf_parser, "xml", "http://www.w3.org/XML/1998/namespace", -1); +} + + +static void +rapier_start_namespace(rapier_parser *rdf_parser, + const char *prefix, const char *namespace, + int depth) +{ + int uri_length=0; + int len; + rapier_ns_map *map; + void *p; + + LIBRDF_DEBUG4(rapier_start_namespace, + "namespace prefix %s uri %s depth %d\n", + prefix ? prefix : "(default)", namespace, depth); + + /* Convert an empty namespace string "" to a NULL pointer */ + if(!*namespace) + namespace=NULL; + + len=sizeof(rapier_ns_map); + if(namespace) { + uri_length=strlen(namespace); + len+=uri_length+1; + } + if(prefix) + len+=strlen(prefix)+1; + + /* Just one malloc for map structure + namespace (maybe) + prefix (maybe)*/ + map=(rapier_ns_map*)LIBRDF_CALLOC(rapier_ns_map, len, 1); + if(!map) + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + + p=(void*)map+sizeof(rapier_ns_map); + if(namespace) { + map->uri=strcpy((char*)p, namespace); + map->uri_length=uri_length; + p+= uri_length+1; + } + if(prefix) + map->prefix=strcpy((char*)p, prefix); + map->depth=depth; + + /* set convienience flags when there is a defined namespace URI */ + if(namespace) { + if(!strncmp(namespace, rapier_rdf_ms_uri, uri_length)) + map->is_rdf_ms=1; + else if(!strncmp(namespace, rapier_rdf_schema_uri, uri_length)) + map->is_rdf_schema=1; + } + + if(rdf_parser->namespaces) + map->next=rdf_parser->namespaces; + rdf_parser->namespaces=map; +} + + +static void +rapier_free_namespace(rapier_parser *rdf_parser, rapier_ns_map* namespace) +{ + LIBRDF_FREE(rapier_ns_map, namespace); +} + + +static void +rapier_end_namespace(rapier_parser *rdf_parser, + const char *prefix, const char *namespace) +{ + LIBRDF_DEBUG3(rapier_end_namespace, "prefix %s uri \"%s\"\n", + prefix ? 
prefix : "(default)", namespace); +} + + +static void +rapier_end_namespaces_for_depth(rapier_parser *rdf_parser) +{ + while(rdf_parser->namespaces && + rdf_parser->namespaces->depth == rdf_parser->depth) { + rapier_ns_map* ns=rdf_parser->namespaces; + rapier_ns_map* next=ns->next; + + rapier_end_namespace(rdf_parser, ns->prefix, ns->uri); + rapier_free_namespace(rdf_parser, ns); + + rdf_parser->namespaces=next; + } + +} + + + +/* + * Namespaces in XML + * http://www.w3.org/TR/1999/REC-xml-names-19990114/#defaulting + * says: + * + * -------------------------------------------------------------------- + * 5.2 Namespace Defaulting + * + * A default namespace is considered to apply to the element where it + * is declared (if that element has no namespace prefix), and to all + * elements with no prefix within the content of that element. + * + * If the URI reference in a default namespace declaration is empty, + * then unprefixed elements in the scope of the declaration are not + * considered to be in any namespace. + * + * Note that default namespaces do not apply directly to attributes. + * + * [...] + * + * 5.3 Uniqueness of Attributes + * + * In XML documents conforming to this specification, no tag may + * contain two attributes which: + * + * 1. have identical names, or + * + * 2. have qualified names with the same local part and with + * prefixes which have been bound to namespace names that are + * identical. 
+ * -------------------------------------------------------------------- + */ + +static rapier_ns_name* +rapier_make_namespaced_name(rapier_parser *rdf_parser, const char *name, + const char *value, int is_element) +{ + rapier_ns_name* ns_name; + const char *p; + char *new_value=NULL; + rapier_ns_map* ns; + char* new_name; + int prefix_length; + int qname_length=0; + +#if RAPIER_DEBUG > 1 + LIBRDF_DEBUG2(rapier_make_namespaced_name, + "name %s\n", name); +#endif + + ns_name=(rapier_ns_name*)LIBRDF_CALLOC(rapier_ns_name, sizeof(rapier_ns_name), 1); + if(!ns_name) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + return NULL; + } + + if(value) { + new_value=(char*)LIBRDF_MALLOC(cstring, strlen(value)+1); + if(!new_value) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + LIBRDF_FREE(rapier_ns_name, ns_name); + return NULL; + } + strcpy(new_value, value); + ns_name->value=new_value; + } + + /* Find : */ + for(p=name; *p && *p != ':'; p++) + ; + + /* No : - pick up default namespace, if there is one */ + if(!*p) { + new_name=(char*)LIBRDF_MALLOC(cstring, strlen(name)+1); + if(!new_name) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + rapier_free_ns_name(ns_name); + return NULL; + } + strcpy(new_name, name); + ns_name->qname=new_name; + + /* Find a default namespace */ + for(ns=rdf_parser->namespaces; ns && ns->prefix; ns=ns->next) + ; + + if(ns) { + ns_name->namespace=ns; +#if RAPIER_DEBUG > 1 + LIBRDF_DEBUG2(rapier_make_namespaced_name, + "Found default namespace %s\n", ns->uri); +#endif + } else { + /* failed to find namespace - now what? 
FIXME */ + /* rapier_parser_warning(rdf_parser, "No default namespace defined - cannot expand %s", name); */ +#if RAPIER_DEBUG > 1 + LIBRDF_DEBUG1(rapier_make_namespaced_name, + "No default namespace defined\n"); +#endif + } + + } else { + prefix_length=p-name; + p++; /* move to start of qname */ + qname_length=strlen(p); + new_name=(char*)LIBRDF_MALLOC(cstring, qname_length+1); + if(!new_name) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + rapier_free_ns_name(ns_name); + return NULL; + } + strcpy(new_name, p); + ns_name->qname=new_name; + + /* Find the namespace */ + for(ns=rdf_parser->namespaces; ns ; ns=ns->next) + if(ns->prefix && !strncmp(name, ns->prefix, prefix_length)) + break; + + if(!ns) { + /* failed to find namespace - now what? */ + rapier_parser_error(rdf_parser, "Failed to find namespace in %s", name); + rapier_free_ns_name(ns_name); + return NULL; + } + +#if RAPIER_DEBUG > 1 + LIBRDF_DEBUG3(rapier_make_namespaced_name, + "Found namespace prefix %s URI %s\n", ns->prefix, ns->uri); +#endif + ns_name->namespace=ns; + } + + /* If namespace has a URI and a qname is defined, create the URI + * for this element + */ + if(ns_name->namespace && ns_name->namespace->uri && qname_length) { + char *uri_string=(char*)LIBRDF_MALLOC(cstring, + ns_name->namespace->uri_length + + qname_length + 1); + if(!uri_string) { + rapier_free_ns_name(ns_name); + return NULL; + } + ns_name->uri=uri_string; + } + + + return ns_name; +} + + +static void +rapier_print_ns_name(FILE *stream, rapier_ns_name* name) +{ + if(name->namespace) { + if(name->namespace->prefix) + fprintf(stream, "%s:%s", name->namespace->prefix, name->qname); + else + fprintf(stream, "(default):%s", name->qname); + } else + fputs(name->qname, stream); +} + + +static void +rapier_free_ns_name(rapier_ns_name* name) +{ + if(name->qname) + LIBRDF_FREE(cstring, name->qname); + + if(name->uri) + LIBRDF_FREE(cstring, name->uri); + + if(name->value) + LIBRDF_FREE(cstring, name->value); + 
LIBRDF_FREE(rapier_ns_name, name); +} + + +static int +rapier_ns_names_equal(rapier_ns_name *name1, rapier_ns_name *name2) +{ + if(strcmp(name1->qname, name2->qname)) + return 0; + if(name1->namespace != name2->namespace) + return 0; + return 1; +} + + +static rapier_element* +rapier_element_pop(rapier_parser *rdf_parser) +{ + rapier_element *element=rdf_parser->current_element; + + if(!element) + return NULL; + + rdf_parser->current_element=element->parent; + if(rdf_parser->root_element == element) /* just deleted root */ + rdf_parser->root_element=NULL; + + return element; +} + + +static void +rapier_element_push(rapier_parser *rdf_parser, rapier_element* element) +{ + element->parent=rdf_parser->current_element; + rdf_parser->current_element=element; + if(!rdf_parser->root_element) + rdf_parser->root_element=element; +} + + +static void +rapier_free_element(rapier_element *element) +{ + int i; + + for (i=0; i < element->attribute_count; i++) + if(element->attributes[i]) + rapier_free_ns_name(element->attributes[i]); + + if(element->attributes) + LIBRDF_FREE(rapier_ns_name_array, element->attributes); + + /* Free special RDF M&S attributes */ + for(i=0; i<= RDF_ATTR_LAST; i++) + if(element->rdf_attr[i]) + LIBRDF_FREE(cstring, element->rdf_attr[i]); + + if(element->content_cdata_length) + LIBRDF_FREE(rapier_ns_name_array, element->content_cdata); + + rapier_free_ns_name(element->name); + LIBRDF_FREE(rapier_element, element); +} + + + +#ifdef RAPIER_DEBUG +static void +rapier_print_element(rapier_element *element, FILE* stream) +{ + int i; + + rapier_print_ns_name(stream, element->name); + fputc('\n', stream); + + if(element->attribute_count) { + fputs(" attributes: ", stream); + for (i = 0; i < element->attribute_count; i++) { + if(i) + fputc(' ', stream); + rapier_print_ns_name(stream, element->attributes[i]); + fprintf(stream, "='%s'", element->attributes[i]->value); + } + fputc('\n', stream); + } +} +#endif + + +static void 
+rapier_xml_start_element_handler(void *userData, + const XML_Char *name, const XML_Char **atts) +{ + rapier_parser* rdf_parser=(rapier_parser*)userData; + int all_atts_count=0; + int ns_attributes_count=0; + rapier_ns_name** named_attrs=NULL; + int i; + rapier_ns_name* element_name; + rapier_element* element=NULL; + int finished; + rapier_state state; +#ifdef NEED_EXPAT + rapier_locator *locator=&rdf_parser->locator; /* for storing error info */ +#endif + +#ifdef NEED_EXPAT + locator->line=XML_GetCurrentLineNumber(rdf_parser->xp); + locator->column=XML_GetCurrentColumnNumber(rdf_parser->xp); + locator->byte=XML_GetCurrentByteIndex(rdf_parser->xp); +#endif + + rdf_parser->depth++; + + if (atts != NULL) { + /* Round 1 - find special attributes, at present just namespaces */ + for (i = 0;(atts[i] != NULL);i+=2) { + all_atts_count++; + + /* synthesise the XML NS events */ + if(!strncmp(atts[i], "xmlns", 5)) { + /* there is more i.e. xmlns:foo */ + const char *prefix=atts[i][5] ? &atts[i][6] : NULL; + + rapier_start_namespace(userData, prefix, atts[i+1], rdf_parser->depth); + atts[i]=NULL; /* Is it allowed to zap XML parser array things? 
FIXME */ + continue; + } + + ns_attributes_count++; + } + } + + + /* Now can recode element name with a namespace */ + + element_name=rapier_make_namespaced_name(rdf_parser, name, NULL, 1); + if(!element_name) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + return; + } + + + /* Create new element structure */ + element=(rapier_element*)LIBRDF_CALLOC(rapier_element, + sizeof(rapier_element), 1); + if(!element) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + rapier_free_ns_name(element_name); + return; + } + + + element->name=element_name; + + /* Prepare for possible element content */ + element->content_element_seen=0; + element->content_cdata_seen=0; + element->content_cdata_length=0; + + + + + if(ns_attributes_count) { + int offset = 0; + + /* Round 2 - turn attributes into namespaced-attributes */ + + /* Allocate new array to hold namespaced-attributes */ + named_attrs=(rapier_ns_name**)LIBRDF_CALLOC(rapier_ns_name-array, sizeof(rapier_ns_name*), ns_attributes_count); + if(!named_attrs) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + LIBRDF_FREE(rapier_element, element); + rapier_free_ns_name(element_name); + return; + } + + for (i = 0; i < all_atts_count; i++) { + rapier_ns_name* attribute; + + /* Skip previously processed attributes */ + if(!atts[i<<1]) + continue; + + /* namespace-name[i] stored in named_attrs[i] */ + attribute=rapier_make_namespaced_name(rdf_parser, atts[i<<1], + atts[(i<<1)+1], 0); + if(!attribute) { /* failed - tidy up and return */ + int j; + + for (j=0; j < i; j++) + LIBRDF_FREE(rapier_ns_name, named_attrs[j]); + LIBRDF_FREE(rapier_ns_name_array, named_attrs); + LIBRDF_FREE(rapier_element, element); + rapier_free_ns_name(element_name); + return; + } + + /* Save pointers to some RDF M&S attributes */ + + /* If RDF M&S namespace-prefixed attributes */ + if(attribute->namespace && attribute->namespace->is_rdf_ms) { + const char *attr_name=attribute->qname; + int j; + + for(j=0; j<= RDF_ATTR_LAST; 
j++) + if(!strcmp(attr_name, rdf_attr_names[j])) { + element->rdf_attr[j]=attribute->value; + /* Delete it if it was stored elsewhere */ +#if RAPIER_DEBUG + LIBRDF_DEBUG3(rapier_xml_start_element_handler, + "Found RDF M&S attribute %s URI %s\n", + attr_name, attribute->value); +#endif + /* make sure value isn't deleted from ns_name structure */ + attribute->value=NULL; + rapier_free_ns_name(attribute); + attribute=NULL; + } + } /* end if RDF M&S namespaced-prefixed attributes */ + + + /* If non namespace-prefixed RDF M&S attributes found on + * rdf namespace-prefixed element + */ + if(!rdf_parser->forbid_rdf_non_ns_attributes && + attribute && !attribute->namespace && + element_name->namespace && element_name->namespace->is_rdf_ms) { + const char *attr_name=attribute->qname; + int j; + + for(j=0; j<= RDF_ATTR_LAST; j++) + if(!strcmp(attr_name, rdf_attr_names[j])) { + element->rdf_attr[j]=attribute->value; + /* Delete it if it was stored elsewhere */ +#if RAPIER_DEBUG + LIBRDF_DEBUG3(rapier_xml_start_element_handler, + "Found non-namespaced RDF M&S attribute %s URI %s\n", + attr_name, attribute->value); +#endif + /* make sure value isn't deleted from ns_name structure */ + attribute->value=NULL; + rapier_free_ns_name(attribute); + attribute=NULL; + } + } /* end if non-namespace prefixed RDF M&S attributes */ + + + if(attribute) + named_attrs[offset++]=attribute; + } + + /* set actual count from attributes that haven't been skipped */ + ns_attributes_count=offset; + if(!offset && named_attrs) { + /* all attributes were RDF M&S or other specials and deleted + * so delete array and don't store pointer */ + LIBRDF_FREE(rapier_ns_name_array, named_attrs); + named_attrs=NULL; + } + + } /* end if ns_attributes_count */ + + element->attributes=named_attrs; + element->attribute_count=ns_attributes_count; + + + rapier_element_push(rdf_parser, element); + + + if(element->parent) { + if(++element->parent->content_element_seen == 1 && + element->parent->content_cdata_seen == 1) 
{ + /* Uh oh - mixed content, the parent element has cdata too */ + rapier_parser_warning(rdf_parser, "element %s has mixed content.", + element->parent->name->qname); + } + } + + +#ifdef RAPIER_DEBUG + fprintf(stderr, "rapier_xml_start_element_handler: Start of namespaced-element: "); + rapier_print_element(element, stderr); +#endif + + + /* Right, now ready to enter the grammar */ + + finished= 0; + if(element->parent) + state=element->parent->state; + else + state=RAPIER_STATE_UNKNOWN; + + while(!finished) { + const char *el_name=element->name->qname; + int element_in_rdf_ns=(element->name->namespace && + element->name->namespace->is_rdf_ms); + + switch(state) { + case RAPIER_STATE_UNKNOWN: + if(element_in_rdf_ns && !strcmp(el_name, "RDF")) { + state=RAPIER_STATE_IN_RDF; + /* need more content before can continue */ + finished=1; + break; + } + /* If scanning for element, can continue */ + if(rdf_parser->scanning_for_rdf_RDF) { + finished=1; + break; + } + /* Otherwise choice of next state can be made from the current + * element by IN_RDF state */ + + state=RAPIER_STATE_IN_RDF; + break; + + case RAPIER_STATE_IN_RDF: + if(element_in_rdf_ns) { + if(!strcmp(el_name, "Description")) { + state=RAPIER_STATE_DESCRIPTION; + break; + } else if(!strcmp(el_name, "Seq")) { + state=RAPIER_STATE_SEQ; + break; + } else if(!strcmp(el_name, "Bag")) { + state=RAPIER_STATE_BAG; + break; + } else if(!strcmp(el_name, "Alt")) { + state=RAPIER_STATE_ALT; + break; + } + + /* Unexpected rdf: element at outer layer */ + rapier_parser_error(rdf_parser, "Unexpected RDF M&S element %s in <rdf:RDF> - from productions 6.2, 6.3 and 6.4 expected rdf:Description, rdf:Seq, rdf:Bag or rdf:Alt only.", el_name); + finished=1; + } + + /* Hmm, must be a typedNode, handled by the description state + * so that ID, BagID are handled in one place. 
+ */ + state=RAPIER_STATE_DESCRIPTION; + break; + + + /* No need for 6.2 - already chose 6.3, 6.25, 6.26 or 6.27 */ + + + case RAPIER_STATE_DESCRIPTION: + /* choices here from production 6.3 (description) + * <rdf:Description idAboutAttr? bagIdAttr? propAttr* > + * Attributes: (ID|about|aboutEach|aboutEachPrefix)? bagID? propAttr* + * <typeName idAboutAttr? bagIdAttr? propAttr*> + * Attributes: (ID|about|aboutEach|aboutEachPrefix)? bagID? propAttr* + * (either may have no content, that is tested in the end element code) + */ + + /* lets add booleans - isn't C wonderful! */ + if((element->rdf_attr[RDF_ATTR_ID] != NULL) + + (element->rdf_attr[RDF_ATTR_about] != NULL) + + (element->rdf_attr[RDF_ATTR_aboutEach] != NULL) + + (element->rdf_attr[RDF_ATTR_aboutEachPrefix] != NULL) > 1) { + rapier_parser_warning(rdf_parser, "More than one of RDF ID, about, aboutEach or aboutEachPrefix attributes on element %s - from productions 6.5, 6.6, 6.7 and 6.8 expect at most one.", el_name); + } + + + /* has to be rdf:Description OR typedNode - checked above */ + if(element_in_rdf_ns) + state=RAPIER_STATE_IN_DESCRIPTION; + else + /* otherwise must match the typedNode production - checked below */ + state=RAPIER_STATE_TYPED_NODE; + + finished=1; + break; + + + /* Inside a <rdf:Description> so expecting a list of + * propertyElt elements + */ + case RAPIER_STATE_IN_DESCRIPTION: + state=RAPIER_STATE_PROPERTYELT; + finished=1; + break; + + + /* Expect to meet the typedNode production having + * fallen through and not met other productions - + * 6.3, 6.25, 6.26, 6.27. This is the last choice. + * + * choices here from production 6.13 (typedNode) + * <typeName idAboutAttr? bagIdAttr? propAttr* /> + * Attributes: (ID|about|aboutEach|aboutEachPrefix)? bagID? + * <typeName idAboutAttr? bagIdAttr? propAttr* > propertyElt* </typeName> + * Attributes: (ID|about|aboutEach|aboutEachPrefix)? bagID? 
propAttr* + */ + case RAPIER_STATE_TYPED_NODE: + finished=1; + break; + + case RAPIER_STATE_SEQ: + finished=1; + break; + + case RAPIER_STATE_BAG: + finished=1; + break; + + case RAPIER_STATE_ALT: + finished=1; + break; + + case RAPIER_STATE_MEMBER: + finished=1; + break; + + case RAPIER_STATE_REFERENCEDITEM: + finished=1; + break; + + case RAPIER_STATE_INLINEITEM: + finished=1; + break; + + /* choices here from production 6.12 (propertyElt) + * <propName idAttr?> value </propName> + * Attributes: ID? + * <propName idAttr? parseLiteral> literal </propName> + * Attributes: ID? parseType="literal" + * <propName idAttr? parseResource> propertyElt* </propName> + * Attributes: ID? parseType="resource" + * <propName idRefAttr? bagIdAttr? propAttr* /> + * Attributes: (ID|resource)? bagIdAttr? propAttr* + */ + case RAPIER_STATE_PROPERTYELT: + finished=1; + break; + + default: + rapier_parser_fatal_error(rdf_parser, "Unexpected parser state %d.", + state); + finished=1; + + } /* end switch */ + + if(state != element->state) { + element->state=state; + fprintf(stderr, "rapier_xml_start_element_handler: moved to state %d\n", state); + } + + } /* end while */ + + /* store final state that matched */ + element->state=state; +} + + + +static void +rapier_xml_end_element_handler(void *userData, const XML_Char *name) +{ + rapier_parser* rdf_parser=(rapier_parser*)userData; + rapier_element* element; + int finished; + rapier_state state; + rapier_ns_name *element_name; +#ifdef NEED_EXPAT + rapier_locator *locator=&rdf_parser->locator; /* for storing error info */ +#endif + +#ifdef NEED_EXPAT + locator->line=XML_GetCurrentLineNumber(rdf_parser->xp); + locator->column=XML_GetCurrentColumnNumber(rdf_parser->xp); + locator->byte=XML_GetCurrentByteIndex(rdf_parser->xp); +#endif + + /* recode element name */ + + element_name=rapier_make_namespaced_name(rdf_parser, name, NULL, 1); + if(!element_name) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + return; + } + + +#ifdef 
RAPIER_DEBUG + fprintf(stderr, "rapier_xml_end_element_handler: End of namespaced-element: "); + rapier_print_ns_name(stderr, element_name); + fputc('\n', stderr); +#endif + + element=rapier_element_pop(rdf_parser); + if(!rapier_ns_names_equal(element->name, element_name)) { + /* Hmm, unexpected name - FIXME, should do something! */ + rapier_parser_warning(rdf_parser, "Element %s ended, expected end of element %s\n", name, element->name->qname); + return; + } + + + state=element->state; + finished= 0; + while(!finished) { + const char *el_name=element->name->qname; + int element_in_rdf_ns=(element->name->namespace && + element->name->namespace->is_rdf_ms); + + switch(state) { + case RAPIER_STATE_UNKNOWN: + finished=1; + break; + + case RAPIER_STATE_IN_RDF: + if(element_in_rdf_ns && !strcmp(el_name, "RDF")) { + /* end of RDF - boo hoo */ + state=RAPIER_STATE_UNKNOWN; + finished=1; + break; + } + /* When scanning, another element ending is outside the RDF + * world so this can happen without further work + */ + if(rdf_parser->scanning_for_rdf_RDF) { + state=RAPIER_STATE_UNKNOWN; + finished=1; + break; + } + /* otherwise found some junk after RDF content in an RDF-only + * document (probably never get here since this would be + * a mismatched XML tag and cause an error earlier) + */ + rapier_parser_warning(rdf_parser, "Element %s ended, expected end of RDF element\n", el_name); + state=RAPIER_STATE_UNKNOWN; + finished=1; + break; + + /* No need for 6.2 - already chose 6.3, 6.25, 6.26 or 6.27 */ + + case RAPIER_STATE_DESCRIPTION: + /* Never reached in any code outside start element + * since immediately moves on to RAPIER_STATE_IN_DESCRIPTION + * or RAPIER_STATE_TYPED_NODE + */ + abort(); + break; + + case RAPIER_STATE_IN_DESCRIPTION: + /* Must be end of description production </rdf:Description> */ + state=RAPIER_STATE_IN_RDF; + finished=1; + break; + + case RAPIER_STATE_TYPED_NODE: + /* Must be end of typedNode production element <typeName> */ + 
state=RAPIER_STATE_IN_RDF; + finished=1; + break; + + case RAPIER_STATE_SEQ: + finished=1; + break; + + case RAPIER_STATE_BAG: + finished=1; + break; + + case RAPIER_STATE_ALT: + finished=1; + break; + + case RAPIER_STATE_MEMBER: + finished=1; + break; + + case RAPIER_STATE_REFERENCEDITEM: + finished=1; + break; + + case RAPIER_STATE_INLINEITEM: + finished=1; + break; + + case RAPIER_STATE_PROPERTYELT: + finished=1; + break; + + default: + rapier_parser_fatal_error(rdf_parser, "Unexpected parser state %d.", + state); + finished=1; + + } /* end switch */ + + if(state != element->state) { + element->state=state; + fprintf(stderr, "rapier_xml_end_element_handler: moved to state %d\n", state); + } + + } /* end while */ + + + rapier_free_ns_name(element_name); + + rapier_end_namespaces_for_depth(rdf_parser); + rapier_free_element(element); + + rdf_parser->depth--; +} + + + +/* cdata (and ignorable whitespace for libxml). + * s is not 0 terminated for expat, is for libxml - grrrr. + */ +static void +rapier_xml_cdata_handler(void *userData, const XML_Char *s, int len) +{ + rapier_parser* rdf_parser=(rapier_parser*)userData; + rapier_element* element; + rapier_state state; + char *buffer; + char *ptr; + int all_whitespace=1; + int ignore_all_whitespace=0; + int i; + + for(i=0; i<len; i++) + if(!isspace(s[i])) { + all_whitespace=0; + break; + } + + element=rdf_parser->current_element; + + state=element->state; + switch(state) { + case RAPIER_STATE_UNKNOWN: + /* Ignore all cdata if still looking for RDF */ + if(rdf_parser->scanning_for_rdf_RDF) + return; + + /* Ignore all whitespace cdata before first element */ + if(all_whitespace) + return; + + /* This probably will never happen since that would make the + * XML not be well-formed + */ + rapier_parser_warning(rdf_parser, "Found cdata before RDF element."); + break; + + case RAPIER_STATE_IN_RDF: + case RAPIER_STATE_IN_DESCRIPTION: + /* Ignore all whitespace cdata inside <RDF> or <Description> + * when it occurs although 
note it was seen + */ + ignore_all_whitespace=1; + break; + + case RAPIER_STATE_DESCRIPTION: + /* Never reached in any code outside start element + * since immediately moves on to RAPIER_STATE_IN_DESCRIPTION + * or RAPIER_STATE_TYPED_NODE + */ + abort(); + break; + + case RAPIER_STATE_TYPED_NODE: + case RAPIER_STATE_SEQ: + case RAPIER_STATE_BAG: + case RAPIER_STATE_ALT: + case RAPIER_STATE_MEMBER: + case RAPIER_STATE_REFERENCEDITEM: + case RAPIER_STATE_INLINEITEM: + case RAPIER_STATE_PROPERTYELT: + break; + + default: + rapier_parser_fatal_error(rdf_parser, "Unexpected parser state %d.", + state); + } /* end switch */ + + + + if(++element->content_cdata_seen == 1 && + element->content_element_seen == 1) { + /* Uh oh - mixed content, this element has elements too */ + rapier_parser_warning(rdf_parser, "element %s has mixed content.", + element->name->qname); + } + + if(all_whitespace && ignore_all_whitespace) { + LIBRDF_DEBUG2(rapier_xml_end_element_handler, "Ignoring whitespace cdata inside element %s\n", element->name->qname); + return; + } + + /* +1 here is for \0 at end */ + buffer=(char*)LIBRDF_MALLOC(cstring, element->content_cdata_length + len + 1); + /* FIXME - no error return possible */ + if(!buffer) + return; + + if(element->content_cdata_length) { + strncpy(buffer, element->content_cdata, element->content_cdata_length); + LIBRDF_FREE(cstring, element->content_cdata); + } + element->content_cdata=buffer; + + ptr=buffer+element->content_cdata_length; /* append */ + + /* adjust stored length */ + element->content_cdata_length += len; + + /* now write new stuff at end of cdata buffer */ + strncpy(ptr, s, len); + ptr += len; + *ptr = '\0'; + + LIBRDF_DEBUG3(rapier_xml_cdata_handler, + "content cdata now: '%s' (%d bytes)\n", + buffer, element->content_cdata_length); +} + + +#ifdef HAVE_XML_SetNamespaceDeclHandler +static void +rapier_start_namespace_decl_handler(void *userData, + const XML_Char *prefix, const XML_Char *uri) +{ + rapier_parser* 
rdf_parser=(rapier_parser*)userData; + +#ifdef RAPIER_DEBUG + fprintf(stderr_parser->locator, "saw namespace %s URI %s\n", prefix, uri); +#endif +} + + +static void +rapier_end_namespace_decl_handler(void *userData, const XML_Char *prefix) +{ + rapier_parser* rdf_parser=(rapier_parser*)userData; + +#ifdef RAPIER_DEBUG + fprintf(stderr_parser->locator, "saw end namespace prefix %s\n", prefix); +#endif +} +#endif + + +#ifdef NEED_LIBXML +#include <stdarg.h> + +static const char* xml_warning_prefix="XML parser warning - "; +static const char* xml_error_prefix="XML parser error - "; +static const char* xml_fatal_error_prefix="XML parser fatal error - "; + +static void +rapier_xml_warning(void *ctx, const char *msg, ...) +{ + va_list args; + int length; + char *nmsg; + + va_start(args, msg); + length=strlen(xml_warning_prefix)+strlen(msg)+1; + msg=(char*)LIBRDF_MALLOC(cstring, length); + if(!msg) { + /* just pass on, might be out of memory error */ + rapier_parser_warning(parser, msg, args); + } else { + strcpy(nmsg, xml_warning_prefix); + strcat(nmsg, msg); + rapier_parser_warning(parser, nmsg, args); + LIBRDF_FREE(cstring,nmsg); + } + va_end(args); +} + + +static void +rapier_xml_error(void *ctx, const char *msg, ...) +{ + va_list args; + int length; + char *nmsg; + + va_start(args, msg); + length=strlen(xml_error_prefix)+strlen(msg)+1; + msg=(char*)LIBRDF_MALLOC(cstring, length); + if(!msg) { + /* just pass on, might be out of memory error */ + rapier_parser_error(parser, msg, args); + } else { + strcpy(nmsg, xml_error_prefix); + strcat(nmsg, msg); + rapier_parser_error(parser, nmsg, args); + LIBRDF_FREE(cstring,nmsg); + } + va_end(args); +} + + +static void +rapier_xml_fatal_error(void *ctx, const char *msg, ...) 
+{ + va_list args; + int length; + char *nmsg; + + va_start(args, msg); + length=strlen(xml_fatal_error_prefix)+strlen(msg)+1; + msg=(char*)LIBRDF_MALLOC(cstring, length); + if(!msg) { + /* just pass on, might be out of memory error */ + rapier_parser_fatal_error(parser, msg, args); + } else { + strcpy(nmsg, xml_error_prefix); + strcat(nmsg, msg); + rapier_parser_fatal_error(parser, nmsg, args); + LIBRDF_FREE(cstring,nmsg); + } + va_end(args); +} + +#endif + + + +/** + * rapier_file_uri_to_filename - Convert a URI representing a file (starting file:) to a filename + * @uri: URI of string + * + * Return value: the filename or NULL on failure + **/ +static char * +rapier_file_uri_to_filename(const char *uri) +{ + int length; + char *filename; + + if (strncmp(uri, "file:", 5)) + return NULL; + + /* FIXME: unix version of URI -> filename conversion */ + length=strlen(uri) -5 +1; + filename=LIBRDF_MALLOC(cstring, length); + if(!filename) + return NULL; + + strcpy(filename, uri+5); + return filename; +} + + +/* + * rapier_parser_fatal_error - Error from a parser - Internal + **/ +static void +rapier_parser_fatal_error(rapier_parser* parser, const char *message, ...) +{ + va_list arguments; + + parser->failed=1; + + if(parser->fatal_error_fn) { + parser->fatal_error_fn(parser->fatal_error_user_data, + &parser->locator, message); + abort(); + } + + va_start(arguments, message); + + rapier_print_locator(stderr, &parser->locator); + fprintf(stderr, " rapier fatal error - "); + vfprintf(stderr, message, arguments); + fputc('\n', stderr); + + va_end(arguments); + + abort(); +} + + +/* + * rapier_parser_error - Error from a parser - Internal + **/ +static void +rapier_parser_error(rapier_parser* parser, const char *message, ...) 
+{ + va_list arguments; + + if(parser->error_fn) { + parser->error_fn(parser->error_user_data, &parser->locator, message); + return; + } + + va_start(arguments, message); + + rapier_print_locator(stderr, &parser->locator); + fprintf(stderr, " rapier error - "); + vfprintf(stderr, message, arguments); + fputc('\n', stderr); + + va_end(arguments); +} + + +/* + * rapier_parser_warning - Warning from a parser - Internal + **/ +static void +rapier_parser_warning(rapier_parser* parser, const char *message, ...) +{ + va_list arguments; + + if(parser->warning_fn) { + parser->warning_fn(parser->warning_user_data, &parser->locator, message); + return; + } + + va_start(arguments, message); + + rapier_print_locator(stderr, &parser->locator); + fprintf(stderr, " rapier warning - "); + vfprintf(stderr, message, arguments); + fputc('\n', stderr); + + va_end(arguments); +} + + +#ifdef NEED_LIBXML +/* from http://www.daa.com.au/~james/gnome/xml-sax/implementing.html */ +#include <parserInternals.h> + +static int myXmlSAXParseFile(xmlSAXHandlerPtr sax, void *user_data, const char *filename); + +static int +myXmlSAXParseFile(xmlSAXHandlerPtr sax, void *user_data, const char *filename) +{ + int ret = 0; + xmlParserCtxtPtr ctxt; + + ctxt = xmlCreateFileParserCtxt(filename); + if (ctxt == NULL) return -1; + ctxt->sax = sax; + ctxt->userData = user_data; + + xmlParseDocument(ctxt); + + if (ctxt->wellFormed) + ret = 0; + else + ret = -1; + if (sax != NULL) + ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + + return ret; +} +#endif + + + + +/* PUBLIC FUNCTIONS */ + +/** + * rapier_new - Initialise the Rapier RDF parser + * + * Return value: non 0 on failure + **/ +rapier_parser* +rapier_new(void) +{ + rapier_parser* rdf_parser; +#ifdef NEED_EXPAT + XML_Parser xp; +#endif + + rdf_parser=LIBRDF_CALLOC(rapier_parser, 1, sizeof(rapier_parser)); + + if(!rdf_parser) + return NULL; + +#ifdef NEED_EXPAT + xp=XML_ParserCreate(NULL); + + /* create a new parser in the specified encoding */ + 
XML_SetUserData(xp, rdf_parser); + + /* XML_SetEncoding(xp, "..."); */ + + XML_SetElementHandler(xp, rapier_xml_start_element_handler, + rapier_xml_end_element_handler); + XML_SetCharacterDataHandler(xp, rapier_xml_cdata_handler); +#ifdef HAVE_XML_SetNamespaceDeclHandler + XML_SetNamespaceDeclHandler(xp, + rapier_start_namespace_decl_handler, + rapier_end_namespace_decl_handler); +#endif + rdf_parser->xp=xp; +#endif + +#ifdef NEED_LIBXML + xmlDefaultSAXHandlerInit(); + rdf_parser->sax.startElement=rapier_xml_start_element_handler; + rdf_parser->sax.endElement=rapier_xml_end_element_handler; + + rdf_parser->sax.characters=rapier_xml_cdata_handler; + rdf_parser->sax.ignorableWhitespace=rapier_xml_cdata_handler; + + rdf_parser->sax.warning=rapier_xml_warning; + rdf_parser->sax.error=rapier_xml_error; + rdf_parser->sax.fatalError=rapier_xml_fatal_error; + + /* xmlInitParserCtxt(&rdf_parser->xc); */ +#endif + + rapier_init_namespaces(rdf_parser); + + return rdf_parser; +} + + + + +/** + * rapier_free - Free the Rapier RDF parser + * @rdf_parser: parser object + * + **/ +void +rapier_free(rapier_parser *rdf_parser) +{ + rapier_element* element; + rapier_ns_map* ns; + + ns=rdf_parser->namespaces; + while(ns) { + rapier_ns_map* next_ns=ns->next; + + rapier_free_namespace(rdf_parser, ns); + ns=next_ns; + } + + while((element=rapier_element_pop(rdf_parser))) { + rapier_free_element(element); + } + + LIBRDF_FREE(rapier_parser, rdf_parser); +} + + +/** + * rapier_parser_set_fatal_error - Set the parser error handling function + * @parser: the parser + * @user_data: user data to pass to function + * @fatal_error_fn: pointer to the function + * + * The function will receive callbacks when the parser fails. 
+ * + **/ +void +rapier_parser_set_fatal_error(rapier_parser* parser, void *user_data, + void (*fatal_error_fn)(void *user_data, rapier_locator* locator, const char *msg, ...)) +{ + parser->fatal_error_user_data=user_data; + parser->fatal_error_fn=fatal_error_fn; +} + + +/** + * rapier_parser_set_error - Set the parser error handling function + * @parser: the parser + * @user_data: user data to pass to function + * @error_fn: pointer to the function + * + * The function will receive callbacks when the parser fails. + * + **/ +void +rapier_parser_set_error(rapier_parser* parser, void *user_data, + void (*error_fn)(void *user_data, rapier_locator* locator, const char *msg, ...)) +{ + parser->error_user_data=user_data; + parser->error_fn=error_fn; +} + + +/** + * rapier_parser_set_warning - Set the parser warning handling function + * @parser: the parser + * @user_data: user data to pass to function + * @warning_fn: pointer to the function + * + * The function will receive callbacks when the parser gives a warning. 
+ * + **/ +void +rapier_parser_set_warning(rapier_parser* parser, void *user_data, + void (*warning_fn)(void *user_data, rapier_locator* locator, const char *msg, ...)) +{ + parser->warning_user_data=user_data; + parser->warning_fn=warning_fn; +} + + +void +rapier_set_triple_handler(rapier_parser* parser, + void *userData, + void (*triple_handler)(void *userData, + const char *subject, rapier_subject_type subject_type, + const char *predicate, rapier_predicate_type predicate_type, + const char *object, rapier_object_type object_type)) +{ + parser->triple_handler=triple_handler; +} + + + + + +/** + * rapier_parse_file - Retrieve the RDF/XML content at URI + * @rdf_parser: parser + * @uri: URI of RDF content + * @base_uri: the base URI to use (or NULL if the same) + * + * Return value: non 0 on failure + **/ +int +rapier_parse_file(rapier_parser* rdf_parser, const char *uri, + const char *base_uri) +{ +#ifdef NEED_EXPAT + XML_Parser xp; +#endif +#ifdef NEED_LIBXML + /* parser context */ + xmlParserCtxtPtr xc; +#endif +#define RBS 1024 + FILE *fh; + char buffer[RBS]; + int rc=1; + int len; + const char *filename; + rapier_locator *locator=&rdf_parser->locator; /* for storing error info */ + + /* initialise fields */ + rdf_parser->depth=0; + rdf_parser->root_element= rdf_parser->current_element=NULL; + rdf_parser->failed=0; + + + +#ifdef NEED_EXPAT + xp=rdf_parser->xp; + + XML_SetBase(xp, base_uri); +#endif + + + filename=rapier_file_uri_to_filename(uri); + if(!filename) + return 1; + + locator->file=filename; + locator->uri=base_uri; + + fh=fopen(filename, "r"); + if(!fh) { + rapier_parser_error(rdf_parser, "file open failed - %s", strerror(errno)); +#ifdef NEED_EXPAT + XML_ParserFree(xp); +#endif /* EXPAT */ + LIBRDF_FREE(cstring, filename); + return 1; + } + +#ifdef NEED_LIBXML + /* libxml needs at least 4 bytes from the XML content to allow + * content encoding detection to work */ + len=fread(buffer, 1, 4, fh); + if(len>0) { + xc = 
xmlCreatePushParserCtxt(&rdf_parser->sax, rdf_parser, + buffer, len, filename); + } else { + fclose(fh); + fh=NULL; + } + +#endif + + while(fh && !feof(fh)) { + len=fread(buffer, 1, RBS, fh); + if(len <= 0) { +#ifdef NEED_EXPAT + XML_Parse(xp, buffer, 0, 1); +#endif +#ifdef NEED_LIBXML + xmlParseChunk(xc, buffer, 0, 1); +#endif + break; + } +#ifdef NEED_EXPAT + rc=XML_Parse(xp, buffer, len, (len < RBS)); + if(len < RBS) + break; + if(!rc) /* expat: 0 is failure */ + break; +#endif +#ifdef NEED_LIBXML + rc=xmlParseChunk(xc, buffer, len, 0); + if(len < RBS) + break; + if(rc) /* libxml: non 0 is failure */ + break; +#endif + } + fclose(fh); + +#ifdef NEED_EXPAT + if(!rc) { + int xe=XML_GetErrorCode(xp); + + locator->line=XML_GetCurrentLineNumber(xp); + locator->column=XML_GetCurrentColumnNumber(xp); + locator->byte=XML_GetCurrentByteIndex(xp); + + rapier_parser_error(rdf_parser, "XML Parsing failed - %s", + XML_ErrorString(xe)); + rc=1; + } else + rc=0; + + XML_ParserFree(xp); +#endif /* EXPAT */ +#ifdef NEED_LIBXML + if(rc) { + rapier_parser_error(parser, "XML Parsing failed"); +#endif + + LIBRDF_FREE(cstring, filename); + + return (rc != 0); +} + + +void +rapier_print_locator(FILE *stream, rapier_locator* locator) +{ + if(!locator) + return; + + if(locator->uri) + fprintf(stream, "URI %s", locator->uri); + else if (locator->file) + fprintf(stream, "file %s", locator->file); + else + return; + if(locator->line) { + fprintf(stream, ":%d", locator->line); + if(locator->column) + fprintf(stream, " column %d", locator->column); + } +} + + + +void +rapier_set_feature(rapier_parser *parser, rapier_feature feature, int value) { + switch(feature) { + case RAPIER_FEATURE_SCANNING: + parser->scanning_for_rdf_RDF=value; + break; + + case RAPIER_FEATURE_RDF_NON_NS_ATTRIBUTES: + parser->forbid_rdf_non_ns_attributes=!value; /* negative logic - FIXME */ + break; + + default: + break; + } +} diff --git a/src/raptor_rdfxml.c b/src/raptor_rdfxml.c new file mode 100644 index 
00000000..9b4be7af --- /dev/null +++ b/src/raptor_rdfxml.c @@ -0,0 +1,2009 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * rapier_parse.c - Redland Parser for RDF (RAPIER) + * + * $Id$ + * + * Copyright (C) 2000 David Beckett - http://purl.org/net/dajobe/ + * Institute for Learning and Research Technology, University of Bristol. + * + * This package is Free Software available under either of two licenses + * (see FAQS.html to see why): + * + * 1. The GNU Lesser General Public License (LGPL) + * + * See http://www.gnu.org/copyleft/lesser.html or COPYING.LIB for the + * full license text. + * _________________________________________________________________ + * + * Copyright (C) 2000 David Beckett, Institute for Learning and + * Research Technology, University of Bristol. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * _________________________________________________________________ + * + * NOTE - under Term 3 of the LGPL, you may choose to license the entire + * library under the GPL. See COPYING for the full license text. + * + * 2. The Mozilla Public License + * + * See http://www.mozilla.org/MPL/MPL-1.1.html or MPL.html for the full + * license text. + * + * Under MPL section 13. 
I declare that all of the Covered Code is + * Multiple Licensed: + * _________________________________________________________________ + * + * The contents of this file are subject to the Mozilla Public License + * version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License + * at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and + * limitations under the License. + * + * The Initial Developer of the Original Code is David Beckett. + * Portions created by David Beckett are Copyright (C) 2000 David + * Beckett, Institute for Learning and Research Technology, University + * of Bristol. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU Lesser General Public License, in which case the + * provisions of the LGPL License are applicable instead of those + * above. If you wish to allow use of your version of this file only + * under the terms of the LGPL License and not to allow others to use + * your version of this file under the MPL, indicate your decision by + * deleting the provisions above and replace them with the notice and + * other provisions required by the LGPL License. If you do not delete + * the provisions above, a recipient may use your version of this file + * under either the MPL or the LGPL License. 
+ */ + + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#ifdef HAVE_STDARG_H +#include <stdarg.h> +#endif + +extern int errno; + +#define RAPIER_INTERNAL + +#ifdef LIBRDF_INTERNAL +/* if inside Redland */ +#include <librdf.h> + +#include <rdf_parser.h> +#include <rdf_node.h> +#include <rdf_stream.h> +#include <rdf_statement.h> + +#else +/* else standalone */ + +#define LIBRDF_MALLOC(type, size) malloc(size) +#define LIBRDF_CALLOC(type, size, count) calloc(size, count) +#define LIBRDF_FREE(type, ptr) free((void*)ptr) + +#ifdef RAPIER_DEBUG +/* Debugging messages */ +#define LIBRDF_DEBUG1(function, msg) do {fprintf(stderr, "%s:%d:%s: " msg, __FILE__, __LINE__, #function); } while(0) +#define LIBRDF_DEBUG2(function, msg, arg1) do {fprintf(stderr, "%s:%d:%s: " msg, __FILE__, __LINE__, #function, arg1);} while(0) +#define LIBRDF_DEBUG3(function, msg, arg1, arg2) do {fprintf(stderr, "%s:%d:%s: " msg, __FILE__, __LINE__, #function, arg1, arg2);} while(0) +#define LIBRDF_DEBUG4(function, msg, arg1, arg2, arg3) do {fprintf(stderr, "%s:%d:%s: " msg, __FILE__, __LINE__, #function, arg1, arg2, arg3);} while(0) + +#else +/* DEBUGGING TURNED OFF */ + +/* No debugging messages */ +#define LIBRDF_DEBUG1(function, msg) +#define LIBRDF_DEBUG2(function, msg, arg1) +#define LIBRDF_DEBUG3(function, msg, arg1, arg2) +#define LIBRDF_DEBUG4(function, msg, arg1, arg2, arg3) + +#endif + +#endif + + +/* for the memory allocation functions */ +#if defined(HAVE_DMALLOC_H) && defined(RAPIER_MEMORY_DEBUG_DMALLOC) +#include <dmalloc.h> +#undef HAVE_STDLIB_H +#endif + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#undef HAVE_STDLIB_H +#endif + + +/* XML parser includes */ +#ifdef NEED_EXPAT +#include <xmlparse.h> +#endif + +#ifdef NEED_LIBXML +#ifdef HAVE_GNOME_XML_PARSER_H +#include <gnome-xml/parser.h> +/* translate names from expat to libxml */ +#define XML_Char xmlChar +#else +#include <parser.h> +#endif +#endif + + +/* Rapier 
includes */ +#include <rapier.h> + +/* Rapier structures */ +/* namespace stack node */ +typedef struct rapier_ns_map_s rapier_ns_map; + +typedef enum { + /* Not in RDF grammar yet - searching for a start element. + * This can be <rdf:RDF> (goto 6.1) but since it is optional, + * the start element can also be <Description> (goto 6.3), + * <rdf:Seq> (goto 6.25) <rdf:Bag> (goto 6.26) or <rdf:Alt> (goto 6.27) + * OR from 6.3 can have ANY other element matching + * typedNode (6.13) - goto 6.3 + * CHOICE: Search for <rdf:RDF> node before starting match + * OR assume RDF content, hence go straight to production + */ + RAPIER_STATE_UNKNOWN = 0, + + /* Met production 6.1 (RDF) <rdf:RDF> element seen and can now + * expect <rdf:Description> (goto 6.3), <rdf:Seq> (goto 6.25) + * <rdf:Bag> (goto 6.26) or <rdf:Alt> (goto 6.27) OR from 6.3 can + * have ANY other element matching typedNode (6.13) - goto 6.3 + */ + RAPIER_STATE_IN_RDF = 6010, + + /* No need for 6.2 - already chose 6.3, 6.25, 6.26 or 6.27 */ + + /* Met production 6.3 (description) <rdf:Description> element + * OR 6.13 (typedNode) [pretty much anything else] + * CHOICE: Create a bag here (always? even if no bagId given) + * CHOICES: Match rdf:resource/resource, ID/rdf:ID attributes etc. + */ + RAPIER_STATE_DESCRIPTION = 6030, + + /* Matching part of 6.3 (description) inside a <Description> + * where either a list of propertyElt elements is expected or + * an empty element + */ + RAPIER_STATE_IN_DESCRIPTION = 6031, + + /* met production 6.12 (propertyElt) + */ + RAPIER_STATE_PROPERTYELT = 6120, + + /* met production 6.13 (typedNode) + */ + RAPIER_STATE_TYPED_NODE = 6130, + + /* Met production 6.25 (sequence) <rdf:Seq> element seen. Goto 6.28 */ + RAPIER_STATE_SEQ = 6250, + + /* Met production 6.26 (bag) <rdf:Bag> element seen. Goto 6.28 */ + RAPIER_STATE_BAG = 6260, + + /* Met production 6.27 (alternative) <rdf:Alt> element seen. 
Goto 6.28 */ + RAPIER_STATE_ALT = 6270, + + /* Met production 6.28 (member) + * Now expect <rdf:li> element and if it empty, with resource attribute + * goto 6.29 otherwise goto 6.30 + * CHOICE: Match rdf:resource/resource + */ + RAPIER_STATE_MEMBER = 6280, + + /* met production 6.29 (referencedItem) + * Found a container item with reference - <rdf:li (rdf:)resource=".."/> */ + RAPIER_STATE_REFERENCEDITEM = 6290, + + /* met production 6.30 (inlineItem) + * Found a container item with content - <rdf:li> */ + RAPIER_STATE_INLINEITEM = 6300, + +} rapier_state; + + +/* Forms: + * 1) prefix=NULL uri=<URI> - default namespace defined + * 2) prefix=NULL, uri=NULL - no default namespace + * 3) prefix=<prefix>, uri=<URI> - regular pair defined <prefix>:<URI> + */ +struct rapier_ns_map_s { + struct rapier_ns_map_s* next; /* next down the stack, NULL at bottom */ + char *prefix; /* NULL means is the default namespace */ + char *uri; + int uri_length; + int depth; /* parse depth that this was added, delete when parser leaves this */ + int is_rdf_ms; /* Non 0 if is RDF M&S Namespace */ + int is_rdf_schema; /* Non 0 if is RDF Schema Namespace */ +}; + + +/* + * Rapier XML-namespaced name, for elements or attributes + */ + +/* There are three forms + * namespace=NULL - un-namespaced name + * namespace=defined, namespace->prefix=NULL - (default ns) name + * namespace=defined, namespace->prefix=defined - ns:name + */ +typedef struct { + const rapier_ns_map *namespace; + const char *qname; + const char *uri; /* URI of namespace+qname or NULL if not defined */ + const char *value; /* optional value - used when name is an attribute */ +} rapier_ns_name; + + +typedef enum { + RDF_ATTR_about = 0, /* value of rdf:about attribute */ + RDF_ATTR_aboutEach = 1, /* " rdf:aboutEach */ + RDF_ATTR_aboutEachPrefix = 2, /* " rdf:aboutEachPrefix */ + RDF_ATTR_ID = 3, /* " rdf:ID */ + RDF_ATTR_bagID = 4, /* " rdf:bagID */ + RDF_ATTR_reference = 5, /* " rdf:reference */ + RDF_ATTR_type = 6, /* " 
rdf:type */ + RDF_ATTR_parseType = 7, /* " rdf:parseType */ + + RDF_ATTR_LAST = RDF_ATTR_parseType +} rdf_attr; + +static const char *rdf_attr_names[]={ + "about", + "aboutEach", + "aboutEachPrefix", + "ID", + "bagID", + "reference", + "type", + "parseType", +}; + + +/* + * Rapier Element/attributes on stack + */ +struct rapier_element_s { + struct rapier_element_s *parent; /* NULL at bottom of stack */ + rapier_ns_name *name; + rapier_ns_name **attributes; + int attribute_count; + const char * rdf_attr[8]; /* attributes declared in M&S */ + + rapier_state state; /* state that this production matches */ + + /* CDATA content of element and checks for mixed content */ + char *content_cdata; + int content_element_seen; + int content_cdata_seen; + int content_cdata_length; + +}; + +typedef struct rapier_element_s rapier_element; + + +/* + * Rapier parser object + */ +struct rapier_parser_s { + /* XML parser specific stuff */ +#ifdef NEED_EXPAT + XML_Parser xp; +#endif +#ifdef NEED_LIBXML + /* structure holding sax event handlers */ + xmlSAXHandler sax; + /* parser context */ + /* xmlParserCtxtPtr xc; */ +#endif + + /* element depth */ + int depth; + + /* stack of namespaces, most recently added at top */ + rapier_ns_map *namespaces; + + /* can be filled with error location information */ + rapier_locator locator; + + /* stack of elements - elements add after current_element */ + rapier_element *root_element; + rapier_element *current_element; + + /* non 0 if parser had fatal error and cannot continue */ + int failed; + + /* non 0 if scanning for <rdf:RDF> element, else assume doc is RDF */ + int scanning_for_rdf_RDF; + + /* non 0 to forbid non-namespaced resource, ID etc attributes + * on RDF namespaced-elements + */ + int forbid_rdf_non_ns_attributes; + + /* stuff for our user */ + void *user_data; + + void *fatal_error_user_data; + void *error_user_data; + void *warning_user_data; + void (*fatal_error_fn)(void *user_data, rapier_locator *locator, const char *msg, 
...); + void (*error_fn)(void *user_data, rapier_locator *locator, const char *msg, ...); + void (*warning_fn)(void *user_data, rapier_locator *locator, const char *msg, ...); + + /* parser callbacks */ + void (*triple_handler)(void *userData, const char *subject, rapier_subject_type subject_type, const char *predicate, rapier_predicate_type predicate_type, const char *object, rapier_object_type object_type); +}; + + + + +/* static variables */ +static const char *rapier_rdf_ms_uri=RAPIER_RDF_MS_URI; +static const char *rapier_rdf_schema_uri=RAPIER_RDF_SCHEMA_URI; + + + +/* Prototypes for common expat/libxml parsing event-handling functions */ +static void rapier_xml_start_element_handler(void *userData, + const XML_Char *name, + const XML_Char **atts); + +static void rapier_xml_end_element_handler(void *userData, + const XML_Char *name); + + +/* s is not 0 terminated. */ +static void rapier_xml_cdata_handler(void *userData, + const XML_Char *s, + int len); +#ifdef HAVE_XML_SetNamespaceDeclHandler +static void rapier_start_namespace_decl_handler(void *userData, + const XML_Char *prefix, + const XML_Char *uri); + +static void rapier_end_namespace_decl_handler(void *userData, + const XML_Char *prefix); +#endif + +/* libxml-only prototypes */ +#ifdef NEED_LIBXML +static void rapier_xml_warning(void *context, rapier_locator *locator, const char *msg, ...); +static void rapier_xml_error(void *context, rapier_locator *locator, const char *msg, ...); +static void rapier_xml_fatal_error(void *context, rapier_locator *locator, const char *msg, ...); +#endif + + +/* Prototypes for local functions */ +static char * rapier_file_uri_to_filename(const char *uri); +static void rapier_parser_fatal_error(rapier_parser* parser, const char *message, ...); +static void rapier_parser_error(rapier_parser* parser, const char *message, ...); +static void rapier_parser_warning(rapier_parser* parser, const char *message, ...); + + + +/* prototypes for namespace and name/qname functions */ 
/* create the initial namespace table */
static void rapier_init_namespaces(rapier_parser *rdf_parser);
/* push a prefix / namespace URI declaration made at the given depth */
static void rapier_start_namespace(rapier_parser *rdf_parser, const char *prefix, const char *namespace, int depth);
/* release one namespace map entry */
static void rapier_free_namespace(rapier_parser *rdf_parser,  rapier_ns_map* namespace);
/* note that a namespace declaration has gone out of scope */
static void rapier_end_namespace(rapier_parser *rdf_parser, const char *prefix, const char *namespace);
/* pop all namespaces declared at the current parser depth */
static void rapier_end_namespaces_for_depth(rapier_parser *rdf_parser);
/* split an XML name into namespace + qname, with optional attribute value */
static rapier_ns_name* rapier_make_namespaced_name(rapier_parser *rdf_parser, const char *name, const char *value, int is_element);
static void rapier_print_ns_name(FILE *stream, rapier_ns_name* name);
static void rapier_free_ns_name(rapier_ns_name* name);
/* non 0 if both names have the same qname and namespace */
static int rapier_ns_names_equal(rapier_ns_name *name1, rapier_ns_name *name2);


/* prototypes for element functions */
/* the element stack is headed by rdf_parser->current_element */
static rapier_element* rapier_element_pop(rapier_parser *rdf_parser);
static void rapier_element_push(rapier_parser *rdf_parser, rapier_element* element);
static void rapier_free_element(rapier_element *element);
#ifdef RAPIER_DEBUG
static void rapier_print_element(rapier_element *element, FILE* stream);
#endif



/*
 * Namespaces in XML
 * http://www.w3.org/TR/1999/REC-xml-names-19990114/#nsc-NSDeclared
 * (section 4) says:
 *
 * --------------------------------------------------------------------
 *   The prefix xml is by definition bound to the namespace name 
 *   http://www.w3.org/XML/1998/namespace
 * --------------------------------------------------------------------
 *
 * Thus should define it in the table of namespaces before we start.
 *
 * We *can* also define others, but let's not.
+ * + */ +static void +rapier_init_namespaces(rapier_parser *rdf_parser) { + rapier_start_namespace(rdf_parser, "xml", "http://www.w3.org/XML/1998/namespace", -1); +} + + +static void +rapier_start_namespace(rapier_parser *rdf_parser, + const char *prefix, const char *namespace, + int depth) +{ + int uri_length=0; + int len; + rapier_ns_map *map; + void *p; + + LIBRDF_DEBUG4(rapier_start_namespace, + "namespace prefix %s uri %s depth %d\n", + prefix ? prefix : "(default)", namespace, depth); + + /* Convert an empty namespace string "" to a NULL pointer */ + if(!*namespace) + namespace=NULL; + + len=sizeof(rapier_ns_map); + if(namespace) { + uri_length=strlen(namespace); + len+=uri_length+1; + } + if(prefix) + len+=strlen(prefix)+1; + + /* Just one malloc for map structure + namespace (maybe) + prefix (maybe)*/ + map=(rapier_ns_map*)LIBRDF_CALLOC(rapier_ns_map, len, 1); + if(!map) + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + + p=(void*)map+sizeof(rapier_ns_map); + if(namespace) { + map->uri=strcpy((char*)p, namespace); + map->uri_length=uri_length; + p+= uri_length+1; + } + if(prefix) + map->prefix=strcpy((char*)p, prefix); + map->depth=depth; + + /* set convienience flags when there is a defined namespace URI */ + if(namespace) { + if(!strncmp(namespace, rapier_rdf_ms_uri, uri_length)) + map->is_rdf_ms=1; + else if(!strncmp(namespace, rapier_rdf_schema_uri, uri_length)) + map->is_rdf_schema=1; + } + + if(rdf_parser->namespaces) + map->next=rdf_parser->namespaces; + rdf_parser->namespaces=map; +} + + +static void +rapier_free_namespace(rapier_parser *rdf_parser, rapier_ns_map* namespace) +{ + LIBRDF_FREE(rapier_ns_map, namespace); +} + + +static void +rapier_end_namespace(rapier_parser *rdf_parser, + const char *prefix, const char *namespace) +{ + LIBRDF_DEBUG3(rapier_end_namespace, "prefix %s uri \"%s\"\n", + prefix ? 
prefix : "(default)", namespace); +} + + +static void +rapier_end_namespaces_for_depth(rapier_parser *rdf_parser) +{ + while(rdf_parser->namespaces && + rdf_parser->namespaces->depth == rdf_parser->depth) { + rapier_ns_map* ns=rdf_parser->namespaces; + rapier_ns_map* next=ns->next; + + rapier_end_namespace(rdf_parser, ns->prefix, ns->uri); + rapier_free_namespace(rdf_parser, ns); + + rdf_parser->namespaces=next; + } + +} + + + +/* + * Namespaces in XML + * http://www.w3.org/TR/1999/REC-xml-names-19990114/#defaulting + * says: + * + * -------------------------------------------------------------------- + * 5.2 Namespace Defaulting + * + * A default namespace is considered to apply to the element where it + * is declared (if that element has no namespace prefix), and to all + * elements with no prefix within the content of that element. + * + * If the URI reference in a default namespace declaration is empty, + * then unprefixed elements in the scope of the declaration are not + * considered to be in any namespace. + * + * Note that default namespaces do not apply directly to attributes. + * + * [...] + * + * 5.3 Uniqueness of Attributes + * + * In XML documents conforming to this specification, no tag may + * contain two attributes which: + * + * 1. have identical names, or + * + * 2. have qualified names with the same local part and with + * prefixes which have been bound to namespace names that are + * identical. 
+ * -------------------------------------------------------------------- + */ + +static rapier_ns_name* +rapier_make_namespaced_name(rapier_parser *rdf_parser, const char *name, + const char *value, int is_element) +{ + rapier_ns_name* ns_name; + const char *p; + char *new_value=NULL; + rapier_ns_map* ns; + char* new_name; + int prefix_length; + int qname_length=0; + +#if RAPIER_DEBUG > 1 + LIBRDF_DEBUG2(rapier_make_namespaced_name, + "name %s\n", name); +#endif + + ns_name=(rapier_ns_name*)LIBRDF_CALLOC(rapier_ns_name, sizeof(rapier_ns_name), 1); + if(!ns_name) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + return NULL; + } + + if(value) { + new_value=(char*)LIBRDF_MALLOC(cstring, strlen(value)+1); + if(!new_value) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + LIBRDF_FREE(rapier_ns_name, ns_name); + return NULL; + } + strcpy(new_value, value); + ns_name->value=new_value; + } + + /* Find : */ + for(p=name; *p && *p != ':'; p++) + ; + + /* No : - pick up default namespace, if there is one */ + if(!*p) { + new_name=(char*)LIBRDF_MALLOC(cstring, strlen(name)+1); + if(!new_name) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + rapier_free_ns_name(ns_name); + return NULL; + } + strcpy(new_name, name); + ns_name->qname=new_name; + + /* Find a default namespace */ + for(ns=rdf_parser->namespaces; ns && ns->prefix; ns=ns->next) + ; + + if(ns) { + ns_name->namespace=ns; +#if RAPIER_DEBUG > 1 + LIBRDF_DEBUG2(rapier_make_namespaced_name, + "Found default namespace %s\n", ns->uri); +#endif + } else { + /* failed to find namespace - now what? 
FIXME */ + /* rapier_parser_warning(rdf_parser, "No default namespace defined - cannot expand %s", name); */ +#if RAPIER_DEBUG > 1 + LIBRDF_DEBUG1(rapier_make_namespaced_name, + "No default namespace defined\n"); +#endif + } + + } else { + prefix_length=p-name; + p++; /* move to start of qname */ + qname_length=strlen(p); + new_name=(char*)LIBRDF_MALLOC(cstring, qname_length+1); + if(!new_name) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + rapier_free_ns_name(ns_name); + return NULL; + } + strcpy(new_name, p); + ns_name->qname=new_name; + + /* Find the namespace */ + for(ns=rdf_parser->namespaces; ns ; ns=ns->next) + if(ns->prefix && !strncmp(name, ns->prefix, prefix_length)) + break; + + if(!ns) { + /* failed to find namespace - now what? */ + rapier_parser_error(rdf_parser, "Failed to find namespace in %s", name); + rapier_free_ns_name(ns_name); + return NULL; + } + +#if RAPIER_DEBUG > 1 + LIBRDF_DEBUG3(rapier_make_namespaced_name, + "Found namespace prefix %s URI %s\n", ns->prefix, ns->uri); +#endif + ns_name->namespace=ns; + } + + /* If namespace has a URI and a qname is defined, create the URI + * for this element + */ + if(ns_name->namespace && ns_name->namespace->uri && qname_length) { + char *uri_string=(char*)LIBRDF_MALLOC(cstring, + ns_name->namespace->uri_length + + qname_length + 1); + if(!uri_string) { + rapier_free_ns_name(ns_name); + return NULL; + } + ns_name->uri=uri_string; + } + + + return ns_name; +} + + +static void +rapier_print_ns_name(FILE *stream, rapier_ns_name* name) +{ + if(name->namespace) { + if(name->namespace->prefix) + fprintf(stream, "%s:%s", name->namespace->prefix, name->qname); + else + fprintf(stream, "(default):%s", name->qname); + } else + fputs(name->qname, stream); +} + + +static void +rapier_free_ns_name(rapier_ns_name* name) +{ + if(name->qname) + LIBRDF_FREE(cstring, name->qname); + + if(name->uri) + LIBRDF_FREE(cstring, name->uri); + + if(name->value) + LIBRDF_FREE(cstring, name->value); + 
LIBRDF_FREE(rapier_ns_name, name); +} + + +static int +rapier_ns_names_equal(rapier_ns_name *name1, rapier_ns_name *name2) +{ + if(strcmp(name1->qname, name2->qname)) + return 0; + if(name1->namespace != name2->namespace) + return 0; + return 1; +} + + +static rapier_element* +rapier_element_pop(rapier_parser *rdf_parser) +{ + rapier_element *element=rdf_parser->current_element; + + if(!element) + return NULL; + + rdf_parser->current_element=element->parent; + if(rdf_parser->root_element == element) /* just deleted root */ + rdf_parser->root_element=NULL; + + return element; +} + + +static void +rapier_element_push(rapier_parser *rdf_parser, rapier_element* element) +{ + element->parent=rdf_parser->current_element; + rdf_parser->current_element=element; + if(!rdf_parser->root_element) + rdf_parser->root_element=element; +} + + +static void +rapier_free_element(rapier_element *element) +{ + int i; + + for (i=0; i < element->attribute_count; i++) + if(element->attributes[i]) + rapier_free_ns_name(element->attributes[i]); + + if(element->attributes) + LIBRDF_FREE(rapier_ns_name_array, element->attributes); + + /* Free special RDF M&S attributes */ + for(i=0; i<= RDF_ATTR_LAST; i++) + if(element->rdf_attr[i]) + LIBRDF_FREE(cstring, element->rdf_attr[i]); + + if(element->content_cdata_length) + LIBRDF_FREE(rapier_ns_name_array, element->content_cdata); + + rapier_free_ns_name(element->name); + LIBRDF_FREE(rapier_element, element); +} + + + +#ifdef RAPIER_DEBUG +static void +rapier_print_element(rapier_element *element, FILE* stream) +{ + int i; + + rapier_print_ns_name(stream, element->name); + fputc('\n', stream); + + if(element->attribute_count) { + fputs(" attributes: ", stream); + for (i = 0; i < element->attribute_count; i++) { + if(i) + fputc(' ', stream); + rapier_print_ns_name(stream, element->attributes[i]); + fprintf(stream, "='%s'", element->attributes[i]->value); + } + fputc('\n', stream); + } +} +#endif + + +static void 
+rapier_xml_start_element_handler(void *userData, + const XML_Char *name, const XML_Char **atts) +{ + rapier_parser* rdf_parser=(rapier_parser*)userData; + int all_atts_count=0; + int ns_attributes_count=0; + rapier_ns_name** named_attrs=NULL; + int i; + rapier_ns_name* element_name; + rapier_element* element=NULL; + int finished; + rapier_state state; +#ifdef NEED_EXPAT + rapier_locator *locator=&rdf_parser->locator; /* for storing error info */ +#endif + +#ifdef NEED_EXPAT + locator->line=XML_GetCurrentLineNumber(rdf_parser->xp); + locator->column=XML_GetCurrentColumnNumber(rdf_parser->xp); + locator->byte=XML_GetCurrentByteIndex(rdf_parser->xp); +#endif + + rdf_parser->depth++; + + if (atts != NULL) { + /* Round 1 - find special attributes, at present just namespaces */ + for (i = 0;(atts[i] != NULL);i+=2) { + all_atts_count++; + + /* synthesise the XML NS events */ + if(!strncmp(atts[i], "xmlns", 5)) { + /* there is more i.e. xmlns:foo */ + const char *prefix=atts[i][5] ? &atts[i][6] : NULL; + + rapier_start_namespace(userData, prefix, atts[i+1], rdf_parser->depth); + atts[i]=NULL; /* Is it allowed to zap XML parser array things? 
FIXME */ + continue; + } + + ns_attributes_count++; + } + } + + + /* Now can recode element name with a namespace */ + + element_name=rapier_make_namespaced_name(rdf_parser, name, NULL, 1); + if(!element_name) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + return; + } + + + /* Create new element structure */ + element=(rapier_element*)LIBRDF_CALLOC(rapier_element, + sizeof(rapier_element), 1); + if(!element) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + rapier_free_ns_name(element_name); + return; + } + + + element->name=element_name; + + /* Prepare for possible element content */ + element->content_element_seen=0; + element->content_cdata_seen=0; + element->content_cdata_length=0; + + + + + if(ns_attributes_count) { + int offset = 0; + + /* Round 2 - turn attributes into namespaced-attributes */ + + /* Allocate new array to hold namespaced-attributes */ + named_attrs=(rapier_ns_name**)LIBRDF_CALLOC(rapier_ns_name-array, sizeof(rapier_ns_name*), ns_attributes_count); + if(!named_attrs) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + LIBRDF_FREE(rapier_element, element); + rapier_free_ns_name(element_name); + return; + } + + for (i = 0; i < all_atts_count; i++) { + rapier_ns_name* attribute; + + /* Skip previously processed attributes */ + if(!atts[i<<1]) + continue; + + /* namespace-name[i] stored in named_attrs[i] */ + attribute=rapier_make_namespaced_name(rdf_parser, atts[i<<1], + atts[(i<<1)+1], 0); + if(!attribute) { /* failed - tidy up and return */ + int j; + + for (j=0; j < i; j++) + LIBRDF_FREE(rapier_ns_name, named_attrs[j]); + LIBRDF_FREE(rapier_ns_name_array, named_attrs); + LIBRDF_FREE(rapier_element, element); + rapier_free_ns_name(element_name); + return; + } + + /* Save pointers to some RDF M&S attributes */ + + /* If RDF M&S namespace-prefixed attributes */ + if(attribute->namespace && attribute->namespace->is_rdf_ms) { + const char *attr_name=attribute->qname; + int j; + + for(j=0; j<= RDF_ATTR_LAST; 
j++) + if(!strcmp(attr_name, rdf_attr_names[j])) { + element->rdf_attr[j]=attribute->value; + /* Delete it if it was stored elsewhere */ +#if RAPIER_DEBUG + LIBRDF_DEBUG3(rapier_xml_start_element_handler, + "Found RDF M&S attribute %s URI %s\n", + attr_name, attribute->value); +#endif + /* make sure value isn't deleted from ns_name structure */ + attribute->value=NULL; + rapier_free_ns_name(attribute); + attribute=NULL; + } + } /* end if RDF M&S namespaced-prefixed attributes */ + + + /* If non namespace-prefixed RDF M&S attributes found on + * rdf namespace-prefixed element + */ + if(!rdf_parser->forbid_rdf_non_ns_attributes && + attribute && !attribute->namespace && + element_name->namespace && element_name->namespace->is_rdf_ms) { + const char *attr_name=attribute->qname; + int j; + + for(j=0; j<= RDF_ATTR_LAST; j++) + if(!strcmp(attr_name, rdf_attr_names[j])) { + element->rdf_attr[j]=attribute->value; + /* Delete it if it was stored elsewhere */ +#if RAPIER_DEBUG + LIBRDF_DEBUG3(rapier_xml_start_element_handler, + "Found non-namespaced RDF M&S attribute %s URI %s\n", + attr_name, attribute->value); +#endif + /* make sure value isn't deleted from ns_name structure */ + attribute->value=NULL; + rapier_free_ns_name(attribute); + attribute=NULL; + } + } /* end if non-namespace prefixed RDF M&S attributes */ + + + if(attribute) + named_attrs[offset++]=attribute; + } + + /* set actual count from attributes that haven't been skipped */ + ns_attributes_count=offset; + if(!offset && named_attrs) { + /* all attributes were RDF M&S or other specials and deleted + * so delete array and don't store pointer */ + LIBRDF_FREE(rapier_ns_name_array, named_attrs); + named_attrs=NULL; + } + + } /* end if ns_attributes_count */ + + element->attributes=named_attrs; + element->attribute_count=ns_attributes_count; + + + rapier_element_push(rdf_parser, element); + + + if(element->parent) { + if(++element->parent->content_element_seen == 1 && + element->parent->content_cdata_seen == 1) 
{ + /* Uh oh - mixed content, the parent element has cdata too */ + rapier_parser_warning(rdf_parser, "element %s has mixed content.", + element->parent->name->qname); + } + } + + +#ifdef RAPIER_DEBUG + fprintf(stderr, "rapier_xml_start_element_handler: Start of namespaced-element: "); + rapier_print_element(element, stderr); +#endif + + + /* Right, now ready to enter the grammar */ + + finished= 0; + if(element->parent) + state=element->parent->state; + else + state=RAPIER_STATE_UNKNOWN; + + while(!finished) { + const char *el_name=element->name->qname; + int element_in_rdf_ns=(element->name->namespace && + element->name->namespace->is_rdf_ms); + + switch(state) { + case RAPIER_STATE_UNKNOWN: + if(element_in_rdf_ns && !strcmp(el_name, "RDF")) { + state=RAPIER_STATE_IN_RDF; + /* need more content before can continue */ + finished=1; + break; + } + /* If scanning for element, can continue */ + if(rdf_parser->scanning_for_rdf_RDF) { + finished=1; + break; + } + /* Otherwise choice of next state can be made from the current + * element by IN_RDF state */ + + state=RAPIER_STATE_IN_RDF; + break; + + case RAPIER_STATE_IN_RDF: + if(element_in_rdf_ns) { + if(!strcmp(el_name, "Description")) { + state=RAPIER_STATE_DESCRIPTION; + break; + } else if(!strcmp(el_name, "Seq")) { + state=RAPIER_STATE_SEQ; + break; + } else if(!strcmp(el_name, "Bag")) { + state=RAPIER_STATE_BAG; + break; + } else if(!strcmp(el_name, "Alt")) { + state=RAPIER_STATE_ALT; + break; + } + + /* Unexpected rdf: element at outer layer */ + rapier_parser_error(rdf_parser, "Unexpected RDF M&S element %s in <rdf:RDF> - from productions 6.2, 6.3 and 6.4 expected rdf:Description, rdf:Seq, rdf:Bag or rdf:Alt only.", el_name); + finished=1; + } + + /* Hmm, must be a typedNode, handled by the description state + * so that ID, BagID are handled in one place. 
+ */ + state=RAPIER_STATE_DESCRIPTION; + break; + + + /* No need for 6.2 - already chose 6.3, 6.25, 6.26 or 6.27 */ + + + case RAPIER_STATE_DESCRIPTION: + /* choices here from production 6.3 (description) + * <rdf:Description idAboutAttr? bagIdAttr? propAttr* > + * Attributes: (ID|about|aboutEach|aboutEachPrefix)? bagID? propAttr* + * <typeName idAboutAttr? bagIdAttr? propAttr*> + * Attributes: (ID|about|aboutEach|aboutEachPrefix)? bagID? propAttr* + * (either may have no content, that is tested in the end element code) + */ + + /* lets add booleans - isn't C wonderful! */ + if((element->rdf_attr[RDF_ATTR_ID] != NULL) + + (element->rdf_attr[RDF_ATTR_about] != NULL) + + (element->rdf_attr[RDF_ATTR_aboutEach] != NULL) + + (element->rdf_attr[RDF_ATTR_aboutEachPrefix] != NULL) > 1) { + rapier_parser_warning(rdf_parser, "More than one of RDF ID, about, aboutEach or aboutEachPrefix attributes on element %s - from productions 6.5, 6.6, 6.7 and 6.8 expect at most one.", el_name); + } + + + /* has to be rdf:Description OR typedNode - checked above */ + if(element_in_rdf_ns) + state=RAPIER_STATE_IN_DESCRIPTION; + else + /* otherwise must match the typedNode production - checked below */ + state=RAPIER_STATE_TYPED_NODE; + + finished=1; + break; + + + /* Inside a <rdf:Description> so expecting a list of + * propertyElt elements + */ + case RAPIER_STATE_IN_DESCRIPTION: + state=RAPIER_STATE_PROPERTYELT; + finished=1; + break; + + + /* Expect to meet the typedNode production having + * fallen through and not met other productions - + * 6.3, 6.25, 6.26, 6.27. This is the last choice. + * + * choices here from production 6.13 (typedNode) + * <typeName idAboutAttr? bagIdAttr? propAttr* /> + * Attributes: (ID|about|aboutEach|aboutEachPrefix)? bagID? + * <typeName idAboutAttr? bagIdAttr? propAttr* > propertyElt* </typeName> + * Attributes: (ID|about|aboutEach|aboutEachPrefix)? bagID? 
propAttr* + */ + case RAPIER_STATE_TYPED_NODE: + finished=1; + break; + + case RAPIER_STATE_SEQ: + finished=1; + break; + + case RAPIER_STATE_BAG: + finished=1; + break; + + case RAPIER_STATE_ALT: + finished=1; + break; + + case RAPIER_STATE_MEMBER: + finished=1; + break; + + case RAPIER_STATE_REFERENCEDITEM: + finished=1; + break; + + case RAPIER_STATE_INLINEITEM: + finished=1; + break; + + /* choices here from production 6.12 (propertyElt) + * <propName idAttr?> value </propName> + * Attributes: ID? + * <propName idAttr? parseLiteral> literal </propName> + * Attributes: ID? parseType="literal" + * <propName idAttr? parseResource> propertyElt* </propName> + * Attributes: ID? parseType="resource" + * <propName idRefAttr? bagIdAttr? propAttr* /> + * Attributes: (ID|resource)? bagIdAttr? propAttr* + */ + case RAPIER_STATE_PROPERTYELT: + finished=1; + break; + + default: + rapier_parser_fatal_error(rdf_parser, "Unexpected parser state %d.", + state); + finished=1; + + } /* end switch */ + + if(state != element->state) { + element->state=state; + fprintf(stderr, "rapier_xml_start_element_handler: moved to state %d\n", state); + } + + } /* end while */ + + /* store final state that matched */ + element->state=state; +} + + + +static void +rapier_xml_end_element_handler(void *userData, const XML_Char *name) +{ + rapier_parser* rdf_parser=(rapier_parser*)userData; + rapier_element* element; + int finished; + rapier_state state; + rapier_ns_name *element_name; +#ifdef NEED_EXPAT + rapier_locator *locator=&rdf_parser->locator; /* for storing error info */ +#endif + +#ifdef NEED_EXPAT + locator->line=XML_GetCurrentLineNumber(rdf_parser->xp); + locator->column=XML_GetCurrentColumnNumber(rdf_parser->xp); + locator->byte=XML_GetCurrentByteIndex(rdf_parser->xp); +#endif + + /* recode element name */ + + element_name=rapier_make_namespaced_name(rdf_parser, name, NULL, 1); + if(!element_name) { + rapier_parser_fatal_error(rdf_parser, "Out of memory"); + return; + } + + +#ifdef 
RAPIER_DEBUG + fprintf(stderr, "rapier_xml_end_element_handler: End of namespaced-element: "); + rapier_print_ns_name(stderr, element_name); + fputc('\n', stderr); +#endif + + element=rapier_element_pop(rdf_parser); + if(!rapier_ns_names_equal(element->name, element_name)) { + /* Hmm, unexpected name - FIXME, should do something! */ + rapier_parser_warning(rdf_parser, "Element %s ended, expected end of element %s\n", name, element->name->qname); + return; + } + + + state=element->state; + finished= 0; + while(!finished) { + const char *el_name=element->name->qname; + int element_in_rdf_ns=(element->name->namespace && + element->name->namespace->is_rdf_ms); + + switch(state) { + case RAPIER_STATE_UNKNOWN: + finished=1; + break; + + case RAPIER_STATE_IN_RDF: + if(element_in_rdf_ns && !strcmp(el_name, "RDF")) { + /* end of RDF - boo hoo */ + state=RAPIER_STATE_UNKNOWN; + finished=1; + break; + } + /* When scanning, another element ending is outside the RDF + * world so this can happen without further work + */ + if(rdf_parser->scanning_for_rdf_RDF) { + state=RAPIER_STATE_UNKNOWN; + finished=1; + break; + } + /* otherwise found some junk after RDF content in an RDF-only + * document (probably never get here since this would be + * a mismatched XML tag and cause an error earlier) + */ + rapier_parser_warning(rdf_parser, "Element %s ended, expected end of RDF element\n", el_name); + state=RAPIER_STATE_UNKNOWN; + finished=1; + break; + + /* No need for 6.2 - already chose 6.3, 6.25, 6.26 or 6.27 */ + + case RAPIER_STATE_DESCRIPTION: + /* Never reached in any code outside start element + * since immediately moves on to RAPIER_STATE_IN_DESCRIPTION + * or RAPIER_STATE_TYPED_NODE + */ + abort(); + break; + + case RAPIER_STATE_IN_DESCRIPTION: + /* Must be end of description production </rdf:Description> */ + state=RAPIER_STATE_IN_RDF; + finished=1; + break; + + case RAPIER_STATE_TYPED_NODE: + /* Must be end of typedNode production element <typeName> */ + 
state=RAPIER_STATE_IN_RDF; + finished=1; + break; + + case RAPIER_STATE_SEQ: + finished=1; + break; + + case RAPIER_STATE_BAG: + finished=1; + break; + + case RAPIER_STATE_ALT: + finished=1; + break; + + case RAPIER_STATE_MEMBER: + finished=1; + break; + + case RAPIER_STATE_REFERENCEDITEM: + finished=1; + break; + + case RAPIER_STATE_INLINEITEM: + finished=1; + break; + + case RAPIER_STATE_PROPERTYELT: + finished=1; + break; + + default: + rapier_parser_fatal_error(rdf_parser, "Unexpected parser state %d.", + state); + finished=1; + + } /* end switch */ + + if(state != element->state) { + element->state=state; + fprintf(stderr, "rapier_xml_end_element_handler: moved to state %d\n", state); + } + + } /* end while */ + + + rapier_free_ns_name(element_name); + + rapier_end_namespaces_for_depth(rdf_parser); + rapier_free_element(element); + + rdf_parser->depth--; +} + + + +/* cdata (and ignorable whitespace for libxml). + * s is not 0 terminated for expat, is for libxml - grrrr. + */ +static void +rapier_xml_cdata_handler(void *userData, const XML_Char *s, int len) +{ + rapier_parser* rdf_parser=(rapier_parser*)userData; + rapier_element* element; + rapier_state state; + char *buffer; + char *ptr; + int all_whitespace=1; + int ignore_all_whitespace=0; + int i; + + for(i=0; i<len; i++) + if(!isspace(s[i])) { + all_whitespace=0; + break; + } + + element=rdf_parser->current_element; + + state=element->state; + switch(state) { + case RAPIER_STATE_UNKNOWN: + /* Ignore all cdata if still looking for RDF */ + if(rdf_parser->scanning_for_rdf_RDF) + return; + + /* Ignore all whitespace cdata before first element */ + if(all_whitespace) + return; + + /* This probably will never happen since that would make the + * XML not be well-formed + */ + rapier_parser_warning(rdf_parser, "Found cdata before RDF element."); + break; + + case RAPIER_STATE_IN_RDF: + case RAPIER_STATE_IN_DESCRIPTION: + /* Ignore all whitespace cdata inside <RDF> or <Description> + * when it occurs although 
note it was seen + */ + ignore_all_whitespace=1; + break; + + case RAPIER_STATE_DESCRIPTION: + /* Never reached in any code outside start element + * since immediately moves on to RAPIER_STATE_IN_DESCRIPTION + * or RAPIER_STATE_TYPED_NODE + */ + abort(); + break; + + case RAPIER_STATE_TYPED_NODE: + case RAPIER_STATE_SEQ: + case RAPIER_STATE_BAG: + case RAPIER_STATE_ALT: + case RAPIER_STATE_MEMBER: + case RAPIER_STATE_REFERENCEDITEM: + case RAPIER_STATE_INLINEITEM: + case RAPIER_STATE_PROPERTYELT: + break; + + default: + rapier_parser_fatal_error(rdf_parser, "Unexpected parser state %d.", + state); + } /* end switch */ + + + + if(++element->content_cdata_seen == 1 && + element->content_element_seen == 1) { + /* Uh oh - mixed content, this element has elements too */ + rapier_parser_warning(rdf_parser, "element %s has mixed content.", + element->name->qname); + } + + if(all_whitespace && ignore_all_whitespace) { + LIBRDF_DEBUG2(rapier_xml_end_element_handler, "Ignoring whitespace cdata inside element %s\n", element->name->qname); + return; + } + + /* +1 here is for \0 at end */ + buffer=(char*)LIBRDF_MALLOC(cstring, element->content_cdata_length + len + 1); + /* FIXME - no error return possible */ + if(!buffer) + return; + + if(element->content_cdata_length) { + strncpy(buffer, element->content_cdata, element->content_cdata_length); + LIBRDF_FREE(cstring, element->content_cdata); + } + element->content_cdata=buffer; + + ptr=buffer+element->content_cdata_length; /* append */ + + /* adjust stored length */ + element->content_cdata_length += len; + + /* now write new stuff at end of cdata buffer */ + strncpy(ptr, s, len); + ptr += len; + *ptr = '\0'; + + LIBRDF_DEBUG3(rapier_xml_cdata_handler, + "content cdata now: '%s' (%d bytes)\n", + buffer, element->content_cdata_length); +} + + +#ifdef HAVE_XML_SetNamespaceDeclHandler +static void +rapier_start_namespace_decl_handler(void *userData, + const XML_Char *prefix, const XML_Char *uri) +{ + rapier_parser* 
rdf_parser=(rapier_parser*)userData; + +#ifdef RAPIER_DEBUG + fprintf(stderr_parser->locator, "saw namespace %s URI %s\n", prefix, uri); +#endif +} + + +static void +rapier_end_namespace_decl_handler(void *userData, const XML_Char *prefix) +{ + rapier_parser* rdf_parser=(rapier_parser*)userData; + +#ifdef RAPIER_DEBUG + fprintf(stderr_parser->locator, "saw end namespace prefix %s\n", prefix); +#endif +} +#endif + + +#ifdef NEED_LIBXML +#include <stdarg.h> + +static const char* xml_warning_prefix="XML parser warning - "; +static const char* xml_error_prefix="XML parser error - "; +static const char* xml_fatal_error_prefix="XML parser fatal error - "; + +static void +rapier_xml_warning(void *ctx, const char *msg, ...) +{ + va_list args; + int length; + char *nmsg; + + va_start(args, msg); + length=strlen(xml_warning_prefix)+strlen(msg)+1; + msg=(char*)LIBRDF_MALLOC(cstring, length); + if(!msg) { + /* just pass on, might be out of memory error */ + rapier_parser_warning(parser, msg, args); + } else { + strcpy(nmsg, xml_warning_prefix); + strcat(nmsg, msg); + rapier_parser_warning(parser, nmsg, args); + LIBRDF_FREE(cstring,nmsg); + } + va_end(args); +} + + +static void +rapier_xml_error(void *ctx, const char *msg, ...) +{ + va_list args; + int length; + char *nmsg; + + va_start(args, msg); + length=strlen(xml_error_prefix)+strlen(msg)+1; + msg=(char*)LIBRDF_MALLOC(cstring, length); + if(!msg) { + /* just pass on, might be out of memory error */ + rapier_parser_error(parser, msg, args); + } else { + strcpy(nmsg, xml_error_prefix); + strcat(nmsg, msg); + rapier_parser_error(parser, nmsg, args); + LIBRDF_FREE(cstring,nmsg); + } + va_end(args); +} + + +static void +rapier_xml_fatal_error(void *ctx, const char *msg, ...) 
+{ + va_list args; + int length; + char *nmsg; + + va_start(args, msg); + length=strlen(xml_fatal_error_prefix)+strlen(msg)+1; + msg=(char*)LIBRDF_MALLOC(cstring, length); + if(!msg) { + /* just pass on, might be out of memory error */ + rapier_parser_fatal_error(parser, msg, args); + } else { + strcpy(nmsg, xml_error_prefix); + strcat(nmsg, msg); + rapier_parser_fatal_error(parser, nmsg, args); + LIBRDF_FREE(cstring,nmsg); + } + va_end(args); +} + +#endif + + + +/** + * rapier_file_uri_to_filename - Convert a URI representing a file (starting file:) to a filename + * @uri: URI of string + * + * Return value: the filename or NULL on failure + **/ +static char * +rapier_file_uri_to_filename(const char *uri) +{ + int length; + char *filename; + + if (strncmp(uri, "file:", 5)) + return NULL; + + /* FIXME: unix version of URI -> filename conversion */ + length=strlen(uri) -5 +1; + filename=LIBRDF_MALLOC(cstring, length); + if(!filename) + return NULL; + + strcpy(filename, uri+5); + return filename; +} + + +/* + * rapier_parser_fatal_error - Error from a parser - Internal + **/ +static void +rapier_parser_fatal_error(rapier_parser* parser, const char *message, ...) +{ + va_list arguments; + + parser->failed=1; + + if(parser->fatal_error_fn) { + parser->fatal_error_fn(parser->fatal_error_user_data, + &parser->locator, message); + abort(); + } + + va_start(arguments, message); + + rapier_print_locator(stderr, &parser->locator); + fprintf(stderr, " rapier fatal error - "); + vfprintf(stderr, message, arguments); + fputc('\n', stderr); + + va_end(arguments); + + abort(); +} + + +/* + * rapier_parser_error - Error from a parser - Internal + **/ +static void +rapier_parser_error(rapier_parser* parser, const char *message, ...) 
+{ + va_list arguments; + + if(parser->error_fn) { + parser->error_fn(parser->error_user_data, &parser->locator, message); + return; + } + + va_start(arguments, message); + + rapier_print_locator(stderr, &parser->locator); + fprintf(stderr, " rapier error - "); + vfprintf(stderr, message, arguments); + fputc('\n', stderr); + + va_end(arguments); +} + + +/* + * rapier_parser_warning - Warning from a parser - Internal + **/ +static void +rapier_parser_warning(rapier_parser* parser, const char *message, ...) +{ + va_list arguments; + + if(parser->warning_fn) { + parser->warning_fn(parser->warning_user_data, &parser->locator, message); + return; + } + + va_start(arguments, message); + + rapier_print_locator(stderr, &parser->locator); + fprintf(stderr, " rapier warning - "); + vfprintf(stderr, message, arguments); + fputc('\n', stderr); + + va_end(arguments); +} + + +#ifdef NEED_LIBXML +/* from http://www.daa.com.au/~james/gnome/xml-sax/implementing.html */ +#include <parserInternals.h> + +static int myXmlSAXParseFile(xmlSAXHandlerPtr sax, void *user_data, const char *filename); + +static int +myXmlSAXParseFile(xmlSAXHandlerPtr sax, void *user_data, const char *filename) +{ + int ret = 0; + xmlParserCtxtPtr ctxt; + + ctxt = xmlCreateFileParserCtxt(filename); + if (ctxt == NULL) return -1; + ctxt->sax = sax; + ctxt->userData = user_data; + + xmlParseDocument(ctxt); + + if (ctxt->wellFormed) + ret = 0; + else + ret = -1; + if (sax != NULL) + ctxt->sax = NULL; + xmlFreeParserCtxt(ctxt); + + return ret; +} +#endif + + + + +/* PUBLIC FUNCTIONS */ + +/** + * rapier_new - Initialise the Rapier RDF parser + * + * Return value: non 0 on failure + **/ +rapier_parser* +rapier_new(void) +{ + rapier_parser* rdf_parser; +#ifdef NEED_EXPAT + XML_Parser xp; +#endif + + rdf_parser=LIBRDF_CALLOC(rapier_parser, 1, sizeof(rapier_parser)); + + if(!rdf_parser) + return NULL; + +#ifdef NEED_EXPAT + xp=XML_ParserCreate(NULL); + + /* create a new parser in the specified encoding */ + 
XML_SetUserData(xp, rdf_parser); + + /* XML_SetEncoding(xp, "..."); */ + + XML_SetElementHandler(xp, rapier_xml_start_element_handler, + rapier_xml_end_element_handler); + XML_SetCharacterDataHandler(xp, rapier_xml_cdata_handler); +#ifdef HAVE_XML_SetNamespaceDeclHandler + XML_SetNamespaceDeclHandler(xp, + rapier_start_namespace_decl_handler, + rapier_end_namespace_decl_handler); +#endif + rdf_parser->xp=xp; +#endif + +#ifdef NEED_LIBXML + xmlDefaultSAXHandlerInit(); + rdf_parser->sax.startElement=rapier_xml_start_element_handler; + rdf_parser->sax.endElement=rapier_xml_end_element_handler; + + rdf_parser->sax.characters=rapier_xml_cdata_handler; + rdf_parser->sax.ignorableWhitespace=rapier_xml_cdata_handler; + + rdf_parser->sax.warning=rapier_xml_warning; + rdf_parser->sax.error=rapier_xml_error; + rdf_parser->sax.fatalError=rapier_xml_fatal_error; + + /* xmlInitParserCtxt(&rdf_parser->xc); */ +#endif + + rapier_init_namespaces(rdf_parser); + + return rdf_parser; +} + + + + +/** + * rapier_free - Free the Rapier RDF parser + * @rdf_parser: parser object + * + **/ +void +rapier_free(rapier_parser *rdf_parser) +{ + rapier_element* element; + rapier_ns_map* ns; + + ns=rdf_parser->namespaces; + while(ns) { + rapier_ns_map* next_ns=ns->next; + + rapier_free_namespace(rdf_parser, ns); + ns=next_ns; + } + + while((element=rapier_element_pop(rdf_parser))) { + rapier_free_element(element); + } + + LIBRDF_FREE(rapier_parser, rdf_parser); +} + + +/** + * rapier_parser_set_fatal_error - Set the parser error handling function + * @parser: the parser + * @user_data: user data to pass to function + * @fatal_error_fn: pointer to the function + * + * The function will receive callbacks when the parser fails. 
+ * + **/ +void +rapier_parser_set_fatal_error(rapier_parser* parser, void *user_data, + void (*fatal_error_fn)(void *user_data, rapier_locator* locator, const char *msg, ...)) +{ + parser->fatal_error_user_data=user_data; + parser->fatal_error_fn=fatal_error_fn; +} + + +/** + * rapier_parser_set_error - Set the parser error handling function + * @parser: the parser + * @user_data: user data to pass to function + * @error_fn: pointer to the function + * + * The function will receive callbacks when the parser fails. + * + **/ +void +rapier_parser_set_error(rapier_parser* parser, void *user_data, + void (*error_fn)(void *user_data, rapier_locator* locator, const char *msg, ...)) +{ + parser->error_user_data=user_data; + parser->error_fn=error_fn; +} + + +/** + * rapier_parser_set_warning - Set the parser warning handling function + * @parser: the parser + * @user_data: user data to pass to function + * @warning_fn: pointer to the function + * + * The function will receive callbacks when the parser gives a warning. 
+ * + **/ +void +rapier_parser_set_warning(rapier_parser* parser, void *user_data, + void (*warning_fn)(void *user_data, rapier_locator* locator, const char *msg, ...)) +{ + parser->warning_user_data=user_data; + parser->warning_fn=warning_fn; +} + + +void +rapier_set_triple_handler(rapier_parser* parser, + void *userData, + void (*triple_handler)(void *userData, + const char *subject, rapier_subject_type subject_type, + const char *predicate, rapier_predicate_type predicate_type, + const char *object, rapier_object_type object_type)) +{ + parser->triple_handler=triple_handler; +} + + + + + +/** + * rapier_parse_file - Retrieve the RDF/XML content at URI + * @rdf_parser: parser + * @uri: URI of RDF content + * @base_uri: the base URI to use (or NULL if the same) + * + * Return value: non 0 on failure + **/ +int +rapier_parse_file(rapier_parser* rdf_parser, const char *uri, + const char *base_uri) +{ +#ifdef NEED_EXPAT + XML_Parser xp; +#endif +#ifdef NEED_LIBXML + /* parser context */ + xmlParserCtxtPtr xc; +#endif +#define RBS 1024 + FILE *fh; + char buffer[RBS]; + int rc=1; + int len; + const char *filename; + rapier_locator *locator=&rdf_parser->locator; /* for storing error info */ + + /* initialise fields */ + rdf_parser->depth=0; + rdf_parser->root_element= rdf_parser->current_element=NULL; + rdf_parser->failed=0; + + + +#ifdef NEED_EXPAT + xp=rdf_parser->xp; + + XML_SetBase(xp, base_uri); +#endif + + + filename=rapier_file_uri_to_filename(uri); + if(!filename) + return 1; + + locator->file=filename; + locator->uri=base_uri; + + fh=fopen(filename, "r"); + if(!fh) { + rapier_parser_error(rdf_parser, "file open failed - %s", strerror(errno)); +#ifdef NEED_EXPAT + XML_ParserFree(xp); +#endif /* EXPAT */ + LIBRDF_FREE(cstring, filename); + return 1; + } + +#ifdef NEED_LIBXML + /* libxml needs at least 4 bytes from the XML content to allow + * content encoding detection to work */ + len=fread(buffer, 1, 4, fh); + if(len>0) { + xc = 
xmlCreatePushParserCtxt(&rdf_parser->sax, rdf_parser, + buffer, len, filename); + } else { + fclose(fh); + fh=NULL; + } + +#endif + + while(fh && !feof(fh)) { + len=fread(buffer, 1, RBS, fh); + if(len <= 0) { +#ifdef NEED_EXPAT + XML_Parse(xp, buffer, 0, 1); +#endif +#ifdef NEED_LIBXML + xmlParseChunk(xc, buffer, 0, 1); +#endif + break; + } +#ifdef NEED_EXPAT + rc=XML_Parse(xp, buffer, len, (len < RBS)); + if(len < RBS) + break; + if(!rc) /* expat: 0 is failure */ + break; +#endif +#ifdef NEED_LIBXML + rc=xmlParseChunk(xc, buffer, len, 0); + if(len < RBS) + break; + if(rc) /* libxml: non 0 is failure */ + break; +#endif + } + fclose(fh); + +#ifdef NEED_EXPAT + if(!rc) { + int xe=XML_GetErrorCode(xp); + + locator->line=XML_GetCurrentLineNumber(xp); + locator->column=XML_GetCurrentColumnNumber(xp); + locator->byte=XML_GetCurrentByteIndex(xp); + + rapier_parser_error(rdf_parser, "XML Parsing failed - %s", + XML_ErrorString(xe)); + rc=1; + } else + rc=0; + + XML_ParserFree(xp); +#endif /* EXPAT */ +#ifdef NEED_LIBXML + if(rc) { + rapier_parser_error(parser, "XML Parsing failed"); +#endif + + LIBRDF_FREE(cstring, filename); + + return (rc != 0); +} + + +void +rapier_print_locator(FILE *stream, rapier_locator* locator) +{ + if(!locator) + return; + + if(locator->uri) + fprintf(stream, "URI %s", locator->uri); + else if (locator->file) + fprintf(stream, "file %s", locator->file); + else + return; + if(locator->line) { + fprintf(stream, ":%d", locator->line); + if(locator->column) + fprintf(stream, " column %d", locator->column); + } +} + + + +void +rapier_set_feature(rapier_parser *parser, rapier_feature feature, int value) { + switch(feature) { + case RAPIER_FEATURE_SCANNING: + parser->scanning_for_rdf_RDF=value; + break; + + case RAPIER_FEATURE_RDF_NON_NS_ATTRIBUTES: + parser->forbid_rdf_non_ns_attributes=!value; /* negative logic - FIXME */ + break; + + default: + break; + } +} diff --git a/utils/.cvsignore b/utils/.cvsignore new file mode 100644 index 
00000000..703e31de --- /dev/null +++ b/utils/.cvsignore @@ -0,0 +1,31 @@ +*.rdf +*.rdfs +*.log +*test +.deps* +ANNOUNCE* +ChangeLog +Makefile +Makefile.in +NEWS +README +TODO +aclocal.m4 +config.cache +config.guess +config.log +config.status +config.sub +configure +dmalloc* +example? +install-sh +missing +mkinstalldirs +not-used +old* +config.h +stamp-h* +test* +rdfdump +*.txt diff --git a/utils/Makefile.am b/utils/Makefile.am new file mode 100644 index 00000000..cc1c6c08 --- /dev/null +++ b/utils/Makefile.am @@ -0,0 +1,134 @@ +# -*- Mode: Makefile -*- +# +# Makefile.am - automake file for Rapier +# +# $Id$ +# +# Copyright (C) 2000 David Beckett - http://purl.org/net/dajobe/ +# Institute for Learning and Research Technology, University of Bristol. +# +# This package is Free Software available under either of two licenses +# (see FAQS.html to see why): +# +# 1. The GNU Lesser General Public License (LGPL) +# +# See http://www.gnu.org/copyleft/lesser.html or COPYING.LIB for the +# full license text. +# _________________________________________________________________ +# +# Copyright (C) 2000 David Beckett, Institute for Learning and +# Research Technology, University of Bristol. All Rights Reserved. +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public License +# as published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA +# _________________________________________________________________ +# +# NOTE - under Term 3 of the LGPL, you may choose to license the entire +# library under the GPL. See COPYING for the full license text. +# +# 2. The Mozilla Public License +# +# See http://www.mozilla.org/MPL/MPL-1.1.html or MPL.html for the full +# license text. +# +# Under MPL section 13. I declare that all of the Covered Code is +# Multiple Licensed: +# _________________________________________________________________ +# +# The contents of this file are subject to the Mozilla Public License +# version 1.1 (the "License"); you may not use this file except in +# compliance with the License. You may obtain a copy of the License +# at http://www.mozilla.org/MPL/ +# +# Software distributed under the License is distributed on an "AS IS" +# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +# the License for the specific language governing rights and +# limitations under the License. +# +# The Initial Developer of the Original Code is David Beckett. +# Portions created by David Beckett are Copyright (C) 2000 David +# Beckett, Institute for Learning and Research Technology, University +# of Bristol. All Rights Reserved. +# +# Alternatively, the contents of this file may be used under the +# terms of the GNU Lesser General Public License, in which case the +# provisions of the LGPL License are applicable instead of those +# above. If you wish to allow use of your version of this file only +# under the terms of the LGPL License and not to allow others to use +# your version of this file under the MPL, indicate your decision by +# deleting the provisions above and replace them with the notice and +# other provisions required by the LGPL License. 
If you do not delete +# the provisions above, a recipient may use your version of this file +# under either the MPL or the LGPL License. + + +bin_PROGRAMS = rdfdump +lib_LIBRARIES = librapier.a + +include_HEADERS = rapier.h + +rdfdump_LDADD = librapier.a + +librapier_a_SOURCES = rapier_parse.c + +librapier_a_LIBADD = @XML_OBJS@ +librapier_a_DEPENDENCIES = @XML_OBJS@ + +EXTRA_librapier_a_SOURCES = rdfdump.c + +EXTRA_DIST=ChangeLog \ +README NEWS LICENSE.txt \ +README.html NEWS.html LICENSE.html \ +INSTALL.html MPL.html \ +autogen.sh + +# Why is this not in the default makefile? +CC=@CC@ + +CFLAGS=@CFLAGS@ $(MEM) +STANDARD_CFLAGS=@STANDARD_CFLAGS@ $(MEM) +LIBS=@LIBS@ $(MEM_LIBS) + +# Memory debugging alternatives +MEM=@MEM@ +MEM_LIBS=@MEM_LIBS@ + +# 1) None (use standard functions directly) +#MEM= +#MEM_LIBS= + +# 2) Use dmalloc library +#MEM=-DRAPIER_MEMORY_DEBUG_DMALLOC=1 +#MEM_LIBS=-ldmalloc + +# Create some text files from HTML sources +LYNX=lynx +LYNXARGS=-dump -nolist + +SUFFIXES = .html .txt + +.html.txt: + $(LYNX) $(LYNXARGS) $< > $@ + +README: README.html + $(LYNX) $(LYNXARGS) $< > $@ + +NEWS: NEWS.html + $(LYNX) $(LYNXARGS) $< > $@ + +@SET_MAKE@ + +$(top_builddir)/expat/xmlparse/xmlparse.o $(top_builddir)/expat/xmlparse/hashtable.o $(top_builddir)/expat/xmltok/xmlrole.o $(top_builddir)/expat/xmltok/xmltok.o: + cd expat && $(MAKE) $(AM_MAKEFLAGS) all diff --git a/utils/rapper.c b/utils/rapper.c new file mode 100644 index 00000000..a2a78734 --- /dev/null +++ b/utils/rapper.c @@ -0,0 +1,203 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * rdfdump.c - Rapier example code using parse to print all RDF callbacks + * + * $Id$ + * + * Copyright (C) 2000 David Beckett - http://purl.org/net/dajobe/ + * Institute for Learning and Research Technology, University of Bristol. + * + * This package is Free Software available under either of two licenses + * (see FAQS.html to see why): + * + * 1. 
The GNU Lesser General Public License (LGPL) + * + * See http://www.gnu.org/copyleft/lesser.html or COPYING.LIB for the + * full license text. + * _________________________________________________________________ + * + * Copyright (C) 2000 David Beckett, Institute for Learning and + * Research Technology, University of Bristol. All Rights Reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * _________________________________________________________________ + * + * NOTE - under Term 3 of the LGPL, you may choose to license the entire + * library under the GPL. See COPYING for the full license text. + * + * 2. The Mozilla Public License + * + * See http://www.mozilla.org/MPL/MPL-1.1.html or MPL.html for the full + * license text. + * + * Under MPL section 13. I declare that all of the Covered Code is + * Multiple Licensed: + * _________________________________________________________________ + * + * The contents of this file are subject to the Mozilla Public License + * version 1.1 (the "License"); you may not use this file except in + * compliance with the License. 
You may obtain a copy of the License + * at http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and + * limitations under the License. + * + * The Initial Developer of the Original Code is David Beckett. + * Portions created by David Beckett are Copyright (C) 2000 David + * Beckett, Institute for Learning and Research Technology, University + * of Bristol. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the + * terms of the GNU Lesser General Public License, in which case the + * provisions of the LGPL License are applicable instead of those + * above. If you wish to allow use of your version of this file only + * under the terms of the LGPL License and not to allow others to use + * your version of this file under the MPL, indicate your decision by + * deleting the provisions above and replace them with the notice and + * other provisions required by the LGPL License. If you do not delete + * the provisions above, a recipient may use your version of this file + * under either the MPL or the LGPL License. 
+ */ + + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdio.h> +#include <string.h> + +#include <rapier.h> + + +/* one prototype needed */ +int main(int argc, char *argv[]); + + +static +void print_triples(void *userData, + const char *subject, rapier_subject_type subject_type, + const char *predicate, rapier_predicate_type predicate_type, + const char *object, rapier_object_type object_type) +{ + fprintf(stderr, "subject: %s predicate: %s object: %s\n", + subject, predicate, object); +} + + +#include <getopt.h> + + +int +main(int argc, char *argv[]) +{ + rapier_parser* parser; + char *uri; + char *program=argv[0]; + int rc; + int scanning=0; + int usage=0; + +#define GETOPT_STRING "sh" + +#ifdef HAVE_GETOPT_LONG + static struct option long_options[] = + { + /* name, has_arg, flag, val */ + {"scan", 0, 0, 's'}, + {"help", 0, 0, 'h'}, + {NULL, 0, 0, 0} + }; +#endif + + + while (!usage) + { + int c; +#ifdef HAVE_GETOPT_LONG + int option_index = 0; + + c = getopt_long (argc, argv, GETOPT_STRING, long_options, &option_index); +#else + c = getopt (argc, argv, GETOPT_STRING); +#endif + if (c == -1) + break; + + switch (c) + { + case 0: + case '?': /* getopt() - unknown option */ +#ifdef HAVE_GETOPT_LONG + fprintf(stderr, "Unknown option %s\n", long_options[option_index].name); +#else + fprintf(stderr, "Unknown option %s\n", argv[optind]); +#endif + usage=1; + break; + + case 'h': + usage=1; + break; + + case 's': + scanning=1; + break; + } + + } + + if(optind != argc-1) + usage=2; /* usage and error */ + + + if(usage) { + fprintf(stderr, "Usage: %s [OPTIONS] <RDF source file: URI>\n", program); + fprintf(stderr, "Parse the given file as RDF using Rapier\n"); + fprintf(stderr, " -h, --help : This message\n"); + fprintf(stderr, " -s, --scan : Scan for <rdf:RDF> element in source\n"); + return(usage>1); + } + + uri=argv[optind]; + + parser=rapier_new(); + if(!parser) { + fprintf(stderr, "%s: Failed to create rapier parser\n", program); + return(1); + } + 
+ + if(scanning) + rapier_set_feature(parser, RAPIER_FEATURE_SCANNING, 1); + + + /* PARSE the URI as RDF/XML*/ + fprintf(stdout, "%s: Parsing URI %s\n", program, uri); + + rapier_set_triple_handler(parser, NULL, print_triples); + + if(rapier_parse_file(parser, uri, uri)) { + fprintf(stderr, "%s: Failed to parse RDF into model\n", program); + rc=1; + } else + rc=0; + rapier_free(parser); + + return(rc); +} |