diff options
author | Dave Beckett <dave@dajobe.org> | 2007-08-15 00:26:28 +0000 |
---|---|---|
committer | Dave Beckett <dave@dajobe.org> | 2007-08-15 00:26:28 +0000 |
commit | e3a7375fa980393042662bf736fcedbeef35c0ef (patch) | |
tree | b5163b6d01041132221a9ef48915843f64d17f00 /src/raptor_rdfxml.c | |
parent | 51a3e22301148af58a81eda8e2d58a63a37f848e (diff) | |
download | raptor-e3a7375fa980393042662bf736fcedbeef35c0ef.tar.gz |
(raptor_rdfxml_parse_recognise_syntax): Apply a negative score if html is in the mime type. Do not recognize as rdf if <html is present or the html namespace is declared.
Diffstat (limited to 'src/raptor_rdfxml.c')
-rw-r--r-- | src/raptor_rdfxml.c | 25 |
1 files changed, 15 insertions, 10 deletions
diff --git a/src/raptor_rdfxml.c b/src/raptor_rdfxml.c index 16025b4f..ca712f87 100644 --- a/src/raptor_rdfxml.c +++ b/src/raptor_rdfxml.c @@ -1193,13 +1193,14 @@ raptor_rdfxml_parse_recognise_syntax(raptor_parser_factory* factory, score+=2; } - if(mime_type && - (!strcmp((const char*)mime_type, "text/rdf"))) - score+=7; - - if(mime_type && - (!strcmp((const char*)mime_type, "application/xml"))) - score+=5; + if(mime_type) { + if(!strstr((const char*)mime_type, "html")) + score-= 4; + else if(!strcmp((const char*)mime_type, "text/rdf")) + score+= 7; + else if(!strcmp((const char*)mime_type, "application/xml")) + score+= 5; + } if(buffer && len) { /* Check it's an XML namespace declared and not N3 or Turtle which @@ -1213,14 +1214,18 @@ raptor_rdfxml_parse_recognise_syntax(raptor_parser_factory* factory, #define HAS_RDF_ENTITY2 (strstr((const char*)buffer, "<!ENTITY rdf \"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">") != NULL) #define HAS_RDF_ENTITY3 (strstr((const char*)buffer, "xmlns:rdf=\"&rdf;\"") != NULL) #define HAS_RDF_ENTITY4 (strstr((const char*)buffer, "xmlns:rdf='&rdf;'") != NULL) +#define HAS_HTML_NS (strstr((const char*)buffer, "http://www.w3.org/1999/xhtml") != NULL) +#define HAS_HTML_ROOT (strstr((const char*)buffer, "<html") != NULL) - if(HAS_RDF_XMLNS1 || HAS_RDF_XMLNS2 || HAS_RDF_XMLNS3 || HAS_RDF_XMLNS4 || - HAS_RDF_ENTITY1 || HAS_RDF_ENTITY2 || HAS_RDF_ENTITY3 || HAS_RDF_ENTITY4) { + if(!HAS_HTML_NS && !HAS_HTML_ROOT && + (HAS_RDF_XMLNS1 || HAS_RDF_XMLNS2 || HAS_RDF_XMLNS3 || HAS_RDF_XMLNS4 || + HAS_RDF_ENTITY1 || HAS_RDF_ENTITY2 || HAS_RDF_ENTITY3 || HAS_RDF_ENTITY4) + ) { int has_rdf_RDF=(strstr((const char*)buffer, "<rdf:RDF") != NULL); int has_rdf_Description=(strstr((const char*)buffer, "rdf:Description") != NULL); int has_rdf_about=(strstr((const char*)buffer, "rdf:about") != NULL); - score=7; + score+= 7; if(has_rdf_RDF) score++; if(has_rdf_Description) |