summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristos Zoulas <christos@zoulas.com>2022-04-18 21:38:10 +0000
committerChristos Zoulas <christos@zoulas.com>2022-04-18 21:38:10 +0000
commit14b5d7aa0b55275969809fdf84e8a8caee857c0f (patch)
treeb69b672bd4eac86887bcaf2a89391434225c0a15
parent961e193e4519d40983322ed853cea6511d4b6494 (diff)
downloadfile-git-14b5d7aa0b55275969809fdf84e8a8caee857c0f.tar.gz
From Dirk Mueller:
* regex rules need literal dots escaped, otherwise they are considered any character * literal search strings can be searched using search rather than the much more expensive regex * use standard xml declaration search as used in other format matchers * only match the first 1024 bytes, the information we look for should be in the very first tag * remove unnecessary parentheses
-rw-r--r--magic/Magdir/dataone28
1 files changed, 14 insertions, 14 deletions
diff --git a/magic/Magdir/dataone b/magic/Magdir/dataone
index 8ef3f798..566633ef 100644
--- a/magic/Magdir/dataone
+++ b/magic/Magdir/dataone
@@ -1,6 +1,6 @@
#------------------------------------------------------------------------------
-# $File: dataone,v 1.2 2019/04/19 00:42:27 christos Exp $
+# $File: dataone,v 1.3 2022/04/18 21:38:10 christos Exp $
#
# DataONE- files from Dave Vieglais <dave.vieglais@gmail.com> &
# Pratik Shrivastava <pratikshrivastava23@gmail.com>
@@ -9,39 +9,39 @@
#------------------------------------------------------------------------------
# EML (Ecological Metadata Language Format)
-0 string <?xml
->&0 regex (eml)-[0-9].[0-9].[0-9]+ eml://ecoinformatics.org/%s
+0 string \<?xml\ version=
+>&0 regex/1024 eml-[0-9]\\.[0-9]\\.[0-9]+ eml://ecoinformatics.org/%s
# onedcx (DataONE Dublin Core Extended v1.0)
->&0 regex (onedcx/v)[0-9].[0-9]+ https://ns.dataone.org/metadata/schema/onedcx/v1.0
+>&0 regex/1024 onedcx/v[0-9]\\.[0-9]+ https://ns.dataone.org/metadata/schema/onedcx/v1.0
# FGDC-STD-001-1998 (Content Standard for Digital Geospatial Metadata,
# version 001-1998)
->&0 regex fgdc FGDC-STD-001-1998
+>&0 search/1024 fgdc FGDC-STD-001-1998
# Mercury (Oak Ridge National Lab Mercury Metadata version 1.0)
->&0 regex (mercury/terms/v)[0-9].[0-9] https://purl.org/ornl/schema/mercury/terms/v1.0
+>&0 regex/1024 mercury/terms/v[0-9]\\.[0-9] https://purl.org/ornl/schema/mercury/terms/v1.0
# ISOTC211 (Geographic MetaData (GMD) Extensible Markup Language)
->&0 regex isotc211
->>&0 regex eng;USA https://www.isotc211.org/2005/gmd
+>&0 search/1024 isotc211
+>>&0 search/1024 eng;USA https://www.isotc211.org/2005/gmd
# ISOTC211 (NOAA Variant Geographic MetaData (GMD) Extensible Markup Language)
->>&0 regex gov.noaa.nodc:[0-9]+ https://www.isotc211.org/2005/gmd-noaa
+>>&0 regex/1024 gov\\.noaa\\.nodc:[0-9]+ https://www.isotc211.org/2005/gmd-noaa
# ISOTC211 PANGAEA Variant Geographic MetaData (GMD) Extensible Markup Language
->>&0 regex pangaea.dataset[0-9][0-9][0-9][0-9][0-9][0-9]+ https://www.isotc211.org/2005/gmd-pangaea
+>>&0 regex/1024 pangaea\\.dataset[0-9][0-9][0-9][0-9][0-9][0-9]+ https://www.isotc211.org/2005/gmd-pangaea
!:mime text/xml
# Object Reuse and Exchange Vocabulary
-0 string <?xml
->&0 regex rdf
->>&0 regex openarchives https://www.openarchives.org/ore/terms
+0 string \<?xml\ version=
+>&0 search/1024 rdf
+>>&0 search/1024 openarchives https://www.openarchives.org/ore/terms
!:mime application/rdf+xml
# Dryad Metadata Application Profile Version 3.1
0 string <DryadData
->&0 regex (dryad-bibo/v)[0-9].[0-9] https://datadryad.org/profile/v3.1
+>&0 regex/1024 dryad-bibo/v[0-9]\\.[0-9] https://datadryad.org/profile/v3.1
!:mime text/xml