summaryrefslogtreecommitdiff
path: root/support/iana_registry.rb
blob: 25c353e72777dad814326a79130d62c8e56977b2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# -*- ruby encoding: utf-8 -*-

$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)

require 'open-uri'
require 'nokogiri'
require 'cgi'
require 'pathname'
require 'yaml'

ENV['RUBY_MIME_TYPES_LAZY_LOAD'] = 'yes'
require 'mime/types'

# This script is an internal maintenance tool, so MIME::Types.deprecated is
# replaced with a no-op here: whatever arguments or block it is given are
# accepted and ignored, which silences deprecation warnings.
class MIME::Types
  class << self
    def deprecated(*_args, &_block); end
  end
end

# Downloads the IANA media type registry XML and merges each per-type
# sub-registry into a "<type>.yaml" file in a destination directory.
#
# One instance handles one sub-registry (one top-level media type): #parse
# merges the registry records into the types already loaded from the YAML
# file (if it exists), and #save writes the merged list back.
class IANARegistry
  # Default source URL and destination directory used by ::download.
  DEFAULTS = {
    url: %q(https://www.iana.org/assignments/media-types/media-types.xml),
    to: Pathname(__FILE__).join('../../type-lists')
  }.freeze.each_value(&:freeze)

  # Format string for a reference to an IANA assignment (template) file; both
  # placeholders receive the same file name.
  ASSIGNMENT_FILE_REF =
    '{%s=http://www.iana.org/assignments/media-types/%s}'.freeze

  # Fetches the registry XML and, for every nested registry except the one
  # titled "example", parses it and saves the resulting type list.
  #
  # options:
  # :url:: Location of the registry XML. Must be a URL that open-uri can
  #        open. Defaults to DEFAULTS[:url] when missing or nil.
  # :to::  Destination directory. Defaults to DEFAULTS[:to] when missing or
  #        nil.
  def self.download(options = {})
    dest = Pathname(options[:to] || DEFAULTS[:to]).expand_path
    url  = options[:url] || DEFAULTS[:url]

    puts 'Downloading IANA MIME type assignments.'
    puts "\t#{url}"
    # Kernel#open no longer opens URLs on Ruby 3.0+ (and interprets a leading
    # "|" as a command), so go through the URI object that open-uri extends.
    xml = Nokogiri::XML(URI.parse(url).open(&:read))

    xml.css('registry registry').each do |registry|
      next if registry.at_css('title').text == 'example'
      new(registry: registry, to: dest) do |parser|
        puts "Extracting #{parser.type}/*."
        parser.parse
        parser.save
      end
    end
  end

  # The text of the registry's <title> element; used as the media type part
  # of every content type built by #parse and as the YAML file's base name.
  attr_reader :type

  # options:
  # :registry:: (required) The parsed registry node; it must respond to
  #             +css+ and +at_css+.
  # :to::       (required) Directory holding the "<type>.yaml" file.
  #
  # Raises KeyError when either option is missing. Yields the new instance
  # when a block is given.
  def initialize(options = {})
    @registry = options.fetch(:registry)
    @to       = Pathname(options.fetch(:to)).expand_path
    @type     = @registry.at_css('title').text
    @name     = "#{@type}.yaml"
    @file     = @to.join(@name)
    @types    = mime_types_for(@file)

    yield self if block_given?
  end

  # Walks every <record> of the registry and either updates the already-known
  # types with the same content type (compared case-insensitively) or adds a
  # new type to the list.
  def parse
    @registry.css('record').each do |record|
      name        = record.at_css('name').text
      obsolete    = text_at(record, 'obsolete')
      use_instead = text_at(record, 'deprecated')

      # Some records carry the obsolescence notice inside the name itself,
      # e.g. "... OBSOLETED in favor of <replacement>".
      if name =~ /OBSOLETE|DEPRECATE/i
        use_instead ||= Regexp.last_match(1) if name =~ /in favou?r of (.*)/
        obsolete = true
      end

      # Anything after the first space in the name is kept as a note.
      subtype, notes = name.split(/ /, 2)

      refs, xrefs = parse_refs_and_files(
        record.css('xref'),
        record.css('file'),
        subtype
      )

      xrefs['notes'] << notes if notes

      content_type = [@type, subtype].join('/')
      wanted       = content_type.downcase
      known        = @types.select { |t| t.content_type.downcase == wanted }

      if known.empty?
        MIME::Type.new(content_type) do |mt|
          apply_registration(mt, refs, xrefs, obsolete, use_instead)
          @types << mt
        end
      else
        known.each do |mt|
          apply_registration(mt, refs, xrefs, obsolete, use_instead)
        end
      end
    end
  end

  # Creates the destination directory if needed and writes the sorted,
  # de-duplicated type list to the YAML file.
  def save
    @to.mkpath
    File.open(@file, 'wb') { |f| f.puts @types.to_a.sort.uniq.to_yaml }
  end

  private

  # Returns the text of the first node under +node+ matching +selector+, or
  # nil when there is no such node.
  def text_at(node, selector)
    found = node.at_css(selector)
    found && found.text
  end

  # Copies the data extracted from one registry record onto +mime_type+.
  # +obsolete+ and +use_instead+ are only assigned when truthy, so values
  # already present on an existing type are otherwise left alone.
  def apply_registration(mime_type, refs, xrefs, obsolete, use_instead)
    mime_type.references  = %w(IANA) + refs
    mime_type.xrefs       = xrefs
    mime_type.registered  = true
    mime_type.obsolete    = obsolete if obsolete
    mime_type.use_instead = use_instead if use_instead
  end

  # Loads the existing type list from +file+ when it exists; otherwise starts
  # with an empty MIME::Types collection.
  def mime_types_for(file)
    if file.exist?
      MIME::Types::Loader.load_from_yaml(file)
    else
      MIME::Types.new
    end
  end

  # Builds the reference data for one record.
  #
  # refs::    <xref> nodes; each is read through its 'type' and 'data'
  #           attributes.
  # files::   <file> nodes; each is read through its text and 'type'
  #           attribute.
  # subtype:: The record's subtype; a file whose text equals it is recorded
  #           as "<type>/<subtype>".
  #
  # Returns a two-element array: the list of reference strings and a
  # MIME::Types::Container mapping each xref/file type to its values.
  def parse_refs_and_files(refs, files, subtype)
    container  = MIME::Types::Container.new
    references = []

    refs.each do |xref|
      kind  = xref['type']
      value = xref['data']

      references << ref_from_type(kind, value)
      container[kind] << value
    end

    files.each do |file|
      file_name =
        if file.text == subtype
          [@type, subtype].join('/')
        else
          file.text
        end

      # Only template files contribute a reference string.
      if file['type'] == 'template'
        references << format(ASSIGNMENT_FILE_REF, file_name, file_name)
      end

      container[file['type']] << file_name
    end

    [references, container]
  end

  # Formats a single xref as a reference string according to its +type+.
  # Unrecognised types (including 'text') return +data+ unchanged.
  def ref_from_type(type, data)
    case type
    when 'person'
      "[#{data}]"
    when 'rfc'
      data.upcase
    when 'draft'
      "DRAFT:#{data.sub(/^RFC-/, 'draft-')}"
    when 'rfc-errata'
      "{RFC Errata #{data}=http://www.rfc-editor.org/errata_search.php?eid=#{data}}"
    when 'uri'
      "{#{data}}"
    else # 'text' or something else
      data
    end
  end
end