summaryrefslogtreecommitdiff
path: root/support/iana_registry.rb
blob: 8f02176f846d9970044ad316248aca4170c84981 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# -*- ruby encoding: utf-8 -*-

# Put the `lib` directory that sits two levels above this file's path at the
# front of the load path, so the `require 'mime/types'` below searches it
# first (presumably the in-repository copy of the library -- confirm).
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)

require 'open-uri'
require 'nokogiri'
require 'cgi'
require 'pathname'
require 'yaml'

# Must be set before `mime/types` is required. NOTE(review): presumably this
# asks the library to defer loading its default type registry; verify against
# the mime-types documentation.
ENV['RUBY_MIME_TYPES_LAZY_LOAD'] = 'yes'
require 'mime/types'

# Fetches the IANA media-type registry XML and merges its records into
# per-registry YAML files ("<title>.yaml") under a destination directory.
#
# Each nested <registry> element is handled by one instance: existing types
# are loaded from the YAML file (when it exists), updated or extended from the
# <record> elements, and written back by #save.
class IANARegistry
  # Default source URL and destination directory used by ::download.
  DEFAULTS = {
    url: %q(https://www.iana.org/assignments/media-types/media-types.xml),
    to: Pathname(__FILE__).join('../../type-lists')
  }.freeze.each_value(&:freeze)

  # Format for a reference to an assignment template file:
  # "{<label>=<url ending in the file name>}".
  ASSIGNMENT_FILE_REF =
    "{%s=http://www.iana.org/assignments/media-types/%s}".freeze

  # Downloads the registry and processes every nested registry except the one
  # titled "example".
  #
  # options[:to]::  destination directory (defaults to DEFAULTS[:to]).
  # options[:url]:: registry location (defaults to DEFAULTS[:url]).
  def self.download(options = {})
    dest = Pathname(options[:to] || DEFAULTS[:to]).expand_path
    url  = options.fetch(:url, DEFAULTS[:url])

    puts "Downloading IANA MIME type assignments."
    puts "\t#{url}"
    xml  = Nokogiri::XML(read_registry(url))

    xml.css('registry registry').each do |registry|
      next if registry.at_css('title').text == 'example'
      new(registry: registry, to: dest) do |parser|
        puts "Extracting #{parser.type}/*."
        parser.parse
        parser.save
      end
    end
  end

  # Returns the registry document as a String.
  #
  # http(s)/ftp locations are opened through the parsed URI object (provided
  # by open-uri) rather than Kernel#open, which no longer accepts URLs on
  # Ruby 3.0+ and would treat a leading "|" as a command. Anything else is
  # read as a local file path.
  def self.read_registry(url)
    source = url.to_s
    return File.read(source) unless source =~ %r{\A(?:https?|ftp)://}i

    URI.parse(source).open { |io| io.read }
  end
  private_class_method :read_registry

  # The registry title, used as the top-level media type (e.g. the part
  # before the "/" in the content types built by #parse).
  attr_reader :type

  # options[:registry]:: the <registry> node to read (required).
  # options[:to]::       destination directory (required).
  #
  # Yields the new instance when a block is given.
  def initialize(options = {})
    @registry = options.fetch(:registry)
    @to       = Pathname(options.fetch(:to)).expand_path
    @type     = @registry.at_css('title').text
    @name     = "#{@type}.yaml"
    @file     = @to.join(@name)
    @types    = mime_types_for(@file)

    yield self if block_given?
  end

  # Walks every <record> in the registry and either updates the already-known
  # types with the same content type (compared case-insensitively) or adds a
  # new type when none is known.
  def parse
    @registry.css('record').each do |record|
      subtype      = record.at_css('name').text
      refs, xrefs  = parse_refs_and_files(record.css('xref'),
                                          record.css('file'),
                                          subtype)

      content_type = [ @type, subtype ].join('/')
      wanted       = content_type.downcase

      # Only the presence of <obsolete> matters, so reduce it to a boolean.
      obsolete     = !record.at_css('obsolete').nil?
      # <deprecated> is optional; check for it explicitly instead of
      # rescuing whatever error a missing node would raise.
      deprecated   = record.at_css('deprecated')
      use_instead  = deprecated && deprecated.text

      known = @types.select { |t| t.content_type.downcase == wanted }

      if known.empty?
        created = MIME::Type.new(content_type)
        apply_registration(created, refs, xrefs, obsolete, use_instead)
        @types << created
      else
        known.each do |mt|
          apply_registration(mt, refs, xrefs, obsolete, use_instead)
        end
      end
    end
  end

  # Creates the destination directory if needed and writes the sorted types
  # to the registry's YAML file.
  def save
    @to.mkpath
    File.open(@file, 'wb') { |f| f.puts @types.to_a.sort.to_yaml }
  end

  private

  # Loads the existing types from +file+ when it exists, otherwise starts
  # with an empty collection.
  def mime_types_for(file)
    if file.exist?
      MIME::Types::Loader.load_from_yaml(file)
    else
      MIME::Types.new
    end
  end

  # Copies the registration details of one record onto +mt+. The obsolete and
  # use_instead attributes are only touched when the record supplies them.
  def apply_registration(mt, refs, xrefs, obsolete, use_instead)
    mt.references  = %w(IANA) + refs
    mt.xrefs       = xrefs
    mt.registered  = true
    mt.obsolete    = true if obsolete
    mt.use_instead = use_instead if use_instead
  end

  # Builds the reference list and the cross-reference container for a record.
  #
  # refs::    the record's <xref> elements (each read via ["type"]/["data"]).
  # files::   the record's <file> elements (read via #text and ["type"]).
  # subtype:: the record's subtype name; a file whose text equals it is
  #           recorded as "<type>/<subtype>".
  #
  # Returns a two-element array: [reference strings, container keyed by
  # xref/file type].
  def parse_refs_and_files(refs, files, subtype = nil)
    # NOTE(review): relies on the container handing back an empty,
    # appendable collection for keys it has not seen yet -- confirm.
    xr = MIME::Types::Container.new
    r  = []

    refs.each do |xref|
      type, data = xref["type"], xref["data"]

      # Fix some known-broken links that are actually people.
      # NOTE(review): the "htmll" spelling is kept as-is on the assumption
      # that it mirrors the broken upstream data -- confirm.
      if type == 'uri'
        case data
        when /contact-people.htmll#Dolan\z/
          type, data = "person", "Dolan"
        when /contact-people.htmll#Rottmann?\z/
          type, data = "person", "Frank_Rottman"
        end
      end

      r << ref_from_type(type, data)
      xr[type] << data
    end

    files.each do |file|
      file_name = if file.text == subtype
                    [ @type, subtype ].join('/')
                  else
                    file.text
                  end

      if file["type"] == "template"
        r << (ASSIGNMENT_FILE_REF % [ file_name, file_name ])
      end

      xr[file["type"]] << file_name
    end

    [ r, xr ]
  end

  # Formats a single cross-reference as a reference string according to its
  # type; unknown types (including 'text') are returned unchanged.
  def ref_from_type(type, data)
    case type
    when 'person'
      "[#{data}]"
    when 'rfc'
      data.upcase
    when 'draft'
      "DRAFT:#{data.sub(/^RFC-/, 'draft-')}"
    when 'rfc-errata'
      "{RFC Errata #{data}=http://www.rfc-editor.org/errata_search.php?eid=#{data}}"
    when 'uri'
      "{#{data}}"
    else # 'text' or something else
      data
    end
  end
end