# summary | refs | log | tree | commit | diff
# path: root/support/iana_registry.rb
# blob: 12b918a097554915b3dea4c239e455c0629b477b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# -*- ruby encoding: utf-8 -*-

# Put the lib directory (resolved relative to this file's location) at the
# front of the load path so the requires below look there first.
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)

require 'open-uri'
require 'nokogiri'
require 'cgi'
require 'pathname'
require 'yaml'

# This variable is set before mime/types is required; presumably the library
# reads it at load time to defer loading its type registry -- TODO confirm.
ENV['RUBY_MIME_TYPES_LAZY_LOAD'] = 'yes'
require 'mime/types'

# Reads the IANA media type registry XML and merges its records into one YAML
# file per nested <registry> element, named after that registry's <title>.
class IANARegistry
  # Default registry URL and output directory (relative to this file).
  DEFAULTS = {
    url: %q(https://www.iana.org/assignments/media-types/media-types.xml),
    to: Pathname(__FILE__).join('../../type-lists')
  }.freeze.each_value(&:freeze)

  # Format string for the reference recorded for a <file type="template">
  # entry; both placeholders receive the same file name.
  ASSIGNMENT_FILE_REF = "{%s=http://www.iana.org/assignments/media-types/%s}".freeze

  # Downloads the registry document and, for every nested registry whose
  # title is not "example", parses it and saves the resulting type list.
  #
  # options[:url]:: document to fetch (defaults to DEFAULTS[:url]).
  # options[:to]::  output directory (defaults to DEFAULTS[:to]).
  def self.download(options = {})
    dest = Pathname(options[:to] || DEFAULTS[:to]).expand_path
    url  = options.fetch(:url, DEFAULTS[:url])

    puts "Downloading IANA MIME type assignments."
    puts "\t#{url}"
    # open-uri no longer teaches Kernel#open to fetch URLs on Ruby >= 3.0, so
    # prefer URI.open where it exists and fall back to Kernel.open on Rubies
    # that predate it.
    opener = URI.respond_to?(:open) ? URI : Kernel
    xml    = Nokogiri::XML(opener.open(url, &:read))

    xml.css('registry registry').each do |registry|
      next if registry.at_css('title').text == 'example'
      new(registry: registry, to: dest) do |parser|
        puts "Extracting #{parser.type}/*."
        parser.parse
        parser.save
      end
    end
  end

  # The text of the registry's <title> element.
  attr_reader :type

  # options[:registry]:: the registry node to read (required).
  # options[:to]::       output directory (required).
  #
  # Loads the existing "<type>.yaml" list from the output directory when the
  # file exists. Yields the new instance when a block is given.
  def initialize(options = {})
    @registry = options.fetch(:registry)
    @to       = Pathname(options.fetch(:to)).expand_path
    @type     = @registry.at_css('title').text
    @name     = "#{@type}.yaml"
    @file     = @to.join(@name)
    @types    = mime_types_for(@file)

    yield self if block_given?
  end

  # Walks every <record> of the registry and either updates the already-known
  # types with the same content type (compared case-insensitively) or adds a
  # new MIME::Type for it.
  def parse
    @registry.css('record').each do |record|
      subtype      = record.at_css('name').text
      content_type = [ @type, subtype ].join('/')
      refs         = references_for(record)
      xrefs        = xrefs_for(record)

      record.css('file').each do |file|
        # A file named exactly like the subtype stands for the full type name.
        file_name = file.text == subtype ? content_type : file.text

        if file["type"] == "template"
          refs << (ASSIGNMENT_FILE_REF % [ file_name, file_name ])
        end

        xrefs[file["type"]] << file_name
      end

      obsolete    = record.at_css('obsolete')
      deprecated  = record.at_css('deprecated')
      use_instead = deprecated && deprecated.text

      known = @types.select { |t|
        t.content_type.downcase == content_type.downcase
      }

      if known.empty?
        MIME::Type.new(content_type) do |mt|
          register(mt, refs, xrefs, obsolete, use_instead)
          @types << mt
        end
      else
        known.each { |mt| register(mt, refs, xrefs, obsolete, use_instead) }
      end
    end
  end

  # Creates the output directory if needed and writes the sorted type list to
  # the registry's YAML file.
  def save
    @to.mkpath
    File.open(@file, 'wb') { |f| f.puts @types.map.to_a.sort.to_yaml }
  end

  private

  # Returns the type list loaded from +file+ when it exists, otherwise an
  # empty MIME::Types collection.
  def mime_types_for(file)
    if file.exist?
      MIME::Types::Loader.load_from_yaml(file)
    else
      MIME::Types.new
    end
  end

  # Copies the registration details of one record onto +mt+.
  def register(mt, refs, xrefs, obsolete, use_instead)
    mt.references  = %w(IANA) + refs
    mt.xrefs       = xrefs
    mt.registered  = true
    # NOTE(review): the <obsolete> element itself is assigned here, not a
    # boolean -- confirm MIME::Type#obsolete= coerces it as intended.
    mt.obsolete    = obsolete if obsolete
    mt.use_instead = use_instead if use_instead
  end

  # Builds the list of reference strings for a record's <xref> elements.
  # Xrefs of an unrecognized type produce no reference string (they are still
  # recorded by #xrefs_for), so the list never contains nil.
  def references_for(record)
    record.css('xref').map { |xref|
      reference_for(xref["type"], xref["data"])
    }.compact
  end

  # Formats a single xref as a reference string; nil for unrecognized types.
  def reference_for(type, data)
    case type
    when 'person'
      "[#{data}]"
    when 'rfc'
      data.upcase
    when 'draft'
      "DRAFT:#{data.sub(/^RFC-/, 'draft-')}"
    when 'rfc-errata'
      "{RFC Errata #{data}=http://www.rfc-editor.org/errata_search.php?eid=#{data}}"
    when 'uri'
      person = known_broken_contact(data)
      person ? "[#{person}]" : "{#{data}}"
    when 'text'
      data
    end
  end

  # Groups a record's xref data by xref type. Relies on the container
  # supplying an appendable default for keys it has not seen yet.
  def xrefs_for(record)
    xrefs = MIME::Types::Container.new

    record.css('xref').each do |xref|
      type, data = xref["type"], xref["data"]
      person     = known_broken_contact(data) if type == 'uri'
      type, data = "person", person if person

      xrefs[type] << data
    end

    xrefs
  end

  # Maps a couple of known-broken contact links to the person they were meant
  # to reference; returns nil for every other value.
  def known_broken_contact(uri)
    case uri
    when /contact-people.htmll#Dolan\z/
      "Dolan"
    when /contact-people.htmll#Rottmann?\z/
      "Frank_Rottman"
    end
  end
end