summaryrefslogtreecommitdiff
path: root/chromium/net/dns/dns_hosts.cc
blob: 852d35c8bb4e1b99155ba3f946b805005258305b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/dns/dns_hosts.h"

#include "base/file_util.h"
#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "base/strings/string_util.h"
#include "base/strings/string_tokenizer.h"

using base::StringPiece;

namespace net {

// Parses the contents of a hosts file.  Returns one token (IP or hostname) at
// a time.  Doesn't copy anything; accepts the file as a StringPiece and
// returns tokens as StringPieces.
class HostsParser {
 public:
  explicit HostsParser(const StringPiece& text)
      : text_(text),
        data_(text.data()),
        end_(text.size()),
        pos_(0),
        token_(),
        token_is_ip_(false) {}

  // Advances to the next token (IP or hostname).  Returns whether another
  // token was available.  |token_is_ip| and |token| can be used to find out
  // the type and text of the token.
  bool Advance() {
    bool next_is_ip = (pos_ == 0);
    while (pos_ < end_ && pos_ != std::string::npos) {
      switch (text_[pos_]) {
        case ' ':
        case '\t':
          SkipWhitespace();
          break;

        case '\r':
        case '\n':
          next_is_ip = true;
          pos_++;
          break;

        case '#':
          SkipRestOfLine();
          break;

        default: {
          size_t token_start = pos_;
          SkipToken();
          size_t token_end = (pos_ == std::string::npos) ? end_ : pos_;

          token_ = StringPiece(data_ + token_start, token_end - token_start);
          token_is_ip_ = next_is_ip;

          return true;
        }
      }
    }

    text_ = StringPiece();
    return false;
  }

  // Fast-forwards the parser to the next line.  Should be called if an IP
  // address doesn't parse, to avoid wasting time tokenizing hostnames that
  // will be ignored.
  void SkipRestOfLine() {
    pos_ = text_.find("\n", pos_);
  }

  // Returns whether the last-parsed token is an IP address (true) or a
  // hostname (false).
  bool token_is_ip() { return token_is_ip_; }

  // Returns the text of the last-parsed token as a StringPiece referencing
  // the same underlying memory as the StringPiece passed to the constructor.
  // Returns an empty StringPiece if no token has been parsed or the end of
  // the input string has been reached.
  const StringPiece& token() { return token_; }

 private:
  void SkipToken() {
    pos_ = text_.find_first_of(" \t\n\r#", pos_);
  }

  void SkipWhitespace() {
    pos_ = text_.find_first_not_of(" \t", pos_);
  }

  StringPiece text_;
  const char* data_;
  const size_t end_;

  size_t pos_;
  StringPiece token_;
  bool token_is_ip_;

  DISALLOW_COPY_AND_ASSIGN(HostsParser);
};



void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) {
  CHECK(dns_hosts);
  DnsHosts& hosts = *dns_hosts;

  StringPiece ip_text;
  IPAddressNumber ip;
  AddressFamily family = ADDRESS_FAMILY_IPV4;
  HostsParser parser(contents);
  while (parser.Advance()) {
    if (parser.token_is_ip()) {
      StringPiece new_ip_text = parser.token();
      // Some ad-blocking hosts files contain thousands of entries pointing to
      // the same IP address (usually 127.0.0.1).  Don't bother parsing the IP
      // again if it's the same as the one above it.
      if (new_ip_text != ip_text) {
        IPAddressNumber new_ip;
        if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) {
          ip_text = new_ip_text;
          ip.swap(new_ip);
          family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6;
        } else {
          parser.SkipRestOfLine();
        }
      }
    } else {
      DnsHostsKey key(parser.token().as_string(), family);
      StringToLowerASCII(&key.first);
      IPAddressNumber& mapped_ip = hosts[key];
      if (mapped_ip.empty())
        mapped_ip = ip;
      // else ignore this entry (first hit counts)
    }
  }
}

bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) {
  dns_hosts->clear();
  // Missing file indicates empty HOSTS.
  if (!base::PathExists(path))
    return true;

  int64 size;
  if (!file_util::GetFileSize(path, &size))
    return false;

  UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size);

  // Reject HOSTS files larger than |kMaxHostsSize| bytes.
  const int64 kMaxHostsSize = 1 << 25;  // 32MB
  if (size > kMaxHostsSize)
    return false;

  std::string contents;
  if (!file_util::ReadFileToString(path, &contents))
    return false;

  ParseHosts(contents, dns_hosts);
  return true;
}

}  // namespace net