From af145601df4329a4dc55ef0ce9ce5f8645f09d4f Mon Sep 17 00:00:00 2001 From: David Moss Date: Fri, 13 Jan 2017 00:20:49 +0000 Subject: - fixed a unicode vs bytes issue between Python 2.x and 3.x when reading and writing IEEE data files. --- CHANGELOG | 3 ++ netaddr/compat.py | 4 ++ netaddr/core.py | 19 ---------- netaddr/eui/__init__.py | 8 ++-- netaddr/eui/ieee.py | 84 +++++++++++++++++++++++------------------- tutorials/2.x/eui/tutorial.txt | 26 ++++++------- 6 files changed, 71 insertions(+), 73 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 3bc18ef..d89c6e6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -16,6 +16,9 @@ Changes since 0.7.18 * cleaned up INSTALL docs so they accurately reflect current Python packaging. +* fixed broken parsing, generating and reading of IEEE index files when switching + between Python 2.x and 3.x. + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Specific bug fixes addressed in this release ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/netaddr/compat.py b/netaddr/compat.py index ff54803..0fe69ee 100644 --- a/netaddr/compat.py +++ b/netaddr/compat.py @@ -19,6 +19,8 @@ if _sys.version_info[0] == 3: _str_type = str + _bytes_type = lambda x: bytes(x, 'UTF-8') + _is_str = lambda x: isinstance(x, (str, type(''.encode()))) _is_int = lambda x: isinstance(x, int) @@ -67,6 +69,8 @@ elif _sys.version_info[0:2] > [2, 3]: _str_type = basestring + _bytes_type = str + _is_str = lambda x: isinstance(x, basestring) _is_int = lambda x: isinstance(x, (int, long)) diff --git a/netaddr/core.py b/netaddr/core.py index 52f8930..f17eaba 100644 --- a/netaddr/core.py +++ b/netaddr/core.py @@ -204,22 +204,3 @@ class DictDotLookup(object): def __repr__(self): return _pprint.pformat(self.__dict__) - - -def dos2unix(filename): - """ - Replace DOS line endings (CRLF) with UNIX line endings (LF) in file. - - """ - fh = open(filename, "rb") - data = fh.read() - fh.close() - - if '\0' in data: - raise ValueError('file contains binary data: %s!' % filename) - - newdata = data.replace("\r\n".encode(), "\n".encode()) - if newdata != data: - f = open(filename, "wb") - f.write(newdata) - f.close() diff --git a/netaddr/eui/__init__.py b/netaddr/eui/__init__.py index d8f2456..aa79014 100644 --- a/netaddr/eui/__init__.py +++ b/netaddr/eui/__init__.py @@ -91,10 +91,10 @@ class OUI(BaseIdentifier): # Discover offsets. if self._value in ieee.OUI_INDEX: - fh = open(ieee.OUI_REGISTRY) + fh = open(ieee.OUI_REGISTRY_PATH, 'rb') for (offset, size) in ieee.OUI_INDEX[self._value]: fh.seek(offset) - data = fh.read(size) + data = fh.read(size).decode('UTF-8') self._parse_data(data, offset, size) fh.close() else: @@ -256,12 +256,12 @@ class IAB(BaseIdentifier): # Discover offsets. if self._value in ieee.IAB_INDEX: - fh = open(ieee.IAB_REGISTRY) + fh = open(ieee.IAB_REGISTRY_PATH, 'rb') (offset, size) = ieee.IAB_INDEX[self._value][0] self.record['offset'] = offset self.record['size'] = size fh.seek(offset) - data = fh.read(size) + data = fh.read(size).decode('UTF-8') self._parse_data(data, offset, size) fh.close() else: diff --git a/netaddr/eui/ieee.py b/netaddr/eui/ieee.py index 11ccd9f..36380f6 100755 --- a/netaddr/eui/ieee.py +++ b/netaddr/eui/ieee.py @@ -35,22 +35,23 @@ More details can be found at the following URLs :- import os.path as _path import csv as _csv +from netaddr.compat import _bytes_type from netaddr.core import Subscriber, Publisher #: Path to local copy of IEEE OUI Registry data file. -OUI_REGISTRY = _path.join(_path.dirname(__file__), 'oui.txt') +OUI_REGISTRY_PATH = _path.join(_path.dirname(__file__), 'oui.txt') #: Path to netaddr OUI index file. -OUI_METADATA = _path.join(_path.dirname(__file__), 'oui.idx') +OUI_INDEX_PATH = _path.join(_path.dirname(__file__), 'oui.idx') #: OUI index lookup dictionary. OUI_INDEX = {} #: Path to local copy of IEEE IAB Registry data file. -IAB_REGISTRY = _path.join(_path.dirname(__file__), 'iab.txt') +IAB_REGISTRY_PATH = _path.join(_path.dirname(__file__), 'iab.txt') #: Path to netaddr IAB index file. -IAB_METADATA = _path.join(_path.dirname(__file__), 'iab.idx') +IAB_INDEX_PATH = _path.join(_path.dirname(__file__), 'iab.idx') #: IAB index lookup dictionary. IAB_INDEX = {} @@ -118,7 +119,7 @@ class OUIIndexParser(Publisher): if hasattr(ieee_file, 'readline') and hasattr(ieee_file, 'tell'): self.fh = ieee_file else: - self.fh = open(ieee_file) + self.fh = open(ieee_file, 'rb') def parse(self): """ @@ -129,20 +130,24 @@ class OUIIndexParser(Publisher): record = None size = 0 + marker = _bytes_type('(hex)') + hyphen = _bytes_type('-') + empty_string = _bytes_type('') + while True: - line = self.fh.readline() # unbuffered to obtain correct offsets + line = self.fh.readline() if not line: break # EOF, we're done - if skip_header and '(hex)' in line: + if skip_header and marker in line: skip_header = False if skip_header: # ignoring header section continue - if '(hex)' in line: + if marker in line: # record start if record is not None: # a complete record. @@ -152,7 +157,7 @@ class OUIIndexParser(Publisher): size = len(line) offset = (self.fh.tell() - len(line)) oui = line.split()[0] - index = int(oui.replace('-', ''), 16) + index = int(oui.replace(hyphen, empty_string), 16) record = [index, offset] else: # within record @@ -197,7 +202,7 @@ class IABIndexParser(Publisher): if hasattr(ieee_file, 'readline') and hasattr(ieee_file, 'tell'): self.fh = ieee_file else: - self.fh = open(ieee_file) + self.fh = open(ieee_file, 'rb') def parse(self): """ @@ -207,20 +212,26 @@ class IABIndexParser(Publisher): skip_header = True record = None size = 0 + + hex_marker = _bytes_type('(hex)') + base16_marker = _bytes_type('(base 16)') + hyphen = _bytes_type('-') + empty_string = _bytes_type('') + while True: - line = self.fh.readline() # unbuffered + line = self.fh.readline() if not line: break # EOF, we're done - if skip_header and '(hex)' in line: + if skip_header and hex_marker in line: skip_header = False if skip_header: # ignoring header section continue - if '(hex)' in line: + if hex_marker in line: # record start if record is not None: record.append(size) @@ -231,12 +242,12 @@ class IABIndexParser(Publisher): index = iab_prefix record = [index, offset] size = len(line) - elif '(base 16)' in line: + elif base16_marker in line: # within record size += len(line) - prefix = record[0].replace('-', '') + prefix = record[0].replace(hyphen, empty_string) suffix = line.split()[0] - suffix = suffix.split('-')[0] + suffix = suffix.split(hyphen)[0] record[0] = (int(prefix + suffix, 16)) >> 12 else: # within record @@ -247,38 +258,37 @@ class IABIndexParser(Publisher): self.notify(record) -def create_indices(): - """Create indices for OUI and IAB file based lookups""" - oui_parser = OUIIndexParser(OUI_REGISTRY) - oui_parser.attach(FileIndexer(OUI_METADATA)) +def create_index_from_registry(registry_path, index_path, parser): + """Generate an index files from the IEEE registry file.""" + oui_parser = parser(registry_path) + oui_parser.attach(FileIndexer(index_path)) oui_parser.parse() - iab_parser = IABIndexParser(IAB_REGISTRY) - iab_parser.attach(FileIndexer(IAB_METADATA)) - iab_parser.parse() +def create_indices(): + """Create indices for OUI and IAB file based lookups""" + create_index_from_registry(OUI_REGISTRY_PATH, OUI_INDEX_PATH, OUIIndexParser) + create_index_from_registry(IAB_REGISTRY_PATH, IAB_INDEX_PATH, IABIndexParser) -def load_indices(): - """Load OUI and IAB lookup indices into memory""" - fp = open(OUI_METADATA) - try: - for row in _csv.reader(fp): - (key, offset, size) = [int(_) for _ in row] - OUI_INDEX.setdefault(key, []) - OUI_INDEX[key].append((offset, size)) - finally: - fp.close() - fp = open(IAB_METADATA) +def load_index(index, index_path): + """Load index from file into index data structure.""" + fp = open(index_path, 'rb') try: - for row in _csv.reader(fp): + for row in _csv.reader([x.decode('UTF-8') for x in fp]): (key, offset, size) = [int(_) for _ in row] - IAB_INDEX.setdefault(key, []) - IAB_INDEX[key].append((offset, size)) + index.setdefault(key, []) + index[key].append((offset, size)) finally: fp.close() +def load_indices(): + """Load OUI and IAB lookup indices into memory""" + load_index(OUI_INDEX, OUI_INDEX_PATH) + load_index(IAB_INDEX, IAB_INDEX_PATH) + + if __name__ == '__main__': # Generate indices when module is executed as a script. create_indices() diff --git a/tutorials/2.x/eui/tutorial.txt b/tutorials/2.x/eui/tutorial.txt index c6f4775..a8217c6 100644 --- a/tutorials/2.x/eui/tutorial.txt +++ b/tutorials/2.x/eui/tutorial.txt @@ -136,9 +136,9 @@ Here is how you query an OUI with the EUI interface. >>> oui OUI('00-1B-77') >>> oui.registration().address -['Lot 8, Jalan Hi-Tech 2/3', 'Kulim Kedah 09000', 'MY'] +[u'Lot 8, Jalan Hi-Tech 2/3', u'Kulim Kedah 09000', u'MY'] >>> oui.registration().org -'Intel Corporate' +u'Intel Corporate' You can also use OUI objects directly without going through the EUI interface. @@ -150,29 +150,29 @@ This example shows you how you access them individually by specifying an index n >>> oui OUI('08-00-30') >>> oui.registration(0).address -['2380 N. ROSE AVENUE', 'OXNARD CA 93010', 'US'] +[u'2380 N. ROSE AVENUE', u'OXNARD CA 93010', u'US'] >>> oui.registration(0).org -'NETWORK RESEARCH CORPORATION' +u'NETWORK RESEARCH CORPORATION' >>> oui.registration(0).oui '08-00-30' >>> oui.registration(1).address -['GPO BOX 2476V', 'MELBOURNE VIC 3001', 'AU'] +[u'GPO BOX 2476V', u'MELBOURNE VIC 3001', u'AU'] >>> oui.registration(1).org -'ROYAL MELBOURNE INST OF TECH' +u'ROYAL MELBOURNE INST OF TECH' >>> oui.registration(1).oui '08-00-30' >>> oui.registration(2).address -['CH-1211 GENEVE 23', 'SUISSE/SWITZ', 'CH'] +[u'CH-1211 GENEVE 23', u'SUISSE/SWITZ', u'CH'] >>> oui.registration(2).org -'CERN' +u'CERN' >>> oui.registration(2).oui '08-00-30' >>> for i in range(oui.reg_count): ... str(oui), oui.registration(i).org ... -('08-00-30', 'NETWORK RESEARCH CORPORATION') -('08-00-30', 'ROYAL MELBOURNE INST OF TECH') -('08-00-30', 'CERN') +('08-00-30', u'NETWORK RESEARCH CORPORATION') +('08-00-30', u'ROYAL MELBOURNE INST OF TECH') +('08-00-30', u'CERN') Here is how you query an IAB with the EUI interface. @@ -183,9 +183,9 @@ True >>> iab IAB('00-50-C2-00-00-00') >>> iab.registration() -{'address': ['1241 Superieor Ave E', 'Cleveland OH 44114', 'US'], +{'address': [u'1241 Superieor Ave E', u'Cleveland OH 44114', u'US'], 'iab': '00-50-C2-00-00-00', 'idx': 84680704, ... - 'org': 'T.L.S. Corp.', + 'org': u'T.L.S. Corp.', 'size': 537} -- cgit v1.2.1