summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Moss <drkjam@gmail.com> 2017-01-13 00:20:49 +0000
committer: David Moss <drkjam@gmail.com> 2017-01-13 00:20:49 +0000
commit: af145601df4329a4dc55ef0ce9ce5f8645f09d4f (patch)
tree: 8bf9412b264fd6c972b57cc9a07311c9a3654bf3
parent: 0be135d644a4cc07d61fc3497c0acf74e662a34b (diff)
download: netaddr-0.7.19.tar.gz
- fixed a unicode vs bytes issue between Python 2.x and 3.x when reading and writing IEEE data files. (tag: netaddr-0.7.19)
-rw-r--r-- CHANGELOG 3
-rw-r--r-- netaddr/compat.py 4
-rw-r--r-- netaddr/core.py 19
-rw-r--r-- netaddr/eui/__init__.py 8
-rwxr-xr-x netaddr/eui/ieee.py 84
-rw-r--r-- tutorials/2.x/eui/tutorial.txt 26
6 files changed, 71 insertions, 73 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 3bc18ef..d89c6e6 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -16,6 +16,9 @@ Changes since 0.7.18
* cleaned up INSTALL docs so they accurately reflect current Python packaging.
+* fixed broken parsing, generating and reading of IEEE index files when switching
+ between Python 2.x and 3.x.
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Specific bug fixes addressed in this release
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/netaddr/compat.py b/netaddr/compat.py
index ff54803..0fe69ee 100644
--- a/netaddr/compat.py
+++ b/netaddr/compat.py
@@ -19,6 +19,8 @@ if _sys.version_info[0] == 3:
_str_type = str
+ _bytes_type = lambda x: bytes(x, 'UTF-8')
+
_is_str = lambda x: isinstance(x, (str, type(''.encode())))
_is_int = lambda x: isinstance(x, int)
@@ -67,6 +69,8 @@ elif _sys.version_info[0:2] > [2, 3]:
_str_type = basestring
+ _bytes_type = str
+
_is_str = lambda x: isinstance(x, basestring)
_is_int = lambda x: isinstance(x, (int, long))
diff --git a/netaddr/core.py b/netaddr/core.py
index 52f8930..f17eaba 100644
--- a/netaddr/core.py
+++ b/netaddr/core.py
@@ -204,22 +204,3 @@ class DictDotLookup(object):
def __repr__(self):
return _pprint.pformat(self.__dict__)
-
-
-def dos2unix(filename):
- """
- Replace DOS line endings (CRLF) with UNIX line endings (LF) in file.
-
- """
- fh = open(filename, "rb")
- data = fh.read()
- fh.close()
-
- if '\0' in data:
- raise ValueError('file contains binary data: %s!' % filename)
-
- newdata = data.replace("\r\n".encode(), "\n".encode())
- if newdata != data:
- f = open(filename, "wb")
- f.write(newdata)
- f.close()
diff --git a/netaddr/eui/__init__.py b/netaddr/eui/__init__.py
index d8f2456..aa79014 100644
--- a/netaddr/eui/__init__.py
+++ b/netaddr/eui/__init__.py
@@ -91,10 +91,10 @@ class OUI(BaseIdentifier):
# Discover offsets.
if self._value in ieee.OUI_INDEX:
- fh = open(ieee.OUI_REGISTRY)
+ fh = open(ieee.OUI_REGISTRY_PATH, 'rb')
for (offset, size) in ieee.OUI_INDEX[self._value]:
fh.seek(offset)
- data = fh.read(size)
+ data = fh.read(size).decode('UTF-8')
self._parse_data(data, offset, size)
fh.close()
else:
@@ -256,12 +256,12 @@ class IAB(BaseIdentifier):
# Discover offsets.
if self._value in ieee.IAB_INDEX:
- fh = open(ieee.IAB_REGISTRY)
+ fh = open(ieee.IAB_REGISTRY_PATH, 'rb')
(offset, size) = ieee.IAB_INDEX[self._value][0]
self.record['offset'] = offset
self.record['size'] = size
fh.seek(offset)
- data = fh.read(size)
+ data = fh.read(size).decode('UTF-8')
self._parse_data(data, offset, size)
fh.close()
else:
diff --git a/netaddr/eui/ieee.py b/netaddr/eui/ieee.py
index 11ccd9f..36380f6 100755
--- a/netaddr/eui/ieee.py
+++ b/netaddr/eui/ieee.py
@@ -35,22 +35,23 @@ More details can be found at the following URLs :-
import os.path as _path
import csv as _csv
+from netaddr.compat import _bytes_type
from netaddr.core import Subscriber, Publisher
#: Path to local copy of IEEE OUI Registry data file.
-OUI_REGISTRY = _path.join(_path.dirname(__file__), 'oui.txt')
+OUI_REGISTRY_PATH = _path.join(_path.dirname(__file__), 'oui.txt')
#: Path to netaddr OUI index file.
-OUI_METADATA = _path.join(_path.dirname(__file__), 'oui.idx')
+OUI_INDEX_PATH = _path.join(_path.dirname(__file__), 'oui.idx')
#: OUI index lookup dictionary.
OUI_INDEX = {}
#: Path to local copy of IEEE IAB Registry data file.
-IAB_REGISTRY = _path.join(_path.dirname(__file__), 'iab.txt')
+IAB_REGISTRY_PATH = _path.join(_path.dirname(__file__), 'iab.txt')
#: Path to netaddr IAB index file.
-IAB_METADATA = _path.join(_path.dirname(__file__), 'iab.idx')
+IAB_INDEX_PATH = _path.join(_path.dirname(__file__), 'iab.idx')
#: IAB index lookup dictionary.
IAB_INDEX = {}
@@ -118,7 +119,7 @@ class OUIIndexParser(Publisher):
if hasattr(ieee_file, 'readline') and hasattr(ieee_file, 'tell'):
self.fh = ieee_file
else:
- self.fh = open(ieee_file)
+ self.fh = open(ieee_file, 'rb')
def parse(self):
"""
@@ -129,20 +130,24 @@ class OUIIndexParser(Publisher):
record = None
size = 0
+ marker = _bytes_type('(hex)')
+ hyphen = _bytes_type('-')
+ empty_string = _bytes_type('')
+
while True:
- line = self.fh.readline() # unbuffered to obtain correct offsets
+ line = self.fh.readline()
if not line:
break # EOF, we're done
- if skip_header and '(hex)' in line:
+ if skip_header and marker in line:
skip_header = False
if skip_header:
# ignoring header section
continue
- if '(hex)' in line:
+ if marker in line:
# record start
if record is not None:
# a complete record.
@@ -152,7 +157,7 @@ class OUIIndexParser(Publisher):
size = len(line)
offset = (self.fh.tell() - len(line))
oui = line.split()[0]
- index = int(oui.replace('-', ''), 16)
+ index = int(oui.replace(hyphen, empty_string), 16)
record = [index, offset]
else:
# within record
@@ -197,7 +202,7 @@ class IABIndexParser(Publisher):
if hasattr(ieee_file, 'readline') and hasattr(ieee_file, 'tell'):
self.fh = ieee_file
else:
- self.fh = open(ieee_file)
+ self.fh = open(ieee_file, 'rb')
def parse(self):
"""
@@ -207,20 +212,26 @@ class IABIndexParser(Publisher):
skip_header = True
record = None
size = 0
+
+ hex_marker = _bytes_type('(hex)')
+ base16_marker = _bytes_type('(base 16)')
+ hyphen = _bytes_type('-')
+ empty_string = _bytes_type('')
+
while True:
- line = self.fh.readline() # unbuffered
+ line = self.fh.readline()
if not line:
break # EOF, we're done
- if skip_header and '(hex)' in line:
+ if skip_header and hex_marker in line:
skip_header = False
if skip_header:
# ignoring header section
continue
- if '(hex)' in line:
+ if hex_marker in line:
# record start
if record is not None:
record.append(size)
@@ -231,12 +242,12 @@ class IABIndexParser(Publisher):
index = iab_prefix
record = [index, offset]
size = len(line)
- elif '(base 16)' in line:
+ elif base16_marker in line:
# within record
size += len(line)
- prefix = record[0].replace('-', '')
+ prefix = record[0].replace(hyphen, empty_string)
suffix = line.split()[0]
- suffix = suffix.split('-')[0]
+ suffix = suffix.split(hyphen)[0]
record[0] = (int(prefix + suffix, 16)) >> 12
else:
# within record
@@ -247,38 +258,37 @@ class IABIndexParser(Publisher):
self.notify(record)
-def create_indices():
- """Create indices for OUI and IAB file based lookups"""
- oui_parser = OUIIndexParser(OUI_REGISTRY)
- oui_parser.attach(FileIndexer(OUI_METADATA))
+def create_index_from_registry(registry_path, index_path, parser):
+ """Generate an index files from the IEEE registry file."""
+ oui_parser = parser(registry_path)
+ oui_parser.attach(FileIndexer(index_path))
oui_parser.parse()
- iab_parser = IABIndexParser(IAB_REGISTRY)
- iab_parser.attach(FileIndexer(IAB_METADATA))
- iab_parser.parse()
+def create_indices():
+ """Create indices for OUI and IAB file based lookups"""
+ create_index_from_registry(OUI_REGISTRY_PATH, OUI_INDEX_PATH, OUIIndexParser)
+ create_index_from_registry(IAB_REGISTRY_PATH, IAB_INDEX_PATH, IABIndexParser)
-def load_indices():
- """Load OUI and IAB lookup indices into memory"""
- fp = open(OUI_METADATA)
- try:
- for row in _csv.reader(fp):
- (key, offset, size) = [int(_) for _ in row]
- OUI_INDEX.setdefault(key, [])
- OUI_INDEX[key].append((offset, size))
- finally:
- fp.close()
- fp = open(IAB_METADATA)
+def load_index(index, index_path):
+ """Load index from file into index data structure."""
+ fp = open(index_path, 'rb')
try:
- for row in _csv.reader(fp):
+ for row in _csv.reader([x.decode('UTF-8') for x in fp]):
(key, offset, size) = [int(_) for _ in row]
- IAB_INDEX.setdefault(key, [])
- IAB_INDEX[key].append((offset, size))
+ index.setdefault(key, [])
+ index[key].append((offset, size))
finally:
fp.close()
+def load_indices():
+ """Load OUI and IAB lookup indices into memory"""
+ load_index(OUI_INDEX, OUI_INDEX_PATH)
+ load_index(IAB_INDEX, IAB_INDEX_PATH)
+
+
if __name__ == '__main__':
# Generate indices when module is executed as a script.
create_indices()
diff --git a/tutorials/2.x/eui/tutorial.txt b/tutorials/2.x/eui/tutorial.txt
index c6f4775..a8217c6 100644
--- a/tutorials/2.x/eui/tutorial.txt
+++ b/tutorials/2.x/eui/tutorial.txt
@@ -136,9 +136,9 @@ Here is how you query an OUI with the EUI interface.
>>> oui
OUI('00-1B-77')
>>> oui.registration().address
-['Lot 8, Jalan Hi-Tech 2/3', 'Kulim Kedah 09000', 'MY']
+[u'Lot 8, Jalan Hi-Tech 2/3', u'Kulim Kedah 09000', u'MY']
>>> oui.registration().org
-'Intel Corporate'
+u'Intel Corporate'
You can also use OUI objects directly without going through the EUI interface.
@@ -150,29 +150,29 @@ This example shows you how you access them individually by specifying an index n
>>> oui
OUI('08-00-30')
>>> oui.registration(0).address
-['2380 N. ROSE AVENUE', 'OXNARD CA 93010', 'US']
+[u'2380 N. ROSE AVENUE', u'OXNARD CA 93010', u'US']
>>> oui.registration(0).org
-'NETWORK RESEARCH CORPORATION'
+u'NETWORK RESEARCH CORPORATION'
>>> oui.registration(0).oui
'08-00-30'
>>> oui.registration(1).address
-['GPO BOX 2476V', 'MELBOURNE VIC 3001', 'AU']
+[u'GPO BOX 2476V', u'MELBOURNE VIC 3001', u'AU']
>>> oui.registration(1).org
-'ROYAL MELBOURNE INST OF TECH'
+u'ROYAL MELBOURNE INST OF TECH'
>>> oui.registration(1).oui
'08-00-30'
>>> oui.registration(2).address
-['CH-1211 GENEVE 23', 'SUISSE/SWITZ', 'CH']
+[u'CH-1211 GENEVE 23', u'SUISSE/SWITZ', u'CH']
>>> oui.registration(2).org
-'CERN'
+u'CERN'
>>> oui.registration(2).oui
'08-00-30'
>>> for i in range(oui.reg_count):
... str(oui), oui.registration(i).org
...
-('08-00-30', 'NETWORK RESEARCH CORPORATION')
-('08-00-30', 'ROYAL MELBOURNE INST OF TECH')
-('08-00-30', 'CERN')
+('08-00-30', u'NETWORK RESEARCH CORPORATION')
+('08-00-30', u'ROYAL MELBOURNE INST OF TECH')
+('08-00-30', u'CERN')
Here is how you query an IAB with the EUI interface.
@@ -183,9 +183,9 @@ True
>>> iab
IAB('00-50-C2-00-00-00')
>>> iab.registration()
-{'address': ['1241 Superieor Ave E', 'Cleveland OH 44114', 'US'],
+{'address': [u'1241 Superieor Ave E', u'Cleveland OH 44114', u'US'],
'iab': '00-50-C2-00-00-00',
'idx': 84680704,
...
- 'org': 'T.L.S. Corp.',
+ 'org': u'T.L.S. Corp.',
'size': 537}