From af145601df4329a4dc55ef0ce9ce5f8645f09d4f Mon Sep 17 00:00:00 2001
From: David Moss <drkjam@gmail.com>
Date: Fri, 13 Jan 2017 00:20:49 +0000
Subject: - fixed a unicode vs bytes issue between Python 2.x and 3.x when
 reading and writing IEEE data files.

---
 CHANGELOG                      |  3 ++
 netaddr/compat.py              |  4 ++
 netaddr/core.py                | 19 ----------
 netaddr/eui/__init__.py        |  8 ++--
 netaddr/eui/ieee.py            | 84 +++++++++++++++++++++++-------------------
 tutorials/2.x/eui/tutorial.txt | 26 ++++++-------
 6 files changed, 71 insertions(+), 73 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 3bc18ef..d89c6e6 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -16,6 +16,9 @@ Changes since 0.7.18
 
 * cleaned up INSTALL docs so they accurately reflect current Python packaging.
 
+* fixed broken parsing, generating and reading of IEEE index files when switching
+  between Python 2.x and 3.x.
+
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 Specific bug fixes addressed in this release
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/netaddr/compat.py b/netaddr/compat.py
index ff54803..0fe69ee 100644
--- a/netaddr/compat.py
+++ b/netaddr/compat.py
@@ -19,6 +19,8 @@ if _sys.version_info[0] == 3:
 
     _str_type = str
 
+    _bytes_type = lambda x: bytes(x, 'UTF-8')
+
     _is_str = lambda x: isinstance(x, (str, type(''.encode())))
 
     _is_int = lambda x: isinstance(x, int)
@@ -67,6 +69,8 @@ elif _sys.version_info[0:2] > [2, 3]:
 
     _str_type = basestring
 
+    _bytes_type = str
+
     _is_str = lambda x: isinstance(x, basestring)
 
     _is_int = lambda x: isinstance(x, (int, long))
diff --git a/netaddr/core.py b/netaddr/core.py
index 52f8930..f17eaba 100644
--- a/netaddr/core.py
+++ b/netaddr/core.py
@@ -204,22 +204,3 @@ class DictDotLookup(object):
 
     def __repr__(self):
         return _pprint.pformat(self.__dict__)
-
-
-def dos2unix(filename):
-    """
-    Replace DOS line endings (CRLF) with UNIX line endings (LF) in file.
-
-    """
-    fh = open(filename, "rb")
-    data = fh.read()
-    fh.close()
-
-    if '\0' in data:
-        raise ValueError('file contains binary data: %s!' % filename)
-
-    newdata = data.replace("\r\n".encode(), "\n".encode())
-    if newdata != data:
-        f = open(filename, "wb")
-        f.write(newdata)
-        f.close()
diff --git a/netaddr/eui/__init__.py b/netaddr/eui/__init__.py
index d8f2456..aa79014 100644
--- a/netaddr/eui/__init__.py
+++ b/netaddr/eui/__init__.py
@@ -91,10 +91,10 @@ class OUI(BaseIdentifier):
 
         #   Discover offsets.
         if self._value in ieee.OUI_INDEX:
-            fh = open(ieee.OUI_REGISTRY)
+            fh = open(ieee.OUI_REGISTRY_PATH, 'rb')
             for (offset, size) in ieee.OUI_INDEX[self._value]:
                 fh.seek(offset)
-                data = fh.read(size)
+                data = fh.read(size).decode('UTF-8')
                 self._parse_data(data, offset, size)
             fh.close()
         else:
@@ -256,12 +256,12 @@ class IAB(BaseIdentifier):
 
         #   Discover offsets.
         if self._value in ieee.IAB_INDEX:
-            fh = open(ieee.IAB_REGISTRY)
+            fh = open(ieee.IAB_REGISTRY_PATH, 'rb')
             (offset, size) = ieee.IAB_INDEX[self._value][0]
             self.record['offset'] = offset
             self.record['size'] = size
             fh.seek(offset)
-            data = fh.read(size)
+            data = fh.read(size).decode('UTF-8')
             self._parse_data(data, offset, size)
             fh.close()
         else:
diff --git a/netaddr/eui/ieee.py b/netaddr/eui/ieee.py
index 11ccd9f..36380f6 100755
--- a/netaddr/eui/ieee.py
+++ b/netaddr/eui/ieee.py
@@ -35,22 +35,23 @@ More details can be found at the following URLs :-
 import os.path as _path
 import csv as _csv
 
+from netaddr.compat import _bytes_type
 from netaddr.core import Subscriber, Publisher
 
 
 #: Path to local copy of IEEE OUI Registry data file.
-OUI_REGISTRY = _path.join(_path.dirname(__file__), 'oui.txt')
+OUI_REGISTRY_PATH = _path.join(_path.dirname(__file__), 'oui.txt')
 #: Path to netaddr OUI index file.
-OUI_METADATA = _path.join(_path.dirname(__file__), 'oui.idx')
+OUI_INDEX_PATH = _path.join(_path.dirname(__file__), 'oui.idx')
 
 #: OUI index lookup dictionary.
 OUI_INDEX = {}
 
 #: Path to local copy of IEEE IAB Registry data file.
-IAB_REGISTRY = _path.join(_path.dirname(__file__), 'iab.txt')
+IAB_REGISTRY_PATH = _path.join(_path.dirname(__file__), 'iab.txt')
 
 #: Path to netaddr IAB index file.
-IAB_METADATA = _path.join(_path.dirname(__file__), 'iab.idx')
+IAB_INDEX_PATH = _path.join(_path.dirname(__file__), 'iab.idx')
 
 #: IAB index lookup dictionary.
 IAB_INDEX = {}
@@ -118,7 +119,7 @@ class OUIIndexParser(Publisher):
         if hasattr(ieee_file, 'readline') and hasattr(ieee_file, 'tell'):
             self.fh = ieee_file
         else:
-            self.fh = open(ieee_file)
+            self.fh = open(ieee_file, 'rb')
 
     def parse(self):
         """
@@ -129,20 +130,24 @@ class OUIIndexParser(Publisher):
         record = None
         size = 0
 
+        marker = _bytes_type('(hex)')
+        hyphen = _bytes_type('-')
+        empty_string = _bytes_type('')
+
         while True:
-            line = self.fh.readline() # unbuffered to obtain correct offsets
+            line = self.fh.readline()
 
             if not line:
                 break   # EOF, we're done
 
-            if skip_header and '(hex)' in line:
+            if skip_header and marker in line:
                 skip_header = False
 
             if skip_header:
                 #   ignoring header section
                 continue
 
-            if '(hex)' in line:
+            if marker in line:
                 #   record start
                 if record is not None:
                     #   a complete record.
@@ -152,7 +157,7 @@ class OUIIndexParser(Publisher):
                 size = len(line)
                 offset = (self.fh.tell() - len(line))
                 oui = line.split()[0]
-                index = int(oui.replace('-', ''), 16)
+                index = int(oui.replace(hyphen, empty_string), 16)
                 record = [index, offset]
             else:
                 #   within record
@@ -197,7 +202,7 @@ class IABIndexParser(Publisher):
         if hasattr(ieee_file, 'readline') and hasattr(ieee_file, 'tell'):
             self.fh = ieee_file
         else:
-            self.fh = open(ieee_file)
+            self.fh = open(ieee_file, 'rb')
 
     def parse(self):
         """
@@ -207,20 +212,26 @@ class IABIndexParser(Publisher):
         skip_header = True
         record = None
         size = 0
+
+        hex_marker = _bytes_type('(hex)')
+        base16_marker = _bytes_type('(base 16)')
+        hyphen = _bytes_type('-')
+        empty_string = _bytes_type('')
+
         while True:
-            line = self.fh.readline()   # unbuffered
+            line = self.fh.readline()
 
             if not line:
                 break   # EOF, we're done
 
-            if skip_header and '(hex)' in line:
+            if skip_header and hex_marker in line:
                 skip_header = False
 
             if skip_header:
                 #   ignoring header section
                 continue
 
-            if '(hex)' in line:
+            if hex_marker in line:
                 #   record start
                 if record is not None:
                     record.append(size)
@@ -231,12 +242,12 @@ class IABIndexParser(Publisher):
                 index = iab_prefix
                 record = [index, offset]
                 size = len(line)
-            elif '(base 16)' in line:
+            elif base16_marker in line:
                 #   within record
                 size += len(line)
-                prefix = record[0].replace('-', '')
+                prefix = record[0].replace(hyphen, empty_string)
                 suffix = line.split()[0]
-                suffix = suffix.split('-')[0]
+                suffix = suffix.split(hyphen)[0]
                 record[0] = (int(prefix + suffix, 16)) >> 12
             else:
                 #   within record
@@ -247,38 +258,37 @@ class IABIndexParser(Publisher):
         self.notify(record)
 
 
-def create_indices():
-    """Create indices for OUI and IAB file based lookups"""
-    oui_parser = OUIIndexParser(OUI_REGISTRY)
-    oui_parser.attach(FileIndexer(OUI_METADATA))
+def create_index_from_registry(registry_path, index_path, parser):
+    """Generate an index file from the IEEE registry file."""
+    oui_parser = parser(registry_path)
+    oui_parser.attach(FileIndexer(index_path))
     oui_parser.parse()
 
-    iab_parser = IABIndexParser(IAB_REGISTRY)
-    iab_parser.attach(FileIndexer(IAB_METADATA))
-    iab_parser.parse()
 
+def create_indices():
+    """Create indices for OUI and IAB file based lookups"""
+    create_index_from_registry(OUI_REGISTRY_PATH, OUI_INDEX_PATH, OUIIndexParser)
+    create_index_from_registry(IAB_REGISTRY_PATH, IAB_INDEX_PATH, IABIndexParser)
 
-def load_indices():
-    """Load OUI and IAB lookup indices into memory"""
-    fp = open(OUI_METADATA)
-    try:
-        for row in _csv.reader(fp):
-            (key, offset, size) = [int(_) for _ in row]
-            OUI_INDEX.setdefault(key, [])
-            OUI_INDEX[key].append((offset, size))
-    finally:
-        fp.close()
 
-    fp = open(IAB_METADATA)
+def load_index(index, index_path):
+    """Load index from file into index data structure."""
+    fp = open(index_path, 'rb')
     try:
-        for row in _csv.reader(fp):
+        for row in _csv.reader([x.decode('UTF-8') for x in fp]):
             (key, offset, size) = [int(_) for _ in row]
-            IAB_INDEX.setdefault(key, [])
-            IAB_INDEX[key].append((offset, size))
+            index.setdefault(key, [])
+            index[key].append((offset, size))
     finally:
         fp.close()
 
 
+def load_indices():
+    """Load OUI and IAB lookup indices into memory"""
+    load_index(OUI_INDEX, OUI_INDEX_PATH)
+    load_index(IAB_INDEX, IAB_INDEX_PATH)
+
+
 if __name__ == '__main__':
     #   Generate indices when module is executed as a script.
     create_indices()
diff --git a/tutorials/2.x/eui/tutorial.txt b/tutorials/2.x/eui/tutorial.txt
index c6f4775..a8217c6 100644
--- a/tutorials/2.x/eui/tutorial.txt
+++ b/tutorials/2.x/eui/tutorial.txt
@@ -136,9 +136,9 @@ Here is how you query an OUI with the EUI interface.
 >>> oui
 OUI('00-1B-77')
 >>> oui.registration().address
-['Lot 8, Jalan Hi-Tech 2/3', 'Kulim  Kedah  09000', 'MY']
+[u'Lot 8, Jalan Hi-Tech 2/3', u'Kulim  Kedah  09000', u'MY']
 >>> oui.registration().org
-'Intel Corporate'
+u'Intel Corporate'
 
 You can also use OUI objects directly without going through the EUI interface.
 
@@ -150,29 +150,29 @@ This example shows you how you access them individually by specifying an index n
 >>> oui
 OUI('08-00-30')
 >>> oui.registration(0).address
-['2380 N. ROSE AVENUE', 'OXNARD  CA  93010', 'US']
+[u'2380 N. ROSE AVENUE', u'OXNARD  CA  93010', u'US']
 >>> oui.registration(0).org
-'NETWORK RESEARCH CORPORATION'
+u'NETWORK RESEARCH CORPORATION'
 >>> oui.registration(0).oui
 '08-00-30'
 >>> oui.registration(1).address
-['GPO BOX 2476V', 'MELBOURNE  VIC  3001', 'AU']
+[u'GPO BOX 2476V', u'MELBOURNE  VIC  3001', u'AU']
 >>> oui.registration(1).org
-'ROYAL MELBOURNE INST OF TECH'
+u'ROYAL MELBOURNE INST OF TECH'
 >>> oui.registration(1).oui
 '08-00-30'
 >>> oui.registration(2).address
-['CH-1211 GENEVE 23', 'SUISSE/SWITZ', 'CH']
+[u'CH-1211 GENEVE 23', u'SUISSE/SWITZ', u'CH']
 >>> oui.registration(2).org
-'CERN'
+u'CERN'
 >>> oui.registration(2).oui
 '08-00-30'
 >>> for i in range(oui.reg_count):
 ...     str(oui), oui.registration(i).org
 ...
-('08-00-30', 'NETWORK RESEARCH CORPORATION')
-('08-00-30', 'ROYAL MELBOURNE INST OF TECH')
-('08-00-30', 'CERN')
+('08-00-30', u'NETWORK RESEARCH CORPORATION')
+('08-00-30', u'ROYAL MELBOURNE INST OF TECH')
+('08-00-30', u'CERN')
 
 Here is how you query an IAB with the EUI interface.
 
@@ -183,9 +183,9 @@ True
 >>> iab
 IAB('00-50-C2-00-00-00')
 >>> iab.registration()
-{'address': ['1241 Superieor Ave E', 'Cleveland  OH  44114', 'US'],
+{'address': [u'1241 Superieor Ave E', u'Cleveland  OH  44114', u'US'],
  'iab': '00-50-C2-00-00-00',
  'idx': 84680704,
  ...
- 'org': 'T.L.S. Corp.',
+ 'org': u'T.L.S. Corp.',
  'size': 537}
-- 
cgit v1.2.1