import os
import csv
import bz2
import gzip
import re
import urllib2
import socket

from apache_reader import ApacheLogReader

# dictionary key structure: filename, user_agent, package_name

class LocalStats(object):
    """Base class that writes the stats file"""

    def _get_logs(self, logfile, files_url):
        """Needs to return an iterator. Each entry should be a
        dictionary."""
        if callable(logfile):
            return logfile(files_url)
        raise NotImplementedError

    def _get_file_obj(self, path, mode='r', compression=None):
        """returns a file object, transparently handling compression"""
        if compression == 'bz2':
            return bz2.BZ2File(path, mode)
        elif compression == 'gz':
            return gzip.open(path, mode)
        return open(path, mode)

    def _build_stats(self, logfile, fileobj, files_url='/packages',
                     filter=None, compression=None):
        """Builds a stats file.

        - logfile: path to the original log file, or a callable
        - fileobj: a file object, or a path where the file is created
        - files_url: the URL prefix that identifies package downloads
        - filter: if given, a callable that receives the current log entry;
          if the callable returns True, the entry is not included
        """
        downloads = {}
        for log in self._get_logs(logfile, files_url):
            if filter is not None:
                if filter(log):
                    continue
            filename = log['filename']
            user_agent = log['useragent']
            package_name = log['packagename']
            key = (filename, user_agent, package_name)
            count = log.get('count', 1)
            if key in downloads:
                downloads[key] += count
            else:
                downloads[key] = count
        self._write_stats(fileobj, downloads, compression=compression)

    def _write_stats(self, fileobj, downloads, compression=None):
        if isinstance(fileobj, str):
            fileobj = self._get_file_obj(fileobj, 'w', compression)
            file_created = True
        else:
            file_created = False
        writer = csv.writer(fileobj)
        filenames = downloads.keys()
        filenames.sort()
        for key in filenames:
            filename, user_agent, package_name = key
            count = downloads[key]
            writer.writerow((package_name, filename, user_agent, count))
        if file_created:
            fileobj.close()

    def build_daily_stats(self, year, month, day, logfile, fileobj,
                          files_url='/packages', compression=None):
        """creates a daily stats file using an apache log file.

        - year, month, day: values for the day
        - logfile: path to the log file, or a callable
        - fileobj: a file object, or a path where the file is created
        - files_url: the URL prefix that identifies package downloads
        """
        def _filter(log):
            # exclude any entry that is not from the requested day
            return (day != log['day'] or month != log['month'] or
                    year != log['year'])
        self._build_stats(logfile, fileobj, files_url, _filter, compression)

    def build_monthly_stats(self, year, month, logfile, fileobj,
                            files_url='/packages', compression=None):
        """creates a monthly stats file using an apache log file.

        - year, month: values for the month
        - logfile: path to the log file, or a callable
        - fileobj: a file object, or a path where the file is created
        - files_url: the URL prefix that identifies package downloads
        """
        def _filter(log):
            # exclude any entry that is not from the requested month
            return month != log['month'] or year != log['year']
        self._build_stats(logfile, fileobj, files_url, _filter, compression)

    def read_stats(self, stats_file):
        """Returns an iterator over a stats file"""
        if isinstance(stats_file, str):
            ext = os.path.splitext(stats_file)[-1][1:]
            stats_file = self._get_file_obj(stats_file, 'r', ext)
        reader = csv.reader(stats_file)
        for line in reader:
            # work around user agents that contain commas: glue the
            # extra columns back onto the user-agent field
            while len(line) > 4:
                line[2] += ',' + line[3]
                del line[3]
            yield {'packagename': line[0],
                   'filename': line[1],
                   'useragent': line[2],
                   'count': int(line[3])}

    def read_stats_dict(self, stats_file):
        """Loads a stats file into a dictionary keyed like downloads"""
        res = {}
        for r in self.read_stats(stats_file):
            key = (r['filename'], r['useragent'], r['packagename'])
            res[key] = r['count']
        return res

    def build_local_stats(self, year, month, day, logfile, directory=None):
        """builds local stats with default values"""
        filename = '%d-%.2d-%.2d.bz2' % (year, month, day)
        if directory is not None:
            filename = os.path.join(directory, filename)
        self.build_daily_stats(year, month, day, logfile, filename,
                               compression='bz2')

    def integrate_stats(self, targetdir, year, month, day, fd):
        """merges the stats in fd into the daily and monthly files"""
        new = self.read_stats_dict(fd)
        # %.2d zero-pads month and day, matching the file names that
        # build_local_stats creates
        oldpath = '%s/days/%d-%.2d-%.2d.bz2' % (targetdir, year, month, day)
        if os.path.exists(oldpath):
            old = self.read_stats_dict(oldpath)
            for k, v in new.items():
                old[k] = old.get(k, 0) + v
        else:
            old = new
        self._write_stats(oldpath, old, 'bz2')
        monthpath = '%s/months/%d-%.2d.bz2' % (targetdir, year, month)
        if os.path.exists(monthpath):
            old = self.read_stats_dict(monthpath)
            for k, v in new.items():
                old[k] = old.get(k, 0) + v
        else:
            old = new
        self._write_stats(monthpath, old, 'bz2')
        return new


class ApacheLocalStats(LocalStats):
    """concrete class that uses the ApacheLogReader"""

    def _get_logs(self, logfile, files_url):
        return ApacheLogReader(logfile, files_url)


class ApacheDistantLocalStats(ApacheLocalStats):
    """Concrete class that gets the data from a distant file"""

    is_url = re.compile(r'^http://')

    def __init__(self, cache_folder='', timeout=5):
        self.cache_folder = cache_folder
        if not os.path.exists(cache_folder):
            os.makedirs(cache_folder)
        self.timeout = timeout

    def get_and_cache(self, url):
        """retrieves the distant file and adds it to the local cache"""
        basename = url.split('/')[-1]
        filename = os.path.join(self.cache_folder, basename)
        if os.path.exists(filename):
            # in cache, let's return it
            return filename, open(filename)
        # not in cache, we need to retrieve it and store it
        oldtimeout = socket.getdefaulttimeout()
        socket.setdefaulttimeout(self.timeout)
        try:
            try:
                content = urllib2.urlopen(url).read()
            except (urllib2.URLError, socket.timeout):
                return '', None
        finally:
            socket.setdefaulttimeout(oldtimeout)
        f = open(filename, 'w')
        try:
            f.write(content)
        finally:
            f.close()
        return filename, open(filename)

    def read_stats(self, stats_file):
        """retrieves a distant file and works with it"""
        if self.is_url.search(stats_file) is not None:
            path, fileobj = self.get_and_cache(stats_file)
            if path == '':
                return iter([])
            # read from the local cached copy
            stats_file = path
        return ApacheLocalStats.read_stats(self, stats_file)
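

# ---------------------------------------------------------------------------
# Minimal usage sketch, not part of the original module. It assumes an
# Apache access log at 'access.log' and existing 'stats/days', 'stats/months'
# and 'incoming' directories; all of these paths are hypothetical
# placeholders, and ApacheLogReader must be able to parse the given log.
if __name__ == '__main__':
    stats = ApacheLocalStats()
    # build a bz2-compressed daily CSV, written as stats/days/2011-03-07.bz2
    stats.build_local_stats(2011, 3, 7, 'access.log', directory='stats/days')
    # fold a freshly received stats file (e.g. from a mirror) into the
    # running totals kept under stats/days and stats/months
    stats.integrate_stats('stats', 2011, 3, 7, 'incoming/2011-03-07.bz2')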