##################################################################
package Geo::OSM::Planet;
##################################################################

use Exporter;
@ISA = qw( Exporter );
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
@EXPORT = qw( mirror_planet
	      osm_dir
	      planet_dir
	      UTF8sanitize
	      estimated_max_id
	      estimated_max_count
	      );

use strict;
use warnings;

use HTTP::Request;
use File::Basename;
use File::Copy;
use File::Path;
use File::Slurp;
use Getopt::Long;
use HTTP::Request;
use Storable ();
use Data::Dumper;

use Utils::File;
use Utils::Debug;
use Utils::LWP::Utils;


# Per-element-type statistics, as of planet-061220.
# For every type the table records how many entries were counted
# ('count') and the highest id that was seen ('max_id').
my $estimations = {
    elem    => { count => 312752630, max_id => 312752630 },
    line    => { count => 388379350, max_id => 312752630 },
    node    => { count => 40069619,  max_id => 59943310  },
    seg     => { count => 40992844,  max_id => 57261050  },
    segment => { count => 41325764,  max_id => 57259818  },
    tag     => { count => 186445779, max_id => 1         },
    way     => { count => 3918624,   max_id => 8013668   },
};

# ------------------------------------------------------------------
# Look up the estimated highest id for one element type.
# The figures were taken from an older planet.osm file, so they are
# usually a little too low for current data.
# ARGS:
#   $type: line|way|tag|...
# RETURNS:
#   $result: the estimated max_id; 0 (after a warning) for an unknown type
sub estimated_max_id($){
    my ($type) = @_;

    my $max_id = $estimations->{$type}->{max_id};
    return $max_id if defined $max_id;

    # No figure recorded for this type: complain and fall back to 0
    warn("\n estimated_max_id($type): unknown Tag-Type\n");
    return 0;
}

# ------------------------------------------------------------------
# Look up the estimated number of elements for one element type.
# The figures were taken from an older planet.osm file, so they are
# usually a little too low for current data.
# ARGS:
#   $type: line|way|tag|...
# RETURNS:
#   $result: the estimated element count; 0 (after a warning) for an
#            unknown type
sub estimated_max_count($){
    my $type= shift;
    unless ( defined ($estimations->{$type}->{count})) {
        # Report this routine's own name so the warning can be traced
        # back to the call that actually failed.
        warn("\n estimated_max_count($type): unknown Tag-Type\n");
        return 0;
    };
    return $estimations->{$type}->{count};
}

# ------------------------------------------------------------------
# Returns the main osm directory used for holding data: "<home>/osm".
# <home> is $ENV{HOME}; when that is empty or unset, "/home/<user>" is
# used instead, with <user> taken from the output of `whoami`.
# (Planned for later: also consider /var/data/osm as a default location.)
sub osm_dir() {
    my $home = $ENV{HOME};

    if ( !$home ) {
        # No HOME in the environment: guess it from the login name
        chomp( my $user = `whoami` );
        $home = "/home/$user";
    }

    return "$home/osm";
}

# ------------------------------------------------------------------
# Returns (or sets) the directory where the planet.osm files live.
# ARGS:
#   $new_dir: (optional) a true value is stored as the new directory
# RETURNS:
#   the stored directory; if none has been stored yet it is initialised
#   to "<osm_dir()>/planet" and remembered for later calls
my $PLANET_DIR='';
sub planet_dir(;$) {
    my ($new_dir) = @_;

    # Setter: remember the directory given by the caller
    if ( $new_dir ) {
        $PLANET_DIR = $new_dir;
        return $PLANET_DIR;
    }

    # Getter: fill in the default on first use
    $PLANET_DIR = osm_dir() . "/planet" unless $PLANET_DIR;
    return $PLANET_DIR;
}


# ------------------------------------------------------------------
# Sort a list as strings and drop duplicate entries.
# ARGS:
#   @_: the values to process
# RETURNS:
#   the sorted values with every distinct string kept exactly once
#   (in scalar context: the number of distinct values)
sub sort_unique(@){
    # Track values by string identity rather than by truthiness, so that
    # repeated "0" or "" entries are collapsed like any other duplicate.
    my %seen;
    return grep { !$seen{$_}++ } sort @_;
}
    

# ------------------------------------------------------------------
# Mirror the newest planet.osm file into the planet directory
# (planet_dir(), by default ~/osm/planet/) and return the name of the
# file to work with.
#
# Unless $Utils::LWP::Utils::NO_MIRROR is set, the index page of the
# planet server is fetched and the newest planet-YYMMDD.osm.{bz2,gz}
# listed there is mirrored.  After that the newest planet file present
# in the local directory is chosen, whether or not a download happened.
# planet-061008 is skipped in both steps.
#
# The file is NO LONGER sanitized afterwards.
#
# RETURNS:
#   the chosen file name, or the name of its unpacked *.osm counterpart
#   when file_needs_re_generation() says that one needs no rebuild;
#   undef if there is no planet file in the local directory
sub mirror_planet(){
    my $planet_server="http://planet.openstreetmap.org";
    my $url = "$planet_server";

    my $mirror_dir=planet_dir();
    mkdir_if_needed( $mirror_dir );

    my $current_file;
    if ( !$Utils::LWP::Utils::NO_MIRROR ) {
        # Get Index.html of Planet.osm.org, sorted by date, newest first
        my $apache_sort_hy_date="?C=M;O=D";
        my $index_file="$mirror_dir/planet_index.html";
        my $result = mirror_file("$url/$apache_sort_hy_date",$index_file);
        if ( $result ) {
            my $index_content = read_file( $index_file ) ;

            # Collect the planet files named in the index.  The dots are
            # escaped so that only real ".osm.bz2"/".osm.gz" names match.
            my @all_files = ($index_content =~ m/(planet-\d{6}\.osm\.(?:bz2|gz))/g);
            my ( $current_file1,$current_file2 )
                = grep { $_ !~ m/planet-061008/ } reverse sort_unique(@all_files);
            if ( $DEBUG ) {
                # The index may list fewer than two files
                my @top = map { defined $_ ? $_ : '' } ( $current_file1, $current_file2 );
                print STDERR " TOP Files:   ( $top[0],$top[1] ) \n";
            }

            $current_file = $current_file1;
            if ( defined $current_file1 && defined $current_file2 ) {
                # Use the .gz name if the runner-up is the .gz twin of
                # the top .bz2 entry
                (my $gz_name = $current_file1) =~ s/\.bz2$/\.gz/;
                $current_file = $gz_name if $gz_name eq $current_file2;
            }
            if ( $current_file ) {
                $url .= "/$current_file";
                $current_file = "$mirror_dir/$current_file";
                print STDERR "Mirror OSM Data from $url\n" if $VERBOSE || $DEBUG;
                # A failed download is not fatal: we fall back to whatever
                # is already in the local directory
                $result = mirror_file($url,$current_file);
            }
        }
    }

    # Whatever happened above, pick the newest file we have locally
    my @files= reverse sort_unique( grep { $_ !~ m/planet-061008/ } glob("$mirror_dir/planet-*.osm.{bz2,gz}"));
    if ( $DEBUG) {
        print STDERR "Existing Files: \n\t".join("\n\t",@files)."\n";
    }
    $current_file = $files[0];

    if ( $DEBUG) {
        print STDERR "Choosen File: ".( defined $current_file ? $current_file : '' )."\n";
    }

    return undef unless $current_file;

    # Prefer the unpacked planet-*.osm if it is already up to date
    my ($unpacked_file) = ($current_file=~ m/(.*\.osm)/);
    $current_file = $unpacked_file
        unless file_needs_re_generation($current_file,$unpacked_file);

    print STDERR "Mirror done, using '$current_file'\n" if $VERBOSE>1 || $DEBUG>1;
    return $current_file;
}

# ------------------------------------------------------------------
# Creates a second, UTF-8 sanitized copy of a planet.osm file by piping
# it through the external UTF8sanitizer program and bzip2.
# The result is named like the input with ".osm" replaced by "-a.osm",
# e.g.  ~/osm/planet/planet-07XXXX-a.osm.bz2
#
# Planet files dated 061205 up to (but not including) 061213 are returned
# unchanged.  If file_needs_re_generation() reports that an existing
# sanitized version (packed or unpacked, as found by
# newest_unpacked_filename()) is up to date, nothing is done and that
# file name is returned.
#
# ARGS:
#   $filename: name of the planet file (planet-YYMMDD.osm.bz2 or .gz)
# RETURNS:
#   name of the file to use
# DIES:
#   if the sanitizer program cannot be found, if the result is less than
#   90% of the size of the input, or if the result cannot be put in place
sub UTF8sanitize($){
    my $filename = shift;
    if ( $DEBUG) {
        print STDERR "UTF8sanitize($filename)\n";
    }
    my $start_time=time();

    # Files from this date window do not need to be sanitized.
    # The limits are written without a leading zero on purpose: Perl
    # reads a numeric literal such as 061205 as octal.
    my ($file_date) = ($filename =~ m/planet-(\d+)/ );
    return $filename
        if defined $file_date && $file_date >= 61205 && $file_date < 61213;

    my $filename_new= $filename;
    $filename_new =~ s/\.osm/-a.osm/;
    my $filename_new_check=newest_unpacked_filename($filename_new);

    # check if planet-070101-a.osm[.bz2] is newer than  planet-070101.osm.bz2
    return $filename_new_check
        unless file_needs_re_generation($filename,$filename_new_check);

    # We have to create a new one
    print STDERR "UTF8 Sanitize $filename ... \n";
    # Ugly hack, but for now it works: look in $PATH first, then next to
    # the Perl library directories
    my $UTF8sanitizer=`which UTF8sanitizer`;
    chomp $UTF8sanitizer;
    unless ( -x $UTF8sanitizer ) {
        $UTF8sanitizer=find_file_in_perl_path('../planet.osm/C/UTF8sanitizer');
    }
    die "Sanitizer not found\n" unless -x $UTF8sanitizer;
    print STDERR "Sanitizer found at '$UTF8sanitizer'\n" if $DEBUG;

    print STDERR "     this may take some time ... \n";
    # gzip cannot read bzip2 data, so choose the unpacker by file suffix
    my $unpacker = ( $filename =~ m/\.bz2$/ ) ? 'bzip2 -dc' : 'gzip -dc';
    # NOTE(review): the output is always bzip2-compressed, even when the
    # target name ends in .gz -- confirm whether that is intended.
    my $cmd = "$unpacker $filename | $UTF8sanitizer  | bzip2 >$filename_new.part";
    print "Command: $cmd" if $DEBUG || $VERBOSE;
    my $result = `$cmd`;
    print $result if $DEBUG || $VERBOSE;

    print "Sanitized $filename " if $DEBUG || $VERBOSE;
    print_time($start_time);

    # Plausibility check: the sanitized file must not be much smaller
    # than the original.  A missing file counts as size 0.
    my $file_size     = -s "$filename";
    my $file_size_new = -s "$filename_new.part";
    $file_size     = 0 unless defined $file_size;
    $file_size_new = 0 unless defined $file_size_new;
    if ( $file_size_new < ($file_size*0.9) ) {
        die "File Sanitize seems not successfull.\n".
            "Original Size $file_size\n".
            "Sanitized Size $file_size_new\n";
    }
    # Only a completely written file gets the final name
    rename "$filename_new.part","$filename_new"
        or die "Cannot rename $filename_new.part to $filename_new: $!\n";
    if ( ! -s $filename_new ) {
        die "Cannot sanitize $filename\n";
    }
    print "now we have a sanitized $filename_new\n" if $DEBUG || $VERBOSE;
    return $filename_new;
}

# ------------------------------------------------------------------
# Search the Perl module search path (@INC) for a file. For now this is
# the easiest way to locate helper programs such as UTF8sanitizer.
# ARGS:
#   $file: file name, relative to the entries of @INC
# RETURNS:
#   "<@INC entry>/<file>" for the first entry where that file exists
#   with a non-zero size; the empty string if there is none
sub find_file_in_perl_path($){
    my ($relative_name) = @_;

    my $hit = '';
  CANDIDATE:
    foreach my $inc_dir ( @INC ) {
        my $candidate = "$inc_dir/$relative_name";
        if ( $DEBUG>2 ) {
            print "find_file_in_perl_path: looking in '$candidate'\n";
        }
        next CANDIDATE unless -s $candidate;

        # First non-empty match wins
        $hit = $candidate;
        last CANDIDATE;
    }

    print "find_file_in_perl_path($relative_name): --> $hit\n" if $DEBUG;
    return $hit;
}

# ------------------------------------------------------------------
1;

=head1 NAME

Geo::OSM::Planet

=head1 COPYRIGHT

Copyright 2006, Jörg Ostertag

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

=head1 AUTHOR

Jörg Ostertag (planet-count-for-openstreetmap@ostertag.name)

=head1 SEE ALSO

http://www.openstreetmap.org/

=cut