summaryrefslogtreecommitdiff
path: root/apps/JAWS/clients/WebSTONE/bin/mine-logs.pl
blob: 1b721aa46aee2efc6024399dca4e5413c4d0fdb7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/pkg/gnu/bin//perl
#
#  mine-logs.pl:
#       script to transform access logs into WebStone workload
#
#       created 18 December 1995 mblakele@engr.sgi.com
#
# functional map:
#       usage: mine-logs.pl access.log
#
#       1. For each line in the input
#             a. parse the URL and the time, the outcome code, and the size
#             b. if the code is 200, and it's a GET, 
#               do we already know about this URL?
#                 i. yes - increment its counter
#                ii. no - create a slot for it, record size, 
#                    and set counter=1
#

use strict;

# Set to a true value to trace every parsed log line on STDERR.
my $debug = 0;

# Number of input lines read so far; drives the progress dots.
my $line_number = 0;

# Per-URL statistics for successful GET requests:
#   %counter - how many times each URL was requested
#   %size    - most recently logged response size for each URL
my %counter;
my %size;

while (<>) {
    chomp;

    # progress indicator: one dot on STDERR for every 1000 lines read
    $line_number++;
    print STDERR "." unless $line_number % 1000;

    # parse line: split on whitespace and keep only the fields we use
    # (the 2nd, 3rd, 5th and 8th fields are discarded)
    my ( $client, undef, undef, $date, undef,
         $command, $url, undef, $result_code, $size ) = split;

    # strip some junk: the quote that opens the request and the
    # bracket that opens the timestamp
    $command =~ s/\"//;
    $date    =~ s/\[//;

    # NOTE: log data is always emitted with print, never used as a
    # printf format -- URLs routinely contain '%' (e.g. "%20"), which
    # printf would interpret as a conversion specifier.
    print STDERR "$client, $date, $command, $url, $result_code, $size\n"
        if $debug;

    # is it a GET? Did it succeed? (i.e., is the result code 200?)
    if ( ( $command eq 'GET' ) && ( $result_code == 200 ) ) {
        # is this URL already in the key set?
        if ( exists $counter{$url} ) {
            # URL is in key set
            print STDERR "URL $url already in key set: incrementing\n"
                if $debug;
            $counter{$url}++;

            # The logged size differs from the one on record: keep the
            # most recent value.
            if ( $size != $size{$url} ) {
                print STDERR "size mismatch on $url: $size != $size{$url}\n"
                    if $debug;
                $size{$url} = $size;
            }
        }
        else {
            # URL isn't in key set: create its slot
            print STDERR "URL $url isn't in key set: adding size $size\n"
                if $debug;
            $counter{$url} = 1;
            $size{$url}    = $size;
        }
        # end if key set
    } # end if GET
}
# end of input file
print STDERR "\n";

# now we print out a workload file

# first, the headline, stamped with the output of the date command
my $generated_on = `date`;
chomp($generated_on);
print "# WebStone workload file\n# \tgenerated by $0 $generated_on\n#\n";

# next, sort the keys (by_counter orders them by descending hit count)
my @sorted_keys = sort by_counter keys(%counter);

# iterate through sorted keys
foreach my $key (@sorted_keys) {
    # print url, weighting, and (commented) the size in bytes
    print STDERR "printing data for $key\n" if $debug;
    print "$key\t$counter{$key}\t#$size{$key}\n";
}
# end foreach

# end main

# by_counter: comparator for sort().
# Looks up the two keys sort() supplies in $a and $b in %counter and
# compares the values numerically so that the larger count sorts first.
sub by_counter {
    my $hits_for_a = $counter{$a};
    my $hits_for_b = $counter{$b};
    return $hits_for_b <=> $hits_for_a;
}
# end by_counter

# end mine-logs.pl