#!/usr/bin/perl
#
# Summarise page hits from a web-server access log read on STDIN.
#
# Usage:  perl SCRIPT [devel] < access_log
#
# Only requests answered with status code 200 are counted.  Requests from
# the author's own addresses, from known crawlers, and for administrative
# or non-content URLs are skipped.  The hit count per (normalised) path is
# always printed; when the first argument is 'devel' the count per
# user-agent string is printed as well.

use strict;
use warnings;

use Parse::AccessLogEntry;
use Data::Dumper;    # not used by the code below; kept for ad-hoc debugging

# Called as a plain function rather than a class method, exactly as the
# original script did.
my $P = Parse::AccessLogEntry::new();

# 'devel' as the first argument enables the extra per-agent report.
my $devel;
$devel = 1 if ( defined $ARGV[0] && $ARGV[0] eq 'devel' );

# Decode one component of a query string: '+' becomes a space, then
# %XX escapes are expanded.  The order matters: doing it the other way
# round would turn an encoded plus sign (%2B) into a space.
# Returns undef when given undef.
sub _decode_component {
    my ($Text) = @_;
    return $Text unless defined $Text;
    $Text =~ s/\+/ /g;
    $Text =~ s/%([a-fA-F0-9]{2})/chr(hex($1))/ge;
    return $Text;
}

# Parse a query string of the form "k1=v1&k2=v2" into a hash.
#
#   $_[0]   the raw query string (a trailing newline is ignored; the
#           caller's variable is not modified)
#
# Returns a reference to a hash mapping decoded keys to decoded values.
# A key given without '=' maps to undef.  Only the first '=' of a pair
# separates key from value, so values may themselves contain '='.
# When a key occurs more than once the last occurrence wins.
sub url_decode {
    my ($Query) = @_;
    my %hash;
    return \%hash unless defined $Query;

    chomp $Query;
    for my $Pair ( split /&/, $Query ) {
        next if $Pair eq '';    # e.g. "a=1&&b=2"
        my ( $Key, $Value ) = split /=/, $Pair, 2;
        $hash{ _decode_component($Key) } = _decode_component($Value);
    }
    return \%hash;
}

# Return true when $Text matches at least one of the compiled patterns
# in the array referenced by $Patterns.
sub matches_any {
    my ( $Text, $Patterns ) = @_;
    for my $Pattern ( @{$Patterns} ) {
        return 1 if $Text =~ $Pattern;
    }
    return 0;
}

# Print a titled list of "key: count" lines, highest count first.
# Keys with equal counts are listed alphabetically so that the output is
# stable from run to run.
sub print_report {
    my ( $Title, $Counts ) = @_;
    print "\n$Title:\n";
    for my $Key (
        sort { $Counts->{$b} <=> $Counts->{$a} or $a cmp $b }
        keys %{$Counts}
      )
    {
        printf " %-s: %s\n", $Key, $Counts->{$Key};
    }
    return;
}

#I don't count! Ignoring hits from work and home.
my %IgnoredIP = map { $_ => 1 } qw(160.33.43.75 69.227.0.244);

# Some things really mean other things: each of these leading path
# prefixes is replaced by a single '/'.  They are applied one after the
# other, in this order.
my @PrefixRewrites = (
    qr/^\/bin\/view\//,
    qr/^\/bin\/viewfile\//,
    qr/^\/pub\//,
    qr/^\/bin\/search\//,
    qr/^\/bin\/natsearch\//,
    qr/^\/bin\/attach\//,
);

#A lot of other things don't count either (exact path matches).
my %IgnoredFile = map { $_ => 1 } (
    '/viewauth/Sandbox/Comments',
    '/favicon.ico',
    '/robots.txt',
);

# Path patterns that are not counted.  These are tested after the prefix
# rewrites above, so an unanchored pattern such as /bin/attach/ can only
# still match when it occurs somewhere other than the rewritten prefix.
my @IgnoredFilePatterns = (
    qr/\.css$/i,
    qr/\.jpg$/i,
    qr/\/WebRss/,
    qr/\/pub\/TWiki\//,
    qr/^\/TWiki\//,
    qr/^\/Sandbox\//,
    qr/^\/Main\//,
    qr/^\/Personal\//,
    qr/^\/bin\/login\//,

    #We ignore administrative actions too.
    qr/\/bin\/configure/,
    qr/\/bin\/changes/,
    qr/\/bin\/save/,
    qr/\/bin\/edit\//,
    qr/\/bin\/attach\//,
    qr/\/bin\/rdiff\//,
    qr/\/bin\/rename\//,
    qr/\/admin\//,

    #Disregarding 'oops', although perhaps this should be reported on
    qr/\/bin\/oops\//,
);

#Bots shouldn't count! Ignoring various user agents.
my @BotPatterns = (
    qr/^FAST MetaWeb Crawler/,
    qr/^ichiro/,
    qr/^psbot/,
    qr/^SBIder/,
    qr/^ilial\//,
    qr/^NutchCVS/,
    qr/^sproose/,
    qr/^online link validator/,
    qr/^Gigabot/,
    qr/^TurnitinBot/,
    qr/^Accoona-AI-Agent/,
    qr/^Teemer/,
    qr/^Speedy Spider/,
    qr/^Yeti/,
    qr/^ia_archiver-web.archive.org/,
    qr/^SMBot/,
    qr/\(Twiceler/,
    qr/^SurveyBot/,
    qr/^msnbot/,
    qr/^MJ12bot/,
    qr/^VadixBot/,
    qr/Zyborg/,
    qr/NextGenSearchBot/,
    qr/Yahoo\! Slurp/,
    qr/Googlebot/,
    qr/SnapPreviewBot/,
    qr/NimbleCrawler/,
    qr/Ask Jeeves\/Teoma/,
    qr/BecomeBot/,
);

my %Fail;         # declared by the original script; not used by the code below
my %Hits;         # normalised path => number of counted hits
my %AgentHash;    # user-agent string => number of counted hits

# Read from STDIN explicitly.  The bare <> operator would treat the
# 'devel' argument in @ARGV as the name of a file to open.
LINE:
while ( my $Line = <STDIN> ) {
    my $H = $P->parse($Line);
    next LINE unless ( defined( $H->{code} ) && $H->{code} eq '200' );

    # Fields the parser could not extract are treated as empty strings so
    # that the comparisons and matches below never see undef.
    my $Agent = defined $H->{agent} ? $H->{agent} : '';
    my $IP    = defined $H->{host}  ? $H->{host}  : '';
    my $File  = defined $H->{file}  ? $H->{file}  : '';

    next LINE if $IgnoredIP{$IP};

    # Normalise the path so that different URLs for one page share a key.
    # NOTE(review): the WebHome rewrite runs before the query string is
    # removed, so "/Web/WebHome?x=1" ends up as "/Web/WebHome", not
    # "/Web/".  Order kept as in the original; confirm whether intended.
    $File =~ s/\/WebHome$/\//;
    $File =~ s/\?.*//g;
    for my $Prefix (@PrefixRewrites) {
        $File =~ s/$Prefix/\//;
    }
    $File = '/' if ( $File eq '/Gunwiki/' );

    next LINE if $IgnoredFile{$File};
    next LINE if matches_any( $File,  \@IgnoredFilePatterns );
    next LINE if matches_any( $Agent, \@BotPatterns );

    $AgentHash{$Agent}++;
    $Hits{$File}++;

    #print "Valid: $Line" if($devel);
}

print_report( 'Hits', \%Hits );

exit unless ($devel);

print_report( 'Agents', \%AgentHash );