# Wikipedia:Statistik/totalViews.pl
# Us der alemannische Wikipedia, der freie Dialäkt-Enzyklopedy
#!/usr/bin/perl
# This script was released by w:als:User:Melancholie under the GNU General Public License and Creative Commons by-sa (attribution + share alike); if you improve this script, please tell me!
# Usage: See bottom (or just try ;-)
use LWP::UserAgent;

# Command-line arguments, in order: project code, date, UTC offset (optional).
# These stay package globals because the rest of the script reads them.
($project, $date, $hour) = @ARGV;

# Normalise the UTC offset to a plain signed integer; a missing offset is 0.
if (defined $hour && $hour ne "") {
    $hour =~ s/^\+(.+)$/$1/;                  # drop a leading "+"
    $hour =~ s/^(-?)0([0-9])$/$1$2/;          # "05"    -> "5"
    $hour =~ s/^(-?)0?([0-9]):?00$/$1$2/;     # "05:00" -> "5"
}
else {
    $hour = 0;
}

# Turn the offset into the two-digit hour stamp that is later compared with
# the hour field of the file names, e.g. "+2" -> "23", "-5" -> "06",
# no offset -> "01".
$hour -= 1;
$hour = ($hour > 0) ? 24 - $hour : $hour * -1;
$hour = "0$hour" if $hour < 10;

# Make sure the working directories exist.
foreach my $dir ("projectcounts", "pagecounts", "$date") {
    mkdir $dir unless -d $dir;
}
# Main work loop, executed twice:
#   pass 1 ($i == 1): download the hourly "projectcounts" files, sum them up
#                     and write the report (HTML overview or one-line text);
#   pass 2 ($i == 2): download the hourly "pagecounts" *.gz files and write a
#                     shell script (pagecounts/extract.sh) that gunzips them.
for (my $i = 1; $i <= 2; $i++) {
# Only proceed when a project code and a date were given and $hour is a
# two-digit string; otherwise the usage text at the end of the loop body is
# printed (on pass 1 only).
# On pass 1 the index page of http://dammit.lt/wikistats/ is fetched first.
# $file is handed to request() as its second argument, so the response body
# is presumably stored in "wikiStats.htm" (LWP content-file form -- confirm).
if ($ARGV[0] && $ARGV[1] && $hour =~ /^[0-9]{2,2}$/) { if ($i eq 1) { $url = "http://dammit.lt/wikistats/"; $file = "wikiStats.htm"; print "Downloading $file\n"; $userAgent = LWP::UserAgent->new(); $httpRequest = HTTP::Request->new("GET", $url); $serverResponse = $userAgent->request($httpRequest, $file); if ($serverResponse->is_error()) { print "Error code: ", $serverResponse->code(), "\n"; print "Error message: ", $serverResponse->message(), "\n"; } }
# Scan the saved index page for links to projectcounts (pass 1) or
# pagecounts (pass 2) files.  Selection starts at the link whose date and
# hour equal $date/$hour; $j counts selected files, and while 0 < $j < 24
# every further matching link is taken as well, regardless of its date/hour.
# Each selected file name is appended to list.txt and downloaded into the
# projectcounts/ or pagecounts/ directory; on pass 2 a gunzip command for it
# is also appended to extract.sh.
#
# After the scan, pass 1 re-reads projectcounts/list.txt and parses every
# listed file line by line ("<code> - <number> <number>"; only the first
# number is used).  If $project is "Wikimedia" the counts are accumulated per
# code in %globalViews, otherwise only lines whose code equals $project are
# added to $totalViews.  Finally list.txt is deleted.
#
# NOTE(review): `print EXE "sleep 1"; close EXE;` also runs on pass 1, where
#   EXE has not been opened in this loop -- looks unintended; confirm.
# NOTE(review): `my $globalViews;` declares a scalar that is never used; the
#   data lives in the (undeclared, package-global) hash %globalViews.
# NOTE(review): none of the open() calls are checked for failure.
open HTM, "wikiStats.htm"; if ($i eq 1) { open LIST, ">projectcounts/list.txt"; } else { print "Preparing to download pagecounts files (+/- 600 MB; press Ctrl+C to abort)\n"; open LIST, ">pagecounts/list.txt"; open EXE, ">pagecounts/extract.sh"; print EXE "#!/bin/bash\n\n"; } $j = 0; while(<HTM>) { if ($i eq 1 && $_ =~ /href="projectcounts-([0-9]+)-([0-9]{2,2})([0-9]+)"/ || $i eq 2 && $_ =~ /href="pagecounts-([0-9]+)-([0-9]{2,2})([0-9]+)\.gz"/) { if ($1 eq $date && $2 eq $hour || $j > 0 && $j < 24) { $j++; if ($i eq 1) { $file = "projectcounts-$1-$2$3"; } else { $file = "pagecounts-$1-$2$3.gz"; } print LIST "$file\n"; if ($i eq 2) { print EXE "echo \" Extracting $file\" && gunzip -f $file && "; } $url = "http://dammit.lt/wikistats/$file"; print " Downloading $file\n"; $userAgent = LWP::UserAgent->new(); $httpRequest = HTTP::Request->new("GET", $url); if ($i eq 1) { $serverResponse = $userAgent->request($httpRequest, "projectcounts/$file"); } else { $serverResponse = $userAgent->request($httpRequest, "pagecounts/$file"); } if ($serverResponse->is_error()) { print "Error code: ", $serverResponse->code(), "\n"; print "Error message: ", $serverResponse->message(), "\n"; } } } } print EXE "sleep 1"; close EXE; close LIST; close HTM; if ($i eq 1) { open LIST, "projectcounts/list.txt"; $totalViews = 0; my $globalViews; while(<LIST>) { if ($_ =~ /^(projectcounts-[0-9]+-[0-9]+)$/) { open IN, "projectcounts/$1"; while(<IN>) { if ($_ =~ /^([a-z-]+) - ([0-9]+) ([0-9]+)$/) { if ($project eq "Wikimedia") { $globalViews{$1} = $globalViews{$1}+$2; } elsif ($1 eq $project) { $totalViews = $totalViews+$2; } } } close IN; } } close LIST; unlink ("projectcounts/list.txt"); # unlink ("projectcounts-files")? # rmdir "projectcounts-folder"? 
# If a file totalHits-$date.txt exists, the leading number of its (last
# matching) line replaces the "als" entry of %globalViews.
# For the pseudo-project "Wikimedia" an XHTML overview is then written to
# $date/index.html, starting with the document head below.
open ALS, "totalHits-$date.txt"; while(<ALS>) { if ($_ =~ /^([0-9]+) /) {$alsHits = $1;} } close ALS; if ($alsHits) {$globalViews{"als"} = $alsHits;} if ($project eq "Wikimedia") { open OUT, ">$date/index.html"; print OUT "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"; print OUT "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n\n"; print OUT "<head>\n"; print OUT "<meta name=\"robots\" content=\"noindex, noarchive, nosnippet\" />\n"; print OUT "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n"; print OUT "<title>Wikimedia page Hits</title>\n"; print OUT "</head>\n\n"; print OUT "<body style=\"background-color: #FFFFF0; margin: 10px;\">\n";
# Page header: "Home" link and introductory text.
# NOTE(review): several of the following string literals consist only of
#   "- " and newlines, and the intro text repeats two sentences; this looks
#   like HTML markup that was lost when the script was copied -- confirm
#   against the original before relying on the generated page.
print OUT "
< <a href=\"../\">Home</a>
\n"; print OUT "
Total page hits for Wikimedia wikis on $date (UTC); counted by the <a href=\"http://lists.wikimedia.org/pipermail/wikitech-l/2007-December/035435.html\">squid servers</a> (also bots, crawlers, reloads etc. have been counted)
Wiktionary & Co. are not yet analysed in <a href=\"http://lists.wikimedia.org/pipermail/wikitech-l/2007-December/035435.html\">midom's statistics</a>, unfortunately!
Overall page impressions: Not until Wiktionary & Co. get analysed!
Wiktionary & Co. are not yet analysed in <a href=\"http://lists.wikimedia.org/pipermail/wikitech-l/2007-December/035435.html\">midom's statistics</a>, unfortunately!
Overall page impressions: Not until Wiktionary & Co. get analysed!
\n";
print OUT "
\n";
print OUT "
\n";
# First listing: one entry per key of %globalViews in sorted key order.
# The two substitutions insert thousands separators into the displayed
# number (each runs once, so at most two commas are added).  Entries with
# more than 99999 hits link to ../do-it-yourself.htm, entries with more than
# 1 hit link to "$key/", anything else is not printed.  Every count is also
# collected in @hitList for the second listing.
print OUT "Sorted by language code:\n";print OUT "
- \n"; my @abc = sort keys %globalViews; my @hitList; foreach (@abc) { $key = $_; $wikiHits = $globalViews{$key}; $wikiHits =~ s/([0-9])([0-9]{3,3})$/$1,$2/; $wikiHits =~ s/([0-9])([0-9]{3,3}),/$1,$2,/; if ($globalViews{$key} > 99999) { print OUT "
- <a href=\"../do-it-yourself.htm\">$key</a> - $wikiHits page hits
- \n";
} elsif ($globalViews{$key} > 1) {
print OUT " - <a href=\"$key/\">$key</a> - $wikiHits page hits
- \n";
} push(@hitList, $globalViews{$key}); }
print OUT "
\n"; print OUT "
\n";
# Second listing: the collected counts are sorted with numSort (largest
# first).  $dbl remembers the previous count so that a repeated value is
# handled only once; for each distinct value every key of %globalViews
# holding that value is printed, using the same link thresholds as above.
print OUT "Sorted by total page hits:\n";print OUT "
- \n"; my @hit = sort(numSort @hitList); my $dbl = 0; foreach (@hit) { if ($_ ne $dbl) { $wikiHits = $_; $dbl = $_; foreach (keys %globalViews) { $key = $_; if ($globalViews{$_} eq $dbl) { $wikiHits =~ s/([0-9])([0-9]{3,3})$/$1,$2/; $wikiHits =~ s/([0-9])([0-9]{3,3}),/$1,$2,/; if ($globalViews{$key} > 99999) { print OUT "
- <a href=\"../do-it-yourself.htm\">$key</a> - $wikiHits page hits\n";
} elsif ($globalViews{$key} > 1) {
print OUT " - <a href=\"$key/\">$key</a> - $wikiHits page hits\n";
} } } } }
# Page footer with a link to the script itself.
print OUT "
\n"; print OUT "
\n
\n"; print OUT "
Get the free-and-open-source <a href=\"../wikiHits.pl\">Perl script</a>, used for creating this overview!
\n";
# End of the "Wikimedia" HTML page.  For any other project the total is
# written to totalHits-$date.txt and echoed to the terminal instead.
# On pass 2 only a hint about extracting the downloaded archives is printed.
# If the arguments were missing or invalid, the usage text is printed once
# (on pass 1).
print OUT "</body>\n</html>"; } else { open OUT, ">totalHits-$date.txt"; print OUT "$totalViews page hits/views including those of bots, reloads etc. ($date, UTC$ARGV[2])"; print "$totalViews page hits/views including those of bots, reloads etc. ($date, UTC$ARGV[2])\n"; } close OUT; } else { print "You have to extract the downloaded *.gz files (> 2.5 GB; use 7-Zip)!\n If you are running Linux you can execute the file extract.sh!\n You then may run pageHits.pl, too!\n\n"; } } else { if ($i eq 1) { print "\nYou have to specify your project code and the exact day and hour (UTC) to begin with!\n"; print " If you want to evaluate the day 2008-02-17, for example:\n"; print " If your wiki's timezone is UTC+2 or more, you have to type \"perl wikiHits.pl de 20080216 +2\"\n"; print " If your wiki's timezone is UTC-5 [< +2], you have to type \"perl wikiHits.pl en 20080217 -5\"\n"; print " If your project is Wikimedia Commons you may just type \"perl wikiHits.pl commons 20080217\"\n\n"; } }
# End of the two-pass loop; afterwards the downloaded index page is removed.
} unlink ("wikiStats.htm");
# Comparator for sort(): orders numbers from largest to smallest.
# Reads the package variables $a and $b that sort() sets for each comparison
# and returns -1, 0 or 1.  The built-in numeric comparison operator with
# swapped operands replaces the former hand-written three-way if/elsif chain.
sub numSort {
    return $b <=> $a;
}