Wikipedia:Statistik/totalViews.pl

Us der alemannische Wikipedia, der freie Dialäkt-Enzyklopedy

#!/usr/bin/perl
# This script was released by w:als:User:Melancholie under the GNU General Public License and Creative Commons by-sa (attribution + share alike); if you should improve this script, please tell me!
# Usage: See bottom (or just try ;-)

use LWP::UserAgent;

# Command-line arguments:
#   $ARGV[0]  project code as it appears in the projectcounts files, or the
#             literal "Wikimedia" to build the all-projects overview
#   $ARGV[1]  date (digits only, e.g. 20080217) of the first hourly file
#   $ARGV[2]  optional UTC offset of the wiki, e.g. "+2", "-5", "02", "+02:00"
# These are deliberately left as package globals: the rest of the script
# reads $project, $date and $hour by name.
$project = $ARGV[0];
$date    = $ARGV[1];
$hour    = $ARGV[2];

if (!defined $hour || $hour eq "") {
    # No offset given: treat the wiki as running on UTC.
    $hour = 0;
}
else {
    # Normalise the offset to a bare (optionally negative) number:
    # drop a leading "+", a leading zero, and a trailing ":00" / "00".
    $hour =~ s/^\+(.+)$/$1/;
    $hour =~ s/^(-?)0([0-9])$/$1$2/;
    $hour =~ s/^(-?)0?([0-9]):?00$/$1$2/;
}

# Turn the offset into the two-digit hour field that the first hourly file
# name must carry (it is compared with the hour captured from the index
# links in the download loop). Examples: no offset -> "01", +2 -> "23",
# -5 -> "06".
$hour = $hour - 1;
if ($hour > 0) {
    $hour = 24 - $hour;
}
else {
    $hour = $hour * -1;
}
$hour = "0$hour" if $hour < 10;

# Create the working directories only when the arguments are usable. The
# download loop below checks the very same condition and merely prints the
# usage text otherwise, so a bare invocation no longer litters the current
# directory (or attempts mkdir "" for a missing date).
if ($project && $date && $hour =~ /^[0-9]{2,2}$/) {
    foreach my $dir ("projectcounts", "pagecounts", $date) {
        next if -d $dir;
        # Fail loudly: every later open() into these directories is
        # unchecked and would otherwise fail silently.
        mkdir $dir or die "Cannot create directory '$dir': $!\n";
    }
}

# Main work loop, executed twice:
#   pass 1 ($i == 1): download up to 24 hourly "projectcounts-*" files, sum the
#                     view counts and write either a per-project total or the
#                     all-projects HTML overview;
#   pass 2 ($i == 2): download the matching "pagecounts-*.gz" files and
#                     generate pagecounts/extract.sh to unpack them.
for (my $i = 1; $i <= 2; $i++) {

# Proceed only when project and date were given and $hour has been reduced
# to exactly two digits; otherwise fall through to the usage text below.
if ($ARGV[0] && $ARGV[1] && $hour =~ /^[0-9]{2,2}$/) {
 if ($i eq 1) {
  # Fetch the directory index once; passing a file name as the second
  # argument of request() makes LWP store the response body in that file.
  $url = "http://dammit.lt/wikistats/";
  $file = "wikiStats.htm";
  print "Downloading $file\n";
  $userAgent = LWP::UserAgent->new();
  $httpRequest = HTTP::Request->new("GET", $url);
  $serverResponse = $userAgent->request($httpRequest, $file);
  if ($serverResponse->is_error()) {
   print "Error code: ", $serverResponse->code(), "\n";
   print "Error message: ", $serverResponse->message(), "\n";
  }
 }
 # The saved index page is re-read from disk in both passes.
 # NOTE(review): none of the open() calls in this loop check for failure.
 open HTM, "wikiStats.htm";
 if ($i eq 1) {
  open LIST, ">projectcounts/list.txt";
 } else {
  print "Preparing to download pagecounts files (+/- 600 MB; press Ctrl+C to abort)\n";
  open LIST, ">pagecounts/list.txt";
  open EXE, ">pagecounts/extract.sh";
  print EXE "#!/bin/bash\n\n";
 }
 # $j counts the files selected so far in this pass.
 $j = 0;
 while(<HTM>) {
  # Match a link to an hourly file of the kind this pass handles.
  # Captures: $1 = date digits, $2 = two-digit hour, $3 = remaining digits.
  if ($i eq 1 && $_ =~ /href="projectcounts-([0-9]+)-([0-9]{2,2})([0-9]+)"/ || $i eq 2 && $_ =~ /href="pagecounts-([0-9]+)-([0-9]{2,2})([0-9]+)\.gz"/) {
   # Selection starts at the link whose date and hour equal the requested
   # ones and then continues with the following links until 24 are taken.
   if ($1 eq $date && $2 eq $hour || $j > 0 && $j < 24) {
    $j++;
    if ($i eq 1) {
     $file = "projectcounts-$1-$2$3";
    } else {
     $file = "pagecounts-$1-$2$3.gz";
    }
    print LIST "$file\n";
    if ($i eq 2) {
     # Build one long "cmd && cmd && ..." chain in extract.sh; the chain is
     # terminated by the "sleep 1" written after the loop.
     print EXE "echo \" Extracting $file\" && gunzip -f $file && ";
    }
    $url = "http://dammit.lt/wikistats/$file";
    print " Downloading $file\n";
    $userAgent = LWP::UserAgent->new();
    $httpRequest = HTTP::Request->new("GET", $url);
    # Save the download into the directory belonging to this pass.
    if ($i eq 1) {
     $serverResponse = $userAgent->request($httpRequest, "projectcounts/$file");
    } else {
     $serverResponse = $userAgent->request($httpRequest, "pagecounts/$file");
    }
    if ($serverResponse->is_error()) {
     print "Error code: ", $serverResponse->code(), "\n";
     print "Error message: ", $serverResponse->message(), "\n";
    }
   }
  }
 }
 # NOTE(review): EXE is only opened in pass 2, yet this print/close also runs
 # in pass 1, i.e. on a handle that was never opened.
 print EXE "sleep 1";
 close EXE;
 close LIST;
 close HTM;
 if ($i eq 1) {
  # Pass 1: re-read the list of downloaded projectcounts files and add up
  # the per-project view counts.
  open LIST, "projectcounts/list.txt";
  $totalViews = 0;
  # NOTE(review): this declares a lexical *scalar* that nothing reads; the
  # code below uses the package hash %globalViews instead.
  my $globalViews;
  while(<LIST>) {
   if ($_ =~ /^(projectcounts-[0-9]+-[0-9]+)$/) {
    open IN, "projectcounts/$1";
    while(<IN>) {
     # Expected line shape: "<code> - <number> <number>"; only the first
     # number is added up, the second one is not used here.
     if ($_ =~ /^([a-z-]+) - ([0-9]+) ([0-9]+)$/) {
      if ($project eq "Wikimedia") {
       # Overview mode: accumulate a total for every project code.
       $globalViews{$1} = $globalViews{$1}+$2;
      } elsif ($1 eq $project) {
       # Single-project mode: accumulate only the requested project.
       $totalViews = $totalViews+$2;
      }
     }
    }
    close IN;
   }
  }
  close LIST;
  unlink ("projectcounts/list.txt");
  # unlink ("projectcounts-files")?
  # rmdir "projectcounts-folder"?
  # If a totalHits-$date.txt file exists (the single-project branch further
  # down writes one), take the leading number of its last matching line and
  # use it as the "als" entry, replacing whatever was summed above.
  open ALS, "totalHits-$date.txt";
  while(<ALS>) {
   if ($_ =~ /^([0-9]+) /) {$alsHits = $1;}
  }
  close ALS;
  if ($alsHits) {$globalViews{"als"} = $alsHits;}
  if ($project eq "Wikimedia") {
   # Overview mode: write $date/index.html with two listings of all projects.
   # NOTE(review): several string literals below consist only of whitespace,
   # bullets or list numbers; this looks like HTML markup that was lost when
   # the script was copied from a rendered page - confirm against the original.
   open OUT, ">$date/index.html";
   print OUT "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n";
   print OUT "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n\n";
   print OUT "<head>\n";
   print OUT "<meta name=\"robots\" content=\"noindex, noarchive, nosnippet\" />\n";
   print OUT "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n";
   print OUT "<title>Wikimedia page Hits</title>\n";
   print OUT "</head>\n\n";
   print OUT "<body style=\"background-color: #FFFFF0; margin: 10px;\">\n";

print OUT "

< <a href=\"../\">Home</a>

\n"; print OUT "

Total page hits for Wikimedia wikis on $date (UTC); counted by the <a href=\"http://lists.wikimedia.org/pipermail/wikitech-l/2007-December/035435.html\">squid servers</a> (also bots, crawlers, reloads etc. have been counted)
Wiktionary & Co. are not yet analysed in <a href=\"http://lists.wikimedia.org/pipermail/wikitech-l/2007-December/035435.html\">midom's statistics</a>, unfortunately!

Overall page impressions: Not until Wiktionary & Co. get analysed!

\n";

   print OUT "
\n";

print OUT "

\n";
   # First listing: one entry per project code in alphabetical order.
   # The two substitutions insert thousands separators for the last two
   # three-digit groups (values of ten or more digits get only two commas).
   # Projects with more than 99999 hits link to ../do-it-yourself.htm, those
   # with more than 1 hit link to their own "$key/" directory, and the rest
   # are not printed. Every count is pushed onto @hitList regardless.
   print OUT "Sorted by language code:\n";
print OUT "
  • \n"; my @abc = sort keys %globalViews; my @hitList; foreach (@abc) { $key = $_; $wikiHits = $globalViews{$key}; $wikiHits =~ s/([0-9])([0-9]{3,3})$/$1,$2/; $wikiHits =~ s/([0-9])([0-9]{3,3}),/$1,$2,/; if ($globalViews{$key} > 99999) { print OUT "
  • <a href=\"../do-it-yourself.htm\">$key</a> - $wikiHits page hits
  • \n";
        } elsif ($globalViews{$key} > 1) {
    
    print OUT "
  • <a href=\"$key/\">$key</a> - $wikiHits page hits
  • \n";
        }
        push(@hitList, $globalViews{$key});
       }
    
    print OUT "
\n\n"; print OUT "

\n"; print OUT "

\n";
   # Second listing: counts sorted in descending order via numSort. $dbl
   # remembers the count handled last so that equal counts are processed
   # once; for each distinct count every project code holding that count is
   # printed, using the same link rules as in the first listing.
   print OUT "Sorted by total page hits:\n";
print OUT "
  1. \n"; my @hit = sort(numSort @hitList); my $dbl = 0; foreach (@hit) { if ($_ ne $dbl) { $wikiHits = $_; $dbl = $_; foreach (keys %globalViews) { $key = $_; if ($globalViews{$_} eq $dbl) { $wikiHits =~ s/([0-9])([0-9]{3,3})$/$1,$2/; $wikiHits =~ s/([0-9])([0-9]{3,3}),/$1,$2,/; if ($globalViews{$key} > 99999) { print OUT "
  2. <a href=\"../do-it-yourself.htm\">$key</a> - $wikiHits page hits\n";
           } elsif ($globalViews{$key} > 1) {
    
    print OUT "
  3. <a href=\"$key/\">$key</a> - $wikiHits page hits\n";
           }
          }
         }
        }
       }
    
    print OUT "
\n"; print OUT "

\n"; print OUT "
\n


\n"; print OUT "

Get the free-and-open-source <a href=\"../wikiHits.pl\">Perl script</a>, used for creating this overview!

\n";

   print OUT "</body>\n</html>";
  } else {
   # Single-project mode: store the total in totalHits-$date.txt and echo
   # the same sentence to the terminal.
   open OUT, ">totalHits-$date.txt";
   print OUT "$totalViews page hits/views including those of bots, reloads etc. ($date, UTC$ARGV[2])";
   print "$totalViews page hits/views including those of bots, reloads etc. ($date, UTC$ARGV[2])\n";
  }
  close OUT;
 } else {
  # Pass 2 only downloads; unpacking is left to the user / extract.sh.
  print "You have to extract the downloaded *.gz files (> 2.5 GB; use 7-Zip)!\n If you are running Linux you can execute the file extract.sh!\n You then may run pageHits.pl, too!\n\n";
 }
} else {
 # Arguments missing or hour not reducible to two digits: print the usage
 # text once (during the first pass only).
 if ($i eq 1) {
  print "\nYou have to specify your project code and the exact day and hour (UTC) to begin with!\n";
  print " If you want to evaluate the day 2008-02-17, for example:\n";
  print "  If your wiki's timezone is UTC+2 or more, you have to type \"perl wikiHits.pl de 20080216 +2\"\n";
  print "  If your wiki's timezone is UTC-5 [< +2], you have to type \"perl wikiHits.pl en 20080217 -5\"\n";
  print "  If your project is Wikimedia Commons you may just type \"perl wikiHits.pl commons 20080217\"\n\n";
 }
}

# End of the two-pass loop; afterwards the temporary index page is deleted.
} unlink ("wikiStats.htm");

# Comparator for sort(): orders numbers from largest to smallest.
# Reads the two values under comparison from sort's package variables
# $a and $b and returns -1, 0 or 1 accordingly.
sub numSort {

return -1 if $a > $b;     # larger value sorts first
return  0 if $a == $b;    # equal values keep their relative rank
return  1;                # smaller value sorts last

}