Wikipedia:Statistik/pageViews.pl

Us der alemannische Wikipedia, der freie Dialäkt-Enzyklopedy

#!/usr/bin/perl
# This script was released by w:als:User:Melancholie under the GNU General Public License and Creative Commons by-sa (attribution + share alike); if you should improve this script, please tell me!
# Usage: See bottom (or just try ;-)

# Command-line arguments: project code, day (e.g. 20080217) and an optional
# UTC offset such as "+2", "-5", "02" or "2:00".
($project, $date, $hour) = @ARGV[0 .. 2];

if ($hour ne "") {
 # Reduce the offset to a bare, optionally negative, number of hours:
 # drop a leading "+", a leading zero and a trailing ":00" / "00".
 $hour =~ s/^\+(.+)$/$1/;
 $hour =~ s/^(-?)0([0-9])$/$1$2/;
 $hour =~ s/^(-?)0?([0-9]):?00$/$1$2/;
}
else {
 # No offset given.
 $hour = 0;
}

# Shift the offset by one, mirror it (24 - h for positive values, sign flip
# otherwise) and pad the result to two digits.
$hour -= 1;
$hour = $hour > 0 ? 24 - $hour : $hour * -1;
$hour = "0$hour" if $hour < 10;

print "Preparing to analyse logs...\n";

# Load the daily hit totals from which the filter thresholds are derived.
#
# Wikimedia run:      reads $date/index.html; every line containing
#                     ">code</a> - N " yields the total N for wiki "code".
#                     Fills %globalHits for every wiki, and %onePercent,
#                     %tenthOfPermille and %onePpm for wikis with fewer than
#                     100000 hits or a code matching eo/simple/vo.
# Single-project run: reads totalHits-$date.txt; a line starting with
#                     "<digits> " yields $totalHits, $onePercent,
#                     $tenthOfPermille (whole number, at least 1) and $onePpm.
#
# The file is opened with the three-argument form, so nothing in $date can be
# taken as an open mode. If the file cannot be read a warning is printed and
# the thresholds stay unset (the script carried on silently in that case
# before).
my $summaryFile = $project eq "Wikimedia" ? "$date/index.html" : "totalHits-$date.txt";

if (open(my $summary, '<', $summaryFile)) {
 while (my $line = <$summary>) {
  if ($project eq "Wikimedia") {
   #todo: Reduce for one column
   if ($line =~ />([a-z-]+)<\/a> - ([0-9,]+) /) {
    $wikiCode = $1;
    $wikiHits = $2;
    # Totals carry thousands separators.
    $wikiHits =~ s/,//g;
    # Before if(), or it would be -1 ;-)
    $globalHits{$wikiCode} = $wikiHits;
    if ($wikiHits < 100000 || $wikiCode =~ /(eo|simple|vo)/) {
     $onePercent{$wikiCode} = $wikiHits/100;
     # Codes matching /[ev]o/ get a ten times lower threshold.
     $topm = 10000; if ($wikiCode =~ /[ev]o/) {$topm = 100000;}
     $tenthOfPermille{$wikiCode} = $wikiHits/$topm;
     # Only count the >[1/million] pages; should not affect anything!?
     $onePpm{$wikiCode} = $wikiHits/1000000;
    }
   }
  } else {
   if ($line =~ /^([0-9]+) /) {
    $totalHits = $1;
    $onePercent = $totalHits/100;
    # Codes matching /[ev]o/ get a ten times lower threshold.
    $topm = 10000; if ($project =~ /[ev]o/) {$topm = 100000;}
    $tenthOfPermille = $totalHits/$topm;
    # Cut off the fraction, but never go below one view.
    $tenthOfPermille =~ s/\.[0-9]+$//;
    if ($tenthOfPermille < 1) {
     $tenthOfPermille = 1;
    }
    # Only count the >[1/million] pages; should not affect anything!?
    $onePpm = $totalHits/1000000;
   }
  }
 }
 close $summary;
} else {
 warn "Cannot read $summaryFile: $!\n";
}

# Working state for the analysis: $max is the highest per-page total of a
# single-project run, %pageViews collects the views per page, and the three
# lists receive the formatted output entries.
# The analysis only runs when a project code and a date were passed and $hour
# consists of exactly two digits; otherwise the usage help in the matching
# else branch is printed.
$max = 0; my %pageViews; my @pageViewList; my @searchTermList; my @wiktSearchTermList; if ($ARGV[0] && $ARGV[1] && $hour =~ /^[0-9]{2,2}$/) {

# List of the log files to analyse; a failed open is not detected here.
open LIST, "pagecounts/list.txt";
if ($project eq "Wikimedia") {
 print " IMPORTANT: You need much more than 200 MB of free memory (RAM + swap)!\n";
 print "          | Otherwise press Ctrl+C to abort!\n";
}
# Read every log named in pagecounts/list.txt (handle LIST) and add up the
# views per page.
#
# List entries must look like "pagecounts-<digits>-<digits>.gz"; the file that
# is opened is pagecounts/<same name without .gz> (presumably unpacked
# beforehand - verify against the extraction step).
# Log lines have the form "<wiki> <page> <views> <number>"; the fourth field
# is not used.
#
# Results: %pageViews (keyed by "<wiki>:-:<page>" in a Wikimedia run, by page
# title otherwise) and the highest total so far in %max (per wiki) or $max.
LOGFILE: while (my $listEntry = <LIST>) {
 next LOGFILE unless $listEntry =~ /^(pagecounts-[0-9]+-[0-9]+)\.gz$/;
 my $logName = $1;
 print " Analysing $logName\n";
 my $log;
 if (!open($log, '<', "pagecounts/$logName")) {
  # An unreadable log used to be treated like an empty one, without any hint.
  warn " Cannot read pagecounts/$logName: $!\n";
  next LOGFILE;
 }
 ENTRY: while (my $entry = <$log>) {
  next ENTRY unless $entry =~ /^([a-z-]+) ([^ ]+) ([0-9]+) ([0-9]+)$/;
  $wiki = $1;
  $page = $2;
  $views = $3;

  # Wikimedia run: only wikis that got thresholds (small ones and
  # eo/simple/vo), and only pages above one millionth of the wiki's hits.
  # Single-project run: only the requested wiki, same relative threshold.
  next ENTRY unless (
   ($project eq "Wikimedia"
    && ($globalHits{$wiki} < 100000 || $wiki =~ /(eo|simple|vo)/)
    && $views > $onePpm{$wiki}
    && $globalHits{$wiki} > 0)
   || ($wiki eq $project && $views > $onePpm)
  );

  # Decode %XX escapes and strip everything up to and including "/wiki/".
  $page =~ s/%([A-Fa-f0-9][A-Fa-f0-9])/pack("C", hex($1))/eg;
  $page =~ s/\/[wW]iki\/(.+)$/$1/;

  # Skip requests that are not page titles (scripts, skin files, broken
  # URLs, ...) and image file names outside the Bild:/Image: namespace.
  next ENTRY unless ($page !~ /(index2?\.php|skins-1\.5\/.+|images\/.+?\.png|_fichiers|protection\.php|sort_none\.gif|sort_up\.gif|sort_down\.gif|result\.php|\/statistik\/.+?\.pl|\/sf-forum|prev\=\/search\?q|\*.php|http:\/\/|\/irc:\/\/chat|^\||\$1|wiki\/|xml:lang=|\/favicon\.ico|^\/?headbg\.jpg|^\/?bullet\.gif|^\/?user\.gif|poweredby_mediawiki_88x31|wikimedia-button|^\\|wikipedia\.org|opensearch_desc\.php|errors\.php|<\/?a>|Text\/Text|\/[0-9\.]+alpha|head_auth\.php|class\.php|mailto:|\@[A-Za-z]|\.php|\%|=|�)/i && !($page !~ /(Bild|Image):/i && $page =~ /\.(gif|jpg|jpeg|png|svg)$/i));

  # Logged searches are counted under one normalised pseudo title so that
  # differently typed variants of a term are added up together.
  my $rawTerm = $project ne "Wikimedia" ? extract_logged_search($page) : undef;
  if (defined $rawTerm) {
   $searchTerm = normalise_search_term($rawTerm);
   $page = "Statistics/loggedSearches/$searchTerm";
  }

  if ($project eq "Wikimedia") {
   # And now comes a potential RAM blaster (you better should use Linux, to "nice" this ;-)
   $wikiPage = "$wiki:-:$page";
   $pageViews{$wikiPage} += $views;
   $max{$wiki} ||= 0;
   if ($pageViews{$wikiPage} > $max{$wiki}) {$max{$wiki} = $pageViews{$wikiPage};}
  } else {
   $pageViews{$page} += $views;
   if ($pageViews{$page} > $max) {$max = $pageViews{$page};}
  }
 }
 close $log;
}

# Return the search term carried by a search-logging page title, or undef.
#
# Recognised titles end in ":Statistik/Suchen/<term>" or in
# ":Statistics/search[_](es|terms|...)/<term>" (case-insensitive). Titles
# containing a backslash, bracket or brace are rejected.
# The "(?:e|term)" group must not capture: it used to be group 1, so for the
# second title form the term became "e" or "term" instead of the real term.
sub extract_logged_search {
 my ($title) = @_;
 return if $title =~ /[\\\[\{\}\]]/;
 return $1 if $title =~ /:Statistik\/Suchen\/(.+)$/i;
 return $1 if $title =~ /:Statistics\/search_?(?:e|term)s?\/(.+)$/i;
 return;
}

# Normalise a logged search term and return the result.
#
# Steps: "+" and "_" become spaces (leading/trailing "+" are dropped); one
# stray marker character before and one after a word is removed; terms with a
# run of 5 to 9 ASCII capitals are lowercased; unless the term contains
# "iktionary/", the first letter, every word of at least four letters and
# every letter after a hyphen are capitalised (spaces around hyphens are
# removed); finally some umlaut spellings are repaired.
# Original note: use this only if wiki page titles are forced to uppercase!
sub normalise_search_term {
 my ($term) = @_;
 # ucfirst()/\u only handle ASCII here, so the umlauts are mapped by hand.
 my %upperUmlaut = ('ä' => 'Ä', 'ö' => 'Ö', 'ü' => 'Ü');

 $term =~ s/^\++//;
 $term =~ s/\++$//;
 $term =~ s/[\+_]+/ /g;
 $term =~ s/(^|\/| )\^?[#\*<>'"]( )?([A-ZÄÖÜa-zäèéöüß]+)/$1$3/;
 $term =~ s/([A-ZÄÖÜa-zäèéöüß]+)( )?[#\*'"]( |$)/$1$3/;
 # Only works for ASCII:
 $term = lc($term) if $term =~ /[A-Z]{5,9}/;

 if ($term !~ /iktionary\//) {
  $term = ucfirst($term);
  $term =~ s/^(ä|ö|ü)/$upperUmlaut{$1}/;
  # Words: a space, one letter and at least three more lowercase letters.
  $term =~ s/ ([a-z])([a-zäèéöü]{3,3})/ \u$1$2/g;
  $term =~ s/ (ä|ö|ü)([a-zäèéöü]{3,3})/ $upperUmlaut{$1}$2/g;
  # Letters after a hyphen.
  $term =~ s/ ?- ?([a-z])/-\u$1/g;
  $term =~ s/ ?- ?(ä|ö|ü)/-$upperUmlaut{$1}/g;
 }

 if ($term =~ /(aü|äü|üä)/i) {
  $term =~ s/(Aü|Äü|Üä|Uä)/Äu/g;
  $term =~ s/(aü|äü|üä)/äu/g;
 } elsif ($term =~ /aer$/) {
  $term =~ s/aer$/är/;
 }
 return $term;
}
# Second phase: turn the per-page totals in %pageViews into output.
#
# Wikimedia run: entries with fewer than 3 views or below the wiki's
# %tenthOfPermille threshold are dropped; the remaining keys are sorted so
# that the pages of one wiki are adjacent, and whenever the wiki code changes
# the chart for the previous code is written to $date/<code>/index.html.
# Single-project run: for every count from $tenthOfPermille up to $max all
# remaining keys are scanned; the visitor-counter page feeds $uniqueVisitors
# (written to VIS), logged searches feed the search-term lists and
# $loggedSearches, and every other page is put at the front of @pageViewList.
#
# NOTE(review): this section was damaged when the script was copied from the
# wiki page: HTML inside several string literals is gone, some literals are
# unterminated (the push/unshift calls), and apparently text after a "#" was
# turned into numbered list items. Restore it from the original before running.
# NOTE(review): as far as the visible code shows, a wiki's chart is only
# written when a key of another wiki follows, so the list of the last wiki in
# sort order is never written - confirm.
close LIST;
$uniqueVisitors = 0;
$loggedSearches = 0;
if ($project ne "Wikimedia") {open VIS, ">uniqueVisitors-$date.txt";}
if ($project eq "Wikimedia") {
 my $langCode = "aa";
 # And now comes a potential swap space blaster (you really should use Linux, to "nice" that one ;-)
 print "Sorting data...\n";
 # Prune: fewer than 3 views, or below the threshold of the entry's wiki.
 foreach (keys %pageViews) {
  if ($pageViews{$_} < 3) {
   delete $pageViews{$_};
  } else {
   $curKey = $_;
   $curWiki = $curKey;
   $curWiki =~ s/^([a-z-]+):-:([^ ]+)$/$1/;
   if ($pageViews{$curKey} < $tenthOfPermille{$curWiki}) {
    delete $pageViews{$curKey};
   }
  }
 }
 # Every key starts with its wiki code, so sorting groups the keys by wiki.
 my @abc = sort keys %pageViews;
 print " Processing data from $langCode wiki ...";
 $arrayPos = -1;
 foreach (@abc) {
  $key = $_;
  $arrayPos++;
  if ($key =~ /^([a-z-]+):-:([^ ]+)$/) {
   $code = $1;
   $langPage = $2;
   #if ($langCode =~ /(commons|meta|species)/) {
   # $subDom = "$langCode.wikimedia";
   #} else {
    $subDom = "$langCode.wikipedia";
   #}
   # A new wiki code: write the chart of the previous code ($langCode) first.
   if ($code ne $langCode) {
    print " creating chart ...";
    mkdir "$date/$langCode" unless -d "$date/$langCode";
    open OUT, ">$date/$langCode/index.html";
    print OUT "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n";
    print OUT "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n\n";
    print OUT "<head>\n";
    print OUT "<meta name=\"robots\" content=\"noindex, noarchive, nosnippet\" />\n";
    print OUT "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n";
    print OUT "<title>Wikimedia wiki page Hits for $langCode</title>\n";
    print OUT "</head>\n\n";
    print OUT "<body style=\"background-color: #FFFFF0; margin: 10px;\">\n";

print OUT "

< <a href=\"../../\">Home</a> | <a href=\"../\">comparison</a>

\n";

    $zero = ""; if ($langCode =~ /[ev]o/) {$zero = "0";}

print OUT "

Page views (only if >0.$zero$\1‰ or >3) for <a href=\"http://$subDom.org/\">$langCode</a>-wiki on $date (UTC); counted by the <a href=\"http://lists.wikimedia.org/pipermail/wikitech-l/2007-December/035435.html\">squid servers</a> (also bots, crawlers, reloads etc. have been counted)

\n"; print OUT "

  1. \n"; my $i = $max{$langCode}+1; my $j = 0; # Or with values? for ($i > 0; $i = $i-1;) { foreach (@pageViewList) { $item = $_; if ($item =~ /^
  2. ([0-9]+)<\/b>/ && $1 eq $i) { if ($j < 1000 && $item !~ /(\/A_simple_and_harmless_Visitor_Counter|:Statistik\/Suchen\/)/i) { $item =~ s/([0-9])([0-9]{3,3})<\/b>/$1,$2<\/b>/; $item =~ s/([0-9])([0-9]{3,3}),/$1,$2,/; print OUT $item; $j++; } } } } print OUT "

\n\n";

    $yearMonth = $date;
    $yearMonth =~ s/[0-9]{2,2}$//;
    print OUT "Missing your article in top 1000? Just spam it upwards ;-)
No, seriously: Just have a look on the <a href=\"http://stats.grok.se/$langCode/$yearMonth/\">Wikipedia article traffic statistics</a>!\n";

print OUT "
\n
\n


\n"; print OUT "

Get the free-and-open-source Perl scripts <a href=\"../../wikiHits.pl\">wikiHits</a> and <a href=\"../../pageHits.pl\">pageHits</a>, used for creating this analysis!

\n";

    print OUT "</body>\n</html>";
    close OUT;
    @pageViewList = ();
    $langCode = $code;
    print " done\n Processing data from $code wiki ...";
   }
   $percent = $pageViews{$key}/$onePercent{$langCode};
   $percent =~ s/\.([0-9]{2,2}).+$/.$1/;

push(@pageViewList, "

   delete $pageViews{$key};
  }
  $abc[$arrayPos] = "";
 }
} else {
 for (my $i = $tenthOfPermille; $i <= $max; $i++) {
  foreach (keys %pageViews) {
   $key = $_;
   if ($pageViews{$_} < $tenthOfPermille) {delete $pageViews{$_};}
   if ($pageViews{$_} eq $i) {
    if ($key =~ /\/A_simple_and_harmless_Visitor_Counter/i) {
     $uniqueVisitors = $pageViews{$_};
     # It's a JavaScript-based counter (needing cookies being enabled)! [JS: ~95% / C: ~99%]
     # ~95%: TheCounter.com/stats
     # ~99%: WebHits.de/deutsch/webstats.html
     $allUniqueVisitors = ($uniqueVisitors/94)*100;
     $allUniqueVisitors =~ s/^([0-9]+)\..+$/$1/;
     print VIS "$allUniqueVisitors unique visitors (based on JavaScript and cookies, not IPs)";
    } elsif ($key =~ /^Statistics\/loggedSearches\/(.+)$/i) {
     if ($key =~ /\/deWiktionary\/(.+)$/i) {
      $searchTerm = $1;
      #ToDo: for or while
      if ($searchTerm =~ /(be|[^is]cht?|ck|[^i]de|[^w]eit|[^b]el|fer?|ft|ge|ien?|[^e]in|ke|le|me|mus|ne|nis|pe|re|rn|se|ße|st|tat|tät|ter?|tik|ion|tor?|tum|tz|inge?|ug|ung|uss?|ut|wang|ze|z|ache|burg|hafen|hause?n?|heim|hof|stadt|weiler|wort|wert|off|ende)( |$)/) {
       $suffix = $1;
       $searchTerm =~ s/(^| |-)([a-zäèéöüß]+)$suffix( |$)/$1\{\{subst:UCFIRST:$2$suffix\}\}$3/g;
      }
      if ($searchTerm =~ /(eit|schaft|ion|ung|ach|burg)en( |$)/) {
       $suffix = $1;
       $searchTerm =~ s/(^| |-)([a-zäèéöüß]+)$suffix\en( |$)/$1\{\{subst:UCFIRST:$2$suffix\}\}$3/g;
      }
      if ($searchTerm =~ /[äöü]/ && $searchTerm =~ /(chen|lein)( |$)/) {
       $suffix = $1;
       $searchTerm =~ s/(^| |-)([a-zäèéöüß]+)$suffix( |$)/$1\{\{subst:UCFIRST:$2$suffix\}\}$3/g;
      }
      if ($searchTerm =~ /inn?en(-| |$)/) {
       $searchTerm =~ s/(^| |-)([a-zäèéöüß]+)inn?en(-| |$)/$1\{\{subst:UCFIRST:$2$\in\}\}$3/g;
      }
      if ($searchTerm =~ /niss?e( |$)/) {
       $searchTerm =~ s/(^| |-)([a-zäèéöüß]+)niss?e( |$)/$1\{\{subst:UCFIRST:$2$\nis\}\}$3/g;
      }
      if ($searchTerm =~ /(ats|fts|eits|ngs)/) {
       $suffix = $1;
       $searchTerm =~ s/(^| |-)([A-ZÄÖÜa-zäèéöüß]+)$suffix([A-ZÄÖÜa-zäèéöüß-]+)( |$)/$1\{\{subst:UCFIRST:$2$suffix$3\}\}$4/g;
      }
      if ($searchTerm =~ /(m|n)f( |$)/) {
       $suffix = "$1$\f";
       $searchTerm =~ s/(^| |-)([a-zäèéöüß]+)$suffix( |$)/$1\{\{subst:UCFIRST:$2$suffix\}\}$3/g;
       # For the following stuff most searches have already been done in lowercase ;-)
      }
      if ($searchTerm =~ /(bar|den|los|ieren|gt|ig|isch|lich|end|ten|rt|sh)( |$)/) {
       $suffix = $1;
       $searchTerm =~ s/(^| |-)([A-ZÄÖÜ])([a-zäèéöüß]+)$suffix( |$)/$1\{\{subst:LCFIRST:$2$3$suffix\}\}$4/g;
      }
      if ($searchTerm =~ /[dpkt]le?n?( |$)/) {
       $searchTerm =~ s/([dpkt])l(e?)(n?)( |$)/$1$\l$2$3$4/g;
      }
      if ($searchTerm =~ /\{\{subst/) {
       while ($searchTerm =~ /^(.*?)\{\{subst:UCFIRST:([^\}]+)\}\}(.*?)$/g) {
        $sTs = "";
        $revised = ucfirst($2);
        $revised =~ s/^ä/Ä/;
        $revised =~ s/^ö/Ö/;
        $revised =~ s/^ü/Ü/;
        $searchTerm = "$1$revised$3";
        if ($3 =~ /^en( |$)/) {$sTs = "en$1";}
       }
       while ($searchTerm =~ /^(.*?)\{\{subst:LCFIRST:([^\}]+)\}\}(.*?)$/g) {
        $sTs = "";
        $revised = lcfirst($2);
        $revised =~ s/^Ä/ä/;
        $revised =~ s/^Ö/ö/;
        $revised =~ s/^Ü/ü/;
        $searchTerm = "$1$revised$3";
       }

unshift(@wiktSearchTermList, "

      } else {

unshift(@wiktSearchTermList, "

      }
     } elsif ($key !~ /iktionary\//) {

unshift(@searchTermList, "

     }
     $loggedSearches = $loggedSearches+$pageViews{$_};
    } else {
     # The JavaScript counter influences the overall sum!
     # [With higher $i, $loggedSearches gets more exact]!
     $realTotalHits = $totalHits-$pageViews{"Wikipedia:Statistik/Besucher/A_simple_and_harmless_Visitor_Counter"}-$loggedSearches;
     $onePercent = $realTotalHits/100;
     $percent = $pageViews{$_}/$onePercent;
     $percent =~ s/\.([0-9]{2,2}).+$/.$1/;
     unshift(@pageViewList, "#$pageViews{$_} [$percent %]: $key\n");
     delete $pageViews{$_};
    }
   }
  }
 }
}
# Write the result files of a single-project run (nothing for "Wikimedia"):
#   pageHits-$date.txt          the first 5000 entries of @pageViewList
#   $date/searchTerms.htm       the entries of @searchTermList
#   $date/wiktSearchTerms.htm   the entries of @wiktSearchTermList
#   totalHits-$date.txt         rewritten with the visitor-counter hits and
#                               the logged searches subtracted from the total
#
# Every open is checked and reported with a warning. totalHits-$date.txt is
# only rewritten when a total line was actually read; before, a missing or
# unexpected file was truncated to an empty one.
# NOTE(review): the page texts below contain no markup apart from the links;
# the HTML tags were probably lost when the script was copied from the wiki
# page. The strings are kept exactly as found.
if ($project ne "Wikimedia") {
 close VIS;

 my $hitsFile = "pageHits-$date.txt";
 if (open(my $out, '>', $hitsFile)) {
  print {$out} "Hits/views per page; including those of bots, reloads etc. ($date, UTC$ARGV[2])\n\n";
  my $last = $#pageViewList < 4999 ? $#pageViewList : 4999;
  print {$out} @pageViewList[0 .. $last];
  close($out) or warn "Cannot finish $hitsFile: $!\n";
 } else {
  warn "Cannot write $hitsFile: $!\n";
 }

 write_search_term_page(
  "$date/searchTerms.htm",
  "Protokolyrte Suechaafróga",
  "../searchLog.htm",
  "Suechaafróga nò it uffindbare Artikl ($date)\nÈrschtbuechschtabe vo alle Werter mit meh as 3 Buechschtabe sind mit ucfirst(albert einstein) outomatisch umgwandlet worre!",
  \@searchTermList
 );
 write_search_term_page(
  "$date/wiktSearchTerms.htm",
  "Protokollierte Suchanfragen",
  "../wiktSearchLog.htm",
  "Suchanfragen nach nicht auffindbaren Einträgen ($date)\nAchtung: Ungenauigkeiten von etwa +5 mgl. (Cookies, Squids); enthält trotz autom. Groß-/Kleinschreibkorrektur noch massenhaft Rechtschreibfehler!",
  \@wiktSearchTermList
 );

 # Correct the stored total: the last line starting with "<digits> " is
 # rewritten with the reduced number and becomes the only line of the file.
 my $totalFile = "totalHits-$date.txt";
 my $fixedLine;
 if (open(my $old, '<', $totalFile)) {
  while (my $line = <$old>) {
   if ($line =~ /^([0-9]+) /) {
    my $fixedTotal = $1-$uniqueVisitors-$loggedSearches;
    $fixedLine = $line;
    $fixedLine =~ s/^[0-9]+ (.+)$/$fixedTotal\ $1/;
   }
  }
  close $old;
 } else {
  warn "Cannot read $totalFile: $!\n";
 }
 if (defined $fixedLine) {
  if (open(my $fix, '>', $totalFile)) {
   print {$fix} $fixedLine;
   close($fix) or warn "Cannot finish $totalFile: $!\n";
  } else {
   warn "Cannot write $totalFile: $!\n";
  }
 } else {
  warn "No total found in $totalFile; file left unchanged\n";
 }
}

# Print the XHTML prologue, <head> with the given title, and the opening
# <body> tag to the file handle $fh.
sub write_xhtml_head {
 my ($fh, $title) = @_;
 print {$fh} "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n";
 print {$fh} "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n\n";
 print {$fh} "<head>\n";
 print {$fh} "<meta name=\"robots\" content=\"noindex, noarchive, nosnippet\" />\n";
 print {$fh} "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n";
 print {$fh} "<title>$title</title>\n";
 print {$fh} "</head>\n\n";
 print {$fh} "<body style=\"background-color: #FFFFF0; margin: 10px;\">\n";
 return;
}

# Write one search-term page.
#   $path   file to create
#   $title  text of the <title> element
#   $home   target of the "Home" link
#   $intro  introduction text (may contain newlines)
#   $items  array reference with the ready-made list entries
# Returns 1 on success; warns and returns nothing when $path cannot be opened.
sub write_search_term_page {
 my ($path, $title, $home, $intro, $items) = @_;
 my $fh;
 if (!open($fh, '>', $path)) {
  warn "Cannot write $path: $!\n";
  return;
 }
 write_xhtml_head($fh, $title);
 print {$fh} "\n\n< <a href=\"$home\">Home</a>\n\n\n";
 print {$fh} "\n\n$intro\n\n\n";
 print {$fh} "\n\n\n";
 print {$fh} @$items;
 print {$fh} "\n\n\n\n";
 print {$fh} "</body>\n</html>";
 close($fh) or warn "Cannot finish $path: $!\n";
 return 1;
}

} else {

# Usage help, shown when the project code, the date or a usable hour offset
# is missing.
print join("\n",
 "",
 "You have to specify your project code and the exact day and hour (UTC) to begin with!",
 " If you want to evaluate the day 2008-02-17, for example:",
 "  If your wiki's timezone is UTC+2 or more, you have to type \"perl pageHits.pl de 20080216 +2\"",
 "  If your wiki's timezone is UTC-5 [< +2], you have to type \"perl pageHits.pl en 20080217 -5\"",
 "  If your project is Wikimedia Commons you may just type \"perl pageHits.pl commons 20080217\"",
 "",
 "",
);

} unlink ("pagecounts/extract.sh"); # runs after either branch: delete pagecounts/extract.sh (result not checked)

# unlink ("pagecounts/list.txt");
# unlink ("pagecounts-files")?
# rmdir "pagecounts-folder"?

# Closing status message; printed after the analysis as well as after the usage help.
print "\nPhew...\nFinished!\n";