Wikipedia:Fehlende Artikel/Dezirataj-paĝoj.pl
Us der alemannische Wikipedia, der freie Dialäkt-Enzyklopedy
#!/usr/bin/perl
# Read the run configuration from lokalizo.txt.
# Lines of interest look like Perl assignments and are picked out by pattern:
#   $input-historio = "<base>-history.xml";  -> $InputFile = "<base>-current.xml"
#   $input-artikoloj = "<file>";             -> $InputList (read later as the article list)
#   $majuskligo = "<value>";                 -> $UpperCase (compared against 1 when
#                                               capitalising link/template names)
# $InputFile, $InputList and $UpperCase are deliberately left as package
# globals because the rest of the script reads them.
# Dies if the configuration file cannot be opened, since nothing useful can
# be computed without it.
open(my $config_fh, '<', "lokalizo.txt")
    or die "Cannot open lokalizo.txt: $!";
while (my $config_line = <$config_fh>) {
    if ($config_line =~ /\$input-historio \= \"([^\"]+?)-history\.xml\";/) {
        $InputFile = "$1-current.xml";
    }
    if ($config_line =~ /\$input-artikoloj \= \"([^\"]+?)\";/) {
        $InputList = $1;
    }
    if ($config_line =~ /\$majuskligo \= \"([^\"]+?)\";/) {
        $UpperCase = $1;
    }
}
close $config_fh;
# Load the list of existing articles: every non-empty line of the file named
# by $InputList becomes a key of %article (value 1). Links whose target is a
# key of %article are later treated as existing and are not reported.
# Dies when the list is not configured or cannot be opened; silently
# continuing would make every link look like a wanted page.
die "No article list configured (\$input-artikoloj missing in lokalizo.txt)\n"
    unless defined $InputList;
open(my $list_fh, '<', $InputList)
    or die "Cannot open article list '$InputList': $!";
while (my $list_line = <$list_fh>) {
    # (.+) stops before the trailing newline, so the key is the bare line.
    if ($list_line =~ /^(.+)$/) {
        $article{$1} = 1;
    }
}
close $list_fh;
# The dump is read twice. revision() looks at $i: on the first pass ($i == 1)
# it counts template inclusions, on the second pass it counts links to
# articles that are not in %article.
for (my $i = 1; $i <= 2; $i++) {
# Open the dump for this pass. The bareword handle IN is kept because the
# read loop further down uses <IN>.
die "No input dump configured (\$input-historio missing in lokalizo.txt)\n"
    unless defined $InputFile;
open(IN, '<', $InputFile)
    or die "Cannot open dump '$InputFile': $!";
# Decode the five predefined XML character entities in a string.
#
# Parameter: the XML-escaped text.
# Returns:   a copy of the text with &lt; &gt; &apos; &quot; &amp; replaced by
#            the characters they stand for. The argument is not modified.
sub xmlunesc {
    my $text = shift;
    $text =~ s/&lt;/</sg;
    $text =~ s/&gt;/>/sg;
    $text =~ s/&apos;/'/sg;
    $text =~ s/&quot;/"/sg;
    # &amp; must be decoded last: otherwise "&amp;lt;" would first become
    # "&lt;" and then wrongly "<".
    $text =~ s/&amp;/&/sg;
    return $text;
}
# Handle one complete <revision> element buffered in the global $xml.
#
# Steps, in this order (the order matters):
#   1. cut the first <revision>...</revision> out of $xml;
#   2. if no page record exists yet, let xmlpage() pull the <title> out of
#      what is left in $xml;
#   3. empty $xml;
#   4. store the unescaped content of the <text xml:space="preserve"> element
#      in $rev->{"text"} (if several match, the last one wins);
#   5. call revision() to process the page.
# Works entirely on the globals $xml, $page and $rev; takes no arguments.
sub xmlrevision {
    # Only trust $1 when the substitution really matched; otherwise it would
    # still hold the capture of some earlier, unrelated match.
    my $revcontent = ($xml =~ s/<revision>(.*?)<\/revision>//s) ? $1 : "";
    $rev = {};
    xmlpage() unless defined $page;
    $xml = "";
    while ($revcontent =~ /<text xml:space="preserve">(.*?)<\/text>/sg) {
        my $content = $1;
        $rev->{"text"} = xmlunesc($content);
    }
    revision();
}
# Pull every <title>...</title> element out of the global $xml buffer and
# record its unescaped content as $page->{"title"}. Each matched element is
# removed from $xml; when several are present the last one processed wins.
# An undefined result from xmlunesc() is stored as the empty string.
sub xmlpage {
    while (1) {
        last unless $xml =~ s/<title>(.*?)<\/title>//s;
        my $raw_title = $1;
        my $title     = xmlunesc($raw_title);
        $title = "" unless defined $title;
        $page->{"title"} = $title;
    }
}
# Highest number of distinct source pages seen for any wanted target.
# Reset here at the start of every pass; the report loop counts up to it.
$max = 0;

# Capitalise the first letter of a page or template name when the
# configuration asks for it ($UpperCase is 1). ASCII letters go through
# ucfirst(); the listed accented letters are mapped one by one.
# Returns the (possibly changed) name; any other name is returned as is.
sub _ucfirst_title {
    my ($name) = @_;
    return $name unless $UpperCase eq 1;
    if ($name =~ /^[a-z]/) {
        return ucfirst($name);
    }
    if ($name =~ /^[äèéöü]/) {
        $name =~ s/^ä/Ä/;
        $name =~ s/^è/È/;
        $name =~ s/^é/É/;
        $name =~ s/^ö/Ö/;
        $name =~ s/^ü/Ü/;
    }
    return $name;
}

# Process the current page ($page, $rev) for the current pass ($i).
#
# Pass 1 ($i < 2) - template statistics:
#   %templateInclusion{name}  every {{name ...}} occurrence
#   %inclusionPage{name}      pages using {{name}} (consecutive uses on the
#                             same page count once, tracked via %lnkTemplate)
# Pass 2 - statistics for links whose target is not in %article:
#   %linkPage{target}   pages linking to target (same-page dedup via %lnkPage)
#   %wikiLink{target}   every [[target]] occurrence
#   %hrefPage{target}   if the linking page is Template:/Vorlage:X, plus the
#                       number of pages that include X
#   %htmlLink{target}   occurrences, plus the number of inclusions of X per
#                       occurrence when the linking page is a template
#   $max                largest %linkPage value
# Whitespace in the page title is replaced by "_" as a side effect.
# Does nothing when the page has no title.
sub revision {
    return unless defined $page->{"title"};
    $page->{"title"} =~ s/\s/_/sg;
    my $title = $page->{"title"};

    # Name of the template this page defines, if it is a template page.
    my ($own_template) = $title =~ /^(?:Template|Vorlage):(.+)$/;

    if ($i < 2) {
        # The look-behind rejects "{{{" (parameters) without consuming a
        # character, so a template at the very start of the text or right
        # after a "|" is counted too.
        while ($rev->{"text"} =~ /(?<!\{)\{\{ ?:? *([^\[\{\|\}\]]+?) *[\|\}]/g) {
            my $template = $1;
            $template =~ s/ +/_/g;
            $template = _ucfirst_title($template);

            if ($lnkTemplate{$template} ne $title) {
                $inclusionPage{$template}++;
                $lnkTemplate{$template} = $title;
            }
            $templateInclusion{$template}++;
        }
    }
    else {
        while ($rev->{"text"} =~ /\[\[ ?:? *([^\[\{:#\|\}\]]+?) *[#\|\]]/g) {
            my $target = $1;
            $target =~ s/ +/_/g;
            $target = _ucfirst_title($target);

            # Only links to articles that do not exist are of interest.
            next if $article{$target};

            if ($lnkPage{$target} ne $title) {
                $linkPage{$target}++;
                $max = $linkPage{$target} if $linkPage{$target} > $max;
                $lnkPage{$target} = $title;
                if (defined $own_template && $inclusionPage{$own_template}) {
                    $hrefPage{$target} += $inclusionPage{$own_template};
                }
            }

            $wikiLink{$target}++;
            $htmlLink{$target}++;
            if (defined $own_template && $templateInclusion{$own_template}) {
                $htmlLink{$target} += $templateInclusion{$own_template};
            }
        }
    }
}
# Remainder of one pass: stream the dump, then write the report.
#
# The dump is accumulated line by line in $xml. A closing </revision> line
# hands the buffer to xmlrevision(); a closing </page> line drops the buffer
# and the current page record. The report file is rewritten at the end of
# every pass.
my @missingArticles;
open(OUT, '>', "Dezirataj-paĝoj.txt")
    or die "Cannot write Dezirataj-paĝoj.txt: $!";
while (<IN>) {
    $xml .= $_;
    if (/^\s*<\/revision>/) {
        xmlrevision();
    }
    elsif (/^\s*<\/page>/) {
        $xml  = "";
        $page = undef;
    }
}
$plural = "";
# Walk the counts upwards and unshift each entry, so the finished list has
# the most-linked targets first.
for (my $j = 1; $j <= $max; $j++) {
    foreach (keys %linkPage) {
        if ($linkPage{$_} eq $j) {
            if ($j eq 2) { $plural = "s"; }
            if (!$hrefPage{$_}) { $hrefPage{$_} = 0; }
            # Disabled verbose variant. It must stay on a line of its own:
            # sharing a line with live code comments that code out as well.
            #unshift(@missingArticles, "#$_ (in $linkPage{$_} source$plural; $wikiLink{$_} wiki link$plural || over $hrefPage{$_} template inclusion$plural, thus over $htmlLink{$_} HTML link$plural)\n");
            # NOTE(review): this string looks mangled by wiki rendering
            # (stray "}" and page path); confirm the intended wikitext.
            unshift(@missingArticles, "#$_ Wikipedia:Fehlende Artikel/Dezirataj-paĝoj.pl/ref}\n");
        }
    }
}
print OUT "Wanted pages [by internal links]:\n:# Wanted_article (linked from x1 source(s) with y1 wiki link(s); there are at least x2 pages having included template(s) [only non-redirected ones, yet] with another link(s) [intersecting]; there is a total of at least y2 visible links)\n\n";
print OUT @missingArticles;
# Buffered write errors only surface at close time.
close(OUT) or die "Cannot finish writing Dezirataj-paĝoj.txt: $!";
close IN;
}