# Benutzer:MelancholieBot/VandalismCheckBot.pl
#!/usr/bin/perl
use URI::Escape; use LWP::UserAgent;
# Command line: first argument is the operator's user name (used in the
# User-Agent string), second argument is the project name (e.g. "wikipedia").
($user, $project) = @ARGV[0, 1];
# for-redo
# +500?
# fe del
# noprint
# Potential vandalism
# Step 1: build %wiki, a 1-based list of wiki language codes for $project.
# Everything below runs only when a user name was given as first argument.
# NOTE(review): the closing brace of this `if` is not part of this section.
if ($ARGV[0]) {
    print "Getting SiteMatrix ...\n";
    # Wikipedia codes are read from meta's local interwiki map; every other
    # project is read from the sitematrix.
    if ($project eq "wikipedia") {
        $url = "http://meta.wikimedia.org/w/api.php?action=query&format=xml&meta=siteinfo&siprop=interwikimap&sifilteriw=local";
    } else {
        $url = "http://meta.wikimedia.org/w/api.php?action=sitematrix&format=xml";
    }
    $file = "siteMatrix.xml";
    print " Downloading $file\n";
    $userAgent = LWP::UserAgent->new();
    $userAgent->agent("VandalismCheckBot (operated by $user)");
    $httpRequest = HTTP::Request->new("GET", $url);
    # Passing a file name as second argument stores the response body there.
    $serverResponse = $userAgent->request($httpRequest, $file);
    if ($serverResponse->is_error()) {
        print "Error code: ", $serverResponse->code(), "\n";
        print "Error message: ", $serverResponse->message(), "\n";
    }

    # Rewrite the XML so that every <iw>/<site> element starts a new line.
    # The output file is opened (and truncated) first so that a stale copy
    # from an earlier run is never parsed when the download is missing.
    if (open(my $txtOut, ">", "siteMatrix.txt")) {
        if (open(my $xmlIn, "<", "siteMatrix.xml")) {
            while (defined($in = <$xmlIn>)) {
                $out = $in;
                if ($project eq "wikipedia") {
                    $out =~ s/<iw/\n<iw/g;
                } else {
                    $out =~ s/<site/\n<site/g;
                }
                print {$txtOut} $out;
            }
            close $xmlIn;
        } else {
            print "ERROR: Cannot read siteMatrix.xml: $!\n";
        }
        close $txtOut;
    } else {
        print "ERROR: Cannot write siteMatrix.txt: $!\n";
    }

    # Collect the language codes; $properList records whether any line matched.
    $properList = 0;
    $wikiNum = 0;
    if (open(my $matrix, "<", "siteMatrix.txt")) {
        while (<$matrix>) {
            # Skip these interwiki prefixes outright.
            if ($_ =~ /<iw prefix="(closed-zh-tw|nan|sep11|test)".+url="http:\/\/\1\.wikipedia\.org\/.+" \/>/) {next;}
            # Either an interwiki entry whose prefix equals its host name, or a
            # sitematrix entry of the requested project. $project comes from
            # the command line, so it is quoted before going into the pattern.
            if ($_ =~ /<iw prefix="([^"]+)".+url="http:\/\/\1\.wikipedia\.org\/.+" \/>/ || $_ =~ /<site url="http:\/\/([^\.]+)\.\Q$project\E\.org\/?"/) {
                if ($properList == 0) {$properList = 1;}
                $tmpWiki = $1;
                # Codes to leave out; see self.obsolete in
                # http://svn.wikimedia.org/svnroot/pywikipedia/trunk/pywikipedia/families/wikipedia_family.py
                if ($tmpWiki !~ /^(aa|cho|dk|ho|hz|ii|kj|kr|mh|minnan|mo|mus|nb|jp|ru-sib|tlh|tokipona|zh-tw|zh-cn)$/) {
                    $wikiNum++;
                    $wiki{$wikiNum} = $tmpWiki;
                }
            }
        }
        close $matrix;
    } else {
        print "ERROR: Cannot read siteMatrix.txt: $!\n";
    }
    if ($properList == 0) {print "ERROR: Empty list!\n";}
# TODO: if <query-continue> <allusers aufrom="bot" /> is present, continue the query from there
# Step 2: download the accounts in the "bot" group from en.wikipedia and
# split the XML so that every <u .../> element starts a new line.
print "Getting bot list ...\n";
$url = "http://en.wikipedia.org/w/api.php?action=query&format=xml&list=allusers&augroup=bot&aulimit=500";
$file = "botList.xml";
# A failed download is retried, but only a limited number of times and with a
# pause in between, so a persistent server error cannot loop forever.
my $maxAttempts = 5;
for (my $j = 1; $j <= $maxAttempts; $j++) {
    print " Downloading $file\n";
    $userAgent = LWP::UserAgent->new();
    $userAgent->agent("VandalismCheckBot (operated by $user)");
    $httpRequest = HTTP::Request->new("GET", $url);
    # Passing a file name as second argument stores the response body there.
    $serverResponse = $userAgent->request($httpRequest, $file);
    last unless $serverResponse->is_error();
    print "Error code: ", $serverResponse->code(), "\n";
    print "Error message: ", $serverResponse->message(), "\n";
    if ($j < $maxAttempts) {
        print "Retrying to download ...\n";
        sleep(5);
    } else {
        print "ERROR: Giving up on $file after $maxAttempts attempts\n";
    }
}

# The output file is opened (and truncated) first so that a stale copy from
# an earlier run is never parsed when the download is missing.
if (open(my $txtOut, ">", "botList.txt")) {
    if (open(my $xmlIn, "<", "botList.xml")) {
        while (defined($in = <$xmlIn>)) {
            $out = $in;
            $out =~ s/<u/\n<u/g;
            print {$txtOut} $out;
        }
        close $xmlIn;
    } else {
        print "ERROR: Cannot read botList.xml: $!\n";
    }
    close $txtOut;
} else {
    print "ERROR: Cannot write botList.txt: $!\n";
}
# </allusers>?
# Step 3: collect the bot account names from botList.txt into %bot
# (1-based index, $botNum holds the count).

# Return the account name found in one "<u ... name="..." />" line, or undef
# when the line holds no such element. Other attributes may precede name=.
# NOTE(review): names containing a double quote are assumed to arrive
# XML-escaped (&quot;), e.g. for accounts like CommonsDelinker - confirm.
sub _extractBotName {
    my ($line) = @_;
    return $line =~ /<u\b[^>]*\bname="([^"]+)"/ ? $1 : undef;
}

$properList = 0;
$botNum = 0;
if (open(my $botList, "<", "botList.txt")) {
    while (<$botList>) {
        # An empty pattern (//) would silently reuse the last successful
        # regex and leave $1 unset, so the element is matched explicitly.
        my $botName = _extractBotName($_);
        next unless defined $botName;
        if ($properList == 0) {$properList = 1;}
        $botNum++;
        $bot{$botNum} = $botName;
    }
    close $botList;
} else {
    print "ERROR: Cannot read botList.txt: $!\n";
}
if ($properList == 0) {print "ERROR: Empty list!\n";}
# NOTE(review): the first entry is overwritten with a fixed account name;
# this looks like a leftover from testing - confirm before relying on %bot.
$bot{1} = "Numbo3-bot";
# Step 4: download up to 500 main-namespace contributions of the first bot,
# oldest first, beginning at the fixed timestamp $ucStart, and split the XML
# so that every <item .../> element starts a new line.
$ucStart = "20080525000000";
print "Getting bot contributions ...\n";
# NOTE(review): $todo is not assigned anywhere in the visible code; the call
# is kept only so that the variable's state stays as before.
$todo = uri_escape($todo);
# The account name goes into the query string, so it must be URI-escaped
# (names may contain spaces or other reserved characters).
my $ucUser = uri_escape($bot{1});
$url = "http://en.wikipedia.org/w/api.php?action=query&format=xml&list=usercontribs&ucuser=$ucUser&ucnamespace=0&ucprop=ids|comment&ucdir=newer&ucstart=$ucStart&uclimit=500";
$file = "botContrib.xml";
print " Downloading $file\n";
$userAgent = LWP::UserAgent->new();
$userAgent->agent("VandalismCheckBot (operated by $user)");
$httpRequest = HTTP::Request->new("GET", $url);
# Passing a file name as second argument stores the response body there.
$serverResponse = $userAgent->request($httpRequest, $file);
if ($serverResponse->is_error()) {
    print "Error code: ", $serverResponse->code(), "\n";
    print "Error message: ", $serverResponse->message(), "\n";
}

# The output file is opened (and truncated) first so that a stale copy from
# an earlier run is never parsed when the download is missing.
if (open(my $txtOut, ">", "botContrib.txt")) {
    if (open(my $xmlIn, "<", "botContrib.xml")) {
        while (defined($in = <$xmlIn>)) {
            $out = $in;
            $out =~ s/<item/\n<item/g;
            print {$txtOut} $out;
        }
        close $xmlIn;
    } else {
        print "ERROR: Cannot read botContrib.xml: $!\n";
    }
    close $txtOut;
} else {
    print "ERROR: Cannot write botContrib.txt: $!\n";
}
# </allusers>?
# Step 5: walk the downloaded contributions, one <item .../> element per line.
# NOTE(review): the open is unchecked; if botContrib.txt is missing the loop
# simply reads nothing.
open TXT, "botContrib.txt";
# $properList becomes 1 as soon as one line matches the <item> pattern.
$properList = 0;
while(<TXT>) {
# Capture user, page id, revision id and edit summary. All four attributes
# must be present, non-empty and in exactly this order for the line to match.
if ($_ =~ /<item user="([^"]+)" pageid="([^"]+)" revid="([^"]+)" comment="([^"]+)" \/>/) {#\"? (quote)
if ($properList eq 0) {$properList = 1;}
$tmpBot = $1;
$pageID = $2;
$revID = $3;
$comment = $4;
# Only summaries containing at least four comma-separated runs of lowercase
# letters/hyphens are examined further - presumably lists of language codes
# from an interwiki edit; TODO confirm.
if ($comment =~ /[a-z-]+, [a-z-]+, [a-z-]+, [a-z-]+/) {
# 50?
# NOTE(review): from here to the closing braces below the script is an
# unfinished sketch. Bare URLs, sample XML and placeholders such as
# `fe () {del}` and `if () {` are not valid Perl; the text is kept as written.
#
# Sketch, part 1: request up to 25 revisions of page $pageID starting at
# $revID and excluding the bot itself (rvexcludeuser). The sample <page>/<rev>
# elements mark the attributes to capture: the page title, and per revision
# the revid and user. For each user not seen before, the revision id is stored
# in %oldID and the user is marked in %user.
# NOTE(review): $i is never assigned in the visible code, and %user shares its
# name with the scalar $user holding the operator name.
$todo = uri_escape($todo); http://en.wikipedia.org/w/api.php?action=query&format=xml&prop=revisions&pageids=$pageID&rvprop=ids|user&rvlimit=25&rvstartid=$revID&rvexcludeuser=$tmpBot <page pageid="477792" ns="0" title="(Hamada)"> $title = $1; <rev revid="(214540355)" user="(152.78.120.254)" anon="" /> <rev revid="(214540355)" user="152.78.120.254" anon="" /> <rev revid="(214540355)" user="152.78.120.254" anon="" /> if (!$user{$2}) {
$oldID{$i} = $1;
$user{$2} = 1;
}
# Sketch, part 2: parse a revision and set $noLanglinks / $noCategories /
# $noExternallinks when the corresponding element comes back empty.
# NOTE(review): the URL uses a scalar $oldID, which is distinct from %oldID.
fe () {del} http://en.wikipedia.org/w/api.php?action=parse&format=xml&oldid=$oldID&prop=externallinks|categories|langlinks if <langlinks /> $noLanglinks = 1; if <categories /> $noCategories = 1; if <externallinks /> $noExternallinks = 1;
# Only when interlanguage links or categories are missing are the remembered
# revisions inspected.
if ($noLanglinks || $noCategories) {
# NOTE(review): as the braces stand, the sort and the inner foreach run inside
# the loop that is still filling @unsorted, i.e. once per key of %oldID.
my @unsorted; foreach (keys %oldID) {
push(@unsorted, $oldID{$_});
# numSort orders numerically from largest to smallest.
@sorted = sort(numSort @unsorted);
foreach (@sorted) {
$tmpID = $_;
# Sketch, part 3: re-parse revision $tmpID, asking only for the properties
# that were found missing above.
if ($noLanglinks && $noCategories) {
http://en.wikipedia.org/w/api.php?action=parse&format=xml&oldid=$tmpID&prop=categories|langlinks
} elsif ($noCategories) {
http://en.wikipedia.org/w/api.php?action=parse&format=xml&oldid=$tmpID&prop=categories
} else {
http://en.wikipedia.org/w/api.php?action=parse&format=xml&oldid=$tmpID&prop=langlinks
# Set $langlinks / $categories when that revision has a non-empty element.
} if !<langlinks /> $langlinks = 1; if !<categories /> $categories = 1;
if ($noExternallinks) {
if !<externallinks /> $externallinks = 1;
}
# NOTE(review): the condition is still missing. The six print statements look
# like alternatives, one per combination of removed items, of which only one
# should be emitted - TODO confirm. The handle OUT is not opened in the
# visible code.
if () {
print OUT "*$title (external links, categories, interlang)"; print OUT "*$title (external links, categories)"; print OUT "*$title (external links, interlang)"; print OUT "*$title (categories, interlang)"; print OUT "*$title (categories)"; print OUT "*$title (interlang)";
} }
}
}
}
# Report when no <item> line of the contributions file was recognised.
if ($properList eq 0) {print "ERROR: Empty list!\n";}
close TXT;
# Remove the temporary files. The .xml/.txt names are the files this script
# writes; botList.html and botBlocks.html are not created by the visible code
# and are kept in the list only so that nothing previously removed is left
# behind. unlink ignores names that do not exist.
unlink(
    "siteMatrix.xml", "siteMatrix.txt",
    "botList.xml",    "botList.txt",
    "botContrib.xml", "botContrib.txt",
    "botList.html",   "botBlocks.html",
);
# Comparator for sort(): orders numbers from largest to smallest.
# Reads the package variables $a and $b that sort() sets for each comparison
# and returns -1, 0 or 1.
sub numSort {
    return -1 if $a > $b;
    return 0 if $a == $b;
    return 1;
}