Zum Inhalt springen

Benutzer:MelancholieBot/VandalismCheckBot.pl

Us der alemannische Wikipedia, der freie Dialäkt-Enzyklopedy
#!/usr/bin/perl

use URI::Escape; use LWP::UserAgent;

$user = $ARGV[0]; $project = $ARGV[1];

# for-redo
# +500?
# fe del
# noprint
# Potential vandalism

if ($ARGV[0]) {

# Step 1: build the list of wikis.
# Download the site list from Meta (the local interwiki map when $project is
# "wikipedia", the SiteMatrix otherwise), rewrite it with one entry per line
# in siteMatrix.txt and store every accepted code in %wiki under the keys
# 1..$wikiNum.
print "Getting SiteMatrix ...\n";
if ($project eq "wikipedia") {
 $url = "http://meta.wikimedia.org/w/api.php?action=query&format=xml&meta=siteinfo&siprop=interwikimap&sifilteriw=local";
} else {
 $url = "http://meta.wikimedia.org/w/api.php?action=sitematrix&format=xml";
}
$file = "siteMatrix.xml";
print " Downloading $file\n";
$userAgent = LWP::UserAgent->new();
$userAgent->agent("VandalismCheckBot (operated by $user)");
$httpRequest = HTTP::Request->new("GET", $url);
# The second argument is the name of the file the response is saved to.
$serverResponse = $userAgent->request($httpRequest, $file);
if ($serverResponse->is_error()) {
 print "Error code: ", $serverResponse->code(), "\n";
 print "Error message: ", $serverResponse->message(), "\n";
}

# Start a new line before every entry element so that the loop further down
# can match one entry per line.
my $entryTag = ($project eq "wikipedia") ? "<iw" : "<site";
open(my $matrixXml, '<', $file) or die "Cannot read $file: $!\n";
open(my $matrixTxt, '>', "siteMatrix.txt") or die "Cannot write siteMatrix.txt: $!\n";
while (defined(my $chunk = <$matrixXml>)) {
 $chunk =~ s/\Q$entryTag\E/\n$entryTag/g;
 print $matrixTxt $chunk;
}
# Buffered write errors only show up when the handle is closed.
close($matrixTxt) or die "Cannot finish writing siteMatrix.txt: $!\n";
close($matrixXml);

open(my $matrixList, '<', "siteMatrix.txt") or die "Cannot read siteMatrix.txt: $!\n";
$properList = 0;   # becomes 1 as soon as one entry line has been recognised
$wikiNum = 0;
while (my $entry = <$matrixList>) {
 # These interwiki prefixes are skipped outright.
 next if $entry =~ /<iw prefix="(closed-zh-tw|nan|sep11|test)".+url="http:\/\/\1\.wikipedia\.org\/.+" \/>/;
 # Either an interwiki entry whose prefix equals its wikipedia.org subdomain,
 # or a SiteMatrix entry of the requested project. $project comes from the
 # command line, so it is quoted to be matched literally.
 if ($entry =~ /<iw prefix="([^"]+)".+url="http:\/\/\1\.wikipedia\.org\/.+" \/>/
     || $entry =~ /<site url="http:\/\/([^\.]+)\.\Q$project\E\.org\/?"/) {
  $properList = 1;
  $tmpWiki = $1;
  # Codes taken from self.obsolete in
  # http://svn.wikimedia.org/svnroot/pywikipedia/trunk/pywikipedia/families/wikipedia_family.py
  if ($tmpWiki !~ /^(aa|cho|dk|ho|hz|ii|kj|kr|mh|minnan|mo|mus|nb|jp|ru-sib|tlh|tokipona|zh-tw|zh-cn)$/) {
   $wikiNum++;
   $wiki{$wikiNum} = $tmpWiki;
  }
 }
}
if ($properList == 0) {print "ERROR: Empty list!\n";}
close($matrixList);


# TODO: if <query-continue> <allusers aufrom="bot" /> is returned, fetch the next batch
# Step 2: download the accounts in the "bot" group from the English Wikipedia
# and rewrite the answer with one <u .../> element per line in botList.txt.
# Only a single request (aulimit=500) is made.
print "Getting bot list ...\n";
$url = "http://en.wikipedia.org/w/api.php?action=query&format=xml&list=allusers&augroup=bot&aulimit=500";
$file = "botList.xml";
$userAgent = LWP::UserAgent->new();
$userAgent->agent("VandalismCheckBot (operated by $user)");

# A failed download is retried, but only a limited number of times and with a
# pause in between, so that a persistent error cannot turn into an endless
# stream of requests.
my $botListTries = 5;
my $botListPause = 5;   # seconds between two attempts
for (my $attempt = 1; $attempt <= $botListTries; $attempt++) {
 print " Downloading $file\n";
 $httpRequest = HTTP::Request->new("GET", $url);
 # The second argument is the name of the file the response is saved to.
 $serverResponse = $userAgent->request($httpRequest, $file);
 last unless $serverResponse->is_error();
 print "Error code: ", $serverResponse->code(), "\n";
 print "Error message: ", $serverResponse->message(), "\n";
 if ($attempt < $botListTries) {
  print "Retrying to download ...\n";
  sleep($botListPause);
 }
}
# Without the list there is nothing to work on.
die "Giving up on $file after $botListTries failed attempts\n" if $serverResponse->is_error();

# Start a new line before every "<u" so that each element can be matched on
# a line of its own.
open(my $botListXml, '<', $file) or die "Cannot read $file: $!\n";
open(my $botListOut, '>', "botList.txt") or die "Cannot write botList.txt: $!\n";
while (defined(my $chunk = <$botListXml>)) {
 $chunk =~ s/<u/\n<u/g;
 print $botListOut $chunk;
}
# Buffered write errors only show up when the handle is closed.
close($botListOut) or die "Cannot finish writing botList.txt: $!\n";
close($botListXml);
# </allusers>?
# Collect the bot names listed in botList.txt in %bot (keys 1..$botNum).
open(my $botListTxt, '<', "botList.txt") or die "Cannot read botList.txt: $!\n";
$properList = 0;   # becomes 1 as soon as one user element has been recognised
$botNum = 0;
while (my $entry = <$botListTxt>) {
 # The pattern here used to be empty, which never captures a user name.
 # NOTE(review): reconstructed on the assumption that the API lists each
 # account as <u name="..." /> - confirm against a real botList.xml.
 # Open question left by the original author: \"? --CommonsDelinker etc.
 if ($entry =~ /<u\s+name="([^"]+)"/) {
  $properList = 1;
  $botNum++;
  $bot{$botNum} = $1;
 }
}
if ($properList == 0) {print "ERROR: Empty list!\n";}
close($botListTxt);


# Step 3: download the contributions of one bot and rewrite the answer with
# one <item .../> element per line in botContrib.txt.
# The bot name and the start timestamp are hard-coded; the assignment
# overwrites the first entry of %bot.
$bot{1} = "Numbo3-bot"; $ucStart = "20080525000000";

print "Getting bot contributions ...\n";
# The user name becomes part of the query string, so it has to be escaped.
my $ucUser = uri_escape($bot{1});
$url = "http://en.wikipedia.org/w/api.php?action=query&format=xml&list=usercontribs&ucuser=$ucUser&ucnamespace=0&ucprop=ids|comment&ucdir=newer&ucstart=$ucStart&uclimit=500";
$file = "botContrib.xml";
print " Downloading $file\n";
$userAgent = LWP::UserAgent->new();
$userAgent->agent("VandalismCheckBot (operated by $user)");
$httpRequest = HTTP::Request->new("GET", $url);
# The second argument is the name of the file the response is saved to.
$serverResponse = $userAgent->request($httpRequest, $file);
if ($serverResponse->is_error()) {
 print "Error code: ", $serverResponse->code(), "\n";
 print "Error message: ", $serverResponse->message(), "\n";
}

# Start a new line before every "<item" so that each contribution can be
# matched on a line of its own.
open(my $contribXml, '<', $file) or die "Cannot read $file: $!\n";
open(my $contribOut, '>', "botContrib.txt") or die "Cannot write botContrib.txt: $!\n";
while (defined(my $chunk = <$contribXml>)) {
 $chunk =~ s/<item/\n<item/g;
 print $contribOut $chunk;
}
# Buffered write errors only show up when the handle is closed.
close($contribOut) or die "Cannot finish writing botContrib.txt: $!\n";
close($contribXml);
# </allusers>?
open TXT, "botContrib.txt";
$properList = 0;
while(<TXT>) {
 if ($_ =~ /<item user="([^"]+)" pageid="([^"]+)" revid="([^"]+)" comment="([^"]+)" \/>/) {#\"? (quote)
  if ($properList eq 0) {$properList = 1;}
  $tmpBot = $1;
  $pageID = $2;
  $revID = $3;
  $comment = $4;
  if ($comment =~ /[a-z-]+, [a-z-]+, [a-z-]+, [a-z-]+/) {


# 50?

$todo = uri_escape($todo); http://en.wikipedia.org/w/api.php?action=query&format=xml&prop=revisions&pageids=$pageID&rvprop=ids|user&rvlimit=25&rvstartid=$revID&rvexcludeuser=$tmpBot <page pageid="477792" ns="0" title="(Hamada)"> $title = $1; <rev revid="(214540355)" user="(152.78.120.254)" anon="" /> <rev revid="(214540355)" user="152.78.120.254" anon="" /> <rev revid="(214540355)" user="152.78.120.254" anon="" /> if (!$user{$2}) {

$oldID{$i} = $1;
$user{$2} = 1;

}

fe () {del} http://en.wikipedia.org/w/api.php?action=parse&format=xml&oldid=$oldID&prop=externallinks|categories|langlinks if <langlinks /> $noLanglinks = 1; if <categories /> $noCategories = 1; if <externallinks /> $noExternallinks = 1;

if ($noLanglinks || $noCategories) {

my @unsorted; foreach (keys %oldID) {

push(@unsorted, $oldID{$_});
@sorted = sort(numSort @unsorted);
 foreach (@sorted) {
  $tmpID = $_;

if ($noLanglinks && $noCategories) {

http://en.wikipedia.org/w/api.php?action=parse&format=xml&oldid=$tmpID&prop=categories|langlinks

} elsif ($noCategories) {

http://en.wikipedia.org/w/api.php?action=parse&format=xml&oldid=$tmpID&prop=categories

} else {

http://en.wikipedia.org/w/api.php?action=parse&format=xml&oldid=$tmpID&prop=langlinks

} if !<langlinks /> $langlinks = 1; if !<categories /> $categories = 1;

 if ($noExternallinks) {

if !<externallinks /> $externallinks = 1;

 }

if () {

print OUT "*$title (external links, categories, interlang)";
print OUT "*$title (external links, categories)";
print OUT "*$title (external links, interlang)";
print OUT "*$title (categories, interlang)";
print OUT "*$title (categories)";
print OUT "*$title (interlang)";

} }


  }
 }
}
if ($properList eq 0) {print "ERROR: Empty list!\n";}
close TXT;


# Remove the temporary files the download steps of this script write.
unlink("siteMatrix.xml", "siteMatrix.txt",
       "botList.xml", "botList.txt",
       "botContrib.xml", "botContrib.txt");
# NOTE(review): nothing in the visible code creates these two files -
# presumably names left over from an earlier version. Deleting a file that
# does not exist is harmless, so they are kept.
unlink("botList.html", "botBlocks.html");

# Comparison routine for sort(): puts larger numbers first.
# Reads sort's package variables $a and $b and returns -1 when $a is the
# larger one, 0 when both are numerically equal and 1 otherwise.
sub numSort {
    return -1 if $a > $b;
    return  0 if $a == $b;
    return  1;
}

# als:User:MelancholieBot/VandalismCheckBot.pl !END