Участник:Edwardspec TalkBot/interwiki.pl
Материал из Википедии — свободной энциклопедии
Это код интервики-бота от Эдуарда Черненко. Использует модуль MediaWiki как минимум версии 1.07.
#! /usr/bin/perl -w
###
# Interwiki bot for MediaWiki.
# Copyright (C) 2006 Edward Chernenko.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
###
#
# Overview of one work-loop iteration:
#   1. pick a random page on the start wiki and collect its interwiki links;
#   2. fetch every linked page and merge the interwiki links found there,
#      reporting conflicting titles;
#   3. rewrite the interwiki section of every page in the resulting set so
#      that each one links to all the others, printing a diff before saving.

use strict;
use warnings;

our $VERSION = "1.0.0";

# Configuration and lookup tables; all of them are filled in by the BEGIN
# block below.
our (@languages, $start_lang, $case_sensitive, %iw_order, $start_host);

# Map a language code (e.g. "ru") to the host name of its wiki.
sub lang2host {
    my $lang = shift;
    return "$lang.wikipedia.org";
}

BEGIN {
    ##
    #
    # CONFIGURATION
    #

    # Known language codes. The order of this list is also the order in
    # which interwiki links are written to a page.
    @languages = qw(
        aa ab af ak als am an ang ar arc as ast av
        ay az ba be bg bh bi bm bn bo br bs bug ca ce
        ceb ch cho chr chy co cr cs csb cv cy da de
        dv dz ee el en eo es et eu fa ff fi fiu-vro
        fj fo fr fur fy ga gd gl gn got gu gv ha
        haw he hi ho hr ht hu hy hz ia id ie ig
        ii ik ilo io is it iu ja jbo jv ka kg ki kj
        kk kl km kn ko kr ks ku kv kw ky la lad lb
        lg li lmo ln lo lt lv mg mh mi mk ml mn mo
        mr ms mt mus my na nah nap nds ne ng nl nn
        no nv ny oc om or os pa pam pi pih pl ps pt
        qu rm rn ro roa-rup ru rw sa sc scn sco sd
        se sg sh si simple sk sl sm sn so sq sr
        ss st su sv sw ta te tg th ti tk tl tn
        to tpi tr ts tt tum tw ty udm ug uk ur uz ve vec
        vi vo wa war wo xh yi yo za zh zh-min-nan zu
    );

    # Wiki from which random pages are taken.
    $start_lang = "ru";

    # When false, the first character of every link title is upper-cased
    # before titles are stored and compared.
    $case_sensitive = 0;

    ##
    #
    # INITIALIZATION
    #

    # Position of every language code in @languages, used for sorting.
    my $i = 0;
    foreach my $iw (@languages) {
        $iw_order{$iw} = $i++;
    }

    $start_host = lang2host($start_lang);
}

use MediaWiki;
use Algorithm::Diff qw/sdiff/;

my $c = MediaWiki->new();
$c->setup();
$c->{summary} = "[[:ru:User:Edward Chernenko|Edward]]'s interwiki bot";

##
#
# WORK LOOP
#
my $nr_of_edits = 0;
while (1) {
    $c->switch($start_host);

    # The loop ends as soon as random() returns a false value.
    my $pg = $c->random() || last;

    my $text  = $pg->content;
    my $title = $pg->title;
    # die $text;

    # Pass 1: links present on the start page. If a language is linked
    # more than once, the last occurrence wins.
    my %iwiki = ($start_lang => $title);
    foreach my $iw (@languages) {
        foreach my $this_iwiki (interwiki_titles($text, $iw)) {
            $iwiki{$iw} = $this_iwiki;
        }
    }
    print "Interwiki fetched from '$title\'@" . $c->_cfg("wiki", "host") . ": " . hash_dump(\%iwiki);

    # Pass 2: visit every linked page and merge the links found THERE.
    # "keys" is evaluated once, so languages discovered during this pass
    # are added to %iwiki but are not themselves visited here.
    foreach my $iw (keys %iwiki) {
        # print "Going to fetch '$iwiki{$iw}\'\@$iw...\n";
        $c->switch(lang2host($iw));
        my $text_f = $c->text($iwiki{$iw});
        if (!$text_f) {
            # No text came back for this title: drop the link.
            delete $iwiki{$iw};
            next;
        }

        F_IWIKI:
        foreach my $f_iw (@languages) {
            # Scan the fetched page ($text_f), not the start page.
            foreach my $this_iwiki (interwiki_titles($text_f, $f_iw)) {
                if ($iwiki{$f_iw} && $iwiki{$f_iw} ne $this_iwiki) {
                    # Keep the title recorded first and skip the rest of
                    # this language's links on the page.
                    print "Interwiki conflict: $title\n";
                    next F_IWIKI;
                }
                $iwiki{$f_iw} = $this_iwiki;
            }
        }
    }
    print "Interwiki calculated: " . hash_dump(\%iwiki);

    # Pass 3: rewrite the interwiki links on every page of the set.
    my @ordered_keys = sort { $iw_order{$a} <=> $iw_order{$b} } keys %iwiki;
    foreach my $iw (@ordered_keys) {
        $c->switch(lang2host($iw));

        # NOTE(review): assumes get() yields a false value when the page
        # cannot be opened - confirm against the MediaWiki module docs.
        my $f_pg = $c->get($iwiki{$iw}, "rw") or next;
        my $old_text = $f_pg->{content};

        # Strip every existing interwiki link (and one trailing newline)...
        foreach my $f_iw (@languages) {
            $f_pg->{content} =~ s/\[\[\Q$f_iw\E:.*?\]\]\n?//g;
        }

        # ...then append the full set, except the link to the page itself.
        # NOTE(review): if the remaining text does not end in a newline,
        # the first link is appended directly to the last line.
        foreach my $f_iw (@ordered_keys) {
            $f_pg->{content} .= "[[$f_iw:$iwiki{$f_iw}]]\n" if $f_iw ne $iw;
        }

        next if $old_text eq $f_pg->{content};

        print "DIFF for '$iwiki{$iw}\'\@$iw:";
        my $diff = my_diff($old_text, $f_pg->{content});
        print $diff ? "\n$diff\n" : " (no diff)";

        $f_pg->save();
        $nr_of_edits++;
    }
}

# Return the titles of all [[$lang:Title]] links in $text, in order of
# appearance. Underscores are turned into spaces and, unless
# $case_sensitive is set, the first character is upper-cased.
#
# The pattern consumes the whole link and uses a capture group; the
# language code is quoted so it is always matched literally.
sub interwiki_titles {
    my ($text, $lang) = @_;

    my @titles;
    while ($text =~ /\[\[\Q$lang\E:(.*?)\]\]/g) {
        my $this_iwiki = $case_sensitive ? $1 : ucfirst($1);
        $this_iwiki =~ tr/_/ /;
        push @titles, $this_iwiki;
    }
    return @titles;
}

# Build a compact line diff of two texts.
#
#   $s1, $s2 - old and new text; both are split on runs of CR/LF, so
#              empty lines do not take part in the comparison.
#
# Returns a string in which removed lines start with "- ", added lines
# with "+ " and context lines with a single space. An unchanged line is
# kept only when it directly precedes or directly follows a changed line.
# The result is the empty string when nothing changed.
sub my_diff {
    my ($s1, $s2) = @_;

    my @a1 = split /[\r\n]+/, $s1;
    my @a2 = split /[\r\n]+/, $s2;

    # Flatten the sdiff() records into marked lines; a 'c' (changed)
    # record produces a "-" line followed by a "+" line.
    my @lines;
    foreach my $action (sdiff(\@a1, \@a2)) {
        my ($op, $arg1, $arg2) = @$action;
        if ($op eq 'u') {
            push @lines, " $arg1\n";
            next;
        }
        push @lines, "- $arg1\n" if $op eq 'c' || $op eq '-';
        push @lines, "+ $arg2\n" if $op eq 'c' || $op eq '+';
    }

    my $diff        = "";
    my $was_changed = 0;    # true while the previous line was a change
    for my $i (0 .. $#lines) {
        if ($lines[$i] =~ /^[+-]/) {
            $was_changed = 1;
            $diff .= $lines[$i];
        }
        elsif ($was_changed || ($i != $#lines && $lines[$i + 1] =~ /^[+-]/)) {
            # One line of context after or before a change.
            $was_changed = 0;
            $diff .= $lines[$i];
        }
    }
    return $diff;
}

# Render a hash reference as one line of text for log output, e.g.
# "{  'ru' => 'Title', 'en' => 'Title' }\n". Keys appear in whatever order
# "keys" returns them.
sub hash_dump {
    my $href = shift;

    my @pairs = map { " '$_\' => '" . $href->{$_} . "'" } keys %{$href};
    return "{ " . join(",", @pairs) . " }\n";
}