Miguel de Cervantes y Saavedra - Don Quijote de la Mancha - Ebook:
HTML+ZIP- TXT - TXT+ZIP

Wikipedia for Schools (ES) - Static Wikipedia (ES) 2006
CLASSICISTRANIERI HOME PAGE - YOUTUBE CHANNEL
SITEMAP
Make a donation: IBAN: IT36M0708677020000000008016 - BIC/SWIFT:  ICRAITRRU60 - VALERIO DI STEFANO or
Privacy Policy Cookie Policy Terms and Conditions
Участник:ShurShur/getwikidumps.pl — Википедия

Участник:ShurShur/getwikidumps.pl

Материал из Википедии — свободной энциклопедии

См. также: Участник:ShurShur/Dumps

#!/usr/bin/perl
# getwikidumps 20060621 by Shurik
# 
# This script downloads all Wikimedia dumps from URL $base/$language$project
# 
use Getopt::Long;
#use AppConfig;
use LWP::Simple;
use strict;

# Project suffixes without a language prefix, in order: Wikipedia, WikiBooks,
# WikiNews, WikiQuote, WikiSource, Wiktionary.
my @projects = qw(
  wiki
  wikibooks
  wikinews
  wikiquote
  wikisource
  wiktionary
);

# Default download set: language prefix => reference to a list of project
# suffixes.  "ru" points at @projects itself (not a copy).
my %languages = (
  ru      => \@projects,  # Russian
  commons => ['wiki'],
  meta    => ['wiki'],
);

# File-name suffixes that are never downloaded.
my @skipfiles = (
  'pages-meta-history.xml.bz2',  # Dup of better compressed pages-meta-history.xml.7z
);

# Root URL of the dump server; the fetching subs append "/<lang><project>/".
my $base = 'http://download.wikimedia.org/';

# Autoflush STDOUT so progress lines appear as soon as they are printed.
$| = 1;

# help()
# Print the usage text to STDOUT, using the script's base name (everything
# up to the last "/" of $0 removed), and terminate the program.
# Never returns.
sub help {
  (my $me = $0) =~ s#.*/##;
  my $usage = <<EOF;
Usage: $me [OPTIONS] <project definitions>....
Project definitions:
  <lang>:<project> - e.g. ru:wikibooks for ru.wikibooks.org or meta:wiki for meta.wikimedia.org
  <lang>           - same as <lang>:wiki
  <lang>:          - same as <lang>:wiki
Options:
  -h               - this help
Sample call:
  $me ru: ru:wikibooks meta commons:wiki
    - download dumps for ruwiki, ruwikibooks, metawiki & commonswiki
EOF
  print $usage;
  exit;
}

# parse @ARGV

# Becomes true once the first project definition has been seen on the command
# line; at that moment the built-in default download set is discarded.
my $owndumplist=0;

# adddump($definition)
# Getopt::Long "<>" callback: called once per non-option argument.
#   $definition - "<lang>:<project>", "<lang>:" or "<lang>"; a missing or
#                 empty project means "wiki".
# Adds the project to the list stored in %languages under <lang>, creating
# that list if needed.  The first call empties %languages so that only the
# explicitly requested dumps remain.
sub adddump {
  my ($definition)=@_;
  if(!$owndumplist) {
    $owndumplist=1;
    %languages=();
  }
  my ($lang,$project)=split /:/,$definition;
  # "<lang>" leaves $project undefined, "<lang>:" leaves it undefined/empty.
  if(!defined $project || $project eq "") { $project="wiki"; }
  my $ref=$languages{$lang};
  if(!$ref) {
    $ref=$languages{$lang}=[];
  }
  # The same definition given twice (e.g. "ru ru:") must not queue the
  # project a second time.
  if(!grep { $_ eq $project } @$ref) {
    push @$ref,$project;
  }
}

# -h prints the usage text; every other argument is a project definition.
# A parse error also ends in the usage text.
GetOptions("-h"=>\&help,"<>"=>\&adddump) || help();

# getlastdate($language,$project)
# Fetch the index page "$base/$language$project/" and collect every link of
# the form href="NNNNNNNN/" (eight digits).
# Returns the greatest of those eight-digit strings, or undef when the page
# could not be fetched or contains no such link.
sub getlastdate {
  my ($language,$project)=@_;
  my $pg=get("$base/$language$project/");
  my $latest;
  # get() returns undef when the request fails.
  if(defined $pg) {
    my @dates=($pg=~m#href="(\d{8})/"#ig);
    # Do not rely on the order of links in the page: fixed-width digit
    # strings compare correctly as strings, so take the greatest one.
    @dates=sort { $b cmp $a } @dates;
    $latest=$dates[0];
  }
  return $latest;
}

# getfilelist($language,$project,$date)
# Fetch the page "$base/$language$project/$date/" and extract, from every
# quoted link whose last path component starts with
# "<language><project>-<date>-", the part of the name after that prefix.
# Returns the list of those name suffixes (empty when the page could not be
# fetched or nothing matched).
sub getfilelist {
  my ($language,$project,$date)=@_;
  my $pg=get("$base/$language$project/$date/");
  my @files;
  # get() returns undef when the request fails.
  if(defined $pg) {
    # \Q..\E: the prefix is built from command-line values and must match
    # literally.  The captured suffix ends at the closing quote and may not
    # contain "/", because it later becomes part of a local file name.
    @files=($pg=~m#./\Q${language}${project}-${date}-\E([^"/\n]+)"#ig);
  }
  return @files;
}

# getfile($language,$project,$date,$file)
# Download one dump file with wget.
#   $language,$project,$date - identify the dump directory
#                              "<language><project>/<date>/"
#   $file                    - name suffix following
#                              "<language><project>-<date>-"
# Nothing is downloaded when the target file already exists locally or when
# $file is listed in @skipfiles.  Otherwise wget writes to "<target>.tmp",
# which is renamed to the target once wget reports the file as saved or as
# already fully retrieved.
# Prints one progress line to STDOUT, ending in "(<rate>)", "(exists)" or
# "(skipped)".  No meaningful return value.
sub getfile {
  my ($language,$project,$date,$file)=@_;
  my $dir="$language$project/$date";
  my $name="$language$project-$date-$file";
  my $url="$base/$dir/$name";
  my $loc="$dir/$name";

  # Skip files that are already present or explicitly excluded.
  my $skip=(-f $loc) ? 1 : 0;
  if(!$skip) {
    for my $cfile (@skipfiles) {
      if($cfile eq $file) {
        $skip=1;
        last;
      }
    }
  }

  print scalar(localtime).": [$language] $project [$date] $file ";
  my $ok=0;
  if(!$skip) {
    # List-form system/exec: names come from the command line and from a
    # remote page, so they must never pass through a shell.
    system "mkdir","-p","$dir/";
    sleep 1; # prevent quick reconnect
    # Forking open: the child's STDOUT is the pipe read by the parent.
    my $pid=open(my $wget,"-|");
    if(!defined $pid) {
      warn "cannot fork for wget: $!\n";
    } elsif($pid==0) {
      # Child: wget logs to STDERR, so send that down the pipe as well, and
      # force untranslated messages because they are matched below.
      $ENV{LC_ALL}="C";
      open(STDERR,">&",\*STDOUT);
      exec("wget","-c",$url,"-O","$loc.tmp");
      exit 127; # only reached when wget could not be started
    } else {
      while(my $line=<$wget>) {
        # "HH:MM:SS (rate) - <quoted name> saved [size]"; the time may be
        # preceded by a date and the quoting style varies between wget
        # versions, so neither is matched strictly.
        if($line=~/\b\d\d:\d\d:\d\d\s\((.+?)\)\s-\s.+?\ssaved\s\[.+?\]/) {
          print "($1)\n";
          $ok=1;
        }
        if($line=~/The\sfile\sis\salready\sfully\sretrieved;\snothing\sto\sdo/) {
          print "(exists)\n";
          $ok=1;
        }
      }
      close $wget;
    }
  }
  if($ok) {
    rename("$loc.tmp",$loc) or warn "cannot rename $loc.tmp to $loc: $!\n";
  } else {
    print "(skipped)\n";
  }
}

# getproject($language,$project)
# Download the dump of one project: look up its dump date with
# getlastdate(), list the files of that dump with getfilelist() and hand
# each one to getfile().
# Prints "started"/"completed" lines around the download, or a single
# "[not found]" line when no dump date was returned.
sub getproject {
  my ($language,$project)=@_;
  my @dates=getlastdate($language,$project);
  if(!$dates[0]) {
    # print, not printf: the text contains command-line values and must not
    # be interpreted as a format string.
    print scalar(localtime).": [$language] $project [not found]\n";
    return;
  }
  for my $date (@dates) {
    print scalar(localtime).": [$language] $project [$date] started\n";
    my @list=getfilelist($language,$project,$date);
    for my $file (@list) {
      getfile($language,$project,$date,$file);
    }
    print scalar(localtime).": [$language] $project [$date] completed\n";
  }
}

# Main loop: process every project of every language in the download set.
# Languages are visited in the order keys() yields them.
foreach my $lang (keys %languages) {
  my $wanted=$languages{$lang};
  foreach my $proj (@$wanted) {
    getproject($lang,$proj);
  }
}
 
Static Wikipedia 2008 (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - en - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu -

Static Wikipedia 2007 (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - en - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu -

Static Wikipedia 2006 (no images)

aa - ab - af - ak - als - am - an - ang - ar - arc - as - ast - av - ay - az - ba - bar - bat_smg - bcl - be - be_x_old - bg - bh - bi - bm - bn - bo - bpy - br - bs - bug - bxr - ca - cbk_zam - cdo - ce - ceb - ch - cho - chr - chy - co - cr - crh - cs - csb - cu - cv - cy - da - de - diq - dsb - dv - dz - ee - el - eml - eo - es - et - eu - ext - fa - ff - fi - fiu_vro - fj - fo - fr - frp - fur - fy - ga - gan - gd - gl - glk - gn - got - gu - gv - ha - hak - haw - he - hi - hif - ho - hr - hsb - ht - hu - hy - hz - ia - id - ie - ig - ii - ik - ilo - io - is - it - iu - ja - jbo - jv - ka - kaa - kab - kg - ki - kj - kk - kl - km - kn - ko - kr - ks - ksh - ku - kv - kw - ky - la - lad - lb - lbe - lg - li - lij - lmo - ln - lo - lt - lv - map_bms - mdf - mg - mh - mi - mk - ml - mn - mo - mr - mt - mus - my - myv - mzn - na - nah - nap - nds - nds_nl - ne - new - ng - nl - nn - no - nov - nrm - nv - ny - oc - om - or - os - pa - pag - pam - pap - pdc - pi - pih - pl - pms - ps - pt - qu - quality - rm - rmy - rn - ro - roa_rup - roa_tara - ru - rw - sa - sah - sc - scn - sco - sd - se - sg - sh - si - simple - sk - sl - sm - sn - so - sr - srn - ss - st - stq - su - sv - sw - szl - ta - te - tet - tg - th - ti - tk - tl - tlh - tn - to - tpi - tr - ts - tt - tum - tw - ty - udm - ug - uk - ur - uz - ve - vec - vi - vls - vo - wa - war - wo - wuu - xal - xh - yi - yo - za - zea - zh - zh_classical - zh_min_nan - zh_yue - zu -

Sub-domains

CDRoms - Magnatune - Librivox - Liber Liber - Encyclopaedia Britannica - Project Gutenberg - Wikipedia 2008 - Wikipedia 2007 - Wikipedia 2006 -

Other Domains

https://www.classicistranieri.it - https://www.ebooksgratis.com - https://www.gutenbergaustralia.com - https://www.englishwikipedia.com - https://www.wikipediazim.com - https://www.wikisourcezim.com - https://www.projectgutenberg.net - https://www.projectgutenberg.es - https://www.radioascolto.com - https://www.debitoformtivo.it - https://www.wikipediaforschools.org - https://www.projectgutenbergzim.com