每个游戏的详情页可能有些许差异,比如有的有作弊码、秘籍、手册,有的有midi音乐文件,有的没有。
此脚本已经做了判断,并按游戏名目录存储。
Code: [show] | [select all]
=info
Mojo::UserAgent 批量获取 nes 游戏资源
523066680/vicyang
2018-12
=cut
use File::Slurp;
use File::Path;
use File::Basename;
use Mojo::UserAgent;
use Mojo::DOM;
use Try::Tiny;
STDOUT->autoflush(1);
our $wdir = "F:/temp/nesgames";
our $main = "http://www.nesfiles.com";
our $games = "http://www.nesfiles.com/Games";
mkpath $wdir unless -e $wdir;
chdir $wdir;
our $ua = Mojo::UserAgent->new();
our @headers = ( "User-Agent" => "Firefox/63.0" );
get_games_list($games);
sub get_games_list
{
our $main;
my ($link) = @_;
my $res = $ua->get( $link, \@headers )->res;
my $dom = $res->dom;
for my $e ( $dom->find(".nesfilesTable a")->each )
{
#printf "%s %s\n", $e->attr("href"), $e->text;
get_files( $main .$e->attr("href"), $e->text );
#exit;
}
}
sub get_files
{
our $main;
my ($link, $name) = @_;
my $title = basename($link);
my ($res, $dom);
my $fname = "${title}.html";
if ( -e $fname ) {
my $html = read_file($fname);
$dom = Mojo::DOM->new( $html );
} else {
$res = $ua->get( $link, \@headers )->res;
$dom = $res->dom;
write_file( $fname, {binmode=>":raw"}, $res->body );
}
# 获取资源明细,略过 Ebay 相关的条目
mkdir $title unless -e $title;
my ($head, $list, $res2);
for my $section ($dom->find(".GameSection")->each)
{
$head = $section->at("header")->text;
last if $head=~/Ebay$/i;
# 秘籍/代码
if ($head=~/Codes/i) {
write_file( $title ."/Codes_Cheats.txt", $section->all_text );
next;
}
if ($head=~/Screenshots/i) {
# 如果是屏幕截图
$list = $section->find("img")->map(attr=>"src");
} else {
# 其他情况获取 href
$list = $section->find("a")->map(attr=>"href");
}
printf "%s\n", $head;
for my $href ( $list->each )
{
printf "%s\n", $href;
$fname = $title ."/". basename($href);
next if -e $fname; # 跳过已经存在的文件
$res2 = try_to_get( "${main}$href" );
next unless defined $res2; # 如果获取失败
write_file( $fname, {binmode=>":raw"}, $res2->body);
}
}
}
sub try_to_get
{
our ($ua);
my ($link) = @_;
my $res;
my $times = 0;
while (1)
{
try { $res = $ua->get( $link )->result; }
catch { printf "Error %s, retry: %d\n", $_, $times; };
$times++;
last if (defined $res and $res->is_success);
return undef if ( $times > 5 );
}
return $res;
}