animate.tvのscrape (2)

use strict;
use warnings;
use WWW::Mechanize;
use Web::Scraper;
use XML::LibXML::Simple;
#use Path::Class;
use Encode;
use utf8;
binmode STDOUT, ":utf8";

# The page URL is the only command-line argument; fail with a usage hint
# instead of a bare "Died" when it is missing.
my $uri = shift
  or die "usage: $0 <playlist-page-url>\n";
print STDERR "get $uri ... \n";

# Scrape each <table class="playlist"> on the page into a hash with the keys
# title / subtitle / play; `result 'content'` makes $info the list collected
# under 'content[]' rather than the enclosing stash.
my $info = scraper {
  process '//table[@class="playlist"]', 'content[]' => scraper {
    # One statement per rule (the form used in the Web::Scraper docs) so that
    # no rule depends on being evaluated as an argument of the previous one.
    process '//tr[1]', 'title' => 'TEXT';
    process '//tr[3]/td[1]', 'subtitle' => 'TEXT';
    process '//tr[3]/td[@class="play_btn"]/a', 'play' => '@href';
  };
  result 'content';
}->scrape(URI->new($uri));
### $info

# autocheck => 1 makes every failed request die, so no per-request checks below.
my $mech = WWW::Mechanize->new( autocheck => 1 );
my $parser = XML::LibXML::Simple->new();

# Render one output field: undef (element/cell absent) and references
# (presumably how XMLin hands back empty or nested elements -- TODO confirm)
# become the empty string, so a row never emits "uninitialized" warnings or
# "HASH(0x...)" text.
my $as_text = sub {
  my ($value) = @_;
  return ( defined $value && !ref $value ) ? $value : '';
};

# For every scraped playlist entry that has a play link, fetch the link target,
# parse it as XML and print one tab-separated line:
#   href, title, subtitle, Title, Author
for my $e (@{ $info || [] }) {
  next unless $e->{play};

  print STDERR "get $e->{play} ... \n";
  # Re-visit the listing page before each play link; presumably the site
  # checks the Referer or session state -- TODO confirm.
  $mech->get($uri);
  $mech->get($e->{play});
  ### content : $mech->content

  # The response is decoded from Shift_JIS by hand before being parsed.
  my $content_utf8 = Encode::decode("sjis", $mech->content);
  my $tree = $parser->XMLin($content_utf8)->{Entry};

  # Only descend into structures that really are hashes, so an unexpected
  # document shape yields empty columns instead of aborting the whole run.
  my $entry = ( ref($tree) eq 'HASH' ) ? $tree : {};
  my $ref   = $entry->{Ref};
  my $href  = ( ref($ref) eq 'HASH' ) ? $ref->{href} : undef;

  print join(
    "\t",
    map { $as_text->($_) }
      $href, $e->{title}, $e->{subtitle}, $entry->{Title}, $entry->{Author}
  ), "\n";

  # Be polite to the server between entries.
  sleep 1;
}

こんなもんか。XML::Simpleの日本語の取り扱いがヘンではまった。

ていうか、なにげにメチャ便利だなこれ。