昨日のコード
あまり表立って公開するのもよろしくない気がしなくもない。
# Scrape the female voice-actor directory on seigura.com and print one
# tab-separated line per profile to STDOUT, in the column order:
# name, reading (furi), birthday, blood type, hometown, position.
use strict;
use warnings;
use URI;
use Web::Scraper;
use List::MoreUtils qw/uniq/;
use utf8;

binmode STDOUT, ':utf8';

# Index keys substituted into the directory URLs as the jpIdx value.
my @table_kw = qw(a ka sa ta na ha ma ya ra wa);
my @sy_list;

foreach my $kw (@table_kw) {
    ### $kw
    # Collect the href of every link found in the pager row of the first page.
    my $pagers = scraper {
        process 'table.actorTbl tr.Pager a', 'l[]' => '@href';
        result 'l';
    }->scrape(URI->new("http://seigura.com/senior/directory/tabid/69/sextype/Women/jpIdx/$kw/Default.aspx"));
    ### $pagers

    # Guard against an undefined result (e.g. no pager link matched):
    # dereferencing it below would otherwise abort the whole run.
    $pagers = [] unless defined $pagers;

    # Count distinct "Page$N" links; + 1 presumably accounts for the current
    # page, which would not appear as a link -- TODO confirm against the site.
    my @page_links = uniq grep { /Page\$/ } @$pagers;
    my $pages      = @page_links + 1;
    ### $pages

    for my $page (1 .. $pages) {
        # Single-quoted on purpose: the '$' characters are literal parts of
        # the postback parameters and must not be interpolated.
        push @sy_list, scrape_prof(
            'http://seigura.com/Default.aspx?TabId=69&sextype=Women&jpIdx=' . $kw
              . '&__EVENTTARGET=dnn$ctr683$SeiyuMeikanList$grdMain&__EVENTARGUMENT=Page$' . $page
        );
        sleep 3;    # pause between requests
    }
}

foreach my $p (@sy_list) {
    # Missing fields are printed as empty strings (same STDOUT output as
    # before, but without "uninitialized value" warnings).
    print join("\t", map { defined $p->{$_} ? $p->{$_} : '' }
                     qw(name furi birth blood home position)), "\n";
}

# scrape_prof($uri)
#
# Fetches one directory page and extracts the profiles on it.
#
# Parameters:
#   $uri - URL of the directory page, as a string.
#
# Returns:
#   A list of hash references, one per scraped "div.profClm" fragment, with
#   the keys name, furi, birth, home, blood and position (a key is absent
#   when its span was not found). Returns an empty list when the page has no
#   "div.profClm" element. At most 16 profiles (ctl03..ctl10, L and R) are
#   returned per page.
sub scrape_prof {
    my $uri = shift;
    ### $uri

    # Inner HTML of every profile column on the page.
    my $profClm = scraper {
        process 'div.profClm', 'profs[]' => 'HTML';
        result 'profs';
    }->scrape(URI->new($uri));
    ### $profClm

    # Treat an undefined result as "no profiles" instead of dereferencing it.
    my $available = defined $profClm ? scalar @$profClm : 0;

    my @profiles;
    my $k = 0;    # index of the next unprocessed fragment in @$profClm

    # The k-th fragment is matched against the span ids of grid row ctlNN
    # (NN = 03..10), left side first, then right side.
    ROW:
    for my $i (3 .. 10) {
        my $n = sprintf("%02d", $i);
        for my $s (qw(L R)) {
            last ROW if $k >= $available;

            my $id = "span#dnn_ctr683_SeiyuMeikanList_grdMain_ctl${n}_lbl";
            my $p  = scraper {
                process "${id}Name_${s}",      'name'     => 'TEXT';
                # Strip the parentheses surrounding the reading.
                process "${id}Furi_${s}",      'furi'     => ['TEXT', sub { s/[()]//g; $_ }];
                process "${id}Birthday_${s}",  'birth'    => 'TEXT';
                process "${id}Hometown_${s}",  'home'     => 'TEXT';
                process "${id}BloodType_${s}", 'blood'    => 'TEXT';
                process "${id}Position_${s}",  'position' => 'TEXT';
            }->scrape($profClm->[$k]);
            ### $p

            push @profiles, $p;
            $k++;
        }
    }

    # Always return what was collected, including when the page holds more
    # fragments than the 16 grid slots (previously nothing was returned then).
    return @profiles;
}
forループはscraperブロックの中に入れられそうだな……。そうすれば$kなどを使って個数をチェックする必要も無くなるけど、グローバル変数でgrdMain_ctl${n}の番号とL/Rをチェックしなきゃダメか。それはそれで面倒だ。