#!/usr/bin/perl
# hack_anchor.pl - tally the anchor texts that other pages use when they
# link to a given URL.
#
# Usage: hack_anchor.pl TARGET_URL
#
# The script queries the web search API at search.yahooapis.jp for
# "link:TARGET_URL", downloads every page listed in the response, collects
# the text of each <a> element whose href is exactly TARGET_URL, and prints
# one "anchor text:count" line per distinct text, most frequent first.
use strict;
use warnings;
use Encode;
use Encode::Guess qw/ euc-jp shiftjis 7bit-jis /;
binmode STDOUT, ":utf8";
use LWP::Simple;
use XML::Simple;

my $t = shift @ARGV;
die "usage: $0 TARGET_URL\n" unless defined $t && length $t;

# Percent-encode every byte outside [0-9A-Za-z_] so the target URL can be
# embedded in the query string.
(my $ec = $t) =~ s/([^0-9A-Za-z_])/'%'.unpack('H2',$1)/ge;
my $url = "http://search.yahooapis.jp/WebSearchService/V1/"
    ."webSearch?appid=YahooDemo&query=link:$ec";
my $results = get_results($url);

my %anchor;    # anchor text => number of occurrences
foreach my $page (@$results) {
    # A failed download yields undef; skip that page instead of feeding
    # undef to the decoder and the regex.
    my $raw = get($page->{Url});
    next unless defined $raw;

    # decode('Guess', ...) dies when the encoding cannot be determined;
    # such pages are skipped.
    my $str = eval { decode('Guess', $raw) };
    next unless defined $str;

    # \Q...\E makes the target URL match literally: without it, characters
    # such as '.', '?', '+' or '#' in the URL are treated as regex syntax.
    while ($str =~ m!<\s*a\s[^>]*?href=["']\Q$t\E['"][^>]*?>
                     (.+?)
                     <\s*/\s*a\s*>!gsix) {
        $anchor{$1}++;
    }
}

foreach my $anchor_text (sort {$anchor{$b} <=> $anchor{$a}} keys %anchor) {
    print "$anchor_text:$anchor{$anchor_text}\n";
}

# get_results($url)
#
# Fetches the search API response at $url and parses it with XML::Simple.
# Returns a reference to the array of <Result> elements (ForceArray makes
# this an array even for a single result), or a reference to an empty
# array when the response contains none.
# Dies if the response cannot be fetched; XML parse errors from XMLin
# propagate to the caller.
sub get_results {
    my ($url) = @_;

    my $yahoo_response = get($url);
    # Never hand undef to XMLin: fail here with a message naming the URL.
    die "failed to fetch search results from $url\n"
        unless defined $yahoo_response;

    my $xmlsimple = XML::Simple->new(ForceArray => [ 'Result' ]);
    my $yahoo_xml = $xmlsimple->XMLin($yahoo_response);

    return [] unless ref $yahoo_xml eq 'HASH';
    return $yahoo_xml->{Result} || [];
}
% ./hack_anchor.pl http://nais.to/~yto/
たつをのホームページ:4
たつを:3
たつをさん:2
山下 達雄:2
山下達雄:2
山下達雄さん:1
とある先輩:1
# Encode::Guess tuning: replace the suspect list with the Japanese
# encodings plus utf8.
Encode::Guess->set_suspects('euc-jp', 'shiftjis', '7bit-jis', 'utf8');

# NOTE(review): this flag presumably switches off Encode::Guess's automatic
# UTF detection so that only the suspects above decide the result --
# confirm against the Encode::Guess documentation.
$Encode::Guess::NoUTFAutoGuess = 1;