#!/usr/bin/perl -T
#
# Sentence Search CGI.
#
# Takes a free-text query in the "key" CGI parameter and runs it against the
# Yahoo! Japan web search API.  When the query yields no results, the nouns
# of the query are extracted with MeCab, ordered by frequency, and searched
# again as a space-separated keyword list ("new_key").  The result list is
# rendered with the HTML::Template stored in the __DATA__ section.
use strict;
use warnings;
use Encode;
use CGI;
use LWP::Simple;
use XML::Simple;
use HTML::Template;
use MeCab;

my $q = CGI->new;

# Use a defined-check rather than "||" so that the literal query "0" is kept.
my $key = $q->param('key');
$key = "" unless defined $key;
my $new_key;

my $r = yapi_search({key => $key});
if (@$r == 0 && length $key) {
    # Nothing found for the raw sentence: fall back to its nouns, most
    # frequent first.  Ties are broken by string order so that the fallback
    # query is the same on every request.
    my $word = get_term_frequency($key);
    my @keys = sort { $word->{$b} <=> $word->{$a} || $a cmp $b } keys %$word;
    if (@keys > 0) {
        $new_key = join(" ", @keys);
        $r = yapi_search({key => $new_key});
    }
}

my $template = join("", <DATA>);
my $t = HTML::Template->new(
    scalarref         => \$template,
    associate         => $q,    # lets the template read "key" from the request
    die_on_bad_params => 0,     # result rows carry fields the template ignores
);
$t->param(results => $r);
$t->param(new_key => $new_key);

# NOTE(review): "key" arrives as raw bytes while XML::Simple may hand back
# decoded character strings; confirm the combined output is valid UTF-8.
print $q->header(-charset => 'UTF-8'), $t->output();

# yapi_search(\%args) -> \@results
#
# Queries the Yahoo! Japan web search API.
#
#   key - query string (bytes); an undefined or empty key returns []
#   num - number of results to request (default 10)
#
# Returns a reference to an array of result hashes as produced by
# XML::Simple from the response's <Result> elements.  Returns an empty array
# reference when there is no key, the HTTP request fails, the response cannot
# be parsed, or the response has no results.
sub yapi_search {
    my ($args_ref) = @_;
    my $key = $args_ref->{key};
    my $num = $args_ref->{num} || 10;

    return [] unless defined $key && length $key;

    # Percent-encode every byte outside [0-9A-Za-z_].  Spaces become %20
    # here, so no separate "+" substitution is needed.
    $key =~ s/([^0-9A-Za-z_])/'%'.unpack('H2',$1)/ge;

    my $url = "http://search.yahooapis.jp/WebSearchService/V1/"
            . "webSearch?appid=YahooDemo&query=$key&results=$num";

    # LWP::Simple::get returns undef on any failure.
    my $yahoo_response = get($url);
    return [] unless defined $yahoo_response;

    # XMLin dies on malformed input; treat that the same as "not found".
    my $yahoo_xml = eval {
        XML::Simple->new()->XMLin(
            $yahoo_response,
            ForceArray    => ['Result'],  # one hit and many hits look alike
            KeyAttr       => [],          # never fold the result list into a hash
            SuppressEmpty => '',          # empty elements become "" instead of {}
        );
    };
    return [] unless ref($yahoo_xml) eq "HASH";

    my $results = $yahoo_xml->{Result};
    return ref($results) eq "ARRAY" ? $results : [];
}

# get_term_frequency($str) -> \%count_of
#
# Counts the nouns in $str.  $str is converted from UTF-8 to EUC-JP before it
# is handed to MeCab, and each noun surface is converted back to UTF-8 before
# it is used as a hash key.  Returns a reference to a hash mapping each noun
# to its number of occurrences.
sub get_term_frequency {
    my ($str) = @_;

    Encode::from_to($str, 'utf-8', 'euc-jp');

    my $tagger = MeCab::Tagger->new("");
    my $node   = $tagger->parseToNode($str);

    my %word;
    # Advancing before the first test skips the node parseToNode returns.
    while ($node = $node->{next}) {
        # "\xcc\xbe\xbb\xec" is the Japanese word for "noun" in EUC-JP bytes.
        next unless $node->{feature} =~ /^\xcc\xbe\xbb\xec/;

        my $surface = $node->{surface};
        Encode::from_to($surface, 'euc-jp', 'utf-8');
        $word{$surface}++;
    }
    return \%word;
}

__DATA__
<html lang="ja">
<head>
<title>Sentence Search</title>
</head>
<body>
<h1>Sentence Search</h1>
<form method="get">
<input type="text" name="key" value="<TMPL_VAR ESCAPE=HTML name=key>" size="80">
<input type="submit" value="search">
</form>
<TMPL_IF name=new_key>
<p>Not Found: <TMPL_VAR ESCAPE=HTML name=key></p>
<p>New Search Key: <TMPL_VAR ESCAPE=HTML name=new_key></p>
</TMPL_IF>
<h2>Search Results</h2>
<TMPL_LOOP name=results>
<h3><a href="<TMPL_VAR ESCAPE=HTML name=Url>"><TMPL_VAR ESCAPE=HTML name=Title></a></h3>
<p><TMPL_VAR ESCAPE=HTML name=Summary></p>
</TMPL_LOOP>
</body>
</html>