#!/usr/bin/perl -T
use strict;
use warnings;
use Encode;
use CGI;
use LWP::Simple;
use XML::Simple;
use HTML::Template;
use MeCab;
# ---------------------------------------------------------------------------
# Main CGI flow.
#
# 1. Read the "key" query parameter and search for it as-is.
# 2. If that search yields nothing, extract terms from the key with
#    get_term_frequency(), join them (most frequent first) into a new query
#    and search again.
# 3. Render the results with the template stored in the __DATA__ section.
# ---------------------------------------------------------------------------
my $q = CGI->new;

# Scalar context on purpose: only the first "key" value is used.
# An explicit defined/length test keeps a legitimate query of "0".
my $key = $q->param('key');
$key = "" unless defined $key && length $key;

my $new_key;
my $r = yapi_search({ key => $key });

# Only fall back to term extraction when there actually is a key to analyse.
if (@$r == 0 && length $key) {
    my $word = get_term_frequency($key);

    # Highest count first; ties are broken by the term itself so that the
    # generated query does not depend on hash ordering.
    my @keys = sort { $word->{$b} <=> $word->{$a} or $a cmp $b } keys %$word;

    if (@keys > 0) {
        $new_key = join(" ", @keys);
        $r = yapi_search({ key => $new_key });
    }
}

# Slurp the whole template from __DATA__ in one read.
my $template = do { local $/; <DATA> };
$template = "" unless defined $template;

# default_escape => 'HTML' makes every <TMPL_VAR> HTML-escaped on output.
# The template echoes the user-supplied "key" (through associate => $q) and
# text from the search response, none of which may be emitted as raw markup.
my $t = HTML::Template->new(
    scalarref         => \$template,
    associate         => $q,
    die_on_bad_params => 0,
    default_escape    => 'HTML',
);
$t->param(results => $r);
$t->param(new_key => $new_key);

print $q->header(-charset => 'UTF-8'), $t->output();
# yapi_search(\%args) -> \@results
#
# Queries the Yahoo! Japan web search service and returns the matches.
#
# Arguments (hash reference):
#   key - query string; an undefined or empty key yields no results.
#   num - number of results to request; defaults to 10 when missing, zero or
#         not a plain non-negative integer.
#
# Returns a reference to an array of hash references, one per <Result>
# element of the response. The array is empty when there is no key, the
# HTTP request fails, the response cannot be parsed, or nothing was found.
# This sub never dies on a bad response.
sub yapi_search {
    my ($args_ref) = @_;
    my $key = $args_ref->{key};
    my $num = $args_ref->{num} || 10;

    # defined/length rather than truthiness, so that "0" is a valid query.
    return [] unless defined $key && length $key;

    # $num is interpolated into the URL, so accept digits only.
    $num = 10 unless $num =~ /\A[0-9]+\z/;

    # Percent-encode every byte outside [0-9A-Za-z_]. A character string is
    # first turned into UTF-8 bytes so each escape covers exactly one byte.
    utf8::encode($key) if utf8::is_utf8($key);
    $key =~ s/([^0-9A-Za-z_])/sprintf('%%%02X', ord($1))/ge;

    my $url = "http://search.yahooapis.jp/WebSearchService/V1/"
            . "webSearch?appid=YahooDemo&query=$key&results=$num";

    # LWP::Simple::get returns undef when the request fails.
    my $yahoo_response = get($url);
    return [] unless defined $yahoo_response && length $yahoo_response;

    # ForceArray  - <Result> is always an array, for one match or many.
    # KeyAttr     - disable folding of arrays into hashes by name/key/id.
    # SuppressEmpty - empty elements become '' instead of an empty hash.
    # eval        - XMLin dies on malformed input; treat that as "not found".
    my $yahoo_xml = eval {
        XML::Simple->new()->XMLin(
            $yahoo_response,
            ForceArray    => ['Result'],
            KeyAttr       => [],
            SuppressEmpty => '',
        );
    };
    return [] unless ref($yahoo_xml) eq 'HASH';

    my $results = $yahoo_xml->{Result};
    return [] unless ref($results) eq 'ARRAY';    # not found

    # Keep only well-formed entries (an empty <Result/> parses to '').
    return [ grep { ref($_) eq 'HASH' } @$results ];
}
# get_term_frequency($str) -> \%count_of
#
# Runs $str through the MeCab morphological analyser and counts the tokens
# whose feature string starts with the EUC-JP bytes for the Japanese word
# for "noun".
#
# Arguments:
#   $str - text to analyse, as UTF-8 bytes. The caller's variable is not
#          modified; the conversion happens on a local copy.
#
# Returns a hash reference mapping each matching surface form (converted
# back to UTF-8 bytes) to its number of occurrences. The hash is empty for
# undefined or empty input, which is answered without creating a tagger.
#
# NOTE(review): the text is converted to EUC-JP before parsing and the
# feature test is an EUC-JP byte pattern, so this presumably requires a
# MeCab dictionary in EUC-JP -- confirm against the deployment.
sub get_term_frequency {
    my ($str) = @_;
    my %count_of;

    # Nothing to analyse: skip the tagger entirely.
    return \%count_of unless defined $str && length $str;

    Encode::from_to($str, 'utf-8', 'euc-jp');

    my $tagger = MeCab::Tagger->new("");
    my $node   = $tagger->parseToNode($str);
    return \%count_of unless $node;

    # Start from the node after the first one (presumably the
    # beginning-of-sentence sentinel) and follow the chain to its end.
    for ($node = $node->{next}; $node; $node = $node->{next}) {
        my $feature = $node->{feature};
        next unless defined $feature
                 && $feature =~ /^\xcc\xbe\xbb\xec/;    # noun

        my $surface = $node->{surface};
        Encode::from_to($surface, 'euc-jp', 'utf-8');
        $count_of{$surface}++;
    }

    return \%count_of;
}
__DATA__
<html lang="ja">
<head>
<title>Sentence Search</title>
</head>
<body>
<h1>Sentence Search</h1>
<form method="get">
<input type="text" name="key" value="<TMPL_VAR name=key>" size="80">
<input type="submit" value="search">
</form>
<TMPL_IF name=new_key>
<p>Not Found: <TMPL_VAR name=key></p>
<p>New Search Key: <TMPL_VAR name=new_key></p>
</TMPL_IF>
<h2>Search Results</h2>
<TMPL_LOOP name=results>
<h3><a href="<TMPL_VAR name=Url>"><TMPL_VAR name=Title></a></h3>
<p><TMPL_VAR name=Summary></p>
</TMPL_LOOP>
</body>
</html>
