プロトコル\データ構造 | XML | JSON, JSONP |
REST風 | MECAPI, Perl | キーフレーズ抽出API, JavaScript |
SOAP, XML-RPC | はてブ件数取得API, Perl | - |
#!/usr/bin/perl
# fsj.pl - mask proper nouns in Japanese text.
#
# Reads sentences (one per line) from the files named on the command line
# or from STDIN, sends each one to the MECAPI morphological-analysis web
# service, and prints the sentence with every word whose feature string
# contains ",固有名詞," replaced by a run of "X" of the same length.
use strict;
use warnings;
use LWP::Simple;
use XML::Simple;
use URI::Escape;
use Encode;
use utf8;
use open ':utf8';

binmode STDIN,  ":utf8";
binmode STDOUT, ":utf8";

while (my $line = <>) {
    chomp $line;

    # Skip empty lines only; a line consisting of "0" is still real input.
    next unless length $line;

    my $words_ref    = mecapi({ sentence => $line });
    my $new_sentence = "";
    foreach my $w (@$words_ref) {
        if ($w->{feature} =~ /,固有名詞,/) {
            # Mask the word, one "X" per character of its surface form.
            $new_sentence .= "X" x length($w->{surface});
        }
        else {
            $new_sentence .= $w->{surface};
        }
    }
    print "$new_sentence\n";
}

# mecapi(\%args) - analyse one sentence with the MECAPI web service.
#
# Parameters:
#   $args_ref - hash ref; the key "sentence" holds the text to analyse.
# Returns:
#   An array ref of hash refs, one per <word> element of the response
#   (the main loop reads the "surface" and "feature" keys). An empty
#   array ref is returned when the response contains no <word>.
# Dies:
#   When the HTTP request fails. Failing loudly is deliberate: echoing the
#   sentence unmasked would defeat the purpose of the script.
sub mecapi {
    my ($args_ref) = @_;

    my $s   = URI::Escape::uri_escape_utf8($args_ref->{sentence}) || "";
    my $url = "https://maapi.net/apis/mecapi?sentence=$s";

    # LWP::Simple::get returns undef on any failure; handing that to XMLin
    # would produce a misleading error about a missing file.
    my $response = get($url);
    die "mecapi: request failed: $url\n" unless defined $response;

    # ForceArray keeps {word} an array ref even for a single <word>.
    my $xmlsimple = XML::Simple->new(ForceArray => ['word']);
    my $xml       = $xmlsimple->XMLin($response);

    return $xml->{word} || [];
}
% cat a.txt
今日は鈴木君と渋谷のマクドナルドでコーヒーやコカコーラを飲みました。
% ./fsj.pl a.txt
今日はXX君とXXのXXXXXXでコーヒーやXXXXXを飲みました。
(https://maapi.net/apis/mecapi?sentence=%E3%83%9A%E3%83%B3%E3%81%AF%E8%B5%B0%E3%82%8B)<MecabResult> <word> <surface>ペン</surface> <feature>名詞,一般,*,*,*,*,ペン,ペン,ペン</feature> </word> <word> <surface>は</surface> <feature>助詞,係助詞,*,*,*,*,は,ハ,ワ</feature> </word> <word> <surface>走る</surface> <feature>動詞,自立,*,*,五段・ラ行,基本形,走る,ハシル,ハシル</feature> </word> </MecabResult>
#!/usr/bin/perl
# hbn.pl - print Hatena Bookmark counts for a list of URLs.
#
# Reads lines from the files named on the command line or from STDIN,
# keeps those that start with "http", asks the Hatena Bookmark XML-RPC
# endpoint for their bookmark counts in a single bookmark.getCount call,
# and prints "<count> <entry-page URL>" for each URL in input order.
use strict;
use warnings;
use XMLRPC::Lite;

my @urls;
while (my $line = <>) {
    chomp $line;
    next unless $line =~ /^http/;
    push @urls, $line;
}

# Nothing to look up: do not call the API with an empty argument list.
exit 0 unless @urls;

my $EndPoint = 'http://b.hatena.ne.jp/xmlrpc';
my $response = XMLRPC::Lite
    ->proxy($EndPoint)
    ->call('bookmark.getCount', @urls);

# A faulted call has no result; without this check every URL would be
# silently reported with a count of 0.
if ($response->fault) {
    die "bookmark.getCount failed: " . $response->faultstring . "\n";
}

my $map = $response->result || {};

foreach my $url (@urls) {
    # URLs missing from the result are reported as 0.
    my $num = $map->{$url} || 0;

    # Rewrite the URL into the address of its Hatena Bookmark entry page.
    # NOTE(review): https URLs are mapped to the same /entry/ path as http
    # ones; confirm whether the service expects a different path for https.
    my $hbu = $url;
    $hbu =~ s{^https?://}{http://b.hatena.ne.jp/entry/};

    print "$num $hbu\n";
}
% cat hbn-test.txt
http://www.yahoo.co.jp/
http://chalow.net/2009-06-16-1.html
http://chalow.net/2009-08-15-1.html
http://chalow.net/
% ./hbn.pl hbn-test.txt
6827 http://b.hatena.ne.jp/entry/www.yahoo.co.jp/
939 http://b.hatena.ne.jp/entry/chalow.net/2009-06-16-1.html
506 http://b.hatena.ne.jp/entry/chalow.net/2009-08-15-1.html
134 http://b.hatena.ne.jp/entry/chalow.net/
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8">
<title>キーフレーズ抽出タグクラウド風(Simple)</title>
<script>
// JSONP callback: renders the extracted key phrases as a simple tag cloud.
//   json - object mapping each key phrase to its score; the score is used
//          directly as the font size in pixels.
// The phrases are inserted as text nodes rather than concatenated into
// innerHTML, so a phrase containing "<" or "&" cannot inject markup.
function disp(json) {
  var dsp = document.getElementById('dsp');
  dsp.innerHTML = '';
  for (var k in json) {
    if (!Object.prototype.hasOwnProperty.call(json, k)) { continue; }
    var span = document.createElement('span');
    span.style.fontSize = json[k] + 'px';
    span.style.margin = '10px';
    span.appendChild(document.createTextNode(k));
    dsp.appendChild(span);
    dsp.appendChild(document.createTextNode(' '));
  }
}

// Minimal JSONP client for the keyphrase extraction API.
//   cb - name of the global function the service should call with the result.
// parse() reads the text from the #txt input and issues the request by
// adding a <script> element whose src is the API URL.
function dJSON(cb) {
  this.proxy = 'http://jlp.yahooapis.jp/KeyphraseService/V1/extract';
  this.cb = cb;
  this.script = null;
  this.parse = function () {
    var q = document.getElementById('txt').value;
    // encodeURIComponent (not encodeURI) is required for query-string values:
    // encodeURI leaves "&", "=", "+", "#" and "?" unescaped, so text containing
    // them would corrupt the request.
    var url = this.proxy + '?appid=THISISAPEN' + '&output=json'
            + '&callback=' + encodeURIComponent(this.cb)
            + '&sentence=' + encodeURIComponent(q);
    // Remove the previous request's element so repeated clicks do not
    // accumulate <script> tags in the document.
    if (this.script && this.script.parentNode) {
      this.script.parentNode.removeChild(this.script);
    }
    var script = document.createElement('script');
    script.charset = 'UTF-8';
    script.src = url;
    document.getElementsByTagName('head')[0].appendChild(script);
    this.script = script;
  };
}

var djson = new dJSON('disp');
</script>
</head>
<body>
<input type="button" onclick="djson.parse();" value="do">
<input type="text" size="60" id="txt">
<div id="dsp"></div>
</body>
</html>
タグクラウドをブラウザ上に表示する関数 disp が、抽出フレーズとその重要度(Max 100)を引数に呼ばれることになる。
disp({"\u30a8\u30d3\u30b9":100,"\u30da\u30f3":59})