#!/usr/bin/perl
# Command-line client for the Yahoo! Japan morphological analysis web API
# (MAService V1).  Every input line (from STDIN or the files named on the
# command line) is sent to the service as one sentence, and each word of the
# reply is printed as one tab-separated line.
use strict;
use warnings;
use Encode;
use URI::Escape;
use LWP::Simple;
use XML::Simple;
use Getopt::Long;
use utf8;
use open ':utf8';
binmode STDIN,  ":utf8";
binmode STDOUT, ":utf8";

# Application id sent with every request.
my $appid = "YahooDemo";

# Command-line options; each one is forwarded to the API parameter of the
# same name.
my %opt = ();
GetOptions(
    \%opt,
    "results=s",
    "response=s",
    "filter=s",
    "ma_response=s",
    "ma_filter=s",
    "uniq_response=s",
    "uniq_filter=s",
    "uniq_by_baseform=s",
);

my $param_results          = $opt{'results'}          || "ma";
my $param_response         = $opt{'response'}         || "surface,pos";
my $param_filter           = $opt{'filter'}           || "";
my $param_ma_response      = $opt{'ma_response'}      || "";
my $param_ma_filter        = $opt{'ma_filter'}        || "";
my $param_uniq_response    = $opt{'uniq_response'}    || "";
my $param_uniq_filter      = $opt{'uniq_filter'}      || "";
my $param_uniq_by_baseform = $opt{'uniq_by_baseform'} || "";

# Preferred column order for each result section.  Hash key order is not
# stable in Perl, so the columns are printed in the order the fields were
# requested instead of in `keys` order.
# NOTE(review): assumes ma_response / uniq_response take precedence over
# response for their section -- confirm against the API documentation.  A
# wrong guess only affects ordering: fields not listed here are still printed.
my %fields_for = (
    ma_result   => [ split /,/, ($param_ma_response   || $param_response) ],
    uniq_result => [ split /,/, ($param_uniq_response || $param_response) ],
);

while (my $line = <>) {
    chomp $line;
    my $xml_ref = webma($line);

    foreach my $section ("ma_result", "uniq_result") {
        my $result = $xml_ref->{$section};
        next unless ref $result eq 'HASH';

        # A reply without words may lack word_list/word entirely.
        my $word_list = $result->{word_list};
        next unless ref $word_list eq 'HASH';
        my $words = $word_list->{word};
        next unless ref $words eq 'ARRAY';

        foreach my $word (@$words) {
            next unless ref $word eq 'HASH';
            print format_word($word, $fields_for{$section}), "\n";
        }
    }
}

# Render one word hash as a tab-separated string.
#   $word   - hash ref of field name => value for a single word
#   $wanted - array ref of field names giving the preferred column order
# Fields named in $wanted come first (those absent from $word are skipped);
# any remaining fields follow in alphabetical order so the output is
# deterministic.
sub format_word {
    my ($word, $wanted) = @_;

    my %seen;
    my @order = grep { exists $word->{$_} && !$seen{$_}++ } @$wanted;
    push @order, grep { !$seen{$_} } sort keys %$word;

    return join "\t", map {
        my $value = $word->{$_};
        # XML::Simple represents an empty element as an empty hash ref by
        # default; print that (and undef) as an empty field.
        (defined $value && !ref $value) ? $value : '';
    } @order;
}

# Send one sentence to the morphological analysis service.
#   $key - the sentence to analyse
# Returns the reply parsed by XML::Simple as a hash ref.  Returns an empty
# hash ref when $key is undefined or empty, when the HTTP request fails, or
# when the reply cannot be parsed; the latter two also emit a warning.
sub webma {
    my ($key) = @_;
    return {} unless defined $key && length $key;

    # Name/value pairs kept in a list to preserve the parameter order.
    my @params = (
        appid            => $appid,
        results          => $param_results,
        response         => $param_response,
        filter           => $param_filter,
        ma_response      => $param_ma_response,
        ma_filter        => $param_ma_filter,
        uniq_response    => $param_uniq_response,
        uniq_filter      => $param_uniq_filter,
        uniq_by_baseform => $param_uniq_by_baseform,
        sentence         => $key,
    );

    # Escape every value, not just the sentence: option values come from the
    # command line and may contain characters such as '|', '&' or '#'.
    my @pairs;
    while (my ($name, $value) = splice(@params, 0, 2)) {
        push @pairs, $name . '=' . URI::Escape::uri_escape_utf8($value);
    }
    my $url = "http://jlp.yahooapis.jp/MAService/V1/parse" . "?" . join("&", @pairs);

    # LWP::Simple::get returns undef on any failure.
    my $response = get($url);
    unless (defined $response) {
        warn "webma: request failed for input line $.\n";
        return {};
    }

    # XMLin dies on malformed XML; do not let one bad reply abort the run.
    my $xmlsimple = XML::Simple->new(ForceArray => [ 'word' ]);
    my $parsed = eval { $xmlsimple->XMLin($response) };
    unless (ref $parsed eq 'HASH') {
        warn "webma: could not parse reply for input line $.\n";
        return {};
    }
    return $parsed;
}
# Example session:
#
#   % cat a.txt
#   これは六本木の赤いペンです。
#   % ./yapima.pl --response=surface,pos,reading < a.txt
#   これ 名詞 これ
#   は 助詞 は
#   六本木 名詞 ろっぽんぎ
#   の 助詞 の
#   赤い 形容詞 あかい
#   ペン 名詞 ぺん
#   です 助動詞 です
#   。 特殊 。