日時 | 2005/05/27 (Fri) 19:00 - 21:00 |
会場 | デジハリ東京御茶ノ水本校 |
#!/usr/bin/perl
# Rank pairs of products that were bought together, scored by the base-2
# mutual information log2(N * f12 / (f1 * f2)).
#
# Input : files named on the command line (or STDIN), one purchase per line,
#         tab-separated.  The 2nd column is read as the title, the 3rd as the
#         ASIN and the 4th as the date.  Rows sharing a date form one cart.
# Output: STDOUT, converted from UTF-8 to EUC-JP.  One record per pair that
#         co-occurs in at least two carts, highest score first.
use strict;
use warnings;
use Encode;

# Run the pipeline only when executed as a script, so that the helper subs
# below can also be loaded and exercised on their own.
main() unless caller;

# Entry point: read the report, count co-occurrences, score and print them.
sub main {
    my (%cart_for, %info_for);
    my $total_rows = 0;

    while (my $line = <>) {
        $total_rows += record_purchase($line, \%cart_for, \%info_for);
    }

    my $freq_pair = count_pairs(\%cart_for);
    my $mi_for    = mutual_information($freq_pair, \%info_for, $total_rows);

    for my $pair (rank_pairs($mi_for, $freq_pair)) {
        my $entry = format_entry($pair, $mi_for->{$pair}, $freq_pair->{$pair},
                                 \%info_for);
        print encode('euc-jp', decode('utf8', $entry));
    }
    return;
}

# Parse one report line and record it in the cart and item tables.
#   $line     - raw input line (line ending optional)
#   $cart_for - hashref: date => { asin => number of rows }
#   $info_for - hashref: asin => { title => ..., count => number of rows }
# Returns 1 if the row was counted, 0 if it was skipped (too few columns,
# ASIN not exactly 10 characters, or empty date).
sub record_purchase {
    my ($line, $cart_for, $info_for) = @_;

    # Strip LF / CRLF so that a date in the last column groups identically
    # whether or not the line is newline-terminated.
    $line =~ s/\r?\n\z//;

    my ($title, $asin, $date) = (split /\t/, $line)[1, 2, 3];
    return 0 unless defined $asin && defined $date;
    return 0 if length($asin) != 10 || length($date) == 0;

    $cart_for->{$date}{$asin}++;
    $info_for->{$asin}{title} = $title;
    $info_for->{$asin}{count}++;
    return 1;
}

# Count, for every unordered pair of distinct ASINs, the number of carts that
# contain both.  Returns a hashref keyed by "asin1\tasin2" with asin1 lt asin2.
sub count_pairs {
    my ($cart_for) = @_;
    my %freq_pair;

    for my $cart (values %{$cart_for}) {
        # Sorting once up front guarantees $asins[$i] lt $asins[$j] for i < j,
        # so each pair always maps to the same key.
        my @asins = sort keys %{$cart};
        for my $i (0 .. $#asins - 1) {
            for my $j ($i + 1 .. $#asins) {
                $freq_pair{"$asins[$i]\t$asins[$j]"}++;
            }
        }
    }
    return \%freq_pair;
}

# Score every pair: log2(N * f12 / (f1 * f2)), where N is the number of
# counted rows, f12 the pair's cart count and f1/f2 the row counts of the two
# items.  Returns a hashref pair-key => score.
sub mutual_information {
    my ($freq_pair, $info_for, $total_rows) = @_;
    my %mi_for;

    for my $pair (keys %{$freq_pair}) {
        my ($w1, $w2) = split /\t/, $pair;
        my $f12 = $freq_pair->{$pair};
        my $f1  = $info_for->{$w1}{count};
        my $f2  = $info_for->{$w2}{count};
        $mi_for{$pair} = log(($total_rows * $f12) / ($f1 * $f2)) / log(2);
    }
    return \%mi_for;
}

# Return the pair keys whose frequency is at least $min_freq (default 2),
# ordered by descending score.  Ties are broken by descending frequency and
# then by key so the output order is reproducible between runs.
sub rank_pairs {
    my ($mi_for, $freq_pair, $min_freq) = @_;
    $min_freq = 2 unless defined $min_freq;

    my @kept = grep { $freq_pair->{$_} >= $min_freq } keys %{$mi_for};
    return sort {
               $mi_for->{$b}    <=> $mi_for->{$a}
            || $freq_pair->{$b} <=> $freq_pair->{$a}
            || $a cmp $b
    } @kept;
}

# Build the text record printed for one pair (still in the input encoding).
sub format_entry {
    my ($pair, $mi, $f12, $info_for) = @_;
    my ($w1, $w2) = split /\t/, $pair;
    my ($f1, $f2) = map { $info_for->{$_}{count} } $w1, $w2;
    my ($t1, $t2) = map { $info_for->{$_}{title} } $w1, $w2;

    return <<"FMT";
----------
mi: $mi
freq: $f12 $f1 $f2
$w1 $t1
$w2 $t2
FMT
}

1;    # true value so the file can also be loaded with require