| 日時 | 2005/05/27 (Fri) 19:00 - 21:00 |
| 会場 | デジハリ東京御茶ノ水本校 |
















#!/usr/bin/perl
use strict;
use Encode;
### read amazon report (tab-separated text, one purchased item per line)
# Reads every line from the files named in @ARGV (or STDIN) and fills:
#   %shopping_cart  $shopping_cart{$date}{$asin} = number of accepted lines
#                   carrying that ASIN under that date value (4th column)
#   %info           $info{$asin}{title} = title (2nd column) of the most
#                   recently accepted line for the ASIN
#                   $info{$asin}{count} = number of accepted lines for the ASIN
#   $N              total number of accepted lines
# A line is accepted only when its 3rd column is exactly 10 characters long
# and its 4th column is non-empty.
# NOTE(review): count and $N are per line, not per cart, so an ASIN repeated
# under one date is counted more than once -- confirm this is intended.
my %shopping_cart;
my %info;
my $N = 0;
while (my $line = <>) {
    # Strip the line terminator (LF or CRLF). Without this, a date in the
    # last column keeps its "\n": an empty date would pass the filter below,
    # and a final line lacking a newline would land in a different cart.
    $line =~ s/\r?\n\z//;

    my ($title, $asin, $date) = (split /\t/, $line)[1, 2, 3];

    # Skip lines that are too short or that do not carry a 10-character ASIN.
    next unless defined $asin and length($asin) == 10;
    next unless defined $date and length($date) > 0;

    $shopping_cart{$date}{$asin}++;
    $info{$asin}{title} = $title;
    $info{$asin}{count}++;
    $N++;
}
### count pair
# For each cart (one per distinct date key in %shopping_cart) that contains
# at least two distinct ASINs, increment %freq_pair once for every unordered
# pair of ASINs in it. The key is the two ASINs in ascending string order,
# joined by a tab.
my %freq_pair;
for my $cart (values %shopping_cart) {
    # Sorting the cart once guarantees that any element at a lower index
    # compares before any element at a higher index, so each pair built
    # below is already in ascending order.
    my @asins = sort keys %$cart;
    next if @asins < 2;

    for my $first (0 .. $#asins - 1) {
        for my $second ($first + 1 .. $#asins) {
            my $pair_key = join "\t", $asins[$first], $asins[$second];
            $freq_pair{$pair_key}++;
        }
    }
}
### calculate mutual information
# For every ASIN pair in %freq_pair, store in %mi (same tab-joined key):
#     log2( ($N * f12) / (f1 * f2) )
# where f12 is the pair's co-occurrence count and f1, f2 are the per-ASIN
# counts from %info.
# The pairs are visited in arbitrary order: the result goes into a hash, so
# sorting them here would be wasted work.
my %mi;
my $log_of_2 = log(2);    # loop-invariant divisor converting ln to log2
for my $pair (keys %freq_pair) {
    my ($w1, $w2) = split(/\t/, $pair);
    my $f12 = $freq_pair{$pair};
    my $f1  = $info{$w1}{count};
    my $f2  = $info{$w2}{count};
    $mi{$pair} = log(($N * $f12) / ($f1 * $f2)) / $log_of_2;
}
### output
# Prints one record per ASIN pair that co-occurred at least twice, ordered by
# descending score from %mi. Ties are broken by descending co-occurrence
# count and then by pair key, so the order does not depend on hash iteration
# order and is the same on every run.
# Each record is decoded as UTF-8 and re-encoded as EUC-JP before printing.
my @reported_pairs =
    sort {
           $mi{$b} <=> $mi{$a}
        or $freq_pair{$b} <=> $freq_pair{$a}
        or $a cmp $b
    }
    grep { $freq_pair{$_} >= 2 } keys %mi;

for my $pair (@reported_pairs) {
    my ($w1, $w2) = split(/\t/, $pair);
    my $f12   = $freq_pair{$pair};
    my $f1    = $info{$w1}{count};
    my $f2    = $info{$w2}{count};
    my $score = $mi{$pair};
    my $ostr  = <<"FMT";
----------
mi: $score
freq: $f12 $f1 $f2
$w1 $info{$w1}{title}
$w2 $info{$w2}{title}
FMT
    print encode('euc-jp', decode('utf8', $ostr));
}