


#!/usr/bin/perl
use strict;
use warnings;
use LWP::UserAgent;
# Fetch http://example.com/ with a custom User-Agent and an Accept header,
# then print the response body to STDOUT.
my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/8.0");

my $req = HTTP::Request->new(GET => 'http://example.com/');
$req->header('Accept' => 'text/html');

my $res = $ua->request($req);
if ($res->is_success) {
    print $res->content;
}
else {
    # Diagnostics go to STDERR so a failed fetch never ends up mixed into
    # whatever is consuming STDOUT. The trailing newline keeps warn() from
    # appending the script name and line number.
    warn "Error: " . $res->status_line . "\n";
}
# Emit index points for UTF-8 text: for every input line, write the absolute
# byte offset of each UTF-8 character start as a 32-bit big-endian integer
# (pack "N"), stopping at the first tab or newline of that line.
#
# NOTE(review): assumes the input is read as raw bytes (no decoding layer on
# ARGV); with a decoding layer length() would count characters, not bytes.
# NOTE(review): pack "N" holds 32 bits, so offsets past 4 GiB cannot be
# represented.

# One UTF-8 encoded character, matched at the byte level.
my $utf8_char = qr{
      [\x00-\x7F]
    | [\xC0-\xDF][\x80-\xBF]
    | [\xE0-\xEF][\x80-\xBF]{2}
    | [\xF0-\xF7][\x80-\xBF]{3}
}x;

# The output is packed binary; make sure no text-mode layer rewrites bytes
# that happen to equal 0x0A inside an offset.
binmode STDOUT;

my $ip = 0;    # byte offset of the current line within the whole input
while (my $idxstr = <>) {
    while ($idxstr =~ m{($utf8_char)}g) {
        # Take the offset from the match itself. The unanchored //g search
        # skips bytes that do not form a sequence in $utf8_char, so summing
        # the lengths of matched characters would drift after such bytes.
        my ($offset, $c) = ($-[1], $1);

        # Only the part of the line before the first tab/newline is indexed.
        last if $c eq "\t" or $c eq "\n";

        print pack("N", $ip + $offset);
    }
    $ip += length $idxstr;
}
% mkipu8.pl text-utf8.txt > text-utf8.txt.ary
% mkary -so text-utf8.txt
...
% sass 'ほげ' text-utf8.txt
...
<!--
  Thumbnail link that degrades when the full-size image cannot be loaded.
  The second <img> (height/width 0) requests the same URL the link points to
  and acts as a probe:
    - onload:  the probe removes itself; the link is left as it is.
    - onerror: the probe sets the thumbnail's border to 0, removes the href
               attribute from the <a>, and then removes itself.
  The handlers reach the <a> via parentNode.firstChild and the thumbnail via
  that node's firstChild, so the <a> must stay the very first node inside the
  <div> and the thumbnail the very first node inside the <a> (no whitespace,
  comments or other nodes before them).
  Placeholders: "画像URL" = image URL, "サムネイル" = thumbnail.
-->
<div><a href="画像URL"><img src="サムネイル"></a><img src="画像URL"
onerror="this.parentNode.firstChild.firstChild.style.border=0;
this.parentNode.firstChild.removeAttribute('href');
this.parentNode.removeChild(this);"
onload="this.parentNode.removeChild(this);"
height="0" width="0"></div>
