URIエスケープを含んだ文字コード変換とか色々

あると便利かと思ったんだけど、今ひとつスマートな実装が思いつかないんだよなー。

use strict;
use warnings;
use Encode;
use URI::Escape;

my $ESCAPED     = 1;  # normal URI-escaped
my $ESCAPED_ALL = 2;  # all URI-escaped
my $ESCAPED_WP  = 4;  # URI-escaped without prefix '%' (must be used with $ESCAPED or $ESCAPED_ALL)
my $ENCODE_SJIS = 8;
my $ENCODE_EUC  = 16;
my $ENCODE_UTF8 = 32;
my %ENCODER = (
  $ENCODE_SJIS => find_encoding('sjis'),
  $ENCODE_EUC  => find_encoding('euc-jp'),
  $ENCODE_UTF8 => find_encoding('utf-8'),
  );

sub ___convert{
  my ($str, $frome, $toe, $from, $to) = @_;
  my $value = $str;
  ### args : ($str, $frome, $toe, $from, $to)
  ## at first unescape characer
  if($frome & $ESCAPED_WP){
    $value =~ s/(..)/%$1/g;
  }
  if($frome & $ESCAPED){
    $value = URI::Escape::uri_unescape($value);
  }
  ## encode if "from" is specified
  if($from and $to){
    ### from encoder : $ENCODER{$from}
    ### to   encoder : $ENCODER{$to}
    Encode::_utf8_off($value);
    $value = $ENCODER{$to}->encode($ENCODER{$from}->decode($value));
    Encode::_utf8_off($value);    
  }
  ## escape again
  if($toe & $ESCAPED){
    $value = URI::Escape::uri_escape($value);
  }
  elsif($toe & $ESCAPED_ALL){
    $value = URI::Escape::uri_escape($value, '\x00-\xff');
  }
  if($toe & $ESCAPED_WP){
    $value =~ s/%//g;
  }
  return $value;
}

## wrapper function:
## s : string
## u : without prefix
## e : escaped
## E : escaped_all

sub convert_s2s   { ___convert($_[0], 0, 0,                        $_[1], $_[2]); }
sub convert_s2e   { ___convert($_[0], 0, $ESCAPED,                 $_[1], $_[2]); }
sub convert_s2eu  { ___convert($_[0], 0, $ESCAPED|$ESCAPED_WP,     $_[1], $_[2]); }
sub convert_s2E   { ___convert($_[0], 0, $ESCAPED_ALL,             $_[1], $_[2]); }
sub convert_s2Eu  { ___convert($_[0], 0, $ESCAPED_ALL|$ESCAPED_WP, $_[1], $_[2]); }

sub convert_e2s   { ___convert($_[0], $ESCAPED, 0,                        $_[1], $_[2]); }
sub convert_e2e   { ___convert($_[0], $ESCAPED, $ESCAPED,                 $_[1], $_[2]); }
sub convert_e2eu  { ___convert($_[0], $ESCAPED, $ESCAPED|$ESCAPED_WP,     $_[1], $_[2]); }
sub convert_e2E   { ___convert($_[0], $ESCAPED, $ESCAPED_ALL,             $_[1], $_[2]); }
sub convert_e2Eu  { ___convert($_[0], $ESCAPED, $ESCAPED_ALL|$ESCAPED_WP, $_[1], $_[2]); }

sub convert_eu2s   { ___convert($_[0], $ESCAPED|$ESCAPED_WP, 0,                        $_[1], $_[2]); }
sub convert_eu2e   { ___convert($_[0], $ESCAPED|$ESCAPED_WP, $ESCAPED,                 $_[1], $_[2]); }
sub convert_eu2eu  { ___convert($_[0], $ESCAPED|$ESCAPED_WP, $ESCAPED|$ESCAPED_WP,     $_[1], $_[2]); }
sub convert_eu2E   { ___convert($_[0], $ESCAPED|$ESCAPED_WP, $ESCAPED_ALL,             $_[1], $_[2]); }
sub convert_eu2Eu  { ___convert($_[0], $ESCAPED|$ESCAPED_WP, $ESCAPED_ALL|$ESCAPED_WP, $_[1], $_[2]); }

## main

print "convert_s2s  : " . convert_s2s  ($ARGV[0], $ENCODE_SJIS, $ENCODE_EUC) . "\n";
print "convert_s2e  : " . convert_s2e  ($ARGV[0], $ENCODE_SJIS, $ENCODE_EUC) . "\n";
print "convert_s2eu : " . convert_s2eu ($ARGV[0], $ENCODE_SJIS, $ENCODE_EUC) . "\n";
print "convert_s2E  : " . convert_s2E  ($ARGV[0], $ENCODE_SJIS, $ENCODE_EUC) . "\n";
print "convert_s2Eu : " . convert_s2Eu ($ARGV[0], $ENCODE_SJIS, $ENCODE_EUC) . "\n";
                          
print "convert_e2s  : " . convert_e2s  ($ARGV[0], $ENCODE_SJIS, $ENCODE_EUC) . "\n";
print "convert_e2e  : " . convert_e2e  ($ARGV[0], $ENCODE_SJIS, $ENCODE_EUC) . "\n";
print "convert_e2eu : " . convert_e2eu ($ARGV[0], $ENCODE_SJIS, $ENCODE_EUC) . "\n";
print "convert_e2E  : " . convert_e2E  ($ARGV[0], $ENCODE_SJIS, $ENCODE_EUC) . "\n";
print "convert_e2Eu : " . convert_e2Eu ($ARGV[0], $ENCODE_SJIS, $ENCODE_EUC) . "\n";
                          
print "convert_eu2s : " . convert_eu2s ($ARGV[0], $ENCODE_SJIS, $ENCODE_EUC) . "\n";
print "convert_eu2e : " . convert_eu2e ($ARGV[0], $ENCODE_SJIS, $ENCODE_EUC) . "\n";
print "convert_eu2eu: " . convert_eu2eu($ARGV[0], $ENCODE_SJIS, $ENCODE_EUC) . "\n";
print "convert_eu2E : " . convert_eu2E ($ARGV[0], $ENCODE_SJIS, $ENCODE_EUC) . "\n";
print "convert_eu2Eu: " . convert_eu2Eu($ARGV[0], $ENCODE_SJIS, $ENCODE_EUC) . "\n";
$ perl t.pl "大正野球娘。" | nkf -s
convert_s2s  : 大正野球娘。
convert_s2e  : %C2%E7%C0%B5%CC%EE%B5%E5%CC%BC%A1%A3
convert_s2eu : C2E7C0B5CCEEB5E5CCBCA1A3
convert_s2E  : %C2%E7%C0%B5%CC%EE%B5%E5%CC%BC%A1%A3
convert_s2Eu : C2E7C0B5CCEEB5E5CCBCA1A3
convert_e2s  : 大正野球娘。
convert_e2e  : %C2%E7%C0%B5%CC%EE%B5%E5%CC%BC%A1%A3
convert_e2eu : C2E7C0B5CCEEB5E5CCBCA1A3
convert_e2E  : %C2%E7%C0%B5%CC%EE%B5%E5%CC%BC%A1%A3
convert_e2Eu : C2E7C0B5CCEEB5E5CCBCA1A3
convert_eu2s : %大%正%野%球%娘%。
convert_eu2e : %25%C2%E7%25%C0%B5%25%CC%EE%25%B5%E5%25%CC%BC%25%A1%A3
convert_eu2eu: 25C2E725C0B525CCEE25B5E525CCBC25A1A3
convert_eu2E : %25%C2%E7%25%C0%B5%25%CC%EE%25%B5%E5%25%CC%BC%25%A1%A3
convert_eu2Eu: 25C2E725C0B525CCEE25B5E525CCBC25A1A3

うーん、まあ、目的は果たせてるみたいだから、とりあえずこんな感じで。