#!/usr/bin/perl
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if $running_under_some_shell;
#!./perl
# $Id: piconv,v 2.4 2009/07/08 13:34:15 dankogai Exp $
#
use 5.8.0;
use strict;
use Encode ;
use Encode::Alias;
my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);
use File::Basename;
my $name = basename($0);
use Getopt::Long qw(:config no_ignore_case);
my %Opt;
help()
unless
GetOptions(\%Opt,
'from|f=s',
'to|t=s',
'list|l',
'string|s=s',
'check|C=i',
'c',
'perlqq|p',
'htmlcref',
'xmlcref',
'debug|D',
'scheme|S=s',
'resolve|r=s',
'help',
);
$Opt{help} and help();
$Opt{list} and list_encodings();
my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
defined $Opt{resolve} and resolve_encoding($Opt{resolve});
$Opt{from} || $Opt{to} || help();
my $from = $Opt{from} || $locale or help("from_encoding unspecified");
my $to = $Opt{to} || $locale or help("to_encoding unspecified");
$Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit;
my $scheme = do {
if (defined $Opt{scheme}) {
if (!exists $Scheme{$Opt{scheme}}) {
warn "Unknown scheme '$Opt{scheme}', fallback to 'from_to'.\n";
'from_to';
} else {
$Opt{scheme};
}
} else {
'from_to';
}
};
$Opt{check} ||= $Opt{c};
$Opt{perlqq} and $Opt{check} = Encode::PERLQQ;
$Opt{htmlcref} and $Opt{check} = Encode::HTMLCREF;
$Opt{xmlcref} and $Opt{check} = Encode::XMLCREF;
if ($Opt{debug}){
my $cfrom = Encode->getEncoding($from)->name;
my $cto = Encode->getEncoding($to)->name;
print <<"EOT";
Scheme: $scheme
From: $from => $cfrom
To: $to => $cto
EOT
}
my %use_bom = map { $_ => 1 } qw/UTF-16 UTF-32/;
# we do not use <> (or ARGV) for the sake of binmode()
@ARGV or push @ARGV, \*STDIN;
unless ( $scheme eq 'perlio' ) {
binmode STDOUT;
my $need2slurp = $use_bom{ find_encoding($to)->name };
for my $argv (@ARGV) {
my $ifh = ref $argv ? $argv : undef;
$ifh or open $ifh, "<", $argv or warn "Can't open $argv: $!" and next;
$ifh or open $ifh, "<", $argv or next;
binmode $ifh;
if ( $scheme eq 'from_to' ) { # default
if ($need2slurp){
local $/;
$_ = <$ifh>;
Encode::from_to( $_, $from, $to, $Opt{check} );
print;
}else{
while (<$ifh>) {
Encode::from_to( $_, $from, $to, $Opt{check} );
print;
}
}
}
elsif ( $scheme eq 'decode_encode' ) { # step-by-step
if ($need2slurp){
local $/;
$_ = <$ifh>;
my $decoded = decode( $from, $_, $Opt{check} );
my $encoded = encode( $to, $decoded );
print $encoded;
}else{
while (<$ifh>) {
my $decoded = decode( $from, $_, $Opt{check} );
my $encoded = encode( $to, $decoded );
print $encoded;
}
}
}
else { # won't reach
die "$name: unknown scheme: $scheme";
}
}
}
else {
# NI-S favorite
binmode STDOUT => "raw:encoding($to)";
for my $argv (@ARGV) {
my $ifh = ref $argv ? $argv : undef;
$ifh or open $ifh, "<", $argv or warn "Can't open $argv: $!" and next;
$ifh or open $ifh, "<", $argv or next;
binmode $ifh => "raw:encoding($from)";
print while (<$ifh>);
}
}
sub list_encodings {
print join( "\n", Encode->encodings(":all") ), "\n";
exit 0;
}
sub resolve_encoding {
if ( my $alias = Encode::resolve_alias( $_[0] ) ) {
print $alias, "\n";
exit 0;
}
else {
warn "$name: $_[0] is not known to Encode\n";
exit 1;
}
}
sub help {
my $message = shift;
$message and print STDERR "$name error: $message\n";
print STDERR <<"EOT";
$name [-f from_encoding] [-t to_encoding] [-s string] [files...]
$name -l
$name -r encoding_alias
-l,--list
lists all available encodings
-r,--resolve encoding_alias
resolve encoding to its (Encode) canonical name
-f,--from from_encoding
when omitted, the current locale will be used
-t,--to to_encoding
when omitted, the current locale will be used
-s,--string string
"string" will be the input instead of STDIN or files
The following are mainly of interest to Encode hackers:
-D,--debug show debug information
-C N | -c check the validity of the input
-S,--scheme scheme use the scheme for conversion
Those are handy when you can only see ascii characters:
-p,--perlqq
--htmlcref
--xmlcref
EOT
exit;
}
__END__
=head1 NAME
piconv -- iconv(1), reinvented in perl
=head1 SYNOPSIS
piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
piconv -l
piconv [-C N|-c|-p]
piconv -S scheme ...
piconv -r encoding
piconv -D ...
piconv -h
=head1 DESCRIPTION
B