# Unicode::UCD - module preamble: strictures, version, export wiring.
package Unicode::UCD;
# Strictures are enabled for the whole file.
use strict;
use warnings;
# Module version string.
our $VERSION = '0.27';
# dclone is Storable's deep-copy routine.
# NOTE(review): its call sites are outside this part of the file.
use Storable qw(dclone);
# Inherit import() from Exporter the classic way (require + @ISA), so the
# package globals below drive what callers may import.
require Exporter;
our @ISA = qw(Exporter);
# Functions importable on request only: no @EXPORT list is declared in this
# preamble, so callers must name what they want, e.g.
#     use Unicode::UCD qw(charinfo casefold);
# UnicodeVersion() is not listed here; the SYNOPSIS calls it fully
# qualified as Unicode::UCD::UnicodeVersion().
our @EXPORT_OK = qw(charinfo
charblock charscript
charblocks charscripts
charinrange
general_categories bidi_types
compexcl
casefold casespec
namedseq);
# Carp supplies caller-perspective diagnostics (carp/croak).
# NOTE(review): its uses are outside this part of the file.
use Carp;
=head1 NAME
Unicode::UCD - Unicode character database
=head1 SYNOPSIS
use Unicode::UCD 'charinfo';
my $charinfo = charinfo($codepoint);
use Unicode::UCD 'casefold';
my $casefold = casefold(0xFB00);
use Unicode::UCD 'casespec';
my $casespec = casespec(0xFB00);
use Unicode::UCD 'charblock';
my $charblock = charblock($codepoint);
use Unicode::UCD 'charscript';
my $charscript = charscript($codepoint);
use Unicode::UCD 'charblocks';
my $charblocks = charblocks();
use Unicode::UCD 'charscripts';
my $charscripts = charscripts();
use Unicode::UCD qw(charscript charinrange);
my $range = charscript($script);
print "looks like $script\n" if charinrange($range, $codepoint);
use Unicode::UCD qw(general_categories bidi_types);
my $categories = general_categories();
my $types = bidi_types();
use Unicode::UCD 'compexcl';
my $compexcl = compexcl($codepoint);
use Unicode::UCD 'namedseq';
my $namedseq = namedseq($named_sequence_name);
my $unicode_version = Unicode::UCD::UnicodeVersion();
=head1 DESCRIPTION
The Unicode::UCD module offers a series of functions that
provide a simple interface to the Unicode
Character Database.
=head2 code point argument
Some of the functions are called with a I<code point argument>, which is either
a decimal or a hexadecimal scalar designating a Unicode code point, or C<U+>
followed by hexadecimals designating a Unicode code point. In other words, if
you want a code point to be interpreted as a hexadecimal number, you must
prefix it with either C<0x> or C<U+>, because a string like e.g. C<123> will be
interpreted as a decimal code point. Also note that Unicode is B<not> limited
to 16 bits (the number of Unicode code points is open-ended, in theory
unlimited): you may have more than 4 hexdigits.
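=head2 B<charinfo()>
This returns information about the input L</code point argument>
as a reference to a hash of fields as defined by the Unicode
standard. If the L</code point argument> is not assigned in the standard
(i.e., has the general category C<Cn> meaning C<Unassigned>)
or is a non-character (meaning it is guaranteed to never be assigned in
the standard), B<undef> is returned.
Fields that aren't applicable to the particular code point argument exist in the
returned hash, and are empty.
The keys in the hash with the meanings of their values are:
=over
=item B<code>
the input L</code point argument>
expressed in hexadecimal, with leading zeros
added if necessary to make it contain at least four hexdigits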
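=item B<name>
name of I<code>, all IN UPPER CASE.
Some control-type code points do not have names.
This field will be empty for C<Surrogate> and C<Private Use> code points,
and for the others without a name,
it will contain a description enclosed in angle brackets, like
C<E<lt>controlE<gt>>.
=item B<category>
The short name of the general category of I<code>.
This will match one of the keys in the hash returned by L</general_categories()>.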
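=item B<combining>
the combining class number for I<code>
used in the Canonical Ordering Algorithm.
For Unicode 5.1, this is described in Section 3.11 C<Canonical Ordering Behavior>
available at
L<http://www.unicode.org/versions/Unicode5.1.0/>
=item B<bidi>
bidirectional type of I<code>.
This will match one of the keys in the hash returned by L</bidi_types()>.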
=item B<decomposition>
is empty if I<code>
has no decomposition; or is one or more codes
(separated by spaces) that taken in order represent a decomposition for
I<code>. Each has at least four hexdigits.
The codes may be preceded by a word enclosed in angle brackets then a space,
like C<E<lt>compatE<gt> >, giving the type of decomposition
=item B<decimal>
if I<code>
is a decimal digit this is its integer numeric value
=item B<digit>
if I<code>
represents a whole number, this is its integer numeric value
=item B<numeric>
if I<code>
represents a whole or rational number, this is its numeric value.
Rational values are expressed as a string like C<1/4>.
=item B<mirrored>
C<Y> or C<N> designating if I<code>
is mirrored in bidirectional text
=item B<unicode10>
name of I<code>
in the Unicode 1.0 standard if one
existed for this code point and is different from the current name
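=item B<comment>
ISO 10646 comment field.
It appears in parentheses in the ISO 10646 names list,
or contains an asterisk to indicate there is
a note for this code point in Annex P of that standard.
=item B<upper>
is empty if there is no single code point uppercase mapping for I<code>;
otherwise it is that mapping expressed as at least four hexdigits.
(L</casespec()> should be used in addition to B<charinfo()>
for case mappings when the calling program can cope with multiple code point
mappings.)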
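=item B<lower>
is empty if there is no single code point lowercase mapping for I<code>;
otherwise it is that mapping expressed as at least four hexdigits.
(L</casespec()> should be used in addition to B<charinfo()>
for case mappings when the calling program can cope with multiple code point
mappings.)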
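=item B<title>
is empty if there is no single code point titlecase mapping for I<code>;
otherwise it is that mapping expressed as at least four hexdigits.
(L</casespec()> should be used in addition to B<charinfo()>
for case mappings when the calling program can cope with multiple code point
mappings.)
=item B<block>
block I<code>
belongs to (used in \p{In...}).
See L</Blocks versus Scripts>.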
=item B