diff options
author | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2006-06-01 14:50:50 +0000 |
---|---|---|
committer | Rafael Garcia-Suarez <rgarciasuarez@gmail.com> | 2006-06-01 14:50:50 +0000 |
commit | aadaa45530a169c37ceb95c3ed76644ccbd2a644 (patch) | |
tree | f1262b24f96f199b7ade533ded29c80a10ea3f3f | |
parent | 1a959400560e3bbe1b60592fa513f3356e654efa (diff) | |
download | perl-aadaa45530a169c37ceb95c3ed76644ccbd2a644.tar.gz |
Add the perlunitut manpage, by Juerd Waalboer
p4raw-id: //depot/perl@28345
-rw-r--r-- | MANIFEST | 1 | ||||
-rw-r--r-- | pod.lst | 1 | ||||
-rw-r--r-- | pod/perl.pod | 1 | ||||
-rw-r--r-- | pod/perltoc.pod | 354 | ||||
-rw-r--r-- | pod/perlunitut.pod | 425 | ||||
-rw-r--r-- | vms/descrip_mms.template | 11 | ||||
-rw-r--r-- | win32/pod.mak | 1 |
7 files changed, 598 insertions, 196 deletions
@@ -2963,6 +2963,7 @@ pod/perltoot.pod Perl OO tutorial, part 1 pod/perltrap.pod Perl traps for the unwary pod/perlunicode.pod Perl Unicode support pod/perluniintro.pod Perl Unicode introduction +pod/perlunitut.pod Perl Unicode tutorial pod/perlutil.pod utilities packaged with the Perl distribution pod/perlvar.pod Perl predefined variables pod/perlxs.pod Perl XS application programming interface @@ -78,6 +78,7 @@ h Reference Manual perllocale Perl locale support perluniintro Perl Unicode introduction perlunicode Perl Unicode support + perlunitut Perl Unicode tutorial perlebcdic Considerations for running Perl on EBCDIC platforms perlsec Perl security diff --git a/pod/perl.pod b/pod/perl.pod index c722d18b1c..e00a758905 100644 --- a/pod/perl.pod +++ b/pod/perl.pod @@ -95,6 +95,7 @@ For ease of access, the Perl manual has been split up into several sections. perllocale Perl locale support perluniintro Perl Unicode introduction perlunicode Perl Unicode support + perlunitut Perl Unicode tutorial perlebcdic Considerations for running Perl on EBCDIC platforms perlsec Perl security diff --git a/pod/perltoc.pod b/pod/perltoc.pod index d183bfb137..1439bd9a25 100644 --- a/pod/perltoc.pod +++ b/pod/perltoc.pod @@ -3818,6 +3818,34 @@ autodetected, C<use encoding> needed to upgrade non-Latin-1 byte strings =back +=head2 perlunitut - Perl Unicode Tutorial + +=over 4 + +=item DESCRIPTION + +=over 4 + +=item Definitions + +=item Your new toolkit + +=item I/O flow (the actual 5 minute tutorial) + +=item Q and A + +=back + +=item SUMMARY + +=item ACKNOWLEDGEMENTS + +=item AUTHOR + +=item SEE ALSO + +=back + =head2 perlebcdic - Considerations for running Perl on EBCDIC platforms =over 4 @@ -5336,7 +5364,7 @@ PL_sv_undef X<PL_sv_undef>, PL_sv_yes X<PL_sv_yes> GvSV X<GvSV>, gv_const_sv X<gv_const_sv>, gv_fetchmeth X<gv_fetchmeth>, gv_fetchmethod_autoload X<gv_fetchmethod_autoload>, gv_fetchmeth_autoload X<gv_fetchmeth_autoload>, gv_stashpv X<gv_stashpv>, gv_stashpvn -X<gv_stashpvn>, 
gv_stashsv X<gv_stashsv> +X<gv_stashpvn>, gv_stashpvs X<gv_stashpvs>, gv_stashsv X<gv_stashsv> =item Handy Values @@ -5351,13 +5379,13 @@ X<HeSVKEY_force>, HeSVKEY_set X<HeSVKEY_set>, HeVAL X<HeVAL>, HvNAME X<HvNAME>, hv_assert X<hv_assert>, hv_clear X<hv_clear>, hv_clear_placeholders X<hv_clear_placeholders>, hv_delete X<hv_delete>, hv_delete_ent X<hv_delete_ent>, hv_exists X<hv_exists>, hv_exists_ent -X<hv_exists_ent>, hv_fetch X<hv_fetch>, hv_fetch_ent X<hv_fetch_ent>, -hv_iterinit X<hv_iterinit>, hv_iterkey X<hv_iterkey>, hv_iterkeysv -X<hv_iterkeysv>, hv_iternext X<hv_iternext>, hv_iternextsv -X<hv_iternextsv>, hv_iternext_flags X<hv_iternext_flags>, hv_iterval -X<hv_iterval>, hv_magic X<hv_magic>, hv_scalar X<hv_scalar>, hv_store -X<hv_store>, hv_store_ent X<hv_store_ent>, hv_undef X<hv_undef>, newHV -X<newHV> +X<hv_exists_ent>, hv_fetch X<hv_fetch>, hv_fetchs X<hv_fetchs>, +hv_fetch_ent X<hv_fetch_ent>, hv_iterinit X<hv_iterinit>, hv_iterkey +X<hv_iterkey>, hv_iterkeysv X<hv_iterkeysv>, hv_iternext X<hv_iternext>, +hv_iternextsv X<hv_iternextsv>, hv_iternext_flags X<hv_iternext_flags>, +hv_iterval X<hv_iterval>, hv_magic X<hv_magic>, hv_scalar X<hv_scalar>, +hv_store X<hv_store>, hv_stores X<hv_stores>, hv_store_ent X<hv_store_ent>, +hv_undef X<hv_undef>, newHV X<newHV> =item Magical Functions @@ -5372,11 +5400,11 @@ X<SvUNLOCK> =item Memory Management Copy X<Copy>, CopyD X<CopyD>, Move X<Move>, MoveD X<MoveD>, Newx X<Newx>, -Newxc X<Newxc>, Newxz X<Newxz>, Poison X<Poison>, PoisonNew X<PoisonNew>, -PoisonWith X<PoisonWith>, Renew X<Renew>, Renewc X<Renewc>, Safefree -X<Safefree>, savepv X<savepv>, savepvn X<savepvn>, savesharedpv -X<savesharedpv>, savesvpv X<savesvpv>, StructCopy X<StructCopy>, Zero -X<Zero>, ZeroD X<ZeroD> +Newxc X<Newxc>, Newxz X<Newxz>, Poison X<Poison>, PoisonFree X<PoisonFree>, +PoisonNew X<PoisonNew>, PoisonWith X<PoisonWith>, Renew X<Renew>, Renewc +X<Renewc>, Safefree X<Safefree>, savepv X<savepv>, savepvn X<savepvn>, +savepvs 
X<savepvs>, savesharedpv X<savesharedpv>, savesvpv X<savesvpv>, +StructCopy X<StructCopy>, Zero X<Zero>, ZeroD X<ZeroD> =item Miscellaneous Functions @@ -5464,61 +5492,64 @@ X<SvPV_nolen>, SvPV_nomg X<SvPV_nomg>, SvPV_set X<SvPV_set>, SvREFCNT X<SvREFCNT>, SvREFCNT_dec X<SvREFCNT_dec>, SvREFCNT_inc X<SvREFCNT_inc>, SvREFCNT_inc_NN X<SvREFCNT_inc_NN>, SvREFCNT_inc_simple X<SvREFCNT_inc_simple>, SvREFCNT_inc_simple_NN X<SvREFCNT_inc_simple_NN>, -SvREFCNT_inc_simple_void X<SvREFCNT_inc_simple_void>, SvREFCNT_inc_void -X<SvREFCNT_inc_void>, SvREFCNT_inc_void_NN X<SvREFCNT_inc_void_NN>, SvROK -X<SvROK>, SvROK_off X<SvROK_off>, SvROK_on X<SvROK_on>, SvRV X<SvRV>, -SvRV_set X<SvRV_set>, SvSTASH X<SvSTASH>, SvSTASH_set X<SvSTASH_set>, -SvTAINT X<SvTAINT>, SvTAINTED X<SvTAINTED>, SvTAINTED_off X<SvTAINTED_off>, -SvTAINTED_on X<SvTAINTED_on>, SvTRUE X<SvTRUE>, SvTYPE X<SvTYPE>, SvUOK -X<SvUOK>, SvUPGRADE X<SvUPGRADE>, SvUTF8 X<SvUTF8>, SvUTF8_off -X<SvUTF8_off>, SvUTF8_on X<SvUTF8_on>, SvUV X<SvUV>, SvUVX X<SvUVX>, SvUVx -X<SvUVx>, SvUV_nomg X<SvUV_nomg>, SvUV_set X<SvUV_set>, SvVOK X<SvVOK>, -sv_catpvn_nomg X<sv_catpvn_nomg>, sv_catsv_nomg X<sv_catsv_nomg>, -sv_derived_from X<sv_derived_from>, sv_report_used X<sv_report_used>, -sv_setsv_nomg X<sv_setsv_nomg> +SvREFCNT_inc_simple_void X<SvREFCNT_inc_simple_void>, +SvREFCNT_inc_simple_void_NN X<SvREFCNT_inc_simple_void_NN>, +SvREFCNT_inc_void X<SvREFCNT_inc_void>, SvREFCNT_inc_void_NN +X<SvREFCNT_inc_void_NN>, SvROK X<SvROK>, SvROK_off X<SvROK_off>, SvROK_on +X<SvROK_on>, SvRV X<SvRV>, SvRV_set X<SvRV_set>, SvSTASH X<SvSTASH>, +SvSTASH_set X<SvSTASH_set>, SvTAINT X<SvTAINT>, SvTAINTED X<SvTAINTED>, +SvTAINTED_off X<SvTAINTED_off>, SvTAINTED_on X<SvTAINTED_on>, SvTRUE +X<SvTRUE>, SvTYPE X<SvTYPE>, SvUOK X<SvUOK>, SvUPGRADE X<SvUPGRADE>, SvUTF8 +X<SvUTF8>, SvUTF8_off X<SvUTF8_off>, SvUTF8_on X<SvUTF8_on>, SvUV X<SvUV>, +SvUVX X<SvUVX>, SvUVx X<SvUVx>, SvUV_nomg X<SvUV_nomg>, SvUV_set +X<SvUV_set>, SvVOK X<SvVOK>, sv_catpvn_nomg 
X<sv_catpvn_nomg>, +sv_catsv_nomg X<sv_catsv_nomg>, sv_derived_from X<sv_derived_from>, +sv_report_used X<sv_report_used>, sv_setsv_nomg X<sv_setsv_nomg> =item SV-Body Allocation looks_like_number X<looks_like_number>, newRV_noinc X<newRV_noinc>, newSV X<newSV>, newSVhek X<newSVhek>, newSViv X<newSViv>, newSVnv X<newSVnv>, newSVpv X<newSVpv>, newSVpvf X<newSVpvf>, newSVpvn X<newSVpvn>, -newSVpvn_share X<newSVpvn_share>, newSVrv X<newSVrv>, newSVsv X<newSVsv>, -newSVuv X<newSVuv>, sv_2bool X<sv_2bool>, sv_2cv X<sv_2cv>, sv_2io -X<sv_2io>, sv_2iv_flags X<sv_2iv_flags>, sv_2mortal X<sv_2mortal>, sv_2nv -X<sv_2nv>, sv_2pvbyte X<sv_2pvbyte>, sv_2pvutf8 X<sv_2pvutf8>, sv_2pv_flags +newSVpvn_share X<newSVpvn_share>, newSVpvs X<newSVpvs>, newSVpvs_share +X<newSVpvs_share>, newSVrv X<newSVrv>, newSVsv X<newSVsv>, newSVuv +X<newSVuv>, sv_2bool X<sv_2bool>, sv_2cv X<sv_2cv>, sv_2io X<sv_2io>, +sv_2iv_flags X<sv_2iv_flags>, sv_2mortal X<sv_2mortal>, sv_2nv X<sv_2nv>, +sv_2pvbyte X<sv_2pvbyte>, sv_2pvutf8 X<sv_2pvutf8>, sv_2pv_flags X<sv_2pv_flags>, sv_2uv_flags X<sv_2uv_flags>, sv_backoff X<sv_backoff>, sv_bless X<sv_bless>, sv_catpv X<sv_catpv>, sv_catpvf X<sv_catpvf>, sv_catpvf_mg X<sv_catpvf_mg>, sv_catpvn X<sv_catpvn>, sv_catpvn_flags -X<sv_catpvn_flags>, sv_catpv_mg X<sv_catpv_mg>, sv_catsv X<sv_catsv>, -sv_catsv_flags X<sv_catsv_flags>, sv_chop X<sv_chop>, sv_clear X<sv_clear>, -sv_cmp X<sv_cmp>, sv_cmp_locale X<sv_cmp_locale>, sv_collxfrm -X<sv_collxfrm>, sv_copypv X<sv_copypv>, sv_dec X<sv_dec>, sv_eq X<sv_eq>, -sv_force_normal_flags X<sv_force_normal_flags>, sv_free X<sv_free>, sv_gets -X<sv_gets>, sv_grow X<sv_grow>, sv_inc X<sv_inc>, sv_insert X<sv_insert>, -sv_isa X<sv_isa>, sv_isobject X<sv_isobject>, sv_len X<sv_len>, sv_len_utf8 -X<sv_len_utf8>, sv_magic X<sv_magic>, sv_magicext X<sv_magicext>, -sv_mortalcopy X<sv_mortalcopy>, sv_newmortal X<sv_newmortal>, sv_newref -X<sv_newref>, sv_pos_b2u X<sv_pos_b2u>, sv_pos_u2b X<sv_pos_u2b>, -sv_pvbyten_force 
X<sv_pvbyten_force>, sv_pvn_force X<sv_pvn_force>, -sv_pvn_force_flags X<sv_pvn_force_flags>, sv_pvutf8n_force -X<sv_pvutf8n_force>, sv_reftype X<sv_reftype>, sv_replace X<sv_replace>, -sv_reset X<sv_reset>, sv_rvweaken X<sv_rvweaken>, sv_setiv X<sv_setiv>, -sv_setiv_mg X<sv_setiv_mg>, sv_setnv X<sv_setnv>, sv_setnv_mg +X<sv_catpvn_flags>, sv_catpvs X<sv_catpvs>, sv_catpv_mg X<sv_catpv_mg>, +sv_catsv X<sv_catsv>, sv_catsv_flags X<sv_catsv_flags>, sv_chop X<sv_chop>, +sv_clear X<sv_clear>, sv_cmp X<sv_cmp>, sv_cmp_locale X<sv_cmp_locale>, +sv_collxfrm X<sv_collxfrm>, sv_copypv X<sv_copypv>, sv_dec X<sv_dec>, sv_eq +X<sv_eq>, sv_force_normal_flags X<sv_force_normal_flags>, sv_free +X<sv_free>, sv_gets X<sv_gets>, sv_grow X<sv_grow>, sv_inc X<sv_inc>, +sv_insert X<sv_insert>, sv_isa X<sv_isa>, sv_isobject X<sv_isobject>, +sv_len X<sv_len>, sv_len_utf8 X<sv_len_utf8>, sv_magic X<sv_magic>, +sv_magicext X<sv_magicext>, sv_mortalcopy X<sv_mortalcopy>, sv_newmortal +X<sv_newmortal>, sv_newref X<sv_newref>, sv_pos_b2u X<sv_pos_b2u>, +sv_pos_u2b X<sv_pos_u2b>, sv_pvbyten_force X<sv_pvbyten_force>, +sv_pvn_force X<sv_pvn_force>, sv_pvn_force_flags X<sv_pvn_force_flags>, +sv_pvutf8n_force X<sv_pvutf8n_force>, sv_reftype X<sv_reftype>, sv_replace +X<sv_replace>, sv_reset X<sv_reset>, sv_rvweaken X<sv_rvweaken>, sv_setiv +X<sv_setiv>, sv_setiv_mg X<sv_setiv_mg>, sv_setnv X<sv_setnv>, sv_setnv_mg X<sv_setnv_mg>, sv_setpv X<sv_setpv>, sv_setpvf X<sv_setpvf>, sv_setpvf_mg X<sv_setpvf_mg>, sv_setpviv X<sv_setpviv>, sv_setpviv_mg X<sv_setpviv_mg>, -sv_setpvn X<sv_setpvn>, sv_setpvn_mg X<sv_setpvn_mg>, sv_setpv_mg -X<sv_setpv_mg>, sv_setref_iv X<sv_setref_iv>, sv_setref_nv X<sv_setref_nv>, -sv_setref_pv X<sv_setref_pv>, sv_setref_pvn X<sv_setref_pvn>, sv_setref_uv -X<sv_setref_uv>, sv_setsv X<sv_setsv>, sv_setsv_flags X<sv_setsv_flags>, -sv_setsv_mg X<sv_setsv_mg>, sv_setuv X<sv_setuv>, sv_setuv_mg -X<sv_setuv_mg>, sv_tainted X<sv_tainted>, sv_true X<sv_true>, sv_unmagic 
-X<sv_unmagic>, sv_unref_flags X<sv_unref_flags>, sv_untaint X<sv_untaint>, -sv_upgrade X<sv_upgrade>, sv_usepvn_flags X<sv_usepvn_flags>, -sv_utf8_decode X<sv_utf8_decode>, sv_utf8_downgrade X<sv_utf8_downgrade>, -sv_utf8_encode X<sv_utf8_encode>, sv_utf8_upgrade X<sv_utf8_upgrade>, -sv_utf8_upgrade_flags X<sv_utf8_upgrade_flags>, sv_vcatpvf X<sv_vcatpvf>, -sv_vcatpvfn X<sv_vcatpvfn>, sv_vcatpvf_mg X<sv_vcatpvf_mg>, sv_vsetpvf -X<sv_vsetpvf>, sv_vsetpvfn X<sv_vsetpvfn>, sv_vsetpvf_mg X<sv_vsetpvf_mg> +sv_setpvn X<sv_setpvn>, sv_setpvn_mg X<sv_setpvn_mg>, sv_setpvs +X<sv_setpvs>, sv_setpv_mg X<sv_setpv_mg>, sv_setref_iv X<sv_setref_iv>, +sv_setref_nv X<sv_setref_nv>, sv_setref_pv X<sv_setref_pv>, sv_setref_pvn +X<sv_setref_pvn>, sv_setref_uv X<sv_setref_uv>, sv_setsv X<sv_setsv>, +sv_setsv_flags X<sv_setsv_flags>, sv_setsv_mg X<sv_setsv_mg>, sv_setuv +X<sv_setuv>, sv_setuv_mg X<sv_setuv_mg>, sv_tainted X<sv_tainted>, sv_true +X<sv_true>, sv_unmagic X<sv_unmagic>, sv_unref_flags X<sv_unref_flags>, +sv_untaint X<sv_untaint>, sv_upgrade X<sv_upgrade>, sv_usepvn_flags +X<sv_usepvn_flags>, sv_utf8_decode X<sv_utf8_decode>, sv_utf8_downgrade +X<sv_utf8_downgrade>, sv_utf8_encode X<sv_utf8_encode>, sv_utf8_upgrade +X<sv_utf8_upgrade>, sv_utf8_upgrade_flags X<sv_utf8_upgrade_flags>, +sv_vcatpvf X<sv_vcatpvf>, sv_vcatpvfn X<sv_vcatpvfn>, sv_vcatpvf_mg +X<sv_vcatpvf_mg>, sv_vsetpvf X<sv_vsetpvf>, sv_vsetpvfn X<sv_vsetpvfn>, +sv_vsetpvf_mg X<sv_vsetpvf_mg> =item Unicode Support @@ -10597,6 +10628,16 @@ threads->detach(), threads->self(), $thr->tid(), threads->tid(), threads->object($tid), threads->yield(), threads->list(), $thr1->equal($thr2), async BLOCK;, $thr->_handle(), threads->_handle() +=item THREAD CONTEXT + +=over 4 + +=item Explicit context + +=item Implicit context + +=back + =item THREAD STACK SIZE threads->get_stack_size();, $size = $thr->get_stack_size();, $old_size = @@ -12160,7 +12201,7 @@ redoop, nextop, lastop =item B::COP Methods -label, stash, stashpv, 
file, cop_seq, arybase, line, warnings, io +label, stash, stashpv, file, cop_seq, arybase, line, warnings, io, hints =back @@ -12358,10 +12399,11 @@ B<~> B<#>I<var>, B<#>I<var>I<N>, B<#>I<Var>, B<#addr>, B<#arg>, B<#class>, B<#classsym>, B<#coplabel>, B<#exname>, B<#extarg>, B<#firstaddr>, -B<#flags>, B<#flagval>, B<#hyphseq>, B<#label>, B<#lastaddr>, B<#name>, -B<#NAME>, B<#next>, B<#nextaddr>, B<#noise>, B<#private>, B<#privval>, -B<#seq>, B<#seqnum>, B<#opt>, B<#static>, B<#sibaddr>, B<#svaddr>, -B<#svclass>, B<#svval>, B<#targ>, B<#targarg>, B<#targarglife>, B<#typenum> +B<#flags>, B<#flagval>, B<#hints>, B<#hintsval>, B<#hyphseq>, B<#label>, +B<#lastaddr>, B<#name>, B<#NAME>, B<#next>, B<#nextaddr>, B<#noise>, +B<#private>, B<#privval>, B<#seq>, B<#seqnum>, B<#opt>, B<#static>, +B<#sibaddr>, B<#svaddr>, B<#svclass>, B<#svval>, B<#targ>, B<#targarg>, +B<#targarglife>, B<#typenum> =back @@ -12469,8 +12511,8 @@ strict, $[, bytes, utf8, integer, re, warnings, hint_bits, warning_bits =item OPTIONS AND LINT CHECKS -B<context>, B<implicit-read> and B<implicit-write>, B<bare-subs>, -B<dollar-underscore>, B<private-names>, B<undefined-subs>, +B<magic-diamond>, B<context>, B<implicit-read> and B<implicit-write>, +B<bare-subs>, B<dollar-underscore>, B<private-names>, B<undefined-subs>, B<regexp-variables>, B<all>, B<none> =item NON LINT-CHECK OPTIONS @@ -12479,6 +12521,11 @@ B<-u Package> =item EXTENDING LINT +=item TODO + +while(<FH>) stomps $_, strict oo, unchecked system calls, more tests, +validate against older perls + =item BUGS =item AUTHOR @@ -15006,40 +15053,39 @@ C<d_attribute_format>, C<d_attribute_malloc>, C<d_attribute_nonnull>, C<d_attribute_noreturn>, C<d_attribute_pure>, C<d_attribute_unused>, C<d_attribute_warn_unused_result>, C<d_bcmp>, C<d_bcopy>, C<d_bsd>, C<d_bsdgetpgrp>, C<d_bsdsetpgrp>, C<d_builtin_choose_expr>, -C<d_builtin_expect>, C<d_bzero>, C<d_casti32>, C<d_castneg>, C<d_charvspr>, -C<d_chown>, C<d_chroot>, C<d_chsize>, C<d_class>, 
C<d_clearenv>, -C<d_closedir>, C<d_cmsghdr_s>, C<d_const>, C<d_copysignl>, C<d_crypt>, -C<d_crypt_r>, C<d_csh>, C<d_ctermid_r>, C<d_ctime_r>, -C<d_c99_variadic_macros>, C<d_cuserid>, C<d_dbl_dig>, C<d_dbminitproto>, -C<d_difftime>, C<d_dirfd>, C<d_dirnamlen>, C<d_dlerror>, C<d_dlopen>, -C<d_dlsymun>, C<d_dosuid>, C<d_drand48_r>, C<d_drand48proto>, C<d_dup2>, -C<d_eaccess>, C<d_endgrent>, C<d_endgrent_r>, C<d_endhent>, -C<d_endhostent_r>, C<d_endnent>, C<d_endnetent_r>, C<d_endpent>, -C<d_endprotoent_r>, C<d_endpwent>, C<d_endpwent_r>, C<d_endsent>, -C<d_endservent_r>, C<d_eofnblk>, C<d_eunice>, C<d_faststdio>, C<d_fchdir>, -C<d_fchmod>, C<d_fchown>, C<d_fcntl>, C<d_fcntl_can_lock>, C<d_fd_macros>, -C<d_fd_set>, C<d_fds_bits>, C<d_fgetpos>, C<d_finite>, C<d_finitel>, -C<d_flexfnam>, C<d_flock>, C<d_flockproto>, C<d_fork>, C<d_fp_class>, -C<d_fpathconf>, C<d_fpclass>, C<d_fpclassify>, C<d_fpclassl>, -C<d_fpos64_t>, C<d_frexpl>, C<d_fs_data_s>, C<d_fseeko>, C<d_fsetpos>, -C<d_fstatfs>, C<d_fstatvfs>, C<d_fsync>, C<d_ftello>, C<d_ftime>, -C<d_futimes>, C<d_Gconvert>, C<d_getcwd>, C<d_getespwnam>, C<d_getfsstat>, -C<d_getgrent>, C<d_getgrent_r>, C<d_getgrgid_r>, C<d_getgrnam_r>, -C<d_getgrps>, C<d_gethbyaddr>, C<d_gethbyname>, C<d_gethent>, -C<d_gethname>, C<d_gethostbyaddr_r>, C<d_gethostbyname_r>, -C<d_gethostent_r>, C<d_gethostprotos>, C<d_getitimer>, C<d_getlogin>, -C<d_getlogin_r>, C<d_getmnt>, C<d_getmntent>, C<d_getnbyaddr>, -C<d_getnbyname>, C<d_getnent>, C<d_getnetbyaddr_r>, C<d_getnetbyname_r>, -C<d_getnetent_r>, C<d_getnetprotos>, C<d_getpagsz>, C<d_getpbyname>, -C<d_getpbynumber>, C<d_getpent>, C<d_getpgid>, C<d_getpgrp>, C<d_getpgrp2>, -C<d_getppid>, C<d_getprior>, C<d_getprotobyname_r>, -C<d_getprotobynumber_r>, C<d_getprotoent_r>, C<d_getprotoprotos>, -C<d_getprpwnam>, C<d_getpwent>, C<d_getpwent_r>, C<d_getpwnam_r>, -C<d_getpwuid_r>, C<d_getsbyname>, C<d_getsbyport>, C<d_getsent>, -C<d_getservbyname_r>, C<d_getservbyport_r>, C<d_getservent_r>, 
-C<d_getservprotos>, C<d_getspnam>, C<d_getspnam_r>, C<d_gettimeod>, -C<d_gmtime_r>, C<d_gnulibc>, C<d_grpasswd>, C<d_hasmntopt>, C<d_htonl>, -C<d_ilogbl>, C<d_inc_version_list>, C<d_index>, C<d_inetaton>, +C<d_builtin_expect>, C<d_bzero>, C<d_c99_variadic_macros>, C<d_casti32>, +C<d_castneg>, C<d_charvspr>, C<d_chown>, C<d_chroot>, C<d_chsize>, +C<d_class>, C<d_clearenv>, C<d_closedir>, C<d_cmsghdr_s>, C<d_const>, +C<d_copysignl>, C<d_crypt>, C<d_crypt_r>, C<d_csh>, C<d_ctermid_r>, +C<d_ctime_r>, C<d_cuserid>, C<d_dbl_dig>, C<d_dbminitproto>, C<d_difftime>, +C<d_dirfd>, C<d_dirnamlen>, C<d_dlerror>, C<d_dlopen>, C<d_dlsymun>, +C<d_dosuid>, C<d_drand48_r>, C<d_drand48proto>, C<d_dup2>, C<d_eaccess>, +C<d_endgrent>, C<d_endgrent_r>, C<d_endhent>, C<d_endhostent_r>, +C<d_endnent>, C<d_endnetent_r>, C<d_endpent>, C<d_endprotoent_r>, +C<d_endpwent>, C<d_endpwent_r>, C<d_endsent>, C<d_endservent_r>, +C<d_eofnblk>, C<d_eunice>, C<d_faststdio>, C<d_fchdir>, C<d_fchmod>, +C<d_fchown>, C<d_fcntl>, C<d_fcntl_can_lock>, C<d_fd_macros>, C<d_fd_set>, +C<d_fds_bits>, C<d_fgetpos>, C<d_finite>, C<d_finitel>, C<d_flexfnam>, +C<d_flock>, C<d_flockproto>, C<d_fork>, C<d_fp_class>, C<d_fpathconf>, +C<d_fpclass>, C<d_fpclassify>, C<d_fpclassl>, C<d_fpos64_t>, C<d_frexpl>, +C<d_fs_data_s>, C<d_fseeko>, C<d_fsetpos>, C<d_fstatfs>, C<d_fstatvfs>, +C<d_fsync>, C<d_ftello>, C<d_ftime>, C<d_futimes>, C<d_Gconvert>, +C<d_getcwd>, C<d_getespwnam>, C<d_getfsstat>, C<d_getgrent>, +C<d_getgrent_r>, C<d_getgrgid_r>, C<d_getgrnam_r>, C<d_getgrps>, +C<d_gethbyaddr>, C<d_gethbyname>, C<d_gethent>, C<d_gethname>, +C<d_gethostbyaddr_r>, C<d_gethostbyname_r>, C<d_gethostent_r>, +C<d_gethostprotos>, C<d_getitimer>, C<d_getlogin>, C<d_getlogin_r>, +C<d_getmnt>, C<d_getmntent>, C<d_getnbyaddr>, C<d_getnbyname>, +C<d_getnent>, C<d_getnetbyaddr_r>, C<d_getnetbyname_r>, C<d_getnetent_r>, +C<d_getnetprotos>, C<d_getpagsz>, C<d_getpbyname>, C<d_getpbynumber>, +C<d_getpent>, C<d_getpgid>, C<d_getpgrp>, 
C<d_getpgrp2>, C<d_getppid>, +C<d_getprior>, C<d_getprotobyname_r>, C<d_getprotobynumber_r>, +C<d_getprotoent_r>, C<d_getprotoprotos>, C<d_getprpwnam>, C<d_getpwent>, +C<d_getpwent_r>, C<d_getpwnam_r>, C<d_getpwuid_r>, C<d_getsbyname>, +C<d_getsbyport>, C<d_getsent>, C<d_getservbyname_r>, C<d_getservbyport_r>, +C<d_getservent_r>, C<d_getservprotos>, C<d_getspnam>, C<d_getspnam_r>, +C<d_gettimeod>, C<d_gmtime_r>, C<d_gnulibc>, C<d_grpasswd>, C<d_hasmntopt>, +C<d_htonl>, C<d_ilogbl>, C<d_inc_version_list>, C<d_index>, C<d_inetaton>, C<d_int64_t>, C<d_isascii>, C<d_isfinite>, C<d_isinf>, C<d_isnan>, C<d_isnanl>, C<d_killpg>, C<d_lchown>, C<d_ldbl_dig>, C<d_libm_lib_version>, C<d_link>, C<d_localtime_r>, C<d_locconv>, @@ -15701,32 +15747,6 @@ Dumper =back -=head2 Devel::Arena - Perl extension for inspecting the core's arena -structures - -=over 4 - -=item SYNOPSIS - -=item DESCRIPTION - -=over 4 - -=item EXPORT - -sv_stats [DONT_SHARE], shared_string_table, sizes, HEK_size STRING, -shared_string_table_effectiveness, write_stats_at_END - -=back - -=item SEE ALSO - -=item AUTHOR - -=item COPYRIGHT AND LICENSE - -=back - =head2 Devel::DProf - a Perl code profiler =over 4 @@ -15781,9 +15801,9 @@ shared_string_table_effectiveness, write_stats_at_END =item Perl API not supported by ppport.h -perl 5.9.3, perl 5.9.2, perl 5.9.1, perl 5.9.0, perl 5.8.3, perl 5.8.1, -perl 5.8.0, perl 5.7.3, perl 5.7.2, perl 5.7.1, perl 5.6.1, perl 5.6.0, -perl 5.005_03, perl 5.005, perl 5.004_05, perl 5.004 +perl 5.9.4, perl 5.9.3, perl 5.9.2, perl 5.9.1, perl 5.9.0, perl 5.8.3, +perl 5.8.1, perl 5.8.0, perl 5.7.3, perl 5.7.2, perl 5.7.1, perl 5.6.1, +perl 5.6.0, perl 5.005_03, perl 5.005, perl 5.004_05, perl 5.004 =back @@ -15857,59 +15877,6 @@ perl 5.005_03, perl 5.005, perl 5.004_05, perl 5.004 =back -=head2 Devel::Size - Perl extension for finding the memory usage of Perl -variables - -=over 4 - -=item SYNOPSIS - -=item DESCRIPTION - -=item FUNCTIONS - -=over 4 - -=item size($ref) - -=item 
total_size($ref) - -=back - -=item EXPORT - -=item UNDERSTANDING MEMORY ALLOCATION - -=over 4 - -=item The C library - -=item Perl - -=back - -=item DANGERS - -=item Messages: texts originating from this module. - -=over 4 - -=item Errors - -=item warnings - -=back - -=item BUGS - -=item AUTHOR - -=item COPYRIGHT - -=item SEE ALSO - -=back - =head2 Digest - Modules that calculate message digests =over 4 @@ -15990,15 +15957,16 @@ B<sha224_base64($data, ...)>, B<sha256_base64($data, ...)>, B<sha384_base64($data, ...)>, B<sha512_base64($data, ...)>, B<new($alg)>, B<reset($alg)>, B<hashsize>, B<algorithm>, B<clone>, B<add($data, ...)>, B<add_bits($data, $nbits)>, B<add_bits($bits)>, B<addfile(*FILE)>, -B<dump($filename)>, B<load($filename)>, B<digest>, B<hexdigest>, -B<b64digest>, B<hmac_sha1($data, $key)>, B<hmac_sha224($data, $key)>, -B<hmac_sha256($data, $key)>, B<hmac_sha384($data, $key)>, -B<hmac_sha512($data, $key)>, B<hmac_sha1_hex($data, $key)>, -B<hmac_sha224_hex($data, $key)>, B<hmac_sha256_hex($data, $key)>, -B<hmac_sha384_hex($data, $key)>, B<hmac_sha512_hex($data, $key)>, -B<hmac_sha1_base64($data, $key)>, B<hmac_sha224_base64($data, $key)>, -B<hmac_sha256_base64($data, $key)>, B<hmac_sha384_base64($data, $key)>, -B<hmac_sha512_base64($data, $key)> +B<addfile($filename [, $mode])>, B<"b"> read file in binary mode, +B<"p"> use portable mode, B<dump($filename)>, B<load($filename)>, +B<digest>, B<hexdigest>, B<b64digest>, B<hmac_sha1($data, $key)>, +B<hmac_sha224($data, $key)>, B<hmac_sha256($data, $key)>, +B<hmac_sha384($data, $key)>, B<hmac_sha512($data, $key)>, +B<hmac_sha1_hex($data, $key)>, B<hmac_sha224_hex($data, $key)>, +B<hmac_sha256_hex($data, $key)>, B<hmac_sha384_hex($data, $key)>, +B<hmac_sha512_hex($data, $key)>, B<hmac_sha1_base64($data, $key)>, +B<hmac_sha224_base64($data, $key)>, B<hmac_sha256_base64($data, $key)>, +B<hmac_sha384_base64($data, $key)>, B<hmac_sha512_base64($data, $key)> =item SEE ALSO @@ -26222,14 +26190,14 @@ path not 
available =item SEE ALSO -=item AUTHOR +=item AUTHORS =item BUGS =item SUPPORT AnnoCPAN: Annotated CPAN documentation, CPAN Ratings, RT: CPAN's request -tracker, Search CPAN +tracker, Search CPAN, Kobes' CPAN Search, Perl Documentation =item LICENSE @@ -26281,14 +26249,14 @@ path not available =item SEE ALSO -=item AUTHOR +=item AUTHORS =item BUGS =item SUPPORT AnnoCPAN: Annotated CPAN documentation, CPAN Ratings, RT: CPAN's request -tracker, Search CPAN +tracker, Search CPAN, Kobes' CPAN Search, Perl Documentation =item LICENSE diff --git a/pod/perlunitut.pod b/pod/perlunitut.pod new file mode 100644 index 0000000000..ae8d0b1566 --- /dev/null +++ b/pod/perlunitut.pod @@ -0,0 +1,425 @@ +=head1 NAME + +perlunitut - Perl Unicode Tutorial + +=head1 DESCRIPTION + +The days of just flinging strings around are over. It's well established that +modern programs need to be capable of communicating funny accented letters, and +things like euro symbols. This means that programmers need new habits. It's +easy to program Unicode capable software, but it does require discipline to do +it right. + +There's a lot to know about character sets, and text encodings. It's probably +best to spend a full day learning all this, but the basics can be learned in +minutes. + +These are not the very basics, though. It is assumed that you already +know the difference between bytes and characters, and realise (and accept!) +that there are many different character sets and encodings, and that your +program has to be explicit about them. Recommended reading is "The Absolute +Minimum Every Software Developer Absolutely, Positively Must Know About Unicode +and Character Sets (No Excuses!)" by Joel Spolsky, at +L<http://joelonsoftware.com/articles/Unicode.html>. + +This tutorial speaks in rather absolute terms, and provides only a limited view +of the wealth of character string related features that Perl has to offer. For +most projects, this information will probably suffice. 
+ +=head2 Definitions + +It's important to set a few things straight first. This is the most important +part of this tutorial. This view may conflict with other information that you +may have found on the web, but that's mostly because many sources are wrong. + +You may have to re-read this entire section a few times... + +=head3 Unicode + +B<Unicode> is a character set with room for lots of characters. The ordinal +value of a character is called a B<code point>. + +There are many, many code points, but computers work with bytes, and a byte can +have only 256 values. Unicode has many more characters, so you need a method +to make these accessible. + +Unicode is encoded using several competing encodings, of which UTF-8 is the +most used. In a Unicode encoding, multiple subsequent bytes can be used to +store a single code point, or simply: character. + +=head3 UTF-8 + +B<UTF-8> is a Unicode encoding. Many people think that Unicode and UTF-8 are +the same thing, but they're not. There are more Unicode encodings, but much of +the world has standardized on UTF-8. + +UTF-8 treats the first 128 codepoints, 0..127, the same as ASCII. They take +only one byte per character. All other characters are encoded as two or more +(up to six) bytes using a complex scheme. Fortunately, Perl handles this for +us, so we don't have to worry about this. + +=head3 Text strings (character strings) + +B<Text strings>, or B<character strings> are made of characters. Bytes are +irrelevant here, and so are encodings. Each character is just that: the +character. + +On a text string, you would do things like: + + $text =~ s/foo/bar/; + if ($string =~ /^\d+$/) { ... } + $text = ucfirst $text; + my $character_count = length $text; + +The value of a character (C<ord>, C<chr>) is the corresponding Unicode code +point. + +=head3 Binary strings (byte strings) + +B<Binary strings>, or B<byte strings> are made of bytes. Here, you don't have +characters, just bytes. 
All communication with the outside world (anything +outside of your current Perl process) is done in binary. + +On a binary string, you would do things like: + + my (@length_content) = unpack "(V/a)*", $binary; + $binary =~ s/\x00\x0F/\xFF\xF0/; # for the brave :) + print {$fh} $binary; + my $byte_count = length $binary; + +=head3 Encoding + +B<Encoding> (as a verb) is the conversion from I<text> to I<binary>. To encode, +you have to supply the target encoding, for example C<iso-8859-1> or C<UTF-8>. +Some encodings, like the C<iso-8859> ("latin") range, do not support the full +Unicode standard; characters that can't be represented are lost in the +conversion. + +=head3 Decoding + +B<Decoding> is the conversion from I<binary> to I<text>. To decode, you have to +know what encoding was used during the encoding phase. And most of all, it must +be something decodable. It doesn't make much sense to decode a PNG image into a +text string. + +=head3 Internal format + +Perl has an B<internal format>, an encoding that it uses to encode text strings +so it can store them in memory. All text strings are in this internal format. +In fact, text strings are never in any other format! + +You shouldn't worry about what this format is, because conversion is +automatically done when you decode or encode. + +=head2 Your new toolkit + +Add to your standard heading the following line: + + use Encode qw(encode decode); + +Or, if you're lazy, just: + + use Encode; + +=head2 I/O flow (the actual 5 minute tutorial) + +The typical input/output flow of a program is: + + 1. Receive and decode + 2. Process + 3. Encode and output + +If your input is binary, and is supposed to remain binary, you shouldn't decode +it to a text string, of course. But in all other cases, you should decode it. + +Decoding can't happen reliably if you don't know how the data was encoded. If +you get to choose, it's a good idea to standardize on UTF-8. 
+ + my $foo = decode('UTF-8', get 'http://example.com/'); + my $bar = decode('ISO-8859-1', readline STDIN); + my $xyzzy = decode('Windows-1251', $cgi->param('foo')); + +Processing happens as you knew before. The only difference is that you're now +using characters instead of bytes. That's very useful if you use things like +C<substr>, or C<length>. + +It's important to realize that there are no bytes in a text string. Of course, +Perl has its internal encoding to store the string in memory, but ignore that. +If you have to do anything with the number of bytes, it's probably best to move +that part to step 3, just after you've encoded the string. Then you know +exactly how many bytes it will be in the destination string. + +The syntax for encoding text strings to binary strings is as simple as decoding: + + $body = encode('UTF-8', $body); + +If you needed to know the length of the string in bytes, now's the perfect time +for that. Because C<$body> is now a byte string, C<length> will report the +number of bytes, instead of the number of characters. The number of +characters is no longer known, because characters only exist in text strings. + + my $byte_count = length $body; + +And if the protocol you're using supports a way of letting the recipient know +which character encoding you used, please help the receiving end by using that +feature! For example, E-mail and HTTP support MIME headers, so you can use the +C<Content-Type> header. They can also have C<Content-Length> to indicate the +number of I<bytes>, which is always a good idea to supply if the number is +known. + + "Content-Type: text/plain; charset=UTF-8", + "Content-Length: $byte_count" + +=head2 Q and A + +=head3 This isn't really a Unicode tutorial, is it? + +No, Perl has an abstracted interface for all supported character encodings, so +this is actually a generic C<Encode> tutorial. 
But many people think that +Unicode is special and magical, and I didn't want to disappoint them, so I +decided to call this document a Unicode tutorial. + +=head3 What about binary data, like images? + +Well, apart from a bare C<binmode $fh>, you shouldn't treat them specially. +(The binmode is needed because otherwise Perl may convert line endings on Win32 +systems.) + +Be careful, though, to never combine text strings with binary strings. If you +need text in a binary stream, encode your text strings first using the +appropriate encoding, then join them with binary strings. See also: "What if I +don't encode?". + +=head3 What about the UTF-8 flag? + +Please, unless you're hacking the internals, or debugging weirdness, don't +think about the UTF-8 flag at all. That means that you very probably shouldn't +use C<is_utf8>, C<_utf8_on> or C<_utf8_off> at all. + +Perl's internal format happens to be UTF-8. Unfortunately, Perl can't keep a +secret, so everyone knows about this. That is the source of much confusion. +It's better to pretend that the internal format is some unknown encoding, +and that you always have to encode and decode explicitly. + +=head3 When should I decode or encode? + +Whenever you're communicating with anything that is external to your perl +process, like a database, a text file, a socket, or another program. Even if +the thing you're communicating with is also written in Perl. + +=head3 What if I don't decode? + +Whenever your encoded, binary string is used together with a text string, Perl +will assume that your binary string was encoded with ISO-8859-1, also known as +latin-1. If it wasn't latin-1, then your data is unpleasantly converted. For +example, if it was UTF-8, the individual bytes of multibyte characters are seen +as separate characters, and then again converted to UTF-8. Such double encoding +can be compared to double HTML encoding (C<&amp;gt;>), or double URI encoding +(C<%253E>). 
+ +This silent implicit decoding is known as "upgrading". That may sound +positive, but it's best to avoid it. + +=head3 What if I don't encode? + +Your text string will be sent using the bytes in Perl's internal format. In +some cases, Perl will warn you that you're doing something wrong, with a +friendly warning: + + Wide character in print at example.pl line 2. + +Because the internal format is often UTF-8, these bugs are hard to spot, +because UTF-8 is usually the encoding you wanted! But don't be lazy, and don't +use the fact that Perl's internal format is UTF-8 to your advantage. Encode +explicitly to avoid weird bugs, and to show to maintenance programmers that you +thought this through. + +=head3 Is there a way to automatically decode or encode? + +If all data that comes from a certain handle is encoded in exactly the same +way, you can tell the PerlIO system to automatically decode everything, with +the C<encoding> layer. If you do this, you can't accidentally forget to decode +or encode anymore, on things that use the layered handle. + +You can provide this layer when C<open>ing the file: + + open my $fh, '>:encoding(UTF-8)', $filename; # auto encoding on write + open my $fh, '<:encoding(UTF-8)', $filename; # auto decoding on read + +Or if you already have an open filehandle: + + binmode $fh, ':encoding(UTF-8)'; + +Some database drivers for DBI can also automatically encode and decode, but +that is typically limited to the UTF-8 encoding, because they cheat. + +=head3 Cheat?! Tell me, how can I cheat? + +Well, because Perl's internal format is UTF-8, you can just skip the encoding +or decoding step, and manipulate the UTF-8 flag directly. + +Instead of C<:encoding(UTF-8)>, you can simply use C<:utf8>. This is widely +accepted as good behavior. + +Instead of C<decode> and C<encode>, you could use C<_utf8_on> and C<_utf8_off>. +But this is, contrary to C<:utf8>, considered bad style. + +There are some shortcuts for oneliners; see C<-C> in L<perlrun>. 
+ +=head3 What if I don't know which encoding was used? + +Do whatever you can to find out, and if you have to: guess. (Don't forget to +document your guess with a comment.) + +You could open the document in a web browser, and change the character set or +character encoding until you can visually confirm that all characters look the +way they should. + +There is no way to reliably detect the encoding automatically, so if people +keep sending you data without charset indication, you may have to educate them. + +=head3 Can I use Unicode in my Perl sources? + +Yes, you can! If your sources are UTF-8 encoded, you can indicate that with the +C<use utf8> pragma. + + use utf8; + +This doesn't do anything to your input, or to your output. It only influences +the way your sources are read. You can use Unicode in string literals, in +identifiers (but they still have to be "word characters" according to C<\w>), +and even in custom delimiters. + +=head3 Data::Dumper doesn't restore the UTF-8 flag; is it broken? + +No, Data::Dumper's Unicode abilities are as they should be. There have been +some complaints that it should restore the UTF-8 flag when the data is read +again with C<eval>. However, you should really not look at the flag, and +nothing indicates that Data::Dumper should break this rule. + +Here's what happens: when Perl reads in a string literal, it sticks to 8 bit +encoding as long as it can. (But perhaps originally it was internally encoded +as UTF-8, when you dumped it.) When it has to give that up because other +characters are added to the text string, it silently upgrades the string to +UTF-8. + +If you properly encode your strings for output, none of this is your +concern, and you can just C<eval> dumped data as always. + +=head3 How can I determine if a string is a text string or a binary string? + +You can't. Some use the UTF-8 flag for this, but that's misuse, and makes well +behaved modules like Data::Dumper look bad. 
The flag is useless for this +purpose, because it's off when an 8 bit encoding (by default ISO-8859-1) is +used to store the string. + +This is something you, the programmer, have to keep track of; sorry. You could +consider adopting a kind of "Hungarian notation" to help with this. + +=head3 How do I convert from encoding FOO to encoding BAR? + +By first converting the FOO-encoded byte string to a text string, and then the +text string to a BAR-encoded byte string: + + my $text_string = decode('FOO', $foo_string); + my $bar_string = encode('BAR', $text_string); + +or by skipping the text string part, and going directly from one binary +encoding to the other: + + use Encode qw(from_to); + from_to($string, 'FOO', 'BAR'); # changes contents of $string + +or by letting automatic decoding and encoding do all the work: + + open my $foofh, '<:encoding(FOO)', 'example.foo.txt'; + open my $barfh, '>:encoding(BAR)', 'example.bar.txt'; + print { $barfh } $_ while <$foofh>; + +=head3 What about the C<use bytes> pragma? + +Don't use it. It makes no sense to deal with bytes in a text string, and it +makes no sense to deal with characters in a byte string. Do the proper +conversions (by decoding/encoding), and things will work out well: you get +character counts for decoded data, and byte counts for encoded data. + +C<use bytes> is usually a failed attempt to do something useful. Just forget +about it. + +=head3 What are C<decode_utf8> and C<encode_utf8>? + +These are alternate syntaxes for C<decode('utf8', ...)> and C<encode('utf8', +...)>. + +=head3 What's the difference between C<UTF-8> and C<utf8>? + +C<UTF-8> is the official standard. C<utf8> is Perl's way of being liberal in +what it accepts. If you have to communicate with things that aren't so liberal, +you may want to consider using C<UTF-8>. If you have to communicate with things +that are too liberal, you may have to use C<utf8>. The full explanation is in +L<Encode>. + +C<UTF-8> is internally known as C<utf-8-strict>. 
This tutorial uses UTF-8 +consistently, even where utf8 is actually used internally, because the +distinction can be hard to make, and is mostly irrelevant. + +Okay, if you insist: the "internal format" is utf8, not UTF-8. (When it's not +some other encoding.) + +=head3 I lost track; what encoding is the internal format really? + +It's good that you lost track, because you shouldn't depend on the internal +format being any specific encoding. But since you asked: by default, the +internal format is either ISO-8859-1 (latin-1), or utf8, depending on the +history of the string. + +Perl knows how it stored the string internally, and will use that knowledge +when you C<encode>. In other words: don't try to find out what the internal +encoding for a certain string is, but instead just encode it into the encoding +that you want. + +=head3 What character encodings does Perl support? + +To find out which character encodings your Perl supports, run: + + perl -MEncode -le "print for Encode->encodings(':all')" + +=head3 Which version of perl should I use? + +Well, if you can, upgrade to the most recent, but certainly C<5.8.1> or newer. +This tutorial is based on the status quo as of C<5.8.7>. + +You should also check your modules, and upgrade them if necessary. For example, +HTML::Entities requires version >= 1.32 to function correctly, even though the +changelog is silent about this. + +=head1 SUMMARY + +Decode everything you receive, encode everything you send out. (If it's text +data.) + +=head1 ACKNOWLEDGEMENTS + +Thanks to Johan Vromans from Squirrel Consultancy. His UTF-8 rants during the +Amsterdam Perl Mongers meetings got me interested and determined to find out +how to use character encodings in Perl in ways that don't break easily. + +Thanks to Gerard Goossen from TTY. His presentation "UTF-8 in the wild" (Dutch +Perl Workshop 2006) inspired me to publish my thoughts and write this tutorial. 
+ +Thanks to the people who asked about this kind of stuff in several Perl IRC +channels, and have constantly reminded me that a simpler explanation was +needed. + +Thanks to the people who reviewed this document for me, before it went public. +They are: Benjamin Smith, Jan-Pieter Cornet, Johan Vromans, Lukas Mai, Nathan +Gray. + +=head1 AUTHOR + +Juerd Waalboer <juerd@cpan.org> + +=head1 SEE ALSO + +L<perlunicode>, L<perluniintro>, L<Encode> + diff --git a/vms/descrip_mms.template b/vms/descrip_mms.template index 6a5bc9b57a..29c1e44b9e 100644 --- a/vms/descrip_mms.template +++ b/vms/descrip_mms.template @@ -411,9 +411,10 @@ pod21 = [.lib.pods]perlpragma.pod [.lib.pods]perlqnx.pod [.lib.pods]perlre.pod [ pod22 = [.lib.pods]perlreref.pod [.lib.pods]perlretut.pod [.lib.pods]perlriscos.pod [.lib.pods]perlrun.pod [.lib.pods]perlsec.pod [.lib.pods]perlsolaris.pod pod23 = [.lib.pods]perlstyle.pod [.lib.pods]perlsub.pod [.lib.pods]perlsymbian.pod [.lib.pods]perlsyn.pod [.lib.pods]perlthrtut.pod [.lib.pods]perltie.pod pod24 = [.lib.pods]perltoc.pod [.lib.pods]perltodo.pod [.lib.pods]perltooc.pod [.lib.pods]perltoot.pod [.lib.pods]perltrap.pod [.lib.pods]perltru64.pod -pod25 = [.lib.pods]perltw.pod [.lib.pods]perlunicode.pod [.lib.pods]perluniintro.pod [.lib.pods]perlutil.pod [.lib.pods]perluts.pod [.lib.pods]perlvar.pod -pod26 = [.lib.pods]perlvmesa.pod [.lib.pods]perlvms.pod [.lib.pods]perlvos.pod [.lib.pods]perlwin32.pod [.lib.pods]perlxs.pod [.lib.pods]perlxstut.pod -pod = $(pod0) $(pod1) $(pod2) $(pod3) $(pod4) $(pod5) $(pod6) $(pod7) $(pod8) $(pod9) $(pod10) $(pod11) $(pod12) $(pod13) $(pod14) $(pod15) $(pod16) $(pod17) $(pod18) $(pod19) $(pod20) $(pod21) $(pod22) $(pod23) $(pod24) $(pod25) $(pod26) +pod25 = [.lib.pods]perltw.pod [.lib.pods]perlunicode.pod [.lib.pods]perluniintro.pod [.lib.pods]perlunitut.pod [.lib.pods]perlutil.pod [.lib.pods]perluts.pod +pod26 = [.lib.pods]perlvar.pod [.lib.pods]perlvmesa.pod [.lib.pods]perlvms.pod [.lib.pods]perlvos.pod 
[.lib.pods]perlwin32.pod [.lib.pods]perlxs.pod +pod27 = [.lib.pods]perlxstut.pod +pod = $(pod0) $(pod1) $(pod2) $(pod3) $(pod4) $(pod5) $(pod6) $(pod7) $(pod8) $(pod9) $(pod10) $(pod11) $(pod12) $(pod13) $(pod14) $(pod15) $(pod16) $(pod17) $(pod18) $(pod19) $(pod20) $(pod21) $(pod22) $(pod23) $(pod24) $(pod25) $(pod26) $(pod27) # Would be useful to automate the generation of this rule from pod/buildtoc # Plus its corresponding delete in the clean target. @@ -1254,6 +1255,10 @@ preplibrary : $(MINIPERL_EXE) $(LIBPREREQ) @ If F$Search("[.lib]pods.dir").eqs."" Then Create/Directory [.lib.pods] Copy/NoConfirm/Log $(MMS$SOURCE) [.lib.pods] +[.lib.pods]perlunitut.pod : [.pod]perlunitut.pod + @ If F$Search("[.lib]pods.dir").eqs."" Then Create/Directory [.lib.pods] + Copy/NoConfirm/Log $(MMS$SOURCE) [.lib.pods] + [.lib.pods]perlutil.pod : [.pod]perlutil.pod @ If F$Search("[.lib]pods.dir").eqs."" Then Create/Directory [.lib.pods] Copy/NoConfirm/Log $(MMS$SOURCE) [.lib.pods] diff --git a/win32/pod.mak b/win32/pod.mak index 1993d710f9..5f3bf61104 100644 --- a/win32/pod.mak +++ b/win32/pod.mak @@ -120,6 +120,7 @@ POD = \ perltrap.pod \ perlunicode.pod \ perluniintro.pod \ + perlunitut.pod \ perlutil.pod \ perlvar.pod \ perlxs.pod \ |