From 2f253cfc85ffd55a8acb988e91f0bc5ab348124c Mon Sep 17 00:00:00 2001 From: Lorry Tar Creator Date: Wed, 8 May 2013 22:21:52 +0000 Subject: HTML-Parser-3.71 --- Changes | 1664 ++++++++++++++++++++++++++++++++++++++++++ MANIFEST | 80 ++ META.json | 53 ++ META.yml | 30 + Makefile.PL | 64 ++ Parser.pm | 1240 +++++++++++++++++++++++++++++++ Parser.xs | 687 +++++++++++++++++ README | 65 ++ TODO | 28 + eg/hanchors | 48 ++ eg/hdump | 23 + eg/hform | 83 +++ eg/hlc | 20 + eg/hrefsub | 93 +++ eg/hstrip | 65 ++ eg/htext | 29 + eg/htextsub | 28 + eg/htitle | 21 + hints/solaris.pl | 4 + hparser.c | 1902 ++++++++++++++++++++++++++++++++++++++++++++++++ hparser.h | 132 ++++ lib/HTML/Entities.pm | 483 ++++++++++++ lib/HTML/Filter.pm | 112 +++ lib/HTML/HeadParser.pm | 315 ++++++++ lib/HTML/LinkExtor.pm | 185 +++++ lib/HTML/PullParser.pm | 209 ++++++ lib/HTML/TokeParser.pm | 371 ++++++++++ mkhctype | 57 ++ mkpfunc | 28 + t/api_version.t | 22 + t/argspec-bad.t | 40 + t/argspec.t | 148 ++++ t/argspec2.t | 21 + t/attr-encoded.t | 32 + t/callback.t | 49 ++ t/case-sensitive.t | 85 +++ t/cases.t | 105 +++ t/comment.t | 24 + t/crashme.t | 43 ++ t/declaration.t | 62 ++ t/default.t | 43 ++ t/document.t | 41 ++ t/dtext.t | 72 ++ t/entities.t | 213 ++++++ t/entities2.t | 57 ++ t/filter-methods.t | 205 ++++++ t/filter.t | 60 ++ t/handler-eof.t | 54 ++ t/handler.t | 67 ++ t/headparser-http.t | 20 + t/headparser.t | 200 +++++ t/ignore.t | 27 + t/largetags.t | 38 + t/linkextor-base.t | 41 ++ t/linkextor-rel.t | 36 + t/magic.t | 41 ++ t/marked-sect.t | 121 +++ t/msie-compat.t | 79 ++ t/offset.t | 58 ++ t/options.t | 36 + t/parsefile.t | 45 ++ t/parser.t | 184 +++++ t/plaintext.t | 58 ++ t/pod.t | 4 + t/process.t | 43 ++ t/pullparser.t | 55 ++ t/script.t | 41 ++ t/skipped-text.t | 89 +++ t/stack-realloc.t | 17 + t/textarea.t | 70 ++ t/threads.t | 39 + t/tokeparser.t | 164 +++++ t/uentities.t | 65 ++ t/unbroken-text.t | 60 ++ t/unicode-bom.t | 63 ++ t/unicode.t | 198 +++++ t/xml-mode.t | 112 +++ 
tokenpos.h | 49 ++ typemap | 5 + util.c | 311 ++++++++ 80 files changed, 11931 insertions(+) create mode 100644 Changes create mode 100644 MANIFEST create mode 100644 META.json create mode 100644 META.yml create mode 100644 Makefile.PL create mode 100644 Parser.pm create mode 100644 Parser.xs create mode 100644 README create mode 100644 TODO create mode 100755 eg/hanchors create mode 100755 eg/hdump create mode 100755 eg/hform create mode 100755 eg/hlc create mode 100755 eg/hrefsub create mode 100755 eg/hstrip create mode 100755 eg/htext create mode 100755 eg/htextsub create mode 100755 eg/htitle create mode 100644 hints/solaris.pl create mode 100644 hparser.c create mode 100644 hparser.h create mode 100644 lib/HTML/Entities.pm create mode 100644 lib/HTML/Filter.pm create mode 100644 lib/HTML/HeadParser.pm create mode 100644 lib/HTML/LinkExtor.pm create mode 100644 lib/HTML/PullParser.pm create mode 100644 lib/HTML/TokeParser.pm create mode 100755 mkhctype create mode 100755 mkpfunc create mode 100644 t/api_version.t create mode 100644 t/argspec-bad.t create mode 100644 t/argspec.t create mode 100644 t/argspec2.t create mode 100644 t/attr-encoded.t create mode 100644 t/callback.t create mode 100644 t/case-sensitive.t create mode 100644 t/cases.t create mode 100644 t/comment.t create mode 100644 t/crashme.t create mode 100644 t/declaration.t create mode 100644 t/default.t create mode 100644 t/document.t create mode 100644 t/dtext.t create mode 100644 t/entities.t create mode 100644 t/entities2.t create mode 100644 t/filter-methods.t create mode 100644 t/filter.t create mode 100644 t/handler-eof.t create mode 100644 t/handler.t create mode 100644 t/headparser-http.t create mode 100644 t/headparser.t create mode 100644 t/ignore.t create mode 100644 t/largetags.t create mode 100644 t/linkextor-base.t create mode 100644 t/linkextor-rel.t create mode 100644 t/magic.t create mode 100644 t/marked-sect.t create mode 100644 t/msie-compat.t create mode 100644 t/offset.t 
create mode 100644 t/options.t create mode 100644 t/parsefile.t create mode 100644 t/parser.t create mode 100644 t/plaintext.t create mode 100644 t/pod.t create mode 100644 t/process.t create mode 100644 t/pullparser.t create mode 100644 t/script.t create mode 100644 t/skipped-text.t create mode 100644 t/stack-realloc.t create mode 100644 t/textarea.t create mode 100644 t/threads.t create mode 100644 t/tokeparser.t create mode 100644 t/uentities.t create mode 100644 t/unbroken-text.t create mode 100644 t/unicode-bom.t create mode 100644 t/unicode.t create mode 100644 t/xml-mode.t create mode 100644 tokenpos.h create mode 100644 typemap create mode 100644 util.c diff --git a/Changes b/Changes new file mode 100644 index 0000000..933d43c --- /dev/null +++ b/Changes @@ -0,0 +1,1664 @@ +_______________________________________________________________________________ +2013-05-09 Release 3.71 + +Gisle Aas (1): + Transform ':' in headers to '-' [RT#80524] + + +_______________________________________________________________________________ +2013-03-28 Release 3.70 + +François Perrad (1): + Fix for cross-compiling with Buildroot + +Gisle Aas (1): + Comment typo fix + +Yves Orton (1): + Fix Issue #3 / RT #84144: HTML::Entities::decode_entities() needs + to call SV_CHECK_THINKFIRST() before checking READONLY flag + + +_______________________________________________________________________________ +2011-10-15 Release 3.69 + +Gisle Aas (4): + Documentation fix; encode_utf8 mixup [RT#71151] + Make it clearer that there are 2 (actually 3) options for handling "UTF-8 garbage" + Github is the official repo + Can't be bothered to try to fix the failures that occur on perl-5.6 + +Barbie (1): + fix to TokeParser to correctly handle option configuration + +Jon Jensen (1): + Aesthetic change: remove extra ; + +Ville Skyttä (1): + Trim surrounding whitespace from extracted URLs.
+ + +_______________________________________________________________________________ +2010-09-01 Release 3.68 + +Gisle Aas (1): + Declare the encoding of the POD to be utf8 + + +_______________________________________________________________________________ +2010-08-17 Release 3.67 + +Nicholas Clark (1): + bleadperl 2154eca7 breaks HTML::Parser 3.66 [RT#60368] + + +_______________________________________________________________________________ +2010-07-09 Release 3.66 + +Gisle Aas (1): + Fix entity decoding in utf8_mode for the title header + + +_______________________________________________________________________________ +2010-04-04 Release 3.65 + +Gisle Aas (1): + Eliminate buggy entities_decode_old + +Salvatore Bonaccorso (1): + Fixed endianness typo [RT#50811] + +Ville Skyttä (1): + Documentation fixes. + + +_______________________________________________________________________________ +2009-10-25 Release 3.64 + +Gisle Aas (5): + Convert files to UTF-8 + Don't allow decode_entities() to generate illegal Unicode chars + Copyright 2009 + Remove redundant (repeated) test + Make parse_file() method use 3-arg open [RT#49434] + + + +_______________________________________________________________________________ +2009-10-22 Release 3.63 + +Gisle Aas (2): + Take more care to prepare the char range for encode_entities [RT#50170] + decode_entities confused by trailing incomplete entity + + + +_______________________________________________________________________________ +2009-08-13 Release 3.62 + +Ville Skyttä (4): + HTTP::Header doc typo fix. + Do not bother tracking style or script, they're ignored. + Bring HTML 5 head elements up to date with WD-html5-20090423. + Improve HeadParser performance.
+ +Gisle Aas (1): + Doc patch: Make it clearer what the return value from ->parse is + + + +_______________________________________________________________________________ +2009-06-20 Release 3.61 + +Gisle Aas (2): + Test that triggers the crash that Chip fixed + Complete documented list of literal tags + +Chip Salzenberg (1): + Avoid crash (referenced pend_text instead of skipped_text) + +Antonio Radici (1): + Reference HTML::LinkExtor [RT#43164] + + + +_______________________________________________________________________________ +2009-02-09 Release 3.60 + +Ville Skytta (5): + Spelling fixes. + Test multi-value headers. + Documentation improvements. + Do not terminate head parsing on the object element (added in HTML 4.0). + Add support for HTML 5 and new HEAD elements. + +Damyan Ivanov (1): + Short description of the htextsub example + +Mike South (1): + Suppress warning when encode_entities is called with undef [RT#27567] + +Zefram (1): + HTML::Parser doesn't compile with perl 5.8.0. + + + +_______________________________________________________________________________ +2008-11-24 Gisle Aas + + Release 3.59 + + Restore perl-5.6 compatibility for HTML::HeadParser. + + Improved META.yml + + + +_______________________________________________________________________________ +2008-11-17 Gisle Aas + + Release 3.58 + + Suppress "Parsing of undecoded UTF-8 will give garbage" warning + with attr_encoded [RT#29089] + + HTML::HeadParser: + - Recognize the Unicode BOM in utf8_mode as well [RT#27522] + - Avoid ending up with '/' keys attribute in Link headers. + + + +_______________________________________________________________________________ +2008-11-16 Gisle Aas + + Release 3.57 + + The