65 files changed, 0 insertions, 9384 deletions
diff --git a/ext/HTML/Parser/Makefile.PL b/ext/HTML/Parser/Makefile.PL
deleted file mode 100644
index 79081f7985..0000000000
--- a/ext/HTML/Parser/Makefile.PL
+++ /dev/null
@@ -1,30 +0,0 @@
-require 5.006;
-use strict;
-use ExtUtils::MakeMaker;
-
-WriteMakefile(
-    NAME	 => 'HTML::Parser',
-    VERSION_FROM => 'Parser.pm',
-    H            => [ "hparser.h", "hctype.h", "tokenpos.h", "pfunc.h",
-		      "hparser.c", "util.c",
-		    ],
-    PREREQ_PM    => {
-		      'HTML::Tagset' => 3,
-                      'Test::More' => 0,    # only needed to run 'make test'
-                    },
-    DEFINE       => "-DMARKED_SECTION",
-    dist         => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', },
-    clean        => { FILES => 'hctype.h pfunc.h' },
-);
-
-
-sub MY::postamble
-{
-    '
-pfunc.h : mkpfunc
-	$(PERL) mkpfunc >pfunc.h
-
-hctype.h : mkhctype
-	$(PERL) mkhctype >hctype.h
-'
-}
diff --git a/ext/HTML/Parser/Parser.pm b/ext/HTML/Parser/Parser.pm
deleted file mode 100644
index 72d5a9841f..0000000000
--- a/ext/HTML/Parser/Parser.pm
+++ /dev/null
@@ -1,1233 +0,0 @@
-package HTML::Parser;
-
-# Copyright 1996-2007, Gisle Aas.
-# Copyright 1999-2000, Michael A. Chase.
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the same terms as Perl itself.
-
-use strict;
-use vars qw($VERSION @ISA);
-
-$VERSION = '3.56';  # $Date: 2007/01/12 09:18:31 $
-
-require HTML::Entities;
-
-require XSLoader;
-XSLoader::load('HTML::Parser', $VERSION);
-
-sub new
-{
-    my $class = shift;
-    my $self = bless {}, $class;
-    return $self->init(@_);
-}
-
-
-sub init
-{
-    my $self = shift;
-    $self->_alloc_pstate;
-
-    my %arg = @_;
-    my $api_version = delete $arg{api_version} || (@_ ? 3 : 2);
-    if ($api_version >= 4) {
-	require Carp;
-	Carp::croak("API version $api_version not supported " .
-		    "by HTML::Parser $VERSION");
-    }
-
-    if ($api_version < 3) {
-	# Set up method callbacks compatible with HTML-Parser-2.xx
-	$self->handler(text    => "text",    "self,text,is_cdata");
-	$self->handler(end     => "end",     "self,tagname,text");
-	$self->handler(process => "process", "self,token0,text");
-	$self->handler(start   => "start",
-		                  "self,tagname,attr,attrseq,text");
-
-	$self->handler(comment =>
-		       sub {
-			   my($self, $tokens) = @_;
-			   for (@$tokens) {
-			       $self->comment($_);
-			   }
-		       }, "self,tokens");
-
-	$self->handler(declaration =>
-		       sub {
-			   my $self = shift;
-			   $self->declaration(substr($_[0], 2, -1));
-		       }, "self,text");
-    }
-
-    if (my $h = delete $arg{handlers}) {
-	$h = {@$h} if ref($h) eq "ARRAY";
-	while (my($event, $cb) = each %$h) {
-	    $self->handler($event => @$cb);
-	}
-    }
-
-    # In the end we try to assume plain attribute or handler
-    while (my($option, $val) = each %arg) {
-	if ($option =~ /^(\w+)_h$/) {
-	    $self->handler($1 => @$val);
-	}
-        elsif ($option =~ /^(text|start|end|process|declaration|comment)$/) {
-	    require Carp;
-	    Carp::croak("Bad constructor option '$option'");
-        }
-	else {
-	    $self->$option($val);
-	}
-    }
-
-    return $self;
-}
-
-
-sub parse_file
-{
-    my($self, $file) = @_;
-    my $opened;
-    if (!ref($file) && ref(\$file) ne "GLOB") {
-        # Assume $file is a filename
-        local(*F);
-        open(F, $file) || return undef;
-	binmode(F);  # should we? good for byte counts
-        $opened++;
-        $file = *F;
-    }
-    my $chunk = '';
-    while (read($file, $chunk, 512)) {
-	$self->parse($chunk) || last;
-    }
-    close($file) if $opened;
-    $self->eof;
-}
-
-
-sub netscape_buggy_comment  # legacy
-{
-    my $self = shift;
-    require Carp;
-    Carp::carp("netscape_buggy_comment() is deprecated.  " .
-	       "Please use the strict_comment() method instead");
-    my $old = !$self->strict_comment;
-    $self->strict_comment(!shift) if @_;
-    return $old;
-}
-
-# set up method stubs
-sub text { }
-*start       = \&text;
-*end         = \&text;
-*comment     = \&text;
-*declaration = \&text;
-*process     = \&text;
-
-1;
-
-__END__
-
-
-=head1 NAME
-
-HTML::Parser - HTML parser class
-
-=head1 SYNOPSIS
-
- use HTML::Parser ();
-
- # Create parser object
- $p = HTML::Parser->new( api_version => 3,
-                         start_h => [\&start, "tagname, attr"],
-                         end_h   => [\&end,   "tagname"],
-                         marked_sections => 1,
-                       );
-
- # Parse document text chunk by chunk
- $p->parse($chunk1);
- $p->parse($chunk2);
- #...
- $p->eof;                 # signal end of document
-
- # Parse directly from file
- $p->parse_file("foo.html");
- # or
- open(my $fh, "<:utf8", "foo.html") || die;
- $p->parse_file($fh);
-
-=head1 DESCRIPTION
-
-Objects of the C<HTML::Parser> class will recognize markup and
-separate it from plain text (alias data content) in HTML
-documents.  As different kinds of markup and text are recognized, the
-corresponding event handlers are invoked.
-
-C<HTML::Parser> is not a generic SGML parser.  We have tried to
-make it able to deal with the HTML that is actually "out there", and
-it normally parses as closely as possible to the way the popular web
-browsers do it instead of strictly following one of the many HTML
-specifications from W3C.  Where there is disagreement, there is often
-an option that you can enable to get the official behaviour.
-
-The document to be parsed may be supplied in arbitrary chunks.  This
-makes on-the-fly parsing as documents are received from the network
-possible.
-
-If event driven parsing does not feel right for your application, you
-might want to use C<HTML::PullParser>.  This is an C<HTML::Parser>
-subclass that allows a more conventional program structure.
-
-
-=head1 METHODS
-
-The following method is used to construct a new C<HTML::Parser> object:
-
-=over
-
-=item $p = HTML::Parser->new( %options_and_handlers )
-
-This class method creates a new C<HTML::Parser> object and
-returns it.  Key/value argument pairs may be provided to assign event
-handlers or initialize parser options.  The handlers and parser
-options can also be set or modified later by the method calls described below.
-
-If a top level key is in the form "<event>_h" (e.g., "text_h") then it
-assigns a handler to that event, otherwise it initializes a parser
-option. The event handler specification value must be an array
-reference.  Multiple handlers may also be assigned with the 'handlers
-=> [%handlers]' option.  See examples below.
-
-If new() is called without any arguments, it will create a parser that
-uses callback methods compatible with version 2 of C<HTML::Parser>.
-See the section on "version 2 compatibility" below for details.
-
-The special constructor option 'api_version => 2' can be used to
-initialize version 2 callbacks while still setting other options and
-handlers.  The 'api_version => 3' option can be used if you don't want
-to set any options and don't want to fall back to v2 compatible
-mode.
-
-Examples:
-
- $p = HTML::Parser->new(api_version => 3,
-                        text_h => [ sub {...}, "dtext" ]);
-
-This creates a new parser object with a text event handler subroutine
-that receives the original text with general entities decoded.
-
- $p = HTML::Parser->new(api_version => 3,
-			start_h => [ 'my_start', "self,tokens" ]);
-
-This creates a new parser object with a start event handler method
-that receives the $p and the tokens array.
-
- $p = HTML::Parser->new(api_version => 3,
-		        handlers => { text => [\@array, "event,text"],
-                                      comment => [\@array, "event,text"],
-                                    });
-
-This creates a new parser object that stores the event type and the
-original text in @array for text and comment events.
-
-=back
-
-The following methods feed the HTML document
-to the C<HTML::Parser> object:
-
-=over
-
-=item $p->parse( $string )
-
-Parse $string as the next chunk of the HTML document.  The return
-value is normally a reference to the parser object (i.e. $p).
-Handlers invoked should not attempt to modify the $string in-place until
-$p->parse returns.
-
-If an invoked event handler aborts parsing by calling $p->eof, then
-$p->parse() will return a FALSE value.
-
-=item $p->parse( $code_ref )
-
-If a code reference is passed as the argument to be parsed, then the
-chunks to be parsed are obtained by invoking this function repeatedly.
-Parsing continues until the function returns an empty (or undefined)
-result.  When this happens $p->eof is automatically signaled.
-
-Parsing will also abort if one of the event handlers calls $p->eof.
-
-The effect of this is the same as:
-
- while (1) {
-    my $chunk = &$code_ref();
-    if (!defined($chunk) || !length($chunk)) {
-        $p->eof;
-        return $p;
-    }
-    $p->parse($chunk) || return undef;
- }
-
-But it is more efficient as this loop runs internally in XS code.
-
-=item $p->parse_file( $file )
-
-Parse text directly from a file.  The $file argument can be a
-filename, an open file handle, or a reference to an open file
-handle.
-
-If $file contains a filename and the file can't be opened, then the
-method returns an undefined value and $! tells why it failed.
-Otherwise the return value is a reference to the parser object.
-
-If a file handle is passed as the $file argument, then the file will
-normally be read until EOF, but not closed.
-
-If an invoked event handler aborts parsing by calling $p->eof,
-then $p->parse_file() may not have read the entire file.
-
-On systems with multi-byte line terminators, the values passed for the
-offset and length argspecs may be too low if parse_file() is called on
-a file handle that is not in binary mode.
-
-If a filename is passed in, then parse_file() will open the file in
-binary mode.
-
-=item $p->eof
-
-Signals the end of the HTML document.  Calling the $p->eof method
-outside a handler callback will flush any remaining buffered text
-(which triggers the C<text> event if there is any remaining text).
-
-Calling $p->eof inside a handler will terminate parsing at that point
-and cause $p->parse to return a FALSE value.  This also terminates
-parsing by $p->parse_file().
-
-After $p->eof has been called, the parse() and parse_file() methods
-can be invoked to feed new documents with the parser object.
-
-The return value from eof() is a reference to the parser object.
-
-=back
-
-
-Most parser options are controlled by boolean attributes.
-Each boolean attribute is enabled by calling the corresponding method
-with a TRUE argument and disabled with a FALSE argument.  The
-attribute value is left unchanged if no argument is given.  The return
-value from each method is the old attribute value.
-
-Methods that can be used to get and/or set parser options are:
-
-=over
-
-=item $p->attr_encoded
-
-=item $p->attr_encoded( $bool )
-
-By default, the C<attr> and C<@attr> argspecs will have general
-entities for attribute values decoded.  Enabling this attribute leaves
-entities alone.
-
-=item $p->boolean_attribute_value( $val )
-
-This method sets the value reported for boolean attributes inside HTML
-start tags.  By default, the name of the attribute is also used as its
-value.  This affects the values reported for C<tokens> and C<attr>
-argspecs.
-
-=item $p->case_sensitive
-
-=item $p->case_sensitive( $bool )
-
-By default, tagnames and attribute names are down-cased.  Enabling this
-attribute leaves them as found in the HTML source document.
-
-=item $p->closing_plaintext
-
-=item $p->closing_plaintext( $bool )
-
-By default, "plaintext" element can never be closed. Everything up to
-the end of the document is parsed in CDATA mode.  This historical
-behaviour is what at least MSIE does.  Enabling this attribute makes
-closing "</plaintext>" tag effective and the parsing process will resume
-after seeing this tag.  This emulates gecko-based browsers.
-
-=item $p->empty_element_tags
-
-=item $p->empty_element_tags( $bool )
-
-By default, empty element tags are not recognized as such and the "/"
-before ">" is just treated like a normal name character (unless
-C<strict_names> is enabled).  Enabling this attribute make
-C<HTML::Parser> recognize these tags.
-
-Empty element tags look like start tags, but end with the character
-sequence "/>" instead of ">".  When recognized by C<HTML::Parser> they
-cause an artificial end event in addition to the start event.  The
-C<text> for the artificial end event will be empty and the C<tokenpos>
-array will be undefined even though the the token array will have one
-element containing the tag name.
-
-=item $p->marked_sections
-
-=item $p->marked_sections( $bool )
-
-By default, section markings like <![CDATA[...]]> are treated like
-ordinary text.  When this attribute is enabled section markings are
-honoured.
-
-There are currently no events associated with the marked section
-markup, but the text can be returned as C<skipped_text>.
-
-=item $p->strict_comment
-
-=item $p->strict_comment( $bool )
-
-By default, comments are terminated by the first occurrence of "-->".
-This is the behaviour of most popular browsers (like Mozilla, Opera and
-MSIE), but it is not correct according to the official HTML
-standard.  Officially, you need an even number of "--" tokens before
-the closing ">" is recognized and there may not be anything but
-whitespace between an even and an odd "--".
-
-The official behaviour is enabled by enabling this attribute.
-
-Enabling of 'strict_comment' also disables recognizing these forms as
-comments:
-
-  </ comment>
-  <! comment>
-
-
-=item $p->strict_end
-
-=item $p->strict_end( $bool )
-
-By default, attributes and other junk are allowed to be present on end tags in a
-manner that emulates MSIE's behaviour.
-
-The official behaviour is enabled with this attribute.  If enabled,
-only whitespace is allowed between the tagname and the final ">".
-
-=item $p->strict_names
-
-=item $p->strict_names( $bool )
-
-By default, almost anything is allowed in tag and attribute names.
-This is the behaviour of most popular browsers and allows us to parse
-some broken tags with invalid attribute values like:
-
-   <IMG SRC=newprevlstGr.gif ALT=[PREV LIST] BORDER=0>
-
-By default, "LIST]" is parsed as a boolean attribute, not as
-part of the ALT value as was clearly intended.  This is also what
-Mozilla sees.
-
-The official behaviour is enabled by enabling this attribute.  If
-enabled, it will cause the tag above to be reported as text
-since "LIST]" is not a legal attribute name.
-
-=item $p->unbroken_text
-
-=item $p->unbroken_text( $bool )
-
-By default, blocks of text are given to the text handler as soon as
-possible (but the parser takes care always to break text at a
-boundary between whitespace and non-whitespace so single words and
-entities can always be decoded safely).  This might create breaks that
-make it hard to do transformations on the text. When this attribute is
-enabled, blocks of text are always reported in one piece.  This will
-delay the text event until the following (non-text) event has been
-recognized by the parser.
-
-Note that the C<offset> argspec will give you the offset of the first
-segment of text and C<length> is the combined length of the segments.
-Since there might be ignored tags in between, these numbers can't be
-used to directly index in the original document file.
-
-=item $p->utf8_mode
-
-=item $p->utf8_mode( $bool )
-
-Enable this option when parsing raw undecoded UTF-8.  This tells the
-parser that the entities expanded for strings reported by C<attr>,
-C<@attr> and C<dtext> should be expanded as decoded UTF-8 so they end
-up compatible with the surrounding text.
-
-If C<utf8_mode> is enabled then it is an error to pass strings
-containing characters with code above 255 to the parse() method, and
-the parse() method will croak if you try.
-
-Example: The Unicode character "\x{2665}" is "\xE2\x99\xA5" when UTF-8
-encoded.  The character can also be represented by the entity
-"&hearts;" or "&#x2665".  If we feed the parser:
-
-  $p->parse("\xE2\x99\xA5&hearts;");
-
-then C<dtext> will be reported as "\xE2\x99\xA5\x{2665}" without
-C<utf8_mode> enabled, but as "\xE2\x99\xA5\xE2\x99\xA5" when enabled.
-The later string is what you want.
-
-This option is only available with perl-5.8 or better.
-
-=item $p->xml_mode
-
-=item $p->xml_mode( $bool )
-
-Enabling this attribute changes the parser to allow some XML
-constructs.  This enables the behaviour controlled by individually by
-the C<case_sensitive>, C<empty_element_tags>, C<strict_names> and
-C<xml_pic> attributes and also suppresses special treatment of
-elements that are parsed as CDATA for HTML.
-
-=item $p->xml_pic
-
-=item $p->xml_pic( $bool )
-
-By default, I<processing instructions> are terminated by ">". When
-this attribute is enabled, processing instructions are terminated by
-"?>" instead.
-
-=back
-
-As markup and text is recognized, handlers are invoked.  The following
-method is used to set up handlers for different events:
-
-=over
-
-=item $p->handler( event => \&subroutine, $argspec )
-
-=item $p->handler( event => $method_name, $argspec )
-
-=item $p->handler( event => \@accum, $argspec )
-
-=item $p->handler( event => "" );
-
-=item $p->handler( event => undef );
-
-=item $p->handler( event );
-
-This method assigns a subroutine, method, or array to handle an event.
-
-Event is one of C<text>, C<start>, C<end>, C<declaration>, C<comment>,
-C<process>, C<start_document>, C<end_document> or C<default>.
-
-The C<\&subroutine> is a reference to a subroutine which is called to handle
-the event.
-
-The C<$method_name> is the name of a method of $p which is called to handle
-the event.
-
-The C<@accum> is an array that will hold the event information as
-sub-arrays.
-
-If the second argument is "", the event is ignored.
-If it is undef, the default handler is invoked for the event.
-
-The C<$argspec> is a string that describes the information to be reported
-for the event.  Any requested information that does not apply to a
-specific event is passed as C<undef>.  If argspec is omitted, then it
-is left unchanged.
-
-The return value from $p->handler is the old callback routine or a
-reference to the accumulator array.
-
-Any return values from handler callback routines/methods are always
-ignored.  A handler callback can request parsing to be aborted by
-invoking the $p->eof method.  A handler callback is not allowed to
-invoke the $p->parse() or $p->parse_file() method.  An exception will
-be raised if it tries.
-
-Examples:
-
-    $p->handler(start =>  "start", 'self, attr, attrseq, text' );
-
-This causes the "start" method of object $p to be called for 'start' events.
-The callback signature is $p->start(\%attr, \@attr_seq, $text).
-
-    $p->handler(start =>  \&start, 'attr, attrseq, text' );
-
-This causes subroutine start() to be called for 'start' events.
-The callback signature is start(\%attr, \@attr_seq, $text).
-
-    $p->handler(start =>  \@accum, '"S", attr, attrseq, text' );
-
-This causes 'start' event information to be saved in @accum.
-The array elements will be ['S', \%attr, \@attr_seq, $text].
-
-   $p->handler(start => "");
-
-This causes 'start' events to be ignored.  It also suppresses
-invocations of any default handler for start events.  It is in most
-cases equivalent to $p->handler(start => sub {}), but is more
-efficient.  It is different from the empty-sub-handler in that
-C<skipped_text> is not reset by it.
-
-   $p->handler(start => undef);
-
-This causes no handler to be associated with start events.
-If there is a default handler it will be invoked.
-
-=back
-
-Filters based on tags can be set up to limit the number of events
-reported.  The main bottleneck during parsing is often the huge number
-of callbacks made from the parser.  Applying filters can improve
-performance significantly.
-
-The following methods control filters:
-
-=over
-
-=item $p->ignore_elements( @tags )
-
-Both the C<start> event and the C<end> event as well as any events that
-would be reported in between are suppressed.  The ignored elements can
-contain nested occurrences of itself.  Example:
-
-   $p->ignore_elements(qw(script style));
-
-The C<script> and C<style> tags will always nest properly since their
-content is parsed in CDATA mode.  For most other tags
-C<ignore_elements> must be used with caution since HTML is often not
-I<well formed>.
-
-=item $p->ignore_tags( @tags )
-
-Any C<start> and C<end> events involving any of the tags given are
-suppressed.  To reset the filter (i.e. don't suppress any C<start> and
-C<end> events), call C<ignore_tags> without an argument.
-
-=item $p->report_tags( @tags )
-
-Any C<start> and C<end> events involving any of the tags I<not> given
-are suppressed.  To reset the filter (i.e. report all C<start> and
-C<end> events), call C<report_tags> without an argument.
-
-=back
-
-Internally, the system has two filter lists, one for C<report_tags>
-and one for C<ignore_tags>, and both filters are applied.  This
-effectively gives C<ignore_tags> precedence over C<report_tags>.
-
-Examples:
-
-   $p->ignore_tags(qw(style));
-   $p->report_tags(qw(script style));
-
-results in only C<script> events being reported.
-
-=head2 Argspec
-
-Argspec is a string containing a comma-separated list that describes
-the information reported by the event.  The following argspec
-identifier names can be used:
-
-=over
-
-=item C<attr>
-
-Attr causes a reference to a hash of attribute name/value pairs to be
-passed.
-
-Boolean attributes' values are either the value set by
-$p->boolean_attribute_value, or the attribute name if no value has been
-set by $p->boolean_attribute_value.
-
-This passes undef except for C<start> events.
-
-Unless C<xml_mode> or C<case_sensitive> is enabled, the attribute
-names are forced to lower case.
-
-General entities are decoded in the attribute values and
-one layer of matching quotes enclosing the attribute values is removed.
-
-The Unicode character set is assumed for entity decoding.  With Perl
-version 5.6 or earlier only the Latin-1 range is supported, and
-entities for characters outside the range 0..255 are left unchanged.
-
-=item C<@attr>
-
-Basically the same as C<attr>, but keys and values are passed as
-individual arguments and the original sequence of the attributes is
-kept.  The parameters passed will be the same as the @attr calculated
-here:
-
-   @attr = map { $_ => $attr->{$_} } @$attrseq;
-
-assuming $attr and $attrseq here are the hash and array passed as the
-result of C<attr> and C<attrseq> argspecs.
-
-This passes no values for events besides C<start>.
-
-=item C<attrseq>
-
-Attrseq causes a reference to an array of attribute names to be
-passed.  This can be useful if you want to walk the C<attr> hash in
-the original sequence.
-
-This passes undef except for C<start> events.
-
-Unless C<xml_mode> or C<case_sensitive> is enabled, the attribute
-names are forced to lower case.
-
-=item C<column>
-
-Column causes the column number of the start of the event to be passed.
-The first column on a line is 0.
-
-=item C<dtext>
-
-Dtext causes the decoded text to be passed.  General entities are
-automatically decoded unless the event was inside a CDATA section or
-was between literal start and end tags (C<script>, C<style>,
-C<xmp>, and C<plaintext>).
-
-The Unicode character set is assumed for entity decoding.  With Perl
-version 5.6 or earlier only the Latin-1 range is supported, and
-entities for characters outside the range 0..255 are left unchanged.
-
-This passes undef except for C<text> events.
-
-=item C<event>
-
-Event causes the event name to be passed.
-
-The event name is one of C<text>, C<start>, C<end>, C<declaration>,
-C<comment>, C<process>, C<start_document> or C<end_document>.
-
-=item C<is_cdata>
-
-Is_cdata causes a TRUE value to be passed if the event is inside a CDATA
-section or between literal start and end tags (C<script>,
-C<style>, C<xmp>, and C<plaintext>).
-
-if the flag is FALSE for a text event, then you should normally
-either use C<dtext> or decode the entities yourself before the text is
-processed further.
-
-=item C<length>
-
-Length causes the number of bytes of the source text of the event to
-be passed.
-
-=item C<line>
-
-Line causes the line number of the start of the event to be passed.
-The first line in the document is 1.  Line counting doesn't start
-until at least one handler requests this value to be reported.
-
-=item C<offset>
-
-Offset causes the byte position in the HTML document of the start of
-the event to be passed.  The first byte in the document has offset 0.
-
-=item C<offset_end>
-
-Offset_end causes the byte position in the HTML document of the end of
-the event to be passed.  This is the same as C<offset> + C<length>.
-
-=item C<self>
-
-Self causes the current object to be passed to the handler.  If the
-handler is a method, this must be the first element in the argspec.
-
-An alternative to passing self as an argspec is to register closures
-that capture $self by themselves as handlers.  Unfortunately this
-creates circular references which prevent the HTML::Parser object
-from being garbage collected.  Using the C<self> argspec avoids this
-problem.
-
-=item C<skipped_text>
-
-Skipped_text returns the concatenated text of all the events that have
-been skipped since the last time an event was reported.  Events might
-be skipped because no handler is registered for them or because some
-filter applies.  Skipped text also includes marked section markup,
-since there are no events that can catch it.
-
-If an C<"">-handler is registered for an event, then the text for this
-event is not included in C<skipped_text>.  Skipped text both before
-and after the C<"">-event is included in the next reported
-C<skipped_text>.
-
-=item C<tag>
-
-Same as C<tagname>, but prefixed with "/" if it belongs to an C<end>
-event and "!" for a declaration.  The C<tag> does not have any prefix
-for C<start> events, and is in this case identical to C<tagname>.
-
-=item C<tagname>
-
-This is the element name (or I<generic identifier> in SGML jargon) for
-start and end tags.  Since HTML is case insensitive, this name is
-forced to lower case to ease string matching.
-
-Since XML is case sensitive, the tagname case is not changed when
-C<xml_mode> is enabled.  The same happens if the C<case_sensitive> attribute
-is set.
-
-The declaration type of declaration elements is also passed as a tagname,
-even if that is a bit strange.
-In fact, in the current implementation tagname is
-identical to C<token0> except that the name may be forced to lower case.
-
-=item C<token0>
-
-Token0 causes the original text of the first token string to be
-passed.  This should always be the same as $tokens->[0].
-
-For C<declaration> events, this is the declaration type.
-
-For C<start> and C<end> events, this is the tag name.
-
-For C<process> and non-strict C<comment> events, this is everything
-inside the tag.
-
-This passes undef if there are no tokens in the event.
-
-=item C<tokenpos>
-
-Tokenpos causes a reference to an array of token positions to be
-passed.  For each string that appears in C<tokens>, this array
-contains two numbers.  The first number is the offset of the start of
-the token in the original C<text> and the second number is the length
-of the token.
-
-Boolean attributes in a C<start> event will have (0,0) for the
-attribute value offset and length.
-
-This passes undef if there are no tokens in the event (e.g., C<text>)
-and for artificial C<end> events triggered by empty element tags.
-
-If you are using these offsets and lengths to modify C<text>, you
-should either work from right to left, or be very careful to calculate
-the changes to the offsets.
-
-=item C<tokens>
-
-Tokens causes a reference to an array of token strings to be passed.
-The strings are exactly as they were found in the original text,
-no decoding or case changes are applied.
-
-For C<declaration> events, the array contains each word, comment, and
-delimited string starting with the declaration type.
-
-For C<comment> events, this contains each sub-comment.  If
-$p->strict_comments is disabled, there will be only one sub-comment.
-
-For C<start> events, this contains the original tag name followed by
-the attribute name/value pairs.  The values of boolean attributes will
-be either the value set by $p->boolean_attribute_value, or the
-attribute name if no value has been set by
-$p->boolean_attribute_value.
-
-For C<end> events, this contains the original tag name (always one token).
-
-For C<process> events, this contains the process instructions (always one
-token).
-
-This passes C<undef> for C<text> events.
-
-=item C<text>
-
-Text causes the source text (including markup element delimiters) to be
-passed.
-
-=item C<undef>
-
-Pass an undefined value.  Useful as padding where the same handler
-routine is registered for multiple events.
-
-=item C<'...'>
-
-A literal string of 0 to 255 characters enclosed
-in single (') or double (") quotes is passed as entered.
-
-=back
-
-The whole argspec string can be wrapped up in C<'@{...}'> to signal
-that the resulting event array should be flattened.  This only makes a
-difference if an array reference is used as the handler target.
-Consider this example:
-
-   $p->handler(text => [], 'text');
-   $p->handler(text => [], '@{text}']);
-
-With two text events; C<"foo">, C<"bar">; then the first example will end
-up with [["foo"], ["bar"]] and the second with ["foo", "bar"] in
-the handler target array.
-
-
-=head2 Events
-
-Handlers for the following events can be registered:
-
-=over
-
-=item C<comment>
-
-This event is triggered when a markup comment is recognized.
-
-Example:
-
-  <!-- This is a comment -- -- So is this -->
-
-=item C<declaration>
-
-This event is triggered when a I<markup declaration> is recognized.
-
-For typical HTML documents, the only declaration you are
-likely to find is <!DOCTYPE ...>.
-
-Example:
-
-  <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-  "http://www.w3.org/TR/html40/strict.dtd">
-
-DTDs inside <!DOCTYPE ...> will confuse HTML::Parser.
-
-=item C<default>
-
-This event is triggered for events that do not have a specific
-handler.  You can set up a handler for this event to catch stuff you
-did not want to catch explicitly.
-
-=item C<end>
-
-This event is triggered when an end tag is recognized.
-
-Example:
-
-  </A>
-
-=item C<end_document>
-
-This event is triggered when $p->eof is called and after any remaining
-text is flushed.  There is no document text associated with this event.
-
-=item C<process>
-
-This event is triggered when a processing instructions markup is
-recognized.
-
-The format and content of processing instructions are system and
-application dependent.
-
-Examples:
-
-  <? HTML processing instructions >
-  <? XML processing instructions ?>
-
-=item C<start>
-
-This event is triggered when a start tag is recognized.
-
-Example:
-
-  <A HREF="http://www.perl.com/">
-
-=item C<start_document>
-
-This event is triggered before any other events for a new document.  A
-handler for it can be used to initialize stuff.  There is no document
-text associated with this event.
-
-=item C<text>
-
-This event is triggered when plain text (characters) is recognized.
-The text may contain multiple lines.  A sequence of text may be broken
-between several text events unless $p->unbroken_text is enabled.
-
-The parser will make sure that it does not break a word or a sequence
-of whitespace between two text events.
-
-=back
-
-=head2 Unicode
-
-The C<HTML::Parser> can parse Unicode strings when running under
-perl-5.8 or better.  If Unicode is passed to $p->parse() then chunks
-of Unicode will be reported to the handlers.  The offset and length
-argspecs will also report their position in terms of characters.
-
-It is safe to parse raw undecoded UTF-8 if you either avoid decoding
-entities and make sure to not use I<argspecs> that do, or enable the
-C<utf8_mode> for the parser.  Parsing of undecoded UTF-8 might be
-useful when parsing from a file where you need the reported offsets
-and lengths to match the byte offsets in the file.
-
-If a filename is passed to $p->parse_file() then the file will be read
-in binary mode.  This will be fine if the file contains only ASCII or
-Latin-1 characters.  If the file contains UTF-8 encoded text then care
-must be taken when decoding entities as described in the previous
-paragraph, but better is to open the file with the UTF-8 layer so that
-it is decoded properly:
-
-   open(my $fh, "<:utf8", "index.html") || die "...: $!";
-   $p->parse_file($fh);
-
-If the file contains text encoded in a charset besides ASCII, Latin-1
-or UTF-8 then decoding will always be needed.
-
-=head1 VERSION 2 COMPATIBILITY
-
-When an C<HTML::Parser> object is constructed with no arguments, a set
-of handlers is automatically provided that is compatible with the old
-HTML::Parser version 2 callback methods.
-
-This is equivalent to the following method calls:
-
-   $p->handler(start   => "start",   "self, tagname, attr, attrseq, text");
-   $p->handler(end     => "end",     "self, tagname, text");
-   $p->handler(text    => "text",    "self, text, is_cdata");
-   $p->handler(process => "process", "self, token0, text");
-   $p->handler(comment =>
-             sub {
-		 my($self, $tokens) = @_;
-		 for (@$tokens) {$self->comment($_);}},
-             "self, tokens");
-   $p->handler(declaration =>
-             sub {
-		 my $self = shift;
-		 $self->declaration(substr($_[0], 2, -1));},
-             "self, text");
-
-Setting up these handlers can also be requested with the "api_version =>
-2" constructor option.
-
-=head1 SUBCLASSING
-
-The C<HTML::Parser> class is subclassable.  Parser objects are plain
-hashes and C<HTML::Parser> reserves only hash keys that start with
-"_hparser".  The parser state can be set up by invoking the init()
-method, which takes the same arguments as new().
-
-=head1 EXAMPLES
-
-The first simple example shows how you might strip out comments from
-an HTML document.  We achieve this by setting up a comment handler that
-does nothing and a default handler that will print out anything else:
-
-  use HTML::Parser;
-  HTML::Parser->new(default_h => [sub { print shift }, 'text'],
-                    comment_h => [""],
-                   )->parse_file(shift || die) || die $!;
-
-An alternative implementation is:
-
-  use HTML::Parser;
-  HTML::Parser->new(end_document_h => [sub { print shift },
-                                       'skipped_text'],
-                    comment_h      => [""],
-                   )->parse_file(shift || die) || die $!;
-
-This will in most cases be much more efficient since only a single
-callback will be made.
-
-The next example prints out the text that is inside the <title>
-element of an HTML document.  Here we start by setting up a start
-handler.  When it sees the title start tag it enables a text handler
-that prints any text found and an end handler that will terminate
-parsing as soon as the title end tag is seen:
-
-  use HTML::Parser ();
-
-  sub start_handler
-  {
-    return if shift ne "title";
-    my $self = shift;
-    $self->handler(text => sub { print shift }, "dtext");
-    $self->handler(end  => sub { shift->eof if shift eq "title"; },
-		           "tagname,self");
-  }
-
-  my $p = HTML::Parser->new(api_version => 3);
-  $p->handler( start => \&start_handler, "tagname,self");
-  $p->parse_file(shift || die) || die $!;
-  print "\n";
-
-More examples are found in the F<eg/> directory of the C<HTML-Parser>
-distribution: the program C<hrefsub> shows how you can edit all links
-found in a document; the program C<htextsub> shows how to edit the text only; the
-program C<hstrip> shows how you can strip out certain tags/elements
-and/or attributes; and the program C<htext> show how to obtain the
-plain text, but not any script/style content.
-
-You can browse the F<eg/> directory online from the I<[Browse]> link on
-the http://search.cpan.org/~gaas/HTML-Parser/ page.
-
-=head1 BUGS
-
-The <style> and <script> sections do not end with the first "</", but
-need the complete corresponding end tag.  The standard behaviour is
-not really practical.
-
-When the I<strict_comment> option is enabled, we still recognize
-comments where there is something other than whitespace between even
-and odd "--" markers.
-
-Once $p->boolean_attribute_value has been set, there is no way to
-restore the default behaviour.
-
-There is currently no way to get both quote characters
-into the same literal argspec.
-
-Empty tags, e.g. "<>" and "</>", are not recognized.  SGML allows them
-to repeat the previous start tag or close the previous start tag
-respectively.
-
-NET tags, e.g. "code/.../" are not recognized.  This is SGML
-shorthand for "<code>...</code>".
-
-Unclosed start or end tags, e.g. "<tt<b>...</b</tt>" are not
-recognized.
-
-=head1 DIAGNOSTICS
-
-The following messages may be produced by HTML::Parser.  The notation
-in this listing is the same as used in L<perldiag>:
-
-=over
-
-=item Not a reference to a hash
-
-(F) The object blessed into or subclassed from HTML::Parser is not a
-hash as required by the HTML::Parser methods.
-
-=item Bad signature in parser state object at %p
-
-(F) The _hparser_xs_state element does not refer to a valid state structure.
-Something must have changed the internal value
-stored in this hash element, or the memory has been overwritten.
-
-=item _hparser_xs_state element is not a reference
-
-(F) The _hparser_xs_state element has been destroyed.
-
-=item Can't find '_hparser_xs_state' element in HTML::Parser hash
-
-(F) The _hparser_xs_state element is missing from the parser hash.
-It was either deleted, or not created when the object was created.
-
-=item API version %s not supported by HTML::Parser %s
-
-(F) The constructor option 'api_version' with an argument greater than
-or equal to 4 is reserved for future extensions.
-
-=item Bad constructor option '%s'
-
-(F) An unknown constructor option key was passed to the new() or
-init() methods.
-
-=item Parse loop not allowed
-
-(F) A handler invoked the parse() or parse_file() method.
-This is not permitted.
-
-=item marked sections not supported
-
-(F) The $p->marked_sections() method was invoked in a HTML::Parser
-module that was compiled without support for marked sections.
-
-=item Unknown boolean attribute (%d)
-
-(F) Something is wrong with the internal logic that set up aliases for
-boolean attributes.
-
-=item Only code or array references allowed as handler
-
-(F) The second argument for $p->handler must be either a subroutine
-reference, then name of a subroutine or method, or a reference to an
-array.
-
-=item No handler for %s events
-
-(F) The first argument to $p->handler must be a valid event name; i.e. one
-of "start", "end", "text", "process", "declaration" or "comment".
-
-=item Unrecognized identifier %s in argspec
-
-(F) The identifier is not a known argspec name.
-Use one of the names mentioned in the argspec section above.
-
-=item Literal string is longer than 255 chars in argspec
-
-(F) The current implementation limits the length of literals in
-an argspec to 255 characters.  Make the literal shorter.
-
-=item Backslash reserved for literal string in argspec
-
-(F) The backslash character "\" is not allowed in argspec literals.
-It is reserved to permit quoting inside a literal in a later version.
-
-=item Unterminated literal string in argspec
-
-(F) The terminating quote character for a literal was not found.
-
-=item Bad argspec (%s)
-
-(F) Only identifier names, literals, spaces and commas
-are allowed in argspecs.
-
-=item Missing comma separator in argspec
-
-(F) Identifiers in an argspec must be separated with ",".
-
-=item Parsing of undecoded UTF-8 will give garbage when decoding entities
-
-(W) The first chunk parsed appears to contain undecoded UTF-8 and one
-or more argspecs that decode entities are used for the callback
-handlers.
-
-The result of decoding will be a mix of encoded and decoded characters
-for any entities that expand to characters with code above 127.  This
-is not a good thing.
-
-The solution is to use the Encode::encode_utf8() on the data before
-feeding it to the $p->parse().  For $p->parse_file() pass a file that
-has been opened in ":utf8" mode.
-
-The parser can process raw undecoded UTF-8 sanely if the C<utf8_mode>
-is enabled or if the "attr", "@attr" or "dtext" argspecs is avoided.
-
-=item Parsing string decoded with wrong endianess
-
-(W) The first character in the document is U+FFFE.  This is not a
-legal Unicode character but a byte swapped BOM.  The result of parsing
-will likely be garbage.
-
-=item Parsing of undecoded UTF-32
-
-(W) The parser found the Unicode UTF-32 BOM signature at the start
-of the document.  The result of parsing will likely be garbage.
-
-=item Parsing of undecoded UTF-16
-
-(W) The parser found the Unicode UTF-16 BOM signature at the start of
-the document.  The result of parsing will likely be garbage.
-
-=back
-
-=head1 SEE ALSO
-
-L<HTML::Entities>, L<HTML::PullParser>, L<HTML::TokeParser>, L<HTML::HeadParser>,
-L<HTML::LinkExtor>, L<HTML::Form>
-
-L<HTML::TreeBuilder> (part of the I<HTML-Tree> distribution)
-
-http://www.w3.org/TR/html4
-
-More information about marked sections and processing instructions may
-be found at C<http://www.sgml.u-net.com/book/sgml-8.htm>.
-
-=head1 COPYRIGHT
-
- Copyright 1996-2007 Gisle Aas. All rights reserved.
- Copyright 1999-2000 Michael A. Chase.  All rights reserved.
-
-This library is free software; you can redistribute it and/or
-modify it under the same terms as Perl itself.
-
-=cut
diff --git a/ext/HTML/Parser/Parser.xs b/ext/HTML/Parser/Parser.xs
deleted file mode 100644
index a173eb65b0..0000000000
--- a/ext/HTML/Parser/Parser.xs
+++ /dev/null
@@ -1,672 +0,0 @@
-/* $Id: Parser.xs,v 2.137 2007/01/12 10:18:39 gisle Exp $
- *
- * Copyright 1999-2005, Gisle Aas.
- * Copyright 1999-2000, Michael A. Chase.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the same terms as Perl itself.
- */
-
-
-/*
- * Standard XS greeting.
- */
-#ifdef __cplusplus
-extern "C" {
-#endif
-#define PERL_NO_GET_CONTEXT     /* we want efficiency */
-#include "EXTERN.h"
-#include "perl.h"
-#include "XSUB.h"
-#ifdef __cplusplus
-}
-#endif
-
-
-
-/*
- * Some perl version compatibility gruff.
- */
-#include "patchlevel.h"
-#if PATCHLEVEL <= 4 /* perl5.004_XX */
-
-#ifndef PL_sv_undef
-   #define PL_sv_undef sv_undef
-   #define PL_sv_yes   sv_yes
-#endif
-
-#ifndef PL_hexdigit
-   #define PL_hexdigit hexdigit
-#endif
-
-#ifndef ERRSV
-   #define ERRSV GvSV(errgv)
-#endif
-
-#if (PATCHLEVEL == 4 && SUBVERSION <= 4)
-/* The newSVpvn function was introduced in perl5.004_05 */
-static SV *
-newSVpvn(char *s, STRLEN len)
-{
-    register SV *sv = newSV(0);
-    sv_setpvn(sv,s,len);
-    return sv;
-}
-#endif /* not perl5.004_05 */
-#endif /* perl5.004_XX */
-
-#ifndef dNOOP
-   #define dNOOP extern int errno
-#endif
-#ifndef dTHX
-   #define dTHX dNOOP
-   #define pTHX_
-   #define aTHX_
-#endif
-
-#ifndef MEMBER_TO_FPTR
-   #define MEMBER_TO_FPTR(x) (x)
-#endif
-
-#ifndef INT2PTR
-   #define INT2PTR(any,d)  (any)(d)
-   #define PTR2IV(p)       (IV)(p)
-#endif
-
-
-#if PATCHLEVEL > 6 || (PATCHLEVEL == 6 && SUBVERSION > 0)
-   #define RETHROW	   croak(Nullch)
-#else
-   #define RETHROW    { STRLEN my_na; croak("%s", SvPV(ERRSV, my_na)); }
-#endif
-
-#if PATCHLEVEL < 8
-   /* No useable Unicode support */
-   /* Make these harmless if present */
-   #undef SvUTF8
-   #undef SvUTF8_on
-   #undef SvUTF8_off
-   #define SvUTF8(sv)      0
-   #define SvUTF8_on(sv)   0
-   #define SvUTF8_off(sv)  0
-#else
-   #define UNICODE_HTML_PARSER
-#endif
-
-#ifdef G_WARN_ON
-   #define DOWARN (PL_dowarn & G_WARN_ON)
-#else
-   #define DOWARN PL_dowarn
-#endif
-
-/*
- * Include stuff.  We include .c files instead of linking them,
- * so that they don't have to pollute the external dll name space.
- */
-
-#ifdef EXTERN
-  #undef EXTERN
-#endif
-
-#define EXTERN static /* Don't pollute */
-
-#include "hparser.h"
-#include "util.c"
-#include "hparser.c"
-
-
-/*
- * Support functions for the XS glue
- */
-
-static SV*
-check_handler(pTHX_ SV* h)
-{
-    if (SvROK(h)) {
-	SV* myref = SvRV(h);
-	if (SvTYPE(myref) == SVt_PVCV)
-	    return newSVsv(h);
-	if (SvTYPE(myref) == SVt_PVAV)
-	    return SvREFCNT_inc(myref);
-	croak("Only code or array references allowed as handler");
-    }
-    return SvOK(h) ? newSVsv(h) : 0;
-}
-
-
-static PSTATE*
-get_pstate_iv(pTHX_ SV* sv)
-{
-    PSTATE *p;
-#if PATCHLEVEL < 8
-    p = INT2PTR(PSTATE*, SvIV(sv));
-#else
-    MAGIC *mg = SvMAGICAL(sv) ? mg_find(sv, '~') : NULL;
-
-    if (!mg)
-	croak("Lost parser state magic");
-    p = (PSTATE *)mg->mg_ptr;
-    if (!p)
-	croak("Lost parser state magic");
-#endif
-    if (p->signature != P_SIGNATURE)
-	croak("Bad signature in parser state object at %p", p);
-    return p;
-}
-
-
-static PSTATE*
-get_pstate_hv(pTHX_ SV* sv)                               /* used by XS typemap */
-{
-    HV* hv;
-    SV** svp;
-
-    sv = SvRV(sv);
-    if (!sv || SvTYPE(sv) != SVt_PVHV)
-	croak("Not a reference to a hash");
-    hv = (HV*)sv;
-    svp = hv_fetch(hv, "_hparser_xs_state", 17, 0);
-    if (svp) {
-	if (SvROK(*svp))
-	    return get_pstate_iv(aTHX_ SvRV(*svp));
-	else
-	    croak("_hparser_xs_state element is not a reference");
-    }
-    croak("Can't find '_hparser_xs_state' element in HTML::Parser hash");
-    return 0;
-}
-
-
-static void
-free_pstate(pTHX_ PSTATE* pstate)
-{
-    int i;
-    SvREFCNT_dec(pstate->buf);
-    SvREFCNT_dec(pstate->pend_text);
-    SvREFCNT_dec(pstate->skipped_text);
-#ifdef MARKED_SECTION
-    SvREFCNT_dec(pstate->ms_stack);
-#endif
-    SvREFCNT_dec(pstate->bool_attr_val);
-    for (i = 0; i < EVENT_COUNT; i++) {
-	SvREFCNT_dec(pstate->handlers[i].cb);
-	SvREFCNT_dec(pstate->handlers[i].argspec);
-    }
-
-    SvREFCNT_dec(pstate->report_tags);
-    SvREFCNT_dec(pstate->ignore_tags);
-    SvREFCNT_dec(pstate->ignore_elements);
-    SvREFCNT_dec(pstate->ignoring_element);
-
-    SvREFCNT_dec(pstate->tmp);
-
-    pstate->signature = 0;
-    Safefree(pstate);
-}
-
-static int
-magic_free_pstate(pTHX_ SV *sv, MAGIC *mg)
-{
-#if PATCHLEVEL < 8
-    free_pstate(aTHX_ get_pstate_iv(aTHX_ sv));
-#else
-    free_pstate(aTHX_ (PSTATE *)mg->mg_ptr);
-#endif
-    return 0;
-}
-
-#if defined(USE_ITHREADS) && PATCHLEVEL >= 8
-
-static PSTATE *
-dup_pstate(pTHX_ PSTATE *pstate, CLONE_PARAMS *params)
-{
-    PSTATE *pstate2;
-    int i;
-
-    Newz(56, pstate2, 1, PSTATE);
-    pstate2->signature = pstate->signature;
-
-    pstate2->buf = SvREFCNT_inc(sv_dup(pstate->buf, params));
-    pstate2->offset = pstate->offset;
-    pstate2->line = pstate->line;
-    pstate2->column = pstate->column;
-    pstate2->start_document = pstate->start_document;
-    pstate2->parsing = pstate->parsing;
-    pstate2->eof = pstate->eof;
-
-    pstate2->literal_mode = pstate->literal_mode;
-    pstate2->is_cdata = pstate->is_cdata;
-    pstate2->no_dash_dash_comment_end = pstate->no_dash_dash_comment_end;
-    pstate2->pending_end_tag = pstate->pending_end_tag;
-
-    pstate2->pend_text = SvREFCNT_inc(sv_dup(pstate->pend_text, params));
-    pstate2->pend_text_is_cdata = pstate->pend_text_is_cdata;
-    pstate2->pend_text_offset = pstate->pend_text_offset;
-    pstate2->pend_text_line = pstate->pend_text_offset;
-    pstate2->pend_text_column = pstate->pend_text_column;
-
-    pstate2->skipped_text = SvREFCNT_inc(sv_dup(pstate->skipped_text, params));
-
-#ifdef MARKED_SECTION
-    pstate2->ms = pstate->ms;
-    pstate2->ms_stack =
-	(AV *)SvREFCNT_inc(sv_dup((SV *)pstate->ms_stack, params));
-    pstate2->marked_sections = pstate->marked_sections;
-#endif
-
-    pstate2->strict_comment = pstate->strict_comment;
-    pstate2->strict_names = pstate->strict_names;
-    pstate2->strict_end = pstate->strict_end;
-    pstate2->xml_mode = pstate->xml_mode;
-    pstate2->unbroken_text = pstate->unbroken_text;
-    pstate2->attr_encoded = pstate->attr_encoded;
-    pstate2->case_sensitive = pstate->case_sensitive;
-    pstate2->closing_plaintext = pstate->closing_plaintext;
-    pstate2->utf8_mode = pstate->utf8_mode;
-    pstate2->empty_element_tags = pstate->empty_element_tags;
-    pstate2->xml_pic = pstate->xml_pic;
-
-    pstate2->bool_attr_val =
-	SvREFCNT_inc(sv_dup(pstate->bool_attr_val, params));
-    for (i = 0; i < EVENT_COUNT; i++) {
-	pstate2->handlers[i].cb =
-	    SvREFCNT_inc(sv_dup(pstate->handlers[i].cb, params));
-	pstate2->handlers[i].argspec =
-	    SvREFCNT_inc(sv_dup(pstate->handlers[i].argspec, params));
-    }
-    pstate2->argspec_entity_decode = pstate->argspec_entity_decode;
-
-    pstate2->report_tags =
-	(HV *)SvREFCNT_inc(sv_dup((SV *)pstate->report_tags, params));
-    pstate2->ignore_tags =
-	(HV *)SvREFCNT_inc(sv_dup((SV *)pstate->ignore_tags, params));
-    pstate2->ignore_elements =
-	(HV *)SvREFCNT_inc(sv_dup((SV *)pstate->ignore_elements, params));
-
-    pstate2->ignoring_element =
-	SvREFCNT_inc(sv_dup(pstate->ignoring_element, params));
-    pstate2->ignore_depth = pstate->ignore_depth;
-
-    if (params->flags & CLONEf_JOIN_IN) {
-	pstate2->entity2char =
-	    perl_get_hv("HTML::Entities::entity2char", TRUE);
-    } else {
-	pstate2->entity2char = (HV *)sv_dup((SV *)pstate->entity2char, params);
-    }
-    pstate2->tmp = SvREFCNT_inc(sv_dup(pstate->tmp, params));
-
-    return pstate2;
-}
-
-static int
-magic_dup_pstate(pTHX_ MAGIC *mg, CLONE_PARAMS *params)
-{
-    mg->mg_ptr = (char *)dup_pstate(aTHX_ (PSTATE *)mg->mg_ptr, params);
-    return 0;
-}
-
-#endif
-
-MGVTBL vtbl_pstate =
-{
-    0,
-    0,
-    0,
-    0,
-    MEMBER_TO_FPTR(magic_free_pstate),
-#if defined(USE_ITHREADS) && PATCHLEVEL >= 8
-    0,
-    MEMBER_TO_FPTR(magic_dup_pstate),
-#endif
-};
-
-
-/*
- *  XS interface definition.
- */
-
-MODULE = HTML::Parser		PACKAGE = HTML::Parser
-
-PROTOTYPES: DISABLE
-
-void
-_alloc_pstate(self)
-	SV* self;
-    PREINIT:
-	PSTATE* pstate;
-	SV* sv;
-	HV* hv;
-        MAGIC* mg;
-
-    CODE:
-	sv = SvRV(self);
-        if (!sv || SvTYPE(sv) != SVt_PVHV)
-            croak("Not a reference to a hash");
-	hv = (HV*)sv;
-
-	Newz(56, pstate, 1, PSTATE);
-	pstate->signature = P_SIGNATURE;
-	pstate->entity2char = perl_get_hv("HTML::Entities::entity2char", TRUE);
-	pstate->tmp = NEWSV(0, 20);
-
-	sv = newSViv(PTR2IV(pstate));
-#if PATCHLEVEL < 8
-	sv_magic(sv, 0, '~', 0, 0);
-#else
-	sv_magic(sv, 0, '~', (char *)pstate, 0);
-#endif
-	mg = mg_find(sv, '~');
-        assert(mg);
-        mg->mg_virtual = &vtbl_pstate;
-#if defined(USE_ITHREADS) && PATCHLEVEL >= 8
-        mg->mg_flags |= MGf_DUP;
-#endif
-	SvREADONLY_on(sv);
-
-	hv_store(hv, "_hparser_xs_state", 17, newRV_noinc(sv), 0);
-
-void
-parse(self, chunk)
-	SV* self;
-	SV* chunk
-    PREINIT:
-	PSTATE* p_state = get_pstate_hv(aTHX_ self);
-    PPCODE:
-	if (p_state->parsing)
-    	    croak("Parse loop not allowed");
-        p_state->parsing = 1;
-	if (SvROK(chunk) && SvTYPE(SvRV(chunk)) == SVt_PVCV) {
-	    SV* generator = chunk;
-	    STRLEN len;
-	    do {
-                int count;
-		PUSHMARK(SP);
-	        count = perl_call_sv(generator, G_SCALAR|G_EVAL);
-		SPAGAIN;
-		chunk = count ? POPs : 0;
-	        PUTBACK;
-
-	        if (SvTRUE(ERRSV)) {
-		    p_state->parsing = 0;
-		    p_state->eof = 0;
-		    RETHROW;
-                }
-
-		if (chunk && SvOK(chunk)) {
-		    (void)SvPV(chunk, len);  /* get length */
-		}
-		else {
-		    len = 0;
-                }
-		parse(aTHX_ p_state, len ? chunk : 0, self);
-	        SPAGAIN;
-
-            } while (len && !p_state->eof);
-        }
-	else {
-	    parse(aTHX_ p_state, chunk, self);
-            SPAGAIN;
-        }
-        p_state->parsing = 0;
-	if (p_state->eof) {
-	    p_state->eof = 0;
-            PUSHs(sv_newmortal());
-        }
-	else {
-	    PUSHs(self);
-	}
-
-void
-eof(self)
-	SV* self;
-    PREINIT:
-	PSTATE* p_state = get_pstate_hv(aTHX_ self);
-    PPCODE:
-        if (p_state->parsing)
-            p_state->eof = 1;
-        else {
-	    p_state->parsing = 1;
-	    parse(aTHX_ p_state, 0, self); /* flush */
-	    p_state->parsing = 0;
-	}
-	PUSHs(self);
-
-SV*
-strict_comment(pstate,...)
-	PSTATE* pstate
-    ALIAS:
-	HTML::Parser::strict_comment = 1
-	HTML::Parser::strict_names = 2
-        HTML::Parser::xml_mode = 3
-	HTML::Parser::unbroken_text = 4
-        HTML::Parser::marked_sections = 5
-        HTML::Parser::attr_encoded = 6
-        HTML::Parser::case_sensitive = 7
-	HTML::Parser::strict_end = 8
-	HTML::Parser::closing_plaintext = 9
-        HTML::Parser::utf8_mode = 10
-        HTML::Parser::empty_element_tags = 11
-        HTML::Parser::xml_pic = 12
-    PREINIT:
-	bool *attr;
-    CODE:
-        switch (ix) {
-	case  1: attr = &pstate->strict_comment;       break;
-	case  2: attr = &pstate->strict_names;         break;
-	case  3: attr = &pstate->xml_mode;             break;
-	case  4: attr = &pstate->unbroken_text;        break;
-        case  5:
-#ifdef MARKED_SECTION
-		 attr = &pstate->marked_sections;      break;
-#else
-	         croak("marked sections not supported"); break;
-#endif
-	case  6: attr = &pstate->attr_encoded;         break;
-	case  7: attr = &pstate->case_sensitive;       break;
-	case  8: attr = &pstate->strict_end;           break;
-	case  9: attr = &pstate->closing_plaintext;    break;
-#ifdef UNICODE_HTML_PARSER
-        case 10: attr = &pstate->utf8_mode;            break;
-#else
-	case 10: croak("The utf8_mode does not work with this perl; perl-5.8 or better required");
-#endif
-	case 11: attr = &pstate->empty_element_tags;    break;
-        case 12: attr = &pstate->xml_pic;              break;
-	default:
-	    croak("Unknown boolean attribute (%d)", ix);
-        }
-	RETVAL = boolSV(*attr);
-	if (items > 1)
-	    *attr = SvTRUE(ST(1));
-    OUTPUT:
-	RETVAL
-
-SV*
-boolean_attribute_value(pstate,...)
-        PSTATE* pstate
-    CODE:
-	RETVAL = pstate->bool_attr_val ? newSVsv(pstate->bool_attr_val)
-				       : &PL_sv_undef;
-	if (items > 1) {
-	    SvREFCNT_dec(pstate->bool_attr_val);
-	    pstate->bool_attr_val = newSVsv(ST(1));
-        }
-    OUTPUT:
-	RETVAL
-
-void
-ignore_tags(pstate,...)
-	PSTATE* pstate
-    ALIAS:
-	HTML::Parser::report_tags = 1
-	HTML::Parser::ignore_tags = 2
-	HTML::Parser::ignore_elements = 3
-    PREINIT:
-	HV** attr;
-	int i;
-    CODE:
-	switch (ix) {
-	case  1: attr = &pstate->report_tags;     break;
-	case  2: attr = &pstate->ignore_tags;     break;
-	case  3: attr = &pstate->ignore_elements; break;
-	default:
-	    croak("Unknown tag-list attribute (%d)", ix);
-	}
-	if (GIMME_V != G_VOID)
-	    croak("Can't report tag lists yet");
-
-	items--;  /* pstate */
-	if (items) {
-	    if (*attr)
-		hv_clear(*attr);
-	    else
-		*attr = newHV();
-
-	    for (i = 0; i < items; i++) {
-		SV* sv = ST(i+1);
-		if (SvROK(sv)) {
-		    sv = SvRV(sv);
-		    if (SvTYPE(sv) == SVt_PVAV) {
-			AV* av = (AV*)sv;
-			STRLEN j;
-			STRLEN len = av_len(av) + 1;
-			for (j = 0; j < len; j++) {
-			    SV**svp = av_fetch(av, j, 0);
-			    if (svp) {
-				hv_store_ent(*attr, *svp, newSViv(0), 0);
-			    }
-			}
-		    }
-		    else
-			croak("Tag list must be plain scalars and arrays");
-		}
-		else {
-		    hv_store_ent(*attr, sv, newSViv(0), 0);
-		}
-	    }
-	}
-	else if (*attr) {
-	    SvREFCNT_dec(*attr);
-            *attr = 0;
-	}
-
-void
-handler(pstate, eventname,...)
-	PSTATE* pstate
-	SV* eventname
-    PREINIT:
-	STRLEN name_len;
-	char *name = SvPV(eventname, name_len);
-        int event = -1;
-        int i;
-        struct p_handler *h;
-    PPCODE:
-	/* map event name string to event_id */
-	for (i = 0; i < EVENT_COUNT; i++) {
-	    if (strEQ(name, event_id_str[i])) {
-	        event = i;
-	        break;
-	    }
-	}
-        if (event < 0)
-	    croak("No handler for %s events", name);
-
-	h = &pstate->handlers[event];
-
-	/* set up return value */
-	if (h->cb) {
-	    PUSHs((SvTYPE(h->cb) == SVt_PVAV)
-	                 ? sv_2mortal(newRV_inc(h->cb))
-	                 : sv_2mortal(newSVsv(h->cb)));
-	}
-        else {
-	    PUSHs(&PL_sv_undef);
-        }
-
-        /* update */
-        if (items > 3) {
-	    SvREFCNT_dec(h->argspec);
-	    h->argspec = 0;
-	    h->argspec = argspec_compile(ST(3), pstate);
-	}
-        if (items > 2) {
-	    SvREFCNT_dec(h->cb);
-            h->cb = 0;
-	    h->cb = check_handler(aTHX_ ST(2));
-	}
-
-
-MODULE = HTML::Parser		PACKAGE = HTML::Entities
-
-void
-decode_entities(...)
-    PREINIT:
-        int i;
-	HV *entity2char = perl_get_hv("HTML::Entities::entity2char", FALSE);
-    PPCODE:
-	if (GIMME_V == G_SCALAR && items > 1)
-            items = 1;
-	for (i = 0; i < items; i++) {
-	    if (GIMME_V != G_VOID)
-	        ST(i) = sv_2mortal(newSVsv(ST(i)));
-	    else if (SvREADONLY(ST(i)))
-		croak("Can't inline decode readonly string");
-	    decode_entities(aTHX_ ST(i), entity2char, 0);
-	}
-	SP += items;
-
-void
-_decode_entities(string, entities, ...)
-    SV* string
-    SV* entities
-    PREINIT:
-	HV* entities_hv;
-        bool expand_prefix = (items > 2) ? SvTRUE(ST(2)) : 0;
-    CODE:
-        if (SvOK(entities)) {
-	    if (SvROK(entities) && SvTYPE(SvRV(entities)) == SVt_PVHV) {
-		entities_hv = (HV*)SvRV(entities);
-	    }
-            else {
-		croak("2nd argument must be hash reference");
-            }
-        }
-        else {
-            entities_hv = 0;
-        }
-	if (SvREADONLY(string))
-	    croak("Can't inline decode readonly string");
-	decode_entities(aTHX_ string, entities_hv, expand_prefix);
-
-bool
-_probably_utf8_chunk(string)
-    SV* string
-    PREINIT:
-        STRLEN len;
-        char *s;
-    CODE:
-#ifdef UNICODE_HTML_PARSER
-        sv_utf8_downgrade(string, 0);
-	s = SvPV(string, len);
-        RETVAL = probably_utf8_chunk(aTHX_ s, len);
-#else
-        RETVAL = 0; /* avoid never initialized complains from compiler */
-	croak("_probably_utf8_chunk() only works for Unicode enabled perls");
-#endif
-    OUTPUT:
-        RETVAL
-
-int
-UNICODE_SUPPORT()
-    PROTOTYPE:
-    CODE:
-#ifdef UNICODE_HTML_PARSER
-       RETVAL = 1;
-#else
-       RETVAL = 0;
-#endif
-    OUTPUT:
-       RETVAL
-
-
-MODULE = HTML::Parser		PACKAGE = HTML::Parser
diff --git a/ext/HTML/Parser/hints/solaris.pl b/ext/HTML/Parser/hints/solaris.pl
deleted file mode 100644
index f6f94f0aca..0000000000
--- a/ext/HTML/Parser/hints/solaris.pl
+++ /dev/null
@@ -1,4 +0,0 @@
-if ($Config{gccversion}) {
-  print "Turning off optimizations to avoid compiler bug\n";
-  $self->{OPTIMIZE} = " ";
-}
diff --git a/ext/HTML/Parser/hparser.c b/ext/HTML/Parser/hparser.c
deleted file mode 100644
index e5887c331f..0000000000
--- a/ext/HTML/Parser/hparser.c
+++ /dev/null
@@ -1,1916 +0,0 @@
-/* $Id: hparser.c,v 2.134 2007/01/12 10:54:06 gisle Exp $
- *
- * Copyright 1999-2007, Gisle Aas
- * Copyright 1999-2000, Michael A. Chase
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the same terms as Perl itself.
- */
-
-#ifndef EXTERN
-#define EXTERN extern
-#endif
-
-#include "hctype.h"    /* isH...() macros */
-#include "tokenpos.h"  /* dTOKEN; PUSH_TOKEN() */
-
-
-static
-struct literal_tag {
-    int len;
-    char* str;
-    int is_cdata;
-}
-literal_mode_elem[] =
-{
-    {6, "script", 1},
-    {5, "style", 1},
-    {3, "xmp", 1},
-    {9, "plaintext", 1},
-    {5, "title", 0},
-    {8, "textarea", 0},
-    {0, 0, 0}
-};
-
-enum argcode {
-    ARG_SELF = 1,  /* need to avoid '\0' in argspec string */
-    ARG_TOKENS,
-    ARG_TOKENPOS,
-    ARG_TOKEN0,
-    ARG_TAGNAME,
-    ARG_TAG,
-    ARG_ATTR,
-    ARG_ATTRARR,
-    ARG_ATTRSEQ,
-    ARG_TEXT,
-    ARG_DTEXT,
-    ARG_IS_CDATA,
-    ARG_SKIPPED_TEXT,
-    ARG_OFFSET,
-    ARG_OFFSET_END,
-    ARG_LENGTH,
-    ARG_LINE,
-    ARG_COLUMN,
-    ARG_EVENT,
-    ARG_UNDEF,
-    ARG_LITERAL, /* Always keep last */
-
-    /* extra flags always encoded first */
-    ARG_FLAG_FLAT_ARRAY
-};
-
-char *argname[] = {
-    /* Must be in the same order as enum argcode */
-    "self",     /* ARG_SELF */
-    "tokens",   /* ARG_TOKENS */   
-    "tokenpos", /* ARG_TOKENPOS */
-    "token0",   /* ARG_TOKEN0 */
-    "tagname",  /* ARG_TAGNAME */
-    "tag",      /* ARG_TAG */
-    "attr",     /* ARG_ATTR */
-    "@attr",    /* ARG_ATTRARR */
-    "attrseq",  /* ARG_ATTRSEQ */
-    "text",     /* ARG_TEXT */
-    "dtext",    /* ARG_DTEXT */
-    "is_cdata", /* ARG_IS_CDATA */
-    "skipped_text", /* ARG_SKIPPED_TEXT */
-    "offset",   /* ARG_OFFSET */
-    "offset_end", /* ARG_OFFSET_END */
-    "length",   /* ARG_LENGTH */
-    "line",     /* ARG_LINE */
-    "column",   /* ARG_COLUMN */
-    "event",    /* ARG_EVENT */
-    "undef",    /* ARG_UNDEF */
-    /* ARG_LITERAL (not compared) */
-    /* ARG_FLAG_FLAT_ARRAY */
-};
-
-#define CASE_SENSITIVE(p_state) \
-         ((p_state)->xml_mode || (p_state)->case_sensitive)
-#define STRICT_NAMES(p_state) \
-         ((p_state)->xml_mode || (p_state)->strict_names)
-#define ALLOW_EMPTY_TAG(p_state) \
-         ((p_state)->xml_mode || (p_state)->empty_element_tags)
-
-static void flush_pending_text(PSTATE* p_state, SV* self);
-
-/*
- * Parser functions.
- *
- *   parse()                       - top level entry point.
- *                                   deals with text and calls one of its
- *                                   subordinate parse_*() routines after
- *                                   looking at the first char after "<"
- *     parse_decl()                - deals with declarations         <!...>
- *       parse_comment()           - deals with <!-- ... -->
- *       parse_marked_section      - deals with <![ ... [ ... ]]>
- *     parse_end()                 - deals with end tags             </...>
- *     parse_start()               - deals with start tags           <A...>
- *     parse_process()             - deals with process instructions <?...>
- *     parse_null()                - deals with anything else        <....>
- *
- *     report_event() - called whenever any of the parse*() routines
- *                      has recongnized something.
- */
-
-static void
-report_event(PSTATE* p_state,
-	     event_id_t event,
-	     char *beg, char *end, U32 utf8,
-	     token_pos_t *tokens, int num_tokens,
-	     SV* self
-	    )
-{
-    struct p_handler *h;
-    dTHX;
-    dSP;
-    AV *array;
-    STRLEN my_na;
-    char *argspec;
-    char *s;
-    STRLEN offset;
-    STRLEN line;
-    STRLEN column;
-
-#ifdef UNICODE_HTML_PARSER
-    #define CHR_DIST(a,b) (utf8 ? utf8_distance((U8*)(a),(U8*)(b)) : (a) - (b))
-#else
-    #define CHR_DIST(a,b) ((a) - (b))
-#endif
-
-    /* some events might still fire after a handler has signaled eof
-     * so suppress them here.
-     */
-    if (p_state->eof)
-	return;
-
-    /* capture offsets */
-    offset = p_state->offset;
-    line = p_state->line;
-    column = p_state->column;
-
-#if 0
-    {  /* used for debugging at some point */
-	char *s = beg;
-	int i;
-
-	/* print debug output */
-	switch(event) {
-	case E_DECLARATION: printf("DECLARATION"); break;
-	case E_COMMENT:     printf("COMMENT"); break;
-	case E_START:       printf("START"); break;
-	case E_END:         printf("END"); break;
-	case E_TEXT:        printf("TEXT"); break;
-	case E_PROCESS:     printf("PROCESS"); break;
-	case E_NONE:        printf("NONE"); break;
-	default:            printf("EVENT #%d", event); break;
-	}
-
-	printf(" [");
-	while (s < end) {
-	    if (*s == '\n') {
-		putchar('\\'); putchar('n');
-	    }
-	    else
-		putchar(*s);
-	    s++;
-	}
-	printf("] %d\n", end - beg);
-	for (i = 0; i < num_tokens; i++) {
-	    printf("  token %d: %d %d\n",
-		   i,
-		   tokens[i].beg - beg,
-		   tokens[i].end - tokens[i].beg);
-	}
-    }
-#endif
-
-    if (p_state->pending_end_tag && event != E_TEXT && event != E_COMMENT) {
-	token_pos_t t;
-	char dummy;
-	t.beg = p_state->pending_end_tag;
-	t.end = p_state->pending_end_tag + strlen(p_state->pending_end_tag);
-	p_state->pending_end_tag = 0;
-	report_event(p_state, E_END, &dummy, &dummy, 0, &t, 1, self);
-	SPAGAIN;
-    }
-
-    /* update offsets */
-    p_state->offset += CHR_DIST(end, beg);
-    if (line) {
-	char *s = beg;
-	char *nl = NULL;
-	while (s < end) {
-	    if (*s == '\n') {
-		p_state->line++;
-		nl = s;
-	    }
-	    s++;
-	}
-	if (nl)
-	    p_state->column = CHR_DIST(end, nl) - 1;
-	else
-	    p_state->column += CHR_DIST(end, beg);
-    }
-
-    if (event == E_NONE)
-	goto IGNORE_EVENT;
-    
-#ifdef MARKED_SECTION
-    if (p_state->ms == MS_IGNORE)
-	goto IGNORE_EVENT;
-#endif
-
-    /* tag filters */
-    if (p_state->ignore_tags || p_state->report_tags || p_state->ignore_elements) {
-
-	if (event == E_START || event == E_END) {
-	    SV* tagname = p_state->tmp;
-
-	    assert(num_tokens >= 1);
-	    sv_setpvn(tagname, tokens[0].beg, tokens[0].end - tokens[0].beg);
-	    if (utf8)
-		SvUTF8_on(tagname);
-	    else
-		SvUTF8_off(tagname);
-	    if (!CASE_SENSITIVE(p_state))
-		sv_lower(aTHX_ tagname);
-
-	    if (p_state->ignoring_element) {
-		if (sv_eq(p_state->ignoring_element, tagname)) {
-		    if (event == E_START)
-			p_state->ignore_depth++;
-		    else if (--p_state->ignore_depth == 0) {
-			SvREFCNT_dec(p_state->ignoring_element);
-			p_state->ignoring_element = 0;
-		    }
-		}
-		goto IGNORE_EVENT;
-	    }
-
-	    if (p_state->ignore_elements &&
-		hv_fetch_ent(p_state->ignore_elements, tagname, 0, 0))
-	    {
-		if (event == E_START) {
-		    p_state->ignoring_element = newSVsv(tagname);
-		    p_state->ignore_depth = 1;
-		}
-		goto IGNORE_EVENT;
-	    }
-
-	    if (p_state->ignore_tags &&
-		hv_fetch_ent(p_state->ignore_tags, tagname, 0, 0))
-	    {
-		goto IGNORE_EVENT;
-	    }
-	    if (p_state->report_tags &&
-		!hv_fetch_ent(p_state->report_tags, tagname, 0, 0))
-	    {
-		goto IGNORE_EVENT;
-	    }
-	}
-	else if (p_state->ignoring_element) {
-	    goto IGNORE_EVENT;
-	}
-    }
-
-    h = &p_state->handlers[event];
-    if (!h->cb) {
-	/* event = E_DEFAULT; */
-	h = &p_state->handlers[E_DEFAULT];
-	if (!h->cb)
-	    goto IGNORE_EVENT;
-    }
-
-    if (SvTYPE(h->cb) != SVt_PVAV && !SvTRUE(h->cb)) {
-	/* FALSE scalar ('' or 0) means IGNORE this event */
-	return;
-    }
-
-    if (p_state->unbroken_text && event == E_TEXT) {
-	/* should buffer text */
-	if (!p_state->pend_text)
-	    p_state->pend_text = newSV(256);
-	if (SvOK(p_state->pend_text)) {
-	    if (p_state->is_cdata != p_state->pend_text_is_cdata) {
-		flush_pending_text(p_state, self);
-		SPAGAIN;
-		goto INIT_PEND_TEXT;
-	    }
-	}
-	else {
-	INIT_PEND_TEXT:
-	    p_state->pend_text_offset = offset;
-	    p_state->pend_text_line = line;
-	    p_state->pend_text_column = column;
-	    p_state->pend_text_is_cdata = p_state->is_cdata;
-	    sv_setpvn(p_state->pend_text, "", 0);
-	    if (!utf8)
-		SvUTF8_off(p_state->pend_text);
-	}
-#ifdef UNICODE_HTML_PARSER
-	if (utf8 && !SvUTF8(p_state->pend_text))
-	    sv_utf8_upgrade(p_state->pend_text);
-	if (utf8 || !SvUTF8(p_state->pend_text)) {
-	    sv_catpvn(p_state->pend_text, beg, end - beg);
-	}
-	else {
-	    SV *tmp = newSVpvn(beg, end - beg);
-	    sv_utf8_upgrade(tmp);
-	    sv_catsv(p_state->pend_text, tmp);
-	    SvREFCNT_dec(tmp);
-	}
-#else
-	sv_catpvn(p_state->pend_text, beg, end - beg);
-#endif
-	return;
-    }
-    else if (p_state->pend_text && SvOK(p_state->pend_text)) {
-	flush_pending_text(p_state, self);
-	SPAGAIN;
-    }
-
-    /* At this point we have decided to generate an event callback */
-
-    argspec = h->argspec ? SvPV(h->argspec, my_na) : "";
-
-    if (SvTYPE(h->cb) == SVt_PVAV) {
-	
-	if (*argspec == ARG_FLAG_FLAT_ARRAY) {
-	    argspec++;
-	    array = (AV*)h->cb;
-	}
-	else {
-	    /* start sub-array for accumulator array */
-	    array = newAV();
-	}
-    }
-    else {
-	array = 0;
-	if (*argspec == ARG_FLAG_FLAT_ARRAY)
-	    argspec++;
-
-	/* start argument stack for callback */
-	ENTER;
-	SAVETMPS;
-	PUSHMARK(SP);
-    }
-
-    for (s = argspec; *s; s++) {
-	SV* arg = 0;
-	int push_arg = 1;
-	enum argcode argcode = (enum argcode)*s;
-
-	switch( argcode ) {
-
-	case ARG_SELF:
-	    arg = sv_mortalcopy(self);
-	    break;
-
-	case ARG_TOKENS:
-	    if (num_tokens >= 1) {
-		AV* av = newAV();
-		SV* prev_token = &PL_sv_undef;
-		int i;
-		av_extend(av, num_tokens);
-		for (i = 0; i < num_tokens; i++) {
-		    if (tokens[i].beg) {
-			prev_token = newSVpvn(tokens[i].beg, tokens[i].end-tokens[i].beg);
-			if (utf8)
-			    SvUTF8_on(prev_token);
-			av_push(av, prev_token);
-		    }
-		    else { /* boolean */
-			av_push(av, p_state->bool_attr_val
-				? newSVsv(p_state->bool_attr_val)
-				: newSVsv(prev_token));
-		    }
-		}
-		arg = sv_2mortal(newRV_noinc((SV*)av));
-	    }
-	    break;
-
-	case ARG_TOKENPOS:
-	    if (num_tokens >= 1 && tokens[0].beg >= beg) {
-		AV* av = newAV();
-		int i;
-		av_extend(av, num_tokens*2);
-		for (i = 0; i < num_tokens; i++) {
-		    if (tokens[i].beg) {
-			av_push(av, newSViv(CHR_DIST(tokens[i].beg, beg)));
-			av_push(av, newSViv(CHR_DIST(tokens[i].end, tokens[i].beg)));
-		    }
-		    else { /* boolean tag value */
-			av_push(av, newSViv(0));
-			av_push(av, newSViv(0));
-		    }
-		}
-		arg = sv_2mortal(newRV_noinc((SV*)av));
-	    }
-	    break;
-
-	case ARG_TOKEN0:
-	case ARG_TAGNAME:
-	    /* fall through */
-
-	case ARG_TAG:
-	    if (num_tokens >= 1) {
-		arg = sv_2mortal(newSVpvn(tokens[0].beg,
-					  tokens[0].end - tokens[0].beg));
-		if (utf8)
-		    SvUTF8_on(arg);
-		if (!CASE_SENSITIVE(p_state) && argcode != ARG_TOKEN0)
-		    sv_lower(aTHX_ arg);
-		if (argcode == ARG_TAG && event != E_START) {
-		    char *e_type = "!##/#?#";
-		    sv_insert(arg, 0, 0, &e_type[event], 1);
-		}
-	    }
-	    break;
-
-	case ARG_ATTR:
-	case ARG_ATTRARR:
-	    if (event == E_START) {
-		HV* hv;
-		int i;
-		if (argcode == ARG_ATTR) {
-		    hv = newHV();
-		    arg = sv_2mortal(newRV_noinc((SV*)hv));
-		}
-		else {
-#ifdef __GNUC__
-		    /* gcc -Wall reports this variable as possibly used uninitialized */
-		    hv = 0;
-#endif
-		    push_arg = 0;  /* deal with argument pushing here */
-		}
-
-		for (i = 1; i < num_tokens; i += 2) {
-		    SV* attrname = newSVpvn(tokens[i].beg,
-					    tokens[i].end-tokens[i].beg);
-		    SV* attrval;
-
-		    if (utf8)
-			SvUTF8_on(attrname);
-		    if (tokens[i+1].beg) {
-			char *beg = tokens[i+1].beg;
-			STRLEN len = tokens[i+1].end - beg;
-			if (*beg == '"' || *beg == '\'') {
-			    assert(len >= 2 && *beg == beg[len-1]);
-			    beg++; len -= 2;
-			}
-			attrval = newSVpvn(beg, len);
-			if (utf8)
-			    SvUTF8_on(attrval);
-			if (!p_state->attr_encoded) {
-#ifdef UNICODE_HTML_PARSER
-			    if (p_state->utf8_mode)
-				sv_utf8_decode(attrval);
-#endif
-			    decode_entities(aTHX_ attrval, p_state->entity2char, 0);
-			    if (p_state->utf8_mode)
-				SvUTF8_off(attrval);
-			}
-		    }
-		    else { /* boolean */
-			if (p_state->bool_attr_val)
-			    attrval = newSVsv(p_state->bool_attr_val);
-			else
-			    attrval = newSVsv(attrname);
-		    }
-
-		    if (!CASE_SENSITIVE(p_state))
-			sv_lower(aTHX_ attrname);
-
-		    if (argcode == ARG_ATTR) {
-			if (hv_exists_ent(hv, attrname, 0) ||
-			    !hv_store_ent(hv, attrname, attrval, 0)) {
-			    SvREFCNT_dec(attrval);
-			}
-			SvREFCNT_dec(attrname);
-		    }
-		    else { /* ARG_ATTRARR */
-			if (array) {
-			    av_push(array, attrname);
-			    av_push(array, attrval);
-			}
-			else {
-			    XPUSHs(sv_2mortal(attrname));
-			    XPUSHs(sv_2mortal(attrval));
-			}
-		    }
-		}
-	    }
-	    else if (argcode == ARG_ATTRARR) {
-		push_arg = 0;
-	    }
-	    break;
-
-	case ARG_ATTRSEQ:       /* (v2 compatibility stuff) */
-	    if (event == E_START) {
-		AV* av = newAV();
-		int i;
-		for (i = 1; i < num_tokens; i += 2) {
-		    SV* attrname = newSVpvn(tokens[i].beg,
-					    tokens[i].end-tokens[i].beg);
-		    if (utf8)
-			SvUTF8_on(attrname);
-		    if (!CASE_SENSITIVE(p_state))
-			sv_lower(aTHX_ attrname);
-		    av_push(av, attrname);
-		}
-		arg = sv_2mortal(newRV_noinc((SV*)av));
-	    }
-	    break;
-	
-	case ARG_TEXT:
-	    arg = sv_2mortal(newSVpvn(beg, end - beg));
-	    if (utf8)
-		SvUTF8_on(arg);
-	    break;
-
-	case ARG_DTEXT:
-	    if (event == E_TEXT) {
-		arg = sv_2mortal(newSVpvn(beg, end - beg));
-		if (utf8)
-		    SvUTF8_on(arg);
-		if (!p_state->is_cdata) {
-#ifdef UNICODE_HTML_PARSER
-		    if (p_state->utf8_mode)
-			sv_utf8_decode(arg);
-#endif
-		    decode_entities(aTHX_ arg, p_state->entity2char, 1);
-		    if (p_state->utf8_mode)
-			SvUTF8_off(arg);
-		}
-	    }
-	    break;
-      
-	case ARG_IS_CDATA:
-	    if (event == E_TEXT) {
-		arg = boolSV(p_state->is_cdata);
-	    }
-	    break;
-
-        case ARG_SKIPPED_TEXT:
-	    arg = sv_2mortal(p_state->skipped_text);
-	    p_state->skipped_text = newSVpvn("", 0);
-            break;
-
-	case ARG_OFFSET:
-	    arg = sv_2mortal(newSViv(offset));
-	    break;
-
-	case ARG_OFFSET_END:
-	    arg = sv_2mortal(newSViv(offset + CHR_DIST(end, beg)));
-	    break;
-
-	case ARG_LENGTH:
-	    arg = sv_2mortal(newSViv(CHR_DIST(end, beg)));
-	    break;
-
-	case ARG_LINE:
-	    arg = sv_2mortal(newSViv(line));
-	    break;
-
-	case ARG_COLUMN:
-	    arg = sv_2mortal(newSViv(column));
-	    break;
-
-	case ARG_EVENT:
-	    assert(event >= 0 && event < EVENT_COUNT);
-	    arg = sv_2mortal(newSVpv(event_id_str[event], 0));
-	    break;
-
-	case ARG_LITERAL:
-	{
-	    int len = (unsigned char)s[1];
-	    arg = sv_2mortal(newSVpvn(s+2, len));
-	    if (SvUTF8(h->argspec))
-		SvUTF8_on(arg);
-	    s += len + 1;
-	}
-	break;
-
-	case ARG_UNDEF:
-	    arg = sv_mortalcopy(&PL_sv_undef);
-	    break;
-      
-	default:
-	    arg = sv_2mortal(newSVpvf("Bad argspec %d", *s));
-	    break;
-	}
-
-	if (push_arg) {
-	    if (!arg)
-		arg = sv_mortalcopy(&PL_sv_undef);
-
-	    if (array) {
-		/* have to fix mortality here or add mortality to
-		 * XPUSHs after removing it from the switch cases.
-		 */
-		av_push(array, SvREFCNT_inc(arg));
-	    }
-	    else {
-		XPUSHs(arg);
-	    }
-	}
-    }
-
-    if (array) {
-	if (array != (AV*)h->cb)
-	    av_push((AV*)h->cb, newRV_noinc((SV*)array));
-    }
-    else {
-	PUTBACK;
-
-	if ((enum argcode)*argspec == ARG_SELF && !SvROK(h->cb)) {
-	    char *method = SvPV(h->cb, my_na);
-	    perl_call_method(method, G_DISCARD | G_EVAL | G_VOID);
-	}
-	else {
-	    perl_call_sv(h->cb, G_DISCARD | G_EVAL | G_VOID);
-	}
-
-	if (SvTRUE(ERRSV)) {
-	    RETHROW;
-	}
-
-	FREETMPS;
-	LEAVE;
-    }
-    if (p_state->skipped_text)
-	SvCUR_set(p_state->skipped_text, 0);
-    return;
-
-IGNORE_EVENT:
-    if (p_state->skipped_text) {
-	if (event != E_TEXT && p_state->pend_text && SvOK(p_state->pend_text))
-	    flush_pending_text(p_state, self);
-#ifdef UNICODE_HTML_PARSER
-	if (utf8 && !SvUTF8(p_state->skipped_text))
-	    sv_utf8_upgrade(p_state->skipped_text);
-	if (utf8 || !SvUTF8(p_state->skipped_text)) {
-#endif
-	    sv_catpvn(p_state->skipped_text, beg, end - beg);
-#ifdef UNICODE_HTML_PARSER
-	}
-	else {
-	    SV *tmp = newSVpvn(beg, end - beg);
-	    sv_utf8_upgrade(tmp);
-	    sv_catsv(p_state->pend_text, tmp);
-	    SvREFCNT_dec(tmp);
-	}
-#endif
-    }
-#undef CHR_DIST    
-    return;
-}
-
-
-EXTERN SV*
-argspec_compile(SV* src, PSTATE* p_state)
-{
-    dTHX;
-    SV* argspec = newSVpvn("", 0);
-    STRLEN len;
-    char *s = SvPV(src, len);
-    char *end = s + len;
-
-    if (SvUTF8(src))
-	SvUTF8_on(argspec);
-
-    while (isHSPACE(*s))
-	s++;
-
-    if (*s == '@') {
-	/* try to deal with '@{ ... }' wrapping */
-	char *tmp = s + 1;
-	while (isHSPACE(*tmp))
-	    tmp++;
-	if (*tmp == '{') {
-	    char c = ARG_FLAG_FLAT_ARRAY;
-	    sv_catpvn(argspec, &c, 1);
-	    tmp++;
-	    while (isHSPACE(*tmp))
-		tmp++;
-	    s = tmp;
-	}
-    }
-    while (s < end) {
-	if (isHNAME_FIRST(*s) || *s == '@') {
-	    char *name = s;
-	    int a = ARG_SELF;
-	    char **arg_name;
-
-	    s++;
-	    while (isHNAME_CHAR(*s))
-		s++;
-
-	    /* check identifier */
-	    for ( arg_name = argname; a < ARG_LITERAL ; ++a, ++arg_name ) {
-		if (strnEQ(*arg_name, name, s - name) &&
-		    (*arg_name)[s - name] == '\0')
-		    break;
-	    }
-	    if (a < ARG_LITERAL) {
-		char c = (unsigned char) a;
-		sv_catpvn(argspec, &c, 1);
-
-		if (a == ARG_LINE || a == ARG_COLUMN) {
-		    if (!p_state->line)
-			p_state->line = 1; /* enable tracing of line/column */
-		}
-		if (a == ARG_SKIPPED_TEXT) {
-		    if (!p_state->skipped_text) {
-			p_state->skipped_text = newSVpvn("", 0);
-                    }
-                }
-		if (a == ARG_ATTR || a == ARG_ATTRARR || a == ARG_DTEXT) {
-		    p_state->argspec_entity_decode++;
-		}
-	    }
-	    else {
-		croak("Unrecognized identifier %.*s in argspec", s - name, name);
-	    }
-	}
-	else if (*s == '"' || *s == '\'') {
-	    char *string_beg = s;
-	    s++;
-	    while (s < end && *s != *string_beg && *s != '\\')
-		s++;
-	    if (*s == *string_beg) {
-		/* literal */
-		int len = s - string_beg - 1;
-		unsigned char buf[2];
-		if (len > 255)
-		    croak("Literal string is longer than 255 chars in argspec");
-		buf[0] = ARG_LITERAL;
-		buf[1] = len;
-		sv_catpvn(argspec, (char*)buf, 2);
-		sv_catpvn(argspec, string_beg+1, len);
-		s++;
-	    }
-	    else if (*s == '\\') {
-		croak("Backslash reserved for literal string in argspec");
-	    }
-	    else {
-		croak("Unterminated literal string in argspec");
-	    }
-	}
-	else {
-	    croak("Bad argspec (%s)", s);
-	}
-
-	while (isHSPACE(*s))
-	    s++;
-	
-	if (*s == '}' && SvPVX(argspec)[0] == ARG_FLAG_FLAT_ARRAY) {
-	    /* end of '@{ ... }' */
-	    s++;
-	    while (isHSPACE(*s))
-		s++;
-	    if (s < end)
-		croak("Bad argspec: stuff after @{...} (%s)", s);
-	}
-
-	if (s == end)
-	    break;
-	if (*s != ',') {
-	    croak("Missing comma separator in argspec");
-	}
-	s++;
-	while (isHSPACE(*s))
-	    s++;
-    }
-    return argspec;
-}
-
-
-static void
-flush_pending_text(PSTATE* p_state, SV* self)
-{
-    dTHX;
-    bool   old_unbroken_text = p_state->unbroken_text;
-    SV*    old_pend_text     = p_state->pend_text;
-    bool   old_is_cdata      = p_state->is_cdata;
-    STRLEN old_offset        = p_state->offset;
-    STRLEN old_line          = p_state->line;
-    STRLEN old_column        = p_state->column;
-
-    assert(p_state->pend_text && SvOK(p_state->pend_text));
-
-    p_state->unbroken_text = 0;
-    p_state->pend_text     = 0;
-    p_state->is_cdata      = p_state->pend_text_is_cdata;
-    p_state->offset        = p_state->pend_text_offset;
-    p_state->line          = p_state->pend_text_line;
-    p_state->column        = p_state->pend_text_column;
-
-    report_event(p_state, E_TEXT,
-		 SvPVX(old_pend_text), SvEND(old_pend_text), 
-		 SvUTF8(old_pend_text), 0, 0, self);
-    SvOK_off(old_pend_text);
-
-    p_state->unbroken_text = old_unbroken_text;
-    p_state->pend_text     = old_pend_text;
-    p_state->is_cdata      = old_is_cdata;
-    p_state->offset        = old_offset;
-    p_state->line          = old_line;
-    p_state->column        = old_column;
-}
-
-static char*
-skip_until_gt(char *beg, char *end)
-{
-    /* tries to emulate quote skipping behaviour observed in MSIE */
-    char *s = beg;
-    char quote = '\0';
-    char prev = ' ';
-    while (s < end) {
-	if (!quote && *s == '>')
-	    return s;
-	if (*s == '"' || *s == '\'') {
-	    if (*s == quote) {
-		quote = '\0';  /* end of quoted string */
-	    }
-	    else if (!quote && (prev == ' ' || prev == '=')) {
-		quote = *s;
-	    }
-	}
-	prev = *s++;
-    }
-    return end;
-}
-
-static char*
-parse_comment(PSTATE* p_state, char *beg, char *end, U32 utf8, SV* self)
-{
-    char *s = beg;
-
-    if (p_state->strict_comment) {
-	dTOKENS(4);
-	char *start_com = s;  /* also used to signal inside/outside */
-
-	while (1) {
-	    /* try to locate "--" */
-	FIND_DASH_DASH:
-	    /* printf("find_dash_dash: [%s]\n", s); */
-	    while (s < end && *s != '-' && *s != '>')
-		s++;
-
-	    if (s == end) {
-		FREE_TOKENS;
-		return beg;
-	    }
-
-	    if (*s == '>') {
-		s++;
-		if (start_com)
-		    goto FIND_DASH_DASH;
-
-		/* we are done recognizing all comments, make callbacks */
-		report_event(p_state, E_COMMENT,
-			     beg - 4, s, utf8,
-			     tokens, num_tokens,
-			     self);
-		FREE_TOKENS;
-
-		return s;
-	    }
-
-	    s++;
-	    if (s == end) {
-		FREE_TOKENS;
-		return beg;
-	    }
-
-	    if (*s == '-') {
-		/* two dashes in a row seen */
-		s++;
-		/* do something */
-		if (start_com) {
-		    PUSH_TOKEN(start_com, s-2);
-		    start_com = 0;
-		}
-		else {
-		    start_com = s;
-		}
-	    }
-	}
-    }
-    else if (p_state->no_dash_dash_comment_end) {
-	token_pos_t token;
-        token.beg = beg;
-        /* a lone '>' signals end-of-comment */
-	while (s < end && *s != '>')
-	    s++;
-	token.end = s;
-	if (s < end) {
-	    s++;
-	    report_event(p_state, E_COMMENT, beg-4, s, utf8, &token, 1, self);
-	    return s;
-	}
-	else {
-	    return beg;
-	}
-    }
-    else { /* non-strict comment */
-	token_pos_t token;
-	token.beg = beg;
-	/* try to locate /--\s*>/ which signals end-of-comment */
-    LOCATE_END:
-	while (s < end && *s != '-')
-	    s++;
-	token.end = s;
-	if (s < end) {
-	    s++;
-	    if (*s == '-') {
-		s++;
-		while (isHSPACE(*s))
-		    s++;
-		if (*s == '>') {
-		    s++;
-		    /* yup */
-		    report_event(p_state, E_COMMENT, beg-4, s, utf8, &token, 1, self);
-		    return s;
-		}
-	    }
-	    if (s < end) {
-		s = token.end + 1;
-		goto LOCATE_END;
-	    }
-	}
-    
-	if (s == end)
-	    return beg;
-    }
-
-    return 0;
-}
-
-
-#ifdef MARKED_SECTION
-
-static void
-marked_section_update(PSTATE* p_state)
-{
-    dTHX;
-    /* we look at p_state->ms_stack to determine p_state->ms */
-    AV* ms_stack = p_state->ms_stack;
-    p_state->ms = MS_NONE;
-
-    if (ms_stack) {
-	int stack_len = av_len(ms_stack);
-	int stack_idx;
-	for (stack_idx = 0; stack_idx <= stack_len; stack_idx++) {
-	    SV** svp = av_fetch(ms_stack, stack_idx, 0);
-	    if (svp) {
-		AV* tokens = (AV*)SvRV(*svp);
-		int tokens_len = av_len(tokens);
-		int i;
-		assert(SvTYPE(tokens) == SVt_PVAV);
-		for (i = 0; i <= tokens_len; i++) {
-		    SV** svp = av_fetch(tokens, i, 0);
-		    if (svp) {
-			STRLEN len;
-			char *token_str = SvPV(*svp, len);
-			enum marked_section_t token;
-			if (strEQ(token_str, "include"))
-			    token = MS_INCLUDE;
-			else if (strEQ(token_str, "rcdata"))
-			    token = MS_RCDATA;
-			else if (strEQ(token_str, "cdata"))
-			    token = MS_CDATA;
-			else if (strEQ(token_str, "ignore"))
-			    token = MS_IGNORE;
-			else
-			    token = MS_NONE;
-			if (p_state->ms < token)
-			    p_state->ms = token;
-		    }
-		}
-	    }
-	}
-    }
-    /* printf("MS %d\n", p_state->ms); */
-    p_state->is_cdata = (p_state->ms == MS_CDATA);
-    return;
-}
-
-
-static char*
-parse_marked_section(PSTATE* p_state, char *beg, char *end, U32 utf8, SV* self)
-{
-    dTHX;
-    char *s;
-    AV* tokens = 0;
-
-    if (!p_state->marked_sections)
-	return 0;
-
-    assert(beg[0] == '<');
-    assert(beg[1] == '!');
-    assert(beg[2] == '[');
-    s = beg + 3;
-
-FIND_NAMES:
-    while (isHSPACE(*s))
-	s++;
-    while (isHNAME_FIRST(*s)) {
-	char *name_start = s;
-	char *name_end;
-	SV *name;
-	s++;
-	while (isHNAME_CHAR(*s))
-	    s++;
-	name_end = s;
-	while (isHSPACE(*s))
-	    s++;
-	if (s == end)
-	    goto PREMATURE;
-
-	if (!tokens)
-	    tokens = newAV();
-	name = newSVpvn(name_start, name_end - name_start);
-	if (utf8)
-	    SvUTF8_on(name);
-	av_push(tokens, sv_lower(aTHX_ name));
-    }
-    if (*s == '-') {
-	s++;
-	if (*s == '-') {
-	    /* comment */
-	    s++;
-	    while (1) {
-		while (s < end && *s != '-')
-		    s++;
-		if (s == end)
-		    goto PREMATURE;
-
-		s++;  /* skip first '-' */
-		if (*s == '-') {
-		    s++;
-		    /* comment finished */
-		    goto FIND_NAMES;
-		}
-	    }      
-	}
-	else
-	    goto FAIL;
-      
-    }
-    if (*s == '[') {
-	s++;
-	/* yup */
-
-	if (!tokens) {
-	    tokens = newAV();
-	    av_push(tokens, newSVpvn("include", 7));
-	}
-
-	if (!p_state->ms_stack)
-	    p_state->ms_stack = newAV();
-	av_push(p_state->ms_stack, newRV_noinc((SV*)tokens));
-	marked_section_update(p_state);
-	report_event(p_state, E_NONE, beg, s, utf8, 0, 0, self);
-	return s;
-    }
-
-FAIL:
-    SvREFCNT_dec(tokens);
-    return 0; /* not yet implemented */
-  
-PREMATURE:
-    SvREFCNT_dec(tokens);
-    return beg;
-}
-#endif
-
-
-static char*
-parse_decl(PSTATE* p_state, char *beg, char *end, U32 utf8, SV* self)
-{
-    char *s = beg + 2;
-
-    if (*s == '-') {
-	/* comment? */
-
-	char *tmp;
-	s++;
-	if (s == end)
-	    return beg;
-
-	if (*s != '-')
-	    goto DECL_FAIL;  /* nope, illegal */
-
-	/* yes, two dashes seen */
-	s++;
-
-	tmp = parse_comment(p_state, s, end, utf8, self);
-	return (tmp == s) ? beg : tmp;
-    }
-
-#ifdef MARKED_SECTION
-    if (*s == '[') {
-	/* marked section */
-	char *tmp;
-	tmp = parse_marked_section(p_state, beg, end, utf8, self);
-	if (!tmp)
-	    goto DECL_FAIL;
-	return tmp;
-    }
-#endif
-
-    if (*s == '>') {
-	/* make <!> into empty comment <SGML Handbook 36:32> */
-	token_pos_t token;
-	token.beg = s;
-	token.end = s;
-	s++;
-	report_event(p_state, E_COMMENT, beg, s, utf8, &token, 1, self);
-	return s;
-    }
-
-    if (isALPHA(*s)) {
-	dTOKENS(8);
-	char *decl_id = s;
-	STRLEN decl_id_len;
-
-	s++;
-	/* declaration */
-	while (s < end && isHNAME_CHAR(*s))
-	    s++;
-	decl_id_len = s - decl_id;
-	if (s == end)
-	    goto PREMATURE;
-
-	/* just hardcode a few names as the recognized declarations */
-	if (!((decl_id_len == 7 &&
-	       strnEQx(decl_id, "DOCTYPE", 7, !CASE_SENSITIVE(p_state))) ||
-	      (decl_id_len == 6 &&
-	       strnEQx(decl_id, "ENTITY",  6, !CASE_SENSITIVE(p_state)))
-	    )
-	    )
-	{
-	    goto FAIL;
-	}
-
-	/* first word available */
-	PUSH_TOKEN(decl_id, s);
-
-	while (1) {
-	    while (s < end && isHSPACE(*s))
-		s++;
-
-	    if (s == end)
-		goto PREMATURE;
-
-	    if (*s == '"' || *s == '\'') {
-		char *str_beg = s;
-		s++;
-		while (s < end && *s != *str_beg)
-		    s++;
-		if (s == end)
-		    goto PREMATURE;
-		s++;
-		PUSH_TOKEN(str_beg, s);
-	    }
-	    else if (*s == '-') {
-		/* comment */
-		char *com_beg = s;
-		s++;
-		if (s == end)
-		    goto PREMATURE;
-		if (*s != '-')
-		    goto FAIL;
-		s++;
-
-		while (1) {
-		    while (s < end && *s != '-')
-			s++;
-		    if (s == end)
-			goto PREMATURE;
-		    s++;
-		    if (s == end)
-			goto PREMATURE;
-		    if (*s == '-') {
-			s++;
-			PUSH_TOKEN(com_beg, s);
-			break;
-		    }
-		}
-	    }
-	    else if (*s != '>') {
-		/* plain word */
-		char *word_beg = s;
-		s++;
-		while (s < end && isHNOT_SPACE_GT(*s))
-		    s++;
-		if (s == end)
-		    goto PREMATURE;
-		PUSH_TOKEN(word_beg, s);
-	    }
-	    else {
-		break;
-	    }
-	}
-
-	if (s == end)
-	    goto PREMATURE;
-	if (*s == '>') {
-	    s++;
-	    report_event(p_state, E_DECLARATION, beg, s, utf8, tokens, num_tokens, self);
-	    FREE_TOKENS;
-	    return s;
-	}
-
-    FAIL:
-	FREE_TOKENS;
-	goto DECL_FAIL;
-
-    PREMATURE:
-	FREE_TOKENS;
-	return beg;
-
-    }
-
-DECL_FAIL:
-    if (p_state->strict_comment)
-	return 0;
-
-    /* consider everything up to the first '>' a comment */
-    while (s < end && *s != '>')
-	s++;
-    if (s < end) {
-	token_pos_t token;
-	token.beg = beg + 2;
-	token.end = s;
-	s++;
-	report_event(p_state, E_COMMENT, beg, s, utf8, &token, 1, self);
-	return s;
-    }
-    else {
-	return beg;
-    }
-}
-
-
-static char*
-parse_start(PSTATE* p_state, char *beg, char *end, U32 utf8, SV* self)
-{
-    char *s = beg;
-    int empty_tag = 0;
-    dTOKENS(16);
-
-    hctype_t tag_name_first, tag_name_char;
-    hctype_t attr_name_first, attr_name_char;
-
-    if (STRICT_NAMES(p_state)) {
-	tag_name_first = attr_name_first = HCTYPE_NAME_FIRST;
-	tag_name_char  = attr_name_char  = HCTYPE_NAME_CHAR;
-    }
-    else {
-	tag_name_first = tag_name_char = HCTYPE_NOT_SPACE_GT;
-	attr_name_first = HCTYPE_NOT_SPACE_GT;
-	attr_name_char  = HCTYPE_NOT_SPACE_EQ_GT;
-    }
-
-    s += 2;
-
-    while (s < end && isHCTYPE(*s, tag_name_char)) {
-	if (*s == '/' && ALLOW_EMPTY_TAG(p_state)) {
-	    if ((s + 1) == end)
-		goto PREMATURE;
-	    if (*(s + 1) == '>')
-		break;
-	}
-	s++;
-    }
-    PUSH_TOKEN(beg+1, s);  /* tagname */
-
-    while (isHSPACE(*s))
-	s++;
-    if (s == end)
-	goto PREMATURE;
-
-    while (isHCTYPE(*s, attr_name_first)) {
-	/* attribute */
-	char *attr_name_beg = s;
-	char *attr_name_end;
-	if (*s == '/' && ALLOW_EMPTY_TAG(p_state)) {
-	    if ((s + 1) == end)
-		goto PREMATURE;
-	    if (*(s + 1) == '>')
-		break;
-	}
-	s++;
-	while (s < end && isHCTYPE(*s, attr_name_char)) {
-	    if (*s == '/' && ALLOW_EMPTY_TAG(p_state)) {
-		if ((s + 1) == end)
-		    goto PREMATURE;
-		if (*(s + 1) == '>')
-		    break;
-	    }
-	    s++;
-	}
-	if (s == end)
-	    goto PREMATURE;
-
-	attr_name_end = s;
-	PUSH_TOKEN(attr_name_beg, attr_name_end); /* attr name */
-
-	while (isHSPACE(*s))
-	    s++;
-	if (s == end)
-	    goto PREMATURE;
-
-	if (*s == '=') {
-	    /* with a value */
-	    s++;
-	    while (isHSPACE(*s))
-		s++;
-	    if (s == end)
-		goto PREMATURE;
-	    if (*s == '>') {
-		/* parse it similar to ="" */
-		PUSH_TOKEN(s, s);
-		break;
-	    }
-	    if (*s == '"' || *s == '\'') {
-		char *str_beg = s;
-		s++;
-		while (s < end && *s != *str_beg)
-		    s++;
-		if (s == end)
-		    goto PREMATURE;
-		s++;
-		PUSH_TOKEN(str_beg, s);
-	    }
-	    else {
-		char *word_start = s;
-		while (s < end && isHNOT_SPACE_GT(*s)) {
-		    if (*s == '/' && ALLOW_EMPTY_TAG(p_state)) {
-			if ((s + 1) == end)
-			    goto PREMATURE;
-			if (*(s + 1) == '>')
-			    break;
-		    }
-		    s++;
-		}
-		if (s == end)
-		    goto PREMATURE;
-		PUSH_TOKEN(word_start, s);
-	    }
-	    while (isHSPACE(*s))
-		s++;
-	    if (s == end)
-		goto PREMATURE;
-	}
-	else {
-	    PUSH_TOKEN(0, 0); /* boolean attr value */
-	}
-    }
-
-    if (ALLOW_EMPTY_TAG(p_state) && *s == '/') {
-	s++;
-	if (s == end)
-	    goto PREMATURE;
-	empty_tag = 1;
-    }
-
-    if (*s == '>') {
-	s++;
-	/* done */
-	report_event(p_state, E_START, beg, s, utf8, tokens, num_tokens, self);
-	if (empty_tag) {
-	    report_event(p_state, E_END, s, s, utf8, tokens, 1, self);
-	}
-	else if (!p_state->xml_mode) {
-	    /* find out if this start tag should put us into literal_mode
-	     */
-	    int i;
-	    int tag_len = tokens[0].end - tokens[0].beg;
-
-	    for (i = 0; literal_mode_elem[i].len; i++) {
-		if (tag_len == literal_mode_elem[i].len) {
-		    /* try to match it */
-		    char *s = beg + 1;
-		    char *t = literal_mode_elem[i].str;
-		    int len = tag_len;
-		    while (len) {
-			if (toLOWER(*s) != *t)
-			    break;
-			s++;
-			t++;
-			if (!--len) {
-			    /* found it */
-			    p_state->literal_mode = literal_mode_elem[i].str;
-			    p_state->is_cdata = literal_mode_elem[i].is_cdata;
-			    /* printf("Found %s\n", p_state->literal_mode); */
-			    goto END_OF_LITERAL_SEARCH;
-			}
-		    }
-		}
-	    }
-	END_OF_LITERAL_SEARCH:
-	    ;
-	}
-
-	FREE_TOKENS;
-	return s;
-    }
-  
-    FREE_TOKENS;
-    return 0;
-
-PREMATURE:
-    FREE_TOKENS;
-    return beg;
-}
-
-
-static char*
-parse_end(PSTATE* p_state, char *beg, char *end, U32 utf8, SV* self)
-{
-    char *s = beg+2;
-    hctype_t name_first, name_char;
-
-    if (STRICT_NAMES(p_state)) {
-	name_first = HCTYPE_NAME_FIRST;
-	name_char  = HCTYPE_NAME_CHAR;
-    }
-    else {
-	name_first = name_char = HCTYPE_NOT_SPACE_GT;
-    }
-
-    if (isHCTYPE(*s, name_first)) {
-	token_pos_t tagname;
-	tagname.beg = s;
-	s++;
-	while (s < end && isHCTYPE(*s, name_char))
-	    s++;
-	tagname.end = s;
-
-	if (p_state->strict_end) {
-	    while (isHSPACE(*s))
-		s++;
-	}
-	else {
-	    s = skip_until_gt(s, end);
-	}
-	if (s < end) {
-	    if (*s == '>') {
-		s++;
-		/* a complete end tag has been recognized */
-		report_event(p_state, E_END, beg, s, utf8, &tagname, 1, self);
-		return s;
-	    }
-	}
-	else {
-	    return beg;
-	}
-    }
-    else if (!p_state->strict_comment) {
-	s = skip_until_gt(s, end);
-	if (s < end) {
-	    token_pos_t token;
-	    token.beg = beg + 2;
-	    token.end = s;
-	    s++;
-	    report_event(p_state, E_COMMENT, beg, s, utf8, &token, 1, self);
-	    return s;
-	}
-	else {
-	    return beg;
-	}
-    }
-    return 0;
-}
-
-
-static char*
-parse_process(PSTATE* p_state, char *beg, char *end, U32 utf8, SV* self)
-{
-    char *s = beg + 2;  /* skip '<?' */
-    /* processing instruction */
-    token_pos_t token_pos;
-    token_pos.beg = s;
-
-    while (s < end) {
-	if (*s == '>') {
-	    token_pos.end = s;
-	    s++;
-
-	    if (p_state->xml_mode || p_state->xml_pic) {
-		/* XML processing instructions are ended by "?>" */
-		if (s - beg < 4 || s[-2] != '?')
-		    continue;
-		token_pos.end = s - 2;
-	    }
-      
-	    /* a complete processing instruction seen */
-	    report_event(p_state, E_PROCESS, beg, s, utf8, 
-			 &token_pos, 1, self);
-	    return s;
-	}
-	s++;
-    }
-    return beg;  /* could not fix end */
-}
-
-
-#ifdef USE_PFUNC
-static char*
-parse_null(PSTATE* p_state, char *beg, char *end, U32 utf8, SV* self)
-{
-    return 0;
-}
-
-
-
-#include "pfunc.h"                   /* declares the parsefunc[] */
-#endif /* USE_PFUNC */
-
-static char*
-parse_buf(pTHX_ PSTATE* p_state, char *beg, char *end, U32 utf8, SV* self)
-{
-    char *s = beg;
-    char *t = beg;
-    char *new_pos;
-
-    while (!p_state->eof) {
-	/*
-	 * At the start of this loop we will always be ready for eating text
-	 * or a new tag.  We will never be inside some tag.  The 't' points
-	 * to where we started and the 's' is advanced as we go.
-	 */
-
-	while (p_state->literal_mode) {
-	    char *l = p_state->literal_mode;
-	    bool skip_quoted_end = (strEQ(l, "script") || strEQ(l, "style"));
-	    char inside_quote = 0;
-	    bool escape_next = 0;
-	    char *end_text;
-
-	    while (s < end) {
-		if (*s == '<' && !inside_quote)
-		    break;
-		if (skip_quoted_end) {
-		    if (escape_next) {
-			escape_next = 0;
-		    }
-		    else {
-			if (*s == '\\')
-			    escape_next = 1;
-			else if (inside_quote && *s == inside_quote)
-			    inside_quote = 0;
-			else if (*s == '\r' || *s == '\n')
-			    inside_quote = 0;
-			else if (!inside_quote && (*s == '"' || *s == '\''))
-			    inside_quote = *s;
-		    }
-		}
-		s++;
-	    }
-
-	    if (s == end) {
-		s = t;
-		goto DONE;
-	    }
-
-	    end_text = s;
-	    s++;
-      
-	    /* here we rely on '\0' termination of perl svpv buffers */
-	    if (*s == '/') {
-		s++;
-		while (*l && toLOWER(*s) == *l) {
-		    s++;
-		    l++;
-		}
-
-		if (!*l && (strNE(p_state->literal_mode, "plaintext") || p_state->closing_plaintext)) {
-		    /* matched it all */
-		    token_pos_t end_token;
-		    end_token.beg = end_text + 2;
-		    end_token.end = s;
-
-		    while (isHSPACE(*s))
-			s++;
-		    if (*s == '>') {
-			s++;
-			if (t != end_text)
-			    report_event(p_state, E_TEXT, t, end_text, utf8,
-					 0, 0, self);
-			report_event(p_state, E_END,  end_text, s, utf8,
-				     &end_token, 1, self);
-			p_state->literal_mode = 0;
-			p_state->is_cdata = 0;
-			t = s;
-		    }
-		}
-	    }
-	}
-
-#ifdef MARKED_SECTION
-	while (p_state->ms == MS_CDATA || p_state->ms == MS_RCDATA) {
-	    while (s < end && *s != ']')
-		s++;
-	    if (*s == ']') {
-		char *end_text = s;
-		s++;
-		if (*s == ']' && *(s + 1) == '>') {
-		    s += 2;
-		    /* marked section end */
-		    if (t != end_text)
-			report_event(p_state, E_TEXT, t, end_text, utf8,
-				     0, 0, self);
-		    report_event(p_state, E_NONE, end_text, s, utf8, 0, 0, self);
-		    t = s;
-		    SvREFCNT_dec(av_pop(p_state->ms_stack));
-		    marked_section_update(p_state);
-		    continue;
-		}
-	    }
-	    if (s == end) {
-		s = t;
-		goto DONE;
-	    }
-	}
-#endif
-
-	/* first we try to match as much text as possible */
-	while (s < end && *s != '<') {
-#ifdef MARKED_SECTION
-	    if (p_state->ms && *s == ']') {
-		char *end_text = s;
-		s++;
-		if (*s == ']') {
-		    s++;
-		    if (*s == '>') {
-			s++;
-			report_event(p_state, E_TEXT, t, end_text, utf8,
-				     0, 0, self);
-			report_event(p_state, E_NONE, end_text, s, utf8,
-				     0, 0, self);
-			t = s;
-			SvREFCNT_dec(av_pop(p_state->ms_stack));
-			marked_section_update(p_state);    
-			continue;
-		    }
-		}
-	    }
-#endif
-	    s++;
-	}
-	if (s != t) {
-	    if (*s == '<') {
-		report_event(p_state, E_TEXT, t, s, utf8, 0, 0, self);
-		t = s;
-	    }
-	    else {
-		s--;
-		if (isHSPACE(*s)) {
-		    /* wait with white space at end */
-		    while (s >= t && isHSPACE(*s))
-			s--;
-		}
-		else {
-		    /* might be a chopped up entities/words */
-		    while (s >= t && !isHSPACE(*s))
-			s--;
-		    while (s >= t && isHSPACE(*s))
-			s--;
-		}
-		s++;
-		if (s != t)
-		    report_event(p_state, E_TEXT, t, s, utf8, 0, 0, self);
-		break;
-	    }
-	}
-
-	if (end - s < 3)
-	    break;
-
-	/* next char is known to be '<' and pointed to by 't' as well as 's' */
-	s++;
-
-#ifdef USE_PFUNC
-	new_pos = parsefunc[(unsigned char)*s](p_state, t, end, utf8, self);
-#else
-	if (isHNAME_FIRST(*s))
-	    new_pos = parse_start(p_state, t, end, utf8, self);
-	else if (*s == '/')
-	    new_pos = parse_end(p_state, t, end, utf8, self);
-	else if (*s == '!')
-	    new_pos = parse_decl(p_state, t, end, utf8, self);
-	else if (*s == '?')
-	    new_pos = parse_process(p_state, t, end, utf8, self);
-	else
-	    new_pos = 0;
-#endif /* USE_PFUNC */
-
-	if (new_pos) {
-	    if (new_pos == t) {
-		/* no progress, need more data to know what it is */
-		s = t;
-		break;
-	    }
-	    t = s = new_pos;
-	}
-
-	/* if we get out here then this was not a conforming tag, so
-	 * treat it is plain text at the top of the loop again (we
-	 * have already skipped past the "<").
-	 */
-    }
-
-DONE:
-    return s;
-
-}
-
-EXTERN void
-parse(pTHX_
-      PSTATE* p_state,
-      SV* chunk,
-      SV* self)
-{
-    char *s, *beg, *end;
-    U32 utf8 = 0;
-    STRLEN len;
-
-    if (!p_state->start_document) {
-	char dummy[1];
-	report_event(p_state, E_START_DOCUMENT, dummy, dummy, 0, 0, 0, self);
-	p_state->start_document = 1;
-    }
-
-    if (!chunk) {
-	/* eof */
-	char empty[1];
-	if (p_state->buf && SvOK(p_state->buf)) {
-	    /* flush it */
-	    s = SvPV(p_state->buf, len);
-	    end = s + len;
-	    utf8 = SvUTF8(p_state->buf);
-	    assert(len);
-
-	    while (s < end) {
-		if (p_state->literal_mode) {
-		    if (strEQ(p_state->literal_mode, "plaintext") ||
-			strEQ(p_state->literal_mode, "xmp") ||
-			strEQ(p_state->literal_mode, "textarea"))
-		    {
-			/* rest is considered text */
-			break;
-                    }
-		    if (strEQ(p_state->literal_mode, "script") ||
-			strEQ(p_state->literal_mode, "style"))
-		    {
-			/* effectively make it an empty element */
-			token_pos_t t;
-			char dummy;
-			t.beg = p_state->literal_mode;
-			t.end = p_state->literal_mode + strlen(p_state->literal_mode);
-			report_event(p_state, E_END, &dummy, &dummy, 0, &t, 1, self);
-		    }
-		    else {
-			p_state->pending_end_tag = p_state->literal_mode;
-		    }
-		    p_state->literal_mode = 0;
-		    s = parse_buf(aTHX_ p_state, s, end, utf8, self);
-		    continue;
-		}
-
-		if (!p_state->strict_comment && !p_state->no_dash_dash_comment_end && *s == '<') {
-		    p_state->no_dash_dash_comment_end = 1;
-		    s = parse_buf(aTHX_ p_state, s, end, utf8, self);
-		    continue;
-		}
-
-		if (!p_state->strict_comment && *s == '<') {
-		    char *s1 = s + 1;
-		    if (s1 == end || isHNAME_FIRST(*s1) || *s1 == '/' || *s1 == '!' || *s1 == '?') {
-			/* some kind of unterminated markup.  Report rest as as comment */
-			token_pos_t token;
-			token.beg = s + 1;
-			token.end = end;
-			report_event(p_state, E_COMMENT, s, end, utf8, &token, 1, self);
-			s = end;
-		    }
-		}
-
-		break;
-	    }
-
-	    if (s < end) {
-		/* report rest as text */
-		report_event(p_state, E_TEXT, s, end, utf8, 0, 0, self);
-	    }
-	    
-	    SvREFCNT_dec(p_state->buf);
-	    p_state->buf = 0;
-	}
-	if (p_state->pend_text && SvOK(p_state->pend_text))
-	    flush_pending_text(p_state, self);
-
-	if (p_state->ignoring_element) {
-	    /* document not balanced */
-	    SvREFCNT_dec(p_state->ignoring_element);
-	    p_state->ignoring_element = 0;
-	}
-	report_event(p_state, E_END_DOCUMENT, empty, empty, 0, 0, 0, self);
-
-	/* reset state */
-	p_state->offset = 0;
-	if (p_state->line)
-	    p_state->line = 1;
-	p_state->column = 0;
-	p_state->start_document = 0;
-	p_state->literal_mode = 0;
-	p_state->is_cdata = 0;
-	return;
-    }
-
-#ifdef UNICODE_HTML_PARSER
-    if (p_state->utf8_mode)
-	sv_utf8_downgrade(chunk, 0);
-#endif
-
-    if (p_state->buf && SvOK(p_state->buf)) {
-	sv_catsv(p_state->buf, chunk);
-	beg = SvPV(p_state->buf, len);
-	utf8 = SvUTF8(p_state->buf);
-    }
-    else {
-	beg = SvPV(chunk, len);
-	utf8 = SvUTF8(chunk);
-	if (p_state->offset == 0 && DOWARN) {
-	    /* Print warnings if we find unexpected Unicode BOM forms */
-#ifdef UNICODE_HTML_PARSER
-	    if (p_state->argspec_entity_decode &&
-		!p_state->utf8_mode && (
-                 (!utf8 && len >= 3 && strnEQ(beg, "\xEF\xBB\xBF", 3)) ||
-		 (utf8 && len >= 6 && strnEQ(beg, "\xC3\xAF\xC2\xBB\xC2\xBF", 6)) ||
-		 (!utf8 && probably_utf8_chunk(aTHX_ beg, len))
-		)
-	       )
-	    {
-		warn("Parsing of undecoded UTF-8 will give garbage when decoding entities");
-	    }
-	    if (utf8 && len >= 2 && strnEQ(beg, "\xFF\xFE", 2)) {
-		warn("Parsing string decoded with wrong endianess");
-	    }
-#endif
-	    if (!utf8 && len >= 4 &&
-		(strnEQ(beg, "\x00\x00\xFE\xFF", 4) ||
-		 strnEQ(beg, "\xFE\xFF\x00\x00", 4))
-		)
-	    {
-		warn("Parsing of undecoded UTF-32");
-	    }
-	    else if (!utf8 && len >= 2 &&
-		     (strnEQ(beg, "\xFE\xFF", 2) || strnEQ(beg, "\xFF\xFE", 2))
-		)
-	    {
-		warn("Parsing of undecoded UTF-16");
-	    }
-	}
-    }
-
-    if (!len)
-	return; /* nothing to do */
-
-    end = beg + len;
-    s = parse_buf(aTHX_ p_state, beg, end, utf8, self);
-
-    if (s == end || p_state->eof) {
-	if (p_state->buf) {
-	    SvOK_off(p_state->buf);
-	}
-    }
-    else {
-	/* need to keep rest in buffer */
-	if (p_state->buf) {
-	    /* chop off some chars at the beginning */
-	    if (SvOK(p_state->buf)) {
-		sv_chop(p_state->buf, s);
-	    }
-	    else {
-		sv_setpvn(p_state->buf, s, end - s);
-		if (utf8)
-		    SvUTF8_on(p_state->buf);
-		else
-		    SvUTF8_off(p_state->buf);
-	    }
-	}
-	else {
-	    p_state->buf = newSVpv(s, end - s);
-	    if (utf8)
-		SvUTF8_on(p_state->buf);
-	}
-    }
-    return;
-}
diff --git a/ext/HTML/Parser/hparser.h b/ext/HTML/Parser/hparser.h
deleted file mode 100644
index e7d1ee823b..0000000000
--- a/ext/HTML/Parser/hparser.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/* $Id: hparser.h,v 2.34 2006/04/26 07:01:10 gisle Exp $
- *
- * Copyright 1999-2005, Gisle Aas
- * Copyright 1999-2000, Michael A. Chase
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the same terms as Perl itself.
- */
-
-/*
- * Declare various structures and constants.  The main thing
- * is 'struct p_state' that contains various fields to represent
- * the state of the parser.
- */
-
-#ifdef MARKED_SECTION
-
-enum marked_section_t {
-    MS_NONE = 0,
-    MS_INCLUDE,
-    MS_RCDATA,
-    MS_CDATA,
-    MS_IGNORE
-};
-
-#endif /* MARKED_SECTION */
-
-
-#define P_SIGNATURE 0x16091964  /* tag struct p_state for safer cast */
-
-enum event_id {
-    E_DECLARATION = 0,
-    E_COMMENT,
-    E_START,
-    E_END,
-    E_TEXT,
-    E_PROCESS,
-    E_START_DOCUMENT,
-    E_END_DOCUMENT,
-    E_DEFAULT,
-    /**/
-    EVENT_COUNT,
-    E_NONE   /* used for reporting skipped text (non-events) */
-};
-typedef enum event_id event_id_t;
-
-/* must match event_id_t above */
-static char* event_id_str[] = {
-    "declaration",
-    "comment",
-    "start",
-    "end",
-    "text",
-    "process",
-    "start_document",
-    "end_document",
-    "default",
-};
-
-struct p_handler {
-    SV* cb;
-    SV* argspec;
-};
-
-struct p_state {
-    U32 signature;
-
-    /* state */
-    SV* buf;
-    STRLEN offset;
-    STRLEN line;
-    STRLEN column;
-    bool start_document;
-    bool parsing;
-    bool eof;
-
-    /* special parsing modes */
-    char* literal_mode;
-    bool  is_cdata;
-    bool  no_dash_dash_comment_end;
-    char *pending_end_tag;
-
-    /* unbroken_text option needs a buffer of pending text */
-    SV*    pend_text;
-    bool   pend_text_is_cdata;
-    STRLEN pend_text_offset;
-    STRLEN pend_text_line;
-    STRLEN pend_text_column;
-
-    /* skipped text is accumulated here */
-    SV* skipped_text;
-
-#ifdef MARKED_SECTION
-    /* marked section support */
-    enum marked_section_t ms;
-    AV* ms_stack;
-    bool marked_sections;
-#endif
-
-    /* various boolean configuration attributes */
-    bool strict_comment;
-    bool strict_names;
-    bool strict_end;
-    bool xml_mode;
-    bool unbroken_text;
-    bool attr_encoded;
-    bool case_sensitive;
-    bool closing_plaintext;
-    bool utf8_mode;
-    bool empty_element_tags;
-    bool xml_pic;
-
-    /* other configuration stuff */
-    SV* bool_attr_val;
-    struct p_handler handlers[EVENT_COUNT];
-    bool argspec_entity_decode;
-
-    /* filters */
-    HV* report_tags;
-    HV* ignore_tags;
-    HV* ignore_elements;
-
-    /* these are set when we are currently inside an element we want to ignore */
-    SV* ignoring_element;
-    int ignore_depth;
-
-    /* cache */
-    HV* entity2char;            /* %HTML::Entities::entity2char */
-    SV* tmp;
-};
-typedef struct p_state PSTATE;
-
diff --git a/ext/HTML/Parser/lib/HTML/Entities.pm b/ext/HTML/Parser/lib/HTML/Entities.pm
deleted file mode 100644
index 1e7dfc1f06..0000000000
--- a/ext/HTML/Parser/lib/HTML/Entities.pm
+++ /dev/null
@@ -1,491 +0,0 @@
-package HTML::Entities;
-
-# $Id: Entities.pm,v 1.35 2006/03/22 09:15:23 gisle Exp $
-
-=head1 NAME
-
-HTML::Entities - Encode or decode strings with HTML entities
-
-=head1 SYNOPSIS
-
- use HTML::Entities;
-
- $a = "V&aring;re norske tegn b&oslash;r &#230res";
- decode_entities($a);
- encode_entities($a, "\200-\377");
-
-For example, this:
-
- $input = "vis-�-vis Beyonc�'s na�ve\npapier-m�ch� r�sum�";
- print encode_entities($input), "\n"
-
-Prints this out:
-
- vis-&agrave;-vis Beyonc&eacute;'s na&iuml;ve
- papier-m&acirc;ch&eacute; r&eacute;sum&eacute;
-
-=head1 DESCRIPTION
-
-This module deals with encoding and decoding of strings with HTML
-character entities.  The module provides the following functions:
-
-=over 4
-
-=item decode_entities( $string, ... )
-
-This routine replaces HTML entities found in the $string with the
-corresponding Unicode character.  Under perl 5.6 and earlier only
-characters in the Latin-1 range are replaced. Unrecognized
-entities are left alone.
-
-If multiple strings are provided as argument they are each decoded
-separately and the same number of strings are returned.
-
-If called in void context the arguments are decoded in-place.
-
-This routine is exported by default.
-
-=item _decode_entities( $string, \%entity2char )
-
-=item _decode_entities( $string, \%entity2char, $expand_prefix )
-
-This will in-place replace HTML entities in $string.  The %entity2char
-hash must be provided.  Named entities not found in the %entity2char
-hash are left alone.  Numeric entities are expanded unless their value
-overflow.
-
-The keys in %entity2char are the entity names to be expanded and their
-values are what they should expand into.  The values do not have to be
-single character strings.  If a key has ";" as suffix,
-then occurrences in $string are only expanded if properly terminated
-with ";".  Entities without ";" will be expanded regardless of how
-they are terminated for compatiblity with how common browsers treat
-entities in the Latin-1 range.
-
-If $expand_prefix is TRUE then entities without trailing ";" in
-%entity2char will even be expanded as a prefix of a longer
-unrecognized name.  The longest matching name in %entity2char will be
-used. This is mainly present for compatibility with an MSIE
-misfeature.
-
-   $string = "foo&nbspbar";
-   _decode_entities($string, { nb => "@", nbsp => "\xA0" }, 1);
-   print $string;  # will print "foo�bar"
-
-This routine is exported by default.
-
-=item encode_entities( $string )
-
-=item encode_entities( $string, $unsafe_chars )
-
-This routine replaces unsafe characters in $string with their entity
-representation. A second argument can be given to specify which
-characters to consider unsafe (i.e., which to escape). The default set
-of characters to encode are control chars, high-bit chars, and the
-C<< < >>, C<< & >>, C<< > >>, C<< ' >> and C<< " >>
-characters.  But this, for example, would encode I<just> the
-C<< < >>, C<< & >>, C<< > >>, and C<< " >> characters:
-
-  $encoded = encode_entities($input, '<>&"');
-
-This routine is exported by default.
-
-=item encode_entities_numeric( $string )
-
-=item encode_entities_numeric( $string, $unsafe_chars )
-
-This routine works just like encode_entities, except that the replacement
-entities are always C<&#xI<hexnum>;> and never C<&I<entname>;>.  For
-example, C<encode_entities("r\xF4le")> returns "r&ocirc;le", but
-C<encode_entities_numeric("r\xF4le")> returns "r&#xF4;le".
-
-This routine is I<not> exported by default.  But you can always
-export it with C<use HTML::Entities qw(encode_entities_numeric);>
-or even C<use HTML::Entities qw(:DEFAULT encode_entities_numeric);>
-
-=back
-
-All these routines modify the string passed as the first argument, if
-called in a void context.  In scalar and array contexts, the encoded or
-decoded string is returned (without changing the input string).
-
-If you prefer not to import these routines into your namespace, you can
-call them as:
-
-  use HTML::Entities ();
-  $decoded = HTML::Entities::decode($a);
-  $encoded = HTML::Entities::encode($a);
-  $encoded = HTML::Entities::encode_numeric($a);
-
-The module can also export the %char2entity and the %entity2char
-hashes, which contain the mapping from all characters to the
-corresponding entities (and vice versa, respectively).
-
-=head1 COPYRIGHT
-
-Copyright 1995-2006 Gisle Aas. All rights reserved.
-
-This library is free software; you can redistribute it and/or
-modify it under the same terms as Perl itself.
-
-=cut
-
-use strict;
-use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION);
-use vars qw(%entity2char %char2entity);
-
-require 5.004;
-require Exporter;
-@ISA = qw(Exporter);
-
-@EXPORT = qw(encode_entities decode_entities _decode_entities);
-@EXPORT_OK = qw(%entity2char %char2entity encode_entities_numeric);
-
-$VERSION = sprintf("%d.%02d", q$Revision: 1.35 $ =~ /(\d+)\.(\d+)/);
-sub Version { $VERSION; }
-
-require HTML::Parser;  # for fast XS implemented decode_entities
-
-
-%entity2char = (
- # Some normal chars that have special meaning in SGML context
- amp    => '&',  # ampersand 
-'gt'    => '>',  # greater than
-'lt'    => '<',  # less than
- quot   => '"',  # double quote
- apos   => "'",  # single quote
-
- # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
- AElig	=> chr(198),  # capital AE diphthong (ligature)
- Aacute	=> chr(193),  # capital A, acute accent
- Acirc	=> chr(194),  # capital A, circumflex accent
- Agrave	=> chr(192),  # capital A, grave accent
- Aring	=> chr(197),  # capital A, ring
- Atilde	=> chr(195),  # capital A, tilde
- Auml	=> chr(196),  # capital A, dieresis or umlaut mark
- Ccedil	=> chr(199),  # capital C, cedilla
- ETH	=> chr(208),  # capital Eth, Icelandic
- Eacute	=> chr(201),  # capital E, acute accent
- Ecirc	=> chr(202),  # capital E, circumflex accent
- Egrave	=> chr(200),  # capital E, grave accent
- Euml	=> chr(203),  # capital E, dieresis or umlaut mark
- Iacute	=> chr(205),  # capital I, acute accent
- Icirc	=> chr(206),  # capital I, circumflex accent
- Igrave	=> chr(204),  # capital I, grave accent
- Iuml	=> chr(207),  # capital I, dieresis or umlaut mark
- Ntilde	=> chr(209),  # capital N, tilde
- Oacute	=> chr(211),  # capital O, acute accent
- Ocirc	=> chr(212),  # capital O, circumflex accent
- Ograve	=> chr(210),  # capital O, grave accent
- Oslash	=> chr(216),  # capital O, slash
- Otilde	=> chr(213),  # capital O, tilde
- Ouml	=> chr(214),  # capital O, dieresis or umlaut mark
- THORN	=> chr(222),  # capital THORN, Icelandic
- Uacute	=> chr(218),  # capital U, acute accent
- Ucirc	=> chr(219),  # capital U, circumflex accent
- Ugrave	=> chr(217),  # capital U, grave accent
- Uuml	=> chr(220),  # capital U, dieresis or umlaut mark
- Yacute	=> chr(221),  # capital Y, acute accent
- aacute	=> chr(225),  # small a, acute accent
- acirc	=> chr(226),  # small a, circumflex accent
- aelig	=> chr(230),  # small ae diphthong (ligature)
- agrave	=> chr(224),  # small a, grave accent
- aring	=> chr(229),  # small a, ring
- atilde	=> chr(227),  # small a, tilde
- auml	=> chr(228),  # small a, dieresis or umlaut mark
- ccedil	=> chr(231),  # small c, cedilla
- eacute	=> chr(233),  # small e, acute accent
- ecirc	=> chr(234),  # small e, circumflex accent
- egrave	=> chr(232),  # small e, grave accent
- eth	=> chr(240),  # small eth, Icelandic
- euml	=> chr(235),  # small e, dieresis or umlaut mark
- iacute	=> chr(237),  # small i, acute accent
- icirc	=> chr(238),  # small i, circumflex accent
- igrave	=> chr(236),  # small i, grave accent
- iuml	=> chr(239),  # small i, dieresis or umlaut mark
- ntilde	=> chr(241),  # small n, tilde
- oacute	=> chr(243),  # small o, acute accent
- ocirc	=> chr(244),  # small o, circumflex accent
- ograve	=> chr(242),  # small o, grave accent
- oslash	=> chr(248),  # small o, slash
- otilde	=> chr(245),  # small o, tilde
- ouml	=> chr(246),  # small o, dieresis or umlaut mark
- szlig	=> chr(223),  # small sharp s, German (sz ligature)
- thorn	=> chr(254),  # small thorn, Icelandic
- uacute	=> chr(250),  # small u, acute accent
- ucirc	=> chr(251),  # small u, circumflex accent
- ugrave	=> chr(249),  # small u, grave accent
- uuml	=> chr(252),  # small u, dieresis or umlaut mark
- yacute	=> chr(253),  # small y, acute accent
- yuml	=> chr(255),  # small y, dieresis or umlaut mark
-
- # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
- copy   => chr(169),  # copyright sign
- reg    => chr(174),  # registered sign
- nbsp   => chr(160),  # non breaking space
-
- # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
- iexcl  => chr(161),
- cent   => chr(162),
- pound  => chr(163),
- curren => chr(164),
- yen    => chr(165),
- brvbar => chr(166),
- sect   => chr(167),
- uml    => chr(168),
- ordf   => chr(170),
- laquo  => chr(171),
-'not'   => chr(172),    # not is a keyword in perl
- shy    => chr(173),
- macr   => chr(175),
- deg    => chr(176),
- plusmn => chr(177),
- sup1   => chr(185),
- sup2   => chr(178),
- sup3   => chr(179),
- acute  => chr(180),
- micro  => chr(181),
- para   => chr(182),
- middot => chr(183),
- cedil  => chr(184),
- ordm   => chr(186),
- raquo  => chr(187),
- frac14 => chr(188),
- frac12 => chr(189),
- frac34 => chr(190),
- iquest => chr(191),
-'times' => chr(215),    # times is a keyword in perl
- divide => chr(247),
-
- ( $] > 5.007 ? (
-  'OElig;'    => chr(338),
-  'oelig;'    => chr(339),
-  'Scaron;'   => chr(352),
-  'scaron;'   => chr(353),
-  'Yuml;'     => chr(376),
-  'fnof;'     => chr(402),
-  'circ;'     => chr(710),
-  'tilde;'    => chr(732),
-  'Alpha;'    => chr(913),
-  'Beta;'     => chr(914),
-  'Gamma;'    => chr(915),
-  'Delta;'    => chr(916),
-  'Epsilon;'  => chr(917),
-  'Zeta;'     => chr(918),
-  'Eta;'      => chr(919),
-  'Theta;'    => chr(920),
-  'Iota;'     => chr(921),
-  'Kappa;'    => chr(922),
-  'Lambda;'   => chr(923),
-  'Mu;'       => chr(924),
-  'Nu;'       => chr(925),
-  'Xi;'       => chr(926),
-  'Omicron;'  => chr(927),
-  'Pi;'       => chr(928),
-  'Rho;'      => chr(929),
-  'Sigma;'    => chr(931),
-  'Tau;'      => chr(932),
-  'Upsilon;'  => chr(933),
-  'Phi;'      => chr(934),
-  'Chi;'      => chr(935),
-  'Psi;'      => chr(936),
-  'Omega;'    => chr(937),
-  'alpha;'    => chr(945),
-  'beta;'     => chr(946),
-  'gamma;'    => chr(947),
-  'delta;'    => chr(948),
-  'epsilon;'  => chr(949),
-  'zeta;'     => chr(950),
-  'eta;'      => chr(951),
-  'theta;'    => chr(952),
-  'iota;'     => chr(953),
-  'kappa;'    => chr(954),
-  'lambda;'   => chr(955),
-  'mu;'       => chr(956),
-  'nu;'       => chr(957),
-  'xi;'       => chr(958),
-  'omicron;'  => chr(959),
-  'pi;'       => chr(960),
-  'rho;'      => chr(961),
-  'sigmaf;'   => chr(962),
-  'sigma;'    => chr(963),
-  'tau;'      => chr(964),
-  'upsilon;'  => chr(965),
-  'phi;'      => chr(966),
-  'chi;'      => chr(967),
-  'psi;'      => chr(968),
-  'omega;'    => chr(969),
-  'thetasym;' => chr(977),
-  'upsih;'    => chr(978),
-  'piv;'      => chr(982),
-  'ensp;'     => chr(8194),
-  'emsp;'     => chr(8195),
-  'thinsp;'   => chr(8201),
-  'zwnj;'     => chr(8204),
-  'zwj;'      => chr(8205),
-  'lrm;'      => chr(8206),
-  'rlm;'      => chr(8207),
-  'ndash;'    => chr(8211),
-  'mdash;'    => chr(8212),
-  'lsquo;'    => chr(8216),
-  'rsquo;'    => chr(8217),
-  'sbquo;'    => chr(8218),
-  'ldquo;'    => chr(8220),
-  'rdquo;'    => chr(8221),
-  'bdquo;'    => chr(8222),
-  'dagger;'   => chr(8224),
-  'Dagger;'   => chr(8225),
-  'bull;'     => chr(8226),
-  'hellip;'   => chr(8230),
-  'permil;'   => chr(8240),
-  'prime;'    => chr(8242),
-  'Prime;'    => chr(8243),
-  'lsaquo;'   => chr(8249),
-  'rsaquo;'   => chr(8250),
-  'oline;'    => chr(8254),
-  'frasl;'    => chr(8260),
-  'euro;'     => chr(8364),
-  'image;'    => chr(8465),
-  'weierp;'   => chr(8472),
-  'real;'     => chr(8476),
-  'trade;'    => chr(8482),
-  'alefsym;'  => chr(8501),
-  'larr;'     => chr(8592),
-  'uarr;'     => chr(8593),
-  'rarr;'     => chr(8594),
-  'darr;'     => chr(8595),
-  'harr;'     => chr(8596),
-  'crarr;'    => chr(8629),
-  'lArr;'     => chr(8656),
-  'uArr;'     => chr(8657),
-  'rArr;'     => chr(8658),
-  'dArr;'     => chr(8659),
-  'hArr;'     => chr(8660),
-  'forall;'   => chr(8704),
-  'part;'     => chr(8706),
-  'exist;'    => chr(8707),
-  'empty;'    => chr(8709),
-  'nabla;'    => chr(8711),
-  'isin;'     => chr(8712),
-  'notin;'    => chr(8713),
-  'ni;'       => chr(8715),
-  'prod;'     => chr(8719),
-  'sum;'      => chr(8721),
-  'minus;'    => chr(8722),
-  'lowast;'   => chr(8727),
-  'radic;'    => chr(8730),
-  'prop;'     => chr(8733),
-  'infin;'    => chr(8734),
-  'ang;'      => chr(8736),
-  'and;'      => chr(8743),
-  'or;'       => chr(8744),
-  'cap;'      => chr(8745),
-  'cup;'      => chr(8746),
-  'int;'      => chr(8747),
-  'there4;'   => chr(8756),
-  'sim;'      => chr(8764),
-  'cong;'     => chr(8773),
-  'asymp;'    => chr(8776),
-  'ne;'       => chr(8800),
-  'equiv;'    => chr(8801),
-  'le;'       => chr(8804),
-  'ge;'       => chr(8805),
-  'sub;'      => chr(8834),
-  'sup;'      => chr(8835),
-  'nsub;'     => chr(8836),
-  'sube;'     => chr(8838),
-  'supe;'     => chr(8839),
-  'oplus;'    => chr(8853),
-  'otimes;'   => chr(8855),
-  'perp;'     => chr(8869),
-  'sdot;'     => chr(8901),
-  'lceil;'    => chr(8968),
-  'rceil;'    => chr(8969),
-  'lfloor;'   => chr(8970),
-  'rfloor;'   => chr(8971),
-  'lang;'     => chr(9001),
-  'rang;'     => chr(9002),
-  'loz;'      => chr(9674),
-  'spades;'   => chr(9824),
-  'clubs;'    => chr(9827),
-  'hearts;'   => chr(9829),
-  'diams;'    => chr(9830),
- ) : ())
-);
-
-
-# Make the opposite mapping
-while (my($entity, $char) = each(%entity2char)) {
-    $entity =~ s/;\z//;
-    $char2entity{$char} = "&$entity;";
-}
-delete $char2entity{"'"};  # only one-way decoding
-
-# Fill in missing entities
-for (0 .. 255) {
-    next if exists $char2entity{chr($_)};
-    $char2entity{chr($_)} = "&#$_;";
-}
-
-my %subst;  # compiled encoding regexps
-
-sub decode_entities_old
-{
-    my $array;
-    if (defined wantarray) {
-	$array = [@_]; # copy
-    } else {
-	$array = \@_;  # modify in-place
-    }
-    my $c;
-    for (@$array) {
-	s/(&\#(\d+);?)/$2 < 256 ? chr($2) : $1/eg;
-	s/(&\#[xX]([0-9a-fA-F]+);?)/$c = hex($2); $c < 256 ? chr($c) : $1/eg;
-	s/(&(\w+);?)/$entity2char{$2} || $1/eg;
-    }
-    wantarray ? @$array : $array->[0];
-}
-
-sub encode_entities
-{
-    my $ref;
-    if (defined wantarray) {
-	my $x = $_[0];
-	$ref = \$x;     # copy
-    } else {
-	$ref = \$_[0];  # modify in-place
-    }
-    if (defined $_[1] and length $_[1]) {
-	unless (exists $subst{$_[1]}) {
-	    # Because we can't compile regex we fake it with a cached sub
-	    my $code = "sub {\$_[0] =~ s/([$_[1]])/\$char2entity{\$1} || num_entity(\$1)/ge; }";
-	    $subst{$_[1]} = eval $code;
-	    die( $@ . " while trying to turn range: \"$_[1]\"\n "
-	      . "into code: $code\n "
-	    ) if $@;
-	}
-	&{$subst{$_[1]}}($$ref);
-    } else {
-	# Encode control chars, high bit chars and '<', '&', '>', ''' and '"'
-	$$ref =~ s/([^\n\r\t !\#\$%\(-;=?-~])/$char2entity{$1} || num_entity($1)/ge;
-    }
-    $$ref;
-}
-
-sub encode_entities_numeric {
-    local %char2entity;
-    return &encode_entities;   # a goto &encode_entities wouldn't work
-}
-
-
-sub num_entity {
-    sprintf "&#x%X;", ord($_[0]);
-}
-
-# Set up aliases
-*encode = \&encode_entities;
-*encode_numeric = \&encode_entities_numeric;
-*encode_numerically = \&encode_entities_numeric;
-*decode = \&decode_entities;
-
-1;
diff --git a/ext/HTML/Parser/lib/HTML/Filter.pm b/ext/HTML/Parser/lib/HTML/Filter.pm
deleted file mode 100644
index 21fafac621..0000000000
--- a/ext/HTML/Parser/lib/HTML/Filter.pm
+++ /dev/null
@@ -1,112 +0,0 @@
-package HTML::Filter;
-
-use strict;
-use vars qw(@ISA $VERSION);
-
-require HTML::Parser;
-@ISA=qw(HTML::Parser);
-
-$VERSION = sprintf("%d.%02d", q$Revision: 2.11 $ =~ /(\d+)\.(\d+)/);
-
-sub declaration { $_[0]->output("<!$_[1]>")     }
-sub process     { $_[0]->output($_[2])          }
-sub comment     { $_[0]->output("<!--$_[1]-->") }
-sub start       { $_[0]->output($_[4])          }
-sub end         { $_[0]->output($_[2])          }
-sub text        { $_[0]->output($_[1])          }
-
-sub output      { print $_[1] }
-
-1;
-
-__END__
-
-=head1 NAME
-
-HTML::Filter - Filter HTML text through the parser
-
-=head1 NOTE
-
-B<This module is deprecated.> The C<HTML::Parser> now provides the
-functionally of C<HTML::Filter> much more efficiently with the the
-C<default> handler.
-
-=head1 SYNOPSIS
-
- require HTML::Filter;
- $p = HTML::Filter->new->parse_file("index.html");
-
-=head1 DESCRIPTION
-
-C<HTML::Filter> is an HTML parser that by default prints the
-original text of each HTML element (a slow version of cat(1) basically).
-The callback methods may be overridden to modify the filtering for some
-HTML elements and you can override output() method which is called to
-print the HTML text.
-
-C<HTML::Filter> is a subclass of C<HTML::Parser>. This means that
-the document should be given to the parser by calling the $p->parse()
-or $p->parse_file() methods.
-
-=head1 EXAMPLES
-
-The first example is a filter that will remove all comments from an
-HTML file.  This is achieved by simply overriding the comment method
-to do nothing.
-
-  package CommentStripper;
-  require HTML::Filter;
-  @ISA=qw(HTML::Filter);
-  sub comment { }  # ignore comments
-
-The second example shows a filter that will remove any E<lt>TABLE>s
-found in the HTML file.  We specialize the start() and end() methods
-to count table tags and then make output not happen when inside a
-table.
-
-  package TableStripper;
-  require HTML::Filter;
-  @ISA=qw(HTML::Filter);
-  sub start
-  {
-     my $self = shift;
-     $self->{table_seen}++ if $_[0] eq "table";
-     $self->SUPER::start(@_);
-  }
-
-  sub end
-  {
-     my $self = shift;
-     $self->SUPER::end(@_);
-     $self->{table_seen}-- if $_[0] eq "table";
-  }
-
-  sub output
-  {
-      my $self = shift;
-      unless ($self->{table_seen}) {
-	  $self->SUPER::output(@_);
-      }
-  }
-
-If you want to collect the parsed text internally you might want to do
-something like this:
-
-  package FilterIntoString;
-  require HTML::Filter;
-  @ISA=qw(HTML::Filter);
-  sub output { push(@{$_[0]->{fhtml}}, $_[1]) }
-  sub filtered_html { join("", @{$_[0]->{fhtml}}) }
-
-=head1 SEE ALSO
-
-L<HTML::Parser>
-
-=head1 COPYRIGHT
-
-Copyright 1997-1999 Gisle Aas.
-
-This library is free software; you can redistribute it and/or
-modify it under the same terms as Perl itself.
-
-=cut
diff --git a/ext/HTML/Parser/lib/HTML/HeadParser.pm b/ext/HTML/Parser/lib/HTML/HeadParser.pm
deleted file mode 100644
index a8974f832b..0000000000
--- a/ext/HTML/Parser/lib/HTML/HeadParser.pm
+++ /dev/null
@@ -1,259 +0,0 @@
-package HTML::HeadParser;
-
-=head1 NAME
-
-HTML::HeadParser - Parse <HEAD> section of a HTML document
-
-=head1 SYNOPSIS
-
- require HTML::HeadParser;
- $p = HTML::HeadParser->new;
- $p->parse($text) and  print "not finished";
-
- $p->header('Title')          # to access <title>....</title>
- $p->header('Content-Base')   # to access <base href="http://...">
- $p->header('Foo')            # to access <meta http-equiv="Foo" content="...">
-
-=head1 DESCRIPTION
-
-The C<HTML::HeadParser> is a specialized (and lightweight)
-C<HTML::Parser> that will only parse the E<lt>HEAD>...E<lt>/HEAD>
-section of an HTML document.  The parse() method
-will return a FALSE value as soon as some E<lt>BODY> element or body
-text are found, and should not be called again after this.
-
-Note that the C<HTML::HeadParser> might get confused if raw undecoded
-UTF-8 is passed to the parse() method.  Make sure the strings are
-properly decoded before passing them on.
-
-The C<HTML::HeadParser> keeps a reference to a header object, and the
-parser will update this header object as the various elements of the
-E<lt>HEAD> section of the HTML document are recognized.  The following
-header fields are affected:
-
-=over 4
-
-=item Content-Base:
-
-The I<Content-Base> header is initialized from the E<lt>base
-href="..."> element.
-
-=item Title:
-
-The I<Title> header is initialized from the E<lt>title>...E<lt>/title>
-element.
-
-=item Isindex:
-
-The I<Isindex> header will be added if there is a E<lt>isindex>
-element in the E<lt>head>.  The header value is initialized from the
-I<prompt> attribute if it is present.  If no I<prompt> attribute is
-given it will have '?' as the value.
-
-=item X-Meta-Foo:
-
-All E<lt>meta> elements will initialize headers with the prefix
-"C<X-Meta->" on the name.  If the E<lt>meta> element contains a
-C<http-equiv> attribute, then it will be honored as the header name.
-
-=back
-
-=head1 METHODS
-
-The following methods (in addition to those provided by the
-superclass) are available:
-
-=over 4
-
-=cut
-
-
-require HTML::Parser;
-@ISA = qw(HTML::Parser);
-
-use HTML::Entities ();
-
-use strict;
-use vars qw($VERSION $DEBUG);
-#$DEBUG = 1;
-$VERSION = sprintf("%d.%02d", q$Revision: 2.22 $ =~ /(\d+)\.(\d+)/);
-
-=item $hp = HTML::HeadParser->new
-
-=item $hp = HTML::HeadParser->new( $header )
-
-The object constructor.  The optional $header argument should be a
-reference to an object that implement the header() and push_header()
-methods as defined by the C<HTTP::Headers> class.  Normally it will be
-of some class that isa or delegates to the C<HTTP::Headers> class.
-
-If no $header is given C<HTML::HeadParser> will create an
-C<HTTP::Header> object by itself (initially empty).
-
-=cut
-
-sub new
-{
-    my($class, $header) = @_;
-    unless ($header) {
-	require HTTP::Headers;
-	$header = HTTP::Headers->new;
-    }
-
-    my $self = $class->SUPER::new(api_version => 2,
-				  ignore_elements => [qw(script style)],
-				 );
-    $self->{'header'} = $header;
-    $self->{'tag'} = '';   # name of active element that takes textual content
-    $self->{'text'} = '';  # the accumulated text associated with the element
-    $self;
-}
-
-=item $hp->header;
-
-Returns a reference to the header object.
-
-=item $hp->header( $key )
-
-Returns a header value.  It is just a shorter way to write
-C<$hp-E<gt>header-E<gt>header($key)>.
-
-=cut
-
-sub header
-{
-    my $self = shift;
-    return $self->{'header'} unless @_;
-    $self->{'header'}->header(@_);
-}
-
-sub as_string    # legacy
-{
-    my $self = shift;
-    $self->{'header'}->as_string;
-}
-
-sub flush_text   # internal
-{
-    my $self = shift;
-    my $tag  = $self->{'tag'};
-    my $text = $self->{'text'};
-    $text =~ s/^\s+//;
-    $text =~ s/\s+$//;
-    $text =~ s/\s+/ /g;
-    print "FLUSH $tag => '$text'\n"  if $DEBUG;
-    if ($tag eq 'title') {
-	HTML::Entities::decode($text);
-	$self->{'header'}->push_header(Title => $text);
-    }
-    $self->{'tag'} = $self->{'text'} = '';
-}
-
-# This is an quote from the HTML3.2 DTD which shows which elements
-# that might be present in a <HEAD>...</HEAD>.  Also note that the
-# <HEAD> tags themselves might be missing:
-#
-# <!ENTITY % head.content "TITLE & ISINDEX? & BASE? & STYLE? &
-#                            SCRIPT* & META* & LINK*">
-#
-# <!ELEMENT HEAD O O  (%head.content)>
-
-
-sub start
-{
-    my($self, $tag, $attr) = @_;  # $attr is reference to a HASH
-    print "START[$tag]\n" if $DEBUG;
-    $self->flush_text if $self->{'tag'};
-    if ($tag eq 'meta') {
-	my $key = $attr->{'http-equiv'};
-	if (!defined($key) || !length($key)) {
-	    return unless $attr->{'name'};
-	    $key = "X-Meta-\u$attr->{'name'}";
-	}
-	$self->{'header'}->push_header($key => $attr->{content});
-    } elsif ($tag eq 'base') {
-	return unless exists $attr->{href};
-	$self->{'header'}->push_header('Content-Base' => $attr->{href});
-    } elsif ($tag eq 'isindex') {
-	# This is a non-standard header.  Perhaps we should just ignore
-	# this element
-	$self->{'header'}->push_header(Isindex => $attr->{prompt} || '?');
-    } elsif ($tag =~ /^(?:title|script|style)$/) {
-	# Just remember tag.  Initialize header when we see the end tag.
-	$self->{'tag'} = $tag;
-    } elsif ($tag eq 'link') {
-	return unless exists $attr->{href};
-	# <link href="http:..." rel="xxx" rev="xxx" title="xxx">
-	my $h_val = "<" . delete($attr->{href}) . ">";
-	for (sort keys %{$attr}) {
-	    $h_val .= qq(; $_="$attr->{$_}");
-	}
-	$self->{'header'}->push_header(Link => $h_val);
-    } elsif ($tag eq 'head' || $tag eq 'html') {
-	# ignore
-    } else {
-	 # stop parsing
-	$self->eof;
-    }
-}
-
-sub end
-{
-    my($self, $tag) = @_;
-    print "END[$tag]\n" if $DEBUG;
-    $self->flush_text if $self->{'tag'};
-    $self->eof if $tag eq 'head';
-}
-
-sub text
-{
-    my($self, $text) = @_;
-    $text =~ s/\x{FEFF}//;  # drop Unicode BOM if found
-    print "TEXT[$text]\n" if $DEBUG;
-    my $tag = $self->{tag};
-    if (!$tag && $text =~ /\S/) {
-	# Normal text means start of body
-        $self->eof;
-	return;
-    }
-    return if $tag ne 'title';
-    $self->{'text'} .= $text;
-}
-
-1;
-
-__END__
-
-=back
-
-=head1 EXAMPLE
-
- $h = HTTP::Headers->new;
- $p = HTML::HeadParser->new($h);
- $p->parse(<<EOT);
- <title>Stupid example</title>
- <base href="http://www.linpro.no/lwp/">
- Normal text starts here.
- EOT
- undef $p;
- print $h->title;   # should print "Stupid example"
-
-=head1 SEE ALSO
-
-L<HTML::Parser>, L<HTTP::Headers>
-
-The C<HTTP::Headers> class is distributed as part of the
-I<libwww-perl> package.  If you don't have that distribution installed
-you need to provide the $header argument to the C<HTML::HeadParser>
-constructor with your own object that implements the documented
-protocol.
-
-=head1 COPYRIGHT
-
-Copyright 1996-2001 Gisle Aas. All rights reserved.
-
-This library is free software; you can redistribute it and/or
-modify it under the same terms as Perl itself.
-
-=cut
-
diff --git a/ext/HTML/Parser/lib/HTML/LinkExtor.pm b/ext/HTML/Parser/lib/HTML/LinkExtor.pm
deleted file mode 100644
index d543a5aba7..0000000000
--- a/ext/HTML/Parser/lib/HTML/LinkExtor.pm
+++ /dev/null
@@ -1,187 +0,0 @@
-package HTML::LinkExtor;
-
-# $Id: LinkExtor.pm,v 1.33 2003/10/10 10:20:56 gisle Exp $
-
-require HTML::Parser;
-@ISA = qw(HTML::Parser);
-$VERSION = sprintf("%d.%02d", q$Revision: 1.33 $ =~ /(\d+)\.(\d+)/);
-
-=head1 NAME
-
-HTML::LinkExtor - Extract links from an HTML document
-
-=head1 SYNOPSIS
-
- require HTML::LinkExtor;
- $p = HTML::LinkExtor->new(\&cb, "http://www.perl.org/");
- sub cb {
-     my($tag, %links) = @_;
-     print "$tag @{[%links]}\n";
- }
- $p->parse_file("index.html");
-
-=head1 DESCRIPTION
-
-I<HTML::LinkExtor> is an HTML parser that extracts links from an
-HTML document.  The I<HTML::LinkExtor> is a subclass of
-I<HTML::Parser>. This means that the document should be given to the
-parser by calling the $p->parse() or $p->parse_file() methods.
-
-=cut
-
-use strict;
-use HTML::Tagset ();
-
-# legacy (some applications grabs this hash directly)
-use vars qw(%LINK_ELEMENT);
-*LINK_ELEMENT = \%HTML::Tagset::linkElements;
-
-=over 4
-
-=item $p = HTML::LinkExtor->new
-
-=item $p = HTML::LinkExtor->new( $callback )
-
-=item $p = HTML::LinkExtor->new( $callback, $base )
-
-The constructor takes two optional arguments. The first is a reference
-to a callback routine. It will be called as links are found. If a
-callback is not provided, then links are just accumulated internally
-and can be retrieved by calling the $p->links() method.
-
-The $base argument is an optional base URL used to absolutize all URLs found.
-You need to have the I<URI> module installed if you provide $base.
-
-The callback is called with the lowercase tag name as first argument,
-and then all link attributes as separate key/value pairs.  All
-non-link attributes are removed.
-
-=cut
-
-sub new
-{
-    my($class, $cb, $base) = @_;
-    my $self = $class->SUPER::new(
-                    start_h => ["_start_tag", "self,tagname,attr"],
-		    report_tags => [keys %HTML::Tagset::linkElements],
-	       );
-    $self->{extractlink_cb} = $cb;
-    if ($base) {
-	require URI;
-	$self->{extractlink_base} = URI->new($base);
-    }
-    $self;
-}
-
-sub _start_tag
-{
-    my($self, $tag, $attr) = @_;
-
-    my $base = $self->{extractlink_base};
-    my $links = $HTML::Tagset::linkElements{$tag};
-    $links = [$links] unless ref $links;
-
-    my @links;
-    my $a;
-    for $a (@$links) {
-	next unless exists $attr->{$a};
-	push(@links, $a, $base ? URI->new($attr->{$a}, $base)->abs($base)
-                               : $attr->{$a});
-    }
-    return unless @links;
-    $self->_found_link($tag, @links);
-}
-
-sub _found_link
-{
-    my $self = shift;
-    my $cb = $self->{extractlink_cb};
-    if ($cb) {
-	&$cb(@_);
-    } else {
-	push(@{$self->{'links'}}, [@_]);
-    }
-}
-
-=item $p->links
-
-Returns a list of all links found in the document.  The returned
-values will be anonymous arrays with the follwing elements:
-
-  [$tag, $attr => $url1, $attr2 => $url2,...]
-
-The $p->links method will also truncate the internal link list.  This
-means that if the method is called twice without any parsing
-between them the second call will return an empty list.
-
-Also note that $p->links will always be empty if a callback routine
-was provided when the I<HTML::LinkExtor> was created.
-
-=cut
-
-sub links
-{
-    my $self = shift;
-    exists($self->{'links'}) ? @{delete $self->{'links'}} : ();
-}
-
-# We override the parse_file() method so that we can clear the links
-# before we start a new file.
-sub parse_file
-{
-    my $self = shift;
-    delete $self->{'links'};
-    $self->SUPER::parse_file(@_);
-}
-
-=back
-
-=head1 EXAMPLE
-
-This is an example showing how you can extract links from a document
-received using LWP:
-
-  use LWP::UserAgent;
-  use HTML::LinkExtor;
-  use URI::URL;
-
-  $url = "http://www.perl.org/";  # for instance
-  $ua = LWP::UserAgent->new;
-
-  # Set up a callback that collect image links
-  my @imgs = ();
-  sub callback {
-     my($tag, %attr) = @_;
-     return if $tag ne 'img';  # we only look closer at <img ...>
-     push(@imgs, values %attr);
-  }
-
-  # Make the parser.  Unfortunately, we don't know the base yet
-  # (it might be diffent from $url)
-  $p = HTML::LinkExtor->new(\&callback);
-
-  # Request document and parse it as it arrives
-  $res = $ua->request(HTTP::Request->new(GET => $url),
-                      sub {$p->parse($_[0])});
-
-  # Expand all image URLs to absolute ones
-  my $base = $res->base;
-  @imgs = map { $_ = url($_, $base)->abs; } @imgs;
-
-  # Print them out
-  print join("\n", @imgs), "\n";
-
-=head1 SEE ALSO
-
-L<HTML::Parser>, L<HTML::Tagset>, L<LWP>, L<URI::URL>
-
-=head1 COPYRIGHT
-
-Copyright 1996-2001 Gisle Aas.
-
-This library is free software; you can redistribute it and/or
-modify it under the same terms as Perl itself.
-
-=cut
-
-1;
diff --git a/ext/HTML/Parser/lib/HTML/PullParser.pm b/ext/HTML/Parser/lib/HTML/PullParser.pm
deleted file mode 100644
index e851fe001d..0000000000
--- a/ext/HTML/Parser/lib/HTML/PullParser.pm
+++ /dev/null
@@ -1,211 +0,0 @@
-package HTML::PullParser;
-
-# $Id: PullParser.pm,v 2.9 2006/04/26 08:00:28 gisle Exp $
-
-require HTML::Parser;
-@ISA=qw(HTML::Parser);
-$VERSION = sprintf("%d.%02d", q$Revision: 2.9 $ =~ /(\d+)\.(\d+)/);
-
-use strict;
-use Carp ();
-
-sub new
-{
-    my($class, %cnf) = @_;
-
-    # Construct argspecs for the various events
-    my %argspec;
-    for (qw(start end text declaration comment process default)) {
-	my $tmp = delete $cnf{$_};
-	next unless defined $tmp;
-	$argspec{$_} = $tmp;
-    }
-    Carp::croak("Info not collected for any events")
-	  unless %argspec;
-
-    my $file = delete $cnf{file};
-    my $doc  = delete $cnf{doc};
-    Carp::croak("Can't parse from both 'doc' and 'file' at the same time")
-	  if defined($file) && defined($doc);
-    Carp::croak("No 'doc' or 'file' given to parse from")
-	  unless defined($file) || defined($doc);
-
-    # Create object
-    $cnf{api_version} = 3;
-    my $self = $class->SUPER::new(%cnf);
-
-    my $accum = $self->{pullparser_accum} = [];
-    while (my($event, $argspec) = each %argspec) {
-	$self->SUPER::handler($event => $accum, $argspec);
-    }
-
-    if (defined $doc) {
-	$self->{pullparser_str_ref} = ref($doc) ? $doc : \$doc;
-	$self->{pullparser_str_pos} = 0;
-    }
-    else {
-	if (!ref($file) && ref(\$file) ne "GLOB") {
-	    require IO::File;
-	    $file = IO::File->new($file, "r") || return;
-	}
-
-	$self->{pullparser_file} = $file;
-    }
-    $self;
-}
-
-
-sub handler
-{
-    Carp::croak("Can't set handlers for HTML::PullParser");
-}
-
-
-sub get_token
-{
-    my $self = shift;
-    while (!@{$self->{pullparser_accum}} && !$self->{pullparser_eof}) {
-	if (my $f = $self->{pullparser_file}) {
-	    # must try to parse more from the file
-	    my $buf;
-	    if (read($f, $buf, 512)) {
-		$self->parse($buf);
-	    } else {
-		$self->eof;
-		$self->{pullparser_eof}++;
-		delete $self->{pullparser_file};
-	    }
-	}
-	elsif (my $sref = $self->{pullparser_str_ref}) {
-	    # must try to parse more from the scalar
-	    my $pos = $self->{pullparser_str_pos};
-	    my $chunk = substr($$sref, $pos, 512);
-	    $self->parse($chunk);
-	    $pos += length($chunk);
-	    if ($pos < length($$sref)) {
-		$self->{pullparser_str_pos} = $pos;
-	    }
-	    else {
-		$self->eof;
-		$self->{pullparser_eof}++;
-		delete $self->{pullparser_str_ref};
-		delete $self->{pullparser_str_pos};
-	    }
-	}
-	else {
-	    die;
-	}
-    }
-    shift @{$self->{pullparser_accum}};
-}
-
-
-sub unget_token
-{
-    my $self = shift;
-    unshift @{$self->{pullparser_accum}}, @_;
-    $self;
-}
-
-1;
-
-
-__END__
-
-=head1 NAME
-
-HTML::PullParser - Alternative HTML::Parser interface
-
-=head1 SYNOPSIS
-
- use HTML::PullParser;
-
- $p = HTML::PullParser->new(file => "index.html",
-                            start => 'event, tagname, @attr',
-                            end   => 'event, tagname',
-                            ignore_elements => [qw(script style)],
-                           ) || die "Can't open: $!";
- while (my $token = $p->get_token) {
-     #...do something with $token
- }
-
-=head1 DESCRIPTION
-
-The HTML::PullParser is an alternative interface to the HTML::Parser class.
-It basically turns the HTML::Parser inside out.  You associate a file
-(or any IO::Handle object or string) with the parser at construction time and
-then repeatedly call $parser->get_token to obtain the tags and text
-found in the parsed document.
-
-The following methods are provided:
-
-=over 4
-
-=item $p = HTML::PullParser->new( file => $file, %options )
-
-=item $p = HTML::PullParser->new( doc => \$doc, %options )
-
-A C<HTML::PullParser> can be made to parse from either a file or a
-literal document based on whether the C<file> or C<doc> option is
-passed to the parser's constructor.
-
-The C<file> passed in can either be a file name or a file handle
-object.  If a file name is passed, and it can't be opened for reading,
-then the constructor will return an undefined value and $!  will tell
-you why it failed.  Otherwise the argument is taken to be some object
-that the C<HTML::PullParser> can read() from when it needs more data.
-The stream will be read() until EOF, but not closed.
-
-A C<doc> can be passed plain or as a reference
-to a scalar.  If a reference is passed then the value of this scalar
-should not be changed before all tokens have been extracted.
-
-Next the information to be returned for the different token types must
-be set up.  This is done by simply associating an argspec (as defined
-in L<HTML::Parser>) with the events you have an interest in.  For
-instance, if you want C<start> tokens to be reported as the string
-C<'S'> followed by the tagname and the attributes you might pass an
-C<start>-option like this:
-
-   $p = HTML::PullParser->new(
-          doc   => $document_to_parse,
-          start => '"S", tagname, @attr',
-          end   => '"E", tagname',
-        );
-
-At last other C<HTML::Parser> options, like C<ignore_tags>, and
-C<unbroken_text>, can be passed in.  Note that you should not use the
-I<event>_h options to set up parser handlers.  That would confuse the
-inner logic of C<HTML::PullParser>.
-
-=item $token = $p->get_token
-
-This method will return the next I<token> found in the HTML document,
-or C<undef> at the end of the document.  The token is returned as an
-array reference.  The content of this array match the argspec set up
-during C<HTML::PullParser> construction.
-
-=item $p->unget_token( @tokens )
-
-If you find out you have read too many tokens you can push them back,
-so that they are returned again the next time $p->get_token is called.
-
-=back
-
-=head1 EXAMPLES
-
-The 'eg/hform' script shows how we might parse the form section of
-HTML::Documents using HTML::PullParser.
-
-=head1 SEE ALSO
-
-L<HTML::Parser>, L<HTML::TokeParser>
-
-=head1 COPYRIGHT
-
-Copyright 1998-2001 Gisle Aas.
-
-This library is free software; you can redistribute it and/or
-modify it under the same terms as Perl itself.
-
-=cut
diff --git a/ext/HTML/Parser/lib/HTML/TokeParser.pm b/ext/HTML/Parser/lib/HTML/TokeParser.pm
deleted file mode 100644
index a1b8837cb4..0000000000
--- a/ext/HTML/Parser/lib/HTML/TokeParser.pm
+++ /dev/null
@@ -1,371 +0,0 @@
-package HTML::TokeParser;
-
-# $Id: TokeParser.pm,v 2.37 2006/04/26 08:00:28 gisle Exp $
-
-require HTML::PullParser;
-@ISA=qw(HTML::PullParser);
-$VERSION = sprintf("%d.%02d", q$Revision: 2.37 $ =~ /(\d+)\.(\d+)/);
-
-use strict;
-use Carp ();
-use HTML::Entities qw(decode_entities);
-use HTML::Tagset ();
-
-my %ARGS =
-(
- start       => "'S',tagname,attr,attrseq,text",
- end         => "'E',tagname,text",
- text        => "'T',text,is_cdata",
- process     => "'PI',token0,text",
- comment     => "'C',text",
- declaration => "'D',text",
-
- # options that default on
- unbroken_text => 1,
-);
-
-
-sub new
-{
-    my $class = shift;
-    my %cnf;
-    if (@_ == 1) {
-	my $type = (ref($_[0]) eq "SCALAR") ? "doc" : "file";
-	%cnf = ($type => $_[0]);
-    }
-    else {
-	%cnf = @_;
-    }
-
-    my $textify = delete $cnf{textify} || {img => "alt", applet => "alt"};
-
-    my $self = $class->SUPER::new(%cnf, %ARGS) || return undef;
-
-    $self->{textify} = $textify;
-    $self;
-}
-
-
-sub get_tag
-{
-    my $self = shift;
-    my $token;
-    while (1) {
-	$token = $self->get_token || return undef;
-	my $type = shift @$token;
-	next unless $type eq "S" || $type eq "E";
-	substr($token->[0], 0, 0) = "/" if $type eq "E";
-	return $token unless @_;
-	for (@_) {
-	    return $token if $token->[0] eq $_;
-	}
-    }
-}
-
-
-sub _textify {
-    my($self, $token) = @_;
-    my $tag = $token->[1];
-    return undef unless exists $self->{textify}{$tag};
-
-    my $alt = $self->{textify}{$tag};
-    my $text;
-    if (ref($alt)) {
-	$text = &$alt(@$token);
-    } else {
-	$text = $token->[2]{$alt || "alt"};
-	$text = "[\U$tag]" unless defined $text;
-    }
-    return $text;
-}
-
-
-sub get_text
-{
-    my $self = shift;
-    my @text;
-    while (my $token = $self->get_token) {
-	my $type = $token->[0];
-	if ($type eq "T") {
-	    my $text = $token->[1];
-	    decode_entities($text) unless $token->[2];
-	    push(@text, $text);
-	} elsif ($type =~ /^[SE]$/) {
-	    my $tag = $token->[1];
-	    if ($type eq "S") {
-		if (defined(my $text = _textify($self, $token))) {
-		    push(@text, $text);
-		    next;
-		}
-	    } else {
-		$tag = "/$tag";
-	    }
-	    if (!@_ || grep $_ eq $tag, @_) {
-		 $self->unget_token($token);
-		 last;
-	    }
-	    push(@text, " ")
-		if $tag eq "br" || !$HTML::Tagset::isPhraseMarkup{$token->[1]};
-	}
-    }
-    join("", @text);
-}
-
-
-sub get_trimmed_text
-{
-    my $self = shift;
-    my $text = $self->get_text(@_);
-    $text =~ s/^\s+//; $text =~ s/\s+$//; $text =~ s/\s+/ /g;
-    $text;
-}
-
-sub get_phrase {
-    my $self = shift;
-    my @text;
-    while (my $token = $self->get_token) {
-	my $type = $token->[0];
-	if ($type eq "T") {
-	    my $text = $token->[1];
-	    decode_entities($text) unless $token->[2];
-	    push(@text, $text);
-	} elsif ($type =~ /^[SE]$/) {
-	    my $tag = $token->[1];
-	    if ($type eq "S") {
-		if (defined(my $text = _textify($self, $token))) {
-		    push(@text, $text);
-		    next;
-		}
-	    }
-	    if (!$HTML::Tagset::isPhraseMarkup{$tag}) {
-		$self->unget_token($token);
-		last;
-	    }
-	    push(@text, " ") if $tag eq "br";
-	}
-    }
-    my $text = join("", @text);
-    $text =~ s/^\s+//; $text =~ s/\s+$//; $text =~ s/\s+/ /g;
-    $text;
-}
-
-1;
-
-
-__END__
-
-=head1 NAME
-
-HTML::TokeParser - Alternative HTML::Parser interface
-
-=head1 SYNOPSIS
-
- require HTML::TokeParser;
- $p = HTML::TokeParser->new("index.html") ||
-      die "Can't open: $!";
- $p->empty_element_tags(1);  # configure its behaviour
-
- while (my $token = $p->get_token) {
-     #...
- }
-
-=head1 DESCRIPTION
-
-The C<HTML::TokeParser> is an alternative interface to the
-C<HTML::Parser> class.  It is an C<HTML::PullParser> subclass with a
-predeclared set of token types.  If you wish the tokens to be reported
-differently you probably want to use the C<HTML::PullParser> directly.
-
-The following methods are available:
-
-=over 4
-
-=item $p = HTML::TokeParser->new( $filename, %opt );
-
-=item $p = HTML::TokeParser->new( $filehandle, %opt );
-
-=item $p = HTML::TokeParser->new( \$document, %opt );
-
-The object constructor argument is either a file name, a file handle
-object, or the complete document to be parsed.  Extra options can be
-provided as key/value pairs and are processed as documented by the base
-classes.
-
-If the argument is a plain scalar, then it is taken as the name of a
-file to be opened and parsed.  If the file can't be opened for
-reading, then the constructor will return C<undef> and $! will tell
-you why it failed.
-
-If the argument is a reference to a plain scalar, then this scalar is
-taken to be the literal document to parse.  The value of this
-scalar should not be changed before all tokens have been extracted.
-
-Otherwise the argument is taken to be some object that the
-C<HTML::TokeParser> can read() from when it needs more data.  Typically
-it will be a filehandle of some kind.  The stream will be read() until
-EOF, but not closed.
-
-A newly constructed C<HTML::TokeParser> differ from its base classes
-by having the C<unbroken_text> attribute enabled by default. See
-L<HTML::Parser> for a description of this and other attributes that
-influence how the document is parsed. It is often a good idea to enable
-C<empty_element_tags> behaviour.
-
-Note that the parsing result will likely not be valid if raw undecoded
-UTF-8 is used as a source.  When parsing UTF-8 encoded files turn
-on UTF-8 decoding:
-
-   open(my $fh, "<:utf8", "index.html") || die "Can't open 'index.html': $!";
-   my $p = HTML::TokeParser->new( $fh );
-   # ...
-
-If a $filename is passed to the constructor the file will be opened in
-raw mode and the parsing result will only be valid if its content is
-Latin-1 or pure ASCII.
-
-If parsing from an UTF-8 encoded string buffer decode it first:
-
-   utf8::decode($document);
-   my $p = HTML::TokeParser->new( \$document );
-   # ...
-
-=item $p->get_token
-
-This method will return the next I<token> found in the HTML document,
-or C<undef> at the end of the document.  The token is returned as an
-array reference.  The first element of the array will be a string
-denoting the type of this token: "S" for start tag, "E" for end tag,
-"T" for text, "C" for comment, "D" for declaration, and "PI" for
-process instructions.  The rest of the token array depend on the type
-like this:
-
-  ["S",  $tag, $attr, $attrseq, $text]
-  ["E",  $tag, $text]
-  ["T",  $text, $is_data]
-  ["C",  $text]
-  ["D",  $text]
-  ["PI", $token0, $text]
-
-where $attr is a hash reference, $attrseq is an array reference and
-the rest are plain scalars.  The L<HTML::Parser/Argspec> explains the
-details.
-
-=item $p->unget_token( @tokens )
-
-If you find you have read too many tokens you can push them back,
-so that they are returned the next time $p->get_token is called.
-
-=item $p->get_tag
-
-=item $p->get_tag( @tags )
-
-This method returns the next start or end tag (skipping any other
-tokens), or C<undef> if there are no more tags in the document.  If
-one or more arguments are given, then we skip tokens until one of the
-specified tag types is found.  For example:
-
-   $p->get_tag("font", "/font");
-
-will find the next start or end tag for a font-element.
-
-The tag information is returned as an array reference in the same form
-as for $p->get_token above, but the type code (first element) is
-missing. A start tag will be returned like this:
-
-  [$tag, $attr, $attrseq, $text]
-
-The tagname of end tags are prefixed with "/", i.e. end tag is
-returned like this:
-
-  ["/$tag", $text]
-
-=item $p->get_text
-
-=item $p->get_text( @endtags )
-
-This method returns all text found at the current position. It will
-return a zero length string if the next token is not text. Any
-entities will be converted to their corresponding character.
-
-If one or more arguments are given, then we return all text occurring
-before the first of the specified tags found. For example:
-
-   $p->get_text("p", "br");
-
-will return the text up to either a paragraph of linebreak element.
-
-The text might span tags that should be I<textified>.  This is
-controlled by the $p->{textify} attribute, which is a hash that
-defines how certain tags can be treated as text.  If the name of a
-start tag matches a key in this hash then this tag is converted to
-text.  The hash value is used to specify which tag attribute to obtain
-the text from.  If this tag attribute is missing, then the upper case
-name of the tag enclosed in brackets is returned, e.g. "[IMG]".  The
-hash value can also be a subroutine reference.  In this case the
-routine is called with the start tag token content as its argument and
-the return value is treated as the text.
-
-The default $p->{textify} value is:
-
-  {img => "alt", applet => "alt"}
-
-This means that <IMG> and <APPLET> tags are treated as text, and that
-the text to substitute can be found in the ALT attribute.
-
-=item $p->get_trimmed_text
-
-=item $p->get_trimmed_text( @endtags )
-
-Same as $p->get_text above, but will collapse any sequences of white
-space to a single space character.  Leading and trailing white space is
-removed.
-
-=item $p->get_phrase
-
-This will return all text found at the current position ignoring any
-phrasal-level tags.  Text is extracted until the first non
-phrasal-level tag.  Textification of tags is the same as for
-get_text().  This method will collapse white space in the same way as
-get_trimmed_text() does.
-
-The definition of <i>phrasal-level tags</i> is obtained from the
-HTML::Tagset module.
-
-=back
-
-=head1 EXAMPLES
-
-This example extracts all links from a document.  It will print one
-line for each link, containing the URL and the textual description
-between the <A>...</A> tags:
-
-  use HTML::TokeParser;
-  $p = HTML::TokeParser->new(shift||"index.html");
-
-  while (my $token = $p->get_tag("a")) {
-      my $url = $token->[1]{href} || "-";
-      my $text = $p->get_trimmed_text("/a");
-      print "$url\t$text\n";
-  }
-
-This example extract the <TITLE> from the document:
-
-  use HTML::TokeParser;
-  $p = HTML::TokeParser->new(shift||"index.html");
-  if ($p->get_tag("title")) {
-      my $title = $p->get_trimmed_text;
-      print "Title: $title\n";
-  }
-
-=head1 SEE ALSO
-
-L<HTML::PullParser>, L<HTML::Parser>
-
-=head1 COPYRIGHT
-
-Copyright 1998-2005 Gisle Aas.
-
-This library is free software; you can redistribute it and/or
-modify it under the same terms as Perl itself.
-
-=cut
diff --git a/ext/HTML/Parser/mkhctype b/ext/HTML/Parser/mkhctype
deleted file mode 100755
index eeae40df67..0000000000
--- a/ext/HTML/Parser/mkhctype
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/perl
-
-($progname = $0) =~ s,.*/,,;
-
-print "/* This file is autogenerated by $progname */\n";
-
-print <<'EOT';
-
-#define HCTYPE_SPACE                 0x01
-#define HCTYPE_NAME_FIRST            0x02
-#define HCTYPE_NAME_CHAR             0x04
-#define HCTYPE_NOT_SPACE_GT          0x08
-#define HCTYPE_NOT_SPACE_EQ_GT       0x10
-#define HCTYPE_NOT_SPACE_SLASH_GT    0x20
-#define HCTYPE_NOT_SPACE_EQ_SLASH_GT 0x40
-
-#define HCTYPE(c)       hctype[(unsigned char)(c)]
-#define isHCTYPE(c, w)  (HCTYPE(c) & (w))
-
-#define isHSPACE(c)        isHCTYPE(c, HCTYPE_SPACE)
-#define isHNAME_FIRST(c)   isHCTYPE(c, HCTYPE_NAME_FIRST)
-#define isHNAME_CHAR(c)    isHCTYPE(c, HCTYPE_NAME_CHAR)
-#define isHNOT_SPACE_GT(c) isHCTYPE(c, HCTYPE_NOT_SPACE_GT)
-
-typedef unsigned char hctype_t;
-
-EOT
-
-print "static hctype_t hctype[] = {\n";
-
-for my $c (0 .. 255) {
-    print "    " unless $c % 8;
-
-    local $_ = chr($c);
-    my $v = 0;
-    if (/^\s$/) { # isSPACE
-	$v |= 0x1
-    }
-    elsif ($_ ne ">") {
-	$v |= 0x08;
-	$v |= 0x10 if $_ ne "=";
-        $v |= 0x20 if $_ ne "/";
-        $v |= 0x40 if $_ ne "=";
-    }
-
-    if (/^[\w.\-:]$/) {
-	$v |= 0x4;
-	$v |= 0x2 unless /^[\d.-]$/;  # XML allow /[:_]/ as first char
-    }
-
-    printf "0x%02x, ", $v;
-    unless (($c+1) % 8) {
-	printf " /* %3d - %3d */\n", $c - 7, $c;
-    }
-}
-print "};\n";
-
diff --git a/ext/HTML/Parser/mkpfunc b/ext/HTML/Parser/mkpfunc
deleted file mode 100755
index 810bc1fb1c..0000000000
--- a/ext/HTML/Parser/mkpfunc
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/perl
-
-($progname = $0) =~ s,.*/,,;
-
-print "/* This file is autogenerated by $progname */\n";
-
-print "typedef char*(*PFUNC)(PSTATE*, char *beg, char *end, U32 utf8, SV* self);\n";
-print "static PFUNC parsefunc[] = {\n";
-
-for my $c (0..255) {
-    local $_ = chr($c);
-    my $func = "null";
-    if (/^[A-Za-z]$/) {
-	$func = "start";
-    }
-    elsif ($_ eq "/") {
-	$func = "end";
-    }
-    elsif ($_ eq "!") {
-	$func = "decl";
-    }
-    elsif ($_ eq "?") {
-	$func = "process";
-    }
-    printf "    %-15s /* %3d */\n", "parse_$func,", $c;
-}
-
-print "};\n";
diff --git a/ext/HTML/Parser/t/api_version.t b/ext/HTML/Parser/t/api_version.t
deleted file mode 100644
index 98031216c4..0000000000
--- a/ext/HTML/Parser/t/api_version.t
+++ /dev/null
@@ -1,22 +0,0 @@
-use Test::More tests => 4;
-
-use strict;
-use HTML::Parser ();
-
-my $p = HTML::Parser->new(api_version => 3);
-
-ok(!$p->handler("start"), "API version 3");
-
-my $failed;
-eval {
-   my $p = HTML::Parser->new(api_version => 4);
-   $failed++;
-};
-like($@, qr/^API version 4 not supported/);
-ok(!$failed, "API version 4");
-
-$p = HTML::Parser->new(api_version => 2);
-
-is($p->handler("start"), "start", "API version 2");
-
-
diff --git a/ext/HTML/Parser/t/argspec-bad.t b/ext/HTML/Parser/t/argspec-bad.t
deleted file mode 100644
index 8c0b19975b..0000000000
--- a/ext/HTML/Parser/t/argspec-bad.t
+++ /dev/null
@@ -1,40 +0,0 @@
-use Test::More tests => 6;
-
-use strict;
-use HTML::Parser ();
-
-my $p = HTML::Parser->new(api_version => 3);
-
-eval {
-   $p->handler(end => "end", q(xyzzy));
-};
-like($@, qr/^Unrecognized identifier xyzzy in argspec/);
-
-
-eval {
-   $p->handler(end => "end", q(tagname text));
-};
-like($@, qr/^Missing comma separator in argspec/);
-
-
-eval {
-   $p->handler(end => "end", q(tagname, "text));
-};
-like($@, qr/^Unterminated literal string in argspec/);
-
-
-eval {
-   $p->handler(end => "end", q(tagname, "t\\t"));
-};
-like($@, qr/^Backslash reserved for literal string in argspec/);
-
-eval {
-   $p->handler(end => "end", '"' . ("x" x 256) . '"');
-};
-like($@, qr/^Literal string is longer than 255 chars in argspec/);
-
-$p->handler(end => sub { is(length(shift), 255) },
-	           '"' . ("x" x 255) . '"');
-$p->parse("</x>");
-
-
diff --git a/ext/HTML/Parser/t/argspec.t b/ext/HTML/Parser/t/argspec.t
deleted file mode 100644
index e8aa7a5c4a..0000000000
--- a/ext/HTML/Parser/t/argspec.t
+++ /dev/null
@@ -1,148 +0,0 @@
-
-use strict;
-require HTML::Parser;
-
-my $decl = '<!ENTITY nbsp   CDATA "&#160;" -- no-break space -->';
-my $com1 = '<!-- Comment -->';
-my $com2 = '<!-- Comment -- -- Comment -->';
-my $start = '<a href="foo">';
-my $end = '</a>';
-my $empty = "<IMG SRC='foo'/>";
-my $proc = '<? something completely different ?>';
-
-my @argspec = qw( self offset length
-		  event tagname tag token0
-	          text
-	          is_cdata dtext
-	          tokens
-	          tokenpos
-	          attr
-	          attrseq );
-
-my @result = ();
-my $p = HTML::Parser -> new(default_h => [\@result, join(',', @argspec)],
-			    strict_comment => 1, xml_mode => 1);
-
-my @tests =
-    ( # string, expected results
-      $decl  => [[$p, 0, 52, 'declaration', 'ENTITY', '!ENTITY', 'ENTITY',
-		 '<!ENTITY nbsp   CDATA "&#160;" -- no-break space -->',
-		 undef, undef,
-	       ['ENTITY', 'nbsp', 'CDATA', '"&#160;"', '-- no-break space --'],
-		 [2, 6, 9, 4, 16, 5, 22, 8, 31, 20],
-		 undef, undef ]],
-      $com1  => [[$p, 0, 16, 'comment', ' Comment ', '# Comment ', ' Comment ',
-		 '<!-- Comment -->', 
-		 undef, undef,
-		 [' Comment '],
-		 [4, 9],
-		 undef, undef ]],
-      $com2  => [[$p, 0, 30, 'comment', ' Comment ', '# Comment ', ' Comment ',
-		 '<!-- Comment -- -- Comment -->',
-		 undef, undef,
-		 [' Comment ', ' Comment '],
-		 [4, 9, 18, 9],
-		 undef, undef ]],
-      $start => [[$p, 0, 14, 'start', 'a', 'a', 'a',
-		 '<a href="foo">', 
-		 undef, undef,
-		 ['a', 'href', '"foo"'],
-		 [1, 1, 3, 4, 8, 5],
-		 {'href', 'foo'}, ['href'] ]],
-      $end   => [[$p, 0, 4, 'end', 'a', '/a', 'a',
-		 '</a>',
-		 undef, undef,
-		 ['a'],
-		 [2, 1],
-		 undef, undef ]],
-      $empty => [[$p, 0, 16, 'start', 'IMG', 'IMG', 'IMG',
-		  "<IMG SRC='foo'/>",
-		  undef, undef,
-		  ['IMG', 'SRC', "'foo'"],
-		  [1, 3, 5, 3, 9, 5],
-		  {'SRC', 'foo'}, ['SRC'] ],
-		 [$p, 16, 0, 'end', 'IMG', '/IMG', 'IMG',
-		  '',
-		  undef, undef,
-		  ['IMG'],
-		  undef,
-		  undef, undef ],
-		 ],
-       $proc  => [[$p, 0, 36, 'process', ' something completely different ',
-		  '? something completely different ',
-		  ' something completely different ',
-		  '<? something completely different ?>',
-		  undef, undef,
-		  [' something completely different '],
-		  [2, 32],
-		  undef, undef ]],
-      "$end\n$end"   => [[$p, 0, 4, 'end', 'a', '/a', 'a',
-			  '</a>',
-			  undef, undef,
-			  ['a'],
-			  [2, 1],
-			  undef, undef],
-			 [$p, 4, 1, 'text', undef, undef, undef,
-			  "\n",
-			  '', "\n",
-			  undef,
-			  undef,
-			  undef, undef],
-			 [$p, 5, 4, 'end', 'a', '/a', 'a',
-			  '</a>',
-			  undef, undef,
-			  ['a'],
-			  [2, 1],
-			  undef, undef ]],
-      );
-
-use Test::More;
-plan tests => @tests / 2;
-
-sub string_tag {
-    my (@pieces) = @_;
-    my $part;
-    foreach $part ( @pieces ) {
-	if (!defined $part) {
-	    $part = 'undef';
-	}
-	elsif (!ref $part) {
-	    $part = "'$part'" if $part !~ /^\d+$/;
-	}
-	elsif ('ARRAY' eq ref $part ) {
-	    $part = '[' . join(', ', string_tag(@$part)) . ']';
-	}
-	elsif ('HASH' eq ref $part ) {
-	    $part = '{' . join(',', string_tag(%$part)) . '}';
-	}
-	else {
-	    $part = '<' . ref($part) . '>';
-	}
-    }
-    return join(", ", @pieces );
-}
-
-my $i = 0;
-TEST:
-while (@tests) {
-    my($html, $expected) = splice @tests, 0, 2;
-    ++$i;
-
-    @result = ();
-    $p->parse($html)->eof;
-
-    shift(@result) if $result[0][3] eq "start_document";
-    pop(@result)   if $result[-1][3] eq "end_document";
-
-    # Compare results for each element expected
-    foreach (@$expected) {
-	my $want = string_tag($_);
-	my $got = string_tag(shift @result);
-	if ($want ne $got) {
-           is($want, $got);
-           next TEST;
-        }
-    }
-
-    pass;
-}
diff --git a/ext/HTML/Parser/t/argspec2.t b/ext/HTML/Parser/t/argspec2.t
deleted file mode 100644
index 6f594b97b6..0000000000
--- a/ext/HTML/Parser/t/argspec2.t
+++ /dev/null
@@ -1,21 +0,0 @@
-use Test::More tests => 2;
-
-use strict;
-use HTML::Parser;
-
-my @start;
-my @text;
-
-my $p = HTML::Parser->new(api_version => 3);
-$p->handler(start => \@start, '@{tagname, @attr}');
-$p->handler(text  => \@text,  '@{dtext}');
-$p->parse(<<EOT)->eof;
-Hi
-<a href="abc">Foo</a><b>:-)</b>
-EOT
-
-is("@start", "a href abc b");
-
-is(join("", @text), "Hi\nFoo:-)\n");
-
-
diff --git a/ext/HTML/Parser/t/attr-encoded.t b/ext/HTML/Parser/t/attr-encoded.t
deleted file mode 100644
index 4d458eb76c..0000000000
--- a/ext/HTML/Parser/t/attr-encoded.t
+++ /dev/null
@@ -1,32 +0,0 @@
-use strict;
-use Test::More tests => 2;
-
-use HTML::Parser ();
-my $p = HTML::Parser->new();
-$p->attr_encoded(1);
-
-my $text = "";
-$p->handler(start =>
-	    sub {
-		 my($tag, $attr) = @_;
-		 $text .= "S[$tag";
-		 for my $k (sort keys %$attr) {
-		     my $v =  $attr->{$k};
-		     $text .= " $k=$v";
-		 }
-		 $text .= "]";
-	     }, "tagname,attr");
-
-my $html = <<'EOT';
-<tag arg="&amp;&lt;&gt">
-EOT
-
-$p->parse($html)->eof;
-
-is($text, 'S[tag arg=&amp;&lt;&gt]');
-
-$text = "";
-$p->attr_encoded(0);
-$p->parse($html)->eof;
-
-is($text, 'S[tag arg=&<>]');
diff --git a/ext/HTML/Parser/t/callback.t b/ext/HTML/Parser/t/callback.t
deleted file mode 100644
index 7a456cfd82..0000000000
--- a/ext/HTML/Parser/t/callback.t
+++ /dev/null
@@ -1,49 +0,0 @@
-use Test::More tests => 47;
-
-use strict;
-use HTML::Parser;
-
-my @expected;
-my $p = HTML::Parser->new(api_version => 3,
-                          unbroken_text => 1,
-	                  default_h => [\@expected, '@{event, text}'],
-			 );
-
-my $doc = <<'EOT';
-<title>Hi</title>
-<h1>Ho ho</h1>
-<--comment->
-EOT
-
-$p->parse($doc)->eof;
-#use Data::Dump; Data::Dump::dump(@expected);
-
-for my $i (1..length($doc)) {
-     my @t;
-     $p->handler(default => \@t);
-     $p->parse(chunk($doc, $i));
-
-     # check that we got the same stuff
-     #diag "X:", join(":", @t);
-     #diag "Y:", join(":", @expected);
-     is(join(":", @t), join(":", @expected));
-}
-
-sub chunk {
-    my $str = shift;
-    my $size = shift || 1;
-    sub {
-	my $res = substr($str, 0, $size);
-        #diag "...$res";
-        substr($str, 0, $size) = "";
-	$res;
-    }
-}
-
-# Test croking behaviour
-$p->handler(default => []);
-
-eval {
-   $p->parse(sub { die "Hi" });
-};
-like($@, qr/^Hi/);
diff --git a/ext/HTML/Parser/t/case-sensitive.t b/ext/HTML/Parser/t/case-sensitive.t
deleted file mode 100644
index 565b20b3e9..0000000000
--- a/ext/HTML/Parser/t/case-sensitive.t
+++ /dev/null
@@ -1,85 +0,0 @@
-use strict;
-use Test::More tests => 8;
-
-use HTML::Parser ();
-my $p = HTML::Parser->new();
-$p->case_sensitive(1);
-
-my $text = "";
-$p->handler(start =>
-	    sub {
-		 my($tag, $attr, $attrseq) = @_;
-		 $text .= "S[$tag";
-		 for my $k (sort keys %$attr) {
-		     my $v =  $attr->{$k};
-		     $text .= " $k=$v";
-		 }
-		 if (@$attrseq) { $text.=" Order:" ; }
-		 for my $k (@$attrseq) {
-		     $text .= " $k";
-		 }
-		 $text .= "]";
-	     }, "tagname,attr,attrseq");
-$p->handler(end =>
-	    sub {
-		 my ($tag) = @_;
-		 $text .= "E[$tag]";
-	     }, "tagname");
-
-my $html = <<'EOT';
-<tAg aRg="Value" arg="other value"></tAg>
-EOT
-my $cs = 'S[tAg aRg=Value arg=other value Order: aRg arg]E[tAg]';
-my $ci = 'S[tag arg=Value Order: arg arg]E[tag]';
-
-$p->parse($html)->eof;
-is($text, $cs);
-
-$text = "";
-$p->case_sensitive(0);
-$p->parse($html)->eof;
-is($text, $ci);
-
-$text = "";
-$p->case_sensitive(1);
-$p->xml_mode(1);
-$p->parse($html)->eof;
-is($text, $cs);
-
-$text = "";
-$p->case_sensitive(0);
-$p->parse($html)->eof;
-is($text, $cs);
-
-$html = <<'EOT';
-<tAg aRg="Value" arg="other value"></tAg>
-<iGnOrE></ignore>
-EOT
-$p->ignore_tags('ignore');
-$cs = 'S[tAg aRg=Value arg=other value Order: aRg arg]E[tAg]S[iGnOrE]';
-$ci = 'S[tag arg=Value Order: arg arg]E[tag]';
-
-$text = "";
-$p->case_sensitive(0);
-$p->xml_mode(0);
-$p->parse($html)->eof;
-is($text, $ci);
- 
-$text = "";
-$p->case_sensitive(1);
-$p->xml_mode(0);
-$p->parse($html)->eof;
-is($text, $cs);
-
-$text = "";
-$p->case_sensitive(0);
-$p->xml_mode(1);
-$p->parse($html)->eof;
-is($text, $cs);
- 
-$text = "";
-$p->case_sensitive(1);
-$p->xml_mode(1);
-$p->parse($html)->eof;
-is($text, $cs);
- 
diff --git a/ext/HTML/Parser/t/cases.t b/ext/HTML/Parser/t/cases.t
deleted file mode 100644
index a53727950c..0000000000
--- a/ext/HTML/Parser/t/cases.t
+++ /dev/null
@@ -1,105 +0,0 @@
-use Test::More;
-
-require HTML::Parser;
-
-package P; @ISA = qw(HTML::Parser);
-
-my @result;
-sub start
-{
-    my($self, $tag, $attr) = @_;
-    push @result, "START[$tag]";
-    for (sort keys %$attr) {
-        push @result, "\t$_: " . $attr->{$_};
-    }
-    $start++;
-}
-
-sub end
-{
-    my($self, $tag) = @_;
-    push @result, "END[$tag]";
-    $end++;
-}
-
-sub text
-{
-    my $self = shift;
-    push @result, "TEXT[$_[0]]";
-    $text++;
-}
-
-sub comment
-{
-    my $self = shift;
-    push @result, "COMMENT[$_[0]]";
-    $comment++;
-}
-
-sub declaration
-{
-    my $self = shift;
-    push @result, "DECLARATION[$_[0]]";
-    $declaration++;
-}
-
-package main;
-
-
-@tests =
-    (
-     '<a ">' => ['START[a]', "\t\": \""],
-     '<a/>' => ['START[a/]',],
-     '<a />' => ['START[a]', "\t/: /"],
-     '<a a/>' => ['START[a]', "\ta/: a/"],
-     '<a a/=/>' => ['START[a]', "\ta/: /"],
-     '<a x="foo&nbsp;bar">' => ['START[a]', "\tx: foo\xA0bar"],
-     '<a x="foo&nbspbar">' => ['START[a]', "\tx: foo&nbspbar"],
-     '<� >' => ['TEXT[<�]', 'TEXT[ >]'],
-     '2 < 5' => ['TEXT[2 ]', 'TEXT[<]', 'TEXT[ 5]'],
-     '2 <5> 2' => ['TEXT[2 ]', 'TEXT[<5>]', 'TEXT[ 2]'],
-     '2 <a' => ['TEXT[2 ]', 'TEXT[<a]'],
-     '2 <a> 2' => ['TEXT[2 ]', 'START[a]', 'TEXT[ 2]'],
-     '2 <a href=foo' => ['TEXT[2 ]', 'TEXT[<a href=foo]'],
-     "2 <a href='foo bar'> 2" =>
-         ['TEXT[2 ]', 'START[a]', "\thref: foo bar", 'TEXT[ 2]'],
-     '2 <a href=foo bar> 2' =>
-         ['TEXT[2 ]', 'START[a]', "\tbar: bar", "\thref: foo", 'TEXT[ 2]'],
-     '2 <a href="foo bar"> 2' =>
-         ['TEXT[2 ]', 'START[a]', "\thref: foo bar", 'TEXT[ 2]'],
-     '2 <a href="foo\'bar"> 2' =>
-         ['TEXT[2 ]', 'START[a]', "\thref: foo'bar", 'TEXT[ 2]'],
-     "2 <a href='foo\"bar'> 2" =>
-         ['TEXT[2 ]', 'START[a]', "\thref: foo\"bar", 'TEXT[ 2]'],
-     "2 <a href='foo&quot;bar'> 2" =>
-         ['TEXT[2 ]', 'START[a]', "\thref: foo\"bar", 'TEXT[ 2]'],
-     '2 <a.b> 2' => ['TEXT[2 ]', 'START[a.b]', 'TEXT[ 2]'],
-     '2 <a.b-12 a.b = 2 a> 2' =>
-         ['TEXT[2 ]', 'START[a.b-12]', "\ta: a", "\ta.b: 2", 'TEXT[ 2]'],
-     '2 <a_b> 2' => ['TEXT[2 ]', 'START[a_b]', 'TEXT[ 2]'],
-     '<!ENTITY nbsp   CDATA "&#160;" -- no-break space -->' =>
-         ['DECLARATION[ENTITY nbsp   CDATA "&#160;" -- no-break space --]'],
-     '<!-- comment -->' => ['COMMENT[ comment ]'],
-     '<!-- comment -- --- comment -->' =>
-         ['COMMENT[ comment ]', 'COMMENT[- comment ]'],
-     '<!-- comment <!-- not comment --> comment -->' =>
-         ['COMMENT[ comment <!]', 'COMMENT[> comment ]'],
-     '<!-- <a href="foo"> -->' => ['COMMENT[ <a href="foo"> ]'],
-     );
-
-plan tests => @tests / 2;
-
-my $i = 0;
-TEST:
-while (@tests) {
-    ++$i;
-    my ($html, $expected) = splice @tests, 0, 2;
-    @result = ();
-
-    $p = new P;
-    $p->strict_comment(1);
-    $p->parse($html)->eof;
-
-    ok(eq_array($expected, \@result)) or diag("Expected: @$expected\n",
-					      "Got:      @result\n");
-}
diff --git a/ext/HTML/Parser/t/comment.t b/ext/HTML/Parser/t/comment.t
deleted file mode 100644
index 303449ebaa..0000000000
--- a/ext/HTML/Parser/t/comment.t
+++ /dev/null
@@ -1,24 +0,0 @@
-use Test::More tests => 1;
-
-use strict;
-use HTML::Parser;
-
-my $p = HTML::Parser->new(api_version => 3);
-my @com;
-$p->handler(comment => sub { push(@com, shift) }, "token0");
-$p->handler(default => sub { push(@com, shift() . "[" . shift() . "]") }, "event, text");
-
-$p->parse("<foo><><!><!-><!--><!---><!----><!-----><!------>");
-$p->parse("<!--+--");
-$p->parse("\n\n");
-$p->parse(">");
-$p->parse("<!a'b>");
-$p->parse("<!--foo--->");
-$p->parse("<!--foo---->");
-$p->parse("<!--foo----->-->");
-$p->parse("<foo>");
-$p->parse("<!3453><!-3456><!FOO><>");
-$p->eof;
-
-my $com = join(":", @com);
-is($com, "start_document[]:start[<foo>]:text[<>]::-:><!-::-:--:+:a'b:foo-:foo--:foo---:text[-->]:start[<foo>]:3453:-3456:FOO:text[<>]:end_document[]");
diff --git a/ext/HTML/Parser/t/crashme.t b/ext/HTML/Parser/t/crashme.t
deleted file mode 100644
index 1a1e8e47c9..0000000000
--- a/ext/HTML/Parser/t/crashme.t
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/usr/bin/perl
-
-# This test will simply run the parser on random junk.
-
-my $no_tests = shift || 3;
-use Test::More;
-plan tests => $no_tests;
-
-use HTML::Parser ();
-
-my $file = "junk$$.html";
-die if -e $file;
-
-for (1..$no_tests) {
-
-    open(JUNK, ">$file") || die;
-    for (1 .. rand(5000)) {
-        for (1 .. rand(200)) {
-            print JUNK pack("N", rand(2**32));
-        }
-        print JUNK ("<", "&", ">")[rand(3)];  # make these a bit more likely
-    }
-    close(JUNK);
-
-    #diag "Parse @{[-s $file]} bytes of junk";
-
-    HTML::Parser->new->parse_file($file);
-    pass();
-
-    #print_mem();
-}
-
-unlink($file);
-
-
-sub print_mem
-{
-    # this probably only works on Linux
-    open(STAT, "/proc/self/status") || return;
-    while (<STAT>) {
-        diag $_ if /^VmSize/;
-    }
-}
diff --git a/ext/HTML/Parser/t/declaration.t b/ext/HTML/Parser/t/declaration.t
deleted file mode 100644
index 17de561651..0000000000
--- a/ext/HTML/Parser/t/declaration.t
+++ /dev/null
@@ -1,62 +0,0 @@
-use Test::More tests => 2;
-
-use HTML::Parser;
-my $res = "";
-
-sub decl
-{
-    my $t = shift;
-    $res .= "[" . join("\n", map "<$_>", @$t) . "]";
-}
-
-sub text
-{
-    $res .= shift;
-}
-
-my $p = HTML::Parser->new(declaration_h => [\&decl, "tokens"],
-			  default_h     => [\&text, "text"],
-	                 );
-
-$p->parse(<<EOT)->eof;
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" --<comment>--
-  "http://www.w3.org/TR/html40/strict.dtd">
-
-<!ENTITY foo "<!-- foo -->">
-<!Entity foo "<!-- foo -->">
-
-<!row --> foo
-EOT
-
-is($res, <<EOT);
-[<DOCTYPE>
-<HTML>
-<PUBLIC>
-<"-//W3C//DTD HTML 4.01//EN">
-<--<comment>-->
-<"http://www.w3.org/TR/html40/strict.dtd">]
-
-[<ENTITY>
-<foo>
-<"<!-- foo -->">]
-[<Entity>
-<foo>
-<"<!-- foo -->">]
-
-<!row --> foo
-EOT
-
-$res = "";
-$p->parse(<<EOT)->eof;
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"[]>
-EOT
-is($res, <<EOT);
-[<DOCTYPE>
-<html>
-<PUBLIC>
-<"-//W3C//DTD XHTML 1.0 Strict//EN">
-<"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
-<[]>]
-EOT
-
diff --git a/ext/HTML/Parser/t/default.t b/ext/HTML/Parser/t/default.t
deleted file mode 100644
index 4b5ed794fd..0000000000
--- a/ext/HTML/Parser/t/default.t
+++ /dev/null
@@ -1,43 +0,0 @@
-use strict;
-use Test::More tests => 3;
-
-my $text = "";
-use HTML::Parser ();
-my $p = HTML::Parser->new(default_h => [sub { $text .= shift }, "text"],
-                         );
-
-my $html = <<'EOT';
-
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                       "http://www.w3.org/TR/html40/strict.dtd">
-
-<title>foo</title>
-<!-- comment <a> -->
-<?process instruction>
-
-EOT
-
-$p->parse($html)->eof;
-
-is($text, $html);
-
-$text = "";
-$p->handler(start => sub { }, "");
-$p->handler(declaration => sub { }, "");
-$p->parse($html)->eof;
-
-my $html2;
-$html2 = $html;
-$html2 =~ s/<title>//;
-$html2 =~ s/<!DOCTYPE[^>]*>//;
-
-is($text, $html2);
-
-$text = "";
-$p->handler(start => undef);
-$p->parse($html)->eof;
-
-$html2 = $html;
-$html2 =~ s/<!DOCTYPE[^>]*>//;
-
-is($text, $html2);
diff --git a/ext/HTML/Parser/t/document.t b/ext/HTML/Parser/t/document.t
deleted file mode 100644
index 6696939050..0000000000
--- a/ext/HTML/Parser/t/document.t
+++ /dev/null
@@ -1,41 +0,0 @@
-#!perl -w
-
-use Test;
-plan tests => 6;
-
-
-use HTML::Parser;
-use File::Spec;
-
-my $events;
-my $p = HTML::Parser->new(default_h => [sub { $events .= "$_[0]\n";}, "event"]);
-
-$events = "";
-$p->eof;
-ok($events, "start_document\nend_document\n");
-
-$events = "";
-$p->parse_file(File::Spec->devnull);
-ok($events, "start_document\nend_document\n");
-
-$events = "";
-$p->parse("");
-$p->eof;
-ok($events, "start_document\nend_document\n");
-
-$events = "";
-$p->parse("");
-$p->parse("");
-$p->eof;
-ok($events, "start_document\nend_document\n");
-
-$events = "";
-$p->parse("");
-$p->parse("<a>");
-$p->eof;
-ok($events, "start_document\nstart\nend_document\n");
-
-$events = "";
-$p->parse("<a> ");
-$p->eof;
-ok($events, "start_document\nstart\ntext\nend_document\n");
diff --git a/ext/HTML/Parser/t/dtext.t b/ext/HTML/Parser/t/dtext.t
deleted file mode 100644
index 883c61f045..0000000000
--- a/ext/HTML/Parser/t/dtext.t
+++ /dev/null
@@ -1,72 +0,0 @@
-#!perl -w
-
-use strict;
-use Test::More tests => 2;
-
-use HTML::Parser ();
-
-my $dtext = "";
-my $text  = "";
-
-sub append
-{
-    $dtext .= shift;
-    $text .= shift;
-}
-
-my $p = HTML::Parser->new(text_h    => [\&append, "dtext, text"],
-			  default_h => [\&append, "text,  text" ],
-			 );
-
-my $doc = <<'EOT';
-<title>&aring</title>
-<a href="foo&aring">&aring&aring;&#65&#65;&lt&#65&gt;&#x41&#X41;</a>
-<?&aring>
-foo&nbsp;bar
-foo&nbspbar
-&xyzzy
-&xyzzy;
-<!-- &#0; -->
-&#1;
-&#255;
-&#xFF
-&#xFFG
-<!-- &#256; -->
-&#40000000000000000000000000000;
-&#x400000000000000000000000000000000;
-&
-&#
-&#x
-<xmp>&aring</xmp>
-<script>&aring</script>
-<ScRIPT>&aring</scRIPT>
-<skript>&aring</script>
-EOT
-
-$p->parse($doc)->eof;
-
-is($text, $doc);
-is($dtext, <<"EOT");
-<title>�</title>
-<a href="foo&aring">��AA<A>AA</a>
-<?&aring>
-foo\240bar
-foo\240bar
-&xyzzy
-&xyzzy;
-<!-- &#0; -->
-\1
-\377
-\377
-\377G
-<!-- &#256; -->
-&#40000000000000000000000000000;
-&#x400000000000000000000000000000000;
-&
-&#
-&#x
-<xmp>&aring</xmp>
-<script>&aring</script>
-<ScRIPT>&aring</scRIPT>
-<skript>�</script>
-EOT
diff --git a/ext/HTML/Parser/t/entities.t b/ext/HTML/Parser/t/entities.t
deleted file mode 100644
index b8342f5a7c..0000000000
--- a/ext/HTML/Parser/t/entities.t
+++ /dev/null
@@ -1,193 +0,0 @@
-use HTML::Entities qw(decode_entities encode_entities encode_entities_numeric);
-
-use Test::More tests => 12;
-
-$a = "V&aring;re norske tegn b&oslash;r &#230res";
-
-decode_entities($a);
-
-is($a, "V�re norske tegn b�r �res");
-
-encode_entities($a);
-
-is($a, "V&aring;re norske tegn b&oslash;r &aelig;res");
-
-decode_entities($a);
-encode_entities_numeric($a);
-
-is($a, "V&#xE5;re norske tegn b&#xF8;r &#xE6;res");
-
-$a = "<&>\"'";
-is(encode_entities($a), "&lt;&amp;&gt;&quot;&#39;");
-is(encode_entities_numeric($a), "&#x3C;&#x26;&#x3E;&#x22;&#x27;");
-
-$a = "abcdef";
-is(encode_entities($a, 'a-c'), "&#97;&#98;&#99;def");
-
-
-# See how well it does against rfc1866...
-$ent = $plain = "";
-while (<DATA>) {
-    next unless /^\s*<!ENTITY\s+(\w+)\s*CDATA\s*\"&\#(\d+)/;
-    $ent .= "&$1;";
-    $plain .= chr($2);
-}
-
-$a = $ent;
-decode_entities($a);
-is($a, $plain);
-
-# Try decoding when the ";" are left out
-$a = $ent,
-$a =~ s/;//g;
-decode_entities($a);
-is($a, $plain);
-
-
-$a = $plain;
-encode_entities($a);
-is($a, $ent);
-
-
-# From: Bill Simpson-Young <bill.simpson-young@cmis.csiro.au>
-# Subject: HTML entities problem with 5.11
-# To: libwww-perl@ics.uci.edu
-# Date: Fri, 05 Sep 1997 16:56:55 +1000
-# Message-Id: <199709050657.QAA10089@snowy.nsw.cmis.CSIRO.AU>
-#
-# Hi. I've got a problem that has surfaced with the changes to 
-# HTML::Entities.pm for 5.11 (it doesn't happen with 5.08).  It's happening 
-# in the process of encoding then decoding special entities.  Eg, what goes 
-# in as "abc&def&ghi" comes out as "abc&def;&ghi;".
-
-is(decode_entities("abc&def&ghi&abc;&def;"), "abc&def&ghi&abc;&def;");
-
-# Decoding of &apos;
-is(decode_entities("&apos;"), "'");
-is(encode_entities("'", "'"), "&#39;");
-
-
-__END__
-# Quoted from rfc1866.txt
-
-14. Proposed Entities
-
-   The HTML DTD references the "Added Latin 1" entity set, which only
-   supplies named entities for a subset of the non-ASCII characters in
-   [ISO-8859-1], namely the accented characters. The following entities
-   should be supported so that all ISO 8859-1 characters may only be
-   referenced symbolically. The names for these entities are taken from
-   the appendixes of [SGML].
-
-    <!ENTITY nbsp   CDATA "&#160;" -- no-break space -->
-    <!ENTITY iexcl  CDATA "&#161;" -- inverted exclamation mark -->
-    <!ENTITY cent   CDATA "&#162;" -- cent sign -->
-    <!ENTITY pound  CDATA "&#163;" -- pound sterling sign -->
-    <!ENTITY curren CDATA "&#164;" -- general currency sign -->
-    <!ENTITY yen    CDATA "&#165;" -- yen sign -->
-    <!ENTITY brvbar CDATA "&#166;" -- broken (vertical) bar -->
-    <!ENTITY sect   CDATA "&#167;" -- section sign -->
-    <!ENTITY uml    CDATA "&#168;" -- umlaut (dieresis) -->
-    <!ENTITY copy   CDATA "&#169;" -- copyright sign -->
-    <!ENTITY ordf   CDATA "&#170;" -- ordinal indicator, feminine -->
-    <!ENTITY laquo  CDATA "&#171;" -- angle quotation mark, left -->
-    <!ENTITY not    CDATA "&#172;" -- not sign -->
-    <!ENTITY shy    CDATA "&#173;" -- soft hyphen -->
-    <!ENTITY reg    CDATA "&#174;" -- registered sign -->
-    <!ENTITY macr   CDATA "&#175;" -- macron -->
-    <!ENTITY deg    CDATA "&#176;" -- degree sign -->
-    <!ENTITY plusmn CDATA "&#177;" -- plus-or-minus sign -->
-    <!ENTITY sup2   CDATA "&#178;" -- superscript two -->
-    <!ENTITY sup3   CDATA "&#179;" -- superscript three -->
-    <!ENTITY acute  CDATA "&#180;" -- acute accent -->
-    <!ENTITY micro  CDATA "&#181;" -- micro sign -->
-    <!ENTITY para   CDATA "&#182;" -- pilcrow (paragraph sign) -->
-    <!ENTITY middot CDATA "&#183;" -- middle dot -->
-    <!ENTITY cedil  CDATA "&#184;" -- cedilla -->
-    <!ENTITY sup1   CDATA "&#185;" -- superscript one -->
-    <!ENTITY ordm   CDATA "&#186;" -- ordinal indicator, masculine -->
-    <!ENTITY raquo  CDATA "&#187;" -- angle quotation mark, right -->
-    <!ENTITY frac14 CDATA "&#188;" -- fraction one-quarter -->
-    <!ENTITY frac12 CDATA "&#189;" -- fraction one-half -->
-    <!ENTITY frac34 CDATA "&#190;" -- fraction three-quarters -->
-    <!ENTITY iquest CDATA "&#191;" -- inverted question mark -->
-    <!ENTITY Agrave CDATA "&#192;" -- capital A, grave accent -->
-    <!ENTITY Aacute CDATA "&#193;" -- capital A, acute accent -->
-    <!ENTITY Acirc  CDATA "&#194;" -- capital A, circumflex accent -->
-
-
-
-Berners-Lee & Connolly      Standards Track                    [Page 75]
-
-RFC 1866            Hypertext Markup Language - 2.0        November 1995
-
-
-    <!ENTITY Atilde CDATA "&#195;" -- capital A, tilde -->
-    <!ENTITY Auml   CDATA "&#196;" -- capital A, dieresis or umlaut mark -->
-    <!ENTITY Aring  CDATA "&#197;" -- capital A, ring -->
-    <!ENTITY AElig  CDATA "&#198;" -- capital AE diphthong (ligature) -->
-    <!ENTITY Ccedil CDATA "&#199;" -- capital C, cedilla -->
-    <!ENTITY Egrave CDATA "&#200;" -- capital E, grave accent -->
-    <!ENTITY Eacute CDATA "&#201;" -- capital E, acute accent -->
-    <!ENTITY Ecirc  CDATA "&#202;" -- capital E, circumflex accent -->
-    <!ENTITY Euml   CDATA "&#203;" -- capital E, dieresis or umlaut mark -->
-    <!ENTITY Igrave CDATA "&#204;" -- capital I, grave accent -->
-    <!ENTITY Iacute CDATA "&#205;" -- capital I, acute accent -->
-    <!ENTITY Icirc  CDATA "&#206;" -- capital I, circumflex accent -->
-    <!ENTITY Iuml   CDATA "&#207;" -- capital I, dieresis or umlaut mark -->
-    <!ENTITY ETH    CDATA "&#208;" -- capital Eth, Icelandic -->
-    <!ENTITY Ntilde CDATA "&#209;" -- capital N, tilde -->
-    <!ENTITY Ograve CDATA "&#210;" -- capital O, grave accent -->
-    <!ENTITY Oacute CDATA "&#211;" -- capital O, acute accent -->
-    <!ENTITY Ocirc  CDATA "&#212;" -- capital O, circumflex accent -->
-    <!ENTITY Otilde CDATA "&#213;" -- capital O, tilde -->
-    <!ENTITY Ouml   CDATA "&#214;" -- capital O, dieresis or umlaut mark -->
-    <!ENTITY times  CDATA "&#215;" -- multiply sign -->
-    <!ENTITY Oslash CDATA "&#216;" -- capital O, slash -->
-    <!ENTITY Ugrave CDATA "&#217;" -- capital U, grave accent -->
-    <!ENTITY Uacute CDATA "&#218;" -- capital U, acute accent -->
-    <!ENTITY Ucirc  CDATA "&#219;" -- capital U, circumflex accent -->
-    <!ENTITY Uuml   CDATA "&#220;" -- capital U, dieresis or umlaut mark -->
-    <!ENTITY Yacute CDATA "&#221;" -- capital Y, acute accent -->
-    <!ENTITY THORN  CDATA "&#222;" -- capital THORN, Icelandic -->
-    <!ENTITY szlig  CDATA "&#223;" -- small sharp s, German (sz ligature) -->
-    <!ENTITY agrave CDATA "&#224;" -- small a, grave accent -->
-    <!ENTITY aacute CDATA "&#225;" -- small a, acute accent -->
-    <!ENTITY acirc  CDATA "&#226;" -- small a, circumflex accent -->
-    <!ENTITY atilde CDATA "&#227;" -- small a, tilde -->
-    <!ENTITY auml   CDATA "&#228;" -- small a, dieresis or umlaut mark -->
-    <!ENTITY aring  CDATA "&#229;" -- small a, ring -->
-    <!ENTITY aelig  CDATA "&#230;" -- small ae diphthong (ligature) -->
-    <!ENTITY ccedil CDATA "&#231;" -- small c, cedilla -->
-    <!ENTITY egrave CDATA "&#232;" -- small e, grave accent -->
-    <!ENTITY eacute CDATA "&#233;" -- small e, acute accent -->
-    <!ENTITY ecirc  CDATA "&#234;" -- small e, circumflex accent -->
-    <!ENTITY euml   CDATA "&#235;" -- small e, dieresis or umlaut mark -->
-    <!ENTITY igrave CDATA "&#236;" -- small i, grave accent -->
-    <!ENTITY iacute CDATA "&#237;" -- small i, acute accent -->
-    <!ENTITY icirc  CDATA "&#238;" -- small i, circumflex accent -->
-    <!ENTITY iuml   CDATA "&#239;" -- small i, dieresis or umlaut mark -->
-    <!ENTITY eth    CDATA "&#240;" -- small eth, Icelandic -->
-    <!ENTITY ntilde CDATA "&#241;" -- small n, tilde -->
-    <!ENTITY ograve CDATA "&#242;" -- small o, grave accent -->
-
-
-
-Berners-Lee & Connolly      Standards Track                    [Page 76]
-
-RFC 1866            Hypertext Markup Language - 2.0        November 1995
-
-
-    <!ENTITY oacute CDATA "&#243;" -- small o, acute accent -->
-    <!ENTITY ocirc  CDATA "&#244;" -- small o, circumflex accent -->
-    <!ENTITY otilde CDATA "&#245;" -- small o, tilde -->
-    <!ENTITY ouml   CDATA "&#246;" -- small o, dieresis or umlaut mark -->
-    <!ENTITY divide CDATA "&#247;" -- divide sign -->
-    <!ENTITY oslash CDATA "&#248;" -- small o, slash -->
-    <!ENTITY ugrave CDATA "&#249;" -- small u, grave accent -->
-    <!ENTITY uacute CDATA "&#250;" -- small u, acute accent -->
-    <!ENTITY ucirc  CDATA "&#251;" -- small u, circumflex accent -->
-    <!ENTITY uuml   CDATA "&#252;" -- small u, dieresis or umlaut mark -->
-    <!ENTITY yacute CDATA "&#253;" -- small y, acute accent -->
-    <!ENTITY thorn  CDATA "&#254;" -- small thorn, Icelandic -->
-    <!ENTITY yuml   CDATA "&#255;" -- small y, dieresis or umlaut mark -->
diff --git a/ext/HTML/Parser/t/entities2.t b/ext/HTML/Parser/t/entities2.t
deleted file mode 100644
index 7840c719ba..0000000000
--- a/ext/HTML/Parser/t/entities2.t
+++ /dev/null
@@ -1,57 +0,0 @@
-#!perl -w
-
-use strict;
-use Test::More tests => 9;
-
-use HTML::Entities qw(_decode_entities);
-
-eval {
-    _decode_entities("&lt;", undef);
-};
-like($@, qr/^Can't inline decode readonly string/);
-
-eval {
-    my $a = "";
-    _decode_entities($a, $a);
-};
-like($@, qr/^2nd argument must be hash reference/);
-
-eval {
-    my $a = "";
-    _decode_entities($a, []);
-};
-like($@, qr/^2nd argument must be hash reference/);
-
-$a = "&lt;";
-_decode_entities($a, undef);
-is($a, "&lt;");
-
-_decode_entities($a, { "lt" => "<" });
-is($a, "<");
-
-my $x = "x" x 20;
-
-my $err;
-for (":", ":a", "a:", "a:a", "a:a:a", "a:::a") {
-    my $a = $_;
-    $a =~ s/:/&a;/g;
-    my $b = $_;
-    $b =~ s/:/$x/g;
-    _decode_entities($a, { "a" => $x });
-    if ($a ne $b) {
-	diag "Something went wrong with '$_'";
-	$err++;
-    }
-}
-ok(!$err);
-
-$a = "foo&nbsp;bar";
-_decode_entities($a, \%HTML::Entities::entity2char);
-is($a, "foo\xA0bar");
-
-$a = "foo&nbspbar";
-_decode_entities($a, \%HTML::Entities::entity2char);
-is($a, "foo&nbspbar");
-
-_decode_entities($a, \%HTML::Entities::entity2char, 1);
-is($a, "foo\xA0bar");
diff --git a/ext/HTML/Parser/t/filter-methods.t b/ext/HTML/Parser/t/filter-methods.t
deleted file mode 100644
index 9eccaf1655..0000000000
--- a/ext/HTML/Parser/t/filter-methods.t
+++ /dev/null
@@ -1,205 +0,0 @@
-#!/usr/bin/perl -w
-
-use Test::More tests => 12;
-use strict;
-
-use HTML::Parser;
-
-my $p = HTML::Parser->new(api_version => 3, ignore_tags => [qw(b i em tt)]);
-$p->ignore_elements("script");
-$p->unbroken_text(1);
-
-$p->handler(default => [], "event, text");
-$p->parse(<<"EOT")->eof;
-<html><head><title>foo</title><Script language="Perl">
-   while (<B>) {
-      # ...
-   }
-</Script><body>
-This is an <i>italic</i> and <b>bold</b> text.
-</body>
-</html>
-EOT
-
-my $t = join("||", map join("|", @$_), @{$p->handler("default")});
-#diag $t;
-
-is($t, "start_document|||start|<html>||start|<head>||start|<title>||text|foo||end|</title>||start|<body>||text|
-This is an italic and bold text.
-||end|</body>||text|
-||end|</html>||text|
-||end_document|", 'ignore_elements');
-
-
-#------------------------------------------------------
-
-$p = HTML::Parser->new(api_version => 3);
-$p->report_tags("a");
-$p->handler(start => sub {
-		my($tagname, %attr) = @_;
-		ok($tagname eq "a" && $attr{href} eq "#a", 'report_tags start');
-            }, 'tagname, @attr');
-$p->handler(end => sub {
-		my $tagname = shift;
-		is($tagname, "a", 'report_tags end');
-            }, 'tagname');
-
-$p->parse(<<EOT)->eof;
-
-<h1>Next example</h1>
-
-This is <a href="#a">very nice</a> example.
-
-EOT
-
-
-#------------------------------------------------------
-
-my @tags;
-$p = HTML::Parser->new(api_version => 3);
-$p->report_tags(qw(a em));
-$p->ignore_tags(qw(em));
-$p->handler(end => sub {push @tags, @_;}, 'tagname');
-
-$p->parse(<<EOT)->eof;
-
-<h1>Next example</h1>
-
-This is <em>yet another</em> <a href="#a">very nice</a> example.
-
-EOT
-is(join('|', @tags), 'a', 'report_tags followed by ignore_tags');
-
-
-#------------------------------------------------------
-
-@tags = ();
-$p = HTML::Parser->new(api_version => 3);
-$p->report_tags(qw(h1));
-$p->report_tags();
-$p->handler(end => sub {push @tags, @_;}, 'tagname');
-
-$p->parse(<<EOT)->eof;
-
-<h1>Next example</h1>
-<h2>Next example</h2>
-
-EOT
-is(join('|', @tags), 'h1|h2', 'reset report_tags filter');
-
-
-#------------------------------------------------------
-
-@tags = ();
-$p = HTML::Parser->new(api_version => 3);
-$p->report_tags(qw(h1 h2));
-$p->ignore_tags(qw(h2));
-$p->report_tags(qw(h1 h2));
-$p->handler(end => sub {push @tags, @_;}, 'tagname');
-
-$p->parse(<<EOT)->eof;
-
-<h1>Next example</h1>
-<h2>Next example</h2>
-
-EOT
-is(join('|', @tags), 'h1', 'report_tags does not reset ignore_tags');
-
-
-#------------------------------------------------------
-
-@tags = ();
-$p = HTML::Parser->new(api_version => 3);
-$p->report_tags(qw(h1 h2));
-$p->ignore_tags(qw(h2));
-$p->report_tags();
-$p->handler(end => sub {push @tags, @_;}, 'tagname');
-
-$p->parse(<<EOT)->eof;
-
-<h1>Next example</h1>
-<h2>Next example</h2>
-
-EOT
-is(join('|', @tags), 'h1', 'reset report_tags does no reset ignore_tags');
-
-
-#------------------------------------------------------
-
-@tags = ();
-$p = HTML::Parser->new(api_version => 3);
-$p->report_tags(qw(h1 h2));
-$p->report_tags(qw(h3));
-$p->handler(end => sub {push @tags, @_;}, 'tagname');
-
-$p->parse(<<EOT)->eof;
-
-<h1>Next example</h1>
-<h2>Next example</h2>
-<h3>Next example</h3>
-
-EOT
-is(join('|', @tags), 'h3', 'report_tags replaces filter');
-
-
-#------------------------------------------------------
-
-
-@tags = ();
-$p = HTML::Parser->new(api_version => 3);
-$p->ignore_tags(qw(h1 h2));
-$p->ignore_tags(qw(h3));
-$p->handler(end => sub {push @tags, @_;}, 'tagname');
-
-$p->parse(<<EOT)->eof;
-
-<h1>Next example</h1>
-<h2>Next example</h2>
-<h3>Next example</h3>
-
-EOT
-is(join('|', @tags), 'h1|h2', 'ignore_tags replaces filter');
-
-
-#------------------------------------------------------
-
-@tags = ();
-$p = HTML::Parser->new(api_version => 3);
-$p->ignore_tags(qw(h2));
-$p->ignore_tags();
-$p->handler(end => sub {push @tags, @_;}, 'tagname');
-
-$p->parse(<<EOT)->eof;
-
-<h1>Next example</h1>
-<h2>Next example</h2>
-
-EOT
-is(join('|', @tags), 'h1|h2', 'reset ignore_tags filter');
-
-
-#------------------------------------------------------
-
-@tags = ();
-$p = HTML::Parser->new(api_version => 3);
-$p->ignore_tags(qw(h2));
-$p->report_tags(qw(h1 h2));
-$p->handler(end => sub {push @tags, @_;}, 'tagname');
-
-$p->parse(<<EOT)->eof;
-
-<h1>Next example</h1>
-<h2>Next example</h2>
-
-EOT
-is(join('|', @tags), 'h1', 'ignore_tags before report_tags');
-#------------------------------------------------------
-
-$p = HTML::Parser->new(api_version => 3);
-$p->ignore_elements("script");
-my $res="";
-$p->handler(default=> sub {$res.=$_[0];}, 'text');
-$p->parse(<<'EOT')->eof;
-A <script> B </script> C </script> D <script> E </script> F
-EOT
-is($res,"A  C  D  F\n","ignore </script> without <script> correctly");
diff --git a/ext/HTML/Parser/t/filter.t b/ext/HTML/Parser/t/filter.t
deleted file mode 100644
index 3b18f9ec71..0000000000
--- a/ext/HTML/Parser/t/filter.t
+++ /dev/null
@@ -1,60 +0,0 @@
-use Test::More tests => 3;
-
-my $HTML = <<EOT;
-
-<!DOCTYPE HTML>
-<!-- comment
-<h1>Foo</h1>
--->
-
-<H1
->Bar</H1
->
-
-<Table><tr><td>1<td>2<td>3
-<tr>
-</table>
-
-<?process>
-
-EOT
-
-use HTML::Filter;
-use SelectSaver;
-
-my $tmpfile = "test-$$.htm";
-die "$tmpfile already exists" if -e $tmpfile;
-
-open(HTML, ">$tmpfile") or die "$!";
-
-{
-    my $save = new SelectSaver(HTML);
-    HTML::Filter->new->parse($HTML)->eof;
-}
-close(HTML);
-
-open(HTML, $tmpfile) or die "$!";
-local($/) = undef;
-my $FILTERED = <HTML>;
-close(HTML);
-
-#print $FILTERED;
-is($FILTERED, $HTML);
-
-{
-    package MyFilter;
-    @ISA=qw(HTML::Filter);
-    sub comment {}
-    sub output { push(@{$_[0]->{fhtml}}, $_[1]) }
-    sub filtered_html { join("", @{$_[0]->{fhtml}}) }
-}
-
-my $f2 = MyFilter->new->parse_file($tmpfile)->filtered_html;
-unlink($tmpfile) or warn "Can't unlink $tmpfile: $!";
-
-#diag $f2;
-
-unlike($f2, qr/Foo/);
-like($f2, qr/Bar/);
-
-
diff --git a/ext/HTML/Parser/t/handler-eof.t b/ext/HTML/Parser/t/handler-eof.t
deleted file mode 100644
index 39419dc4da..0000000000
--- a/ext/HTML/Parser/t/handler-eof.t
+++ /dev/null
@@ -1,54 +0,0 @@
-use Test::More tests => 6;
-
-use strict;
-use HTML::Parser ();
-
-my $p = HTML::Parser->new(api_version => 3);
-
-$p->handler(start => sub { my $attr = shift; is($attr->{testno}, 1) },
-		     "attr");
-$p->handler(end => sub { shift->eof }, "self");
-my $text;
-$p->handler(text => sub { $text = shift }, "text");
-
-is($p->parse("<foo testno=1>"), $p);
-
-$text = '';
-ok(!$p->parse("</foo><foo testno=999>"));
-ok(!$text);
-
-$p->handler(end => sub { $p->parse("foo"); }, "");
-eval {
-    $p->parse("</foo>");
-};
-like($@,  qr/Parse loop not allowed/);
-
-# We used to get into an infinite loop if the eof triggered
-# handler called ->eof
-
-use HTML::Parser;
-$p = HTML::Parser->new(api_version => 3);
-
-my $i;
-$p->handler("default" =>
-	    sub {
-		my $p=shift;
-	        #++$i; diag "$i @_";
-		$p->eof;
-	    }, "self, event");
-$p->parse("Foo");
-$p->eof;
-
-# We used to sometimes trigger events after a handler signaled eof
-my $title='';
-$p = HTML::Parser->new(api_version => 3,);
-$p->handler(start=> \&title_handler, 'tagname, self');
-$p->parse("<head><title>foo</title>\n</head>");
-is($title, "foo");
-
-sub title_handler {
-    return if shift ne 'title';
-    my $self = shift; 
-    $self->handler(text => sub { $title .= shift}, 'dtext');
-    $self->handler(end => sub { shift->eof if shift eq 'title' }, 'tagname, self');
-}
diff --git a/ext/HTML/Parser/t/handler.t b/ext/HTML/Parser/t/handler.t
deleted file mode 100644
index 8d7bbc51ee..0000000000
--- a/ext/HTML/Parser/t/handler.t
+++ /dev/null
@@ -1,67 +0,0 @@
-# Test handler method
-
-use Test::More tests => 11;
-
-my $testno;
-
-use HTML::Parser;
-{
-    package MyParser;
-    use vars qw(@ISA);
-    @ISA=(HTML::Parser);
-    
-    sub foo
-    {
-	Test::More::is($_[1]{testno}, Test::More->builder->current_test + 1);
-    }
-
-    sub bar
-    {
-	Test::More::is($_[1], Test::More->builder->current_test + 1);
-    }
-}
-
-$p = MyParser->new(api_version => 3);
-
-eval {
-    $p->handler(foo => "foo", "foo");
-};
-
-like($@, qr/^No handler for foo events/);
-
-eval {
-   $p->handler(start => "foo", "foo");
-};
-like($@, qr/^Unrecognized identifier foo in argspec/);
-
-my $h = $p->handler(start => "foo", "self,tagname");
-ok(!defined($h));
-
-$x = \substr("xfoo", 1);
-$p->handler(start => $$x, "self,attr");
-$p->parse("<a testno=4>");
-
-$p->handler(start => \&MyParser::foo, "self,attr");
-$p->parse("<a testno=5>");
-
-$p->handler(start => "foo");
-$p->parse("<a testno=6>");
-
-$p->handler(start => "bar", "self,'7'");
-$p->parse("<a>");
-
-eval {
-    $p->handler(start => {}, "self");
-};
-like($@, qr/^Only code or array references allowed as handler/);
-
-$a = [];
-$p->handler(start => $a);
-$h = $p->handler("start");
-is($p->handler("start", "foo"), $a);
-
-is($p->handler("start", \&MyParser::foo, ""), "foo");
-
-is($p->handler("start"), \&MyParser::foo);
-
-
diff --git a/ext/HTML/Parser/t/headparser-http.t b/ext/HTML/Parser/t/headparser-http.t
deleted file mode 100644
index b722c64f9c..0000000000
--- a/ext/HTML/Parser/t/headparser-http.t
+++ /dev/null
@@ -1,20 +0,0 @@
-use Test::More tests => 1;
-
-eval {
-   require HTML::HeadParser;
-   $p = HTML::HeadParser->new;
-};
-
-SKIP: {
-skip $@, 1 if $@ =~ /^Can't locate HTTP/;
-
-$p = HTML::HeadParser->new($h);
-$p->parse(<<EOT);
-<title>Stupid example</title>
-<base href="http://www.sn.no/libwww-perl/">
-Normal text starts here.
-EOT
-$h = $p->header;
-undef $p;
-is($h->title, "Stupid example");
-}
diff --git a/ext/HTML/Parser/t/headparser.t b/ext/HTML/Parser/t/headparser.t
deleted file mode 100644
index b4228be0b2..0000000000
--- a/ext/HTML/Parser/t/headparser.t
+++ /dev/null
@@ -1,154 +0,0 @@
-#!perl -w
-
-use strict;
-use Test::More tests => 11;
-
-{ package H;
-  sub new { bless {}, shift; }
-
-  sub header {
-     my $self = shift;
-     my $key  = uc(shift);
-     my $old = $self->{$key};
-     if (@_) { $self->{$key} = shift; }
-     $old;
-  }
-
-  sub push_header {
-     my($self, $k, $v) = @_;
-     $k = uc($k);
-     if (exists $self->{$k}) {
-        $self->{$k} = [ $self->{$k} ] unless ref $self->{$k};
-	push(@{$self->{$k}}, $v);
-     } else {
-	$self->{$k} = $v;
-     }
-  }
-
-  sub as_string {
-     my $self = shift;
-     my $str = "";
-     for (sort keys %$self) {
-         if (ref($self->{$_})) {
-            my $v;
-            for $v (@{$self->{$_}}) {
-	        $str .= "$_: $v\n";
-            }
-         } else {
-            $str .= "$_: $self->{$_}\n";
-         }
-     }
-     $str;
-  }
-}
-
-
-my $HTML = <<'EOT';
-
-<title>&Aring v�re eller &#229; ikke v�re</title>
-<meta http-equiv="Expires" content="Soon">
-<meta http-equiv="Foo" content="Bar">
-<link href="mailto:gisle@aas.no" rev=made title="Gisle Aas">
-
-<script>
-
-   "</script>"
-    ignore this
-
-</script>
-
-<base href="http://www.sn.no">
-<meta name="Keywords" content="test, test, test,...">
-<meta name="Keywords" content="more">
-
-Dette er vanlig tekst.  Denne teksten definerer ogs� slutten p�
-&lt;head> delen av dokumentet.
-
-<style>
-
-   "</style>"
-   ignore this too
-
-</style>
-
-<isindex>
-
-Dette er ogs� vanlig tekst som ikke skal blir parset i det hele tatt.
-
-EOT
-
-$| = 1;
-
-#$HTML::HeadParser::DEBUG = 1;
-require HTML::HeadParser;
-my $p = HTML::HeadParser->new( H->new );
-
-if ($p->parse($HTML)) {
-    fail("Need more data which should not happen");
-} else {
-    #diag $p->as_string;
-    pass();
-}
-
-like($p->header('Title'), qr/� v�re eller � ikke v�re/);
-is($p->header('Expires'), 'Soon');
-is($p->header('Content-Base'), 'http://www.sn.no');
-like($p->header('Link'), qr/<mailto:gisle\@aas.no>/);
-
-# This header should not be present because the head ended
-ok(!$p->header('Isindex'));
-
-
-# Try feeding one char at a time
-my $expected = $p->as_string;
-my $nl = 1;
-$p = HTML::HeadParser->new(H->new);
-while ($HTML =~ /(.)/sg) {
-    #print STDERR '#' if $nl;
-    #print STDERR $1;
-    $nl = $1 eq "\n";
-    $p->parse($1) or last;
-}
-is($p->as_string, $expected);
-
-
-# Try reading it from a file
-my $file = "hptest$$.html";
-die "$file already exists" if -e $file;
-
-open(FILE, ">$file") or die "Can't create $file: $!";
-binmode(FILE);
-print FILE $HTML;
-print FILE "<p>This is more content...</p>\n" x 2000;
-print FILE "<title>Buuuh!</title>\n" x 200;
-close FILE or die "Can't close $file: $!";
-
-$p = HTML::HeadParser->new(H->new);
-$p->parse_file($file);
-unlink($file) or warn "Can't unlink $file: $!";
-
-is($p->header("Title"), "� v�re eller � ikke v�re");
-
-
-# We got into an infinite loop on data without tags and no EOL.
-# This was actually a HTML::Parser bug.
-open(FILE, ">$file") or die "Can't create $file: $!";
-print FILE "Foo";
-close(FILE);
-
-$p = HTML::HeadParser->new(H->new);
-$p->parse_file($file);
-unlink($file) or warn "Can't unlink $file: $!";
-
-ok(!$p->as_string);
-
-SKIP: {
-  skip "Need Unicode support", 2 if $] < 5.008;
-
-  # Test that the Unicode BOM does not confuse us?
-  $p = HTML::HeadParser->new(H->new);
-  ok($p->parse("\x{FEFF}\n<title>Hi <foo></title>"));
-  $p->eof;
-
-  is($p->header("title"), "Hi <foo>");
-}
diff --git a/ext/HTML/Parser/t/ignore.t b/ext/HTML/Parser/t/ignore.t
deleted file mode 100644
index 008739e0ad..0000000000
--- a/ext/HTML/Parser/t/ignore.t
+++ /dev/null
@@ -1,27 +0,0 @@
-
-use Test::More tests => 4;
-
-use strict;
-use HTML::Parser ();
-
-my $html = '<A href="foo">text</A>';
-
-my $text = '';
-my $p = HTML::Parser->new(default_h => [sub {$text .= shift;}, 'text']);
-$p->parse($html)->eof;
-is($text, $html);
-
-$text = '';
-$p->handler(start => "");
-$p->parse($html)->eof;
-is($text, 'text</A>');
-
-$text = '';
-$p->handler(end => 0);
-$p->parse($html)->eof;
-is($text, 'text');
-
-$text = '';
-$p->handler(start => undef);
-$p->parse($html)->eof;
-is($text, '<A href="foo">text');
diff --git a/ext/HTML/Parser/t/largetags.t b/ext/HTML/Parser/t/largetags.t
deleted file mode 100644
index a9ed3ff69e..0000000000
--- a/ext/HTML/Parser/t/largetags.t
+++ /dev/null
@@ -1,38 +0,0 @@
-# Exercise the tokenpos buffer allocation routines by feeding it
-# very large tags.
-
-use Test::More tests => 2;
-
-use strict;
-use HTML::Parser ();
-
-my $p = HTML::Parser->new(api_version => 3);
-
-$p->handler("start" =>
-	    sub {
-		my $tp = shift;
-		#diag int(@$tp), " - ", join(", ", @$tp);
-		is(@$tp, 2 + 26 * 6 * 4);
-	    }, "tokenpos");
-
-$p->handler("declaration" =>
-	    sub {
-		my $t = shift;
-		#diag int(@$t), " - @$t";
-		is(@$t, 26 * 6 * 2 + 1);
-	    }, "tokens");
-
-$p->parse("<a ");
-for ("aa" .. "fz") {
-    $p->parse("$_=1 ");
-}
-$p->parse(">");
-
-$p->parse("<!DOCTYPE ");
-for ("aa" .. "fz") {
-    $p->parse("$_ -- $_ -- ");
-}
-$p->parse(">");
-$p->eof;
-exit;
-
diff --git a/ext/HTML/Parser/t/linkextor-base.t b/ext/HTML/Parser/t/linkextor-base.t
deleted file mode 100644
index 7ef8f0254b..0000000000
--- a/ext/HTML/Parser/t/linkextor-base.t
+++ /dev/null
@@ -1,41 +0,0 @@
-# This test that HTML::LinkExtor really absolutize links correctly
-# when a base URL is given to the constructor.
-
-use Test::More tests => 5;
-require HTML::LinkExtor;
-
-SKIP: {
-eval {
-   require URI;
-};
-skip $@, 5 if $@;
-
-# Try with base URL and the $p->links interface.
-$p = HTML::LinkExtor->new(undef, "http://www.sn.no/foo/foo.html");
-$p->parse(<<HTML)->eof;
-<head>
-<base href="http://www.sn.no/">
-</head>
-<body background="http://www.sn.no/sn.gif">
-
-This is <A HREF="link.html">link</a> and an <img SRC="img.jpg"
-lowsrc="img.gif" alt="Image">.
-HTML
-
-@p = $p->links;
-
-# There should be 4 links in the document
-is(@p, 4);
-
-for (@p) {
-    ($t, %attr) = @$_ if $_->[0] eq 'img';
-}
-
-is($t, 'img');
-
-is(delete $attr{src}, "http://www.sn.no/foo/img.jpg");
-
-is(delete $attr{lowsrc}, "http://www.sn.no/foo/img.gif");
-
-ok(!scalar(keys %attr)); # there should be no more attributes
-}
diff --git a/ext/HTML/Parser/t/linkextor-rel.t b/ext/HTML/Parser/t/linkextor-rel.t
deleted file mode 100644
index 1190a96c3f..0000000000
--- a/ext/HTML/Parser/t/linkextor-rel.t
+++ /dev/null
@@ -1,36 +0,0 @@
-use Test::More tests => 4;
-
-require HTML::LinkExtor;
-
-$HTML = <<HTML;
-<head>
-<base href="http://www.sn.no/">
-</head>
-<body background="http://www.sn.no/sn.gif">
-
-This is <A HREF="link.html">link</a> and an <img SRC="img.jpg"
-lowsrc="img.gif" alt="Image">.
-HTML
-
-
-# Try the callback interface
-$links = "";
-$p = HTML::LinkExtor->new(
-  sub {
-      my($tag, %links) = @_;
-      #diag "$tag @{[%links]}";
-      $links .= "$tag @{[%links]}\n";
-  });
-
-$p->parse($HTML); $p->eof;
-
-ok($links =~ m|^base href http://www\.sn\.no/$|m);
-ok($links =~ m|^body background http://www\.sn\.no/sn\.gif$|m);
-ok($links =~ m|^a href link\.html$|m);
-
-# Used to be problems when using the links method on a document with
-# no links it it.  This is a test to prove that it works.
-$p = new HTML::LinkExtor;
-$p->parse("this is a document with no links"); $p->eof;
-@a = $p->links;
-is(@a, 0);
diff --git a/ext/HTML/Parser/t/magic.t b/ext/HTML/Parser/t/magic.t
deleted file mode 100644
index 366f275236..0000000000
--- a/ext/HTML/Parser/t/magic.t
+++ /dev/null
@@ -1,41 +0,0 @@
-# Check that the magic signature at the top of struct p_state works and that we
-# catch modifications to _hparser_xs_state gracefully
-
-use Test::More tests => 5;
-
-use HTML::Parser;
-
-$p = HTML::Parser->new(api_version => 3);
-
-$p->xml_mode(1);
-
-# We should not be able to simply modify this stuff
-eval {
-    ${$p->{_hparser_xs_state}} += 4;
-};
-like($@, qr/^Modification of a read-only value attempted/);
-
-
-my $x = delete $p->{_hparser_xs_state};
-
-eval {
-    $p->xml_mode(1);
-};
-like($@, qr/^Can't find '_hparser_xs_state'/);
-
-$p->{_hparser_xs_state} = \($$x + 16);
-
-eval {
-    $p->xml_mode(1);
-};
-like($@, $] >= 5.008 ? qr/^Lost parser state magic/ : qr/^Bad signature in parser state object/);
-
-$p->{_hparser_xs_state} = 33;
-eval {
-    $p->xml_mode(1);
-};
-like($@,  qr/^_hparser_xs_state element is not a reference/);
-
-$p->{_hparser_xs_state} = $x;
-
-ok($p->xml_mode(0));
diff --git a/ext/HTML/Parser/t/marked-sect.t b/ext/HTML/Parser/t/marked-sect.t
deleted file mode 100644
index 6a63478156..0000000000
--- a/ext/HTML/Parser/t/marked-sect.t
+++ /dev/null
@@ -1,121 +0,0 @@
-#!/usr/bin/perl -w
-
-use strict;
-my $tag;
-my $text;
-
-use HTML::Parser ();
-my $p = HTML::Parser->new(start_h => [sub { $tag = shift  }, "tagname"],
-	                  text_h  => [sub { $text .= shift }, "dtext"],
-                         );
-
-
-use Test::More tests => 14;
-
-SKIP: {
-eval {
-    $p->marked_sections(1);
-};
-skip $@, 14 if $@;
-
-$p->parse("<![[foo]]>");
-is($text, "foo");
-
-$p->parse("<![TEMP INCLUDE[bar]]>");
-is($text, "foobar");
-
-$p->parse("<![ INCLUDE -- IGNORE -- [foo<![IGNORE[bar]]>]]>\n<br>");
-is($text, "foobarfoo\n");
-
-$text = "";
-$p->parse("<![  CDATA   [&lt;foo");
-$p->parse("<![IGNORE[bar]]>,bar&gt;]]><br>");
-is($text, "&lt;foo<![IGNORE[bar,bar>]]>");
-
-$text = "";
-$p->parse("<![ RCDATA [&aring;<a>]]><![CDATA[&aring;<a>]]>&aring;<a><br>");
-is($text, "�<a>&aring;<a>�");
-is($tag, "br");
-
-$text = "";
-$p->parse("<![INCLUDE RCDATA CDATA IGNORE [foo&aring;<a>]]><br>");
-is($text,  "");
-
-$text = "";
-$p->parse("<![INCLUDE RCDATA CDATA [foo&aring;<a>]]><br>");
-is($text, "foo&aring;<a>");
-
-$text = "";
-$p->parse("<![INCLUDE RCDATA [foo&aring;<a>]]><br>");
-is($text, "foo�<a>");
-
-$text = "";
-$p->parse("<![INCLUDE [foo&aring;<a>]]><br>");
-is($text, "foo�");
-
-$text = "";
-$p->parse("<![[foo&aring;<a>]]><br>");
-is($text, "foo�");
-
-# offsets/line/column numbers
-$p = HTML::Parser->new(default_h => [\&x, "line,column,offset,event,text"],
-		       marked_sections => 1,
-		      );
-$p->parse(<<'EOT')->eof;
-<title>Test</title>
-<![CDATA
-  [foo&aring;<a>
-]]>
-<![[
-INCLUDE
-STUFF
-]]>
-  <h1>Test</h1>
-EOT
-
-my @x;
-sub x {
-    my($line, $col, $offset, $event, $text) = @_;
-    $text =~ s/\n/\\n/g;
-    $text =~ s/ /./g;
-    push(@x, "$line.$col:$offset $event \"$text\"\n");
-}
-
-#diag @x;
-is(join("", @x), <<'EOT');
-1.0:0 start_document ""
-1.0:0 start "<title>"
-1.7:7 text "Test"
-1.11:11 end "</title>"
-1.19:19 text "\n"
-3.3:32 text "foo&aring;<a>\n"
-4.3:49 text "\n"
-5.4:54 text "\nINCLUDE\nSTUFF\n"
-8.3:72 text "\n.."
-9.2:75 start "<h1>"
-9.6:79 text "Test"
-9.10:83 end "</h1>"
-9.15:88 text "\n"
-10.0:89 end_document ""
-EOT
-
-my $doc = "<Tag><![CDATA[This is cdata]]></Tag>";
-my $result = "";
-$p = HTML::Parser->new(
-    marked_sections => 1,
-    handlers => {
-        default => [ sub { $result .= join("",@_); }, "skipped_text,text" ]
-    }
-)->parse($doc)->eof;
-is($doc, $result);
-
-$text = "";
-$p = HTML::Parser->new(
-    text_h => [sub { $text .= shift }, "dtext"],
-    marked_sections => 1,
-);
-
-$p->parse("<![CDATA[foo [1]]]>");
-is($text, "foo [1]", "CDATA text ending in square bracket");
-
-} # SKIP
diff --git a/ext/HTML/Parser/t/msie-compat.t b/ext/HTML/Parser/t/msie-compat.t
deleted file mode 100644
index 90d4b7e3b7..0000000000
--- a/ext/HTML/Parser/t/msie-compat.t
+++ /dev/null
@@ -1,58 +0,0 @@
-#!perl -w
-
-use strict;
-use HTML::Parser;
-
-use Test::More tests => 2;
-
-my $TEXT = "";
-sub h
-{
-    my($event, $tagname, $text) = @_;
-    for ($event, $tagname, $text) {
-        if (defined) {
-	    s/([\n\r\t])/sprintf "\\%03o", ord($1)/ge;
-	}
-	else {
-	    $_ = "<undef>";
-	}
-    }
-
-    $TEXT .= "[$event,$tagname,$text]\n";
-}
-
-my $p = HTML::Parser->new(default_h => [\&h, "event,tagname,text"]);
-$p->parse("<a>");
-$p->parse("</a f>");
-$p->parse("</a 'foo<>' 'bar>' x>");
-$p->parse("</a \"foo<>\"");
-$p->parse(" \"bar>\" x>");
-$p->parse("</ foo bar>");
-$p->parse("</ \"<>\" >");
-$p->parse("<!--comment>text<!--comment><p");
-$p->eof;
-
-is($TEXT, <<'EOT');
-[start_document,<undef>,]
-[start,a,<a>]
-[end,a,</a f>]
-[end,a,</a 'foo<>' 'bar>' x>]
-[end,a,</a "foo<>" "bar>" x>]
-[comment, foo bar,</ foo bar>]
-[comment, "<>" ,</ "<>" >]
-[comment,comment,<!--comment>]
-[text,<undef>,text]
-[comment,comment,<!--comment>]
-[comment,p,<p]
-[end_document,<undef>,]
-EOT
-
-$TEXT = "";
-$p->parse("<!comment>");
-$p->eof;
-
-is($TEXT, <<'EOT');
-[start_document,<undef>,]
-[comment,comment,<!comment>]
-[end_document,<undef>,]
-EOT
diff --git a/ext/HTML/Parser/t/offset.t b/ext/HTML/Parser/t/offset.t
deleted file mode 100644
index 840728d7c3..0000000000
--- a/ext/HTML/Parser/t/offset.t
+++ /dev/null
@@ -1,58 +0,0 @@
-use strict;
-use HTML::Parser ();
-use Test::More tests => 1;
-
-my $HTML = <<'EOT';
-
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html40/strict.dtd">
-
-<foo bar baz=3>heisan
-</foo> <?process>
-<!-- comment -->
-<xmp>xmp</xmp>
-
-EOT
-
-my $p = HTML::Parser->new(api_version => 3);
-
-my $sum_len = 0;
-my $count = 0;
-my $err;
-
-$p->handler(default =>
-	    sub {
-		my($offset, $length, $offset_end, $line, $col, $text) = @_;
-		my $copy = $text;
-		$copy =~ s/\n/\\n/g;
-		substr($copy, 30) = "..." if length($copy) > 32;
-		#diag sprintf ">>> %d.%d %s", $line, $col, $copy;
-		if ($offset != $sum_len) {
-		   diag "offset mismatch $offset vs $sum_len";
-		   $err++;
-                }
-		if ($offset_end != $offset + $length) {
-		   diag "offset_end $offset_end wrong";
-		   $err++;
-                }
-		if ($length != length($text)) {
-		   diag "length mismatch";
-		   $err++;
-		}
-                if (substr($HTML, $offset, $length) ne $text) {
-		   diag "content mismatch";
-		   $err++;
-		}
-		$sum_len += $length;
-		$count++;
-	    },
-	    'offset,length,offset_end,line,column,text');
-
-for (split(//, $HTML)) {
-   $p->parse($_);
-}
-$p->eof;
-
-ok($count > 5 && !$err);
-
-
diff --git a/ext/HTML/Parser/t/options.t b/ext/HTML/Parser/t/options.t
deleted file mode 100644
index ff5f7db564..0000000000
--- a/ext/HTML/Parser/t/options.t
+++ /dev/null
@@ -1,36 +0,0 @@
-# Test option setting methods
-
-use Test::More tests => 10;
-
-use strict;
-use HTML::Parser ();
-
-my $p = HTML::Parser->new(api_version => 3,
-			  xml_mode => 1);
-my $old;
-
-$old = $p->boolean_attribute_value("foo");
-ok(!defined $old);
-
-$old = $p->boolean_attribute_value();
-is($old, "foo");
-
-$old = $p->boolean_attribute_value(undef);
-is($old, "foo");
-ok(!defined($p->boolean_attribute_value));
-
-ok($p->xml_mode(0));
-ok(!$p->xml_mode);
-
-my $seen_buggy_comment_warning;
-$SIG{__WARN__} =
-    sub {
-	local $_ = shift;
-	$seen_buggy_comment_warning++
-	    if /^netscape_buggy_comment\(\) is deprecated/;
-    };
-
-ok(!$p->strict_comment(1));
-ok($p->strict_comment);
-ok(!$p->netscape_buggy_comment);
-ok($seen_buggy_comment_warning);
diff --git a/ext/HTML/Parser/t/parsefile.t b/ext/HTML/Parser/t/parsefile.t
deleted file mode 100644
index f373f066f6..0000000000
--- a/ext/HTML/Parser/t/parsefile.t
+++ /dev/null
@@ -1,45 +0,0 @@
-use Test::More tests => 6;
-
-my $filename = "file$$.htm";
-die "$filename is already there" if -e $filename;
-open(FILE, ">$filename") || die "Can't create $filename: $!";
-print FILE <<'EOT'; close(FILE);
-<title>Heisan</title>
-EOT
-
-{
-    package MyParser;
-    require HTML::Parser;
-    @ISA=qw(HTML::Parser);
-
-    sub start
-    {
-	my($self, $tag, $attr) = @_;
-	Test::More::is($tag, "title");
-    }
-}
-
-MyParser->new->parse_file($filename);
-open(FILE, $filename) || die;
-MyParser->new->parse_file(*FILE);
-seek(FILE, 0, 0) || die;
-MyParser->new->parse_file(\*FILE);
-close(FILE);
-
-require IO::File;
-my $io = IO::File->new($filename) || die;
-MyParser->new->parse_file($io);
-$io->seek(0, 0) || die;
-MyParser->new->parse_file(*$io);
-
-my $text = '';
-$io->seek(0, 0) || die;
-MyParser->new(
-    start_h => [ sub{ shift->eof; }, "self" ],
-    text_h =>  [ sub{ $text = shift; }, "text" ])->parse_file(*$io);
-ok(!$text);
-
-close($io);  # needed because of bug in perl
-undef($io);
-
-unlink($filename) or warn "Can't unlink $filename: $!";
diff --git a/ext/HTML/Parser/t/parser.t b/ext/HTML/Parser/t/parser.t
deleted file mode 100644
index 0ce4d95803..0000000000
--- a/ext/HTML/Parser/t/parser.t
+++ /dev/null
@@ -1,184 +0,0 @@
-use Test::More tests => 7;
-
-$HTML = <<'HTML';
-
-<!DOCTYPE HTML>
-
-<body>
-
-Various entities.  The parser must never break them in the middle:
-
-&#x2F
-&#x2F;
-&#200
-&#3030;
-&#XFFFF;
-&aring-&Aring;
-
-<ul>
-<li><a href="foo 'bar' baz>" id=33>This is a link</a>
-<li><a href='foo "bar" baz> &aring' id=34>This is another one</a>
-</ul>
-
-<p><div align="center"><img src="http://www.perl.com/perl.gif"
-alt="camel"></div>
-
-<!-- this is
-a comment --> and this is not.
-
-<!-- this is the kind of >comment< -- --> that Netscape hates -->
-
-< this > was not a tag. <this is/not either>
-
-</body>
-
-HTML
-
-#-------------------------------------------------------------------
-
-{
-    package P;
-    require HTML::Parser;
-    @ISA=qw(HTML::Parser);
-    $OUT='';
-    $COUNT=0;
-
-    sub new
-    {
-	my $class = shift;
-	my $self = $class->SUPER::new;
-	$OUT = '';
-        die "Can only have one" if $COUNT++;
-	$self;
-    }
-
-    sub DESTROY
-    {
-	my $self = shift;
-	eval { $self->SUPER::DESTROY; };
-	$COUNT--;
-    }
-
-    sub declaration
-    {
-	my($self, $decl) = @_;
-	$OUT .= "[[$decl]]|";
-    }
-
-    sub start
-    {
-	my($self, $tag, $attr) = @_;
-	$attr = join("/", map "$_=$attr->{$_}", sort keys %$attr);
-	$attr = "/$attr" if length $attr;
-	$OUT .= "<<$tag$attr>>|";
-    }
-
-    sub end
-    {
-	my($self, $tag) = @_;
-	$OUT .= ">>$tag<<|";
-    }
-
-    sub comment
-    {
-	my($self, $comment) = @_;
-	$OUT .= "##$comment##|";
-    }
-
-    sub text
-    {
-	my($self, $text) = @_;
-	#$text =~ s/\n/\\n/g;
-	#$text =~ s/\t/\\t/g;
-	#$text =~ s/ /�/g;
-	$OUT .= "$text|";
-    }
-
-    sub result
-    {
-	$OUT;
-    }
-}
-
-for $chunksize (64*1024, 64, 13, 3, 1, "file", "filehandle") {
-#for $chunksize (1) {
-    if ($chunksize =~ /^file/) {
-        #print "Parsing from $chunksize";
-    } else {
-        #print "Parsing using $chunksize byte chunks";
-    }
-    my $p = P->new;
-
-    if ($chunksize =~ /^file/) {
-	# First we must create the file
-	my $tmpfile = "tmp-$$.html";
-	my $file = $tmpfile;
-	die "$file already exists" if -e $file;
-	open(FILE, ">$file") or die "Can't create $file: $!";
-        binmode FILE;
-        print FILE $HTML;
-        close(FILE);
-
-	if ($chunksize eq "filehandle") {
-	    require FileHandle;
-	    my $fh = FileHandle->new($file) || die "Can't open $file: $!";
-	    $file = $fh;
-	}
-
-        # then we can parse it.
-        $p->parse_file($file);
-        close $file if $chunksize eq "filehandle";
-        unlink($tmpfile) || warn "Can't unlink $tmpfile: $!";
-    } else {
-	my $copy = $HTML;
-	while (length $copy) {
-	    my $chunk = substr($copy, 0, $chunksize);
-	    substr($copy, 0, $chunksize) = '';
-	    $p->parse($chunk);
-	}
-	$p->eof;
-    }
-
-    my $res = $p->result;
-    my $bad;
-    
-    # Then we start looking for things that should not happen
-    if ($res =~ /\s\|\s/) {
-	diag "broken space";
-	$bad++;
-    }
-    for (
-	 # Make sure entities are not broken
-	 '&#x2F', '&#x2F;', '&#200', '&#3030;', '&#XFFFF;', '&aring', '&Aring',
-
-         # Some elements that should be produced
-         "|[[DOCTYPE HTML]]|",
-         "|## this is\na comment ##|",
-         "|<<ul>>|\n|<<li>>|<<a/href=foo 'bar' baz>/id=33>>|",
-	 '|<<li>>|<<a/href=foo "bar" baz> �/id=34>>',
-         "|>>ul<<|", "|>>body<<|\n\n|",
-        )
-   {
-        if (index($res, $_) < 0) {
-	    diag "Can't find '$_' in parsed document";
-	    $bad++;
-        }
-    }
-
-    diag $res if $bad || $ENV{PRINT_RESULTS};
-
-    # And we check that we get the same result all the time
-    $res =~ s/\|//g;  # remove all break marks
-    if ($last_res && $res ne $last_res) {
-        diag "The result is not the same as last time";
-        $bad++;
-    }
-    $last_res = $res;
-
-    unless ($res =~ /Various entities/) {
-	diag "Some text must be missing";
-	$bad++;
-    }
-
-    ok(!$bad);
-}
diff --git a/ext/HTML/Parser/t/plaintext.t b/ext/HTML/Parser/t/plaintext.t
deleted file mode 100644
index b2e1e19e11..0000000000
--- a/ext/HTML/Parser/t/plaintext.t
+++ /dev/null
@@ -1,45 +0,0 @@
-use Test::More tests => 2;
-
-use strict;
-use HTML::Parser;
-
-my @a;
-my $p = HTML::Parser->new(api_version => 3);
-$p->handler(default => \@a, '@{event, text, is_cdata}');
-$p->parse(<<EOT)->eof;
-<xmp><foo></xmp>x<plaintext><foo>
-</plaintext>
-foo
-EOT
-
-for (@a) {
-    $_ = "" unless defined;
-}
-
-my $doc = join(":", @a);
-
-#diag $doc;
-
-is($doc, "start_document:::start:<xmp>::text:<foo>:1:end:</xmp>::text:x::start:<plaintext>::text:<foo>
-</plaintext>
-foo
-:1:end_document::");
-
-@a = ();
-$p->closing_plaintext('yep, emulate gecko');
-$p->parse(<<EOT)->eof;
-<plaintext><foo>
-</plaintext>foo<b></b>
-EOT
-
-for (@a) {
-    $_ = "" unless defined;
-}
-
-$doc = join(":", @a);
-
-#diag $doc;
-
-is($doc, "start_document:::start:<plaintext>::text:<foo>
-:1:end:</plaintext>::text:foo::start:<b>::end:</b>::text:
-::end_document::");
diff --git a/ext/HTML/Parser/t/pod.t b/ext/HTML/Parser/t/pod.t
deleted file mode 100644
index 437887a0a2..0000000000
--- a/ext/HTML/Parser/t/pod.t
+++ /dev/null
@@ -1,4 +0,0 @@
-use Test::More;
-eval "use Test::Pod 1.00";
-plan skip_all => "Test::Pod 1.00 required for testing POD" if $@;
-all_pod_files_ok();
diff --git a/ext/HTML/Parser/t/process.t b/ext/HTML/Parser/t/process.t
deleted file mode 100644
index 9d27250a92..0000000000
--- a/ext/HTML/Parser/t/process.t
+++ /dev/null
@@ -1,43 +0,0 @@
-use strict;
-
-use Test::More tests => 12;
-
-my $pi;
-my $orig;
-
-use HTML::Parser ();
-my $p = HTML::Parser->new(process_h => [sub { $pi = shift; $orig = shift; },
-				        "token0,text"]
-	                 );
-
-$p->parse("<a><?foo><a>");
-
-is($pi, "foo");
-is($orig, "<?foo>");
-
-$p->parse("<a><?><a>");
-is($pi, "");
-is($orig, "<?>");
-
-$p->parse("<a><?
-foo
-><a>");
-is($pi, "\nfoo\n");
-is($orig, "<?\nfoo\n>");
-
-for (qw(< a > < ? b a r > < a >)) {
-   $p->parse($_);
-}
-
-is($pi, "bar");
-is($orig, "<?bar>");
-
-$p->xml_mode(1);
-
-$p->parse("<a><?foo>bar??><a>");
-is($pi, "foo>bar?");
-is($orig, "<?foo>bar??>");
-
-$p->parse("<a><??></a>");
-is($pi, "");
-is($orig, "<??>");
diff --git a/ext/HTML/Parser/t/pullparser.t b/ext/HTML/Parser/t/pullparser.t
deleted file mode 100644
index 80a186b399..0000000000
--- a/ext/HTML/Parser/t/pullparser.t
+++ /dev/null
@@ -1,55 +0,0 @@
-use Test::More tests => 3;
-
-use HTML::PullParser;
-
-my $doc = <<'EOT';
-<title>Title</title>
-<style> h1 { background: white }
-<foo>
-</style>
-<H1 ID="3">Heading</H1>
-<!-- ignore this -->
-
-This is a text with a <A HREF="http://www.sol.no" name="l1">link</a>.
-EOT
-
-my $p = HTML::PullParser->new(doc   => $doc,
-			      start => 'event,tagname,@attr',
-                              end   => 'event,tagname',
-			      text  => 'event,dtext',
-
-                              ignore_elements         => [qw(script style)],
-			      unbroken_text           => 1,
-			      boolean_attribute_value => 1,
-			     );
-
-my $t = $p->get_token;
-is($t->[0], "start");
-is($t->[1], "title");
-$p->unget_token($t);
-
-my @a;
-while (my $t = $p->get_token) {
-    for (@$t) {
-	s/\s/./g;
-    }
-    push(@a, join("|", @$t));
-}
-
-my $res = join("\n", @a, "");
-#diag $res;
-is($res, <<'EOT');
-start|title
-text|Title
-end|title
-text|..
-start|h1|id|3
-text|Heading
-end|h1
-text|...This.is.a.text.with.a.
-start|a|href|http://www.sol.no|name|l1
-text|link
-end|a
-text|..
-EOT
-
diff --git a/ext/HTML/Parser/t/script.t b/ext/HTML/Parser/t/script.t
deleted file mode 100644
index 2a75ccb35b..0000000000
--- a/ext/HTML/Parser/t/script.t
+++ /dev/null
@@ -1,41 +0,0 @@
-#!perl -w
-
-use strict;
-use Test;
-plan tests => 1;
-
-use HTML::Parser;
-
-my $TEXT = "";
-sub h
-{
-    my($event, $tagname, $text) = @_;
-    for ($event, $tagname, $text) {
-        if (defined) {
-	    s/([\n\r\t])/sprintf "\\%03o", ord($1)/ge;
-	}
-	else {
-	    $_ = "<undef>";
-	}
-    }
-
-    $TEXT .= "[$event,$tagname,$text]\n";
-}
-
-my $p = HTML::Parser->new(default_h => [\&h, "event,tagname,text"], empty_element_tags => 1);
-$p->parse(q(<tr><td align="center" height="100"><script src="whatever"/><SCRIPT language="JavaScript1.1">bust = Math.floor(1000000*Math.random());document.write('<SCR' + 'IPT LANGUAGE="JavaScript1.1" SRC="http://adv.virgilio.it/js.ng/site=virg&adsize=728x90&subsite=mail&sez=comfree&pos=43&bust='+bust+'?">\n');document.write('</SCR' + 'IPT>\n');</SCRIPT></td></tr>));
-$p->eof;
-
-ok($TEXT, <<'EOT');
-[start_document,<undef>,]
-[start,tr,<tr>]
-[start,td,<td align="center" height="100">]
-[start,script,<script src="whatever"/>]
-[end,script,]
-[start,script,<SCRIPT language="JavaScript1.1">]
-[text,<undef>,bust = Math.floor(1000000*Math.random());document.write('<SCR' + 'IPT LANGUAGE="JavaScript1.1" SRC="http://adv.virgilio.it/js.ng/site=virg&adsize=728x90&subsite=mail&sez=comfree&pos=43&bust='+bust+'?">\n');document.write('</SCR' + 'IPT>\n');]
-[end,script,</SCRIPT>]
-[end,td,</td>]
-[end,tr,</tr>]
-[end_document,<undef>,]
-EOT
diff --git a/ext/HTML/Parser/t/skipped-text.t b/ext/HTML/Parser/t/skipped-text.t
deleted file mode 100644
index 8bd2704b20..0000000000
--- a/ext/HTML/Parser/t/skipped-text.t
+++ /dev/null
@@ -1,74 +0,0 @@
-use Test::More tests => 3;
-
-use strict;
-use HTML::Parser;
-
-my $p = HTML::Parser->new(api_version => 3);
-
-$p->report_tags("a");
-
-my @doc;
-
-$p->handler(start => \&a_handler, "skipped_text, text");
-$p->handler(end_document => \@doc, '@{skipped_text}');
-
-$p->parse(<<EOT)->eof;
-<title>hi</title>
-<h1><a href="foo">link</a></h1>
-and <a foo="">some</a> text.
-EOT
-
-sub a_handler {
-    push(@doc, shift);
-    my $text = shift;
-    push(@doc, uc($text));
-}
-
-
-is(join("", @doc), <<'EOT');
-<title>hi</title>
-<h1><A HREF="FOO">link</a></h1>
-and <A FOO="">some</a> text.
-EOT
-
-#
-# Comment stripper.  Interaction with "" handlers.
-#
-my $doc = <<EOT;
-<html>text</html>
-<!-- comment -->
-and some more <b>text</b>.
-EOT
-(my $expected = $doc) =~ s/<!--.*?-->//;
-
-$p = HTML::Parser->new(api_version => 3);
-$p->handler(comment => "");
-$p->handler(end_document => sub {
-		                my $stripped = shift;
-				#diag $stripped;
-	                        is($stripped, $expected);
-			    }, "skipped_text");
-for (split(//, $doc)) {
-    $p->parse($_);
-}
-$p->eof;
-
-#
-# Interaction with unbroken text
-#
-my @x;
-$p = HTML::Parser->new(api_version => 3, unbroken_text => 1);
-$p->handler(text => \@x, '@{"X", skipped_text, text}');
-$p->handler(end => "");
-$p->handler(end_document => \@x, '@{"Y", skipped_text}');
-
-$doc = "a a<a>b b</a>c c<x>d d</x>e";
-
-for (split(//, $doc)) {
-   $p->parse($_);
-}
-$p->eof;
-
-#diag join(":", @x);
-is(join(":", @x), "X::a a:X:<a>:b bc c:X:<x>:d de:Y:");
-
diff --git a/ext/HTML/Parser/t/stack-realloc.t b/ext/HTML/Parser/t/stack-realloc.t
deleted file mode 100644
index 46c7d35666..0000000000
--- a/ext/HTML/Parser/t/stack-realloc.t
+++ /dev/null
@@ -1,17 +0,0 @@
-#!perl -w
-
-# HTML-Parser 3.33 and older used to core dump on this program because
-# of missing SPAGAIN calls in parse() XS code.  It was not prepared for
-# the stack to get realloced.
-
-$| = 1;
-
-use Test::More tests => 1;
-
-use HTML::Parser;
-my $x = HTML::Parser->new(api_version => 3);
-my @row;
-$x->handler(end => sub { push(@row, (1) x 505); 1 },   "tagname");
-$x->parse("</TD>");
-
-pass;
diff --git a/ext/HTML/Parser/t/textarea.t b/ext/HTML/Parser/t/textarea.t
deleted file mode 100644
index 120f79bb29..0000000000
--- a/ext/HTML/Parser/t/textarea.t
+++ /dev/null
@@ -1,70 +0,0 @@
-use Test::More tests => 1;
-
-use strict;
-use HTML::Parser;
-
-my $html = <<'EOT';
-<html>
-<title>This is a <nice> title</title>
-<!--comment-->
-<script language="perl">while (<DATA>) { &amp; }</script>
-
-<FORM>
-
-<textarea name="foo" cols=50 rows=10>
-
-foo
-<foo>
-<!--comment-->
-&amp;
-foo
-</FORM>
-
-</textarea>
-
-</FORM>
-
-</html>
-EOT
-
-my $dump = "";
-sub tdump {
-   my @a = @_;
-   for (@a) {
-      $_ = "<undef>" unless defined;
-      s/\n/\\n/g;
-   }
-   $dump .= join("|", @a) . "\n";
-}
-
-my $p = HTML::Parser->new(default_h => [\&tdump, "event,text,dtext,is_cdata"]);
-$p->parse($html)->eof;
-
-#diag $dump;
-
-is($dump, <<'EOT');
-start_document||<undef>|<undef>
-start|<html>|<undef>|<undef>
-text|\n|\n|
-start|<title>|<undef>|<undef>
-text|This is a <nice> title|This is a <nice> title|
-end|</title>|<undef>|<undef>
-text|\n|\n|
-comment|<!--comment-->|<undef>|<undef>
-text|\n|\n|
-start|<script language="perl">|<undef>|<undef>
-text|while (<DATA>) { &amp; }|while (<DATA>) { &amp; }|1
-end|</script>|<undef>|<undef>
-text|\n\n|\n\n|
-start|<FORM>|<undef>|<undef>
-text|\n\n|\n\n|
-start|<textarea name="foo" cols=50 rows=10>|<undef>|<undef>
-text|\n\nfoo\n<foo>\n<!--comment-->\n&amp;\nfoo\n</FORM>\n\n|\n\nfoo\n<foo>\n<!--comment-->\n&\nfoo\n</FORM>\n\n|
-end|</textarea>|<undef>|<undef>
-text|\n\n|\n\n|
-end|</FORM>|<undef>|<undef>
-text|\n\n|\n\n|
-end|</html>|<undef>|<undef>
-text|\n|\n|
-end_document||<undef>|<undef>
-EOT
diff --git a/ext/HTML/Parser/t/threads.t b/ext/HTML/Parser/t/threads.t
deleted file mode 100644
index 8da91e9b78..0000000000
--- a/ext/HTML/Parser/t/threads.t
+++ /dev/null
@@ -1,39 +0,0 @@
-# Verify thread safety.
-
-use Config;
-use Test::More;
-
-BEGIN {
-    plan(skip_all => "Not configured for threads")
-	unless $Config{useithreads} && $] >= 5.008;
-    plan(tests => 1);
-}
-
-use threads;
-use HTML::Parser;
-
-my $ok=0;
-
-sub start
-{
-    my($tag,$attr)=@_;
-
-    $ok += ($tag eq "foo");
-    $ok += (defined($attr->{param}) && $attr->{param} eq "bar");
-}
-
-my $p = HTML::Parser->new
-    (api_version => 3,
-     handlers => {
-	 start => [\&start, "tagname,attr"],
-     });
-
-$p->parse("<foo pa");
-
-$ok=async {
-    $p->parse("ram=bar>");
-    $ok;
-}->join();
-
-is($ok,2);
-
diff --git a/ext/HTML/Parser/t/tokeparser.t b/ext/HTML/Parser/t/tokeparser.t
deleted file mode 100644
index 2084201fb9..0000000000
--- a/ext/HTML/Parser/t/tokeparser.t
+++ /dev/null
@@ -1,164 +0,0 @@
-use Test::More tests => 17;
-
-use strict;
-use HTML::TokeParser;
-
-# First we create an HTML document to test
-
-my $file = "ttest$$.htm";
-die "$file already exists" if -e $file;
-
-open(F, ">$file") or die "Can't create $file: $!";
-print F <<'EOT';  close(F);
-
-<!--This is a test-->
-<html><head><title>
-  This is the &lt;title&gt;
-</title>
-
-  <base href="http://www.perl.com">
-</head>
-
-<body background="bg.gif">
-
-    <h1>This is the <b>title</b> again
-    </h1>
-
-    And this is a link to the <a href="http://www.perl.com"><img src="camel.gif" alt="Perl">&nbsp;<!--nice isn't it-->Institute</a>
-
-   <br/><? process instruction >
-
-</body>
-</html>
-
-EOT
-
-END { unlink($file) || warn "Can't unlink $file: $!"; }
-
-
-my $p;
-
-
-$p = HTML::TokeParser->new($file) || die "Can't open $file: $!";
-ok($p->unbroken_text);
-if ($p->get_tag("foo", "title")) {
-    my $title = $p->get_trimmed_text;
-    #diag "Title: $title";
-    is($title, "This is the <title>");
-}
-undef($p);
-
-# Test with reference to glob
-open(F, $file) || die "Can't open $file: $!";
-$p = HTML::TokeParser->new(\*F);
-my $scount = 0;
-my $ecount = 0;
-my $tcount = 0;
-my $pcount = 0;
-while (my $token = $p->get_token) {
-    $scount++ if $token->[0] eq "S";
-    $ecount++ if $token->[0] eq "E";
-    $pcount++ if $token->[0] eq "PI";
-}
-undef($p);
-close F;
-
-# Test with glob
-open(F, $file) || die "Can't open $file: $!";
-$p = HTML::TokeParser->new(*F);
-$tcount++ while $p->get_tag;
-undef($p);
-close F;
-
-# Test with plain file name
-$p = HTML::TokeParser->new($file) || die;
-$tcount++ while $p->get_tag;
-undef($p);
-
-#diag "Number of tokens found: $tcount/2 = $scount + $ecount";
-is($tcount, 34);
-is($scount, 10);
-is($ecount, 7);
-is($pcount, 1);
-is($tcount/2, $scount + $ecount);
-
-ok(!HTML::TokeParser->new("/noT/thEre/$$"));
-
-
-$p = HTML::TokeParser->new($file) || die;
-$p->get_tag("a");
-my $atext = $p->get_text;
-undef($p);
-
-is($atext, "Perl\240Institute");
-
-# test parsing of embeded document
-$p = HTML::TokeParser->new(\<<HTML);
-<title>Title</title>
-<H1>
-Heading
-</h1>
-HTML
-
-ok($p->get_tag("h1"));
-is($p->get_trimmed_text, "Heading");
-undef($p);
-
-# test parsing of large embedded documents
-my $doc = "<a href='foo'>foo is bar</a>\n\n\n" x 2022;
-
-#use Time::HiRes qw(time);
-my $start = time;
-$p = HTML::TokeParser->new(\$doc);
-#diag "Construction time: ", time - $start;
-
-my $count;
-while (my $t = $p->get_token) {
-    $count++ if $t->[0] eq "S";
-}
-#diag "Parse time: ", time - $start;
-
-is($count, 2022);
-
-$p = HTML::TokeParser->new(\<<'EOT');
-<H1>This is a heading</H1>
-This is s<b>o</b>me<hr>text.
-<br />
-This is some more text.
-<p>
-This is even some more.
-EOT
-
-$p->get_tag("/h1");
-
-my $t = $p->get_trimmed_text("br", "p");
-is($t, "This is some text.");
-
-$p->get_tag;
-
-$t = $p->get_trimmed_text("br", "p");
-is($t,"This is some more text.");
-
-undef($p);
-
-$p = HTML::TokeParser->new(\<<'EOT');
-<H1>This is a <b>bold</b> heading</H1>
-This is some <i>italic</i> text.<br />This is some <span id=x>more text</span>.
-<p>
-This is even some more.
-EOT
-
-$p->get_tag("h1");
-
-$t = $p->get_phrase;
-is($t, "This is a bold heading");
-
-$t = $p->get_phrase;
-is($t, "");
-
-$p->get_tag;
-
-$t = $p->get_phrase;
-is($t, "This is some italic text. This is some more text.");
-
-undef($p);
diff --git a/ext/HTML/Parser/t/uentities.t b/ext/HTML/Parser/t/uentities.t
deleted file mode 100644
index b9decc54a0..0000000000
--- a/ext/HTML/Parser/t/uentities.t
+++ /dev/null
@@ -1,67 +0,0 @@
-# Test Unicode entities
-
-use HTML::Entities;
-
-use Test::More tests => 27;
-
-SKIP: {
-skip "This perl does not support Unicode or Unicode entities not selected",
-  27 if $] < 5.008 || !&HTML::Entities::UNICODE_SUPPORT;
-
-is(decode_entities("&euro"), "&euro");
-is(decode_entities("&euro;"), "\x{20AC}");
-
-is(decode_entities("&aring"), "�");
-is(decode_entities("&aring;"), "�");
-
-is(decode_entities("&#500000"), chr(500000));
-
-is(decode_entities("&#x10FFFD"), "\x{10FFFD}");
-
-is(decode_entities("&#xFFFC"), "\x{FFFC}");
-
-
-is(decode_entities("&#xFDD0"), "\x{FFFD}");
-is(decode_entities("&#xFDD1"), "\x{FFFD}");
-is(decode_entities("&#xFDE0"), "\x{FFFD}");
-is(decode_entities("&#xFDEF"), "\x{FFFD}");
-is(decode_entities("&#xFFFF"), "\x{FFFD}");
-is(decode_entities("&#x10FFFF"), "\x{FFFD}");
-is(decode_entities("&#x110000"), chr(0xFFFD));
-is(decode_entities("&#XFFFFFFFF"), chr(0xFFFD));
-
-is(decode_entities("&#0"), "\0");
-is(decode_entities("&#0;"), "\0");
-is(decode_entities("&#x0"), "\0");
-is(decode_entities("&#X0;"), "\0");
-
-is(decode_entities("&#&aring&#229&#229;&#xFFF"), "&#���\x{FFF}");
-
-# This might fail when we get more than 64 bit UVs
-is(decode_entities("&#0009999999999999999999999999999;"), "&#0009999999999999999999999999999;");
-is(decode_entities("&#xFFFF0000FFFF0000FFFF1"), "&#xFFFF0000FFFF0000FFFF1");
-
-my $err;
-for ([32, 48], [120, 169], [240, 250], [250, 260], [965, 975], [3000, 3005]) {
-    my $a = join("", map chr, $_->[0] .. $_->[1]);
-
-    my $e = encode_entities($a);
-    my $d = decode_entities($e);
-
-    unless ($d eq $a) {
-	diag "Wrong decoding in range $_->[0] .. $_->[1]";
-	# use Devel::Peek; Dump($a); Dump($d);
-	$err++;
-    }
-}
-ok(!$err);
-
-
-is(decode_entities("&#56256;&#56453;"), chr(0x100085));
-
-is(decode_entities("&#56256;&#56453;"), chr(0x100085));
-
-is(decode_entities("&#56256"), chr(0xFFFD));
-
-is(decode_entities("\260&rsquo;\260"), "\x{b0}\x{2019}\x{b0}");
-}
diff --git a/ext/HTML/Parser/t/unbroken-text.t b/ext/HTML/Parser/t/unbroken-text.t
deleted file mode 100644
index 7de85a9880..0000000000
--- a/ext/HTML/Parser/t/unbroken-text.t
+++ /dev/null
@@ -1,60 +0,0 @@
-use strict;
-use HTML::Parser;
-
-use Test::More tests => 3;
-
-my $text = "";
-sub text
-{
-    my $cdata = shift() ? "CDATA" : "TEXT";
-    my($offset, $line, $col, $t) = @_;
-    $text .= "[$cdata:$offset:$line.$col:$t]";
-}
-
-sub tag
-{
-    $text .= shift;
-}
-
-my $p = HTML::Parser->new(unbroken_text => 1,
-			  text_h =>  [\&text, "is_cdata,offset,line,column,text"],
-			  start_h => [\&tag, "text"],
-			  end_h   => [\&tag, "text"],
-			 );
-
-$p->parse("foo ");
-$p->parse("bar ");
-$p->parse("<foo>");
-$p->parse("bar\n");
-$p->parse("</foo>");
-$p->parse("<xmp>xmp</xmp>");
-$p->parse("atend");
-
-#diag $text;
-is($text, "[TEXT:0:1.0:foo bar ]<foo>[TEXT:13:1.13:bar\n]</foo><xmp>[CDATA:28:2.11:xmp]</xmp>");
-
-$text = "";
-$p->eof;
-
-#diag $text;
-is($text, "[TEXT:37:2.20:atend]");
-
-
-$p = HTML::Parser->new(unbroken_text => 1,
-		       text_h => [\&text, "is_cdata,offset,line,column,text"],
-		      );
-
-$text = "";
-$p->parse("foo");
-$p->parse("<foo");
-$p->parse(">bar\n");
-$p->parse("foo<xm");
-$p->parse("p>xmp");
-$p->parse("</xmp");
-$p->parse(">bar");
-$p->eof;
-
-#diag $text;
-is($text, "[TEXT:0:1.0:foobar\nfoo][CDATA:20:2.8:xmp][TEXT:29:2.17:bar]");
-
-
diff --git a/ext/HTML/Parser/t/unicode-bom.t b/ext/HTML/Parser/t/unicode-bom.t
deleted file mode 100644
index 34e066fb6c..0000000000
--- a/ext/HTML/Parser/t/unicode-bom.t
+++ /dev/null
@@ -1,59 +0,0 @@
-#!perl -w
-
-use strict;
-use Test::More tests => 2;
-use HTML::Parser;
-
-SKIP: {
-skip "This perl does not support Unicode", 2 if $] < 5.008;
-
-my @parsed;
-my $p = HTML::Parser->new(
-  api_version => 3,
-  start_h => [\@parsed, 'tag, attr'],
-);
-
-my @warn;
-$SIG{__WARN__} = sub {
-    push(@warn, $_[0]);
-};
-
-$p->parse("\xEF\xBB\xBF<head>Hi there</head>");
-$p->eof;
-
-#use Encode;
-$p->parse("\xEF\xBB\xBF<head>Hi there</head>" . chr(0x263A));
-$p->eof;
-
-$p->parse("\xFF\xFE<head>Hi there</head>");
-$p->eof;
-
-$p->parse("\xFE\xFF<head>Hi there</head>");
-$p->eof;
-
-$p->parse("\0\0\xFF\xFE<head>Hi there</head>");
-$p->eof;
-
-$p->parse("\xFE\xFF\0\0<head>Hi there</head>");
-$p->eof;
-
-is(join("", @warn), <<EOT);
-Parsing of undecoded UTF-8 will give garbage when decoding entities at $0 line 21.
-Parsing of undecoded UTF-8 will give garbage when decoding entities at $0 line 25.
-Parsing of undecoded UTF-16 at $0 line 28.
-Parsing of undecoded UTF-16 at $0 line 31.
-Parsing of undecoded UTF-32 at $0 line 34.
-Parsing of undecoded UTF-32 at $0 line 37.
-EOT
-
-@warn = ();
-
-$p = HTML::Parser->new(
-  api_version => 3,
-  start_h => [\@parsed, 'tag'],
-);
-
-$p->parse("\xEF\xBB\xBF<head>Hi there</head>");
-$p->eof;
-ok(!@warn);
-}
diff --git a/ext/HTML/Parser/t/unicode.t b/ext/HTML/Parser/t/unicode.t
deleted file mode 100644
index 82902dec8b..0000000000
--- a/ext/HTML/Parser/t/unicode.t
+++ /dev/null
@@ -1,183 +0,0 @@
-#!perl -w
-
-use strict;
-use HTML::Parser;
-use Test::More tests => 103;
-
-SKIP: {
-skip "This perl does not support Unicode", 103 if $] < 5.008;
-
-my @warn;
-$SIG{__WARN__} = sub {
-    push(@warn, $_[0]);
-};
-
-my @parsed;
-my $p = HTML::Parser->new(
-  api_version => 3,
-  default_h => [\@parsed, 'event, text, dtext, offset, length, offset_end, column, tokenpos, attr'],
-);
-
-my $doc = "<title>\x{263A}</title><h1 id=\x{2600} f>Smile &#x263a</h1>\x{0420}";
-is(length($doc), 46);
-
-$p->parse($doc)->eof;
-
-#use Data::Dump; Data::Dump::dump(@parsed);
-
-is(@parsed, 9);
-is($parsed[0][0], "start_document");
-
-is($parsed[1][0], "start");
-is($parsed[1][1], "<title>");
-SKIP: { skip "no utf8::is_utf8", 1 if !defined(&utf8::is_utf8); ok(utf8::is_utf8($parsed[1][1]), "is_utf8") };
-is($parsed[1][3], 0);
-is($parsed[1][4], 7);
-
-is($parsed[2][0], "text");
-is(ord($parsed[2][1]), 0x263A);
-is($parsed[2][2], chr(0x263A));
-is($parsed[2][3], 7);
-is($parsed[2][4], 1);
-is($parsed[2][5], 8);
-is($parsed[2][6], 7);
-
-is($parsed[3][0], "end");
-is($parsed[3][1], "</title>");
-is($parsed[3][3], 8);
-is($parsed[3][6], 8);
-
-is($parsed[4][0], "start");
-is($parsed[4][1], "<h1 id=\x{2600} f>");
-is(join("|", @{$parsed[4][7]}), "1|2|4|2|7|1|9|1|0|0");
-is($parsed[4][8]{id}, "\x{2600}");
-
-is($parsed[5][0], "text");
-is($parsed[5][1], "Smile &#x263a");
-is($parsed[5][2], "Smile \x{263A}");
-
-is($parsed[7][0], "text");
-is($parsed[7][1], "\x{0420}");
-is($parsed[7][2], "\x{0420}");
-
-is($parsed[8][0], "end_document");
-is($parsed[8][3], length($doc));
-is($parsed[8][5], length($doc));
-is($parsed[8][6], length($doc));
-is(@warn, 0);
-
-# Try to parse it as an UTF8 encoded string
-utf8::encode($doc);
-is(length($doc), 51);
-
-@parsed = ();
-$p->parse($doc)->eof;
-
-#use Data::Dump; Data::Dump::dump(@parsed);
-
-is(@parsed, 9);
-is($parsed[0][0], "start_document");
-
-is($parsed[1][0], "start");
-is($parsed[1][1], "<title>");
-SKIP: { skip "no utf8::is_utf8", 1 if !defined(&utf8::is_utf8); ok(!utf8::is_utf8($parsed[1][1]), "!is_utf8") };
-is($parsed[1][3], 0);
-is($parsed[1][4], 7);
-
-is($parsed[2][0], "text");
-is(ord($parsed[2][1]), 226);
-is($parsed[2][1], "\xE2\x98\xBA");
-is($parsed[2][2], "\xE2\x98\xBA");
-is($parsed[2][3], 7);
-is($parsed[2][4], 3);
-is($parsed[2][5], 10);
-is($parsed[2][6], 7);
-
-is($parsed[3][0], "end");
-is($parsed[3][1], "</title>");
-is($parsed[3][3], 10);
-is($parsed[3][6], 10);
-
-is($parsed[4][0], "start");
-is($parsed[4][1], "<h1 id=\xE2\x98\x80 f>");
-is(join("|", @{$parsed[4][7]}), "1|2|4|2|7|3|11|1|0|0");
-is($parsed[4][8]{id}, "\xE2\x98\x80");
-
-is($parsed[5][0], "text");
-is($parsed[5][1], "Smile &#x263a");
-is($parsed[5][2], "Smile \x{263A}");
-
-is($parsed[8][0], "end_document");
-is($parsed[8][3], length($doc));
-is($parsed[8][5], length($doc));
-is($parsed[8][6], length($doc));
-
-is(@warn, 1);
-like($warn[0], qr/^Parsing of undecoded UTF-8 will give garbage when decoding entities/);
-
-my $file = "test-$$.html";
-open(my $fh, ">:utf8", $file) || die;
-print $fh <<EOT;
-\x{FEFF}
-<title>\x{263A} Love! </title>
-<h1 id=&hearts;\x{2665}>&hearts; Love \x{2665}<h1>
-EOT
-close($fh) || die;
-
-@warn = ();
-@parsed = ();
-$p->parse_file($file);
-is(@parsed, "11");
-is($parsed[6][0], "start");
-is($parsed[6][8]{id}, "\x{2665}\xE2\x99\xA5");
-is($parsed[7][0], "text");
-is($parsed[7][1], "&hearts; Love \xE2\x99\xA5");
-is($parsed[7][2], "\x{2665} Love \xE2\x99\xA5");  # expected garbage
-is($parsed[10][3], -s $file);
-is(@warn, 1);
-like($warn[0], qr/^Parsing of undecoded UTF-8 will give garbage when decoding entities/);
-
-@warn = ();
-@parsed = ();
-open($fh, "<:raw:utf8", $file) || die;
-$p->parse_file($fh);
-is(@parsed, "11");
-is($parsed[6][0], "start");
-is($parsed[6][8]{id}, "\x{2665}\x{2665}");
-is($parsed[7][0], "text");
-is($parsed[7][1], "&hearts; Love \x{2665}");
-is($parsed[7][2], "\x{2665} Love \x{2665}");
-is($parsed[10][3], (-s $file) - 2 * 4);
-is(@warn, 0);
-
-@warn = ();
-@parsed = ();
-open($fh, "<:raw", $file) || die;
-$p->utf8_mode(1);
-$p->parse_file($fh);
-is(@parsed, "11");
-is($parsed[6][0], "start");
-is($parsed[6][8]{id}, "\xE2\x99\xA5\xE2\x99\xA5");
-is($parsed[7][0], "text");
-is($parsed[7][1], "&hearts; Love \xE2\x99\xA5");
-is($parsed[7][2], "\xE2\x99\xA5 Love \xE2\x99\xA5");
-is($parsed[10][3], -s $file);
-is(@warn, 0);
-
-unlink($file);
-
-@parsed = ();
-$p->parse(q(<a href="a=1&lang=2&times=3">foo</a>))->eof;
-is(@parsed, "5");
-is($parsed[1][0], "start");
-is($parsed[1][8]{href}, "a=1&lang=2\xd7=3");
-
-ok(!HTML::Entities::_probably_utf8_chunk(""));
-ok(!HTML::Entities::_probably_utf8_chunk("f"));
-ok(HTML::Entities::_probably_utf8_chunk("f\xE2\x99\xA5"));
-ok(HTML::Entities::_probably_utf8_chunk("f\xE2\x99\xA5o"));
-ok(HTML::Entities::_probably_utf8_chunk("f\xE2\x99\xA5o\xE2"));
-ok(HTML::Entities::_probably_utf8_chunk("f\xE2\x99\xA5o\xE2\x99"));
-ok(!HTML::Entities::_probably_utf8_chunk("f\xE2"));
-ok(!HTML::Entities::_probably_utf8_chunk("f\xE2\x99"));
-}
diff --git a/ext/HTML/Parser/t/xml-mode.t b/ext/HTML/Parser/t/xml-mode.t
deleted file mode 100644
index cdfc5b0ca0..0000000000
--- a/ext/HTML/Parser/t/xml-mode.t
+++ /dev/null
@@ -1,112 +0,0 @@
-use strict;
-use Test::More tests => 8;
-
-use HTML::Parser ();
-my $p = HTML::Parser->new(xml_mode => 1,
-			 );
-
-my $text = "";
-$p->handler(start =>
-	    sub {
-		 my($tag, $attr) = @_;
-		 $text .= "S[$tag";
-		 for my $k (sort keys %$attr) {
-		     my $v =  $attr->{$k};
-		     $text .= " $k=$v";
-		 }
-		 $text .= "]";
-	     }, "tagname,attr");
-$p->handler(end =>
-	     sub {
-		 $text .= "E[" . shift() . "]";
-	     }, "tagname");
-$p->handler(process => 
-	     sub {
-		 $text .= "PI[" . shift() . "]";
-	     }, "token0");
-$p->handler(text =>
-	     sub {
-		 $text .= shift;
-	     }, "text");
-
-my $xml = <<'EOT';
-<?xml version="1.0"?>
-<?IS10744:arch name="html"?><!-- comment -->
-<DOC>
-<title html="h1">My first architectual document</title>
-<author html="address">Geir Ove Gronmo, grove@infotek.no</author>
-<para>This is the first paragraph in this document</para>
-<para html="p">This is the second paragraph</para>
-<para/>
-<xmp><foo></foo></xmp>
-</DOC>
-EOT
-
-$p->parse($xml)->eof;
-
-is($text, <<'EOT');
-PI[xml version="1.0"]
-PI[IS10744:arch name="html"]
-S[DOC]
-S[title html=h1]My first architectual documentE[title]
-S[author html=address]Geir Ove Gronmo, grove@infotek.noE[author]
-S[para]This is the first paragraph in this documentE[para]
-S[para html=p]This is the second paragraphE[para]
-S[para]E[para]
-S[xmp]S[foo]E[foo]E[xmp]
-E[DOC]
-EOT
-
-$text = "";
-$p->xml_mode(0);
-$p->parse($xml)->eof;
-
-is($text, <<'EOT');
-PI[xml version="1.0"?]
-PI[IS10744:arch name="html"?]
-S[doc]
-S[title html=h1]My first architectual documentE[title]
-S[author html=address]Geir Ove Gronmo, grove@infotek.noE[author]
-S[para]This is the first paragraph in this documentE[para]
-S[para html=p]This is the second paragraphE[para]
-S[para/]
-S[xmp]<foo></foo>E[xmp]
-E[doc]
-EOT
-
-# Test that we get an empty tag back
-$p = HTML::Parser->new(api_version => 3,
-	               xml_mode => 1);
-
-$p->handler("end" =>
-	    sub {
-		my($tagname, $text) = @_;
-		is($tagname, "Xyzzy");
-	        ok(!length($text));
-	    }, "tagname,text");
-$p->parse("<Xyzzy foo=bar/>and some more")->eof;
-
-# Test that we get an empty tag back
-$p = HTML::Parser->new(api_version => 3,
-	               empty_element_tags => 1);
-
-$p->handler("end" =>
-	    sub {
-		my($tagname, $text) = @_;
-		is($tagname, "xyzzy");
-	        ok(!length($text));
-	    }, "tagname,text");
-$p->parse("<Xyzzy foo=bar/>and some more")->eof;
-
-$p = HTML::Parser->new(
-    api_version => 3,
-    xml_pic => 1,
-);
-
-$p->handler(
-    "process" => sub {
-	my($text, $t0) = @_;
-	is($text, "<?foo > bar?>");
-	is($t0, "foo > bar");
-    }, "text, token0");
-$p->parse("<?foo > bar?> and then")->eof;
diff --git a/ext/HTML/Parser/tokenpos.h b/ext/HTML/Parser/tokenpos.h
deleted file mode 100644
index aa971bfd4c..0000000000
--- a/ext/HTML/Parser/tokenpos.h
+++ /dev/null
@@ -1,49 +0,0 @@
-struct token_pos
-{
-    char *beg;
-    char *end;
-};
-typedef struct token_pos token_pos_t;
-
-#define dTOKENS(init_lim) \
-   token_pos_t token_buf[init_lim]; \
-   int token_lim = init_lim; \
-   token_pos_t *tokens = token_buf; \
-   int num_tokens = 0
-
-#define PUSH_TOKEN(p_beg, p_end) \
-   STMT_START { \
-       ++num_tokens; \
-       if (num_tokens == token_lim) \
-           tokens_grow(&tokens, &token_lim, (bool)(tokens != token_buf)); \
-       tokens[num_tokens-1].beg = p_beg; \
-       tokens[num_tokens-1].end = p_end; \
-   } STMT_END
-
-#define FREE_TOKENS \
-   STMT_START { \
-       if (tokens != token_buf) \
-          Safefree(tokens); \
-   } STMT_END
-
-static void
-tokens_grow(token_pos_t **token_ptr, int *token_lim_ptr, bool tokens_on_heap)
-{
-    int new_lim = *token_lim_ptr;
-    if (new_lim < 4)
-	new_lim = 4;
-    new_lim *= 2;
-
-    if (tokens_on_heap) {
-	Renew(*token_ptr, new_lim, token_pos_t);
-    }
-    else {
-	token_pos_t *new_tokens;
-	int i;
-	New(57, new_tokens, new_lim, token_pos_t);
-	for (i = 0; i < *token_lim_ptr; i++)
-	    new_tokens[i] = (*token_ptr)[i];
-	*token_ptr = new_tokens;
-    }
-    *token_lim_ptr = new_lim;
-}
diff --git a/ext/HTML/Parser/typemap b/ext/HTML/Parser/typemap
deleted file mode 100644
index a3238548a9..0000000000
--- a/ext/HTML/Parser/typemap
+++ /dev/null
@@ -1,5 +0,0 @@
-PSTATE*	T_PSTATE
-
-INPUT
-T_PSTATE
-	$var = get_pstate_hv(aTHX_ $arg)
diff --git a/ext/HTML/Parser/util.c b/ext/HTML/Parser/util.c
deleted file mode 100644
index 7e626bf714..0000000000
--- a/ext/HTML/Parser/util.c
+++ /dev/null
@@ -1,312 +0,0 @@
-/* $Id: util.c,v 2.30 2006/03/22 09:15:17 gisle Exp $
- *
- * Copyright 1999-2006, Gisle Aas.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the same terms as Perl itself.
- */
-
-#ifndef EXTERN
-#define EXTERN extern
-#endif
-
-
-EXTERN SV*
-sv_lower(pTHX_ SV* sv)
-{
-    STRLEN len;
-    char *s = SvPV_force(sv, len);
-    for (; len--; s++)
-	*s = toLOWER(*s);
-    return sv;
-}
-
-EXTERN int
-strnEQx(const char* s1, const char* s2, STRLEN n, int ignore_case)
-{
-    while (n--) {
-	if (ignore_case) {
-	    if (toLOWER(*s1) != toLOWER(*s2))
-		return 0;
-	}
-	else {
-	    if (*s1 != *s2)
-		return 0;
-	}
-	s1++;
-	s2++;
-    }
-    return 1;
-}
-
-static void
-grow_gap(pTHX_ SV* sv, STRLEN grow, char** t, char** s, char** e)
-{
-    /*
-     SvPVX ---> AAAAAA...BBBBBB
-                     ^   ^     ^
-                     t   s     e
-    */
-    STRLEN t_offset = *t - SvPVX(sv);
-    STRLEN s_offset = *s - SvPVX(sv);
-    STRLEN e_offset = *e - SvPVX(sv);
-
-    SvGROW(sv, e_offset + grow + 1);
-
-    *t = SvPVX(sv) + t_offset;
-    *s = SvPVX(sv) + s_offset;
-    *e = SvPVX(sv) + e_offset;
-
-    Move(*s, *s+grow, *e - *s, char);
-    *s += grow;
-    *e += grow;
-}
-
-EXTERN SV*
-decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
-{
-    STRLEN len;
-    char *s = SvPV_force(sv, len);
-    char *t = s;
-    char *end = s + len;
-    char *ent_start;
-
-    char *repl;
-    STRLEN repl_len;
-#ifdef UNICODE_HTML_PARSER
-    char buf[UTF8_MAXLEN];
-    int repl_utf8;
-    int high_surrogate = 0;
-#else
-    char buf[1];
-#endif
-
-#if defined(__GNUC__) && defined(UNICODE_HTML_PARSER)
-    /* gcc -Wall reports this variable as possibly used uninitialized */
-    repl_utf8 = 0;
-#endif
-
-    while (s < end) {
-	assert(t <= s);
-
-	if ((*t++ = *s++) != '&')
-	    continue;
-
-	ent_start = s;
-	repl = 0;
-
-	if (*s == '#') {
-	    UV num = 0;
-	    UV prev = 0;
-	    int ok = 0;
-	    s++;
-	    if (*s == 'x' || *s == 'X') {
-		s++;
-		while (*s) {
-		    char *tmp = strchr(PL_hexdigit, *s);
-		    if (!tmp)
-			break;
-		    num = num << 4 | ((tmp - PL_hexdigit) & 15);
-		    if (prev && num <= prev) {
-			/* overflow */
-			ok = 0;
-			break;
-		    }
-		    prev = num;
-		    s++;
-		    ok = 1;
-		}
-	    }
-	    else {
-		while (isDIGIT(*s)) {
-		    num = num * 10 + (*s - '0');
-		    if (prev && num < prev) {
-			/* overflow */
-			ok = 0;
-			break;
-		    }
-		    prev = num;
-		    s++;
-		    ok = 1;
-		}
-	    }
-	    if (ok) {
-#ifdef UNICODE_HTML_PARSER
-		if (!SvUTF8(sv) && num <= 255) {
-		    buf[0] = (char) num;
-		    repl = buf;
-		    repl_len = 1;
-		    repl_utf8 = 0;
-		}
-		else {
-		    char *tmp;
-		    if ((num & 0xFFFFFC00) == 0xDC00) {  /* low-surrogate */
-			if (high_surrogate != 0) {
-			    t -= 3; /* Back up past 0xFFFD */
-			    num = ((high_surrogate - 0xD800) << 10) +
-				(num - 0xDC00) + 0x10000;
-			    high_surrogate = 0;
-			} else {
-			    num = 0xFFFD;
-			}
-		    }
-		    else if ((num & 0xFFFFFC00) == 0xD800) { /* high-surrogate */
-			high_surrogate = num;
-			num = 0xFFFD;
-		    }
-		    else {
-			high_surrogate = 0;
-			/* otherwise invalid? */
-			if ((num >= 0xFDD0 && num <= 0xFDEF) ||
-			    ((num & 0xFFFE) == 0xFFFE) ||
-			    num > 0x10FFFF)
-			{
-			    num = 0xFFFD;
-			}
-		    }
-
-		    tmp = (char*)uvuni_to_utf8((U8*)buf, num);
-		    repl = buf;
-		    repl_len = tmp - buf;
-		    repl_utf8 = 1;
-		}
-#else
-		if (num <= 255) {
-		    buf[0] = (char) num & 0xFF;
-		    repl = buf;
-		    repl_len = 1;
-		}
-#endif
-	    }
-	}
-	else {
-	    char *ent_name = s;
-	    while (isALNUM(*s))
-		s++;
-	    if (ent_name != s && entity2char) {
-		SV** svp;
-		if (              (svp = hv_fetch(entity2char, ent_name, s - ent_name, 0)) ||
-		    (*s == ';' && (svp = hv_fetch(entity2char, ent_name, s - ent_name + 1, 0)))
-		   )
-		{
-		    repl = SvPV(*svp, repl_len);
-#ifdef UNICODE_HTML_PARSER
-		    repl_utf8 = SvUTF8(*svp);
-#endif
-		}
-		else if (expand_prefix) {
-		    char *ss = s - 1;
-		    while (ss > ent_name) {
-			svp = hv_fetch(entity2char, ent_name, ss - ent_name, 0);
-			if (svp) {
-			    repl = SvPV(*svp, repl_len);
-#ifdef UNICODE_HTML_PARSER
-			    repl_utf8 = SvUTF8(*svp);
-#endif
-			    s = ss;
-			    break;
-			}
-			ss--;
-		    }
-		}
-	    }
-#ifdef UNICODE_HTML_PARSER
-	    high_surrogate = 0;
-#endif
-	}
-
-	if (repl) {
-	    char *repl_allocated = 0;
-	    if (*s == ';')
-		s++;
-	    t--;  /* '&' already copied, undo it */
-
-#ifdef UNICODE_HTML_PARSER
-	    if (*s != '&') {
-		high_surrogate = 0;
-	    }
-
-	    if (!SvUTF8(sv) && repl_utf8) {
-		/* need to upgrade sv before we continue */
-		STRLEN before_gap_len = t - SvPVX(sv);
-		char *before_gap = (char*)bytes_to_utf8((U8*)SvPVX(sv), &before_gap_len);
-		STRLEN after_gap_len = end - s;
-		char *after_gap = (char*)bytes_to_utf8((U8*)s, &after_gap_len);
-
-		sv_setpvn(sv, before_gap, before_gap_len);
-		sv_catpvn(sv, after_gap, after_gap_len);
-		SvUTF8_on(sv);
-
-		Safefree(before_gap);
-		Safefree(after_gap);
-
-		s = t = SvPVX(sv) + before_gap_len;
-		end = SvPVX(sv) + before_gap_len + after_gap_len;
-	    }
-	    else if (SvUTF8(sv) && !repl_utf8) {
-		repl = (char*)bytes_to_utf8((U8*)repl, &repl_len);
-		repl_allocated = repl;
-	    }
-#endif
-
-	    if (t + repl_len > s) {
-		/* need to grow the string */
-		grow_gap(aTHX_ sv, repl_len - (s - t), &t, &s, &end);
-	    }
-
-	    /* copy replacement string into string */
-	    while (repl_len--)
-		*t++ = *repl++;
-
-	    if (repl_allocated)
-		Safefree(repl_allocated);
-	}
-	else {
-	    while (ent_start < s)
-		*t++ = *ent_start++;
-	}
-    }
-
-    *t = '\0';
-    SvCUR_set(sv, t - SvPVX(sv));
-
-    return sv;
-}
-
-#ifdef UNICODE_HTML_PARSER
-static bool
-has_hibit(char *s, char *e)
-{
-    while (s < e) {
-	U8 ch = *s++;
-	if (!UTF8_IS_INVARIANT(ch)) {
-	    return 1;
-	}
-    }
-    return 0;
-}
-
-
-EXTERN bool
-probably_utf8_chunk(pTHX_ char *s, STRLEN len)
-{
-    char *e = s + len;
-    STRLEN clen;
-
-    /* ignore partial utf8 char at end of buffer */
-    while (s < e && UTF8_IS_CONTINUATION((U8)*(e - 1)))
-	e--;
-    if (s < e && UTF8_IS_START((U8)*(e - 1)))
-	e--;
-    clen = len - (e - s);
-    if (clen && UTF8SKIP(e) == clen) {
-	/* all promised continuation bytes are present */
-	e = s + len;
-    }
-
-    if (!has_hibit(s, e))
-	return 0;
-
-    return is_utf8_string((U8*)s, e - s);
-}
-#endif