[interchange-cvs] interchange - jon modified lib/Vend/Parser.pm
interchange-core@icdevgroup.org
interchange-core@icdevgroup.org
Sat Jul 13 23:36:00 2002
User: jon
Date: 2002-07-14 03:35:03 GMT
Modified: lib/Vend Parser.pm
Log:
Correct some old comments remaining from HTML::Parser.
Revision Changes Path
2.6 +15 -23 interchange/lib/Vend/Parser.pm
rev 2.6, prev_rev 2.5
Index: Parser.pm
===================================================================
RCS file: /var/cvs/interchange/lib/Vend/Parser.pm,v
retrieving revision 2.5
retrieving revision 2.6
diff -u -u -r2.5 -r2.6
--- Parser.pm 17 Jun 2002 22:24:08 -0000 2.5
+++ Parser.pm 14 Jul 2002 03:35:03 -0000 2.6
@@ -1,6 +1,6 @@
# Vend::Parser - Interchange parser class
#
-# $Id: Parser.pm,v 2.5 2002/06/17 22:24:08 jon Exp $
+# $Id: Parser.pm,v 2.6 2002/07/14 03:35:03 jon Exp $
#
# Copyright (C) 1997-2002 Red Hat, Inc. <interchange@redhat.com>
#
@@ -66,7 +66,7 @@
=20
use HTML::Entities ();
use vars qw($VERSION);
-$VERSION = substr(q$Revision: 2.5 $, 10);
+$VERSION = substr(q$Revision: 2.6 $, 10);
=20
=20
sub new
@@ -99,7 +99,7 @@
# tokens from the beginning of $$buf until we can't deside whether
# it is a token or not, or the $$buf is empty.
while (1) { # the loop will end by returning when text is parsed
- # First we try to pull off any plain text (anything before a "<" char)
+ # First we try to pull off any plain text (anything before a '[')
if ($$buf =~ s/^([^[]+)// ) {
#my $eat = $1;
#::logDebug("plain eat='$eat'");
@@ -121,13 +121,9 @@
$self->{HTML} = 0 if ! defined $self->{HTML};
#::logDebug("do [ tag");
=20
- # This first thing we must find is a tag name. RFC1866 says:
- # A name consists of a letter followed by letters,
- # digits, periods, or hyphens. The length of a name is
- # limited to 72 characters by the `NAMELEN' parameter in
- # the SGML declaration for HTML, 9.5, "SGML Declaration
- # for HTML". In a start-tag, the element name must
- # immediately follow the tag open delimiter `<'.
+ # First find a tag name. It must immediately follow the
+ # opening '[', then start with a letter, and be followed by
+ # letters, numbers, dot, or underscore.
if ($$buf =~ s|^(([a-zA-Z][-a-zA-Z0-9._]*)\s*)||) {
$eaten .= $1;
=20
@@ -141,10 +137,6 @@
#::logDebug("tag='$tag' eat='$eaten'");
=20
# Then we would like to find some attributes
- #
- # Arrgh!! Since stupid Netscape violates RCF1866 by
- # using "_" in attribute names (like "ADD_DATE") of
- # their bookmarks.html, we allow this too.
while ( $$buf =~ s|^(([a-zA-Z][-a-zA-Z0-9._]*)\s*)|| or
$$buf =~ s|^(([=!<>][=~]?)\s+)|| )
{
@@ -157,25 +149,26 @@
=20=09=09=09=09=09=09
my $val;
=20=09=09=09=09=09
- # The attribute might take an optional value (first we
- # check for an unquoted value)
+ # The attribute might take an optional value.
+ # First we check for an unquoted value
if ($$buf =~ s~(^=\s*([^\|\"\'\`\]\s][^\]>\s]*)\s*)~~) {
$eaten .= $1;
next unless defined $attr;
$val = $2;
- # or quoted by " or ' or # or $ or |
+ # or quoted by " or '
} elsif ($$buf =~ s~(^=\s*(["\'])(.*?)\2\s*)~~s) {
$eaten .= $1;
next unless defined $attr;
$val = $3;
HTML::Entities::decode($val) if $attr{entities};
- # or quoted by `` to send to [calc]
} elsif ($$buf =~ s~(^=\s*([\`\|])(.*?)\2\s*)~~s) {
$eaten .= $1;
+ # or quoted by ` to send to [calc]
if ($2 eq '`') {
$val = Vend::Interpolate::tag_calc($3)
unless defined $Vend::Cfg->{AdminSub}{calc};
}
+ # or quoted by | to strip leading & trailing whitespace
elsif ($2 eq '|') {
$val = $3;
$val =~ s/^\s+//;
@@ -190,9 +183,8 @@
$$buf = "$eaten$1";
return $self;
} elsif (!$old) {
- # assume attribute with implicit value, but
- # if not,no value is set and the
- # eaten value is grown
+ # assume attribute with implicit value, but if not,
+ # no value is set and the eaten value is grown
undef $nopush;
($attr,$val,$nopush) = $self->implicit($tag,$attr);
$old = 1 unless $val;
@@ -233,11 +225,11 @@
push(@attrseq, $attr) unless $nopush;
}
=20
- # At the end there should be a closing "\] or >"
+ # At the end there should be a closing ']'
if ($$buf =~ s|^\]|| ) {
$self->start($tag, \%attr, \@attrseq, "$eaten]");
} elsif ($$buf =~ s|^/\s*\]||) {
- ## Empty container tag
+ # XML-style empty container tag like [this /]
$self->start($tag, \%attr, \@attrseq, "$eaten]", 1);
} elsif ($$buf =~ s|^([^\]\n]+\])||) {
$eaten .= $1;