[interchange-cvs] interchange - jon modified lib/Vend/Parser.pm

interchange-core@icdevgroup.org interchange-core@icdevgroup.org
Sat Jul 13 23:36:00 2002


User:      jon
Date:      2002-07-14 03:35:03 GMT
Modified:  lib/Vend Parser.pm
Log:
Correct some old comments remaining from HTML::Parser.

Revision  Changes    Path
2.6       +15 -23    interchange/lib/Vend/Parser.pm


rev 2.6, prev_rev 2.5
Index: Parser.pm
===================================================================
RCS file: /var/cvs/interchange/lib/Vend/Parser.pm,v
retrieving revision 2.5
retrieving revision 2.6
diff -u -u -r2.5 -r2.6
--- Parser.pm	17 Jun 2002 22:24:08 -0000	2.5
+++ Parser.pm	14 Jul 2002 03:35:03 -0000	2.6
@@ -1,6 +1,6 @@
 # Vend::Parser - Interchange parser class
 #
-# $Id: Parser.pm,v 2.5 2002/06/17 22:24:08 jon Exp $
+# $Id: Parser.pm,v 2.6 2002/07/14 03:35:03 jon Exp $
 #
 # Copyright (C) 1997-2002 Red Hat, Inc. <interchange@redhat.com>
 #
@@ -66,7 +66,7 @@
 
 use HTML::Entities ();
 use vars qw($VERSION);
-$VERSION = substr(q$Revision: 2.5 $, 10);
+$VERSION = substr(q$Revision: 2.6 $, 10);
 
 
 sub new
@@ -99,7 +99,7 @@
 	# tokens from the beginning of $$buf until we can't deside whether
 	# it is a token or not, or the $$buf is empty.
 	while (1) {  # the loop will end by returning when text is parsed
-		# First we try to pull off any plain text (anything before a "<" char)
+		# First we try to pull off any plain text (anything before a '[')
 		if ($$buf =~ s/^([^[]+)// ) {
#my $eat = $1;
#::logDebug("plain eat='$eat'");
@@ -121,13 +121,9 @@
 			$self->{HTML} = 0 if ! defined $self->{HTML};
#::logDebug("do [ tag");
 
-			# This first thing we must find is a tag name.  RFC1866 says:
-			#   A name consists of a letter followed by letters,
-			#   digits, periods, or hyphens. The length of a name is
-			#   limited to 72 characters by the `NAMELEN' parameter in
-			#   the SGML declaration for HTML, 9.5, "SGML Declaration
-			#   for HTML".  In a start-tag, the element name must
-			#   immediately follow the tag open delimiter `<'.
+			# First find a tag name. It must immediately follow the
+			# opening '[', then start with a letter, and be followed by
+			# letters, numbers, dot, or underscore.
 			if ($$buf =~ s|^(([a-zA-Z][-a-zA-Z0-9._]*)\s*)||) {
 				$eaten .= $1;
 
@@ -141,10 +137,6 @@
#::logDebug("tag='$tag' eat='$eaten'");
=20
 				# Then we would like to find some attributes
-				#
-				# Arrgh!! Since stupid Netscape violates RCF1866 by
-				# using "_" in attribute names (like "ADD_DATE") of
-				# their bookmarks.html, we allow this too.
 				while (	$$buf =~ s|^(([a-zA-Z][-a-zA-Z0-9._]*)\s*)|| or
 					 	$$buf =~ s|^(([=!<>][=~]?)\s+)||                 )
 				{
@@ -157,25 +149,26 @@
 						
 					my $val;
 					
-					# The attribute might take an optional value (first we
-					# check for an unquoted value)
+					# The attribute might take an optional value.
+					# First we check for an unquoted value
 					if ($$buf =~ s~(^=\s*([^\|\"\'\`\]\s][^\]>\s]*)\s*)~~) {
 						$eaten .= $1;
 						next unless defined $attr;
 						$val = $2;
-					# or quoted by " or ' or # or $ or |
+					# or quoted by " or '
 					} elsif ($$buf =~ s~(^=\s*(["\'])(.*?)\2\s*)~~s) {
 						$eaten .= $1;
 						next unless defined $attr;
 						$val = $3;
 						HTML::Entities::decode($val) if $attr{entities};
-					# or quoted by `` to send to [calc]
 					} elsif ($$buf =~ s~(^=\s*([\`\|])(.*?)\2\s*)~~s) {
 						$eaten .= $1;
+						# or quoted by ` to send to [calc]
 						if    ($2 eq '`') {
 							$val = Vend::Interpolate::tag_calc($3)
 								unless defined $Vend::Cfg->{AdminSub}{calc};
 						}
+						# or quoted by | to strip leading & trailing whitespace
 						elsif ($2 eq '|') {
 								$val = $3;
 								$val =~ s/^\s+//;
@@ -190,9 +183,8 @@
 						$$buf = "$eaten$1";
 						return $self;
 					} elsif (!$old) {
-						# assume attribute with implicit value, but
-						# if not,no value is set and the
-						# eaten value is grown
+						# assume attribute with implicit value, but if not,
+						# no value is set and the eaten value is grown
 						undef $nopush;
 						($attr,$val,$nopush) = $self->implicit($tag,$attr);
 						$old = 1 unless $val;
@@ -233,11 +225,11 @@
 					push(@attrseq, $attr) unless $nopush;
 				}
 
-				# At the end there should be a closing "\] or >"
+				# At the end there should be a closing ']'
 				if ($$buf =~ s|^\]|| ) {
 					$self->start($tag, \%attr, \@attrseq, "$eaten]");
 				} elsif ($$buf =~ s|^/\s*\]||) {
-					## Empty container tag
+					# XML-style empty container tag like [this /]
 					$self->start($tag, \%attr, \@attrseq, "$eaten]", 1);
 				} elsif ($$buf =~ s|^([^\]\n]+\])||) {
 					$eaten .= $1;