[docs] xmldocs - docelic modified 2 files

Fri Nov 26 19:41:46 EST 2004

User:      docelic
Date:      2004-11-27 00:41:46 GMT
Modified:  bin      stattree refs-autogen
Log:
Took some time to work on core instead of iccattut now:

- bin/stattree:
 - recognize dist/lib/UI/usertag/\S+ files from 4.6.0
 - skip one sanity check for 4.6.0 (in relation to above)

- bin/refs-autogen:
 - Improved internal documentation
 - Removed obsolete parts of code
 - Fixed some errors
 - Turned back on printing of "NEW" near items present in cvs-head

Revision  Changes    Path
1.33      +6 -1      xmldocs/bin/stattree


rev 1.33, prev_rev 1.32
Index: stattree
===================================================================
RCS file: /var/cvs/xmldocs/bin/stattree,v
retrieving revision 1.32
retrieving revision 1.33
diff -u -r1.32 -r1.33

--- stattree	20 Nov 2004 14:40:39 -0000	1.32
+++ stattree	27 Nov 2004 00:41:46 -0000	1.33
@@ -56,7 +56,7 @@
 	config => [qw/\.cfg \.dist/],
 	c => [qw/\.c \.in/],
 	perl => [qw/\.pl \.pm/],
-	uitag => [qw|UI_Tag/\S+\.(core)?tag|],
+	uitag => [qw|UI_Tag/\S+\.(core)?tag dist/lib/UI/usertag/\S+|],
 	systemtag => [qw|SystemTag/\S+\.(core)?tag|],
 	usertag => [qw|UserTag/\S+\.tag \.tag|],
 	filter => [qw/\.filter/],
@@ -299,8 +299,13 @@
 		# Found a tag
 		} elsif ( $c{fsubtype} =~ /^(user|ui|system)tag$/ )  {
 			#$hash{total}{$fsubtype . "s"}++;
+
+			# Skip this simple sanity check for 4.6.0 - those files do not
+			# have .(core)?tag ending.
+			if ( $hash{version} ne '4.6.0') {
 			$c{file} =~ m#(\w+?)\.(core)?tag$# or
 				warn "I know $c{file} is a tag but regex doesn't match it\n";
+			}
 
 			my %specific; # Item-specific data
 			my @tags; # Support multiple tags defined in the same file



1.68      +138 -100  xmldocs/bin/refs-autogen


rev 1.68, prev_rev 1.67
Index: refs-autogen
===================================================================
RCS file: /var/cvs/xmldocs/bin/refs-autogen,v
retrieving revision 1.67
retrieving revision 1.68
diff -u -r1.67 -r1.68
--- refs-autogen	21 Nov 2004 18:47:32 -0000	1.67
+++ refs-autogen	27 Nov 2004 00:41:46 -0000	1.68
@@ -31,18 +31,18 @@
 my %invalid; # Information about missing documentation
 my %covered; # Weed out duplicate context reports
 my %symbol_lists; # symbols listed in categories
-my %symbols; # FINAL symbol refentries
+my %symbols; # FINAL symbol refentries (looks almost like %symbol_lists now ;-)
 my %templates; # Templates for various symbol types
-my $max_ctxs = 10; # Trim more than $max_ctxs source context reports
+my $max_ctxs = 20; # Trim more than $max_ctxs source context reports
 my @set_missing_all; # Helper to better manage %invalid
 my @parsed_versions; # IC versions we parsed
 my $specific_only; # Build only one specific .xml ?
 my $output_spec; # 'list' produces tag list, 'xml' produces real xml source
 my $output_both; # Unconditionally override $output_spec
-my $no_autodefs; # Generate autodefs.ent collection of entities by default
+my $no_autorefs; # Generate autorefs.ent collection of entities by default
 my $autopath = "docbook/autorefs.ent";
 my %dups; # List of symbols names that are not unique
-my $last_path; # Last path we want docs generated for (say, 5.2.0).
+my $last_path; # Last version we want docs generated for (say, 5.2.0).
 
 my @page_order = (qw/purpose default structure synopsis description structure example notes bugs/, "symbol type", "source", "author", "copyright", "see also");
 
@@ -52,7 +52,7 @@
 	"group|type|g|t=s" => \$specific_only,
 	"output|o=s" => \$output_spec,
 	"both|b!"    => \$output_both,
-	"noentities|noents!" => \$no_autodefs,
+	"noentities|noents!" => \$no_autorefs,
 	"last-path|last|lp=s" => \$last_path,
 )) { die "Error parsing options\n" }
 
@@ -64,7 +64,7 @@
 	die "Unknown output combination '$output_spec'\n";
 }
 
-$specific_only and $specific_only =~ s/s$//;
+$specific_only and $specific_only =~ s/s$//; # if user entered name in plural
 @ARGV or die "Usage: $0 version[s]\n";
 
 my %longname = (
@@ -108,11 +108,11 @@
 
 my @mandatory = (qw/synopsis example description purpose/);
 
-my $path;
-my $dumppath;
+my @paths = @ARGV; # Versions requested
+my $path; # Current path, used in loop for each version requested
+my $dumppath; # Path to cache dump file
 my $dumpdir;
-my @paths = @ARGV;
-my $lastpath;
+my $lastpath; # Used if we want to stop before cvs-head
 
 load_templates();
 
@@ -130,9 +130,12 @@
 
 	push @parsed_versions, $hash{version};
 
-	# Outer loop: symbol types (pragmas, globvars, ...)
-	# Inner loop: actual symbols
+	# Outer loop: $gkey: symbol types (pragmas, globvars, ...)
+	# Inner loop (~15 lines below): $key: actual symbols 
 	while ( my ($gkey,$gval) = each %{ $hash{symbols} } ) {
+		# Unfortunately - $specific_only is of limited use. If you use it,
+		# the script won't catch symbol "migrations" - that is, for example,
+		# a tag changing from ui_tag to usertag ...
 		next if $specific_only and $gkey ne $specific_only;
 
 		# Simply for display purpose
@@ -144,19 +147,19 @@
 			print "GEN: @olist\n";
 		}
 
-
+		# Inner loop starts
 		for my $key (keys(%$gval)) {
 			my $val = $gval->{$key};
 
+			my $found = 0; # Assume the symbol is new
+
 			# Register the symbol name ($key) under group name ($gkey) in
 			# %symbol_lists. We need to check if it already exists
-			# under %symbol_lists, searching in the same group is not enough
+			# under %symbol_lists. Searching in the same group is not enough
 			# because group might have changed in next version (usertag -> uitag).
 			# UPDATE: Item can only float between categories if it is a tag,
 			# for all other symbols it means we have different symbols of
 			# the same name (such as 'value' which is both a tag and filter).
-			my $found = 0;
-
 			for my $gk ( keys %symbol_lists ) {
 				if ( grep {/^$key$/} @{ $symbol_lists{$gk} } ) {
 					if ( $gk ne $gkey ) { # SPLAT! Symbol changed category over time.
@@ -164,22 +167,27 @@
 						# Found non-unique symbol name. (We are not interested in changes
 						# from uitag->usertag (or similar), but only in real symbols that
 						# are different but have same name). When that happens, 
-						# refs/<file> needs to be deleted, and refs/<file>.{gk,gkey}
-						# created to uniquely identify items.
+						# refs/<file> needs to be deleted, and refs/<file>.$gk and 
+						# refs/<file>.$gkey created to uniquely identify two separate items.
 						if ( $gkey !~ /tag$/ ) {
 							warn "$key IS BOTH $gk and $gkey!\n" if $verbose;
 							push @{ $dups{$key} }, $gk, $gkey;
 							goto SKIPDUPCHECK;
 						}
 
-						# If it was just the same symbol changing tag subgroup
-						# (uitag -> usertag), then delete it from the old location.
-						# (That means we'll treat it as new item, and it will be properly
-						# re-created at the new position).
+						# (If we reached this point then the symbol is a tag and migrated
+						# just to another tag subtype (ui, user or system)).
+						# (That means we'll simply delete it from previous location, 
+						# and it will be properly re-created at the new position as if it
+						# was a regular new item).
+						warn "$key CHANGED $gk // $gkey\n" if $verbose;
 						my $prev = scalar @{ $symbol_lists{$gk} }; # Quick sanity check
 						@{ $symbol_lists{$gk} } = grep {!/^$key$/} @{ $symbol_lists{$gk} };
 						my $now = scalar @{ $symbol_lists{$gk} }; # Quick sanity check
-						if ( $prev - $now != 1 ) { warn "GREP took out more than 1 item!\n" }
+						if ($prev - $now != 1) { warn "GREP took out more than 1 item!\n" }
+					
+					# Else the symbol is not new, didn't change category or anything
+					# and it's a normal already-existing symbol.
 					} else {
 						$found++;
 						#last; # let's not go into optimizations too early
@@ -207,7 +215,7 @@
 			# as a new context. (which is correct technically, but not
 			# suitable for display).
 			# 
-			# Having this code in this loop will make all symbols
+			# Having this code here will make all symbols
 			# end up having the last version they appear in displayed in
 			# source section.
 			my $ag = $autogenerated{$gkey}{$key};
@@ -222,7 +230,9 @@
 				my $fi = $$ctx{file};
 				my $ln = $$ctx{lnum} || 0; #HA! How come $$ctx{lnum} is undefined??
 			
-				# Support item types with only context info in this field
+				# See if the source context was already listed. (I don't think this
+				# can happen - that the same one appears twice). This code actually
+				# weeds out *different* but overlapping contexts.
 				for my $arr ( @{ $covered{$key}{$fi} } ) {
 					next if !$ln or !$$arr[0] or !$$arr[1];
 					if ($ln > $$arr[0] and $ln < $$arr[1]) {
@@ -231,11 +241,10 @@
 					}
 				}
 
-
 				# Make sure we don't overdo it with source contexts.
 				# MV_PAGE appears on like 31 place. We definitely don't need to
 				# see more than 10; let's say 20.
-				if ( $ctxshown++ > 20 ) {
+				if ( $ctxshown++ > $max_ctxs ) {
 					print STDERR "$$ag{name} has ", scalar @$ar,
 						" contexts, limiting to $max_ctxs\n" if $verbose;
 					goto DONELOOP;
@@ -244,19 +253,23 @@
 				# We 'shift' here because we unshifted 1 row to match line
 				# numbers with array indexes
 				my $ctxsdata = join "\n", @{ $$ctx{ctx} };
-				#$ctxsdata =~ s/^\n//;
 				if ( length $ctxsdata ) {
 					my $ls = $$ctx{ctxs}; # line start nr.
 					( my $plf = $$ctx{file} ) =~ s#.+?/##;
-					$plf =~ /^Vend/ and $plf = "lib/" . $plf; #HA?
+
+					# Well now dude, *some* files from lib/Vend/* somehow lose the
+					# prefix "lib/". I have no idea how - but we'll fix it when 
+					# it happens.
+					$plf =~ /^Vend/ and $plf = "lib/" . $plf;
+
 					#my $loc = "$$ctx{file}:$$ctx{lnum}";
 					my $loc = $$ctx{file};
 					
 					my ( $cstart, $cend, $ctxmeta ) = ("", "", "");
 					my $all = 0; # Showing all for an item?
-					#if (0) { use Data::Dumper; print Dumper $ctx; sleep 10; }
-					# WE SHOW ONLY PART OF CONTEXT FOR THOSE
+
 					if ( $gkey !~ /(tag|filter)$/ ) {
+						# WE SHOW ONLY PART OF CONTEXT FOR THOSE
 						$$ctx{ctxpre} ||= 0; $$ctx{ctxpost} ||= 0; # *confs don't have it
 						$cstart = $$ctx{lnum}-$$ctx{ctxpre};
 						$cend = $cstart+$$ctx{lnum}+$$ctx{ctxpost};
@@ -265,11 +278,15 @@
 						# WE SHOW ALL FOR THOSE
 						$all++;
 						$cstart = $$ctx{ctxs} || 1;
+						# T h o s e  p e s k y  o f f s e t s
 						$cend = $cstart + scalar @{$$ctx{ctx}}-1;
 						$cstart == 1 and $cend -= 1;
 					}
 
 					# General fix, shouldn't break anything (Heh, you bet..)
+					# We just put $cstart and $cend within some limits (to avoid
+					# cases reporting like context showing lines from -3 to 12, or
+					# 15 to 29 when there are only 25 lines total).
 					$cstart <= 0 and $cstart = 1;
 					$cend >= $cstart+@{ $$ctx{ctx} } and
 						$cend = $cstart+@{ $$ctx{ctx} }-1;
@@ -326,7 +343,7 @@
 		}
 	}
 
-	# If this is the last one we want (so, manual break), then stop here.
+	# If this is the last version we want (so, manual break), then stop here.
 	# This is for cases where you want to generate docs for say, 5.2.0 and not
 	# always cvs-head
 	last if $last_path and $last_path eq $path;
@@ -355,16 +372,19 @@
 	################################################################
 
 	# _See Also_ section: "bidirectional" linking
-	# Interesting how I actually had this very good idea in the beginning, then I commented
-	# it thinking it's crap, and now I'm back to just modifying it a little so it works 
-	# as expected again.
+	# Interesting how I actually had this very good idea in the beginning,
+	# then I commented it thinking it's crap, and now I'm back to just
+	# modifying it a little so it works as expected again.
+	# As they say, it works like a charm ;-)
 	if ( defined @{ $ag{'_see also'} } ) {
 		my $list = $ag{'_see also'};
 		my %tmp;
 
-		# This loop is now needed since we added the concept of groups in %autogenerated.
+		# This loop is now needed since we added the concept of groups
+		# in %autogenerated.
 		for my $gr ( keys %autogenerated ) {
-			$tmp{$_} = $gr for (grep {$autogenerated{$gr}{$_} and $_ ne $ag{name}} @$list);
+			$tmp{$_} = $gr
+				for (grep {$autogenerated{$gr}{$_} and $_ ne $ag{name}} @$list);
 		}
 		@$list =keys %tmp;
 		
@@ -375,6 +395,7 @@
 			@{ $autogenerated{$tmp{$sym}}{$sym}{'_see also'} } = @$list2;
 		}
 	}
+
 }
 }
 # FINAL / PASS 2
@@ -385,11 +406,10 @@
 	# Turn 'See Also' items to refentries
 	goto END_SEEALSO unless $ag{'_see also'};
 	my @see_items = @{ $ag{'_see also'} };
-	# XXX only if it's the symbol from same category, otherwise use
-	# olink to link between documents
+
 	for my $itm ( @see_items ) {
-		next if $itm =~ /^</;
-		my $linktype = "link";
+		next if $itm =~ /^</; # Don't touch if it's a manually written XML link
+		my $linktype = "link"; # Assume link to symbol of same type
 		my $linkarg = "linkend";
 		if ( $autogenerated{$group}{$itm} ) {
 			# This means our item and the target it links to are of same type. So, 
@@ -411,7 +431,7 @@
 	$ag{'see also'} = join ", ", @see_items;
 	END_SEEALSO:
 
-	# Finally, set default values if they weren't overriden by real information
+	# Finally, set default values for fields that have no real information
 	for my $field (@page_order) {
 		if ( ! $ag{$field} ) {
 			if ( grep {/$field/} @mandatory) {
@@ -426,8 +446,6 @@
 	##########################################################################
 	# "Stringify" array values
 	$ag{"available in"} = join ", ", @{ $ag{"_available in"} };
-	#$ag{'see also'} = join(", \n", $ag{'_see also'}) if
-	#	(ref $ag{'_see also'} and scalar @{ $ag{'_see also'}});
 	# Compress 4.6.0, 4.8.0, 5.0.0 to 4.6.0-5.0.0
 	$ag{'available in'} = compress_availability($ag{'_available in'});
 
@@ -435,27 +453,30 @@
 	# DONE
 	$ag{latest} = $hash{version};
 
-#	# Visually mark NEW (cvs-head) items
-#	if ( @{ $ag{'_available in'} } == 1 and
-#		${$ag{'_available in'}}[0] eq $ag{latest} ) {
-#		$ag{purpose} = <<ENDD;
-#<inlinemediaobject>
-#<imageobject>
-#<imagedata fileref="./images/new.png" format="PNG"/>
-#</imageobject>
-#<textdata>
-#<phrase>[NEW]</phrase>
-#</textdata>
-#</inlinemediaobject>
-#$ag{purpose}
-#ENDD
-#	}
+#	# Visually mark NEW (cvs-head) items . HEH, too bad this doesn't work.
+#	DocBook strips non-text stuff when creating TOC entries. So under symbol
+#	names you see a nice NEW icon, but in TOC that plain text looks very poor.
+	if ( @{ $ag{'_available in'} } == 1 and
+			${$ag{'_available in'}}[0] eq $ENV{XMLDOCS_CUR_DEVEL}) {
+		$ag{purpose} = <<ENDD;
+<inlinemediaobject>
+<imageobject>
+<imagedata fileref="./images/new.png" format="PNG"/>
+</imageobject>
+<textdata>
+<phrase>NEW</phrase>
+</textdata>
+</inlinemediaobject>
+$ag{purpose}
+ENDD
+	}
 
 
 	if ( my $fname = $hash{specific}{$ag{name}}{"_tagopt_maproutine"} ) {
 		# This means tag is MapRoutined, so it doesn't use any other 
 		# symbols directly, but possibly the maproutined function do.
-		# So make uses{tag} = uses{maproutined_function}.
+		# So as far as symbol usage goes, the tag in question actually
+		# impersonates the maproutined function it uses.
 
 		# XXX Should work, but just somehow it doesnt, so comment for now.
 		#$hash{uses}{$group}{$ag{name}} = $hash{uses}{function}{ $fname };
@@ -482,9 +503,9 @@
 	$template or warn "No template $ag{'_symbol type'} ?\n";
 
 	{ no warnings;
-	# I simply hate this, I can't find out which field
-	# is undefined
-	while ( $template =~ s/(\$ag{.*?})/$1/eem ) {};
+	# I simply hate this, I can't find out which field is undefined
+	#while ( $template =~ s/(\$ag{.*?})/$1/eem ) {}; # Am I stupid?
+	$template =~ s/(\$ag{.*?})/$1/geem;
 	}
 
 	# Save
@@ -541,8 +562,8 @@
 print INVOUT Dumper \%invalid;
 close INVOUT;
 
-# Output autodefs.ent
-unless ( $no_autodefs ) {
+# Output autorefs.ent
+unless ( $no_autorefs ) {
 	open ATD, "> $autopath" or die "Can't wropen $autopath ($!)\n";
 	print "GEN: $autopath\n";
 
@@ -594,37 +615,52 @@
 	# those cases (we could leave through in any case, but that would waste
 	# time).
 
-	# Move some of this to the above code that calls process_symbol() ?
-	# Or, what the f* was I thinking when I disabled this region?
-	if ( ref $autogenerated{$group}{$name} ) { # Symbol known
-		if ( $autogenerated{$group}{$name}{"_symbol type"} ne $group ) { # But changed grp.
-			# The good entry is already in symbol_lists (done in wanted()), we
-			# only need to remove this invalid one here.
-			@{$symbol_lists{$group} } = grep{!/^$name$/} @{ $symbol_lists{$group}};
-			# XXX In a new system, add file pathname change to NOTES section.
-			# And correct the field (we can't simply let through to regenerate
-			# the skeleton because that would delete previous "available in"
-			# information):
-			$autogenerated{$group}{$name}{"_symbol type"} = $group;
-			$autogenerated{$group}{$name}{"symbol type"} = "&SYMBOL_" . uc($group) . ";",
-		}
-		return
-	}
+	# Move some of this to the above code that calls process_symbol() ? No.
+	# what the f* was I thinking when I disabled this region?
+	# Suddenly how at some days I see everything, and then on some I am like
+	# an idiot.. This whole block below is not used any more.
+	#
+	#if ( ref $autogenerated{$group}{$name} ) { # Symbol known
+	#	if ( $autogenerated{$group}{$name}{"_symbol type"} ne $group ) { # But changed group
+	#		# The good entry is already in symbol_lists (done in wanted()), we
+	#		# only need to remove the obsolete one:
+	#		print "YES FOR $name ($group/$autogenerated{$group}{$name}{'_symbol type'}\n";
+	#		@{$symbol_lists{$group} } = grep{!/^$name$/} @{ $symbol_lists{$group}};
+	#		# XXX In a new system, add file pathname change to NOTES section.
+	#		# And correct the field (we can't simply let through to regenerate
+	#		# the skeleton because that would delete previous "available in"
+	#		# information):
+	#		$autogenerated{$group}{$name}{"_symbol type"} = $group;
+	#		#$autogenerated{$group}{$name}{"symbol type"}="&SYMBOL_".uc($group).";";
+	#	}
+	#	return
+	#}
+
+	# How easily one can comment too much of code ;-)
+	return if ( ref $autogenerated{$group}{$name} );
 
 	# Make skel
 	$autogenerated{$group}{$name} = {
 		name => $name,
 		id => $name,
 		"_symbol type" => $group,
-		"symbol type" => "&SYMBOL_" . uc($group) . ";",
+		#"symbol type" => "&SYMBOL_" . uc($group) . ";",
 	};
 
-	# Suplement with information from a control file
+	# Skel is done, now:
+
+	# Supplement with information from a control file. Control file overrides
+	# settings, but this is only available if multi-file method is used to
+	# document an item (so, refs/itemname/*). If multi-file method is not used,
+	# nothing gets done here.
 	populate($autogenerated{$group}{$name}, $group, $name, 'control', 'override');
+
+	# Supplement information with other files, that is, either
+	# all but 'control' file from refs/itemname/*, or just refs/itemname
+	# if single-file method is used (which is standard).
 	populate($autogenerated{$group}{$name}, $group, $name, '', 'append');
 }
 
-# XXX support reading from refs/<name> file.
 sub populate {
 	my ($ref, $group, $name, $file, $mode) = @_;
 
@@ -650,23 +686,25 @@
 
 	# From one specific file (control file usually)
 	if ( $file ) {
-		open IN, "< $refpath/$file" or do {
-			push @{ $invalid{$name} }, "Requested file '$file' ($!)";
-			return;
-		};
-		if ( $file eq 'control' ) {
-			while (my $line = <IN>) {
-				next if $line =~ /^\s*#/;
-				chomp $line;
-				$line =~ s/^\s+//;
-				my ($sect,$text) = split /\s*:\s*/, $line, 2;
-				update_field($mode, $group, $name, $ref, $file, $sect, $text)
-					if ( defined $text and length $text );
+		if ( -d $refpath ) {
+			open IN, "< $refpath/$file" or do {
+				push @{ $invalid{$name} }, "Requested file '$file' ($!)";
+				return;
+			};
+			if ( $file eq 'control' ) {
+				while (my $line = <IN>) {
+					next if $line =~ /^\s*#/;
+					chomp $line;
+					$line =~ s/^\s+//;
+					my ($sect,$text) = split /\s*:\s*/, $line, 2;
+					update_field($mode, $group, $name, $ref, $file, $sect, $text)
+						if ( defined $text and length $text );
+				}
+			} else {
+				die "TODO: Reading from non-control files not supported.\n";
 			}
-		} else {
-			die "TODO: Reading from non-control files not supported.\n";
+			close IN;
 		}
-		close IN;
 
 	# From other file sets
 	} elsif (! length $file) { # all files
@@ -731,7 +769,7 @@
 
 	if ( $sect ne 'missing' ) {
 
-		if (!( grep {/^$sect/} @page_order )) {
+		if (!( grep {/^$sect/} @page_order ) and $sect ne 'ignore') {
 			push @{ $invalid{$name} }, "Section '$sect' from file '$fn' won't be used (name not recognized)";
 		}