#!/usr/bin/perl -w use strict; # xml/catalog builder for Debian, based roughly upon # http://xmlsoft.org/buildDocBookCatalog # and experience on first pass with Debian. See my buildDocBookCatalog.pl # for dialog. :-) # Use at own risk. # Gordon Haverland, perl@materialisations.com # 2003/09/15 # Usage: buildXMLCatalog [--install] \ # [[--ignore pkg1] [--ignore pkg2] ...] \ # [--package pkg [--version N]] # # This program uses /usr/bin/xmlcatalog for doing the catalog creations # and entries. It probably could do it itself using various XML capabilities # found in Perl/CPAN. It uses File::Find to search for files, instead of # calling the find program. I suppose calling the locate program (if # installed) would be even faster, and if it was slocate would avoid searching # down paths the user had no read permissions on anyway. # # xmlcatalog in --add mode, takes 3 parameters: entry TYPE, orig URN, and # replacement URN. If the orig URN already exists in the $Catalog, the # existing entry is edited/overwritten with the new one. Effectively, this # is a case of the last entry being the one used. This shouldn't happen # if there are different VeRsIoN's of the package installed, as the orig # URN probably has the Version number in it. There can be problems if # multiple, not directly related, packages install the same file. It is # probably the case that these unrelated packages are intending to install # something which is functionally the same. But, we want to find the main # package version when using XML $Catalog support, not these other ones. # # An ordinary user may want to install his/her own catalogs, often for the # case that the system catalogs don't work or find the "wrong" data. So, # allow that. # ------------------------------------------------------------------ # Assumed policy (for best results, may work or be forced to work in # absence of this). # For a system install, files are searched under /usr/local, /usr/share/xml # and then /usr/share/sgml. For a user install into $HOME, $HOME is prepended # to this list. # The $RootCatalog is /etc/xml/catalog for a system install and # $HOME/xmlcatalog for a user install. # The catalog for package (family) is /etc/xml/package.cat for a system # install, and $HOME/xml$package.cat for a user install. # The pathname should have .../xml/... in it somewhere (case independent). # It will work for this to be .../xml... or ...xml/... # The pathname should have .../$package/ in it somewhere (case independent). # It will work for this to be .../$package... or ...$package/... # Installing a version of $package to the catalogs, the path needs to include # the version string exactly (4.2 doesn't match 4.2.0). The version should # be .../version/... or ...version/... The version must be in the directory # directly above the file for scoring to be effective. For dbpoolx.mod, it # must be similar to: .../4.2/dbpoolx.mod # If you are employing the ability to ignore an arbitrary number of other # packages (like ksgmltools2 or xae), the case independent placing of # that directory name is either: .../$ignore/... or .../$ignore... The code # presently prunes out $ignore matches, but can be set to just lower the # score of these files. # In the case of DocBook, some of the PUBLIC files have version numbers # in the URN (and path), some have (constant) dates, and some have # nothing. You need to segregate how you install these things by placing # a tag of ClEaNuP where-ever you switch from one kind to the other. # DocBook XSLT stylesheets are apparently all so predictable (boring? :-) # that if you have any version installed, you just install catalog entries # for everything from 1.38 to (currently) 1.60. A special tag of XSL # indicates that you take this "lazy" approach (to DocBook XSLT only). # If written correctly, this program will properly compare version strings # with differing numbers of major, minor, patch, ... numbers. For example: # 4.20.0.7 < 4.20.0.7.2 # The parts must be separated by periods (dots), and the comparisons of # parts are numeric (so 07 == 7). # Default package to install is 'docbook', and default version to install # is all (which I think matches none (aka no version numbers). # Personally, I think XML packages should get installed into /usr/share/xml, # as xml files and processing is enough specialised for XML that accidentally # running into SGML files causes problems. In the case of DocBook related # stuff, I would like to see something like: # /usr/share/xml/docbook/4.2/ # While the only DocBook specific files which seem to be versioned are DTDs, # I think /dtd/ should be a subdirectory under the version number, since # other DocBook related packages might rely on differing versions of # DocBook. For example: if you have DocBook-MathML (an XML application) # installed, which depends on DocBook-4.1.2, we would have something # like: # /usr/share/xml/docbook/4.1.2/mathml/ # How /custom/ falls into this scheme, I don't know. I see that the # DocBook-MathML DTD has "../../../dtd/xml/4.1.2/dbmathml.dtd" in it. # If this how it comes from OASIS, that certainly doesn't fit with how # I think things should work. And the closer we stick to how they # are suggesting how things get set up, the better. # Some global variables. my $DBDir = {}; # Hash of directories/versions/counts seen of # PUBLIC entries my $Search_Dirs = []; # Directories to look for XML DTD/ENTITIES/etc. in my $XMLFiles = []; use Getopt::Long; # init my $install = undef; # Switch - Checks for root running program. my @ignore; # Array of packages to ignore my $package = 'docbook'; # Name of package to install ($package.cat is catalog) my $version = 'all'; # Version string to install (all if not specified) my $result = GetOptions( "install" => \$install, "ignore=s" => \@ignore, "package=s" => \$package, "version=s" => \$version ); my( $RootCatalog, $Catalog ) = &initialize( $install, $package, $version, @ignore ); # Scoring system: presence of file assumes a score of 1. If # pathname has any of @ignore in it, we subtract 2 from score. # If pathname has /XML/i in it, add 2 to score. If pathname has # /$package/i in it, add 2 to score. If pathname has version # string we are looking for, add 2. If just the beginning matches, # add 1. # Main routine. Loop over the @XMLFiles to find. If a particular XMLFiles # hash has a {file} to look for, score all the file names. If it doesn't # have a {file} to look for, it indicates we are making entries in the # $RootCatalog based on what we've entered so far. ID_Types of XSL and # ClEaNuP are special (internal to program). foreach my $XMLFile (@{$XMLFiles}) { my( $orig, $replacement, @cmd ); my $locations = []; my $File = $XMLFile->{file} ? $XMLFile->{file} : undef; if( $File ) { if( $File =~ /^iso-?[a-z]+[0-9]?\.ent$/ ) { # ISO ENTITIES file # ISO ENTITES files may have different names from the DOS 8.3 versions # that most systems use. I am assuming that these other named files # are probably the ones we should be using, and hence searched for # first. my $tFile = $File; $tFile =~ s/^iso-/ISO/; push( @{$locations}, &file_find( $tFile ) ); $tFile =~ s/^iso-/iso/; push( @{$locations}, &file_find( $tFile ) ); } push( @{$locations}, &file_find( $File ) ); # Original name # Score and prune @locations. &score_files( 1, $XMLFile->{Identifier}, $locations ); } # What kind of entry are we making? Public entries are made in the # $Catalog, and we also store up other information for when a rewrite # or delegate ID_Type comes along. ClEaNuP is our signal to delete # this saved up information (say, before doing ISO ENTITIES). XSL # is the last entry for DocBook, we need to generate a zillion version # entries. if( $XMLFile->{ID_Type} =~ /^public$/ ) { &do_public( $XMLFile, $locations ); } elsif( $XMLFile->{ID_Type} =~ /^rewriteSystem$/ ) { # Entries to $Catalog, based on the public files entered since last # ClEaNuP &do_rewrite( $XMLFile, $DBDir ); } elsif( $XMLFile->{ID_Type} =~ /^rewriteURI$/ ) { &do_rewrite( $XMLFile, $DBDir ); } elsif( $XMLFile->{ID_Type} =~ /^delegatePublic$/ ) { # Entries to $RootCatalog based on Identifier and $Catalog &do_delegate( $XMLFile ); } elsif( $XMLFile->{ID_Type} =~ /^delegateSystem$/ ) { # Entries to $RootCatalog based on Identifier and $Catalog &do_delegate( $XMLFile ); } elsif( $XMLFile->{ID_Type} =~ /^delegateURI$/ ) { # Entries to $RootCatalog based on Identifier and $Catalog &do_delegate( $XMLFile ); } elsif( $XMLFile->{ID_Type} =~ /^ClEaNuP$/ ) { # If we have been writing DocBook catalog entries, we can now # delete all the %DBDir stuff, since what follows is the unversioned # ISO Entity files. In general, I think we cleanup whenever we switch # from a set of files which have VERSIONs, to ones that don't. foreach my $k (keys(%{$DBDir})) { delete( $DBDir->{$k} ); } } elsif( $XMLFile->{ID_Type} =~ /^XSL$/ ) { &insert_xsl( $XMLFile, $locations ); } else { # Impossible print "Say what? Unknown Type=$XMLFile->{ID_Type}\n"; } } sub initialize { my $install = @_ ? shift : undef; my $package = @_ ? shift : 'docbook'; my $version = @_ ? shift : 'all'; my @ignore = @_; my( $Home ); # If /usr/share/xml doesn't exist or isn't used by policy, remove it. push( @{$Search_Dirs}, '/usr/local', '/usr/share/xml', '/usr/share/sgml' ); if( ($< == 0) || ($( == 0) ) { # Root is running program. unless( $install ) { # Root is not doing an install? die "Program assumes root only does system installs. Bye.\n"; } $RootCatalog = '/etc/xml/catalog'; $Catalog = "/etc/xml/$package.cat"; $Home = 0; if( -w '/etc' ) { if( -e '/etc/xml' ) { die "/etc/xml exists, but isn't a directory\n" unless( -d '/etc/xml' ); } else { mkdir '/etc/xml'; chmod 0755, '/etc/xml'; } } else { die "Can't write to /etc\n"; } # So, we can write to /etc/xml/ Can we write /etc/xml/catalog? if( -e '/etc/xml/catalog' ) { die "Can't (over)write /etc/xml/catalog\n" unless( -w '/etc/xml/catalog' ); } else { die "Can't write /etc/xml/catalog\n" unless( -w '/etc/xml' ); } # Ignoring test of iso-amsb.ent for 'ominus.* ⊖' } else { $RootCatalog = "$ENV{HOME}/xmlcatalog"; $Catalog = "$ENV{HOME}/xml$package.cat"; $Home = 1; unshift( @{$Search_Dirs}, "$ENV{HOME}" ); } # Create the initial entries &create_catalog( $RootCatalog ); &create_catalog( $Catalog ); # The shell script this is based on, bounces back and forth between doing # work and testing stuff. We are going to do the testing stuff more or # less all at once, and then do the work later on. # Set up list of hashes, which contains files to look for. Probably # best to do this by a config file or install file of some kind, but do # by hard coded list here. The ID_Types accepted by xmlcatalog are: # public, rewriteSystem, rewriteURI, delegatePublic, delegateSystem, # and delegateURI. Anything else if passed to xmlcatalog, should generate # an error. I use XSL for ID_Type, to indicate that I need to calculate # a whole bunch of XSLT entries, and ClEaNuP to indicate I need to forget # some data. my @XMLFiles = ( { file => 'dbpoolx.mod', ID_Type => 'public', Identifier => '-//OASIS//ELEMENTS DocBook XML Information Pool VVeRsIoN//LaNgUaGe', }, { file => 'docbookx.dtd', ID_Type => 'public', Identifier => '-//OASIS//DTD DocBook XML VVeRsIoN//LaNgUaGe', }, { file => 'dbcentx.mod', ID_Type => 'public', Identifier => '-//OASIS//ENTITIES DocBook XML Character Entities VVeRsIoN//LaNgUaGe', }, { file => 'dbnotnx.mod', ID_Type => 'public', Identifier => '-//OASIS//ENTITIES DocBook XML Notations VVeRsIoN//LaNgUaGe', }, { file => 'dbgenent.mod', ID_Type => 'public', Identifier => '-//OASIS//ENTITIES DocBook XML Additional General Entities VVeRsIoN//LaNgUaGe', }, { file => 'dbhierx.mod', ID_Type => 'public', Identifier => '-//OASIS//ELEMENTS DocBook XML Document Hierarchy VVeRsIoN//LaNgUaGe', }, { file => 'soextblx.dtd', ID_Type => 'public', Identifier => '-//OASIS//DTD XML Exchange Table Model YeArMnDy//LaNgUaGe', }, { file => 'calstblx.dtd', ID_Type => 'public', Identifier => '-//OASIS//DTD DocBook XML CALS Table Model VVeRsIoN//LaNgUaGe', }, { ID_Type => 'rewriteSystem', Identifier => 'http://www.oasis-open.org/docbook/xml/VeRsIoN' }, { ID_Type => 'rewriteURI', Identifier => 'http://www.oasis-open.org/docbook/xml/VeRsIoN' }, { ID_Type => 'delegatePublic', Identifier => '-//OASIS//ENTITIES DocBook XML', }, { ID_Type => 'delegatePublic', Identifier => '-//OASIS//DTD DocBook XML', }, { ID_Type => 'delegateSystem', Identifier => 'http://www.oasis-open.org/docbook/', }, { ID_Type => 'delegateURI', Identifier => 'http://www.oasis-open.org/docbook/', }, { ID_Type => 'ClEaNuP', }, { file => 'iso-pub.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Publishing//LaNgUaGe', }, { file => 'iso-grk1.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Greek Letters//LaNgUaGe', }, { file => 'iso-box.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Box and Line Drawing//LaNgUaGe', }, { file => 'iso-grk3.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Greek Symbols//LaNgUaGe', }, { file => 'iso-amsn.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Added Math Symbols: Negated Relations//LaNgUaGe', }, { file => 'iso-num.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Numeric and Special Graphic//LaNgUaGe', }, { file => 'iso-grk4.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Alternative Greek Symbols//LaNgUaGe', }, { file => 'iso-dia.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Diacritical Marks//LaNgUaGe', }, { file => 'iso-grk2.ent', ID_Type => 'public', Identifier => 'ISO 8879::1986//ENTITIES Monotoniko Greek//LaNgUaGe', }, { file => 'iso-amsa.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Added Math Symbols: Arrow Relations//LaNgUaGe', }, { file => 'iso-amso.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Added Math Symbols: Ordinary//LaNgUaGe', }, { file => 'iso-cyr1.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Russian Cyrillic//LaNgUaGe', }, { file => 'iso-tech.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES General Technical//LaNgUaGe', }, { file => 'iso-amsc.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Added Math Symbols: Delimiters//LaNgUaGe', }, { file => 'iso-lat1.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Added Latin 1//LaNgUaGe', }, { file => 'iso-amsb.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Added Math Symbols: Binary Operators//LaNgUaGe', }, { file => 'iso-lat2.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Added Latin 2//LaNgUaGe', }, { file => 'iso-amsr.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Added Math Symbols: Relations//LaNgUaGe', }, { file => 'iso-cyr2.ent', ID_Type => 'public', Identifier => 'ISO 8879:1986//ENTITIES Non-Russian Cyrillic//LaNgUaGe', }, # Special hash, XSL type is a flag for special action that # was custom written. { file => 'docbook.xsl', ID_Type => 'XSL', Identifier => 'DocBook XSLT Stylesheets', }, ); push( @{$XMLFiles}, @XMLFiles ); return( $RootCatalog, $Catalog ); } sub create_catalog { die "Wrong number of args (@_) to create_catalog\n" unless $#_ == 0; my $Catalog = shift; my @cmd = ('/usr/bin/xmlcatalog', '--noout', '--create', $Catalog); if( -w $Catalog ) { # Exists, and is writable print STDOUT "creating XML Catalog $Catalog\n"; my $rc = 0xffff & system( @cmd ); if( $rc == 0 ) { return; } elsif( $rc == 0xff00 ) { print "Command (@cmd) failed: $!\n"; } elsif( $rc > 0x80 ) { $rc >>= 8; print "Command (@cmd) ran with non-zero exit status $rc\n"; } else { print "Command (@cmd) "; if( $rc & 0x80 ) { $rc &= ~0x80; print "and dumped core "; } print "seen signal $rc\n"; } die "Quitting\n"; } elsif( -e $Catalog ) { # Exists, not writable. Bail die "Can't write XML Catalog $Catalog\n"; } else { print STDOUT "creating XML Catalog $Catalog\n"; my $rc = 0xffff & system( @cmd ); if( $rc == 0 ) { return; } elsif( $rc == 0xff00 ) { print "Command (@cmd) failed: $!\n"; } elsif( $rc > 0x80 ) { $rc >>= 8; print "Command (@cmd) ran with non-zero exit status $rc\n"; } else { print "Command (@cmd) "; if( $rc & 0x80 ) { $rc &= ~0x80; print "and dumped core "; } print "seen signal $rc\n"; } die "Quitting\n"; } } sub score_files { my $prune = shift; # Flag to remove certain @locations my $Identifier = shift; my $locations = shift; my $score = {}; foreach my $pathname (@{$locations}) { # First, strip off filename part for searches on path my @path = split( m|/|, $pathname ); pop( @path ); my $dirname = join '/', @path; # Set up a score card. Presence of file, gets a score of 1. $score->{$pathname} = 1; # Does /XML/, /XML or XML/ (regardless of case) occur in path? if( $dirname =~ m|/XML/|i ) { $score->{$pathname} += 2; } elsif( $dirname =~ m|XML/|i ) { $score->{$pathname} += 2; } elsif( $dirname =~ m|/XML|i ) { $score->{$pathname} += 2; } # Are we looking for a particular package? I am assuming that if the # package name is part of the path (regardless of case), it is an entire # component/subdirectory. For example: .../docbook/... if( $package ) { $score->{$pathname} += 2 if( $dirname =~ m|/$package/|i ); } # Are we looking for a specific version to install. If yes, accept no # other versions. We want something like: .../X.Y.Z/$file if( $version && $version ne 'all' ) { if( $dirname =~ m|/$version/| ) { $score->{$pathname} += 2; } elsif( $dirname =~ m|/$version| ) { $score->{$pathname} += 1; } else { # Wrong version entirely, forget it. delete $score->{$pathname}; } } # Are we set to ignore certain things? If $prune, delete it. if( @ignore ) { foreach my $ign (@ignore) { if( $dirname =~ m|/$ign/|i ) { if( $prune ) { delete $score->{$pathname}; } else { $score->{$pathname} -= 2; } } elsif( $dirname =~ m|/$ign|i ) { if( $prune ) { delete $score->{$pathname}; } else { $score->{$pathname} -= 2; } } } } } # End loop over @locations # Rebuild our @locations list from all surviving entries, sorted on # (increasing) score. Hence, highest score is last. If the calculated # values of $orig given to xmlcatalog have duplicate values, the highest # score being last will insure the "best" entry is what is in $Catalog. @{$locations} = sort { $score->{$a} <=> $score->{$b} || &vers_cmp( $a, $b ) } keys %{$score}; } sub do_public { my $XMLFile = shift; my $locations = shift; my( $File, $Identifier, $Type, $dirname, @path ); my $orig = []; my $repl = []; $Identifier = $XMLFile->{Identifier}; $Type = $XMLFile->{ID_Type}; if( $#{$locations} == -1 ) { print "No XML Catalog entries for $XMLFile->{file}\n"; # } elsif( $#{$locations} == 0 ) { # One entry, make it. } else { # Multiple files # Some $Identifiers have a version, so grab it from $dirname or from # file if not in $dirname (better not have multiple strings that look # like versions in $dirname). Some $Identifiers have a Date, but it # turns out all the dates are the same in practice. if( $Identifier =~ /VeRsIoN/ ) { # $orig is a reference to an array of $orig URNs. $repl is a # reference to an array of replacement URNs. ($orig, $repl) = &get_public_version( $Identifier, $locations ); # Other stuff? } elsif( $Identifier =~ /YeArMnDy/ ) { # In DocBook, this specifically applies to soextblx. Which is a strange # file, as its internal comments make it look like the CALS file. And # the internal comments of the CALS file make it look like something # else entirely (some US-DOD file). It turns out that all versions of # this file I've found are nominally the same, and all have the same # date. The difference is the defintion of paracon. What happens if # I ever get a valid date, but not a version? ($orig, $repl) = &get_public_date( $Identifier, $locations ); # Other stuff? } else { # Public, but no VERSION or DATE. ISO ENTITIES and others. # Without a VERSION or DATE, all of these files will have the same # $Identifier (orig URN). So, only the last one (hopefully with the # highest score) is of consequence. my @path = split( m|/|, $locations->[-1] ); pop( @path ); my $dirname = join '/', @path; my $tmp = $Identifier; $tmp =~ s/LaNgUaGe$/EN/; if( exists( $DBDir->{$dirname} ) ) { $DBDir->{$dirname}{count}++; } else { $DBDir->{$dirname} = {}; $DBDir->{$dirname}{count} = 1; } push( @{$orig}, $tmp ); push( @{$repl}, "file:/$locations->[-1]" ); } } # Loop over $orig/$repl (both better have the same number of members!), # generating XML catalog entries. It is possible for $orig or $repl # to be undef, hopefully if one is undef the other is as well. Don't # write a XML catalog entry if either is undef. if( $#{$orig} != $#{$repl} ) { die "Strange; original URNs ($#{$orig}) != replacement URNs ($#{$repl})\n"; } for( my $i=0; $i <= $#{$orig}; $i++ ) { if( $orig->[$i] && $repl->[$i] ) { my @cmd = ('/usr/bin/xmlcatalog', '--noout', '--add', $XMLFile->{ID_Type}, $orig->[$i], $repl->[$i], $Catalog); #print join ' ', @cmd, "\n"; &xmlcatalog( @cmd ); } elsif( $orig->[$i] || $repl->[$i] ) { print "Strange; orig/repl URNs not both undef ($orig->[$i])($repl->[$i])\n"; } else { # Both undef, ignore. } } } sub get_public_version { my $Identifier = shift; my $locations = shift; my $orig = []; my $repl = []; my( $Version, $dirname, $Ident, $URI ); # Our $Identifier has VeRsIoN in it, and we want to find all versions # for our array of locations. foreach my $location (@{$locations}) { ($Version, $dirname) = &get_XMLversion( $Identifier, $location ); if( $Version ) { # We have a $Version (or we died), and a $dirname $Ident = $Identifier; $Ident =~ s/LaNgUaGe$/EN/; # Just most common case, assumption $Ident =~ s/VeRsIoN/$Version/; # $URI = "file://$dirname/$File"; $URI = "file:/$location"; if( exists( $DBDir->{$dirname} ) ) { $DBDir->{$dirname}{count}++; } else { $DBDir->{$dirname} = {}; $DBDir->{$dirname}{count} = 1; $DBDir->{$dirname}{version} = $Version; } push( @{$orig}, $Ident ); push( @{$repl}, $URI ); } else { # We need a version, but can't find one. push( @{$orig}, undef ); push( @{$repl}, undef ); } } return( $orig, $repl ); } sub get_XMLversion { my $Identifier = shift; my $pathname = shift; my( $Version, $dirname, $search, @path, $f, $last ); @path = split( m|/|, $pathname ); $f = pop( @path ); # Pop off filename $last = $path[-1]; $dirname = join '/', @path; # Version is either in last subdirectory name above this file, or we # have to look in the file for it. if( $last =~ /^[\.0-9]+$/ ) { # Last dir is all digits and dots $Version = $last; } else { # Look in file. We build a search term from the $Identifier. # We need to strip off the '//LaNgUaGe' at the end (if present) and # VeRsIoN. Often, our $Identifier has VVeRsIoN, which could become # something like V4.2. So, strip off the leading V too. Typically # these files, somewhere near the beginning, have a comment that says # how they should be called. Which has the proper VERSION string # in it. $search = $Identifier; $search =~ s|//LaNgUaGe$||; $search =~ s|\s+VVeRsIoN\s*$||; $search =~ s|\s+VeRsIoN\s*$||; # Hopefully our $search term is now generic enough, that we can find # the version in the file. open( LOC, "< $pathname" ) || die "Can't open XML source file ($pathname), quitting:$!\n"; my @l = ; close( LOC ); # It's possible that the search term is NOT in the file, sort of # like calstblx.dtd (it's there, but different format than "usual"). # Best case, is that it is there once. Worse case, multiple times. my @s = grep /$search/, @l; if( $#s == -1 ) { # Nothing $Version = undef; } elsif( $#s == 0 ) { # Multiple search terms print "$pathname has multiple ($search)\n"; $Version = undef; $dirname = undef; } else { if( $s[0] =~ m|V?([\.0-9]+)//| ) { $Version = $1; } } } return( $Version, $dirname ); } sub do_rewrite { my $XMLFile = shift; my $DBDir = shift; # The $orig (Identifier) probably has some kind of VERSION on it, or # at least DocBook does. The data on which directories to equate to # external URNs was built up installing PUBLIC catalog entries in # $Catalog. my $orig = $XMLFile->{Identifier}; foreach my $dirname (keys(%{$DBDir})) { # If we can substitute a VERSION into Identifier, do so. if( $DBDir->{$dirname}{version} ) { $orig =~ s/VeRsIoN/$DBDir->{$dirname}{version}/; } my $repl = "file:/$dirname"; # Do xmlcatalog now. Note, if there is no VERSION information to # substitute in, essentially only the last entry is important. And # with perl, the order of hash keys is unpredictable, so we probably # want to sort the keys in some way, again probably so that we have # the most recent version last. my @cmd = ('/usr/bin/xmlcatalog', '--noout', '--add', $XMLFile->{ID_Type}, $orig, $repl, $Catalog); #print join ' ', @cmd, "\n"; &xmlcatalog( @cmd ); } } sub do_delegate { my $XMLFile = shift; my $orig = $XMLFile->{Identifier}; my $repl = "file:/$Catalog"; my @cmd = ('/usr/bin/xmlcatalog', '--noout', '--add', $XMLFile->{ID_Type}, $orig, $repl, $RootCatalog); #print join ' ', @cmd, "\n"; &xmlcatalog( @cmd ); } # The following works, but it far too DocBook-XSLT specific. sub insert_xsl { my $XMLFile = shift; my $locations = shift; my $File = $XMLFile->{file}; my( $orig, $repl, @cmd ); # The $File is docbook.xsl We want the directory to be one which also # contains l10n.xml for some reason. The shell script version, has XSL # versions as X.YZ from 1.39 to 1.50. The RELEASE-NOTES.xml.gz on my # Debian system runs from 1.60.1 down. I assume, that I can ignore the # ".patch_number" part, so use xsl versions from 1.39 to 1.60. my @xsl_versions = qw(1.39 1.40 1.41 1.42 1.43 1.44 1.45 1.46 1.47 1.48 1.49 1.50 1.51 1.52 1.53 1.54 1.55 1.56 1.57 1.58 1.59 1.60); foreach my $location (@{$locations}) { # To verify a location, starting from docbook.xsl, we want to find # ../common/l10n.xml. If we find it, $dir is .. my @path = split( m|/|, $location ); pop( @path ); pop( @path ); my $dirname = join '/', @path; my $l10n = "$dirname/common/l10n.xml"; if( -r $l10n ) { foreach my $n_xsl (@xsl_versions) { $orig = "http://docbook.sourceforge.net/release/xsl/$n_xsl"; $repl = "file:/$dirname"; @cmd = ('/usr/bin/xmlcatalog', '--noout', '--add', 'rewriteSystem', $orig, $repl, $Catalog); #print join ' ', @cmd, "\n"; &xmlcatalog( @cmd ); @cmd = ('/usr/bin/xmlcatalog', '--noout', '--add', 'rewriteURI', $orig, $repl, $Catalog); #print join ' ', @cmd, "\n"; &xmlcatalog( @cmd ); } } } $orig = "http://docbook.sourceforge.net/release/xsl/"; @cmd = ('/usr/bin/xmlcatalog', '--noout', '--add', 'delegateSystem', $orig, "file:/$Catalog", $RootCatalog); #print join ' ', @cmd, "\n"; &xmlcatalog( @cmd ); @cmd = ('/usr/bin/xmlcatalog', '--noout', '--add', 'delegateURI', $orig, "file:/$Catalog", $RootCatalog); #print join ' ', @cmd, "\n"; &xmlcatalog( @cmd ); } sub xmlcatalog { #my $prog = shift; #my $noout = shift; #my $add = shift; #my $type = shift; #my $orig = shift; #my $replace = shift; #my $Catalog = shift; die "Wrong number of args (@_)\n" unless $#_ == 6; my $rc = 0xffff & system( @_ ); if( $rc == 0 ) { return; } elsif( $rc == 0xff00 ) { print "Command (@_) failed: $!\n"; } elsif( $rc > 0x80 ) { $rc >>= 8; print "Command (@_) ran with non-zero exit status $rc\n"; } else { print "Command (@_) "; if( $rc & 0x80 ) { $rc &= ~0x80; print "and dumped core "; } print "seen signal $rc\n"; } die "Quitting\n"; } sub file_find { my $fname = shift; my @paths; # Okay, cheat! Use find2perl to generate perl code to search for # these files. :-\) General solution from shell script is to search # down /usr/share/xml, $HOME, /usr/local and then /usr/share/sgml. # Debian installs DocBook in /usr/share/sgml all the time. So, the # list of directories might need to be changed. Or could be changed. # This File::Find needs to be eval'd, or at least I can't seem to # get perl to do the pattern match for the file name with all of # this in one chunk of code. ##! /usr/bin/perl -w # eval 'exec /usr/bin/perl -S $0 ${1+"$@"}' # if 0; #$running_under_some_shell my $proggie = <<'EOF'; use strict; no warnings; # So we don't get a warning about @paths use File::Find (); # Don't worry about setting $File::Find::dont_use_nlink, we aren't # running AFS (or do we?). # For the convenience of &wanted calls to File::Find use vars qw/*name *dir *prune/; *name = *File::Find::name; *dir = *File::Find::dir; *prune = *File::Find::prune; sub wanted; File::Find::find( {wanted => \&wanted}, @DBDirs ); sub wanted { /^$fname/s && # print "$name\n" && push( @paths, $name ); } EOF $proggie =~ s/\$fname/$fname/; $proggie =~ s/\@DBDirs/qw(@{$Search_Dirs})/; eval $proggie; return( @paths ); } sub vers_cmp { my $a = shift; my $b = shift; # String should be of the form: # /some/path/to/N[.N[.N[... ]]]/filename # If either $a or $b has a different nesting of numbers, assume # missing numbers are zero. What do we do when this isn't the # format? Especially possible when looking under $HOME. My # guess is to assume no VERSION information in path, and bail. my @pa = split( m|/|, $a ); my @pb = split( m|/|, $b ); # Check that second last component, is consistent with a VERSION if( ($pa[-2] !~ /^[\.0-9]+$/) && ($pb[-2] =~ /^[\.0-9]+$/) ) { return -1; } elsif( ($pa[-2] =~ /^[\.0-9]+$/) && ($pb[-2] !~ /^[\.0-9]+$/) ) { return +1; } elsif( ($pa[-2] !~ /^[\.0-9]+$/) && ($pb[-2] !~ /^[\.0-9]+$/) ) { return 0; } # Okay, we have VERSIONs (we hope). my @a = split( /\./, $pa[-2] ); my @b = split( /\./, $pb[-2] ); my $i = 0; while( ($i <= $#a) && ($i <= $#b) ) { return -1 if( $a[$i] < $b[$i] ); return +1 if( $a[$i] > $b[$i] ); $i++; } return 0 if( ($i > $#a) && ($i > $#b) ); return -1 if( $i <= $#b ); return +1 if( $i <= $#a ); } sub get_public_date { my $Identifier = shift; my $locations = shift; my $orig = []; my $repl = []; my( $Date, $Version, $search, $dirname, $Ident, $URI ); # soextblx and calstblx are a strange breed. Within soextblx are # comments about the CALS model, in fact the ENTITY being defined # is calstblx. Within calstblx are no comments as to how the file # should be addressed, but there is a reference to an old US-DOD # spec. For Debian, both of these files seem to be versioned, # with the version number coming from the path name. The date # within soextblx seems to be the same across multiple versions # of DocBook, I guess there are no bugs in this old, US-DOD CALS # document. Take the easy way out, and just return '19990315' # for the date. foreach my $location (@{$locations}) { ($Version, $Date, $dirname) = &get_XMLdate( $Identifier, $location ); if( $Version ) { # We have a $Version (or we died), and a $dirname $Ident = $Identifier; $Ident =~ s/LaNgUaGe$/EN/; # Just most common case, assumption $Ident =~ s/VeRsIoN/$Version/; $Ident =~ s/YeArMnDy/$Date/; # $URI = "file://$dirname/$File"; $URI = "file:/$location"; if( exists( $DBDir->{$dirname} ) ) { $DBDir->{$dirname}{count}++; } else { $DBDir->{$dirname} = {}; $DBDir->{$dirname}{count} = 1; $DBDir->{$dirname}{version} = $Version; } push( @{$orig}, $Ident ); push( @{$repl}, $URI ); } else { # We need a version, but can't find one. push( @{$orig}, undef ); push( @{$repl}, undef ); } } return( $orig, $repl ); } sub get_XMLdate { my $Identifier = shift; my $pathname = shift; my( $Date, $Version, $dirname, $search ); # soextblx and calstblx are a strange breed. Within soextblx are # comments about the CALS model, in fact the ENTITY being defined # is calstblx. Within calstblx are no comments as to how the file # should be addressed, but there is a reference to an old US-DOD # spec. For Debian, both of these files seem to be versioned, # with the version number coming from the path name. The date # within soextblx seems to be the same across multiple versions # of DocBook, I guess there are no bugs in this old, US-DOD CALS # document. Take the easy way out, and just return '19990315' # for the date. How do I make this generic????? $Date = '19990315'; my @path = split(m|/|, $pathname); my $f = pop( @path ); my $last = $path[-1]; $dirname = join '/', @path; if( $last =~ /^[\.0-9]+$/ ) { # Last dir is all digits and dots $Version = $last; } else { # There is nothing in file to provide a version. And date # is constant. return( undef, $Date, $dirname ); } return( $Version, $Date, $dirname ); }