#! /usr/bin/perl -w

# Converts an old style humanity collection, which describes its documents
# in an index.txt file, to use a single metadata.xml file instead.
#
# Usage: pass the collection name as the only argument.
#
# The collection's "import" directory is left untouched; the converted
# structure is written to a sibling "import.new" directory.
#
# index.txt layout, as consumed below (each line is tokenised by
# cfgread::read_cfg_line):
#   key: <field> <field> ...      names the positional metadata fields
#   <jobnumber> <value> ...       one document; each value belongs to the
#                                 field at the same position, unless it has
#                                 the inline form "<name>value", which names
#                                 its own field.

BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
}

use strict;

use util;
use cfgread;

my $collection = $ARGV[0];
die "usage: $0 collection-name\n"
    unless defined $collection && $collection ne "";

my $collectdir = util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection);
my $importdir  = util::filename_cat($collectdir, "import");
die "import directory $importdir does not exist\n" unless -d $importdir;

# new import structure will be created in "import.new" directory
my $importnewdir = $importdir . ".new";
unless (-d $importnewdir) {
    mkdir($importnewdir) or die "cannot create $importnewdir: $!\n";
}

# read in index.txt file and generate metadata.xml, in the process
# converting the html files and copying them across to the import.new
# directory
my $metadata_xml = "<?xml version=\"1.0\" ?>\n";
$metadata_xml .= "<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">\n";
$metadata_xml .= "<DirectoryMetadata>\n\n";

my $index_txt = util::filename_cat($importdir, "index.txt");

# The handle stays a bareword because cfgread::read_cfg_line is handed the
# handle by its fully qualified name.
open (INDEXTXT, '<', $index_txt) or die "cannot open $index_txt: $!\n";

my $line;
my @fields = ();
my $count  = 0;
while (defined ($line = cfgread::read_cfg_line("main::INDEXTXT"))) {
    # last if $count > 10;   # uncomment to convert only a small sample

    # defensive: an empty token list carries neither a key nor a document
    next unless @$line;

    if ($line->[0] eq "key:") {
        # a "key:" line (re)defines the positional field names
        shift @$line;
        @fields = @$line;
        next;
    }

    my $jobnumber = shift @$line;
    new_document($jobnumber);
    $count++;

    for my $i (0 .. $#$line) {
        if ($line->[$i] =~ /^<([^>]+)>(.*)$/) {
            # inline "<name>value" form overrides the positional field
            my ($name, $value) = ($1, $2);
            set_metadata($name, $value);
        }
        elsif (defined $fields[$i]) {
            set_metadata($fields[$i], $line->[$i]);
        }
        else {
            print STDERR "error 1: no field name for value $i of document $jobnumber\n";
        }
    }

    # close the elements opened by new_document
    $metadata_xml .= "    </Description>\n";
    $metadata_xml .= "  </FileSet>\n\n";
}
close INDEXTXT;

$metadata_xml .= "</DirectoryMetadata>\n";

my $metafile = util::filename_cat($importnewdir, "metadata.xml");
open (my $meta_fh, '>', $metafile) or die "cannot write $metafile: $!\n";
print {$meta_fh} $metadata_xml;
# buffered write errors only surface at close
close ($meta_fh) or die "error writing $metafile: $!\n";

print STDERR "converted $count document(s)\n";


# Quotes a string so that /bin/sh treats it as one literal word.
sub shell_quote {
    my ($arg) = @_;
    $arg =~ s/'/'\\''/g;
    return "'" . $arg . "'";
}

# Copies the document directory named by $jobnumber from import to
# import.new, runs convert_toc.pl over its .htm file, and opens the
# document's <FileSet>/<Description> elements in $metadata_xml.  The caller
# appends the metadata and closes those elements.
# Dies if the source directory or .htm file is missing, if the target
# already exists, or if copying/converting fails.
sub new_document {
    my ($jobnumber) = @_;

    print STDERR "creating new document ($jobnumber)\n";

    my $docdir = util::filename_cat($importdir, $jobnumber);
    die "document directory $docdir does not exist\n" unless -d $docdir;

    # copy whole directory across to import.new; everything up to and
    # including the first "/" of the job number is dropped, so the copy sits
    # directly below import.new
    $jobnumber =~ s/^.*?\///;
    my $newdocdir = util::filename_cat($importnewdir, $jobnumber);
    die "$newdocdir already exists\n" if -e $newdocdir;
    # list form: no shell involved, so odd characters in paths are harmless
    system ("cp", "-r", $docdir, $newdocdir) == 0
	or die "cp -r $docdir $newdocdir failed (status $?)\n";

    # convert the htm file to use the new syntax
    my $htmfile = util::filename_cat($newdocdir, "$jobnumber.htm");
    die "$htmfile does not exist\n" unless -e $htmfile;
    my $tmpfile = "$htmfile.new";
    # the redirections need a shell, so the paths are quoted explicitly
    my $convert = "convert_toc.pl < " . shell_quote($htmfile)
	. " > " . shell_quote($tmpfile);
    # only replace the original once the conversion is known to have worked
    system ($convert) == 0
	or die "convert_toc.pl failed on $htmfile (status $?)\n";
    rename ($tmpfile, $htmfile)
	or die "cannot rename $tmpfile to $htmfile: $!\n";

    # update metadata.xml
    $metadata_xml .= "  <FileSet>\n";
    $metadata_xml .= "    <FileName>$jobnumber</FileName>\n";
    $metadata_xml .= "    <Description>\n";
}

# Appends one metadata element, named $key and holding $value, to the
# <Description> of the document currently being written.  Key and value are
# written verbatim (no XML escaping is applied).
sub set_metadata {
    my ($key, $value) = @_;

    $metadata_xml .= "      <Metadata name=\"$key\">$value</Metadata>\n";
}