#!/usr/bin/perl -w

# convert_coll_from_gs2.pl
#
# Reads a collection's etc/collect.cfg and index/build.cfg and writes two
# XML files next to them: etc/collectionConfig.xml and index/buildConfig.xml.
#
# Usage: convert_coll_from_gs2.pl [options] coll-name

BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan");
    #unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
    # unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify");
}

use strict;

#use arcinfo;
use colcfg;
#use plugin;
use docprint;
use util;
use parsargv;
use FileHandle;
use XML::Writer;

main();

# Print the command-line synopsis to STDOUT.
sub print_usage {
    print STDOUT "Usage: convert_coll_from_gs2.pl [options] coll-name\n";
    print STDOUT "options:\n";
    print STDOUT " -collectdir Directory where collection lives.\n";
}

# Turn a list of "key->value" strings into a hash reference.
# Entries that do not contain "->" are reported on STDERR and skipped, so
# that an undefined value is never used as a hash key.
sub _parse_arrow_map {
    my ($entries) = @_;

    my $map = {};
    foreach my $entry (@$entries) {
        my ($k, $v) = $entry =~ /^(.*)\-\>(.*)$/;
        if (!defined $k) {
            print STDERR "ignoring malformed map entry '$entry'\n";
            next;
        }
        $map->{$k} = $v;
    }
    return $map;
}

# Emit <defaultLevel name=.../> followed by a <levelList> holding one
# <level name=.../> per entry of @$levels.
sub _write_levels {
    my ($writer, $defaultlevel, $levels) = @_;

    $writer->emptyTag('defaultLevel', 'name'=>$defaultlevel);
    $writer->startTag('levelList');
    foreach my $level (@$levels) {
        $writer->emptyTag('level', 'name'=>$level);
    }
    $writer->endTag('levelList');
}

# Emit <defaultIndex name=.../>, unless no default index could be resolved
# (in which case a diagnostic has already been printed by the caller).
sub _write_default_index {
    my ($writer, $defaultindex) = @_;

    return unless defined $defaultindex;
    $writer->emptyTag('defaultIndex', 'name'=>$defaultindex);
}

# Program entry point: parse the command line, read collect.cfg and
# build.cfg, and write collectionConfig.xml and buildConfig.xml.
# Dies (after printing usage or a diagnostic) on bad arguments, on missing
# configuration files, and when an output file cannot be opened or closed.
sub main {
    my ($verbosity, $collectdir, $faillog);

    # note that no defaults are passed for most options as they're set
    # later (after we check the collect.cfg file)
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/', \$verbosity,
                         'collectdir/.*/', \$collectdir,
                         'faillog/.*/', \$faillog)) {
        print_usage();
        die "\n";
    }

    # get and check the collection name
    # (presumably use_collection also sets $ENV{'GSDLCOLLECTDIR'}, which is
    # read below -- verify against util.pm)
    my $collection = util::use_collection(@ARGV, $collectdir);
    if ($collection eq "") {
        print_usage();
        die "\n";
    }

    my $collconfigfilename =
        util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg");
    if (!-e $collconfigfilename) {
        print STDERR "collect.cfg not found!!";
        die "\n";
    }
    my $collectcfg = colcfg::read_collect_cfg($collconfigfilename);

    my $buildconfigfilename =
        util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "index", "build.cfg");
    if (!-e $buildconfigfilename) {
        print STDERR "build.cfg not found!!";
        die "\n";
    }
    my $buildcfg = colcfg::read_build_cfg($buildconfigfilename);

    my $colloutfile =
        util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "collectionConfig.xml");
    if (-e $colloutfile) {
        print STDOUT "collectionConfig file already exists! overwriting it!\n";
    }

    my $buildoutfile =
        util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "index", "buildConfig.xml");
    if (-e $buildoutfile) {
        print STDOUT "buildConfig file already exists! overwriting it!\n";
    }

    # assume mg unless collect.cfg says otherwise
    my $buildtype = defined $collectcfg->{'buildtype'}
        ? $collectcfg->{'buildtype'}
        : 'mg';

    # Open both outputs with an explicit mode argument and fail loudly: an
    # unchecked open would otherwise surface later as an obscure writer error.
    my $buildoutput = IO::File->new($buildoutfile, '>')
        or die "Cannot open $buildoutfile for writing: $!\n";
    my $buildwriter = XML::Writer->new(OUTPUT => $buildoutput);
    $buildwriter->startTag('buildConfig');

    my $colloutput = IO::File->new($colloutfile, '>')
        or die "Cannot open $colloutfile for writing: $!\n";
    my $collwriter = XML::Writer->new(OUTPUT => $colloutput);
    $collwriter->startTag('collectionConfig');

    # output the collection metadata to the collectionConfig file
    my $collectionmeta = $collectcfg->{'collectionmeta'};
    $collwriter->startTag('metadataList');

    # collect.cfg metadata name -> collectionConfig.xml metadata name;
    # entries not listed here are not carried over
    my %name_map = ('collectionname',      'colName',
                    'collectionextra',     'colDescription',
                    'iconcollection',      'colIcon',
                    'iconcollectionsmall', 'colIconSmall');

    if (defined $collectionmeta) {
        print STDOUT "coll meta defined \n";
        # sorted so that the generated file is reproducible between runs
        foreach my $entry (sort keys %$collectionmeta) {
            print STDOUT "entry=$entry\n";
            next unless defined $name_map{$entry};

            my $name = $name_map{$entry};
            print STDOUT "name=$name\n";
            foreach my $lang (sort keys %{$collectionmeta->{$entry}}) {
                print STDOUT "lang=$lang\n";
                outputmetadata($collwriter, $name, $lang,
                               $collectionmeta->{$entry}->{$lang});
            }
        }
    }
    $collwriter->endTag('metadataList');

    # num docs
    my $numdocs = $buildcfg->{'numdocs'};
    $buildwriter->startTag('metadataList');
    $buildwriter->startTag('metadata', 'name'=>'numDocs');
    $buildwriter->characters($numdocs);
    $buildwriter->endTag('metadata');
    $buildwriter->endTag('metadataList');

    # indexes
    my $indexmap = {};
    if (defined $buildcfg->{'indexmap'}) {
        $indexmap = _parse_arrow_map($buildcfg->{'indexmap'});
    }
    else {
        print STDERR "indexmap not defined\n";
    }

    # translate the configured default index through the index map; leave it
    # undefined when there is nothing to translate
    my $defaultindex;
    if (defined $collectcfg->{'defaultindex'}) {
        $defaultindex = $indexmap->{$collectcfg->{'defaultindex'}};
    }
    else {
        print STDERR "defaultindex not defined\n";
    }

    $buildwriter->startTag('serviceRackList');

    my @levels = ('Document');
    my $defaultlevel = 'Document';

    # do the retrieve service
    # assume mgpp or mg
    if ($buildtype eq 'mgpp') {
        if (defined $collectcfg->{'levels'}) {
            push @levels, @{$collectcfg->{'levels'}};
            foreach my $level (@levels) {
                $defaultlevel = 'Section' if $level eq 'Section';
            }
        }
        $buildwriter->startTag('serviceRack', 'name'=>'GS2MGPPRetrieve');
        _write_levels($buildwriter, $defaultlevel, \@levels);
    }
    else {
        $buildwriter->startTag('serviceRack', 'name'=>'GS2MGRetrieve');
        _write_default_index($buildwriter, $defaultindex);
    }

    # add in the classifiers if needed
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;
    if (defined $collectcfg->{'classify'}) {
        my $classifiers = $collectcfg->{'classify'};
        foreach my $cl (@$classifiers) {
            print STDERR "cl=$cl\n";
            # the counter advances for every classifier, Phind included
            my $name = "CL$count";
            $count++;

            my $classname = $cl->[0];
            if ($classname eq "Phind") {
                $phind = 1;
                next;
            }

            my $document_interleave = "true";
            my $orientation = "vertical";
            if ($classname eq "AZList") { # there may be others
                $document_interleave = "false";
                $orientation = "horizontal";
            }

            if (not $started_classifiers) {
                $buildwriter->startTag('classifierList');
                $started_classifiers = 1;
            }

            # use metadata
            # NOTE(review): the body of this loop was truncated in the copy
            # of the source this rewrite was made from (the text between
            # "$i<" and "->emptyTag" was lost). It is reconstructed here as
            # "take the value following -metadata" -- confirm against the
            # upstream script before relying on it.
            my $content = '';
            foreach my $i (0 .. $#{$cl}) {
                if ($cl->[$i] eq '-metadata') {
                    $content = $cl->[$i + 1] if defined $cl->[$i + 1];
                    last;
                }
            }

            $buildwriter->emptyTag('classifier',
                                   'name'=>$name,
                                   'content'=>$content,
                                   'orientation'=>$orientation,
                                   'documentInterleave'=>$document_interleave);
        } # foreach classifier

        if ($started_classifiers) {
            # end the classifiers
            $buildwriter->endTag('classifierList');
        }
    }

    # close off the Retrieve service
    $buildwriter->endTag('serviceRack');

    # the phind classifier is a separate service
    if ($phind) {
        $buildwriter->emptyTag('serviceRack', 'name'=>'PhindPhraseBrowse');
    }

    # do the search service
    if ($buildtype eq 'mgpp') {
        $buildwriter->startTag('serviceRack', 'name'=>'GS2MGPPSearch');
        _write_levels($buildwriter, $defaultlevel, \@levels);

        # fieldlist
        print STDOUT "trying fields\n";
        if (defined $buildcfg->{'indexfieldmap'}) {
            print STDOUT "doing fields\n";
            my $fieldmap = _parse_arrow_map($buildcfg->{'indexfieldmap'});

            $buildwriter->startTag('fieldList');
            foreach my $f (sort keys %$fieldmap) {
                $buildwriter->emptyTag('field',
                                       'shortname'=>$fieldmap->{$f},
                                       'name'=>$f);
            }
            $buildwriter->endTag('fieldList');
        }
        else {
            print STDERR "indexfieldmap not defined\n";
        }
    }
    else {
        # Same "mgpp or mg" assumption as the retrieve service above; a
        # plain else guarantees the rack opened here matches the endTag
        # below whatever the build type is.
        $buildwriter->startTag('serviceRack', 'name'=>'GS2MGSearch');
    }

    _write_default_index($buildwriter, $defaultindex);
    $buildwriter->startTag('indexList');
    # for each index
    foreach my $i (sort keys %$indexmap) {
        $buildwriter->emptyTag('index', 'name'=>$indexmap->{$i});
    }
    $buildwriter->endTag('indexList');
    $buildwriter->endTag('serviceRack');

    $buildwriter->endTag('serviceRackList');
    $buildwriter->endTag('buildConfig');
    $collwriter->endTag('collectionConfig');

    # finish both documents, not just the build one
    $buildwriter->end();
    $collwriter->end();

    # buffered write errors only show up at close time
    $buildoutput->close()
        or die "Error closing $buildoutfile: $!\n";
    $colloutput->close()
        or die "Error closing $colloutfile: $!\n";
}

# Write one <metadata name=... lang=...>value</metadata> element.
#
#   $collwriter - XML::Writer to emit to
#   $metaname   - value for the name attribute
#   $lang       - 'default' (written as 'en'), a bracketed spec such as
#                 "[l=fr]" (the two-letter code is extracted), or any other
#                 string, which is used as-is
#   $metavalue  - the metadata text
#
# For names matching colIcon, a leading "_httpprefix_/collect/<x>/images/"
# or "_httpcollection_/images/" is stripped from the value.
sub outputmetadata {
    my ($collwriter, $metaname, $lang, $metavalue) = @_;

    $lang = 'en' if $lang eq 'default';
    if ($lang =~ /^\[/) {
        my ($code) = $lang =~ /\[l=(..)\]/;
        if (!defined $code) {
            # nothing sensible to put in the lang attribute
            print STDERR "cannot work out language from '$lang', skipping $metaname\n";
            return;
        }
        $lang = $code;
        print STDOUT "new lang = $lang\n";
    }

    my $newvalue;
    # decide on this sub's own argument, not on a variable of the caller
    if ($metaname =~ /colIcon/) {
        # may need to translate the value
        ($newvalue) = $metavalue =~ /^_httpprefix_\/collect\/.*\/images\/(.*)$/;
        ($newvalue) = $metavalue =~ /^_httpcollection_\/images\/(.*)$/
            if not defined $newvalue;
        $newvalue = $metavalue if not defined $newvalue;
        print STDOUT "old value=$metavalue. new value = $newvalue\n";
    }
    else {
        $newvalue = $metavalue;
    }

    $collwriter->startTag('metadata', 'name'=>$metaname, 'lang'=>$lang);
    $collwriter->characters($newvalue);
    $collwriter->endTag('metadata');
}

#$writer->startTag('');
#$writer->endTag('');
#$writer->characters();
#$writer->emptyTag('');

1;