Changeset 4184
- Timestamp:
- 2003-04-17T14:58:47+12:00 (21 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/bin/script/convert_coll_from_gs2.pl
r3983 r4184 5 5 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'}; 6 6 unshift (@INC, "$ENV{'GSDLHOME'}/perllib"); 7 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan"); 8 #unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins"); 9 # unshift (@INC, "$ENV{'GSDLHOME'}/perllib/classify"); 10 } 11 12 #use arcinfo; 7 unshift (@INC, "$ENV{'GSDL3HOME'}/lib/perl/cpan"); 8 } 9 13 10 use colcfg; 14 #use plugin;15 use docprint;16 11 use util; 17 12 use parsargv; … … 25 20 26 21 print STDOUT " -collectdir Directory where collection lives.\n"; 27 22 print STDOUT " -verbosity Controls the amount of output.\n\n"; 28 23 } 29 24 … … 36 31 if (!parsargv::parse(\@ARGV, 37 32 'verbosity/\d+/', \$verbosity, 38 'collectdir/.*/', \$collectdir, 39 'faillog/.*/', \$faillog)) { 33 'collectdir/.*/', \$collectdir)) { 40 34 &print_usage(); 41 35 die "\n"; … … 47 41 die "\n"; 48 42 } 49 43 50 44 $collconfigfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg"); 51 45 my $collectcfg; … … 57 51 die "\n"; 58 52 } 59 53 60 54 61 55 $buildconfigfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "index", "build.cfg"); … … 68 62 die "\n"; 69 63 } 70 64 71 65 72 66 … … 89 83 $buildtype = 'mg'; 90 84 } 91 85 92 86 my $buildoutput = new IO::File(">$buildoutfile"); 93 my $buildwriter = new XML::Writer(OUTPUT => $buildoutput );94 95 $buildwriter->startTag('buildConfig' );87 my $buildwriter = new XML::Writer(OUTPUT => $buildoutput, NEWLINES => 1); 88 89 $buildwriter->startTag('buildConfig', 'xmlns:gsf'=>"http://www.greenstone.org/configformat"); 96 90 97 91 my $colloutput = new IO::File(">$colloutfile"); 98 my $collwriter = new XML::Writer(OUTPUT => $colloutput );99 100 $collwriter->startTag('collectionConfig' );101 92 my $collwriter = new XML::Writer(OUTPUT => $colloutput, NEWLINES => 1); 93 94 $collwriter->startTag('collectionConfig', 'xmlns:gsf'=>"http://www.greenstone.org/configformat"); 95 102 96 #output the collection metadata to the collectionConfig file 103 97 my $collectionmeta = $collectcfg->{'collectionmeta'}; … … 111 105 112 106 if (defined $collectionmeta) { 113 print STDOUT "coll meta defined \n";114 107 foreach $entry ( keys %$collectionmeta) { 115 print STDOUT "entry=$entry\n";116 108 if (defined $name_map{$entry}) { 117 109 $name= $name_map{$entry}; 118 print STDOUT "name=$name\n";119 110 foreach $lang (keys %{$collectionmeta->{$entry}}) { 120 print STDOUT "lang=$lang\n"; 121 outputmetadata($collwriter, $name, $lang, $collectionmeta->{$entry}->{$lang}); 111 $value = $collectionmeta->{$entry}->{$lang}; 112 if ($entry =~ /^icon/) { 113 $value = format_icon_value($value); 114 } 115 outputmetadata($collwriter, $name, $lang, $value); 122 116 } 123 117 } … … 126 120 $collwriter->endTag('metadataList'); 127 121 128 #num docs 122 # output building metadata to build config file 129 123 my $numdocs = $buildcfg->{'numdocs'}; 130 124 $buildwriter->startTag('metadataList'); … … 154 148 $defaultindex = $indexmap->{$defaultindex}; 155 149 150 # levels for index 151 my $level_string = "Document"; 152 if (defined $collectcfg->{'levels'}) { 153 foreach $l (@{$collectcfg->{'levels'}}) { 154 $level_string .=",$l"; 155 } 156 } 157 # format stuff 158 my $format = $collectcfg->{'format'}; 159 160 #output the search stuff to coll cfg 161 $collwriter->startTag('search', 'type'=>$buildtype); 162 foreach $i (keys %$indexmap) { 163 $shortname = $indexmap->{$i}; 164 if ($buildtype eq 'mgpp') { 165 $collwriter->startTag('index', 'name'=>$shortname, 'content'=>$i, 166 'level'=>$level_string); 167 } elsif ($buildtype eq 'mg') { 168 ($level, $content) = $i =~ /^(.*):(.*)$/; 169 $collwriter->startTag('index', 'name'=>$shortname, 'content'=>$content, 'level'=>$level); 170 } 171 #find the coll meta stuff 172 $indexdisplay = ".$i"; 173 foreach $lang (keys %{$collectionmeta->{$indexdisplay}}) { 174 $value = $collectionmeta->{$indexdisplay}->{$lang}; 175 output_display($collwriter, $lang, $value); 176 } 177 $collwriter->endTag('index'); 178 179 } 180 181 # add in the format stuff 182 if (defined $format->{'SearchVList'}) { 183 184 $collwriter->startTag('format'); 185 write_format($collwriter, $format->{'SearchVList'}); 186 $collwriter->endTag('format'); 187 } 188 189 $collwriter->endTag('search'); 156 190 157 191 $buildwriter->startTag('serviceRackList'); … … 193 227 my $started_classifiers = 0; 194 228 if (defined $collectcfg->{'classify'}) { 229 $collwriter->startTag('browse'); 195 230 my $classifiers = $collectcfg->{'classify'}; 196 231 foreach $cl (@$classifiers) { 197 print STDERR "cl=$cl\n";198 232 $name = "CL$count"; 199 233 $count++; 200 234 my ($classname) = @$cl[0]; 201 if ($classname eq "Phind") {235 if ($classname =~ /^phind$/i) { 202 236 $phind=1; 237 #should add it into coll config classifiers 203 238 next; 204 239 } … … 206 241 my $document_interleave = "true"; 207 242 my $orientation = "vertical"; 208 if ($classname eq "AZList") { #there may be others 243 my $nodelinks = 0; 244 if ($classname eq "AZList" || $classname eq "AZCompactList") { #there may be others 209 245 $document_interleave = "false"; 210 246 $orientation = "horizontal"; 247 $nodelinks = 1; 211 248 } 212 249 if (not $started_classifiers) { … … 215 252 } 216 253 my $content = ''; #use metadata 217 218 for ($i=0; $i<scalar(@$cl); $i++) { 219 $arg = @$cl[$i]; 220 if ($arg eq "-metadata") { 221 $content = @$cl[$i+1]; 222 last; 254 if ($classname eq "DateList") { 255 $content = "Date"; 256 } else { 257 for ($i=0; $i<scalar(@$cl); $i++) { 258 $arg = @$cl[$i]; 259 if ($arg eq "-metadata") { 260 $content = @$cl[$i+1]; 261 last; 262 } 223 263 } 224 264 } … … 226 266 $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content, 'orientation'=>$orientation, 'documentInterleave'=>$document_interleave); 227 267 228 } #foreach classifier 268 $collwriter->startTag('classifier', 'name'=>$name, 'content'=>$content, 'type'=>$classname, 'level'=>'Document'); 269 $vlist = $name."VList"; 270 $hlist = $name."HList"; 271 $dlist = ""; 272 if ($classname eq "DateList") { 273 $dlist = "DateList"; 274 } 275 if ($nodelinks || defined $format->{$vlist} || defined $format->{$hlist} || defined $format->{$dlist}) { 276 $collwriter->startTag('format'); 277 if ($nodelinks) { 278 write_nodelinks_format($collwriter); 279 } 280 281 if (defined $format->{$vlist}) { 282 write_format($collwriter, $format->{$vlist}); 283 } 284 if (defined $format->{$hlist}) { 285 write_format($collwriter, $format->{$hlist}); 286 } 287 if (defined $format->{$dlist}) { 288 write_format($collwriter, $format->{$dlist}); 289 } 290 $collwriter->endTag('format'); 291 } 292 $collwriter->endTag('classifier'); 293 } #foreach classifier 229 294 if ($started_classifiers) { 230 295 # end the classifiers … … 232 297 } 233 298 299 $collwriter->endTag('browse'); 234 300 } 235 301 # close off the Retrieve service 236 302 $buildwriter->endTag('serviceRack'); 237 303 238 304 # the phind classifier is a separate service 239 305 if ($phind) { … … 254 320 255 321 #fieldlist 256 print STDOUT "trying fields\n";257 322 my $fieldmap = {}; 258 323 if (defined $buildcfg->{'indexfieldmap'}) { 259 print STDOUT "doing fields\n";260 324 $fieldmap_t = $buildcfg->{'indexfieldmap'}; 261 325 foreach $f (@$fieldmap_t) { … … 300 364 301 365 sub outputmetadata { 302 my ($ collwriter, $metaname, $lang, $metavalue) = @_;366 my ($writer, $metaname, $lang, $metavalue) = @_; 303 367 $lang = 'en' if $lang eq 'default'; 304 368 if ($lang =~ /^\[/) { 305 ($lang) = $lang =~ /\[l=(..)\]/; 306 print STDOUT "new lang = $lang\n"; 307 } 308 my $newvalue; 309 if ($name =~ /colIcon/) { 310 #may need to translate the value 311 ($newvalue) = $metavalue =~ /^_httpprefix_\/collect\/.*\/images\/(.*)$/; 312 ($newvalue) = $metavalue =~ /^_httpcollection_\/images\/(.*)$/ if not defined $newvalue; 313 $newvalue = $metavalue if not defined $newvalue; 314 print STDOUT "old value=$metavalue. new value = $newvalue\n"; 315 } else { 316 $newvalue = $metavalue; 317 } 318 $collwriter->startTag('metadata', 'name'=>$metaname, 'lang'=>$lang); 319 $collwriter->characters($newvalue); 320 $collwriter->endTag('metadata'); 321 } 322 369 ($lang) = $lang =~ /\[l=(.*)\]/; 370 } 371 $writer->startTag('metadata', 'name'=>$metaname, 'lang'=>$lang); 372 $writer->characters($metavalue); 373 $writer->endTag('metadata'); 374 } 375 376 sub output_display { 377 my ($writer, $lang, $value) = @_; 378 $lang = 'en' if $lang eq 'default'; 379 if ($lang =~ /^\[/) { 380 ($lang) = $lang =~ /\[l=(.*)\]/; 381 } 382 $writer->startTag('displayName', 'lang'=>$lang); 383 $writer->characters($value); 384 $writer->endTag('displayName'); 385 } 386 sub format_icon_value { 387 my ($value) = @_; 388 if ($value =~ /^_/) { 389 ($newvalue) = $value =~ /\/([^\/]*)$/; 390 if ($newvalue) { 391 return $newvalue; 392 } 393 } 394 return $value; 395 } 396 397 sub write_format { 398 my ($writer, $old_format) = @_; 399 #convert [] to <gsf:...> 400 #remove IFs, for now just make the first option true 401 $old_format =~ s/\{If\}\{[^,]*,([^,\}]*)(,[^\}]*)?\}/$1/g; 402 #remove ORs, for now just make the first option true 403 $old_format =~ s/\{Or\}\{([^,]*),[^\}]*\}/$1/g; 404 $old_format =~ s/\[Text\]/\<gsf:text\/\>/g; 405 $old_format =~ s/\[num\]/\<gsf:num\/\>/g; 406 $old_format =~ s/\[link\]/\<gsf:link\>/g; 407 $old_format =~ s/\[\/link\]/\<\/gsf:link\>/g; 408 $old_format =~ s/\[srclink\]/\<gsf:link type=\'source\'\>/g; 409 $old_format =~ s/\[\/srclink\]/\<\/gsf:link\>/g; 410 $old_format =~ s/\[icon\]/\<gsf:icon\/\>/g; 411 $old_format =~ s/\[srcicon\]/\<gsf:icon type=\'source\'\/\>/g; 412 413 #now do the rest of the {} which are assumed to be metadata 414 $old_format =~ s/\[([^\]]*\:)?([^\]\:]*)\]/\<gsf:metadata name=\'$2\' select=\'$1\'\/\>/g; 415 #do the parent stuff 416 $old_format =~ s/(select=\'parent)\:\'/$1\'/g; 417 $old_format =~ s/select=\'parent\(Top\)\:\'/select=\'root\'/g; 418 $old_format =~ s/select=\'parent\(All\)\:\'/select=\'ancestors\'/g; 419 $old_format =~ s/select=\'parent\(All\'([^\']*)\'\)\:\'/select=\'ancestors\' separator=\'$1\'/g; 420 #remove any select='' 421 $old_format =~ s/select=\'\'//g; 422 #turn <br> into <br /> 423 $old_format =~ s/\<br\>/\<br \/\>/g; 424 #turn <p> into <p /> 425 $old_format =~ s/\<p\>/\<p \/\>/g; 426 427 #put quotes around any atts 428 $old_format =~ s/=([a-z]+)([> ])/=\'$1\'$2/g; 429 430 $writer->startTag('gsf:template', 'match'=>'documentNode'); 431 $writer->charactersXML($old_format); 432 $writer->endTag('gsf:template'); 433 434 435 } 436 437 438 sub write_nodelinks_format { 439 440 my ($writer) = @_; 441 442 $writer->startTag('gsf:template', 'match'=>'classifierNode'); 443 $writer->startTag('td'); 444 $writer->startTag('gsf:link', 'type'=>'classifier'); 445 $writer->emptyTag('gsf:metadata', 'name'=>'Title'); 446 $writer->endTag('gsf:link'); 447 $writer->endTag('td'); 448 $writer->endTag('gsf:template'); 449 450 } 323 451 #$writer->startTag(''); 324 452 #$writer->endTag('');
Note:
See TracChangeset
for help on using the changeset viewer.