source: main/trunk/greenstone3/bin/script/convert_coll_from_gs2.pl@ 29045

Last change on this file since 29045 was 29045, checked in by ak19, 10 years ago

Kathy suggested that the GS3 buildConfig.xml generated from a GS2 build.cfg should ideally have the same content as the buildConfig.xml generated by a rebuild of the collection. This is so that a collection converted from GS2 to GS3 can be previewed immediately without rebuilding. Although immediate previewing already worked, the intention is nevertheless to make the automatically generated buildConfig.xml file as close as possible to the one generated upon a rebuild.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 31.1 KB
RevLine 
[3688]1#!/usr/bin/perl -w
2
3BEGIN {
4 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
[6307]5 die "GSDL3HOME not set\n" unless defined $ENV{'GSDL3HOME'};
[10326]6 die "GSDL3SRCHOME not set\n" unless defined $ENV{'GSDL3SRCHOME'};
[3688]7 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
8 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
[10326]9 unshift (@INC, "$ENV{'GSDL3SRCHOME'}/lib/perl/cpan");
[3688]10}
11
12use colcfg;
[28798]13use docprint; # for sub escape_text
[3688]14use util;
15use parsargv;
16use FileHandle;
17use XML::Writer;
[10658]18#can't get this to work on windows
19#use GDBM_File;
[3688]20
[10360]21use strict;
22
# File-wide switch set by the -convert_format_stmts command line option
# (parsed in main) and consulted by write_format: 0 leaves GS2 format
# statements unconverted, non-zero converts them with regular expressions.
my $convert_format_stmts = 0;

# Run the conversion.
main();
# Print the command line usage summary for this script to STDOUT.
# Takes no arguments; the return value is not meaningful to callers.
sub print_usage() {
    my @usage_lines = (
        "Usage: convert_coll_from_gs2.pl [options] coll-name\n",
        "options:\n",
        " -collectdir Directory where collection lives.\n",
        " -verbosity Controls the amount of output.\n",
        " -defaultlang The language that is considered the default (for display text etc). defaults to 'en'\n",
        " -convert_format_stmts (Deprecated.) Switch this on if you want the old behaviour of this script, which is \n",
        " to process format statements using perl regular expressions.\n",
        " This option is deprecated in favour of using 'formatconverter' which interprets \n",
        " format statements directly using the same C++ parsing code as in GS2 runtime.\n\n",
    );

    # one print of the whole list emits the same bytes as one print per line
    print STDOUT @usage_lines;
}
38
[13828]39
# Entry point: convert a Greenstone 2 collection's configuration into the
# two Greenstone 3 configuration files.
#
# Reads   <collectdir>/<collection>/etc/collect.cfg   (via colcfg::read_collect_cfg)
#         <collectdir>/<collection>/index/build.cfg   (via colcfg::read_build_cfg)
# Writes  <collectdir>/<collection>/etc/collectionConfig.xml
#         <collectdir>/<collection>/index/buildConfig.xml
#
# Command line (parsed from @ARGV): [-verbosity N] [-collectdir DIR]
# [-defaultlang LANG] [-convert_format_stmts] coll-name
#
# Dies when the arguments are invalid, when either input file is missing,
# or when either output file cannot be opened for writing.  Existing output
# files are overwritten (a notice is printed to STDOUT).
sub main {

    my ($defaultlang, $verbosity, $collectdir);
    # note that no defaults are passed for most options as they're set
    # later (after we check the collect.cfg file)
    if (!&parsargv::parse(\@ARGV,
                          'verbosity/\d+/', \$verbosity,
                          'collectdir/.*/', \$collectdir,
                          'defaultlang/.*/', \$defaultlang,
                          'convert_format_stmts', \$convert_format_stmts)) {
        &print_usage();
        die "\n";
    }

    # get and check the collection name
    my ($collection) = @ARGV;
    if (!defined($collection) || $collection eq "") {
        die "No collection specified\n";
    }
    if ($collection eq "gs2model") {
        die "You cant convert the model collection\n";
    }

    if (!defined $collectdir || $collectdir eq "") {
        $collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");
    }

    if (!defined $defaultlang || $defaultlang eq "") {
        $defaultlang = 'en';
    }
    # add on the coll name
    $collectdir = &util::filename_cat ($collectdir, $collection);

    my $collconfigfilename = &util::filename_cat ($collectdir, "etc", "collect.cfg");
    print STDOUT "coll config=$collconfigfilename\n";
    my $collectcfg;
    if (-e $collconfigfilename) {
        $collectcfg = &colcfg::read_collect_cfg ($collconfigfilename);
    } else {
        print STDERR "collect.cfg not found!!";
        die "\n";
    }

    my $buildconfigfilename = &util::filename_cat ($collectdir, "index", "build.cfg");
    my $buildcfg;
    if (-e $buildconfigfilename) {
        $buildcfg = &colcfg::read_build_cfg ($buildconfigfilename);
    } else {
        print STDERR "build.cfg not found!!";
        die "\n";
    }

    my $colloutfile = &util::filename_cat ($collectdir, "etc", "collectionConfig.xml");
    if (-e $colloutfile) {
        print STDOUT "collectionConfig file already exists! overwriting it!\n";
    }

    my $buildoutfile = &util::filename_cat ($collectdir, "index", "buildConfig.xml");
    if (-e $buildoutfile) {
        print STDOUT "buildConfig file already exists! overwriting it!\n";
    }

    # The collection database is never opened (the GDBM lookup could not be
    # made to work on windows), so this handle stays undefined.  It is still
    # passed to isHorizontalClassifier and close_database below.
    my $database;

    my $buildtype = (defined $buildcfg->{'buildtype'}) ? $buildcfg->{'buildtype'} : 'mg';
    my $indexstem = $buildcfg->{'indexstem'}; # stays undef when build.cfg has none
    my $infodbtype = $buildcfg->{'infodbtype'} || "gdbm";
    my $earliestDatestamp = $buildcfg->{'earliestdatestamp'} || undef;

    # Open both output files before anything is written, and fail with a
    # clear message instead of crashing later on an undefined file handle.
    my $buildoutput = IO::File->new($buildoutfile, '>')
        or die "Could not open $buildoutfile for writing: $!\n";
    binmode($buildoutput, ":utf8");
    my $colloutput = IO::File->new($colloutfile, '>')
        or die "Could not open $colloutfile for writing: $!\n";
    binmode($colloutput, ":utf8");

    my $buildwriter = XML::Writer->new(OUTPUT => $buildoutput, NEWLINES => 1);
    $buildwriter->xmlDecl("UTF-8");
    $buildwriter->startTag('buildConfig', 'xmlns:gsf'=>"http://www.greenstone.org/greenstone3/schema/ConfigFormat");

    my $collwriter = XML::Writer->new(OUTPUT => $colloutput, NEWLINES => 1);
    $collwriter->xmlDecl("UTF-8");
    $collwriter->startTag('CollectionConfig', 'xmlns:gsf'=>"http://www.greenstone.org/greenstone3/schema/ConfigFormat", 'xmlns:xsl'=>'http://www.w3.org/1999/XSL/Transform');

    #output the collection metadata to the collectionConfig file
    $collwriter->startTag('metadataList');
    &output_metadata($collwriter, 'default', 'creator', $collectcfg->{'creator'});
    &output_metadata($collwriter, 'default', 'public', $collectcfg->{'public'});
    $collwriter->endTag('metadataList');

    #output the display collectionmeta to collectionConfig.xml
    my $collectionmeta = $collectcfg->{'collectionmeta'};
    if (defined $collectionmeta) {
        my %name_map = ('collectionname', 'name',
                        'collectionextra', 'description',
                        'iconcollection', 'icon',
                        'iconcollectionsmall', 'smallicon');

        $collwriter->startTag('displayItemList');
        foreach my $entry (keys %$collectionmeta) {
            # some metadata names need to be specially mapped to other names
            # most of them however, can retain their original names
            my $name = (defined $name_map{$entry}) ? $name_map{$entry} : $entry;
            foreach my $lang (keys %{$collectionmeta->{$entry}}) {
                my $value = $collectionmeta->{$entry}->{$lang};
                if ($entry =~ /^icon/) {
                    $value = format_icon_value($value);
                } else {
                    $value = tidy_up_display_item($value);
                }
                &output_display($collwriter, $name, $lang, $value);
            }
        }
        $collwriter->endTag('displayItemList');
    }

    # output building metadata to build config file
    my $numdocs = $buildcfg->{'numdocs'};
    $buildwriter->startTag('metadataList');
    &output_metadata($buildwriter, '', 'numDocs', $numdocs);
    &output_metadata($buildwriter, '', 'buildType', $buildtype);
    &output_metadata($buildwriter, '', 'indexStem', $indexstem) if (defined $indexstem);
    &output_metadata($buildwriter, '', 'infodbType', $infodbtype);
    &output_metadata($buildwriter, '', 'earliestDatestamp', $earliestDatestamp) if (defined $earliestDatestamp);
    $buildwriter->endTag('metadataList');

    #indexes
    # maps index name to shortname
    my $indexmap = {};
    # keeps the order for indexes
    my @indexlist = ();
    my $defaultindex = "";
    my $maptype = ($buildtype eq "mg") ? "indexmap" : "indexfieldmap";
    if (defined $buildcfg->{$maptype}) {
        foreach my $entry (@{$buildcfg->{$maptype}}) {
            my ($index_name, $index_shortname) = $entry =~ /^(.*)\-\>(.*)$/;
            if (!defined $index_name) {
                # not of the form name->shortname: skip it rather than
                # record an undefined hash key
                print STDERR "ignoring malformed $maptype entry '$entry'\n";
                next;
            }
            $indexmap->{$index_name} = $index_shortname;
            push @indexlist, $index_name;
            # the first index in build.cfg is the default unless collect.cfg overrides it
            $defaultindex = $index_name if (scalar(@indexlist) == 1);
        }
    } else {
        print STDERR "$maptype not defined\n";
    }
    # collect.cfg may name the default index explicitly
    if (defined $collectcfg->{'defaultindex'}) {
        $defaultindex = $collectcfg->{'defaultindex'};
    }

    # levels
    my $levelmap = {};
    my @levellist = ();
    my $default_search_level = "";
    my $default_search_level_shortname = "";
    my $default_retrieve_level = "Sec";
    if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
        if (defined $buildcfg->{'levelmap'}) {
            foreach my $entry (@{$buildcfg->{'levelmap'}}) {
                my ($level_name, $level_shortname) = $entry =~ /^(.*)\-\>(.*)$/;
                if (!defined $level_name) {
                    print STDERR "ignoring malformed levelmap entry '$entry'\n";
                    next;
                }
                $levelmap->{$level_name} = $level_shortname;
                push @levellist, $level_name;
                if (scalar(@levellist) == 1) {
                    $default_search_level = $level_name;
                    $default_search_level_shortname = $level_shortname;
                }
            }
        }

        if (defined $collectcfg->{'defaultlevel'}) {
            $default_search_level = $collectcfg->{'defaultlevel'};
            $default_search_level_shortname = $levelmap->{$default_search_level};
        }
        if (defined $buildcfg->{'textlevel'}) {
            $default_retrieve_level = $buildcfg->{'textlevel'};
        }
    }

    # format stuff
    my $format = $collectcfg->{'format'};

    #output the search stuff to coll cfg
    $collwriter->startTag('search', 'type'=>$buildtype);
    # walk @indexlist (not the hash keys) so indexes keep their build.cfg order
    foreach my $index_name (@indexlist) {
        $collwriter->startTag('index', 'name'=>$index_name);
        #find the coll meta stuff
        my $indexdisplay = ".$index_name";
        foreach my $lang (keys %{$collectionmeta->{$indexdisplay}}) {
            my $value = $collectionmeta->{$indexdisplay}->{$lang};
            # NOTE(review): this used to write the index name itself and leave
            # the looked-up display text unused; the level loop below writes
            # the display text, so do the same here - confirm this is intended.
            output_display($collwriter, 'name', $lang, $value);
        }
        $collwriter->endTag('index');
    }

    #output the defaultIndex to coll cfg
    $collwriter->emptyTag('defaultIndex', 'name'=>$defaultindex);

    # indexOptions
    if (defined $collectcfg->{'indexoptions'}) {
        foreach my $option (@{$collectcfg->{'indexoptions'}}) {
            $collwriter->emptyTag('indexOption', 'name'=>$option);
        }
    }

    #indexSubcollection
    my $indexsubcollections = $collectcfg->{'indexsubcollections'};
    if (defined $indexsubcollections) {
        foreach my $subcoll (@$indexsubcollections) {
            $collwriter->startTag('indexSubcollection', 'name'=>$subcoll);
            &output_display($collwriter, 'name', $defaultlang, $subcoll);
            $collwriter->endTag('indexSubcollection');
        }
    }

    #subcollection
    my $subcollection = $collectcfg->{'subcollection'};
    if (defined $subcollection) {
        foreach my $entry (keys %$subcollection) {
            my $value = $subcollection->{$entry};
            $collwriter->emptyTag('subcollection', 'filter'=>$value, 'name'=>$entry);
        }
    }

    #indexlanguage
    my $languages = $collectcfg->{'languages'};
    if (defined $languages) {
        foreach my $language (@$languages) {
            $collwriter->startTag('indexLanguage', 'name'=>$language);
            &output_display($collwriter, 'name', $defaultlang, $language);
            $collwriter->endTag('indexLanguage');
        }
    }

    # level stuff for mgpp/lucene
    if ($buildtype eq 'mgpp' || $buildtype eq 'lucene') {
        # walk @levellist so levels keep their build.cfg order
        foreach my $level_name (@levellist) {
            $collwriter->startTag('level', 'name'=>$level_name);
            #find the coll meta stuff
            my $leveldisplay = ".$level_name";
            foreach my $lang (keys %{$collectionmeta->{$leveldisplay}}) {
                my $value = $collectionmeta->{$leveldisplay}->{$lang};
                output_display($collwriter, 'name', $lang, $value);
            }
            $collwriter->endTag('level');
        }
        $collwriter->emptyTag('defaultLevel', 'name'=>$default_search_level);
    }

    # add in the search type
    if (defined $format->{'SearchTypes'}) {
        $collwriter->startTag('format', 'name'=>"searchType");
        $collwriter->charactersXML($format->{'SearchTypes'});
        $collwriter->endTag('format');
    }

    # add in the format stuff: SearchVList wins over the general VList
    if (defined $format->{'SearchVList'}) {
        $collwriter->startTag('format');
        write_format($collwriter, $format->{'SearchVList'}, "document");
        $collwriter->endTag('format');
    }
    elsif (defined $format->{'VList'}) {
        $collwriter->startTag('format');
        write_format($collwriter, $format->{'VList'}, "document");
        $collwriter->endTag('format');
    }

    $collwriter->endTag('search');

    # import plugins
    # if ImagePlugin is added, then need to add in a replaceListRef element for gs2-image
    my $contains_image_plugin = 0;

    my $plugins = $collectcfg->{'plugin'};
    if (defined $plugins) {
        $collwriter->startTag('import');
        $collwriter->startTag('pluginList');
        foreach my $pl (@$plugins) {
            my $pluginname = $pl->[0];
            if ($pluginname =~ m/^(ImagePlugin|ImagePlug|PagedImagePlugin)$/) {
                $contains_image_plugin = 1;
            }
            $collwriter->startTag('plugin', 'name'=>$pluginname);
            &_write_option_list($collwriter, $pl);
            $collwriter->endTag('plugin');
        }
        $collwriter->endTag('pluginList');
        $collwriter->endTag('import');
    }

    $buildwriter->startTag('serviceRackList');

    my $service_type = "MG";
    if ($buildtype eq 'mgpp') {
        $service_type = "MGPP";
    } elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }

    #indexSubcollectionList
    my $defaultsubcollection = "";
    my @subcollist;
    my $subcolmap = {};
    if (defined $buildcfg->{'subcollectionmap'}) {
        foreach my $entry (@{$buildcfg->{'subcollectionmap'}}) {
            my @pair = split(/->/, $entry);
            if (!defined $pair[0]) {
                print STDERR "ignoring malformed subcollectionmap entry '$entry'\n";
                next;
            }
            $subcolmap->{$pair[0]} = $pair[1];
            push @subcollist, $pair[0];
            # the default is the shortname of the first subcollection
            $defaultsubcollection = $pair[1] if (scalar(@subcollist) == 1);
        }
    }

    #do the retrieve service
    $buildwriter->startTag('serviceRack', 'name'=>"GS2".$service_type."Retrieve");
    if ($buildtype eq 'mgpp' || $buildtype eq 'lucene') {
        $buildwriter->emptyTag('defaultLevel', 'shortname'=>$default_retrieve_level);
    } elsif ($buildtype eq "mg") {
        $buildwriter->emptyTag('defaultIndex', 'shortname'=>$defaultindex);
    }

    if ((defined $defaultsubcollection) && ($defaultsubcollection ne "")) {
        $buildwriter->emptyTag('defaultIndexSubcollection', 'shortname'=>$defaultsubcollection);
    }

    if (defined $indexstem) {
        $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
    }
    $buildwriter->emptyTag('databaseType', 'name'=>$infodbtype) if (defined $infodbtype);

    # close off the Retrieve service
    $buildwriter->endTag('serviceRack');

    # add in the classifiers if needed
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;
    if (defined $collectcfg->{'classify'}) {
        $collwriter->startTag('browse');
        # add in default format if necessary
        if (defined $format->{"VList"} || defined $format->{"HList"}) {
            # global formats
            $collwriter->startTag('format');
            if (defined $format->{"VList"}) {
                # VList applies to both classifier and doc nodes
                write_format($collwriter, $format->{"VList"}, "document");
                write_format($collwriter, $format->{"VList"}, "classifier");
            }
            if (defined $format->{"HList"}) {
                # hlist is only for classifier nodes
                write_format($collwriter, $format->{"HList"}, "horizontal");
            }
            $collwriter->endTag('format');
        }

        my $classifiers = $collectcfg->{'classify'};
        foreach my $cl (@$classifiers) {
            # every classifier, phind included, consumes one CLn number
            my $name = "CL$count";
            $count++;
            my $classname = $cl->[0];
            if ($classname =~ /^phind$/i) {
                $phind = 1;
                #should add it into coll config classifiers
                next;
            }

            my $horizontalAtTop = &isHorizontalClassifier($database, $name);
            if (not $started_classifiers) {
                # the Browse service rack is only opened once there is a
                # non-phind classifier to put in it
                $buildwriter->startTag('serviceRack', 'name'=>'GS2Browse');
                if (defined $indexstem) {
                    $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
                }
                $buildwriter->emptyTag('databaseType', 'name'=>$infodbtype) if (defined $infodbtype);

                $buildwriter->startTag('classifierList');
                $started_classifiers = 1;
            }

            my $content = ''; #use buttonname first, then metadata
            if ($classname eq "DateList") {
                $content = "Date";
            } else {
                for (my $i = 0; $i < scalar(@$cl); $i++) {
                    my $arg = $cl->[$i];
                    if ($arg eq "-buttonname") {
                        $content = $cl->[$i+1];
                        last;
                    } elsif ($arg eq "-metadata") {
                        $content = $cl->[$i+1];
                    }

                    # remove "ex." prefix from "ex.metaname" but not from "ex.namespace.metaname"
                    $content =~ s@ex\.([^.]+)(,|;|$)@$1$2@g;
                }
            }
            if ($horizontalAtTop) {
                $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content, 'horizontalAtTop'=>'true');
            } else {
                $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content);
            }

            $collwriter->startTag('classifier', 'name'=>$classname);
            &_write_option_list($collwriter, $cl);

            my $vlist = $name."VList";
            my $hlist = $name."HList";
            my $dlist = "";
            if ($classname eq "DateList") {
                $dlist = "DateList";
            }
            # need to work out how to split into classifier and document
            if (defined $format->{$vlist} || defined $format->{$hlist} || defined $format->{$dlist}) {
                $collwriter->startTag('format');
                if (defined $format->{$vlist}) {
                    write_format($collwriter, $format->{$vlist}, "document");
                    write_format($collwriter, $format->{$vlist}, "classifier");
                }
                if (defined $format->{$hlist}) {
                    write_format($collwriter, $format->{$hlist}, "horizontal");
                }
                if (defined $format->{$dlist}) {
                    write_format($collwriter, $format->{$dlist}, "document");
                }
                $collwriter->endTag('format');
            }
            $collwriter->endTag('classifier');
        } #foreach classifier

        if ($started_classifiers) {
            # end the classifiers
            $buildwriter->endTag('classifierList');
            # close off the Browse service
            $buildwriter->endTag('serviceRack');
        }

        $collwriter->endTag('browse');
    }

    # the phind classifier is a separate service
    if ($phind) {
        $buildwriter->emptyTag('serviceRack', 'name'=>'PhindPhraseBrowse');
    }

    # do the search service
    $buildwriter->startTag('serviceRack', 'name'=>'GS2'.$service_type.'Search');
    $buildwriter->emptyTag('defaultIndex', 'shortname'=>$indexmap->{$defaultindex});
    $buildwriter->startTag('indexList');
    #for each index
    foreach my $index_name (@indexlist) {
        $buildwriter->emptyTag('index', 'name'=>$index_name, 'shortname'=>$indexmap->{$index_name});
    }
    $buildwriter->endTag('indexList');
    if (defined $indexstem) {
        $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
    }
    $buildwriter->emptyTag('databaseType', 'name'=>$infodbtype) if (defined $infodbtype);

    # index options
    if ($buildtype eq 'mg' || $buildtype eq 'mgpp') {
        $buildwriter->startTag('indexOptionList');
        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/ ) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        $buildwriter->emptyTag('indexOption', 'name'=>'stemIndexes', 'value'=>$stemindexes);

        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        $buildwriter->emptyTag('indexOption', 'name'=>'maxnumeric', 'value'=>$maxnumeric);

        $buildwriter->endTag('indexOptionList');
    }

    if ($buildtype eq 'mgpp' || $buildtype eq 'lucene') {

        # level info
        $buildwriter->emptyTag('defaultLevel', 'shortname'=>$default_search_level_shortname);
        $buildwriter->emptyTag('defaultDBLevel', 'shortname'=>$default_retrieve_level);
        $buildwriter->startTag('levelList');
        foreach my $level_name (@levellist) {
            $buildwriter->emptyTag('level', 'name'=>$level_name, 'shortname'=>$levelmap->{$level_name});
        }
        $buildwriter->endTag('levelList');

        # do the search types if there
        if (defined $collectcfg->{'searchtype'}) {
            $buildwriter->startTag('searchTypeList');
            foreach my $st (@{$collectcfg->{'searchtype'}}) {
                $buildwriter->emptyTag('searchType', 'name'=>$st);
            }
            $buildwriter->endTag('searchTypeList');
        } elsif (defined $format->{'SearchTypes'}) {
            #check format statement
            my $searchtype = $format->{'SearchTypes'};
            $buildwriter->startTag('searchTypeList');
            if ($searchtype =~ /form/) {
                $buildwriter->emptyTag('searchType', 'name'=>'form');
            }
            if ($searchtype =~ /plain/) {
                $buildwriter->emptyTag('searchType', 'name'=>'plain');
            }
            $buildwriter->endTag('searchTypeList');
        }
    }

    #indexLanguageList
    my $indexlanguages = $collectcfg->{'languages'};
    if (defined $indexlanguages) {
        my $defaultindexlanguage_shortname;
        $buildwriter->startTag('indexLanguageList');
        foreach my $language (@$indexlanguages) {
            $buildwriter->startTag('indexLanguage', 'name'=>$language);
            &output_display($buildwriter, 'name', $language, $language);
            $buildwriter->endTag('indexLanguage');
            # the first language listed is the default
            $defaultindexlanguage_shortname = $language unless (defined $defaultindexlanguage_shortname);
        }
        $buildwriter->endTag('indexLanguageList');
        $buildwriter->startTag('defaultIndexLanguage', 'name'=>$defaultindexlanguage_shortname, 'shortname'=>$defaultindexlanguage_shortname);
        $buildwriter->endTag('defaultIndexLanguage');
    }

    if (scalar(@subcollist) > 0) {
        $buildwriter->startTag('indexSubcollectionList');
        # walk @subcollist so subcollections keep their build.cfg order
        foreach my $subcoll_name (@subcollist) {
            $buildwriter->emptyTag('indexSubcollection', 'name'=>$subcoll_name, 'shortname'=>$subcolmap->{$subcoll_name});
        }
        $buildwriter->endTag('indexSubcollectionList');
        $buildwriter->emptyTag('defaultIndexSubcollection', 'shortname'=>$defaultsubcollection);
    }

    $buildwriter->endTag('serviceRack');

    $buildwriter->endTag('serviceRackList');
    $buildwriter->endTag('buildConfig');

    # we add in the default replace list just in case we have macros in the
    # collection
    $collwriter->emptyTag('replaceListRef', 'id'=>'gs2-standard');
    $collwriter->emptyTag('replaceListRef', 'id'=>'gs2-image') if $contains_image_plugin;
    $collwriter->endTag('CollectionConfig');
    $collwriter->end();
    $buildwriter->end();
    $buildoutput->close();
    $colloutput->close();
    &close_database($database);
}

# Write one <option> element per "-name [value]" argument of a plugin or
# classifier definition.
#   $writer - XML writer to emit to
#   $args   - array ref; element 0 is the plugin/classifier name and is skipped
# An argument starting with '-' is an option name.  When the following
# argument does not start with '-' it becomes the option's value; otherwise
# (including when the option is the last argument) the option is written as
# a value-less flag.
sub _write_option_list {
    my ($writer, $args) = @_;

    for (my $i = 1; $i < scalar(@$args); $i++) {
        my $option_name = $args->[$i];
        next unless ($option_name =~ /^-/);

        my $option_value = $args->[$i+1];
        if (!defined $option_value || $option_value =~ /^-/) {
            $writer->startTag('option', 'name'=>$option_name);
        } else {
            $writer->startTag('option', 'name'=>$option_name, 'value'=>$option_value);
        }
        $writer->endTag('option');
    }
}
[3688]694
[3979]695
# Write a single <metadata> element through $writer.
#   $writer    - XML writer (must provide startTag, characters and endTag)
#   $lang      - language code; 'default' is written as 'en', and the empty
#                string suppresses the lang attribute altogether
#   $metaname  - value of the name attribute
#   $metavalue - element text; when undefined the element is left empty
sub output_metadata {
    my ($writer, $lang, $metaname, $metavalue) = @_;

    $lang = 'en' if $lang eq 'default';

    # the lang attribute, when present, precedes the name attribute
    my @attributes = ($lang ne "")
        ? ('lang'=>$lang, 'name'=>$metaname)
        : ('name'=>$metaname);

    $writer->startTag('metadata', @attributes);
    if (defined $metavalue) {
        $writer->characters($metavalue);
    }
    $writer->endTag('metadata');
}
707
# Write a single <displayItem> element through $writer.
#   $writer - XML writer (must provide startTag, characters and endTag)
#   $name   - value of the name attribute
#   $lang   - language: a plain code, 'default' (written as 'en'), or a
#             GS2 marker of the form "[l=xx]" from which xx is extracted
#   $value  - element text; when undefined the element is left empty
# An undefined language, or a "[..." marker that carries no language code,
# falls back to 'en' so that the lang attribute is never undefined.
sub output_display {
    my ($writer, $name, $lang, $value) = @_;

    $lang = 'en' if (!defined $lang || $lang eq 'default');
    if ($lang =~ /^\[/) {
        my ($code) = $lang =~ /\[l=(.*)\]/;
        $lang = (defined $code && $code ne '') ? $code : 'en';
    }

    $writer->startTag('displayItem', 'name'=>$name, 'lang'=>$lang);
    $writer->characters($value) if (defined $value);
    $writer->endTag('displayItem');
}
# Reduce a macro-prefixed icon location to the part after "images/".
#   $value - icon value taken from collect.cfg
# Returns the text following the first "images/" when $value starts with
# an underscore and that text is a true value; otherwise returns $value
# unchanged.
sub format_icon_value {
    my ($value) = @_;

    # values that do not start with a macro underscore are passed through
    return $value unless $value =~ /^_/;

    my ($image_path) = $value =~ /images\/(.*)$/;
    return $image_path ? $image_path : $value;
}
[3688]729
# Clean up a GS2 display string for use in a GS3 displayItem.
#   $value - display text taken from collect.cfg
# Returns a copy with the substitutions below applied, in this order.
sub tidy_up_display_item {
    my ($value) = @_;

    for ($value) {
        s/\\n//g;                                # drop literal backslash-n sequences
        s/\\\'/\'/g;                             # \' becomes '
        s/\\\"/\"/g;                             # \" becomes "
        s/_httpprefix_/_httpsite_/g;             # _httpprefix_ becomes _httpsite_
        s/_gwcgi_//g;                            # drop the _gwcgi_ macro
        s/[a-z][a-z]?=_cgiarg[a-z][a-z]?_&?//g;  # drop x=_cgiargx_ style arguments
        s/&p=/&sa=/g;                            # the p argument is renamed to sa
    }

    return $value;
}
745
# Expand GS2 {If}{...} and {Or}{...} format statements, including nested
# ones, into their gsf equivalents.
#   $format    - format statement text
#   $node_type - node type handed on to format_if
# Returns the format text with every If/Or statement that could be parsed
# replaced by the output of format_if / format_or.  Text that cannot be
# parsed is left as it is.
sub format_if_or {
    my ($format, $node_type) = @_;

    # while we find nested if/or statements, recurse to find more nested if/or statements,
    # and try to expand (process) these nested statements starting from innermost going to outermost
    while ($format =~ m/^.*\{(?:If|Or)\}\{[^\}\{]*\{/) { # contains nested if/or statement, expand it

        my ($prefix, $nested_to_process, $suffix) =
            $format =~ m/^(.*\{(?:If|Or)\}\{[^\}\{]*)(\{[^\}]*\}\s*\{[^\}]*\})(.*)$/; # recursion step

        # An opening brace with no complete {..}{..} group behind it cannot be
        # expanded; stop here instead of rebuilding the format from undefined pieces.
        last unless (defined $nested_to_process);

        my $expanded = &format_if_or($nested_to_process, $node_type);

        # A nested group that is not an If/Or comes back unchanged; splicing it
        # back in would give the same string again and this loop would never end.
        last if ($expanded eq $nested_to_process);

        $format = $prefix . $expanded . $suffix;
    }

    if ($format =~ m/\{(If|Or)\}\{[^\}\{]*\}/) { # base step: contains if/or statement(s), but none nested
        # expand them
        $format =~ s/\{If\}\{([^\}]*)\}/&format_if($1, $node_type)/eg;
        $format =~ s/\{Or\}\{([^\}]*)\}/&format_or($1)/eg;
    }
    return $format;
}
767
# Write one GS2 format statement as a <gsf:template> element.
#   $writer     - XML writer (must provide startTag, charactersXML and endTag)
#   $old_format - the GS2 format statement
#   $node_type  - "document", "classifier" or "horizontal"; any other value
#                 writes nothing
# With the file-wide $convert_format_stmts switch on, the statement is
# converted to gsf markup using regular expressions.  Otherwise it is escaped
# and wrapped in <gsf:format-gs2> so it is left for formatconverter to handle.
sub write_format {
    my ($writer, $old_format, $node_type) = @_;

    # replace \' with ' and \" with "
    $old_format =~ s/\\\'/\'/g;
    $old_format =~ s/\\\"/\"/g;

    if (!$convert_format_stmts) {
        # not converting format statements, leave them as GS2 format stmts,
        # so that formatconverter can convert them and users can oversee the conversion in GLI,
        # but nest the GS2 statements here in an xml tag that won't be processed by GS3

        # escape html entities inside the format statement since the <br> and <p> may not be correct for xml
        $old_format = &docprint::escape_text($old_format);
        $old_format = "<gsf:format-gs2>" . $old_format . "</gsf:format-gs2>";
    }
    else {
        # nested {If} and {Or} statements are expanded first
        $old_format = &format_if_or($old_format, $node_type);

        for ($old_format) {
            #convert [] to <gsf:...>
            s/\[Text\]/\<gsf:text\/\>/g;
            s/\[num\]/\<gsf:num\/\>/g;
            s/\[link\]/\<gsf:link type=\'$node_type\'\>/g;
            s/\[\/link\]/\<\/gsf:link\>/g;
            s/\[srclink\]/\<gsf:link type=\'source\'\>/g;
            s/\[\/srclink\]/\<\/gsf:link\>/g;
            s/\[icon\]/\<gsf:icon type=\'$node_type\'\/\>/g;
            s/\[srcicon\]/\<gsf:icon type=\'source\'\/\>/g;

            # highlight markers are simply dropped
            s/\[\/?highlight\]//g;

            #now do the rest of the [] which are assumed to be metadata
            s/\[([^\]]*)\]/&format_metadata($1)/eg;

            # some html tidy: <br> becomes <br />, <p> becomes <p />
            s/\<br\>/\<br \/\>/g;
            s/\<p\>/\<p \/\>/g;

            #put quotes around any atts
            s/=([a-z]+)([> ])/=\'$1\'$2/g;
        }
    }

    # attributes of the gsf:template element for each supported node type
    my %template_attributes_for = (
        'document'   => [ 'match'=>'documentNode' ],
        'classifier' => [ 'match'=>'classifierNode' ],
        'horizontal' => [ 'match'=>'classifierNode', 'mode'=>'horizontal' ],
    );

    my $attributes = $template_attributes_for{$node_type};
    if (defined $attributes) {
        $writer->startTag('gsf:template', @$attributes);
        $writer->charactersXML($old_format);
        $writer->endTag('gsf:template');
    }
}
829
# Convert the inside of a GS2 [metadata] reference into a <gsf:metadata/>
# element.
#   $metadata_string - text between the square brackets, for example
#                      "Title", "parent(Top):Title" or "sibling(All', '):dc.Subject"
# Returns the element as a string.  A leading "cgisafe:" and a leading "ex."
# on the metadata name are removed; parent/sibling selectors become
# select/multiple/separator attributes.
sub format_metadata {
    my ($metadata_string) = @_;

    # what shall we do with cgisafe?? (for now it is stripped and ignored)
    $metadata_string =~ s/^cgisafe://;

    my ($select, $scope, $delim);
    if ($metadata_string =~ s/^(parent|sibling)//) {
        $select = $1;
        if ($metadata_string =~ s/^\((Top|All)\)?//) {
            $scope = $1;
            # an optional quoted separator follows the scope
            if ($metadata_string =~ s/^\'([^\']*)\'\)//) {
                $delim = $1;
            }
        }
    }
    $metadata_string =~ s/^://;
    # remove ex.
    $metadata_string =~ s/^ex\.//;

    my @attributes = ("name='$metadata_string'");
    if (defined $select) {
        if ($select eq "sibling") {
            push @attributes, "multiple='true'";
            push @attributes, "separator='$delim'" if (defined $delim);
        }
        elsif ($select eq "parent") {
            if (!defined $scope) {
                push @attributes, "select='parent'";
            }
            elsif ($scope eq "Top") {
                push @attributes, "select='root'";
            }
            elsif ($scope eq "All") {
                push @attributes, "select='ancestors'";
                push @attributes, "separator='$delim'" if (defined $delim);
            }
        }
    }

    # every attribute is followed by a single space, then the element is closed
    return "<gsf:metadata " . join('', map { "$_ " } @attributes) . "/>";
}
883
# Convert the inside of a GS2 {If}{test,true[,false]} statement into a
# <gsf:switch> element.
#   $if_string - text between the braces: a test, the text to use when the
#                test holds and, optionally, the text to use otherwise
#   $node_type - accepted for symmetry with the other format helpers; unused
# The test is either a bare [metadata] reference (an 'exists' test) or
# "lhs OP rhs" with OP one of eq ne lt gt le ge sw ew.  Returns the
# gsf:switch markup as a string.  When the test cannot be parsed a notice is
# printed to STDERR and the switch is written with an empty test, without
# interpolating undefined values.
sub format_if {
    my ($if_string, $node_type) = @_;

    my @parts = split /,/, (defined $if_string ? $if_string : '');
    my $test = defined $parts[0] ? $parts[0] : '';
    my $true_option = defined $parts[1] ? $parts[1] : '';
    # a false option is only recognised when there are exactly three parts
    my $false_option = (scalar(@parts) == 3) ? $parts[2] : undef;

    $test =~ s/^\s*//;
    $test =~ s/\s*$//;

    # GS2 comparison operator => gsf test name
    my %test_type_for = (
        'eq' => 'equals',
        'ne' => 'notEquals',
        'sw' => 'startsWith',
        'ew' => 'endsWith',
        'lt' => 'lessThan',
        'gt' => 'greaterThan',
        'le' => 'lessThanOrEquals',
        'ge' => 'greaterThanOrEquals',
    );

    my ($test_meta, $test_type, $test_value) = ('', '', undef);
    if ($test =~ /^(\[.+\])$/) {
        $test_meta = $1;
        $test_type = 'exists';
    }
    elsif ($test =~ /^(.+)\s+(eq|ne|lt|gt|le|ge|sw|ew)\s+(.+)$/) {
        my ($lhs, $exp, $rhs) = ($1, $2, $3);
        $test_type = $test_type_for{$exp};
        if ($lhs =~ /^\[.+\]$/) {
            $test_meta = $lhs;
            $test_value = $rhs;
        } else {
            # assume rhs has meta
            $test_meta = $rhs;
            $test_value = $lhs;
        }

        #remove beginning and end quotes
        $test_value =~ s/^[\'\"]//;
        $test_value =~ s/[\'\"]$//;
    }
    else {
        print STDERR "format_if: could not parse test '$test'\n";
    }

    my $test_atts = "test='$test_type' ";
    if (defined $test_value) {
        $test_atts .= "test-value='$test_value' ";
    }

    my $new_format = "<gsf:switch>$test_meta";
    $new_format .= "<gsf:when $test_atts>$true_option</gsf:when>";
    if (defined $false_option) {
        $new_format .= "<gsf:otherwise>$false_option</gsf:otherwise>";
    }
    $new_format .= "</gsf:switch>";

    return $new_format;
}
950
# Convert the inside of a GS2 {Or}{a,b,...} statement into a
# <gsf:choose-metadata> element.
#   $or_string - comma separated alternatives
# Each alternative of the form [name] becomes a gsf:metadata element (via
# format_metadata).  The first alternative that is not of that form is
# written as the <gsf:default> text, and anything after it is ignored.
# Returns the empty string when there are no alternatives at all.
sub format_or {
    my ($or_string) = @_;

    my @alternatives = split (',', $or_string);
    return "" if (!@alternatives);

    my @pieces;
    for my $alternative (@alternatives) {
        if ($alternative =~ /^\[(.*)\]$/) {
            push @pieces, &format_metadata($1);
            next;
        }
        # a default value ends the list
        push @pieces, "<gsf:default>$alternative</gsf:default>";
        last;
    }

    return "<gsf:choose-metadata>" . join('', @pieces) . "</gsf:choose-metadata>";
}
968
# Placeholder for opening the collection database.
#   $db_file - path of the database file (currently unused)
# The GDBM tie is disabled (it could not be made to work on windows), so
# this always returns undef.
sub open_database {
    my ($db_file) = @_;

    my $database;
#    tie (%$database, 'GDBM_File', $db_file, GDBM_READER, 0400) ||
#        die "Couldn't open database $db_file\n";

    return $database;
}
978
# Release the collection database.
#   $database - hash ref tied to the database, or undef when no database
#               was opened (always the case while the GDBM lookup is disabled)
# Does nothing unless given a reference, so an unopened database is never
# dereferenced.
sub close_database {
    my ($database) = @_;
    return unless (ref $database);
    untie %$database;
}
# Report whether a classifier's top level is a horizontal list.
#   $database - hash ref mapping classifier names to their database records,
#               or undef when no database is open (always the case while the
#               GDBM lookup is disabled)
#   $name     - classifier name, e.g. "CL1"
# Returns 1 when the record for $name has <childtype>HList, otherwise 0
# (including when there is no database or no record for $name).
sub isHorizontalClassifier {
    my ($database, $name) = @_;

    # no database to consult
    return 0 unless (ref $database);

    my $record = $database->{$name};
    return 0 unless (defined $record);

    my ($childtype) = $record =~ /<childtype>(\w*)/;
    return (defined $childtype && $childtype eq "HList") ? 1 : 0;
}
[3688]994#$writer->startTag('');
995#$writer->endTag('');
996#$writer->characters();
997#$writer->emptyTag('');
998
9991;
Note: See TracBrowser for help on using the repository browser.