root/main/trunk/greenstone3/bin/script/convert_coll_from_gs2.pl @ 26343

Revision 26343, 29.0 KB (checked in by kjdon, 7 years ago)

Output the indexOptions

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1#!/usr/bin/perl -w
2
BEGIN {
    # The Greenstone environment must be set up before any of the
    # project modules below can be located, so check it at compile time.
    foreach my $required_var ('GSDLHOME', 'GSDL3HOME', 'GSDL3SRCHOME', 'GSDLOS') {
        die "$required_var not set\n" unless defined $ENV{$required_var};
    }

    # Make the Greenstone 2 perl library and the Greenstone 3 bundled
    # CPAN modules visible to the 'use' statements that follow.
    unshift(@INC, "$ENV{'GSDLHOME'}/perllib");
    unshift(@INC, "$ENV{'GSDL3SRCHOME'}/lib/perl/cpan");
}
11
12use colcfg;
13use util;
14use parsargv;
15use FileHandle;
16use XML::Writer;
17#can't get this to work on windows
18#use GDBM_File;
19
20use strict;
21
22&main();
# Print the command-line synopsis and the list of supported options
# to STDOUT.  Takes no arguments.
sub print_usage() {
    my @usage_lines = (
        "Usage: convert_coll_from_gs2.pl [options] coll-name\n",
        "options:\n",
        "   -collectdir         Directory where collection lives.\n",
        "   -verbosity          Controls the amount of output.\n",
        "   -defaultlang        The language that is considered the default (for display text etc). defaults to 'en'\n\n",
    );

    foreach my $usage_line (@usage_lines) {
        print STDOUT $usage_line;
    }
}
31
32
# Convert a Greenstone 2 collection into Greenstone 3 configuration files.
#
# Reads <collectdir>/<coll>/etc/collect.cfg and <collectdir>/<coll>/index/build.cfg
# and writes etc/collectionConfig.xml and index/buildConfig.xml next to them
# (overwriting any existing files).  The collection name is taken from @ARGV
# after option parsing.  Dies if the collection name is missing, if either
# input file is absent, or if an output file cannot be opened or closed.
sub main {

    my ($defaultlang, $verbosity, $collectdir);
    # note that no defaults are passed for most options as they're set
    # later (after we check the collect.cfg file)
    if (!&parsargv::parse(\@ARGV,
                          'verbosity/\d+/', \$verbosity,
                          'collectdir/.*/', \$collectdir,
                          'defaultlang/.*/', \$defaultlang)) {
        &print_usage();
        die "\n";
    }

    # get and check the collection name
    my ($collection) = @ARGV;
    if (!defined($collection) || $collection eq "") {
        die "No collection specified\n";
    }
    if ($collection eq "gs2model") {
        die "You cant convert the model collection\n";
    }

    if (!defined $collectdir || $collectdir eq "") {
        $collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");
    }

    if (!defined $defaultlang || $defaultlang eq "") {
        $defaultlang = 'en';
    }
    # add on the coll name
    $collectdir = &util::filename_cat ($collectdir, $collection);

    # ---- read the two Greenstone 2 configuration files ----
    my $collconfigfilename = &util::filename_cat ($collectdir, "etc", "collect.cfg");
    print STDOUT "coll config=$collconfigfilename\n";
    if (!-e $collconfigfilename) {
        print STDERR "collect.cfg not found!!";
        die "\n";
    }
    my $collectcfg = &colcfg::read_collect_cfg ($collconfigfilename);

    my $buildconfigfilename = &util::filename_cat ($collectdir, "index", "build.cfg");
    if (!-e $buildconfigfilename) {
        print STDERR "build.cfg not found!!";
        die "\n";
    }
    my $buildcfg = &colcfg::read_build_cfg ($buildconfigfilename);

    # ---- work out the output files ----
    my $colloutfile = &util::filename_cat ($collectdir, "etc", "collectionConfig.xml");
    if (-e $colloutfile) {
        print STDOUT "collectionConfig file already exists! overwriting it!\n";
    }

    my $buildoutfile = &util::filename_cat ($collectdir, "index", "buildConfig.xml");
    if (-e $buildoutfile) {
        print STDOUT "buildConfig file already exists! overwriting it!\n";
    }

    # The collection database is never opened (the GDBM code is disabled
    # because it does not work on windows), so this stays undefined.
    my $database;

    my $buildtype = defined $buildcfg->{'buildtype'} ? $buildcfg->{'buildtype'} : 'mg';
    # mgpp and lucene collections have levels; mg collections do not
    my $uses_levels = ($buildtype eq 'mgpp' || $buildtype eq 'lucene');

    my $indexstem = undef;
    if (defined $buildcfg->{'indexstem'}) {
        $indexstem = $buildcfg->{'indexstem'};
    }

    # Open both output files, failing loudly rather than handing an
    # undefined handle to XML::Writer.
    my $buildoutput = IO::File->new($buildoutfile, 'w')
        or die "Couldn't open $buildoutfile for writing: $!\n";
    binmode($buildoutput,":utf8");
    my $buildwriter = XML::Writer->new(OUTPUT => $buildoutput, NEWLINES => 1);

    $buildwriter->xmlDecl("UTF-8");
    $buildwriter->startTag('buildConfig', 'xmlns:gsf'=>"http://www.greenstone.org/greenstone3/schema/ConfigFormat");

    my $colloutput = IO::File->new($colloutfile, 'w')
        or die "Couldn't open $colloutfile for writing: $!\n";
    binmode($colloutput,":utf8");
    my $collwriter = XML::Writer->new(OUTPUT => $colloutput, NEWLINES => 1);

    $collwriter->xmlDecl("UTF-8");
    $collwriter->startTag('CollectionConfig', 'xmlns:gsf'=>"http://www.greenstone.org/greenstone3/schema/ConfigFormat", 'xmlns:xsl'=>'http://www.w3.org/1999/XSL/Transform');

    #output the collection metadata to the collectionConfig file
    $collwriter->startTag('metadataList');
    foreach my $metaname ('creator', 'public') {
        my $metavalue = $collectcfg->{$metaname};
        # a missing entry is written as an empty element
        $metavalue = '' unless defined $metavalue;
        &output_metadata($collwriter, 'default', $metaname, $metavalue);
    }
    $collwriter->endTag('metadataList');

    #output the display collectionmeta to collectionConfig.xml
    my $collectionmeta = $collectcfg->{'collectionmeta'};
    if (defined $collectionmeta) {
        my %name_map = ('collectionname', 'name',
                        'collectionextra', 'description',
                        'iconcollection', 'icon',
                        'iconcollectionsmall', 'smallicon');

        $collwriter->startTag('displayItemList');
        foreach my $entry (sort keys %$collectionmeta) {
            # some metadata names need to be specially mapped to other names
            # most of them however, can retain their original names
            my $name = (defined $name_map{$entry}) ? $name_map{$entry} : $entry;
            foreach my $lang (sort keys %{$collectionmeta->{$entry}}) {
                my $value = $collectionmeta->{$entry}->{$lang};
                if ($entry =~ /^icon/) {
                    $value = format_icon_value($value);
                } else {
                    $value = tidy_up_display_item($value);
                }
                &output_display($collwriter, $name, $lang, $value);
            }
        }
        $collwriter->endTag('displayItemList');
    }

    # output building metadata to build config file
    my $numdocs = $buildcfg->{'numdocs'};
    $numdocs = '' unless defined $numdocs;
    $buildwriter->startTag('metadataList');
    &output_metadata($buildwriter,'', 'numDocs', $numdocs);
    &output_metadata($buildwriter,'', 'buildType', $buildtype);
    $buildwriter->endTag('metadataList');

    #indexes
    # maps index name to shortname
    my $indexmap = {};
    # keeps the order for indexes
    my @indexlist = ();
    my $defaultindex = "";
    my $maptype = ($buildtype eq "mg") ? "indexmap" : "indexfieldmap";
    if (defined $buildcfg->{$maptype}) {
        foreach my $mapping (@{$buildcfg->{$maptype}}) {
            my ($k, $v) = $mapping =~ /^(.*)\-\>(.*)$/;
            # skip entries that are not of the form name->shortname
            next unless defined $k;
            # the first index listed is the default one
            $defaultindex = $k unless scalar(@indexlist);
            push @indexlist, $k unless exists $indexmap->{$k};
            $indexmap->{$k} = $v;
        }
    } else {
        print STDERR "$maptype not defined\n";
    }
    # an explicit default in collect.cfg overrides the first-listed index
    if (defined $collectcfg->{'defaultindex'}) {
        $defaultindex = $collectcfg->{'defaultindex'};
    }

    #  levels
    my $levelmap = {};
    my @levellist = ();
    my $default_search_level = "";
    my $default_search_level_shortname = "";
    my $default_retrieve_level = "Sec";
    if ($uses_levels) {
        if (defined $buildcfg->{'levelmap'}) {
            foreach my $mapping (@{$buildcfg->{'levelmap'}}) {
                my ($k, $v) = $mapping =~ /^(.*)\-\>(.*)$/;
                next unless defined $k;
                # the first level listed is the default one
                if (!scalar(@levellist)) {
                    $default_search_level = $k;
                    $default_search_level_shortname = $v;
                }
                push @levellist, $k unless exists $levelmap->{$k};
                $levelmap->{$k} = $v;
            }
        }

        if (defined $collectcfg->{'defaultlevel'}) {
            $default_search_level = $collectcfg->{'defaultlevel'};
            # only take over the shortname if the level was actually built,
            # so that an undefined attribute value is never written
            if (defined $levelmap->{$default_search_level}) {
                $default_search_level_shortname = $levelmap->{$default_search_level};
            }
        }
        if (defined $buildcfg->{'textlevel'}) {
            $default_retrieve_level = $buildcfg->{'textlevel'};
        }
    }

    # format stuff
    my $format = $collectcfg->{'format'};
    $format = {} unless defined $format;

    #output the search stuff to coll cfg
    $collwriter->startTag('search','type'=>$buildtype);
    # indexes are written in build order, each with its display names
    # (stored in collectionmeta under ".<index name>")
    foreach my $index_name (@indexlist) {
        $collwriter->startTag('index', 'name'=>$index_name);
        my $display = defined $collectionmeta ? $collectionmeta->{".$index_name"} : undef;
        if (defined $display) {
            foreach my $lang (sort keys %$display) {
                &output_display($collwriter, 'name', $lang, $display->{$lang});
            }
        }
        $collwriter->endTag('index');
    }

    #output the defaultIndex to coll cfg
    $collwriter->emptyTag('defaultIndex','name'=>$defaultindex);

    # indexOptions
    if (defined $collectcfg->{'indexoptions'}) {
        foreach my $option (@{$collectcfg->{'indexoptions'}}) {
            $collwriter->emptyTag('indexOption', 'name'=>$option);
        }
    }

    #indexSubcollection
    if (defined $collectcfg->{'indexsubcollections'}) {
        foreach my $subcoll (@{$collectcfg->{'indexsubcollections'}}) {
            $collwriter->startTag('indexSubcollection', 'name'=>$subcoll);
            &output_display($collwriter, 'name', $defaultlang, $subcoll);
            $collwriter->endTag('indexSubcollection');
        }
    }

    #subcollection
    my $subcollection = $collectcfg->{'subcollection'};
    if (defined $subcollection) {
        foreach my $entry (sort keys %$subcollection) {
            my $value = $subcollection->{$entry};
            $collwriter->emptyTag('subcollection','filter'=>$value,'name'=>$entry);
        }
    }

    #indexlanguage
    if (defined $collectcfg->{'languages'}) {
        foreach my $language (@{$collectcfg->{'languages'}}) {
            $collwriter->startTag('indexLanguage','name'=>$language);
            &output_display($collwriter, 'name', $defaultlang, $language);
            $collwriter->endTag('indexLanguage');
        }
    }

    #  level stuff for mgpp/lucene
    if ($uses_levels) {
        foreach my $level_name (@levellist) {
            $collwriter->startTag('level', 'name'=>$level_name);
            #find the coll meta stuff
            my $display = defined $collectionmeta ? $collectionmeta->{".$level_name"} : undef;
            if (defined $display) {
                foreach my $lang (sort keys %$display) {
                    &output_display($collwriter, 'name', $lang, $display->{$lang});
                }
            }
            $collwriter->endTag('level');
        }
        $collwriter->emptyTag('defaultLevel', 'name'=>$default_search_level);
    }

    # add in the search type
    if (defined $format->{'SearchTypes'}) {
        $collwriter->startTag('format', 'name'=>"searchType");
        $collwriter->charactersXML($format->{'SearchTypes'});
        $collwriter->endTag('format');
    }

    # add in the format stuff: SearchVList takes precedence over VList
    my $search_format = defined $format->{'SearchVList'} ? $format->{'SearchVList'}
                                                         : $format->{'VList'};
    if (defined $search_format) {
        $collwriter->startTag('format');
        write_format($collwriter, $search_format, "document");
        $collwriter->endTag('format');
    }

    $collwriter->endTag('search');

    # import plugins
    # if ImagePlugin is added, then need to add in a replaceListRef element for gs2-image
    my $contains_image_plugin = 0;

    my $plugins = $collectcfg->{'plugin'};
    if (defined $plugins) {
        $collwriter->startTag('import');
        $collwriter->startTag('pluginList');
        foreach my $pl (@$plugins) {
            my $pluginname = $pl->[0];
            if ($pluginname =~ m/^(ImagePlugin|ImagePlug|PagedImagePlugin)$/) {
                $contains_image_plugin = 1;
            }
            $collwriter->startTag('plugin','name'=>$pluginname);
            &_output_options($collwriter, $pl);
            $collwriter->endTag('plugin');
        }
        $collwriter->endTag('pluginList');
        $collwriter->endTag('import');
    }

    $buildwriter->startTag('serviceRackList');

    my $service_type = "MG";
    if ($buildtype eq 'mgpp') {
        $service_type = "MGPP";
    } elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }

    #indexSubcollectionList
    my $defaultsubcollection = "";
    my @subcollist;
    my $subcolmap = {};

    if (defined $buildcfg->{'subcollectionmap'}) {
        foreach my $mapping (@{$buildcfg->{'subcollectionmap'}}) {
            my @pair = split(/->/, $mapping);
            # skip entries that are not of the form name->shortname
            next unless defined $pair[0] && defined $pair[1];
            # the first subcollection listed is the default one
            $defaultsubcollection = $pair[1] unless scalar(@subcollist);
            push @subcollist, $pair[0] unless exists $subcolmap->{$pair[0]};
            $subcolmap->{$pair[0]} = $pair[1];
        }
    }

    #do the retrieve service
    $buildwriter->startTag('serviceRack', 'name'=>"GS2".$service_type."Retrieve");
    if ($uses_levels) {
        $buildwriter->emptyTag('defaultLevel', 'shortname'=>$default_retrieve_level);
    } elsif ($buildtype eq "mg") {
        $buildwriter->emptyTag('defaultIndex', 'shortname'=>$defaultindex);
    }

    if ($defaultsubcollection ne "") {
        $buildwriter->emptyTag('defaultIndexSubcollection', 'shortname'=>$defaultsubcollection);
    }

    if (defined $indexstem) {
        $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
    }
    # close off the Retrieve service
    $buildwriter->endTag('serviceRack');

    # add in the classifiers if needed
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;
    if (defined $collectcfg->{'classify'}) {
        $collwriter->startTag('browse');
        # add in default format if necessary
        if (defined $format->{"VList"} || defined $format->{"HList"}) {
            # global formats
            $collwriter->startTag('format');
            if (defined $format->{"VList"}) {
                # VLIst applies to both classifier and doc nodes
                write_format($collwriter, $format->{"VList"}, "document");
                write_format($collwriter, $format->{"VList"}, "classifier");
            }
            if (defined $format->{"HList"}) {
                # hlist is only for classifier nodes
                write_format($collwriter, $format->{"HList"}, "horizontal");
            }
            $collwriter->endTag('format');
        }

        foreach my $cl (@{$collectcfg->{'classify'}}) {
            # classifiers are numbered in collect.cfg order; phind consumes
            # a number even though it is not written as a classifier
            my $name = "CL$count";
            $count++;
            my $classname = $cl->[0];
            if ($classname =~ /^phind$/i) {
                $phind=1;
                #should add it into coll config classifiers
                next;
            }

            my $horizontalAtTop = &isHorizontalClassifier($database, $name);
            if (not $started_classifiers) {
                $buildwriter->startTag('serviceRack', 'name'=>'GS2Browse');
                if (defined $indexstem) {
                    $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
                }

                $buildwriter->startTag('classifierList');
                $started_classifiers = 1;
            }

            my $content = ''; #use buttonname first, then metadata
            if ($classname eq "DateList") {
                $content = "Date";
            } else {
                foreach my $i (0 .. $#{$cl}) {
                    my $arg = $cl->[$i];
                    next unless defined $arg;
                    if ($arg eq "-buttonname") {
                        $content = $cl->[$i+1];
                        last;
                    } elsif ($arg eq "-metadata") {
                        $content = $cl->[$i+1];
                    }
                }
                # -buttonname/-metadata given without a value
                $content = '' unless defined $content;
            }
            if ($horizontalAtTop) {
                $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content, 'horizontalAtTop'=>'true');
            } else {
                $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content);
            }

            $collwriter->startTag('classifier', 'name'=>$classname);
            &_output_options($collwriter, $cl);

            my $vlist = $name."VList";
            my $hlist = $name."HList";
            my $dlist = "";
            if ($classname eq "DateList") {
                $dlist = "DateList";
            }
            # need to work out how to split into classifier and document
            if (defined $format->{$vlist} || defined $format->{$hlist} || defined $format->{$dlist}) {
                $collwriter->startTag('format');
                if (defined $format->{$vlist}) {
                    write_format($collwriter, $format->{$vlist}, "document");
                    write_format($collwriter, $format->{$vlist}, "classifier");
                }
                if (defined $format->{$hlist}) {
                    write_format($collwriter, $format->{$hlist}, "horizontal");
                }

                if (defined $format->{$dlist}) {
                    write_format($collwriter, $format->{$dlist}, "document");
                }
                $collwriter->endTag('format');
            }
            $collwriter->endTag('classifier');
        } #foreach classifier
        if ($started_classifiers) {
            # end the classifiers
            $buildwriter->endTag('classifierList');
            # close off the Browse service
            $buildwriter->endTag('serviceRack');
        }

        $collwriter->endTag('browse');
    }

    # the phind classifier is a separate service
    if ($phind) {
        $buildwriter->emptyTag('serviceRack', 'name'=>'PhindPhraseBrowse');
    }

    # do the search service
    $buildwriter->startTag('serviceRack', 'name'=>'GS2'.$service_type.'Search');
    $buildwriter->startTag('indexList');
    #for each index
    foreach my $index_name (@indexlist) {
        $buildwriter->emptyTag('index', 'name'=>$index_name, 'shortname'=>$indexmap->{$index_name});
    }
    $buildwriter->endTag('indexList');
    if (defined $indexstem) {
        $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
    }

    # index options
    if ($buildtype eq 'mg' || $buildtype eq 'mgpp') {
        $buildwriter->startTag('indexOptionList');
        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/ ) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        $buildwriter->emptyTag('indexOption', 'name'=>'stemIndexes', 'value'=>$stemindexes);

        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        $buildwriter->emptyTag('indexOption', 'name'=>'maxnumeric', 'value'=>$maxnumeric);

        $buildwriter->endTag('indexOptionList');
    }

    if ($uses_levels) {

        # level info
        $buildwriter->emptyTag('defaultLevel', 'shortname'=>$default_search_level_shortname);
        $buildwriter->emptyTag('defaultGDBMLevel', 'shortname'=>$default_retrieve_level);
        $buildwriter->startTag('levelList');
        foreach my $level_name (@levellist) {
            $buildwriter->emptyTag('level', 'name'=>$level_name, 'shortname'=>$levelmap->{$level_name});
        }
        $buildwriter->endTag('levelList');

        # do the search types if there
        if (defined $collectcfg->{'searchtype'}) {
            $buildwriter->startTag('searchTypeList');
            foreach my $st (@{$collectcfg->{'searchtype'}}) {
                $buildwriter->emptyTag('searchType', 'name'=>$st);
            }
            $buildwriter->endTag('searchTypeList');
        } elsif (defined $format->{'SearchTypes'}) {
            #check format statement
            my $searchtype = $format->{'SearchTypes'};
            $buildwriter->startTag('searchTypeList');
            if ($searchtype =~ /form/) {
                $buildwriter->emptyTag('searchType', 'name'=>'form');
            }
            if ($searchtype =~ /plain/) {
                $buildwriter->emptyTag('searchType', 'name'=>'plain');
            }
            $buildwriter->endTag('searchTypeList');
        }
    }

    #indexLanguageList
    if (defined $collectcfg->{'languages'}) {
        my $defaultindexlanguage_shortname;
        $buildwriter->startTag('indexLanguageList');
        foreach my $language (@{$collectcfg->{'languages'}}) {
            $buildwriter->startTag('indexLanguage','name'=>$language);
            &output_display($buildwriter, 'name', $language, $language);
            $buildwriter->endTag('indexLanguage');
            # the first language listed is the default one
            $defaultindexlanguage_shortname = $language
                unless defined $defaultindexlanguage_shortname;
        }
        $buildwriter->endTag('indexLanguageList');
        # an empty language list has no default to announce
        if (defined $defaultindexlanguage_shortname) {
            $buildwriter->startTag('defaultIndexLanguage', 'name'=>$defaultindexlanguage_shortname,'shortname'=>$defaultindexlanguage_shortname);
            $buildwriter->endTag('defaultIndexLanguage');
        }
    }

    if (scalar(@subcollist)>0) {
        $buildwriter->startTag('indexSubcollectionList');
        foreach my $subcoll (@subcollist) {
            $buildwriter->emptyTag('indexSubcollection', 'name'=>$subcoll, 'shortname'=>$subcolmap->{$subcoll});
        }
        $buildwriter->endTag('indexSubcollectionList');
        $buildwriter->emptyTag('defaultIndexSubcollection', 'shortname'=>$defaultsubcollection);
    }

    $buildwriter->endTag('serviceRack');

    $buildwriter->endTag('serviceRackList');
    $buildwriter->endTag('buildConfig');

    # we add in the default replace list just in case we have macros in the
    # collection
    $collwriter->emptyTag('replaceListRef', 'id'=>'gs2-standard');
    $collwriter->emptyTag('replaceListRef', 'id'=>'gs2-image') if  $contains_image_plugin;
    $collwriter->endTag('CollectionConfig');
    $collwriter->end();
    $buildwriter->end();
    # buffered write errors only surface when the handle is closed
    $buildoutput->close() or die "Couldn't close $buildoutfile: $!\n";
    $colloutput->close() or die "Couldn't close $colloutfile: $!\n";
    &close_database($database);
}

# Write the command-line style options of one plugin or classifier entry
# as <option> elements.  $entry is an array reference whose first element
# is the plugin/classifier name and whose remaining elements are its
# arguments.  An argument starting with '-' is an option name; it takes the
# following argument as its value unless that argument also starts with '-'
# or there is no following argument, in which case it is written as a
# value-less flag.
sub _output_options {
    my ($writer, $entry) = @_;

    my $last_index = $#{$entry};
    foreach my $i (1 .. $last_index) {
        my $option_name = $entry->[$i];
        next unless defined $option_name && $option_name =~ /^-/;

        my $option_value = ($i < $last_index) ? $entry->[$i+1] : undef;
        if (defined $option_value && $option_value !~ /^-/) {
            $writer->startTag('option','name'=>$option_name,'value'=>$option_value);
        } else {
            $writer->startTag('option','name'=>$option_name);
        }
        $writer->endTag('option');
    }
}
673
674
# Write one <metadata> element to $writer.
#
#   $writer    - object providing startTag/characters/endTag
#   $lang      - language code; 'default' is written as 'en', and an empty
#                or undefined value omits the lang attribute altogether
#   $metaname  - value of the name attribute
#   $metavalue - element text; undefined is written as an empty string so
#                that a missing config entry never reaches the writer as undef
sub output_metadata {
    my ($writer, $lang, $metaname,  $metavalue) = @_;
    $lang = '' unless defined $lang;
    $metavalue = '' unless defined $metavalue;

    $lang = 'en' if $lang eq 'default';
    if ($lang ne "") {
        $writer->startTag('metadata', 'lang'=>$lang, 'name'=>$metaname);
    } else {
        $writer->startTag('metadata', 'name'=>$metaname);
    }
    $writer->characters($metavalue);
    $writer->endTag('metadata');
}
686
# Write one <displayItem> element to $writer.
#
#   $writer - object providing startTag/characters/endTag
#   $name   - value of the name attribute
#   $lang   - language code, 'default', or a bracketed spec of the form
#             "[l=xx]".  'default', an undefined value and a bracketed spec
#             that cannot be parsed are all written as 'en'.
#   $value  - element text; undefined is written as an empty string
sub output_display {
    my ($writer, $name, $lang, $value) = @_;
    $value = '' unless defined $value;

    $lang = 'en' if !defined $lang || $lang eq 'default';
    if ($lang =~ /^\[/) {
        my ($parsed_lang) = $lang =~ /\[l=(.*)\]/;
        # a spec without an l= part would otherwise leave the lang
        # attribute undefined
        $lang = defined $parsed_lang ? $parsed_lang : 'en';
    }

    $writer->startTag('displayItem', 'name'=>$name, 'lang'=>$lang);
    $writer->characters($value);
    $writer->endTag('displayItem');
}
# Reduce an icon value to the path below "images/".
# Only values that start with an underscore are touched; for those,
# everything after the first "images/" is returned when that remainder is
# a true value.  In every other case the value comes back unchanged.
sub format_icon_value {
    my ($value) = @_;

    return $value unless $value =~ /^_/;

    my ($path_below_images) = $value =~ /images\/(.*)$/;
    return $path_below_images ? $path_below_images : $value;
}
708
# Clean up a Greenstone 2 display string and return the result:
#   - literal backslash-n sequences are removed
#   - backslash-escaped single and double quotes are unescaped
#   - _httpprefix_ becomes _httpsite_
#   - _gwcgi_ and "xx=_cgiargxx_" style arguments (with an optional
#     trailing &) are removed
#   - "&p=" becomes "&sa="
sub tidy_up_display_item {
    my ($value) = @_;

    # $_ is aliased to $value, so each substitution edits it in place
    for ($value) {
        s/\\n//g;
        s/\\\'/\'/g;
        s/\\\"/\"/g;
        s/_httpprefix_/_httpsite_/g;
        s/_gwcgi_//g;
        s/[a-z][a-z]?=_cgiarg[a-z][a-z]?_&?//g;
        s/&p=/&sa=/g;
    }

    return $value;
}
724
# Expand the {If}{...} and {Or}{...} statements in a Greenstone 2 format
# string by handing each one to format_if / format_or.
#
#   $format    - the format string
#   $node_type - passed through to format_if
#
# Nested statements are expanded innermost first: while an {If}/{Or}
# argument still contains an opening brace, the nested "{...}{...}" pair is
# cut out, expanded recursively and spliced back in.  Returns the expanded
# string.  A nested brace group that cannot be expanded is left in place
# (rather than wiping the string or looping forever).
sub format_if_or {
    my ($format, $node_type) = @_;

    while ($format =~ m/^.*\{(?:If|Or)\}\{[^\}\{]*\{/) { # contains nested if/or statement, expand it

        my ($prefix, $nested_to_process, $suffix) =
            $format =~ m/^(.*\{(?:If|Or)\}\{[^\}\{]*)(\{[^\}]*\}\s*\{[^\}]*\})(.*)$/; # recursion step

        # The loop test only requires an opening brace; if what follows it is
        # not a "{...}{...}" pair there is nothing to expand, and carrying on
        # would replace the whole format string with nothing.
        last unless defined $nested_to_process;

        my $expanded = $prefix . &format_if_or($nested_to_process, $node_type) . $suffix;

        # A nested pair that is not an If/Or statement comes back unchanged;
        # stop here, otherwise the same text would be matched forever.
        last if $expanded eq $format;

        $format = $expanded;
    }

    if ($format =~ m/\{(If|Or)\}\{[^\}\{]*\}/) { # base step: contains if/or statement(s), but none nested
        # expand them
        $format =~ s/\{If\}\{([^\}]*)\}/&format_if($1, $node_type)/eg;
        $format =~ s/\{Or\}\{([^\}]*)\}/&format_or($1)/eg;
    }
    return $format;
}
746
# Convert a Greenstone 2 format string to gsf markup and write it to
# $writer inside a <gsf:template> element.
#
#   $writer     - object providing startTag/charactersXML/endTag
#   $old_format - the Greenstone 2 format string
#   $node_type  - "document", "classifier" or "horizontal"; selects the
#                 template's match/mode attributes and is also used as the
#                 type of [link] and [icon].  Any other value converts the
#                 string but writes nothing.
sub write_format {
    my ($writer, $old_format, $node_type) = @_;

    my $template = $old_format;

    # unescape quotes: \' becomes ' and \" becomes "
    $template =~ s/\\\'/\'/g;
    $template =~ s/\\\"/\"/g;

    # {If} and {Or} statements (including nested ones) go first, while the
    # [] items inside them are still in their original form
    $template = &format_if_or($template, $node_type);

    # the [] items that have a dedicated gsf element
    $template =~ s/\[Text\]/\<gsf:text\/\>/g;
    $template =~ s/\[num\]/\<gsf:num\/\>/g;
    $template =~ s/\[link\]/\<gsf:link type=\'$node_type\'\>/g;
    $template =~ s/\[\/link\]/\<\/gsf:link\>/g;
    $template =~ s/\[srclink\]/\<gsf:link type=\'source\'\>/g;
    $template =~ s/\[\/srclink\]/\<\/gsf:link\>/g;
    $template =~ s/\[icon\]/\<gsf:icon type=\'$node_type\'\/\>/g;
    $template =~ s/\[srcicon\]/\<gsf:icon type=\'source\'\/\>/g;

    # highlight markers have no equivalent and are dropped
    $template =~ s/\[\/?highlight\]//g;

    # whatever [] items are left are assumed to be metadata
    $template =~ s/\[([^\]]*)\]/&format_metadata($1)/eg;

    # some html tidy: <br> becomes <br />, <p> becomes <p />
    $template =~ s/\<br\>/\<br \/\>/g;
    $template =~ s/\<p\>/\<p \/\>/g;

    # put quotes around any unquoted lower-case attribute values
    $template =~ s/=([a-z]+)([> ])/=\'$1\'$2/g;

    # attributes of the enclosing template element, per node type
    my %template_atts_for = (
        'document'   => [ 'match'=>'documentNode' ],
        'classifier' => [ 'match'=>'classifierNode' ],
        'horizontal' => [ 'match'=>'classifierNode', 'mode'=>'horizontal' ],
    );

    my $template_atts = $template_atts_for{$node_type};
    if (defined $template_atts) {
        $writer->startTag('gsf:template', @$template_atts);
        $writer->charactersXML($template);
        $writer->endTag('gsf:template');
    }
}
795
# Turn the inside of a Greenstone 2 "[...]" metadata reference into a
# <gsf:metadata .../> element and return it as a string.
#
# Recognised forms of $metadata_string (an optional leading "cgisafe:" and
# a leading "ex." on the metadata name are stripped):
#   name                      plain metadata
#   parent:name               select='parent'
#   parent(Top):name          select='root'
#   parent(All'sep'):name     select='ancestors', optional separator
#   sibling:name              multiple='true'
#   sibling(All'sep'):name    multiple='true', optional separator
sub format_metadata {
    my ($metadata_string) = @_;

    # what shall we do with cgisafe??  For now the prefix is just dropped.
    $metadata_string =~ s/^cgisafe://;

    # peel off the parent/sibling selector, if there is one
    my ($select) = $metadata_string =~ /^(parent|sibling)/;
    $metadata_string =~ s/^(parent|sibling)//;

    my ($scope, $delim);
    if ($select) {
        # optional "(Top" / "(All" scope, with or without its closing bracket
        ($scope) = $metadata_string =~ /^\((Top|All)/;
        $metadata_string =~ s/^\((Top|All)\)?//;
        if ($scope) {
            # optional quoted separator followed by the closing bracket
            ($delim) = $metadata_string =~ /^\'([^\']*)\'\)/;
            $metadata_string =~ s/^\'([^\']*)\'\)//;
        }
    }
    $metadata_string =~ s/^://;
    # remove ex.
    $metadata_string =~ s/^ex\.//;

    # collect the attributes that follow the name; each one carries its
    # own trailing space
    my @extra_atts;
    if (defined $select && $select eq "sibling") {
        push @extra_atts, "multiple='true' ";
        push @extra_atts, "separator='$delim' " if defined $delim;
    }
    elsif (defined $select && $select eq "parent") {
        if (!defined $scope) {
            push @extra_atts, "select='parent' ";
        }
        elsif ($scope eq "Top") {
            push @extra_atts, "select='root' ";
        }
        elsif ($scope eq "All") {
            push @extra_atts, "select='ancestors' ";
            push @extra_atts, "separator='$delim' " if defined $delim;
        }
    }

    return join('', "<gsf:metadata name='$metadata_string' ", @extra_atts, "/>");
}
849
# Convert the argument of a Greenstone 2 {If}{...} statement into a
# <gsf:switch> element and return it as a string.
#
#   $if_string - "test,true-text" or "test,true-text,false-text".  The test
#                is either a single "[metadata]" item (an existence test) or
#                "lhs OP rhs" with OP one of eq ne lt gt le ge sw ew.
#   $node_type - accepted but not used here
#
# The false branch is only emitted when the string splits into exactly
# three comma-separated parts.
sub format_if {

    my ($if_string, $node_type) = @_;

    my @parts = split /,/, $if_string;
    my ($test, $true_option) = @parts[0, 1];
    my $false_option = (scalar (@parts) == 3) ? $parts[2] : undef;

    $test =~ s/^\s*//;
    $test =~ s/\s*$//;

    # Greenstone 2 comparison operator => gsf test name
    my %test_type_for = (
        'eq' => "equals",
        'sw' => "startsWith",
        'ew' => "endsWith",
        'ne' => "notEquals",
        'lt' => "lessThan",
        'gt' => "greaterThan",
        'le' => "lessThanOrEquals",
        'ge' => "greaterThanOrEquals",
    );

    my ($test_meta, $test_type, $test_value);
    if ($test =~ /^(\[.+\])$/) {
        # a bare metadata item: test that it exists
        $test_meta = $1;
        $test_type = 'exists';
    } else {
        my ($lhs, $exp, $rhs) = $test =~ /^(.+)\s+(eq|ne|lt|gt|le|ge|sw|ew)\s+(.+)$/;
        $test_type = $test_type_for{$exp};

        # the metadata is whichever side is bracketed; if the left side
        # is not, assume the right side is
        ($test_meta, $test_value) = ($lhs =~ /^\[.+\]$/) ? ($lhs, $rhs)
                                                         : ($rhs, $lhs);

        #remove beginning and end quotes
        $test_value =~ s/^[\'\"]//;
        $test_value =~ s/[\'\"]$//;
    }

    my $test_atts = "test='$test_type' ";
    $test_atts .= "test-value='$test_value' " if defined $test_value;

    my @pieces = (
        "<gsf:switch>$test_meta",
        "<gsf:when $test_atts>$true_option</gsf:when>",
    );
    push @pieces, "<gsf:otherwise>$false_option</gsf:otherwise>" if defined $false_option;
    push @pieces, "</gsf:switch>";

    return join('', @pieces);
}
916
# Convert the argument of a Greenstone 2 {Or}{...} statement into a
# <gsf:choose-metadata> element and return it as a string.
#
# $or_string is a comma-separated list.  Each "[metadata]" item becomes a
# gsf:metadata element (via format_metadata); the first item that is not
# bracketed is written as the default value and ends the list.  An empty
# list yields an empty string.
sub format_or {
    my ($or_string) = @_;

    my @alternatives = split (',', $or_string);
    return "" if !scalar (@alternatives);

    my $choose = "<gsf:choose-metadata>";
    foreach my $alternative (@alternatives) {
        if (my ($meta_name) = $alternative =~ /^\[(.*)\]$/) {
            $choose .= &format_metadata($meta_name);
            next;
        }
        # a default value: nothing after it is considered
        $choose .= "<gsf:default>$alternative</gsf:default>";
        last;
    }
    $choose .= "</gsf:choose-metadata>";

    return $choose;
}
934
# Placeholder for opening the collection database.
# The GDBM tie is disabled (see the commented-out code below), so $db_file
# is accepted but not used and the returned handle is always undefined.
sub open_database {
    my ($db_file) = @_;

    my $database = undef;
#    tie (%$database, 'GDBM_File', $db_file, GDBM_READER, 0400) ||
#   die "Couldn't open database $db_file\n";

    return $database;
}
944
# Release the tie on the hash that $database refers to.
sub close_database {
    my $database = shift;
    untie %{$database};
}
# Report whether the classifier called $name lists its children
# horizontally.
#
#   $database - hash reference mapping classifier names to their database
#               records, or undef when no database is available (reading
#               the database does not work on windows)
#   $name     - classifier name, e.g. "CL1"
#
# Returns 1 when the record's <childtype> is "HList".  Returns 0 otherwise,
# including when there is no database or no record for $name.
sub isHorizontalClassifier {
    my ($database, $name) = @_;

    # without a database there is nothing to look up
    return 0 unless defined $database && defined $name;

    my $record = $database->{$name};
    return 0 unless defined $record;

    my ($childtype) = $record =~ /<childtype>(\w*)/;
    if (defined $childtype && $childtype eq "HList") {
        return 1;
    }
    return 0;
}
960#$writer->startTag('');
961#$writer->endTag('');
962#$writer->characters();
963#$writer->emptyTag('');
964
9651;
Note: See TracBrowser for help on using the browser.