source: main/trunk/greenstone3/bin/script/convert_coll_from_gs2.pl@ 29045

Last change on this file since 29045 was 29045, checked in by ak19, 10 years ago

Kathy suggested that the GS3 buildConfig.xml generated from the GS2 build.cfg should ideally have the same content as the buildConfig.xml generated by a rebuild of the collection, so that a collection converted from GS2 to GS3 can be previewed immediately without rebuilding. Immediate previewing already worked; the intention is nevertheless to make the automatically generated buildConfig.xml as close as possible to the one produced by a rebuild.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 31.1 KB
Line 
1#!/usr/bin/perl -w
2
BEGIN {
    # Refuse to start unless every Greenstone environment variable this
    # script relies on has been set.
    foreach my $required ('GSDLHOME', 'GSDL3HOME', 'GSDL3SRCHOME', 'GSDLOS') {
        die "$required not set\n" unless defined $ENV{$required};
    }

    # Make the Greenstone 2 perl library and the Greenstone 3 bundled CPAN
    # modules visible to the 'use' statements that follow.
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    unshift (@INC, "$ENV{'GSDL3SRCHOME'}/lib/perl/cpan");
}
11
12use colcfg;
13use docprint; # for sub escape_text
14use util;
15use parsargv;
16use FileHandle;
17use XML::Writer;
18#can't get this to work on windows
19#use GDBM_File;
20
21use strict;
22
# File-wide switch, set by the -convert_format_stmts option in main() and
# read by write_format(). Off by default.
my $convert_format_stmts = 0;

# Run the conversion.
main();
# Print the command-line synopsis and the description of each option to
# STDOUT.
sub print_usage() {
    my @usage_text = (
        "Usage: convert_coll_from_gs2.pl [options] coll-name\n",
        "options:\n",
        " -collectdir Directory where collection lives.\n",
        " -verbosity Controls the amount of output.\n",
        " -defaultlang The language that is considered the default (for display text etc). defaults to 'en'\n",
        " -convert_format_stmts (Deprecated.) Switch this on if you want the old behaviour of this script, which is \n",
        " to process format statements using perl regular expressions.\n",
        " This option is deprecated in favour of using 'formatconverter' which interprets \n",
        " format statements directly using the same C++ parsing code as in GS2 runtime.\n\n",
    );

    print STDOUT @usage_text;
}
38
39
# Entry point: convert one Greenstone 2 collection's configuration into the
# two Greenstone 3 XML configuration files.
#
# Command line (parsed from @ARGV): [options] coll-name, see print_usage().
#
# Reads   <collectdir>/<coll>/etc/collect.cfg and
#         <collectdir>/<coll>/index/build.cfg
# Writes  <collectdir>/<coll>/etc/collectionConfig.xml and
#         <collectdir>/<coll>/index/buildConfig.xml   (existing files are
#         overwritten after a notice on STDOUT).
#
# Dies if the arguments are invalid, no collection is named, either GS2
# config file is missing, or an output file cannot be opened for writing.
sub main {

    my ($defaultlang, $verbosity, $collectdir);
    # note that no defaults are passed for most options as they're set
    # later (after we check the collect.cfg file)
    if (!&parsargv::parse(\@ARGV,
                          'verbosity/\d+/', \$verbosity,
                          'collectdir/.*/', \$collectdir,
                          'defaultlang/.*/', \$defaultlang,
                          'convert_format_stmts', \$convert_format_stmts)) {
        &print_usage();
        die "\n";
    }

    # get and check the collection name
    my ($collection) = @ARGV;
    if (!defined($collection) || $collection eq "") {
        die "No collection specified\n";
    }
    if ($collection eq "gs2model") {
        die "You cant convert the model collection\n";
    }

    if (!defined $collectdir || $collectdir eq "") {
        $collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");
    }

    if (!defined $defaultlang || $defaultlang eq "") {
        $defaultlang = 'en';
    }
    # add on the coll name
    $collectdir = &util::filename_cat ($collectdir, $collection);

    my $collconfigfilename = &util::filename_cat ($collectdir, "etc", "collect.cfg");
    print STDOUT "coll config=$collconfigfilename\n";
    my $collectcfg;
    if (-e $collconfigfilename) {
        $collectcfg = &colcfg::read_collect_cfg ($collconfigfilename);
    } else {
        print STDERR "collect.cfg not found!!";
        die "\n";
    }

    my $buildconfigfilename = &util::filename_cat ($collectdir, "index", "build.cfg");
    my $buildcfg;
    if (-e $buildconfigfilename) {
        $buildcfg = &colcfg::read_build_cfg ($buildconfigfilename);
    } else {
        print STDERR "build.cfg not found!!";
        die "\n";
    }

    my $colloutfile = &util::filename_cat ($collectdir, "etc", "collectionConfig.xml");
    if (-e $colloutfile) {
        print STDOUT "collectionConfig file already exists! overwriting it!\n";
    }

    my $buildoutfile = &util::filename_cat ($collectdir, "index", "buildConfig.xml");
    if (-e $buildoutfile) {
        print STDOUT "buildConfig file already exists! overwriting it!\n";
    }

    # The collection database lookup is disabled (GDBM_File could not be made
    # to work on windows), so $database stays undefined throughout.
#    my $db_file = &util::filename_cat ($collectdir, "index", "text", "$collection.ldb");
    my $database;
#    if (-e $db_file) {
#        $database = &open_database($db_file);
#    } else {
#        print STDERR "gdbm database file $db_file not found!!";
#        die "\n";
#    }

    my $buildtype = (defined $buildcfg->{'buildtype'}) ? $buildcfg->{'buildtype'} : 'mg';

    # stays undef when build.cfg has no indexstem
    my $indexstem = $buildcfg->{'indexstem'};
    my $infodbtype = $buildcfg->{'infodbtype'} || "gdbm";
    my $earliestDatestamp = $buildcfg->{'earliestdatestamp'} || undef;

    # Open both output files up front and fail loudly: an unopened handle
    # would otherwise only surface as confusing errors from the XML writer.
    my $buildoutput = IO::File->new($buildoutfile, "w")
        or die "Cannot open $buildoutfile for writing: $!\n";
    binmode($buildoutput,":utf8");
    my $colloutput = IO::File->new($colloutfile, "w")
        or die "Cannot open $colloutfile for writing: $!\n";
    binmode($colloutput,":utf8");

    my $buildwriter = XML::Writer->new(OUTPUT => $buildoutput, NEWLINES => 1);
    $buildwriter->xmlDecl("UTF-8");
    $buildwriter->startTag('buildConfig', 'xmlns:gsf'=>"http://www.greenstone.org/greenstone3/schema/ConfigFormat");

    my $collwriter = XML::Writer->new(OUTPUT => $colloutput, NEWLINES => 1);
    $collwriter->xmlDecl("UTF-8");
    $collwriter->startTag('CollectionConfig', 'xmlns:gsf'=>"http://www.greenstone.org/greenstone3/schema/ConfigFormat", 'xmlns:xsl'=>'http://www.w3.org/1999/XSL/Transform');

    #output the collection metadata to the collectionConfig file
    $collwriter->startTag('metadataList');
    &output_metadata($collwriter, 'default', 'creator', $collectcfg->{'creator'});
    &output_metadata($collwriter, 'default', 'public', $collectcfg->{'public'});
    $collwriter->endTag('metadataList');

    #output the display collectionmeta to collectionConfig.xml
    my $collectionmeta = $collectcfg->{'collectionmeta'};
    if (defined $collectionmeta) {
        my %name_map = ('collectionname', 'name',
                        'collectionextra', 'description',
                        'iconcollection', 'icon',
                        'iconcollectionsmall', 'smallicon');

        $collwriter->startTag('displayItemList');
        # sorted so that repeated conversions produce identical files
        foreach my $entry (sort keys %$collectionmeta) {
            # some metadata names need to be specially mapped to other names
            # most of them however, can retain their original names
            my $name = (defined $name_map{$entry}) ? $name_map{$entry} : $entry;
            foreach my $lang (sort keys %{$collectionmeta->{$entry}}) {
                my $value = $collectionmeta->{$entry}->{$lang};
                if ($entry =~ /^icon/) {
                    $value = format_icon_value($value);
                } else {
                    $value = tidy_up_display_item($value);
                }
                &output_display($collwriter, $name, $lang, $value);
            }
        }
        $collwriter->endTag('displayItemList');
    }

    # output building metadata to build config file
    my $numdocs = $buildcfg->{'numdocs'};
    $buildwriter->startTag('metadataList');
    &output_metadata($buildwriter,'', 'numDocs', $numdocs);
    &output_metadata($buildwriter,'', 'buildType', $buildtype);
    &output_metadata($buildwriter,'', 'indexStem', $indexstem) if(defined $indexstem);
    &output_metadata($buildwriter,'', 'infodbType', $infodbtype);
    &output_metadata($buildwriter,'', 'earliestDatestamp', $earliestDatestamp) if(defined $earliestDatestamp);
    $buildwriter->endTag('metadataList');

    #indexes
    # $indexmap maps index name to shortname, @indexlist keeps build order
    my $indexmap = {};
    my @indexlist = ();
    my $defaultindex = "";
    my $maptype = ($buildtype eq "mg") ? "indexmap" : "indexfieldmap";
    if (defined $buildcfg->{$maptype}) {
        my ($map, $order) = &_parse_name_map($buildcfg->{$maptype});
        $indexmap = $map;
        @indexlist = @$order;
        # the first index built is the default unless collect.cfg says otherwise
        $defaultindex = $indexlist[0] if scalar(@indexlist);
    } else {
        print STDERR "$maptype not defined\n";
    }
    if (defined $collectcfg->{'defaultindex'}) {
        $defaultindex = $collectcfg->{'defaultindex'};
    }

    # levels
    my $levelmap = {};
    my @levellist = ();
    my $default_search_level = "";
    my $default_search_level_shortname = "";
    my $default_retrieve_level = "Sec";
    if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
        if (defined $buildcfg->{'levelmap'}) {
            my ($map, $order) = &_parse_name_map($buildcfg->{'levelmap'});
            $levelmap = $map;
            @levellist = @$order;
            if (scalar(@levellist)) {
                $default_search_level = $levellist[0];
                $default_search_level_shortname = $levelmap->{$levellist[0]};
            }
        }

        if (defined $collectcfg->{'defaultlevel'}) {
            $default_search_level = $collectcfg->{'defaultlevel'};
            $default_search_level_shortname = $levelmap->{$default_search_level};
        }
        if (defined $buildcfg->{'textlevel'}) {
            $default_retrieve_level = $buildcfg->{'textlevel'};
        }
    }

    # format stuff
    my $format = $collectcfg->{'format'};

    #output the search stuff to coll cfg
    $collwriter->startTag('search','type'=>$buildtype);
    # Iterate in build order (not hash order) so the output is stable.
    foreach my $i (@indexlist) {
        $collwriter->startTag('index', 'name'=>$i);
        # The display text for an index lives in collectionmeta under
        # ".<indexname>"; write that text (previously the index name itself
        # was written and the looked-up text was discarded).
        &_output_display_names($collwriter, $collectionmeta, ".$i");
        $collwriter->endTag('index');
    }

    #output the defaultIndex to coll cfg
    $collwriter->emptyTag('defaultIndex','name'=>$defaultindex);

    # indexOptions
    if (defined $collectcfg->{'indexoptions'}) {
        foreach my $i (@{$collectcfg->{'indexoptions'}}) {
            $collwriter->emptyTag('indexOption', 'name'=>$i);
        }
    }

    #indexSubcollection
    my $indexsubcollections = $collectcfg->{'indexsubcollections'};
    if (defined $indexsubcollections) {
        foreach my $i (@$indexsubcollections) {
            $collwriter->startTag('indexSubcollection', 'name'=>$i);
            &output_display($collwriter, 'name', $defaultlang, $i);
            $collwriter->endTag('indexSubcollection');
        }
    }

    #subcollection
    my $subcollection = $collectcfg->{'subcollection'};
    if (defined $subcollection) {
        foreach my $entry (sort keys %$subcollection) {
            my $value = $subcollection->{$entry};
            $collwriter->emptyTag('subcollection','filter'=>$value,'name'=>$entry);
        }
    }

    #indexlanguage
    my $languages = $collectcfg->{'languages'};
    if (defined $languages) {
        foreach my $i (@$languages) {
            $collwriter->startTag('indexLanguage','name'=>$i);
            &output_display($collwriter, 'name', $defaultlang, $i);
            $collwriter->endTag('indexLanguage');
        }
    }

    # level stuff for mgpp/lucene
    if ($buildtype eq 'mgpp' || $buildtype eq 'lucene') {
        foreach my $l (@levellist) {
            $collwriter->startTag('level', 'name'=>$l);
            #find the coll meta stuff
            &_output_display_names($collwriter, $collectionmeta, ".$l");
            $collwriter->endTag('level');
        }
        $collwriter->emptyTag('defaultLevel', 'name'=>$default_search_level);
    }

    # add in the search type
    if (defined $format->{'SearchTypes'}) {
        $collwriter->startTag('format', 'name'=>"searchType");
        $collwriter->charactersXML($format->{'SearchTypes'});
        $collwriter->endTag('format');
    }

    # add in the format stuff: SearchVList wins over the general VList
    if (defined $format->{'SearchVList'}) {
        $collwriter->startTag('format');
        write_format($collwriter, $format->{'SearchVList'}, "document");
        $collwriter->endTag('format');
    }
    elsif (defined $format->{'VList'}) {
        $collwriter->startTag('format');
        write_format($collwriter, $format->{'VList'}, "document");
        $collwriter->endTag('format');
    }

    $collwriter->endTag('search');

    # import plugins
    # if ImagePlugin is added, then need to add in a replaceListRef element for gs2-image
    my $contains_image_plugin = 0;

    my $plugins = $collectcfg->{'plugin'};
    if (defined $plugins) {
        $collwriter->startTag('import');
        $collwriter->startTag('pluginList');
        foreach my $pl (@$plugins) {
            my $pluginname = $pl->[0];
            if ($pluginname =~ m/^(ImagePlugin|ImagePlug|PagedImagePlugin)$/) {
                $contains_image_plugin = 1;
            }
            $collwriter->startTag('plugin','name'=>$pluginname);
            &_output_options($collwriter, $pl);
            $collwriter->endTag('plugin');
        }
        $collwriter->endTag('pluginList');
        $collwriter->endTag('import');
    }

    $buildwriter->startTag('serviceRackList');

    my $service_type = "MG";
    if ($buildtype eq 'mgpp') {
        $service_type = "MGPP";
    } elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }

    #indexSubcollectionList
    my $defaultsubcollection = "";
    my @subcollist;
    my $subcolmap = {};

    if (defined $buildcfg->{'subcollectionmap'}) {
        foreach my $l (@{$buildcfg->{'subcollectionmap'}}) {
            my @pair = split(/->/, $l);
            push @subcollist, $pair[0] unless exists $subcolmap->{$pair[0]};
            $subcolmap->{$pair[0]} = $pair[1];
            # the first subcollection listed is the default one
            $defaultsubcollection = $pair[1] if scalar(@subcollist) == 1 && $defaultsubcollection eq "";
        }
    }

    #do the retrieve service
    $buildwriter->startTag('serviceRack', 'name'=>"GS2".$service_type."Retrieve");
    if ($buildtype eq 'mgpp' || $buildtype eq 'lucene') {
        $buildwriter->emptyTag('defaultLevel', 'shortname'=>$default_retrieve_level);
    } elsif ($buildtype eq "mg") {
        $buildwriter->emptyTag('defaultIndex', 'shortname'=>$defaultindex);
    }

    if ((defined $defaultsubcollection) && ($defaultsubcollection ne "")) {
        $buildwriter->emptyTag('defaultIndexSubcollection', 'shortname'=>$defaultsubcollection);
    }

    if (defined $indexstem) {
        $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
    }
    $buildwriter->emptyTag('databaseType', 'name'=>$infodbtype) if (defined $infodbtype);

    # close off the Retrieve service
    $buildwriter->endTag('serviceRack');

    # add in the classifiers if needed
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;
    if (defined $collectcfg->{'classify'}) {
        $collwriter->startTag('browse');
        # add in default format if necessary
        if (defined $format->{"VList"} || defined $format->{"HList"}) {
            # global formats
            $collwriter->startTag('format');
            if (defined $format->{"VList"}) {
                # VLIst applies to both classifier and doc nodes
                write_format($collwriter, $format->{"VList"}, "document");
                write_format($collwriter, $format->{"VList"}, "classifier");
            }
            if (defined $format->{"HList"}) {
                # hlist is only for classifier nodes
                write_format($collwriter, $format->{"HList"}, "horizontal");
            }
            $collwriter->endTag('format');
        }

        foreach my $cl (@{$collectcfg->{'classify'}}) {
            # classifiers are numbered CL1, CL2, ... in collect.cfg order;
            # phind still consumes a number even though it is skipped here
            my $name = "CL$count";
            $count++;
            my $classname = $cl->[0];
            if ($classname =~ /^phind$/i) {
                $phind=1;
                #should add it into coll config classifiers
                next;
            }

            my $horizontalAtTop = &isHorizontalClassifier($database, $name);
            if (not $started_classifiers) {
                # open the Browse service lazily, on the first real classifier
                $buildwriter->startTag('serviceRack', 'name'=>'GS2Browse');
                if (defined $indexstem) {
                    $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
                }
                $buildwriter->emptyTag('databaseType', 'name'=>$infodbtype) if (defined $infodbtype);

                $buildwriter->startTag('classifierList');
                $started_classifiers = 1;
            }

            my $content = ''; #use buttonname first, then metadata
            if ($classname eq "DateList") {
                $content = "Date";
            } else {
                for (my $i=0; $i<scalar(@$cl); $i++) {
                    my $arg = $cl->[$i];
                    if ($arg eq "-buttonname") {
                        # an explicit button name wins and is used verbatim
                        $content = defined $cl->[$i+1] ? $cl->[$i+1] : '';
                        last;
                    } elsif ($arg eq "-metadata") {
                        $content = defined $cl->[$i+1] ? $cl->[$i+1] : '';
                    }

                    # remove "ex." prefix from "ex.metaname" but not from "ex.namespace.metaname"
                    $content =~ s@ex\.([^.]+)(,|;|$)@$1$2@g;
                }
            }
            if ($horizontalAtTop) {
                $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content, 'horizontalAtTop'=>'true');
            } else {
                $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content);
            }

            $collwriter->startTag('classifier', 'name'=>$classname);
            &_output_options($collwriter, $cl);

            my $vlist = $name."VList";
            my $hlist = $name."HList";
            my $dlist = "";
            if ($classname eq "DateList") {
                $dlist = "DateList";
            }
            # need to work out how to split into classifier and document
            if (defined $format->{$vlist} || defined $format->{$hlist} || defined $format->{$dlist}) {
                $collwriter->startTag('format');
                if (defined $format->{$vlist}) {
                    write_format($collwriter, $format->{$vlist}, "document");
                    write_format($collwriter, $format->{$vlist}, "classifier");
                }
                if (defined $format->{$hlist}) {
                    write_format($collwriter, $format->{$hlist}, "horizontal");
                }

                if (defined $format->{$dlist}) {
                    write_format($collwriter, $format->{$dlist}, "document");
                }
                $collwriter->endTag('format');
            }
            $collwriter->endTag('classifier');
        } #foreach classifier

        if ($started_classifiers) {
            # end the classifiers
            $buildwriter->endTag('classifierList');
            # close off the Browse service
            $buildwriter->endTag('serviceRack');
        }

        $collwriter->endTag('browse');
    }

    # the phind classifier is a separate service
    if ($phind) {
        $buildwriter->emptyTag('serviceRack', 'name'=>'PhindPhraseBrowse');
    }

    # do the search service
    $buildwriter->startTag('serviceRack', 'name'=>'GS2'.$service_type.'Search');
    $buildwriter->emptyTag('defaultIndex', 'shortname'=>$indexmap->{$defaultindex});
    $buildwriter->startTag('indexList');
    #for each index
    foreach my $i (@indexlist) {
        $buildwriter->emptyTag('index', 'name'=>$i, 'shortname'=>$indexmap->{$i});
    }
    $buildwriter->endTag('indexList');
    if (defined $indexstem) {
        $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
    }
    $buildwriter->emptyTag('databaseType', 'name'=>$infodbtype) if (defined $infodbtype);

    # index options
    if ($buildtype eq 'mg' || $buildtype eq 'mgpp') {
        $buildwriter->startTag('indexOptionList');
        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/ ) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        $buildwriter->emptyTag('indexOption', 'name'=>'stemIndexes', 'value'=>$stemindexes);

        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        $buildwriter->emptyTag('indexOption', 'name'=>'maxnumeric', 'value'=>$maxnumeric);

        $buildwriter->endTag('indexOptionList');
    }

    if ($buildtype eq 'mgpp' || $buildtype eq 'lucene') {

        # level info
        $buildwriter->emptyTag('defaultLevel', 'shortname'=>$default_search_level_shortname);
        $buildwriter->emptyTag('defaultDBLevel', 'shortname'=>$default_retrieve_level);
        $buildwriter->startTag('levelList');
        foreach my $l (@levellist) {
            $buildwriter->emptyTag('level', 'name'=>$l, 'shortname'=>$levelmap->{$l});
        }
        $buildwriter->endTag('levelList');

        # do the search types if there
        if (defined $collectcfg->{'searchtype'}) {
            $buildwriter->startTag('searchTypeList');
            foreach my $st (@{$collectcfg->{'searchtype'}}) {
                $buildwriter->emptyTag('searchType', 'name'=>$st);
            }
            $buildwriter->endTag('searchTypeList');
        } elsif (defined $format->{'SearchTypes'}) {
            #check format statement
            my $searchtype = $format->{'SearchTypes'};
            $buildwriter->startTag('searchTypeList');
            if ($searchtype =~ /form/) {
                $buildwriter->emptyTag('searchType', 'name'=>'form');
            }
            if ($searchtype =~ /plain/) {
                $buildwriter->emptyTag('searchType', 'name'=>'plain');
            }
            $buildwriter->endTag('searchTypeList');
        }
    }

    #indexLanguageList
    my $indexlanguages = $collectcfg->{'languages'};
    if (defined $indexlanguages) {
        # the first language listed is the default one
        my $defaultindexlanguage_shortname;
        $buildwriter->startTag('indexLanguageList');
        foreach my $i (@$indexlanguages) {
            $buildwriter->startTag('indexLanguage','name'=>$i);
            &output_display($buildwriter, 'name', $i, $i);
            $buildwriter->endTag('indexLanguage');
            $defaultindexlanguage_shortname = $i unless defined $defaultindexlanguage_shortname;
        }
        $buildwriter->endTag('indexLanguageList');
        $buildwriter->startTag('defaultIndexLanguage', 'name'=>$defaultindexlanguage_shortname,'shortname'=>$defaultindexlanguage_shortname);
        $buildwriter->endTag('defaultIndexLanguage');
    }

    if (scalar(@subcollist)>0) {
        $buildwriter->startTag('indexSubcollectionList');
        foreach my $i (@subcollist) {
            $buildwriter->emptyTag('indexSubcollection', 'name'=>$i, 'shortname'=>$subcolmap->{$i});
        }
        $buildwriter->endTag('indexSubcollectionList');
        $buildwriter->emptyTag('defaultIndexSubcollection', 'shortname'=>$defaultsubcollection);
    }

    $buildwriter->endTag('serviceRack');

    $buildwriter->endTag('serviceRackList');
    $buildwriter->endTag('buildConfig');

    # we add in the default replace list just in case we have macros in the
    # collection
    $collwriter->emptyTag('replaceListRef', 'id'=>'gs2-standard');
    $collwriter->emptyTag('replaceListRef', 'id'=>'gs2-image') if $contains_image_plugin;
    $collwriter->endTag('CollectionConfig');
    $collwriter->end();
    $buildwriter->end();
    $buildoutput->close();
    $colloutput->close();
    &close_database($database);
}

# Parse a list of "name->shortname" strings (as found in build.cfg map
# entries). Returns a hash ref mapping name to shortname and an array ref of
# the names in their original order. Entries that are not of that form are
# reported on STDERR and skipped.
sub _parse_name_map {
    my ($entries) = @_;

    my %shortname_for;
    my @names;
    foreach my $entry (@$entries) {
        my ($name, $shortname) = $entry =~ /^(.*)\-\>(.*)$/;
        if (!defined $name) {
            print STDERR "ignoring malformed map entry '$entry'\n";
            next;
        }
        push @names, $name unless exists $shortname_for{$name};
        $shortname_for{$name} = $shortname;
    }
    return (\%shortname_for, \@names);
}

# Write one 'name' displayItem per language for the collectionmeta entry
# $key (e.g. ".text"). Writes nothing when there is no such entry.
sub _output_display_names {
    my ($writer, $collectionmeta, $key) = @_;

    return unless defined $collectionmeta && defined $collectionmeta->{$key};
    my $text_for = $collectionmeta->{$key};
    foreach my $lang (sort keys %$text_for) {
        &output_display($writer, 'name', $lang, $text_for->{$lang});
    }
    return;
}

# Write the arguments of one plugin/classifier line as <option> elements.
# $args is the parsed line: element 0 is the plugin/classifier name, the rest
# are its arguments. An argument starting with '-' is an option name; it
# gets a 'value' attribute when the following argument exists and does not
# itself start with '-'. A flag that is the last argument is written too
# (it used to be dropped).
sub _output_options {
    my ($writer, $args) = @_;

    foreach my $i (1 .. $#$args) {
        my $option_name = $args->[$i];
        next unless $option_name =~ /^-/;

        my $option_value = $args->[$i+1];
        if (!defined $option_value || $option_value =~ /^-/) {
            $writer->startTag('option','name'=>$option_name);
        } else {
            $writer->startTag('option','name'=>$option_name,'value'=>$option_value);
        }
        $writer->endTag('option');
    }
    return;
}
694
695
# Write one <metadata> element through $writer.
#   $lang      - language code; 'default' is written as 'en', and the empty
#                string means the element gets no lang attribute
#   $metaname  - value of the name attribute
#   $metavalue - element text; nothing is written inside the element when
#                this is undefined
sub output_metadata {
    my ($writer, $lang, $metaname, $metavalue) = @_;

    if ($lang eq 'default') {
        $lang = 'en';
    }

    my @attributes = ('name'=>$metaname);
    unshift(@attributes, 'lang'=>$lang) unless $lang eq "";

    $writer->startTag('metadata', @attributes);
    if (defined $metavalue) {
        $writer->characters($metavalue);
    }
    $writer->endTag('metadata');
}
707
# Write one <displayItem> element through $writer.
#   $name  - value of the name attribute
#   $lang  - language: a plain code, 'default' (written as 'en'), or a
#            bracketed spec such as "[l=fr]" from which the code is extracted
#   $value - element text
sub output_display {
    my ($writer, $name, $lang, $value) = @_;

    if ($lang eq 'default') {
        $lang = 'en';
    }
    # a bracketed language spec: keep only what follows "l="
    if ($lang =~ /^\[/) {
        my ($code) = $lang =~ /\[l=(.*)\]/;
        $lang = $code;
    }

    $writer->startTag('displayItem', 'name'=>$name, 'lang'=>$lang);
    $writer->characters($value);
    $writer->endTag('displayItem');
}
# Reduce a GS2 icon value that starts with '_' (a macro) to the part after
# "images/". Any other value, or one without a usable "images/" part, is
# returned unchanged.
sub format_icon_value {
    my ($value) = @_;

    return $value unless $value =~ /^_/;

    my ($after_images) = $value =~ /images\/(.*)$/;
    return $after_images ? $after_images : $value;
}
729
# Clean up a GS2 display string for use in GS3: a fixed sequence of textual
# rewrites is applied, in order, to every occurrence.
sub tidy_up_display_item {
    my ($value) = @_;

    my @rewrites = (
        [ qr/\\n/,                               ''           ], # remove \n
        [ qr/\\\'/,                              "'"          ], # replace \' with '
        [ qr/\\\"/,                              '"'          ], # replace \" with "
        [ qr/_httpprefix_/,                      '_httpsite_' ], # replace _httpprefix_ with _httpsite_
        [ qr/_gwcgi_/,                           ''           ],
        [ qr/[a-z][a-z]?=_cgiarg[a-z][a-z]?_&?/, ''           ],
        [ qr/&p=/,                               '&sa='       ],
    );

    foreach my $rewrite (@rewrites) {
        my ($pattern, $replacement) = @$rewrite;
        $value =~ s/$pattern/$replacement/g;
    }
    return $value;
}
745
# Expand every {If}{...} and {Or}{...} statement in a GS2 format string,
# including nested ones, via format_if and format_or.
#   $format    - the GS2 format string
#   $node_type - passed through to format_if
# Returns the format string with the statements expanded; a string without
# any such statement is returned unchanged.
sub format_if_or {
    my ($format, $node_type) = @_;

    # As long as some if/or statement still contains a nested brace group,
    # cut out an inner group, expand it recursively and splice the result
    # back in, working from the innermost statement outwards.
    while ($format =~ m/^.*\{(?:If|Or)\}\{[^\}\{]*\{/) {
        my @pieces = $format =~ m/^(.*\{(?:If|Or)\}\{[^\}\{]*)(\{[^\}]*\}\s*\{[^\}]*\})(.*)$/g; # recursion step
        my ($before, $inner, $after) = @pieces;

        #print STDERR "prefix: |$before|\n\nnested: |$inner|\n\nsuffix: |$after|\n\n";
        my $expanded_inner = &format_if_or($inner, $node_type);
        $format = $before . $expanded_inner . $after;
    }

    # base step: nothing nested is left. If no if/or statement remains
    # either, the string is already final.
    return $format unless $format =~ m/\{(If|Or)\}\{[^\}\{]*\}/g;

    $format =~ s/\{If\}\{([^\}]*)\}/&format_if($1, $node_type)/eg;
    $format =~ s/\{Or\}\{([^\}]*)\}/&format_or($1)/eg;
    return $format;
}
767
# Write one GS2 format statement as a <gsf:template> element.
#   $writer     - XML writer for collectionConfig.xml
#   $old_format - the GS2 format statement
#   $node_type  - "document", "classifier" or "horizontal"; any other value
#                 writes nothing
# What goes inside the template depends on the file-wide
# $convert_format_stmts switch: either the statement rewritten with regular
# expressions into gsf elements, or the escaped GS2 statement wrapped in
# <gsf:format-gs2>.
sub write_format {
    my ($writer, $old_format, $node_type) = @_;

    # replace \' with '
    $old_format =~ s/\\\'/\'/g;
    # replace \" with "
    $old_format =~ s/\\\"/\"/g;

    if (!$convert_format_stmts) {
        # not converting format statements, leave them as GS2 format stmts,
        # so that formatconverter can convert them and users can oversee the conversion in GLI,
        # but nest the GS2 statements here in an xml tag that won't be processed by GS3

        # escape html entities inside the format statement since the <br> and <p> may not be correct for xml
        my $escaped = &docprint::escape_text($old_format);
        $old_format = "<gsf:format-gs2>" . $escaped . "</gsf:format-gs2>";
    }
    else {
        #convert [] to <gsf:...>
        # now handles nested {If} and {Or}
        $old_format = &format_if_or($old_format, $node_type);
        $old_format =~ s/\[Text\]/\<gsf:text\/\>/g;
        $old_format =~ s/\[num\]/\<gsf:num\/\>/g;
        $old_format =~ s/\[link\]/\<gsf:link type=\'$node_type\'\>/g;
        $old_format =~ s/\[\/link\]/\<\/gsf:link\>/g;
        $old_format =~ s/\[srclink\]/\<gsf:link type=\'source\'\>/g;
        $old_format =~ s/\[\/srclink\]/\<\/gsf:link\>/g;
        $old_format =~ s/\[icon\]/\<gsf:icon type=\'$node_type\'\/\>/g;
        $old_format =~ s/\[srcicon\]/\<gsf:icon type=\'source\'\/\>/g;

        # what to do with hightlight??
        $old_format =~ s/\[\/?highlight\]//g;

        #now do the rest of the [] which are assumed to be metadata
        $old_format =~ s/\[([^\]]*)\]/&format_metadata($1)/eg;

        # some html tidy
        #turn <br> into <br />
        $old_format =~ s/\<br\>/\<br \/\>/g;
        #turn <p> into <p />
        $old_format =~ s/\<p\>/\<p \/\>/g;

        #put quotes around any atts
        $old_format =~ s/=([a-z]+)([> ])/=\'$1\'$2/g;
    }

    # attributes of the enclosing template for each known node type
    my %template_atts_for = (
        'document'   => [ 'match'=>'documentNode' ],
        'classifier' => [ 'match'=>'classifierNode' ],
        'horizontal' => [ 'match'=>'classifierNode', 'mode'=>'horizontal' ],
    );

    my $template_atts = $template_atts_for{$node_type};
    if (defined $template_atts) {
        $writer->startTag('gsf:template', @$template_atts);
        $writer->charactersXML($old_format);
        $writer->endTag('gsf:template');
    }
}
829
# Turn the inside of a GS2 "[...]" metadata reference into a
# <gsf:metadata .../> element string.
#   $metadata_string - e.g. "Title", "ex.Title", "cgisafe:Title",
#                      "parent:Title", "parent(Top):Title",
#                      "parent(All': '):Title", "sibling(All'; '):Title"
# Returns the element as a string.
sub format_metadata {
    my ($metadata_string) = @_;

    #print STDERR "original meta = $metadata_string\n";

    # what shall we do with cgisafe?? (for now the prefix is just dropped)
    $metadata_string =~ s/^cgisafe://;

    # optional relation prefix
    my ($select) = $metadata_string =~ /^(parent|sibling)/;
    $metadata_string =~ s/^(parent|sibling)//;

    # optional "(Top)" / "(All'delimiter')" qualifier after the relation
    my ($scope, $delim);
    if ($select) {
        ($scope) = $metadata_string =~ /^\((Top|All)/;
        $metadata_string =~ s/^\((Top|All)\)?//;
        if ($scope) {
            ($delim) = $metadata_string =~ /^\'([^\']*)\'\)/;
            $metadata_string =~ s/^\'([^\']*)\'\)//;
        }
    }
    $metadata_string =~ s/^://;
    # remove ex.
    $metadata_string =~ s/^ex\.//;

    #print STDERR "select=$select, scope=$scope, delim=|$delim|, meta = $metadata_string\n";

    my @attributes = ("name='$metadata_string'");
    if (defined $select && $select eq "sibling") {
        push @attributes, "multiple='true'";
        push @attributes, "separator='$delim'" if defined $delim;
    }
    elsif (defined $select && $select eq "parent") {
        if (!defined $scope) {
            push @attributes, "select='parent'";
        } elsif ($scope eq "Top") {
            push @attributes, "select='root'";
        } elsif ($scope eq "All") {
            push @attributes, "select='ancestors'";
            push @attributes, "separator='$delim'" if defined $delim;
        }
    }

    my $new_format = "<gsf:metadata " . join(' ', @attributes) . " />";
    #print STDERR "$new_format\n";
    return $new_format;
}
883
# Turn the inside of a GS2 {If}{test,true[,false]} statement into a
# <gsf:switch> element string.
#   $if_string - the comma-separated contents of the second brace group
#   $node_type - accepted for symmetry with the caller; not used here
# The test is either a bare "[metadata]" (an existence test) or
# "lhs OP rhs" with OP one of eq ne lt gt le ge sw ew, where one side is the
# "[metadata]" and the other a (possibly quoted) value.
# Returns the element as a string.
sub format_if {
    my ($if_string, $node_type) = @_;
    #print STDERR "if string = $if_string\n";

    my @parts = split /,/, $if_string;
    my ($test, $true_option) = @parts[0, 1];
    # a false branch is only recognised when there are exactly three parts
    my $false_option = (scalar (@parts) == 3) ? $parts[2] : undef;

    $test =~ s/^\s*//;
    $test =~ s/\s*$//;

    # GS2 comparison operator => gsf:when test name
    my %test_type_for = (
        'eq' => "equals",
        'sw' => "startsWith",
        'ew' => "endsWith",
        'ne' => "notEquals",
        'lt' => "lessThan",
        'gt' => "greaterThan",
        'le' => "lessThanOrEquals",
        'ge' => "greaterThanOrEquals",
    );

    my ($test_meta, $test_type, $test_value);
    if ($test =~ /^(\[.+\])$/) {
        $test_meta = $1;
        $test_type = 'exists';
    } else {
        my ($lhs, $exp, $rhs) = $test =~ /^(.+)\s+(eq|ne|lt|gt|le|ge|sw|ew)\s+(.+)$/;
        #print STDERR "lhs, exp, rhs = $lhs, $exp, $rhs\n";
        $test_type = $test_type_for{$exp} if defined $exp;

        # whichever side is not the [metadata] is taken to be the value
        ($test_meta, $test_value) = ($lhs =~ /^\[.+\]$/) ? ($lhs, $rhs) : ($rhs, $lhs);

        #remove beginning and end quotes
        $test_value =~ s/^[\'\"]//;
        $test_value =~ s/[\'\"]$//;
    }

    my $test_atts = "test='$test_type' ";
    $test_atts .= "test-value='$test_value' " if defined $test_value;

    #print STDERR "test, true, false = $test, $true_option, $false_option\n";
    my @pieces = (
        "<gsf:switch>$test_meta",
        "<gsf:when $test_atts>$true_option</gsf:when>",
    );
    push @pieces, "<gsf:otherwise>$false_option</gsf:otherwise>" if defined $false_option;
    push @pieces, "</gsf:switch>";

    my $new_format = join('', @pieces);
    #print STDERR "new format = $new_format\n";
    return $new_format;
}
950
# Turn the inside of a GS2 {Or}{a,b,...} statement into a
# <gsf:choose-metadata> element string. Each "[metadata]" alternative is
# converted with format_metadata; the first alternative that is not of that
# form becomes the <gsf:default> and ends the list.
# Returns "" when there are no alternatives at all.
sub format_or {
    my ($or_string) = @_;

    my @alternatives = split (',', $or_string);
    return "" if !scalar (@alternatives);

    my @choices;
    foreach my $alternative (@alternatives) {
        if ($alternative =~ /^\[(.*)\]$/) {
            push @choices, &format_metadata($1);
            next;
        }
        # a default value: nothing after it is considered
        push @choices, "<gsf:default>$alternative</gsf:default>";
        last;
    }

    return join('', "<gsf:choose-metadata>", @choices, "</gsf:choose-metadata>");
}
968
# Placeholder for opening the collection's GDBM database read-only.
# The tie is disabled, so the returned handle is always undefined.
sub open_database {
    my ($db_file) = @_;

    my $database;
#    tie (%$database, 'GDBM_File', $db_file, GDBM_READER, 0400) ||
#        die "Couldn't open database $db_file\n";

    return $database;
}
978
# Release a database handle obtained from open_database by untying the hash
# it refers to.
#   $database - hash reference, or undef when no database was opened (always
#               the case while the tie in open_database is disabled); an
#               undefined handle is ignored rather than dereferenced.
sub close_database {
    my ($database) = @_;

    return unless defined $database;
    untie %$database;
}
# Report whether classifier $name should be shown as a horizontal list at
# the top. Always answers 0: the database lookup that would decide this is
# disabled (can't get this to work for windows) and is kept below only as a
# record of the intended check.
sub isHorizontalClassifier {
    my ($database, $name) = @_;

#    my $record = $database->{$name};
#    my ($childtype) = $record =~ /<childtype>(\w*)/;
#    if ($childtype eq "HList") {
#        return 1;
#    }
    return 0;
}
994#$writer->startTag('');
995#$writer->endTag('');
996#$writer->characters();
997#$writer->emptyTag('');
998
9991;
Note: See TracBrowser for help on using the repository browser.