source: main/trunk/greenstone3/bin/script/convert_coll_from_gs2.pl

Last change on this file was 36248, checked in by anupama, 23 months ago

Need to add the xmlns:gslib URL into the collectionConfig XML file generated when a GS2 coll_cfg is converted to a GS3 collection config XML using the FormatConversion wizard dialog. The part of the code that needed to insert the required URL, but wasn't doing so, is actually this convert_coll_from_gs2.pl script, which gets called even before the FormatConversion wizard: when you go to open a collection in GLI and the list of collections gets loaded, it's then that this perl script gets called and creates a collectionConfig.xml file. This reduced the number of issues in GS2 to GS3 converted collection config files. The remaining errors in today's converted collection were all in the displayItem tags (unescaped ampersands in URLs and unclosed paragraphs).

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 31.1 KB
Line 
1#!/usr/bin/perl -w
2
BEGIN {
    # Refuse to start unless every Greenstone environment variable the
    # script relies on is present (checked in the same order as before).
    foreach my $required (qw(GSDLHOME GSDL3HOME GSDL3SRCHOME GSDLOS)) {
        die "$required not set\n" unless defined $ENV{$required};
    }

    # Make the bundled Perl libraries loadable. The list is given in its
    # final @INC order: the GS3 cpan directory ends up ahead of the GS2
    # perllib directory.
    unshift(@INC,
            "$ENV{'GSDL3SRCHOME'}/lib/perl/cpan",
            "$ENV{'GSDLHOME'}/perllib");
}
11
12use colcfg;
13use docprint; # for sub escape_text
14use util;
15use parsargv;
16use FileHandle;
17use XML::Writer;
18#can't get this to work on windows
19#use GDBM_File;
20
21use strict;
22
# File-wide switch filled in from the -convert_format_stmts command line
# option; when false, GS2 format statements are passed through untouched
# (see write_format).
my $convert_format_stmts = 0;

# Run the conversion.
main();
# Prints the command line synopsis and the supported options to STDOUT.
sub print_usage() {
    my @usage_lines = (
        "Usage: convert_coll_from_gs2.pl [options] coll-name\n",
        "options:\n",
        " -collectdir Directory where collection lives.\n",
        " -verbosity Controls the amount of output.\n",
        " -defaultlang The language that is considered the default (for display text etc). defaults to 'en'\n",
        " -convert_format_stmts (Deprecated.) Switch this on if you want the old behaviour of this script, which is \n",
        " to process format statements using perl regular expressions.\n",
        " This option is deprecated in favour of using 'formatconverter' which interprets \n",
        " format statements directly using the same C++ parsing code as in GS2 runtime.\n\n",
    );

    # One print per line, exactly as the messages were emitted before.
    foreach my $usage_line (@usage_lines) {
        print STDOUT $usage_line;
    }
}
38
39
# Entry point. Reads a Greenstone 2 collection's etc/collect.cfg and
# index/build.cfg and writes the Greenstone 3 equivalents,
# etc/collectionConfig.xml and index/buildConfig.xml (both are overwritten
# if they already exist).
#
# Command line (taken from @ARGV): [options] coll-name, see print_usage.
# Dies when the arguments are invalid, when either GS2 config file is
# missing, or when an output file cannot be opened for writing.
#
# Notes on behaviour:
#  * index, level and subcollection elements are written in the order in
#    which they appear in the build.cfg maps; display items are written in
#    sorted order so that repeated runs give identical files.
#  * index display names come from the ".<indexname>" collectionmeta
#    entries, exactly like level display names.
#  * a plugin/classifier flag that is the last argument (and therefore has
#    no value) is written as a value-less <option/> instead of being lost.
sub main {

    my ($defaultlang, $verbosity, $collectdir);
    # note that no defaults are passed for most options as they're set
    # later (after we check the collect.cfg file)
    if (!&parsargv::parse(\@ARGV,
                          'verbosity/\d+/', \$verbosity,
                          'collectdir/.*/', \$collectdir,
                          'defaultlang/.*/', \$defaultlang,
                          'convert_format_stmts', \$convert_format_stmts)) {
        &print_usage();
        die "\n";
    }

    # get and check the collection name
    my ($collection) = @ARGV;
    if (!defined($collection) || $collection eq "") {
        die "No collection specified\n";
    }
    if ($collection eq "gs2model") {
        die "You cant convert the model collection\n";
    }

    if (!defined $collectdir || $collectdir eq "") {
        $collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");
    }

    if (!defined $defaultlang || $defaultlang eq "") {
        $defaultlang = 'en';
    }
    # add on the coll name
    $collectdir = &util::filename_cat ($collectdir, $collection);

    my $collconfigfilename = &util::filename_cat ($collectdir, "etc", "collect.cfg");
    print STDOUT "coll config=$collconfigfilename\n";
    my $collectcfg;
    if (-e $collconfigfilename) {
        $collectcfg = &colcfg::read_collect_cfg ($collconfigfilename);
    } else {
        print STDERR "collect.cfg not found!!";
        die "\n";
    }

    my $buildconfigfilename = &util::filename_cat ($collectdir, "index", "build.cfg");
    my $buildcfg;
    if (-e $buildconfigfilename) {
        $buildcfg = &colcfg::read_build_cfg ($buildconfigfilename);
    } else {
        print STDERR "build.cfg not found!!";
        die "\n";
    }

    my $colloutfile = &util::filename_cat ($collectdir, "etc", "collectionConfig.xml");
    if (-e $colloutfile) {
        print STDOUT "collectionConfig file already exists! overwriting it!\n";
    }

    my $buildoutfile = &util::filename_cat ($collectdir, "index", "buildConfig.xml");
    if (-e $buildoutfile) {
        print STDOUT "buildConfig file already exists! overwriting it!\n";
    }

    # The gdbm database lookup is disabled (it could not be made to work on
    # windows), so the handle stays undefined and is only passed through to
    # isHorizontalClassifier and close_database.
    my $database;

    my $buildtype = defined $buildcfg->{'buildtype'} ? $buildcfg->{'buildtype'} : 'mg';
    my $indexstem = defined $buildcfg->{'indexstem'} ? $buildcfg->{'indexstem'} : undef;
    my $infodbtype = $buildcfg->{'infodbtype'} || "gdbm";
    my $earliestDatestamp = $buildcfg->{'earliestdatestamp'} || undef;

    # Open both output files up front; fail with a clear message rather
    # than on a later method call on an undefined handle.
    my $buildoutput = IO::File->new($buildoutfile, 'w')
        or die "Unable to open $buildoutfile for writing: $!\n";
    binmode($buildoutput,":utf8");
    my $buildwriter = XML::Writer->new(OUTPUT => $buildoutput, NEWLINES => 1);

    $buildwriter->xmlDecl("UTF-8");
    $buildwriter->startTag('buildConfig', 'xmlns:gsf'=>"http://www.greenstone.org/greenstone3/schema/ConfigFormat");

    my $colloutput = IO::File->new($colloutfile, 'w')
        or die "Unable to open $colloutfile for writing: $!\n";
    binmode($colloutput,":utf8");
    my $collwriter = XML::Writer->new(OUTPUT => $colloutput, NEWLINES => 1);

    $collwriter->xmlDecl("UTF-8");
    $collwriter->startTag('CollectionConfig', 'xmlns:gsf'=>"http://www.greenstone.org/greenstone3/schema/ConfigFormat",
                          'xmlns:gslib'=>'http://www.greenstone.org/skinning',
                          'xmlns:xsl'=>'http://www.w3.org/1999/XSL/Transform');

    #output the collection metadata to the collectionConfig file
    $collwriter->startTag('metadataList');
    &output_metadata($collwriter,'default', 'creator', $collectcfg->{'creator'});
    &output_metadata($collwriter,'default', 'public', $collectcfg->{'public'});
    $collwriter->endTag('metadataList');

    #output the display collectionmeta to collectionConfig.xml
    my $collectionmeta = $collectcfg->{'collectionmeta'};
    if (defined $collectionmeta) {
        my %name_map = ('collectionname', 'name',
                        'collectionextra', 'description',
                        'iconcollection', 'icon',
                        'iconcollectionsmall', 'smallicon');

        $collwriter->startTag('displayItemList');
        foreach my $entry (sort keys %$collectionmeta) {
            # some metadata names need to be specially mapped to other names
            # most of them however, can retain their original names
            my $name = (defined $name_map{$entry}) ? $name_map{$entry} : $entry;
            foreach my $lang (sort keys %{$collectionmeta->{$entry}}) {
                my $value = $collectionmeta->{$entry}->{$lang};
                if ($entry =~ /^icon/) {
                    $value = format_icon_value($value);
                } else {
                    $value = tidy_up_display_item($value);
                }
                &output_display($collwriter, $name, $lang, $value);
            }
        }
        $collwriter->endTag('displayItemList');
    }

    # output building metadata to build config file
    $buildwriter->startTag('metadataList');
    &output_metadata($buildwriter,'', 'numDocs', $buildcfg->{'numdocs'});
    &output_metadata($buildwriter,'', 'buildType', $buildtype);
    &output_metadata($buildwriter,'', 'indexStem', $indexstem) if(defined $indexstem);
    &output_metadata($buildwriter,'', 'infodbType', $infodbtype);
    &output_metadata($buildwriter,'', 'earliestDatestamp', $earliestDatestamp) if(defined $earliestDatestamp);
    $buildwriter->endTag('metadataList');

    #indexes
    # maps index name to shortname
    my $indexmap = {};
    # keeps the order for indexes
    my @indexlist = ();
    my $defaultindex = "";
    my $maptype = ($buildtype eq "mg") ? "indexmap" : "indexfieldmap";
    if (defined $buildcfg->{$maptype}) {
        my ($map, $names) = _parse_name_map($buildcfg->{$maptype});
        $indexmap = $map;
        @indexlist = @$names;
        # the first index listed is the default one
        $defaultindex = $indexlist[0] if scalar(@indexlist);
    } else {
        print STDERR "$maptype not defined\n";
    }
    # an explicit default in collect.cfg wins (this is the long index name;
    # it is mapped to its shortname where the search service is written)
    if (defined $collectcfg->{'defaultindex'}) {
        $defaultindex = $collectcfg->{'defaultindex'};
    }

    # levels
    my $levelmap = {};
    my @levellist = ();
    my $default_search_level = "";
    my $default_search_level_shortname = "";
    my $default_retrieve_level = "Sec";
    if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
        if (defined $buildcfg->{'levelmap'}) {
            my ($map, $names) = _parse_name_map($buildcfg->{'levelmap'});
            $levelmap = $map;
            @levellist = @$names;
            if (scalar(@levellist)) {
                $default_search_level = $levellist[0];
                $default_search_level_shortname = $levelmap->{$levellist[0]};
            }
        }

        if (defined $collectcfg->{'defaultlevel'}) {
            $default_search_level = $collectcfg->{'defaultlevel'};
            $default_search_level_shortname = $levelmap->{$default_search_level};
        }
        if (defined $buildcfg->{'textlevel'}) {
            $default_retrieve_level = $buildcfg->{'textlevel'};
        }
    }
    # format stuff
    my $format = $collectcfg->{'format'};

    #output the search stuff to coll cfg
    $collwriter->startTag('search','type'=>$buildtype);
    foreach my $i (@indexlist) {
        $collwriter->startTag('index', 'name'=>$i);
        # the display names live in collectionmeta under ".<index name>"
        _write_display_names($collwriter, $collectionmeta, ".$i");
        $collwriter->endTag('index');
    }

    #output the defaultIndex to coll cfg
    $collwriter->emptyTag('defaultIndex','name'=>$defaultindex);

    # indexOptions
    if (defined $collectcfg->{'indexoptions'}) {
        foreach my $i (@{$collectcfg->{'indexoptions'}}) {
            $collwriter->emptyTag('indexOption', 'name'=>$i);
        }
    }

    #indexSubcollection
    my $indexsubcollections = $collectcfg->{'indexsubcollections'};
    if (defined $indexsubcollections) {
        foreach my $i (@$indexsubcollections) {
            $collwriter->startTag('indexSubcollection', 'name'=>$i);
            &output_display($collwriter, 'name', $defaultlang, $i);
            $collwriter->endTag('indexSubcollection');
        }
    }

    #subcollection
    my $subcollection = $collectcfg->{'subcollection'};
    if (defined $subcollection){
        foreach my $entry (sort keys %$subcollection){
            my $value = $subcollection->{$entry};
            $collwriter->emptyTag('subcollection','filter'=>$value,'name'=>$entry);
        }
    }

    #indexlanguage
    my $languages = $collectcfg->{'languages'};
    if (defined $languages){
        foreach my $i (@$languages){
            $collwriter->startTag('indexLanguage','name'=>$i);
            &output_display($collwriter, 'name', $defaultlang, $i);
            $collwriter->endTag('indexLanguage');
        }
    }

    # level stuff for mgpp/lucene
    if ($buildtype eq 'mgpp' || $buildtype eq 'lucene'){
        foreach my $l (@levellist) {
            $collwriter->startTag('level', 'name'=>$l);
            # the display names live in collectionmeta under ".<level name>"
            _write_display_names($collwriter, $collectionmeta, ".$l");
            $collwriter->endTag('level');
        }
        $collwriter->emptyTag('defaultLevel', 'name'=>$default_search_level);
    }

    # add in the search type
    if (defined $format->{'SearchTypes'}){
        $collwriter->startTag('format', 'name'=>"searchType");
        $collwriter->charactersXML($format->{'SearchTypes'});
        $collwriter->endTag('format');
    }

    # add in the format stuff (SearchVList takes precedence over VList)
    if (defined $format->{'SearchVList'}) {
        $collwriter->startTag('format');
        write_format($collwriter, $format->{'SearchVList'}, "document");
        $collwriter->endTag('format');
    }
    elsif (defined $format->{'VList'}) {
        $collwriter->startTag('format');
        write_format($collwriter, $format->{'VList'}, "document");
        $collwriter->endTag('format');
    }

    $collwriter->endTag('search');

    # import plugins
    # if ImagePlugin is added, then need to add in a replaceListRef element for gs2-image
    my $contains_image_plugin = 0;

    my $plugins = $collectcfg->{'plugin'};
    if (defined $plugins){
        $collwriter->startTag('import');
        $collwriter->startTag('pluginList');
        foreach my $pl (@$plugins) {
            my $pluginname = $pl->[0];
            if ($pluginname =~ m/^(ImagePlugin|ImagePlug|PagedImagePlugin)$/) {
                $contains_image_plugin = 1;
            }
            $collwriter->startTag('plugin','name'=>$pluginname);
            _write_option_list($collwriter, $pl);
            $collwriter->endTag('plugin');
        }
        $collwriter->endTag('pluginList');
        $collwriter->endTag('import');
    }

    $buildwriter->startTag('serviceRackList');

    my $service_type = "MG";
    if ($buildtype eq 'mgpp') {
        $service_type = "MGPP";
    } elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }

    #indexSubcollectionList
    my $defaultsubcollection = "";
    my @subcollist;
    my $subcolmap = {};
    if (defined $buildcfg->{'subcollectionmap'}) {
        foreach my $entry (@{$buildcfg->{'subcollectionmap'}}) {
            my ($name, $shortname) = split(/->/, $entry);
            if (!defined $name || !defined $shortname) {
                print STDERR "ignoring malformed subcollectionmap entry '$entry'\n";
                next;
            }
            if (!scalar(@subcollist)) {
                # the first subcollection listed is the default one
                $defaultsubcollection = $shortname;
            }
            push @subcollist, $name unless exists $subcolmap->{$name};
            $subcolmap->{$name} = $shortname;
        }
    }

    #do the retrieve service
    $buildwriter->startTag('serviceRack', 'name'=>"GS2".$service_type."Retrieve");
    if ($buildtype eq 'mgpp' || $buildtype eq 'lucene') {
        $buildwriter->emptyTag('defaultLevel', 'shortname'=>$default_retrieve_level);
    } elsif ($buildtype eq "mg") {
        $buildwriter->emptyTag('defaultIndex', 'shortname'=>$defaultindex);
    }

    if ((defined $defaultsubcollection) && ($defaultsubcollection ne "")) {
        $buildwriter->emptyTag('defaultIndexSubcollection', 'shortname'=>$defaultsubcollection);
    }

    if (defined $indexstem) {
        $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
    }
    $buildwriter->emptyTag('databaseType', 'name'=>$infodbtype) if (defined $infodbtype);

    # close off the Retrieve service
    $buildwriter->endTag('serviceRack');

    # add in the classifiers if needed
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;
    if (defined $collectcfg->{'classify'}) {
        $collwriter->startTag('browse');
        # add in default format if necessary
        if (defined $format->{"VList"} || defined $format->{"HList"}) {
            # global formats
            $collwriter->startTag('format');
            if (defined $format->{"VList"}) {
                # VLIst applies to both classifier and doc nodes
                write_format($collwriter, $format->{"VList"}, "document");
                write_format($collwriter, $format->{"VList"}, "classifier");
            }
            if (defined $format->{"HList"}) {
                # hlist is only for classifier nodes
                write_format($collwriter, $format->{"HList"}, "horizontal");
            }
            $collwriter->endTag('format');
        }
        foreach my $cl (@{$collectcfg->{'classify'}}) {
            # classifiers are numbered CL1, CL2, ... in collect.cfg order
            # (phind entries use up a number as well)
            my $name = "CL$count";
            $count++;
            my $classname = $cl->[0];
            if ($classname =~ /^phind$/i) {
                $phind=1;
                #should add it into coll config classifiers
                next;
            }

            my $horizontalAtTop = &isHorizontalClassifier($database, $name);
            if (not $started_classifiers) {
                # the Browse service rack is only opened once a real
                # (non-phind) classifier turns up
                $buildwriter->startTag('serviceRack', 'name'=>'GS2Browse');
                if (defined $indexstem) {
                    $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
                }
                $buildwriter->emptyTag('databaseType', 'name'=>$infodbtype) if (defined $infodbtype);

                $buildwriter->startTag('classifierList');
                $started_classifiers = 1;
            }
            my $content = ''; #use buttonname first, then metadata
            if ($classname eq "DateList") {
                $content = "Date";
            } else {
                for (my $i=0; $i<scalar(@$cl); $i++) {
                    my $arg = $cl->[$i];
                    if ($arg eq "-buttonname"){
                        $content = $cl->[$i+1];
                        last;
                    } elsif ($arg eq "-metadata") {
                        $content = $cl->[$i+1];
                    }
                    # an option given without its value leaves nothing to tidy
                    $content = '' unless defined $content;

                    # remove "ex." prefix from "ex.metaname" but not from "ex.namespace.metaname"
                    $content =~ s@ex\.([^.]+)(,|;|$)@$1$2@g;
                }
                $content = '' unless defined $content;
            }
            if ($horizontalAtTop) {
                $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content, 'horizontalAtTop'=>'true');
            } else {
                $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content);
            }

            $collwriter->startTag('classifier', 'name'=>$classname);
            _write_option_list($collwriter, $cl);

            my $vlist = $name."VList";
            my $hlist = $name."HList";
            my $dlist = "";
            if ($classname eq "DateList") {
                $dlist = "DateList";
            }
            # need to work out how to split into classifier and document
            if (defined $format->{$vlist} || defined $format->{$hlist} || defined $format->{$dlist}) {
                $collwriter->startTag('format');
                if (defined $format->{$vlist}) {
                    write_format($collwriter, $format->{$vlist}, "document");
                    write_format($collwriter, $format->{$vlist}, "classifier");
                }
                if (defined $format->{$hlist}) {
                    write_format($collwriter, $format->{$hlist}, "horizontal");
                }

                if (defined $format->{$dlist}) {
                    write_format($collwriter, $format->{$dlist}, "document");
                }
                $collwriter->endTag('format');
            }
            $collwriter->endTag('classifier');
        } #foreach classifier
        if ($started_classifiers) {
            # end the classifiers
            $buildwriter->endTag('classifierList');
            # close off the Browse service
            $buildwriter->endTag('serviceRack');
        }

        $collwriter->endTag('browse');
    }

    # the phind classifier is a separate service
    if ($phind) {
        $buildwriter->emptyTag('serviceRack', 'name'=>'PhindPhraseBrowse');
    }

    # do the search service
    $buildwriter->startTag('serviceRack', 'name'=>'GS2'.$service_type.'Search');
    $buildwriter->emptyTag('defaultIndex', 'shortname'=>$indexmap->{$defaultindex});
    $buildwriter->startTag('indexList');
    #for each index
    foreach my $i (@indexlist) {
        $buildwriter->emptyTag('index', 'name'=>$i, 'shortname'=>$indexmap->{$i});
    }
    $buildwriter->endTag('indexList');
    if (defined $indexstem) {
        $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
    }
    $buildwriter->emptyTag('databaseType', 'name'=>$infodbtype) if (defined $infodbtype);

    # index options
    if ($buildtype eq 'mg' || $buildtype eq 'mgpp') {
        $buildwriter->startTag('indexOptionList');
        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/ ) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        $buildwriter->emptyTag('indexOption', 'name'=>'stemIndexes', 'value'=>$stemindexes);

        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        $buildwriter->emptyTag('indexOption', 'name'=>'maxnumeric', 'value'=>$maxnumeric);

        $buildwriter->endTag('indexOptionList');
    }

    if ($buildtype eq 'mgpp' || $buildtype eq 'lucene') {

        # level info
        $buildwriter->emptyTag('defaultLevel', 'shortname'=>$default_search_level_shortname);
        $buildwriter->emptyTag('defaultDBLevel', 'shortname'=>$default_retrieve_level);
        $buildwriter->startTag('levelList');
        foreach my $l (@levellist) {
            $buildwriter->emptyTag('level', 'name'=>$l, 'shortname'=>$levelmap->{$l});
        }
        $buildwriter->endTag('levelList');

        # do the search types if there
        if (defined $collectcfg->{'searchtype'}) {
            $buildwriter->startTag('searchTypeList');
            foreach my $st (@{$collectcfg->{'searchtype'}}) {
                $buildwriter->emptyTag('searchType', 'name'=>$st);
            }
            $buildwriter->endTag('searchTypeList');
        } elsif (defined $format->{'SearchTypes'}) {
            #check format statement
            my $searchtype = $format->{'SearchTypes'};
            $buildwriter->startTag('searchTypeList');
            if ($searchtype =~ /form/) {
                $buildwriter->emptyTag('searchType', 'name'=>'form');
            }
            if ($searchtype =~ /plain/) {
                $buildwriter->emptyTag('searchType', 'name'=>'plain');
            }
            $buildwriter->endTag('searchTypeList');
        }
    }

    #indexLanguageList
    my $indexlanguages = $collectcfg->{'languages'};
    if (defined $indexlanguages){
        $buildwriter->startTag('indexLanguageList');
        foreach my $i (@$indexlanguages){
            $buildwriter->startTag('indexLanguage','name'=>$i);
            &output_display($buildwriter, 'name', $i, $i);
            $buildwriter->endTag('indexLanguage');
        }
        $buildwriter->endTag('indexLanguageList');

        # the first language listed is the default; nothing is written when
        # the list is empty (there is no language to name)
        my $defaultindexlanguage_shortname = $indexlanguages->[0];
        if (defined $defaultindexlanguage_shortname) {
            $buildwriter->startTag('defaultIndexLanguage', 'name'=>$defaultindexlanguage_shortname,'shortname'=>$defaultindexlanguage_shortname);
            $buildwriter->endTag('defaultIndexLanguage');
        }
    }

    if (scalar(@subcollist)>0){
        $buildwriter->startTag('indexSubcollectionList');
        foreach my $i (@subcollist){
            $buildwriter->emptyTag('indexSubcollection', 'name'=>$i, 'shortname'=>$subcolmap->{$i});
        }
        $buildwriter->endTag('indexSubcollectionList');
        $buildwriter->emptyTag('defaultIndexSubcollection', 'shortname'=>$defaultsubcollection);
    }

    $buildwriter->endTag('serviceRack');

    $buildwriter->endTag('serviceRackList');
    $buildwriter->endTag('buildConfig');

    # we add in the default replace list just in case we have macros in the
    # collection
    $collwriter->emptyTag('replaceListRef', 'id'=>'gs2-standard');
    $collwriter->emptyTag('replaceListRef', 'id'=>'gs2-image') if $contains_image_plugin;
    $collwriter->endTag('CollectionConfig');
    $collwriter->end();
    $buildwriter->end();
    $buildoutput->close();
    $colloutput->close();
    &close_database($database);
}

# Parses a list of "name->shortname" strings (the build.cfg map entries).
# Returns a hash ref mapping name to shortname and an array ref holding the
# names in their original order. Entries without "->" are reported on
# STDERR and skipped; a repeated name keeps its first position.
sub _parse_name_map {
    my ($entries) = @_;

    my %shortname_for;
    my @names;
    foreach my $entry (@$entries) {
        my ($name, $shortname) = $entry =~ /^(.*)\-\>(.*)$/;
        if (!defined $name) {
            print STDERR "ignoring malformed map entry '$entry'\n";
            next;
        }
        push @names, $name unless exists $shortname_for{$name};
        $shortname_for{$name} = $shortname;
    }
    return (\%shortname_for, \@names);
}

# Writes one <option> element per "-flag" found in a plugin or classifier
# argument list. $args is the array ref from collect.cfg whose first
# element is the plugin/classifier name. A flag followed by a non-flag
# argument gets that argument as its value; a flag followed by another
# flag, or by nothing at all, is written without a value.
sub _write_option_list {
    my ($writer, $args) = @_;

    for my $i (1 .. $#$args) {
        my $option_name = $args->[$i];
        next unless $option_name =~ /^-/;

        my $option_value = $args->[$i + 1];
        if (defined $option_value && $option_value !~ /^-/) {
            $writer->startTag('option','name'=>$option_name,'value'=>$option_value);
        } else {
            $writer->startTag('option','name'=>$option_name);
        }
        $writer->endTag('option');
    }
}

# Writes a 'name' displayItem for every language stored under
# $collectionmeta->{$key}. Does nothing when there is no such entry.
sub _write_display_names {
    my ($writer, $collectionmeta, $key) = @_;

    return unless defined $collectionmeta;
    my $value_for_lang = $collectionmeta->{$key};
    return unless ref($value_for_lang) eq 'HASH';

    foreach my $lang (sort keys %$value_for_lang) {
        &output_display($writer, 'name', $lang, $value_for_lang->{$lang});
    }
}
696
697
# Writes a single <metadata name="..."> element through $writer.
#   $writer    - object providing startTag/characters/endTag
#   $lang      - language code; 'default' is written as 'en', and the empty
#                string means no lang attribute at all
#   $metaname  - value for the name attribute
#   $metavalue - element text; nothing is written inside the element when
#                it is undefined
sub output_metadata {
    my ($writer, $lang, $metaname, $metavalue) = @_;

    $lang = 'en' if $lang eq 'default';

    # lang (when present) goes ahead of name
    my @attributes = ('name'=>$metaname);
    unshift(@attributes, 'lang'=>$lang) if $lang ne "";

    $writer->startTag('metadata', @attributes);
    if (defined $metavalue) {
        $writer->characters($metavalue);
    }
    $writer->endTag('metadata');
}
709
# Writes a single <displayItem name="..." lang="..."> element through
# $writer with $value as its text.
#   $lang - 'default' is written as 'en'; a value starting with '[' is
#           treated as a GS2 "[l=xx]" language specifier and reduced to the
#           xx part (it becomes undefined if that pattern is not found)
sub output_display {
    my ($writer, $name, $lang, $value) = @_;

    if ($lang eq 'default') {
        $lang = 'en';
    }
    elsif ($lang =~ /^\[/) {
        $lang = ($lang =~ /\[l=(.*)\]/) ? $1 : undef;
    }

    $writer->startTag('displayItem', 'name'=>$name, 'lang'=>$lang);
    $writer->characters($value);
    $writer->endTag('displayItem');
}
# Shortens a GS2 icon location that starts with a macro (leading '_') to
# the part following the first "images/". Any other value, and a macro
# value with nothing (or only "0") after "images/", is returned unchanged.
sub format_icon_value {
    my ($value) = @_;

    return $value unless $value =~ /^_/;

    if ($value =~ m{images/(.*)$}) {
        my $relative_path = $1;
        return $relative_path if $relative_path;
    }
    return $value;
}
731
# Cleans a GS2 collectionmeta display string for use in a GS3 displayItem
# and returns the result. The rewrites run in the order listed.
sub tidy_up_display_item {
    my ($value) = @_;

    my @rewrites = (
        [ qr/\\n/,                                  ''           ],  # remove \n
        [ qr/\\\'/,                                 "'"          ],  # replace \' with '
        [ qr/\\\"/,                                 '"'          ],  # replace \" with "
        [ qr/_httpprefix_/,                         '_httpsite_' ],  # replace _httpprefix_ with _httpsite_
        [ qr/_gwcgi_/,                              ''           ],  # drop the _gwcgi_ macro
        [ qr/[a-z][a-z]?=_cgiarg[a-z][a-z]?_&?/,    ''           ],  # drop x=_cgiargx_ style arguments
        [ qr/&p=/,                                  '&sa='       ],  # replace &p= with &sa=
    );

    foreach my $rewrite (@rewrites) {
        my ($pattern, $replacement) = @$rewrite;
        $value =~ s/$pattern/$replacement/g;
    }
    return $value;
}
747
# Expands the GS2 {If}{...} and {Or}{...} constructs found in $format into
# their gsf equivalents (via format_if and format_or) and returns the
# resulting string. Nested constructs are expanded from the innermost one
# outwards. $node_type is handed on to format_if.
#
# A nested brace group that cannot be expanded (it is not an If/Or pair, or
# it does not have the expected {..}{..} shape) stops the nesting pass and
# the statement is kept as it is, instead of looping forever or being
# wiped out.
sub format_if_or {
    my ($format, $node_type) = @_;

    # while we find nested if/or statements, recurse to find more nested if/or statements,
    # and try to expand (process) these nested statements starting from innermost going to outermost
    while ($format =~ m/^.*\{(?:If|Or)\}\{[^\}\{]*\{/) { # contains nested if/or statement, expand it

        my ($prefix, $nested_to_process, $suffix) =
            $format =~ m/^(.*\{(?:If|Or)\}\{[^\}\{]*)(\{[^\}]*\}\s*\{[^\}]*\})(.*)$/; # recursion step

        # the nested part is not a {..}{..} pair: nothing more can be expanded
        last unless defined $nested_to_process;

        my $expanded = $prefix . &format_if_or($nested_to_process, $node_type) . $suffix;

        # no progress was made, so another round would see the same string again
        last if $expanded eq $format;

        $format = $expanded;
    }

    if ($format =~ m/\{(?:If|Or)\}\{[^\}\{]*\}/) { # base step: contains if/or statement(s), but none nested
        # expand them
        $format =~ s/\{If\}\{([^\}]*)\}/&format_if($1, $node_type)/eg;
        $format =~ s/\{Or\}\{([^\}]*)\}/&format_or($1)/eg;
    }
    return $format;
}
769
# Writes one <gsf:template> element holding the GS2 format statement
# $old_format through $writer.
#   $node_type - "document", "classifier" or "horizontal"; selects the
#                match/mode attributes of the template. Nothing is written
#                for any other value.
# When the file-wide $convert_format_stmts flag is set, the statement is
# first translated to gsf elements with regular expressions; otherwise it
# is escaped and wrapped in <gsf:format-gs2> for later conversion.
sub write_format {
    my ($writer, $old_format, $node_type) = @_;

    # replace \' with '
    $old_format =~ s/\\\'/\'/g;
    # replace \" with "
    $old_format =~ s/\\\"/\"/g;

    if (!$convert_format_stmts) {
        # not converting format statements, leave them as GS2 format stmts,
        # so that formatconverter can convert them and users can oversee the conversion in GLI,
        # but nest the GS2 statements here in an xml tag that won't be processed by GS3.
        # html entities inside the statement are escaped since the <br> and <p> may not be correct for xml
        my $escaped_format = &docprint::escape_text($old_format);
        $old_format = "<gsf:format-gs2>" . $escaped_format . "</gsf:format-gs2>";
    }
    else {
        #convert [] to <gsf:...>
        # now handles nested {If} and {Or}
        $old_format = &format_if_or($old_format, $node_type);
        $old_format =~ s{\[Text\]}{<gsf:text/>}g;
        $old_format =~ s{\[num\]}{<gsf:num/>}g;
        $old_format =~ s{\[link\]}{<gsf:link type='$node_type'>}g;
        $old_format =~ s{\[/link\]}{</gsf:link>}g;
        $old_format =~ s{\[srclink\]}{<gsf:link type='source'>}g;
        $old_format =~ s{\[/srclink\]}{</gsf:link>}g;
        $old_format =~ s{\[icon\]}{<gsf:icon type='$node_type'/>}g;
        $old_format =~ s{\[srcicon\]}{<gsf:icon type='source'/>}g;

        # what to do with hightlight??
        $old_format =~ s{\[/?highlight\]}{}g;

        #now do the rest of the [] which are assumed to be metadata
        $old_format =~ s/\[([^\]]*)\]/&format_metadata($1)/eg;

        # some html tidy
        #turn <br> into <br />
        $old_format =~ s{<br>}{<br />}g;
        #turn <p> into <p />
        $old_format =~ s{<p>}{<p />}g;

        #put quotes around any atts
        $old_format =~ s{=([a-z]+)([> ])}{='$1'$2}g;
    }

    # template attributes for each supported node type
    my %template_attributes_for = (
        'document'   => [ 'match'=>'documentNode' ],
        'classifier' => [ 'match'=>'classifierNode' ],
        'horizontal' => [ 'match'=>'classifierNode', 'mode'=>'horizontal' ],
    );

    my $template_attributes = $template_attributes_for{$node_type};
    if (defined $template_attributes) {
        $writer->startTag('gsf:template', @$template_attributes);
        $writer->charactersXML($old_format);
        $writer->endTag('gsf:template');
    }
}
831
# Turns the inside of a GS2 [metadata] reference into a <gsf:metadata .../>
# element string.
# Handled parts of $metadata_string, in order:
#   cgisafe:                      prefix, dropped
#   parent / sibling              selector
#   (Top) / (All) / (All'sep')    scope and separator, only read after a selector
#   :                             separator before the name, dropped
#   ex.                           leading namespace, dropped
sub format_metadata {
    my ($metadata_string) = @_;

    # what shall we do with cgisafe??
    $metadata_string =~ s/^cgisafe://;

    my ($select, $scope, $delim);
    if ($metadata_string =~ s/^(parent|sibling)//) {
        $select = $1;
        if ($metadata_string =~ s/^\((Top|All)\)?//) {
            $scope = $1;
            if ($metadata_string =~ s/^\'([^\']*)\'\)//) {
                $delim = $1;
            }
        }
    }
    $metadata_string =~ s/^://;
    # remove ex.
    $metadata_string =~ s/^ex\.//;

    # collect the optional attributes, each with its trailing space
    my @extra_attributes;
    if (defined $select && $select eq "sibling") {
        push @extra_attributes, "multiple='true' ";
        push @extra_attributes, "separator='$delim' " if defined $delim;
    }
    elsif (defined $select && $select eq "parent") {
        if (!defined $scope) {
            push @extra_attributes, "select='parent' ";
        }
        elsif ($scope eq "Top") {
            push @extra_attributes, "select='root' ";
        }
        elsif ($scope eq "All") {
            push @extra_attributes, "select='ancestors' ";
            push @extra_attributes, "separator='$delim' " if defined $delim;
        }
    }

    return join('', "<gsf:metadata name='$metadata_string' ", @extra_attributes, "/>");
}
885
# Turns the body of a GS2 {If}{test,true[,false]} construct into a
# <gsf:switch> string.
#   $if_string - the comma separated text between the braces
#   $node_type - accepted for interface compatibility; not used here
# The test is either a lone [metadata] reference (an 'exists' test) or a
# comparison "lhs op rhs" with op one of eq ne lt gt le ge sw ew, where
# whichever side is not the [metadata] reference supplies the test value
# (one surrounding quote is stripped from each end).
# The false part is only used when there are exactly three comma separated
# parts. A test that fits neither form, or a missing true part, yields
# empty strings in the output rather than undefined-value warnings.
sub format_if {

    my ($if_string, $node_type) = @_;

    my @parts = split /,/, (defined $if_string ? $if_string : '');
    my $test = defined $parts[0] ? $parts[0] : '';
    my $true_option = defined $parts[1] ? $parts[1] : '';
    my $false_option;
    if (scalar (@parts) == 3) {
        $false_option = $parts[2];
    }
    $test =~ s/^\s*//;
    $test =~ s/\s*$//;

    # GS2 comparison operator => gsf test name
    my %test_type_for = (
        'eq' => "equals",
        'sw' => "startsWith",
        'ew' => "endsWith",
        'ne' => "notEquals",
        'lt' => "lessThan",
        'gt' => "greaterThan",
        'le' => "lessThanOrEquals",
        'ge' => "greaterThanOrEquals",
    );

    my $test_meta = '';
    my $test_type = '';
    my $test_value;
    if ($test =~ /^(\[.+\])$/) {
        $test_meta = $1;
        $test_type = 'exists';
    }
    elsif ($test =~ /^(.+)\s+(eq|ne|lt|gt|le|ge|sw|ew)\s+(.+)$/) {
        my ($lhs, $exp, $rhs) = ($1, $2, $3);
        $test_type = $test_type_for{$exp};
        if ($lhs =~ /^\[.+\]$/) {
            $test_meta = $lhs;
            $test_value = $rhs;
        } else {
            # assume rhs has meta
            $test_meta = $rhs;
            $test_value = $lhs;
        }

        #remove beginning and end quotes
        $test_value =~ s/^[\'\"]//;
        $test_value =~ s/[\'\"]$//;
    }

    my $test_atts = "test='$test_type' ";
    if (defined $test_value) {
        $test_atts .= "test-value='$test_value' ";
    }

    my $new_format = "<gsf:switch>$test_meta";
    $new_format .= "<gsf:when $test_atts>$true_option</gsf:when>";
    if (defined $false_option) {
        $new_format .="<gsf:otherwise>$false_option</gsf:otherwise>";
    }
    $new_format .= "</gsf:switch>";

    return $new_format;
}
952
# Turns the body of a GS2 {Or}{a,b,...} construct into a
# <gsf:choose-metadata> string. Each [metadata] alternative becomes a
# gsf:metadata element (via format_metadata); the first alternative that is
# not a [metadata] reference becomes the <gsf:default> and ends the list.
# Returns the empty string when $or_string has no alternatives.
sub format_or {
    my ($or_string) = @_;

    my @alternatives = split (',', $or_string);
    return "" if !scalar (@alternatives);

    my @pieces;
    foreach my $alternative (@alternatives) {
        if ($alternative =~ /^\[(.*)\]$/) {
            my $metadata_name = $1;
            push @pieces, format_metadata($metadata_name);
            next;
        }
        # a default value
        push @pieces, "<gsf:default>$alternative</gsf:default>";
        last;
    }

    return "<gsf:choose-metadata>" . join('', @pieces) . "</gsf:choose-metadata>";
}
970
# Placeholder for opening the collection's gdbm database.
# The GDBM_File tie is disabled (the module could not be made to work on
# windows), so $db_file is ignored and the returned handle is undefined.
sub open_database {
    my ($db_file) = @_;

    my $database;
#    tie (%$database, 'GDBM_File', $db_file, GDBM_READER, 0400) ||
#        die "Couldn't open database $db_file\n";

    return $database;
}
980
# Releases the tie on the hash that $database refers to.
# An undefined $database (which is what main passes while the gdbm lookup
# is disabled) is accepted and simply ignored, instead of relying on perl
# creating a throw-away hash for the untie call.
sub close_database {
    my ($database) = @_;

    return unless defined $database;
    untie %$database;
}
# Reports whether classifier $name should be displayed horizontally at the
# top. Always returns 0: the database lookup this relied on can't be made
# to work for windows, so $database is never consulted.
#
# The disabled lookup, kept for reference, was:
#     my $record = $database->{$name};
#     my ($childtype) = $record =~ /<childtype>(\w*)/;
#     return 1 if $childtype eq "HList";
sub isHorizontalClassifier {
    my ($database, $name) = @_;

    return 0;
}
996#$writer->startTag('');
997#$writer->endTag('');
998#$writer->characters();
999#$writer->emptyTag('');
1000
10011;
Note: See TracBrowser for help on using the repository browser.