source: main/trunk/greenstone3/bin/script/convert_coll_from_gs2.pl@ 25661

Last change on this file since 25661 was 25661, checked in by ak19, 12 years ago

Dr Bainbridge fixed the mode in which the buildConfig and collectionConfig output files are written, so that both are now written in UTF-8 mode.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 27.5 KB
Line 
1#!/usr/bin/perl -w
2
3BEGIN {
4 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
5 die "GSDL3HOME not set\n" unless defined $ENV{'GSDL3HOME'};
6 die "GSDL3SRCHOME not set\n" unless defined $ENV{'GSDL3SRCHOME'};
7 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
8 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
9 unshift (@INC, "$ENV{'GSDL3SRCHOME'}/lib/perl/cpan");
10}
11
12use colcfg;
13use util;
14use parsargv;
15use FileHandle;
16use XML::Writer;
17#can't get this to work on windows
18#use GDBM_File;
19
20use strict;
21
22&main();
# Print the command-line synopsis and the list of supported options to STDOUT.
# Takes no arguments.
sub print_usage() {
    my @usage_lines = (
        "Usage: convert_coll_from_gs2.pl [options] coll-name\n",
        "options:\n",
        " -collectdir Directory where collection lives.\n",
        " -verbosity Controls the amount of output.\n",
        " -defaultlang The language that is considered the default (for display text etc). defaults to 'en'\n\n",
    );

    foreach my $usage_line (@usage_lines) {
        print STDOUT $usage_line;
    }
}
31
32
# Entry point: convert a Greenstone 2 collection's configuration into the
# two Greenstone 3 XML configuration files.
#
# Reads (relative to <collectdir>/<coll-name>):
#   etc/collect.cfg    via colcfg::read_collect_cfg
#   index/build.cfg    via colcfg::read_build_cfg
# Writes (overwriting any existing file):
#   etc/collectionConfig.xml
#   index/buildConfig.xml
#
# Command-line options (parsed from @ARGV by parsargv::parse):
#   -verbosity, -collectdir, -defaultlang; the remaining argument is the
#   collection name.
#
# Dies if the arguments cannot be parsed, no collection is named, the
# collection is "gs2model", either input cfg file is missing, or either
# output file cannot be opened for writing.
sub main {

    my ($defaultlang, $verbosity, $collectdir);
    # note that no defaults are passed for most options as they're set
    # later (after we check the collect.cfg file)
    if (!&parsargv::parse(\@ARGV,
                          'verbosity/\d+/', \$verbosity,
                          'collectdir/.*/', \$collectdir,
                          'defaultlang/.*/', \$defaultlang)) {
        &print_usage();
        die "\n";
    }

    # get and check the collection name
    my ($collection) = @ARGV;
    if (!defined($collection) || $collection eq "") {
        die "No collection specified\n";
    }
    if ($collection eq "gs2model") {
        die "You cant convert the model collection\n";
    }

    if (!defined $collectdir || $collectdir eq "") {
        $collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");
    }

    if (!defined $defaultlang || $defaultlang eq "") {
        $defaultlang = 'en';
    }
    # add on the coll name
    $collectdir = &util::filename_cat ($collectdir, $collection);

    my $collconfigfilename = &util::filename_cat ($collectdir, "etc", "collect.cfg");
    print STDOUT "coll config=$collconfigfilename\n";
    my $collectcfg;
    if (-e $collconfigfilename) {
        $collectcfg = &colcfg::read_collect_cfg ($collconfigfilename);
    } else {
        print STDERR "collect.cfg not found!!";
        die "\n";
    }

    my $buildconfigfilename = &util::filename_cat ($collectdir, "index", "build.cfg");
    my $buildcfg;
    if (-e $buildconfigfilename) {
        $buildcfg = &colcfg::read_build_cfg ($buildconfigfilename);
    } else {
        print STDERR "build.cfg not found!!";
        die "\n";
    }

    my $colloutfile = &util::filename_cat ($collectdir, "etc", "collectionConfig.xml");
    if (-e $colloutfile) {
        print STDOUT "collectionConfig file already exists! overwriting it!\n";
    }

    my $buildoutfile = &util::filename_cat ($collectdir, "index", "buildConfig.xml");
    if (-e $buildoutfile) {
        print STDOUT "buildConfig file already exists! overwriting it!\n";
    }

    # The GDBM lookup is disabled (see the commented-out "use GDBM_File" at
    # the top of the file), so $database stays undefined for the whole run.
#    my $db_file = &util::filename_cat ($collectdir, "index", "text", "$collection.ldb");
    my $database;
#    if (-e $db_file) {
#        $database = &open_database($db_file);
#    } else {
#        print STDERR "gdbm database file $db_file not found!!";
#        die "\n";
#    }

    my $buildtype;
    if (defined $buildcfg->{'buildtype'}) {
        $buildtype = $buildcfg->{'buildtype'};
    } else {
        $buildtype = 'mg';
    }

    my $indexstem = undef;
    if (defined $buildcfg->{'indexstem'}) {
        $indexstem = $buildcfg->{'indexstem'};
    }

    # Open buildConfig.xml. The utf8 layer must go on *this* handle; the
    # previous code applied it to the (not yet declared) collection handle.
    my $buildoutput = IO::File->new($buildoutfile, "w")
        or die "Couldn't open $buildoutfile for writing: $!\n";
    binmode($buildoutput, ":utf8");
    my $buildwriter = new XML::Writer(OUTPUT => $buildoutput, NEWLINES => 1);

    $buildwriter->xmlDecl("UTF-8");
    $buildwriter->startTag('buildConfig', 'xmlns:gsf'=>"http://www.greenstone.org/greenstone3/schema/ConfigFormat");

    my $colloutput = IO::File->new($colloutfile, "w")
        or die "Couldn't open $colloutfile for writing: $!\n";
    binmode($colloutput, ":utf8");
    my $collwriter = new XML::Writer(OUTPUT => $colloutput, NEWLINES => 1);

    $collwriter->xmlDecl("UTF-8");
    $collwriter->startTag('CollectionConfig', 'xmlns:gsf'=>"http://www.greenstone.org/greenstone3/schema/ConfigFormat", 'xmlns:xsl'=>'http://www.w3.org/1999/XSL/Transform');

    #output the collection metadata to the collectionConfig file
    $collwriter->startTag('metadataList');

    my $creator = $collectcfg->{'creator'};
    &output_metadata($collwriter, 'default', 'creator', $creator);
    my $public = $collectcfg->{'public'};
    &output_metadata($collwriter, 'default', 'public', $public);

    $collwriter->endTag('metadataList');

    #output the display collectionmeta to collectionConfig.xml

    my $collectionmeta = $collectcfg->{'collectionmeta'};
    if (defined $collectionmeta) {
        # GS2 collectionmeta key -> GS3 displayItem name
        my %name_map = ('collectionname', 'name',
                        'collectionextra', 'description',
                        'iconcollection', 'icon',
                        'iconcollectionsmall', 'smallicon');

        $collwriter->startTag('displayItemList');
        foreach my $entry (keys %$collectionmeta) {
            if (defined $name_map{$entry}) {
                my $name = $name_map{$entry};
                foreach my $lang (keys %{$collectionmeta->{$entry}}) {
                    my $value = $collectionmeta->{$entry}->{$lang};
                    if ($entry =~ /^icon/) {
                        $value = format_icon_value($value);
                    } else {
                        $value = tidy_up_display_item($value);
                    }
                    &output_display($collwriter, $name, $lang, $value);
                }
            }
        }
        $collwriter->endTag('displayItemList');
    }

    # output building metadata to build config file
    my $numdocs = $buildcfg->{'numdocs'};
    $buildwriter->startTag('metadataList');
    &output_metadata($buildwriter, '', 'numDocs', $numdocs);
    &output_metadata($buildwriter, '', 'buildType', $buildtype);
    $buildwriter->endTag('metadataList');

    #indexes
    # maps index name to shortname
    my $indexmap = {};
    # keeps the order for indexes
    my @indexlist = ();
    my $defaultindex = "";
    my $first = 1;
    my $maptype = "indexfieldmap";
    if ($buildtype eq "mg") {
        $maptype = "indexmap";
    }
    if (defined $buildcfg->{$maptype}) {
        my $indexmap_t = $buildcfg->{$maptype};
        foreach my $i (@$indexmap_t) {
            my ($k, $v) = $i =~ /^(.*)\-\>(.*)$/;
            if (!defined $k) {
                # not of the form "name->shortname"; skip rather than
                # record an undefined hash key
                print STDERR "ignoring malformed $maptype entry '$i'\n";
                next;
            }
            $indexmap->{$k} = $v;
            push @indexlist, $k;
            if ($first == 1) {
                $defaultindex = $k;
                $first = 0;
            }
        }
    } else {
        print STDERR "$maptype not defined\n";
    }
    # we use the shortname for default index
    if (defined $collectcfg->{'defaultindex'}) {
        $defaultindex = $collectcfg->{'defaultindex'};
        #$defaultindex = $indexmap->{$defaultindex};
    }

    # levels
    my $levelmap = {};
    my @levellist = ();
    my $default_search_level = "";
    my $default_search_level_shortname = "";
    my $default_retrieve_level = "Sec";
    $first = 1;
    if ($buildtype eq "mgpp" || $buildtype eq "lucene") {
        if (defined $buildcfg->{'levelmap'}) {
            my $levelmap_t = $buildcfg->{'levelmap'};
            foreach my $l (@$levelmap_t) {
                my ($k, $v) = $l =~ /^(.*)\-\>(.*)$/;
                if (!defined $k) {
                    print STDERR "ignoring malformed levelmap entry '$l'\n";
                    next;
                }
                $levelmap->{$k} = $v;
                push @levellist, $k;
                if ($first) {
                    $default_search_level = $k;
                    $default_search_level_shortname = $v;
                    $first = 0;
                }
            }
        }

        if (defined $collectcfg->{'defaultlevel'}) {
            $default_search_level = $collectcfg->{'defaultlevel'};
            #$default_search_level = $levelmap->{$default_search_level};
            $default_search_level_shortname = $levelmap->{$default_search_level};
        }
        if (defined $buildcfg->{'textlevel'}) {
            $default_retrieve_level = $buildcfg->{'textlevel'};
        }
    }
    # format stuff
    my $format = $collectcfg->{'format'};

    #output the search stuff to coll cfg
    $collwriter->startTag('search', 'type'=>$buildtype);
    foreach my $i (keys %$indexmap) {
        $collwriter->startTag('index', 'name'=>$i);
        #find the coll meta stuff (stored under ".<indexname>")
        my $indexdisplay = ".$i";
        foreach my $lang (keys %{$collectionmeta->{$indexdisplay}}) {
            my $value = $collectionmeta->{$indexdisplay}->{$lang};
            # write the display text looked up above, not the index name
            output_display($collwriter, 'name', $lang, $value);
        }
        $collwriter->endTag('index');
    }

    #output the defaultIndex to coll cfg
    $collwriter->emptyTag('defaultIndex', 'name'=>$defaultindex);

    #indexSubcollection
    my $indexsubcollections = $collectcfg->{'indexsubcollections'};
    if (defined $indexsubcollections) {
        foreach my $i (@$indexsubcollections) {
            $collwriter->startTag('indexSubcollection', 'name'=>$i);
            &output_display($collwriter, 'name', $defaultlang, $i);
            $collwriter->endTag('indexSubcollection');
        }
    }

    #subcollection
    my $subcollection = $collectcfg->{'subcollection'};
    if (defined $subcollection) {
        foreach my $entry (keys %$subcollection) {
            my $value = $subcollection->{$entry};
            $collwriter->emptyTag('subcollection', 'filter'=>$value, 'name'=>$entry);
        }
    }

    #indexlanguage
    my $languages = $collectcfg->{'languages'};
    if (defined $languages) {
        foreach my $i (@$languages) {
            $collwriter->startTag('indexLanguage', 'name'=>$i);
            &output_display($collwriter, 'name', $defaultlang, $i);
            $collwriter->endTag('indexLanguage');
        }
    }

    # level stuff for mgpp/lucene
    if ($buildtype eq 'mgpp' || $buildtype eq 'lucene') {
        foreach my $l (keys %$levelmap) {
            $collwriter->startTag('level', 'name'=>$l);
            #find the coll meta stuff
            my $leveldisplay = ".$l";
            foreach my $lang (keys %{$collectionmeta->{$leveldisplay}}) {
                my $value = $collectionmeta->{$leveldisplay}->{$lang};
                output_display($collwriter, 'name', $lang, $value);
            }
            $collwriter->endTag('level');
        }
        $collwriter->emptyTag('defaultLevel', 'name'=>$default_search_level);
    }

    # add in the search type
    if (defined $format->{'SearchTypes'}) {
        $collwriter->startTag('format', 'name'=>"searchType");
        $collwriter->charactersXML($format->{'SearchTypes'});
        $collwriter->endTag('format');
    }

    # add in the format stuff: SearchVList wins over the generic VList
    if (defined $format->{'SearchVList'}) {
        $collwriter->startTag('format');
        write_format($collwriter, $format->{'SearchVList'}, "document");
        $collwriter->endTag('format');
    }
    elsif (defined $format->{'VList'}) {
        $collwriter->startTag('format');
        write_format($collwriter, $format->{'VList'}, "document");
        $collwriter->endTag('format');
    }

    $collwriter->endTag('search');

    # import plugins

    my $plugins = $collectcfg->{'plugin'};

    if (defined $plugins) {
        $collwriter->startTag('import');
        $collwriter->startTag('pluginList');
        foreach my $pl (@$plugins) {
            my ($pluginname) = @$pl[0];
            $collwriter->startTag('plugin', 'name'=>$pluginname);
            &_output_options($collwriter, $pl);
            $collwriter->endTag('plugin');
        }
        $collwriter->endTag('pluginList');
        $collwriter->endTag('import');
    }

    $buildwriter->startTag('serviceRackList');

    my $service_type = "MG";
    if ($buildtype eq 'mgpp') {
        $service_type = "MGPP";
    } elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }

    #indexSubcollectionList

    my $firstsubcollection = 1;
    my $defaultsubcollection = "";
    my @subcollist;
    my $subcolmap = {};

    if (defined $buildcfg->{'subcollectionmap'}) {
        my $subcolmap_t = $buildcfg->{'subcollectionmap'};

        foreach my $l (@$subcolmap_t) {
            my @pair = split(/->/, $l);
            $subcolmap->{$pair[0]} = $pair[1];
            push @subcollist, $pair[0];
            if ($firstsubcollection == 1) {
                # the default is recorded by its shortname
                $defaultsubcollection = $pair[1];
                $firstsubcollection = 0;
            }
        }
    }

    #do the retrieve service
    $buildwriter->startTag('serviceRack', 'name'=>"GS2".$service_type."Retrieve");
    if ($buildtype eq 'mgpp' || $buildtype eq 'lucene') {
        $buildwriter->emptyTag('defaultLevel', 'shortname'=>$default_retrieve_level);
    } elsif ($buildtype eq "mg") {
        $buildwriter->emptyTag('defaultIndex', 'shortname'=>$defaultindex);
    }

    if ((defined $defaultsubcollection) && ($defaultsubcollection ne "")) {
        $buildwriter->emptyTag('defaultIndexSubcollection', 'shortname'=>$defaultsubcollection);
    }

    if (defined $indexstem) {
        $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
    }
    # close off the Retrieve service
    $buildwriter->endTag('serviceRack');

    # add in the classifiers if needed

    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;
    if (defined $collectcfg->{'classify'}) {
        $collwriter->startTag('browse');
        # add in default format if necessary
        if (defined $format->{"VList"} || defined $format->{"HList"}) {
            # global formats
            $collwriter->startTag('format');
            if (defined $format->{"VList"}) {
                # VList applies to both classifier and doc nodes
                write_format($collwriter, $format->{"VList"}, "document");
                write_format($collwriter, $format->{"VList"}, "classifier");
            }
            if (defined $format->{"HList"}) {
                # hlist is only for classifier nodes
                write_format($collwriter, $format->{"HList"}, "horizontal");
            }
            $collwriter->endTag('format');
        }
        my $classifiers = $collectcfg->{'classify'};
        foreach my $cl (@$classifiers) {
            # classifiers are numbered CL1, CL2, ... in collect.cfg order;
            # phind consumes a number even though it is skipped here
            my $name = "CL$count";
            $count++;
            my ($classname) = @$cl[0];
            if ($classname =~ /^phind$/i) {
                $phind = 1;
                #should add it into coll config classifiers
                next;
            }

            my $horizontalAtTop = &isHorizontalClassifier($database, $name);
            if (not $started_classifiers) {
                # open the Browse service lazily, on the first real classifier
                $buildwriter->startTag('serviceRack', 'name'=>'GS2Browse');
                if (defined $indexstem) {
                    $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
                }

                $buildwriter->startTag('classifierList');
                $started_classifiers = 1;
            }
            my $content = ''; #use buttonname first, then metadata
            if ($classname eq "DateList") {
                $content = "Date";
            } else {
                for (my $i = 0; $i < scalar(@$cl); $i++) {
                    my $arg = @$cl[$i];
                    if ($arg eq "-buttonname") {
                        $content = @$cl[$i+1];
                        last;
                    } elsif ($arg eq "-metadata") {
                        $content = @$cl[$i+1];
                    }
                }
            }
            if ($horizontalAtTop) {
                $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content, 'horizontalAtTop'=>'true');
            } else {
                $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content);
            }

            # $collwriter->startTag('classifier', 'name'=>$name);
            $collwriter->startTag('classifier', 'name'=>$classname);
            &_output_options($collwriter, $cl);

            my $vlist = $name."VList";
            my $hlist = $name."HList";
            my $dlist = "";
            if ($classname eq "DateList") {
                $dlist = "DateList";
            }
            # need to work out how to split into classifier and document
            if (defined $format->{$vlist} || defined $format->{$hlist} || defined $format->{$dlist}) {
                $collwriter->startTag('format');
                if (defined $format->{$vlist}) {
                    write_format($collwriter, $format->{$vlist}, "document");
                    write_format($collwriter, $format->{$vlist}, "classifier");
                }
                if (defined $format->{$hlist}) {
                    write_format($collwriter, $format->{$hlist}, "horizontal");
                }

                if (defined $format->{$dlist}) {
                    write_format($collwriter, $format->{$dlist}, "document");
                }
                $collwriter->endTag('format');
            }
            $collwriter->endTag('classifier');
        } #foreach classifier
        if ($started_classifiers) {
            # end the classifiers
            $buildwriter->endTag('classifierList');
            # close off the Browse service
            $buildwriter->endTag('serviceRack');
        }

        $collwriter->endTag('browse');
    }

    # the phind classifier is a separate service
    if ($phind) {
        # if phind classifier
        $buildwriter->emptyTag('serviceRack', 'name'=>'PhindPhraseBrowse');
    }

    # do the search service
    $buildwriter->startTag('serviceRack', 'name'=>'GS2'.$service_type.'Search');
    #$buildwriter->emptyTag('defaultIndex', 'shortname'=>$defaultindex);
    $buildwriter->startTag('indexList');
    #for each index
    foreach my $i (@indexlist) {
        my $index = $indexmap->{$i};
        $buildwriter->emptyTag('index', 'name'=>$i, 'shortname'=>$index);
    }
    $buildwriter->endTag('indexList');
    if (defined $indexstem) {
        $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
    }

    # index options
    if ($buildtype eq 'mg' || $buildtype eq 'mgpp') {
        $buildwriter->startTag('indexOptionList');
        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/ ) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        $buildwriter->emptyTag('indexOption', 'name'=>'stemIndexes', 'value'=>$stemindexes);

        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        $buildwriter->emptyTag('indexOption', 'name'=>'maxnumeric', 'value'=>$maxnumeric);

        $buildwriter->endTag('indexOptionList');
    }

    if ($buildtype eq 'mgpp' || $buildtype eq 'lucene') {

        # level info
        $buildwriter->emptyTag('defaultLevel', 'shortname'=>$default_search_level_shortname);
        $buildwriter->emptyTag('defaultGDBMLevel', 'shortname'=>$default_retrieve_level);
        $buildwriter->startTag('levelList');
        foreach my $l (@levellist) {
            my $level = $levelmap->{$l};
            $buildwriter->emptyTag('level', 'name'=>$l, 'shortname'=>$level);
        }
        $buildwriter->endTag('levelList');

        # do the search types if there
        if (defined $collectcfg->{'searchtype'}) {
            $buildwriter->startTag('searchTypeList');
            foreach my $st (@{$collectcfg->{'searchtype'}}) {
                $buildwriter->emptyTag('searchType', 'name'=>$st);
            }
            $buildwriter->endTag('searchTypeList');
        } elsif (defined $format->{'SearchTypes'}) {
            #check format statement
            my $searchtype = $format->{'SearchTypes'};
            $buildwriter->startTag('searchTypeList');
            if ($searchtype =~ /form/) {
                $buildwriter->emptyTag('searchType', 'name'=>'form');
            }
            if ($searchtype =~ /plain/) {
                $buildwriter->emptyTag('searchType', 'name'=>'plain');
            }
            $buildwriter->endTag('searchTypeList');
        }
    }

    #indexLanguageList
    my $indexlanguages = $collectcfg->{'languages'};
    my $firstindexlanguage = 1;
    my $defaultindexlanguage_shortname;
    if (defined $indexlanguages) {
        $buildwriter->startTag('indexLanguageList');
        foreach my $i (@$indexlanguages) {
            $buildwriter->startTag('indexLanguage', 'name'=>$i);
            &output_display($buildwriter, 'name', $i, $i);
            $buildwriter->endTag('indexLanguage');
            if ($firstindexlanguage == 1) {
                $defaultindexlanguage_shortname = $i;
                $firstindexlanguage = 0;
            }
        }
        $buildwriter->endTag('indexLanguageList');
        $buildwriter->startTag('defaultIndexLanguage', 'name'=>$defaultindexlanguage_shortname, 'shortname'=>$defaultindexlanguage_shortname);
        $buildwriter->endTag('defaultIndexLanguage');
    }

    if (scalar(@subcollist) > 0) {

        $buildwriter->startTag('indexSubcollectionList');
        foreach my $i (keys %$subcolmap) {
            my $short_name = $subcolmap->{$i};
            $buildwriter->emptyTag('indexSubcollection', 'name'=>$i, 'shortname'=>$short_name);
        }

        $buildwriter->endTag('indexSubcollectionList');
        $buildwriter->emptyTag('defaultIndexSubcollection', 'shortname'=>$defaultsubcollection);
    }

    $buildwriter->endTag('serviceRack');

    $buildwriter->endTag('serviceRackList');
    $buildwriter->endTag('buildConfig');

    # we add in the default replace list just in case we have macros in the
    # collection
    $collwriter->emptyTag('replaceListRef', 'id'=>'gs2-standard');
    $collwriter->endTag('CollectionConfig');
    $collwriter->end();
    $buildwriter->end();
    $buildoutput->close();
    $colloutput->close();
    # the database is never opened in this version, so only close it if a
    # handle actually exists
    &close_database($database) if defined $database;
}

# Private helper for main: write one <option> element for every "-flag"
# found in a plugin/classifier argument list.
#   $writer - XML::Writer-style object (startTag/endTag are called on it)
#   $args   - array ref; element 0 is the plugin/classifier name, the rest
#             are its arguments as they appeared in collect.cfg
# An argument starting with '-' is an option name. If the following
# argument exists and does not itself start with '-', it is written as the
# option's value; otherwise the option is written as a bare flag. (The last
# argument being a flag used to be dropped entirely.)
sub _output_options {
    my ($writer, $args) = @_;

    for my $i (1 .. $#$args) {
        my $option_name = $args->[$i];
        next unless $option_name =~ /^-/;

        my $option_value = $args->[$i + 1];
        if (!defined $option_value || $option_value =~ /^-/) {
            $writer->startTag('option', 'name'=>$option_name);
        } else {
            $writer->startTag('option', 'name'=>$option_name, 'value'=>$option_value);
        }
        $writer->endTag('option');
    }
}
660
661
# Write a single <metadata> element through the given writer.
#   $writer    - object providing startTag/characters/endTag (XML::Writer)
#   $lang      - language code; 'default' is mapped to 'en', and an empty
#                or undefined value means no lang attribute is written
#   $metaname  - value for the element's name attribute
#   $metavalue - text content; an undefined value is written as empty text
#                (callers pass config entries that may be missing)
sub output_metadata {
    my ($writer, $lang, $metaname, $metavalue) = @_;

    # normalise missing values so the comparisons and the writer below
    # never see undef
    $lang = '' unless defined $lang;
    $metavalue = '' unless defined $metavalue;

    $lang = 'en' if $lang eq 'default';
    if ($lang ne "") {
        $writer->startTag('metadata', 'lang'=>$lang, 'name'=>$metaname);
    } else {
        $writer->startTag('metadata', 'name'=>$metaname);
    }
    $writer->characters($metavalue);
    $writer->endTag('metadata');
}
673
# Write a single <displayItem> element through the given writer.
#   $writer - object providing startTag/characters/endTag
#   $name   - value for the name attribute
#   $lang   - 'default' (mapped to 'en'), a plain language code, or a
#             bracketed "[l=xx]" form from which the code is extracted
#   $value  - text content of the element
sub output_display {
    my ($writer, $name, $lang, $value) = @_;

    if ($lang eq 'default') {
        $lang = 'en';
    }

    # bracketed form: keep only what follows "l=" inside the brackets
    # (left undefined when the bracketed text has no "l=" part)
    if ($lang =~ /^\[/) {
        my @captured = ($lang =~ /\[l=(.*)\]/);
        $lang = $captured[0];
    }

    $writer->startTag('displayItem', 'name'=>$name, 'lang'=>$lang);
    $writer->characters($value);
    $writer->endTag('displayItem');
}
# Reduce a macro-style icon path to what follows "images/".
# Only values beginning with '_' are examined; if such a value contains
# "images/" followed by a non-empty (true) remainder, that remainder is
# returned. Every other value is returned unchanged.
sub format_icon_value {
    my ($value) = @_;

    return $value unless $value =~ /^_/;

    my ($newvalue) = $value =~ /images\/(.*)$/;
    return $newvalue ? $newvalue : $value;
}
695
# Clean a GS2 display string for use in the GS3 config file.
# The rewrites are applied globally, in the order listed below, and the
# cleaned string is returned.
sub tidy_up_display_item {
    my ($value) = @_;

    my @rewrites = (
        [ qr/\\n/,                                  ''           ], # remove \n
        [ qr/\\\'/,                                 '\''         ], # replace \' with '
        [ qr/\\\"/,                                 '"'          ], # replace \" with "
        [ qr/_httpprefix_/,                         '_httpsite_' ], # replace _httpprefix_ with _httpsite_
        [ qr/_gwcgi_/,                              ''           ],
        [ qr/[a-z][a-z]?=_cgiarg[a-z][a-z]?_&?/,    ''           ],
        [ qr/&p=/,                                  '&sa='       ],
    );

    foreach my $rewrite (@rewrites) {
        my ($pattern, $replacement) = @$rewrite;
        $value =~ s/$pattern/$replacement/g;
    }

    return $value;
}
711
# Convert a GS2 format string to gsf markup and write it as a
# <gsf:template> element.
#   $writer     - object providing startTag/charactersXML/endTag
#   $old_format - the GS2 format statement
#   $node_type  - "document", "classifier" or "horizontal"; selects the
#                 template's attributes. Any other value writes nothing.
sub write_format {
    my ($writer, $old_format, $node_type) = @_;

    # replace \' with ' and \" with "
    $old_format =~ s/\\\'/\'/g;
    $old_format =~ s/\\\"/\"/g;

    #convert [] to <gsf:...>
    # assume no nesting {If} or {Or} for now
    $old_format =~ s/\{If\}\{([^\}]*)\}/&format_if($1, $node_type)/eg;
    $old_format =~ s/\{Or\}\{([^\}]*)\}/&format_or($1)/eg;

    # fixed bracket tokens and the markup each one turns into
    my @token_rules = (
        [ qr/\[Text\]/,      "<gsf:text/>"                    ],
        [ qr/\[num\]/,       "<gsf:num/>"                     ],
        [ qr/\[link\]/,      "<gsf:link type='$node_type'>"   ],
        [ qr/\[\/link\]/,    "</gsf:link>"                    ],
        [ qr/\[srclink\]/,   "<gsf:link type='source'>"       ],
        [ qr/\[\/srclink\]/, "</gsf:link>"                    ],
        [ qr/\[icon\]/,      "<gsf:icon type='$node_type'/>"  ],
        [ qr/\[srcicon\]/,   "<gsf:icon type='source'/>"      ],
    );
    foreach my $rule (@token_rules) {
        my ($pattern, $markup) = @$rule;
        $old_format =~ s/$pattern/$markup/g;
    }

    # what to do with highlight?? (currently just stripped)
    $old_format =~ s/\[\/?highlight\]//g;

    #now do the rest of the [] which are assumed to be metadata
    $old_format =~ s/\[([^\]]*)\]/&format_metadata($1)/eg;

    # some html tidy: <br> becomes <br />, <p> becomes <p />
    $old_format =~ s/\<br\>/\<br \/\>/g;
    $old_format =~ s/\<p\>/\<p \/\>/g;

    #put quotes around any atts
    $old_format =~ s/=([a-z]+)([> ])/=\'$1\'$2/g;

    # template attributes for each supported node type
    my %template_atts = (
        "document"   => [ 'match'=>'documentNode' ],
        "classifier" => [ 'match'=>'classifierNode' ],
        "horizontal" => [ 'match'=>'classifierNode', 'mode'=>'horizontal' ],
    );

    my $atts = $template_atts{$node_type};
    if (defined $atts) {
        $writer->startTag('gsf:template', @$atts);
        $writer->charactersXML($old_format);
        $writer->endTag('gsf:template');
    }
}
761
# Translate the inside of a GS2 "[...]" metadata reference into a
# <gsf:metadata .../> element string.
# Recognised pieces, consumed from the front of the string:
#   "cgisafe:"                 - dropped (no equivalent is emitted)
#   "parent" / "sibling"       - relation selector
#   "(Top" / "(All" [")"]      - scope, only looked for after a selector
#   "'<sep>')"                 - separator, only looked for after a scope
#   ":" and "ex."              - dropped
# Whatever remains is used as the metadata name.
sub format_metadata {
    my ($metadata_string) = @_;

    # what shall we do with cgisafe?? for now it is simply stripped
    $metadata_string =~ s/^cgisafe://;

    my ($select, $scope, $delim);
    if ($metadata_string =~ s/^(parent|sibling)//) {
        $select = $1;
        if ($metadata_string =~ s/^\((Top|All)\)?//) {
            $scope = $1;
            if ($metadata_string =~ s/^\'([^\']*)\'\)//) {
                $delim = $1;
            }
        }
    }
    $metadata_string =~ s/^://;
    # remove ex.
    $metadata_string =~ s/^ex\.//;

    # collect the attributes in output order; each is followed by one space
    my @atts = ("name='$metadata_string'");
    if (defined $select) {
        if ($select eq "sibling") {
            push @atts, "multiple='true'";
            push @atts, "separator='$delim'" if defined $delim;
        } elsif (!defined $scope) {
            # plain "parent"
            push @atts, "select='parent'";
        } elsif ($scope eq "Top") {
            push @atts, "select='root'";
        } elsif ($scope eq "All") {
            push @atts, "select='ancestors'";
            push @atts, "separator='$delim'" if defined $delim;
        }
    }

    return "<gsf:metadata " . join('', map { "$_ " } @atts) . "/>";
}
815
# Translate the argument list of a GS2 "{If}{test,true[,false]}" into a
# <gsf:switch> string.
#   $if_string - text between the braces, split on commas
#   $node_type - accepted for the caller's convenience; not used here
# The test is either a lone "[meta]" (an existence test) or
# "<lhs> <op> <rhs>" with op one of eq ne lt gt le ge sw ew, where
# whichever side is not a "[meta]" reference becomes the test value.
# A false branch is only emitted when there are exactly three parts.
sub format_if {
    my ($if_string, $node_type) = @_;

    my @parts = split /,/, $if_string;
    my ($test, $true_option) = @parts[0, 1];
    my $false_option = (scalar(@parts) == 3) ? $parts[2] : undef;

    # trim the test expression
    $test =~ s/^\s*//;
    $test =~ s/\s*$//;

    # GS2 comparison operator -> gsf test name
    my %test_type_for = (
        "eq" => "equals",
        "sw" => "startsWith",
        "ew" => "endsWith",
        "ne" => "notEquals",
        "lt" => "lessThan",
        "gt" => "greaterThan",
        "le" => "lessThanOrEquals",
        "ge" => "greaterThanOrEquals",
    );

    my ($test_meta, $test_type, $test_value);
    if ($test =~ /^(\[.+\])$/) {
        ($test_meta, $test_type) = ($1, 'exists');
    } else {
        my ($lhs, $exp, $rhs) = $test =~ /^(.+)\s+(eq|ne|lt|gt|le|ge|sw|ew)\s+(.+)$/;
        $test_type = $test_type_for{$exp};

        # the metadata is on the left if it looks like [..]; otherwise
        # assume rhs has meta
        ($test_meta, $test_value) =
            ($lhs =~ /^\[.+\]$/) ? ($lhs, $rhs) : ($rhs, $lhs);

        #remove beginning and end quotes
        $test_value =~ s/^[\'\"]//;
        $test_value =~ s/[\'\"]$//;
    }

    my $test_atts = "test='$test_type' ";
    $test_atts .= "test-value='$test_value' " if defined $test_value;

    my @pieces = (
        "<gsf:switch>$test_meta",
        "<gsf:when $test_atts>$true_option</gsf:when>",
    );
    if (defined $false_option) {
        push @pieces, "<gsf:otherwise>$false_option</gsf:otherwise>";
    }
    push @pieces, "</gsf:switch>";

    return join('', @pieces);
}
882
# Translate the argument list of a GS2 "{Or}{a,b,...}" into a
# <gsf:choose-metadata> string.
# Each comma-separated item of the form "[meta]" is converted with
# format_metadata; the first item that is not bracketed is written as the
# <gsf:default> and ends the list. An empty argument yields "".
sub format_or {
    my ($or_string) = @_;

    my @meta_list = split (',', $or_string);
    if (!scalar (@meta_list)) {
        return "";
    }

    my @choices;
    foreach my $m (@meta_list) {
        if ($m =~ /^\[(.*)\]$/) {
            push @choices, scalar(&format_metadata($1));
            next;
        }
        # a default value; anything after it is ignored
        push @choices, "<gsf:default>$m</gsf:default>";
        last;
    }

    return "<gsf:choose-metadata>" . join('', @choices) . "</gsf:choose-metadata>";
}
900
# Placeholder for opening the collection's GDBM database.
# The tie call is disabled (GDBM_File is not loaded by this script), so
# $db_file is ignored and an undefined value is always returned.
sub open_database {
    my ($db_file) = @_;

    my $database;
#    tie (%$database, 'GDBM_File', $db_file, GDBM_READER, 0400) ||
#        die "Couldn't open database $db_file\n";

    return $database;
}
910
# Untie the hash behind a database handle.
#   $database - hash reference as returned by open_database, or undef
# Does nothing when no handle is given, which is always the case while the
# GDBM code in open_database is disabled.
sub close_database {
    my ($database) = @_;

    # nothing was opened, so there is nothing to dereference or untie
    return unless defined $database;

    untie %$database;
}
# Report whether the named classifier should be shown horizontally.
#   $database - database handle (unused while the lookup is disabled)
#   $name     - classifier name, e.g. "CL1"
# Always returns 0: the database lookup can't be made to work on Windows,
# so it is switched off. The intended logic is kept below for reference.
sub isHorizontalClassifier {
    my ($database, $name) = @_;

    return 0; # can't get this to work for windows

    # Disabled lookup:
    #   my $record = $database->{$name};
    #   my ($childtype) = $record =~ /<childtype>(\w*)/;
    #   return 1 if $childtype eq "HList";
    #   return 0;
}
926#$writer->startTag('');
927#$writer->endTag('');
928#$writer->characters();
929#$writer->emptyTag('');
930
9311;
Note: See TracBrowser for help on using the repository browser.