source: main/trunk/greenstone3/bin/script/convert_coll_from_gs2.pl@ 25731

Last change on this file since 25731 was 25731, checked in by ak19, 12 years ago

All collectionmeta, not just description (collectionextra) and name (collectionname), need to be written out as displayItem elements.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 28.0 KB
RevLine 
[3688]1#!/usr/bin/perl -w
2
BEGIN {
    # The script refuses to start unless all four Greenstone environment
    # variables are defined.
    foreach my $required_var ('GSDLHOME', 'GSDL3HOME', 'GSDL3SRCHOME', 'GSDLOS') {
        die "$required_var not set\n" unless defined $ENV{$required_var};
    }

    # Put the Greenstone 3 bundled CPAN modules first on the search path,
    # followed by the Greenstone 2 perllib directory.
    unshift(@INC,
            "$ENV{'GSDL3SRCHOME'}/lib/perl/cpan",
            "$ENV{'GSDLHOME'}/perllib");
}
11
12use colcfg;
13use util;
14use parsargv;
15use FileHandle;
16use XML::Writer;
[10658]17#can't get this to work on windows
18#use GDBM_File;
[3688]19
[10360]20use strict;
21
[3688]22&main();
# Print the command line synopsis and the list of supported options to STDOUT.
sub print_usage() {
    my @usage_lines = (
        "Usage: convert_coll_from_gs2.pl [options] coll-name\n",
        "options:\n",
        " -collectdir Directory where collection lives.\n",
        " -verbosity Controls the amount of output.\n",
        " -defaultlang The language that is considered the default (for display text etc). defaults to 'en'\n\n",
    );
    print STDOUT join('', @usage_lines);
}
31
[13828]32
# Entry point: reads etc/collect.cfg and index/build.cfg of the named
# Greenstone 2 collection and writes etc/collectionConfig.xml and
# index/buildConfig.xml next to them.
#
# Command line: [-collectdir dir] [-verbosity n] [-defaultlang lang] coll-name
# Dies if the collection name is missing, if either input file is absent, or
# if either output file cannot be opened for writing.
sub main {

    my ($defaultlang, $verbosity, $collectdir);
    # note that no defaults are passed for most options as they're set
    # later (after we check the collect.cfg file)
    if (!&parsargv::parse(\@ARGV,
                          'verbosity/\d+/', \$verbosity,
                          'collectdir/.*/', \$collectdir,
                          'defaultlang/.*/', \$defaultlang)) {
        &print_usage();
        die "\n";
    }

    # get and check the collection name
    my ($collection) = @ARGV;
    if (!defined($collection) || $collection eq "") {
        die "No collection specified\n";
    }
    if ($collection eq "gs2model") {
        die "You cant convert the model collection\n";
    }

    if (!defined $collectdir || $collectdir eq "") {
        $collectdir = &util::filename_cat($ENV{'GSDLHOME'}, "collect");
    }

    if (!defined $defaultlang || $defaultlang eq "") {
        $defaultlang = 'en';
    }
    # add on the coll name
    $collectdir = &util::filename_cat($collectdir, $collection);

    # ---- read the two Greenstone 2 configuration files ----
    my $collconfigfilename = &util::filename_cat($collectdir, "etc", "collect.cfg");
    print STDOUT "coll config=$collconfigfilename\n";
    if (!-e $collconfigfilename) {
        print STDERR "collect.cfg not found!!";
        die "\n";
    }
    my $collectcfg = &colcfg::read_collect_cfg($collconfigfilename);

    my $buildconfigfilename = &util::filename_cat($collectdir, "index", "build.cfg");
    if (!-e $buildconfigfilename) {
        print STDERR "build.cfg not found!!";
        die "\n";
    }
    my $buildcfg = &colcfg::read_build_cfg($buildconfigfilename);

    # ---- work out the output files ----
    my $colloutfile = &util::filename_cat($collectdir, "etc", "collectionConfig.xml");
    if (-e $colloutfile) {
        print STDOUT "collectionConfig file already exists! overwriting it!\n";
    }

    my $buildoutfile = &util::filename_cat($collectdir, "index", "buildConfig.xml");
    if (-e $buildoutfile) {
        print STDOUT "buildConfig file already exists! overwriting it!\n";
    }

    # The GDBM lookup is disabled (see open_database), so no database is
    # opened; the undefined handle is still passed to the database helpers.
    my $database;

    my $buildtype = (defined $buildcfg->{'buildtype'}) ? $buildcfg->{'buildtype'} : 'mg';
    my $uses_levels = ($buildtype eq 'mgpp' || $buildtype eq 'lucene');

    my $indexstem = undef;
    if (defined $buildcfg->{'indexstem'}) {
        $indexstem = $buildcfg->{'indexstem'};
    }

    # ---- open both writers; fail loudly rather than calling methods on undef ----
    my $buildoutput = IO::File->new($buildoutfile, "w");
    die "Could not open $buildoutfile for writing: $!\n" unless defined $buildoutput;
    binmode($buildoutput, ":utf8");
    my $buildwriter = XML::Writer->new(OUTPUT => $buildoutput, NEWLINES => 1);

    $buildwriter->xmlDecl("UTF-8");
    $buildwriter->startTag('buildConfig', 'xmlns:gsf'=>"http://www.greenstone.org/greenstone3/schema/ConfigFormat");

    my $colloutput = IO::File->new($colloutfile, "w");
    die "Could not open $colloutfile for writing: $!\n" unless defined $colloutput;
    binmode($colloutput, ":utf8");
    my $collwriter = XML::Writer->new(OUTPUT => $colloutput, NEWLINES => 1);

    $collwriter->xmlDecl("UTF-8");
    $collwriter->startTag('CollectionConfig', 'xmlns:gsf'=>"http://www.greenstone.org/greenstone3/schema/ConfigFormat", 'xmlns:xsl'=>'http://www.w3.org/1999/XSL/Transform');

    # ---- collection metadata -> collectionConfig.xml ----
    $collwriter->startTag('metadataList');
    &output_metadata($collwriter, 'default', 'creator', $collectcfg->{'creator'});
    &output_metadata($collwriter, 'default', 'public', $collectcfg->{'public'});
    $collwriter->endTag('metadataList');

    # ---- display collectionmeta -> collectionConfig.xml ----
    my $collectionmeta = $collectcfg->{'collectionmeta'};
    if (defined $collectionmeta) {
        # some metadata names need to be specially mapped to other names
        # most of them however, can retain their original names
        my %name_map = ('collectionname', 'name',
                        'collectionextra', 'description',
                        'iconcollection', 'icon',
                        'iconcollectionsmall', 'smallicon');

        $collwriter->startTag('displayItemList');
        # sorted so that repeated conversions produce identical files
        foreach my $entry (sort keys %$collectionmeta) {
            my $name = (defined $name_map{$entry}) ? $name_map{$entry} : $entry;
            foreach my $lang (sort keys %{$collectionmeta->{$entry}}) {
                my $value = $collectionmeta->{$entry}->{$lang};
                if ($entry =~ /^icon/) {
                    $value = format_icon_value($value);
                } else {
                    $value = tidy_up_display_item($value);
                }
                &output_display($collwriter, $name, $lang, $value);
            }
        }
        $collwriter->endTag('displayItemList');
    }

    # ---- building metadata -> buildConfig.xml ----
    $buildwriter->startTag('metadataList');
    &output_metadata($buildwriter, '', 'numDocs', $buildcfg->{'numdocs'});
    &output_metadata($buildwriter, '', 'buildType', $buildtype);
    $buildwriter->endTag('metadataList');

    # ---- indexes ----
    # $indexmap maps index name to shortname, @indexlist keeps the order
    my $indexmap = {};
    my @indexlist = ();
    my $defaultindex = "";
    my $maptype = ($buildtype eq "mg") ? "indexmap" : "indexfieldmap";
    if (defined $buildcfg->{$maptype}) {
        foreach my $entry (@{$buildcfg->{$maptype}}) {
            my ($k, $v) = $entry =~ /^(.*)\-\>(.*)$/;
            # skip entries that are not of the form name->shortname
            next unless defined $k;
            # the first index listed is the default one
            $defaultindex = $k unless scalar(@indexlist);
            push @indexlist, $k unless exists $indexmap->{$k};
            $indexmap->{$k} = $v;
        }
    } else {
        print STDERR "$maptype not defined\n";
    }
    # an explicit defaultindex in collect.cfg wins
    if (defined $collectcfg->{'defaultindex'}) {
        $defaultindex = $collectcfg->{'defaultindex'};
    }

    # ---- levels (mgpp/lucene only) ----
    my $levelmap = {};
    my @levellist = ();
    my $default_search_level = "";
    my $default_search_level_shortname = "";
    my $default_retrieve_level = "Sec";
    if ($uses_levels) {
        if (defined $buildcfg->{'levelmap'}) {
            foreach my $entry (@{$buildcfg->{'levelmap'}}) {
                my ($k, $v) = $entry =~ /^(.*)\-\>(.*)$/;
                next unless defined $k;
                # the first level listed is the default search level
                if (!scalar(@levellist)) {
                    $default_search_level = $k;
                    $default_search_level_shortname = $v;
                }
                push @levellist, $k unless exists $levelmap->{$k};
                $levelmap->{$k} = $v;
            }
        }

        if (defined $collectcfg->{'defaultlevel'}) {
            $default_search_level = $collectcfg->{'defaultlevel'};
            my $shortname = $levelmap->{$default_search_level};
            # an unknown level has no shortname; write an empty attribute
            $default_search_level_shortname = (defined $shortname) ? $shortname : "";
        }
        if (defined $buildcfg->{'textlevel'}) {
            $default_retrieve_level = $buildcfg->{'textlevel'};
        }
    }

    # format stuff
    my $format = $collectcfg->{'format'};
    $format = {} unless defined $format;

    # ---- search section of collectionConfig.xml ----
    $collwriter->startTag('search', 'type'=>$buildtype);
    foreach my $i (@indexlist) {
        $collwriter->startTag('index', 'name'=>$i);
        # find the coll meta stuff
        my $display = (defined $collectionmeta) ? $collectionmeta->{".$i"} : undef;
        if (defined $display) {
            foreach my $lang (sort keys %$display) {
                # NOTE(review): the index name, not the collectionmeta value,
                # is written as the display text here (the level loop below
                # writes the value) - confirm this is intended.
                output_display($collwriter, 'name', $lang, $i);
            }
        }
        $collwriter->endTag('index');
    }

    # output the defaultIndex to coll cfg
    $collwriter->emptyTag('defaultIndex', 'name'=>$defaultindex);

    # indexSubcollection
    if (defined $collectcfg->{'indexsubcollections'}) {
        foreach my $i (@{$collectcfg->{'indexsubcollections'}}) {
            $collwriter->startTag('indexSubcollection', 'name'=>$i);
            &output_display($collwriter, 'name', $defaultlang, $i);
            $collwriter->endTag('indexSubcollection');
        }
    }

    # subcollection
    my $subcollection = $collectcfg->{'subcollection'};
    if (defined $subcollection) {
        foreach my $entry (sort keys %$subcollection) {
            $collwriter->emptyTag('subcollection', 'filter'=>$subcollection->{$entry}, 'name'=>$entry);
        }
    }

    # indexlanguage
    my $languages = $collectcfg->{'languages'};
    if (defined $languages) {
        foreach my $i (@$languages) {
            $collwriter->startTag('indexLanguage', 'name'=>$i);
            &output_display($collwriter, 'name', $defaultlang, $i);
            $collwriter->endTag('indexLanguage');
        }
    }

    # level stuff for mgpp/lucene
    if ($uses_levels) {
        foreach my $l (@levellist) {
            $collwriter->startTag('level', 'name'=>$l);
            # find the coll meta stuff
            my $display = (defined $collectionmeta) ? $collectionmeta->{".$l"} : undef;
            if (defined $display) {
                foreach my $lang (sort keys %$display) {
                    output_display($collwriter, 'name', $lang, $display->{$lang});
                }
            }
            $collwriter->endTag('level');
        }
        $collwriter->emptyTag('defaultLevel', 'name'=>$default_search_level);
    }

    # add in the search type
    if (defined $format->{'SearchTypes'}) {
        $collwriter->startTag('format', 'name'=>"searchType");
        $collwriter->charactersXML($format->{'SearchTypes'});
        $collwriter->endTag('format');
    }

    # add in the format stuff: SearchVList takes precedence over VList
    my $search_format = (defined $format->{'SearchVList'}) ? $format->{'SearchVList'} : $format->{'VList'};
    if (defined $search_format) {
        $collwriter->startTag('format');
        write_format($collwriter, $search_format, "document");
        $collwriter->endTag('format');
    }

    $collwriter->endTag('search');

    # ---- import plugins ----
    # if ImagePlugin is added, then need to add in a replaceListRef element for gs2-image
    my $contains_image_plugin = 0;

    my $plugins = $collectcfg->{'plugin'};
    if (defined $plugins) {
        $collwriter->startTag('import');
        $collwriter->startTag('pluginList');
        foreach my $pl (@$plugins) {
            my $pluginname = $pl->[0];
            if ($pluginname =~ m/^(ImagePlugin|ImagePlug|PagedImagePlugin)$/) {
                $contains_image_plugin = 1;
            }
            $collwriter->startTag('plugin', 'name'=>$pluginname);
            _write_options($collwriter, $pl);
            $collwriter->endTag('plugin');
        }
        $collwriter->endTag('pluginList');
        $collwriter->endTag('import');
    }

    # ---- service racks in buildConfig.xml ----
    $buildwriter->startTag('serviceRackList');

    my $service_type = "MG";
    if ($buildtype eq 'mgpp') {
        $service_type = "MGPP";
    } elsif ($buildtype eq "lucene") {
        $service_type = "Lucene";
    }

    # indexSubcollectionList: name -> shortname, in build.cfg order
    my $defaultsubcollection = "";
    my @subcollist = ();
    my $subcolmap = {};
    if (defined $buildcfg->{'subcollectionmap'}) {
        foreach my $entry (@{$buildcfg->{'subcollectionmap'}}) {
            my ($subcol_name, $subcol_shortname) = split(/->/, $entry);
            next unless defined $subcol_name;
            # the first subcollection listed is the default one
            if (!scalar(@subcollist)) {
                $defaultsubcollection = $subcol_shortname;
            }
            push @subcollist, $subcol_name unless exists $subcolmap->{$subcol_name};
            $subcolmap->{$subcol_name} = $subcol_shortname;
        }
    }

    # do the retrieve service
    $buildwriter->startTag('serviceRack', 'name'=>"GS2".$service_type."Retrieve");
    if ($uses_levels) {
        $buildwriter->emptyTag('defaultLevel', 'shortname'=>$default_retrieve_level);
    } elsif ($buildtype eq "mg") {
        $buildwriter->emptyTag('defaultIndex', 'shortname'=>$defaultindex);
    }

    if ((defined $defaultsubcollection) && ($defaultsubcollection ne "")) {
        $buildwriter->emptyTag('defaultIndexSubcollection', 'shortname'=>$defaultsubcollection);
    }

    if (defined $indexstem) {
        $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
    }
    # close off the Retrieve service
    $buildwriter->endTag('serviceRack');

    # ---- classifiers: browse section (coll cfg) and GS2Browse rack (build cfg) ----
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;
    if (defined $collectcfg->{'classify'}) {
        $collwriter->startTag('browse');
        # add in default format if necessary
        if (defined $format->{"VList"} || defined $format->{"HList"}) {
            # global formats
            $collwriter->startTag('format');
            if (defined $format->{"VList"}) {
                # VList applies to both classifier and doc nodes
                write_format($collwriter, $format->{"VList"}, "document");
                write_format($collwriter, $format->{"VList"}, "classifier");
            }
            if (defined $format->{"HList"}) {
                # hlist is only for classifier nodes
                write_format($collwriter, $format->{"HList"}, "horizontal");
            }
            $collwriter->endTag('format');
        }

        foreach my $cl (@{$collectcfg->{'classify'}}) {
            # classifiers are numbered in collect.cfg order, phind included
            my $name = "CL$count";
            $count++;
            my $classname = $cl->[0];
            if ($classname =~ /^phind$/i) {
                $phind = 1;
                #should add it into coll config classifiers
                next;
            }

            my $horizontalAtTop = &isHorizontalClassifier($database, $name);
            if (not $started_classifiers) {
                $buildwriter->startTag('serviceRack', 'name'=>'GS2Browse');
                if (defined $indexstem) {
                    $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
                }
                $buildwriter->startTag('classifierList');
                $started_classifiers = 1;
            }

            my $content = ''; #use buttonname first, then metadata
            if ($classname eq "DateList") {
                $content = "Date";
            } else {
                for (my $i = 0; $i < scalar(@$cl); $i++) {
                    my $arg = $cl->[$i];
                    if ($arg eq "-buttonname") {
                        $content = $cl->[$i+1];
                        last;
                    } elsif ($arg eq "-metadata") {
                        $content = $cl->[$i+1];
                    }
                }
                # the option was the last argument and had no value
                $content = '' unless defined $content;
            }
            if ($horizontalAtTop) {
                $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content, 'horizontalAtTop'=>'true');
            } else {
                $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content);
            }

            $collwriter->startTag('classifier', 'name'=>$classname);
            _write_options($collwriter, $cl);

            my $vlist = $name."VList";
            my $hlist = $name."HList";
            my $dlist = ($classname eq "DateList") ? "DateList" : "";
            # need to work out how to split into classifier and document
            if (defined $format->{$vlist} || defined $format->{$hlist} || defined $format->{$dlist}) {
                $collwriter->startTag('format');
                if (defined $format->{$vlist}) {
                    write_format($collwriter, $format->{$vlist}, "document");
                    write_format($collwriter, $format->{$vlist}, "classifier");
                }
                if (defined $format->{$hlist}) {
                    write_format($collwriter, $format->{$hlist}, "horizontal");
                }
                if (defined $format->{$dlist}) {
                    write_format($collwriter, $format->{$dlist}, "document");
                }
                $collwriter->endTag('format');
            }
            $collwriter->endTag('classifier');
        } #foreach classifier

        if ($started_classifiers) {
            # end the classifiers
            $buildwriter->endTag('classifierList');
            # close off the Browse service
            $buildwriter->endTag('serviceRack');
        }

        $collwriter->endTag('browse');
    }

    # the phind classifier is a separate service
    if ($phind) {
        $buildwriter->emptyTag('serviceRack', 'name'=>'PhindPhraseBrowse');
    }

    # ---- do the search service ----
    $buildwriter->startTag('serviceRack', 'name'=>'GS2'.$service_type.'Search');
    $buildwriter->startTag('indexList');
    foreach my $i (@indexlist) {
        $buildwriter->emptyTag('index', 'name'=>$i, 'shortname'=>$indexmap->{$i});
    }
    $buildwriter->endTag('indexList');
    if (defined $indexstem) {
        $buildwriter->emptyTag('indexStem', 'name'=>$indexstem);
    }

    # index options
    if ($buildtype eq 'mg' || $buildtype eq 'mgpp') {
        $buildwriter->startTag('indexOptionList');
        my $stemindexes = 3; # default is stem and casefold
        if (defined $buildcfg->{'stemindexes'} && $buildcfg->{'stemindexes'} =~ /^\d+$/) {
            $stemindexes = $buildcfg->{'stemindexes'};
        }
        $buildwriter->emptyTag('indexOption', 'name'=>'stemIndexes', 'value'=>$stemindexes);

        my $maxnumeric = 4; # default
        if (defined $buildcfg->{'maxnumeric'} && $buildcfg->{'maxnumeric'} =~ /^\d+$/) {
            $maxnumeric = $buildcfg->{'maxnumeric'};
        }
        $buildwriter->emptyTag('indexOption', 'name'=>'maxnumeric', 'value'=>$maxnumeric);

        $buildwriter->endTag('indexOptionList');
    }

    if ($uses_levels) {
        # level info
        $buildwriter->emptyTag('defaultLevel', 'shortname'=>$default_search_level_shortname);
        $buildwriter->emptyTag('defaultGDBMLevel', 'shortname'=>$default_retrieve_level);
        $buildwriter->startTag('levelList');
        foreach my $l (@levellist) {
            $buildwriter->emptyTag('level', 'name'=>$l, 'shortname'=>$levelmap->{$l});
        }
        $buildwriter->endTag('levelList');

        # do the search types if there
        if (defined $collectcfg->{'searchtype'}) {
            $buildwriter->startTag('searchTypeList');
            foreach my $st (@{$collectcfg->{'searchtype'}}) {
                $buildwriter->emptyTag('searchType', 'name'=>$st);
            }
            $buildwriter->endTag('searchTypeList');
        } elsif (defined $format->{'SearchTypes'}) {
            #check format statement
            my $searchtype = $format->{'SearchTypes'};
            $buildwriter->startTag('searchTypeList');
            if ($searchtype =~ /form/) {
                $buildwriter->emptyTag('searchType', 'name'=>'form');
            }
            if ($searchtype =~ /plain/) {
                $buildwriter->emptyTag('searchType', 'name'=>'plain');
            }
            $buildwriter->endTag('searchTypeList');
        }
    }

    # indexLanguageList
    if (defined $languages) {
        my $defaultindexlanguage_shortname;
        $buildwriter->startTag('indexLanguageList');
        foreach my $i (@$languages) {
            $buildwriter->startTag('indexLanguage', 'name'=>$i);
            &output_display($buildwriter, 'name', $i, $i);
            $buildwriter->endTag('indexLanguage');
            # the first language listed is the default one
            $defaultindexlanguage_shortname = $i unless defined $defaultindexlanguage_shortname;
        }
        $buildwriter->endTag('indexLanguageList');
        # an empty languages list leaves no default; write empty attributes
        $defaultindexlanguage_shortname = "" unless defined $defaultindexlanguage_shortname;
        $buildwriter->startTag('defaultIndexLanguage', 'name'=>$defaultindexlanguage_shortname, 'shortname'=>$defaultindexlanguage_shortname);
        $buildwriter->endTag('defaultIndexLanguage');
    }

    if (scalar(@subcollist) > 0) {
        $buildwriter->startTag('indexSubcollectionList');
        foreach my $i (@subcollist) {
            $buildwriter->emptyTag('indexSubcollection', 'name'=>$i, 'shortname'=>$subcolmap->{$i});
        }
        $buildwriter->endTag('indexSubcollectionList');
        $buildwriter->emptyTag('defaultIndexSubcollection', 'shortname'=>$defaultsubcollection);
    }

    $buildwriter->endTag('serviceRack');

    $buildwriter->endTag('serviceRackList');
    $buildwriter->endTag('buildConfig');

    # we add in the default replace list just in case we have macros in the
    # collection
    $collwriter->emptyTag('replaceListRef', 'id'=>'gs2-standard');
    $collwriter->emptyTag('replaceListRef', 'id'=>'gs2-image') if $contains_image_plugin;
    $collwriter->endTag('CollectionConfig');
    $collwriter->end();
    $buildwriter->end();
    $buildoutput->close();
    $colloutput->close();
    &close_database($database);
}

# Write one <option> element for every argument of a plugin/classifier line
# that starts with '-'.  $args is the array ref for the whole line; element 0
# is the plugin/classifier name and is skipped.  An option is written with a
# 'value' attribute when the following argument does not itself start with
# '-'; otherwise (including when it is the last argument) it is written as a
# bare flag.
sub _write_options {
    my ($writer, $args) = @_;

    my $last_index = scalar(@$args) - 1;
    for (my $i = 1; $i <= $last_index; $i++) {
        my $option_name = $args->[$i];
        next unless defined $option_name && $option_name =~ /^-/;

        my $option_value = ($i < $last_index) ? $args->[$i+1] : undef;
        if (!defined $option_value || $option_value =~ /^-/) {
            $writer->startTag('option', 'name'=>$option_name);
        } else {
            $writer->startTag('option', 'name'=>$option_name, 'value'=>$option_value);
        }
        $writer->endTag('option');
    }
}
[3688]666
[3979]667
# Write a single <metadata> element through $writer.
#   $writer    - object providing startTag/characters/endTag
#   $lang      - language code; 'default' is written as 'en', and an empty or
#                undefined value omits the lang attribute altogether
#   $metaname  - value of the name attribute
#   $metavalue - element text; undefined is written as empty text
sub output_metadata {
    my ($writer, $lang, $metaname, $metavalue) = @_;

    # collect.cfg/build.cfg entries may be absent; avoid undefined-value
    # warnings while still producing the (empty) element.
    $lang = '' unless defined $lang;
    $metavalue = '' unless defined $metavalue;

    $lang = 'en' if $lang eq 'default';
    my @lang_att = ($lang ne "") ? ('lang'=>$lang) : ();

    $writer->startTag('metadata', @lang_att, 'name'=>$metaname);
    $writer->characters($metavalue);
    $writer->endTag('metadata');
}
679
# Write a single <displayItem> element through $writer.
#   $writer - object providing startTag/characters/endTag
#   $name   - value of the name attribute
#   $lang   - 'default', a plain language code, or a bracketed spec of the
#             form [l=xx]; 'default' and unparseable specs are written as 'en'
#   $value  - element text; undefined is written as empty text
sub output_display {
    my ($writer, $name, $lang, $value) = @_;

    $lang = 'default' unless defined $lang;
    $value = '' unless defined $value;

    if ($lang =~ /^\[/) {
        # pull the language code out of [l=xx]; anything else in brackets
        # falls back to the default language instead of an undefined attribute
        my ($code) = $lang =~ /\[l=([^\]]*)\]/;
        $lang = (defined $code && $code ne "") ? $code : 'default';
    }
    $lang = 'en' if $lang eq 'default';

    $writer->startTag('displayItem', 'name'=>$name, 'lang'=>$lang);
    $writer->characters($value);
    $writer->endTag('displayItem');
}
# Reduce a macro-prefixed icon location to the part after "images/".
# Values that do not start with '_' , or that have nothing (or a false
# value) after "images/", are returned unchanged.
sub format_icon_value {
    my ($value) = @_;

    return $value unless $value =~ /^_/;

    my ($image_path) = $value =~ /images\/(.*)$/;
    return $image_path ? $image_path : $value;
}
[3688]701
# Clean up a Greenstone 2 display string: the rewrites below are applied
# globally, in the order listed, and the resulting string is returned.
sub tidy_up_display_item {
    my ($value) = @_;

    my @rewrites = (
        [ qr/\\n/,  '' ],                                  # remove \n
        [ qr/\\\'/, "'" ],                                 # replace \' with '
        [ qr/\\\"/, '"' ],                                 # replace \" with "
        [ qr/_httpprefix_/, '_httpsite_' ],                # replace _httpprefix_ with _httpsite_
        [ qr/_gwcgi_/, '' ],
        [ qr/[a-z][a-z]?=_cgiarg[a-z][a-z]?_&?/, '' ],
        [ qr/&p=/, '&sa=' ],
    );

    foreach my $rewrite (@rewrites) {
        my ($pattern, $replacement) = @$rewrite;
        $value =~ s/$pattern/$replacement/g;
    }
    return $value;
}
717
# Translate a Greenstone 2 format string into gsf markup and write it through
# $writer as a <gsf:template> element.
#   $writer     - object providing startTag/charactersXML/endTag
#   $old_format - the Greenstone 2 format string
#   $node_type  - "document", "classifier" or "horizontal"; any other value
#                 results in nothing being written
sub write_format {
    my ($writer, $old_format, $node_type) = @_;

    my $template = $old_format;

    # replace \' with ' and \" with "
    $template =~ s/\\\'/\'/g;
    $template =~ s/\\\"/\"/g;

    # convert {If} and {Or} first; nesting is assumed not to occur
    $template =~ s/\{If\}\{([^\}]*)\}/&format_if($1, $node_type)/eg;
    $template =~ s/\{Or\}\{([^\}]*)\}/&format_or($1)/eg;

    # convert the fixed [] items to <gsf:...>
    $template =~ s/\[Text\]/\<gsf:text\/\>/g;
    $template =~ s/\[num\]/\<gsf:num\/\>/g;
    $template =~ s/\[link\]/\<gsf:link type=\'$node_type\'\>/g;
    $template =~ s/\[\/link\]/\<\/gsf:link\>/g;
    $template =~ s/\[srclink\]/\<gsf:link type=\'source\'\>/g;
    $template =~ s/\[\/srclink\]/\<\/gsf:link\>/g;
    $template =~ s/\[icon\]/\<gsf:icon type=\'$node_type\'\/\>/g;
    $template =~ s/\[srcicon\]/\<gsf:icon type=\'source\'\/\>/g;

    # highlight markers are simply dropped
    $template =~ s/\[\/?highlight\]//g;

    # whatever [] items remain are assumed to be metadata
    $template =~ s/\[([^\]]*)\]/&format_metadata($1)/eg;

    # some html tidy: <br> becomes <br />, <p> becomes <p />
    $template =~ s/\<br\>/\<br \/\>/g;
    $template =~ s/\<p\>/\<p \/\>/g;

    # put quotes around any atts
    $template =~ s/=([a-z]+)([> ])/=\'$1\'$2/g;

    # attributes of the enclosing gsf:template for each supported node type
    my %template_atts = (
        'document'   => [ 'match'=>'documentNode' ],
        'classifier' => [ 'match'=>'classifierNode' ],
        'horizontal' => [ 'match'=>'classifierNode', 'mode'=>'horizontal' ],
    );

    my $atts = $template_atts{$node_type};
    if (defined $atts) {
        $writer->startTag('gsf:template', @$atts);
        $writer->charactersXML($template);
        $writer->endTag('gsf:template');
    }
}
767
# Convert the inside of a Greenstone 2 [metadata] item into a <gsf:metadata>
# element string.  Handles an optional "cgisafe:" prefix (dropped), an
# optional parent/sibling selector with optional (Top)/(All'delim') scope,
# and an optional "ex." namespace prefix (dropped).
sub format_metadata {
    my ($metadata_string) = @_;

    # what shall we do with cgisafe?? - for now it is just stripped
    $metadata_string =~ s/^cgisafe://;

    my ($select, $scope, $delim);
    if ($metadata_string =~ s/^(parent|sibling)//) {
        $select = $1;
        if ($metadata_string =~ s/^\((Top|All)\)?//) {
            $scope = $1;
            # a quoted delimiter may follow the scope: 'delim')
            if ($metadata_string =~ s/^\'([^\']*)\'\)//) {
                $delim = $1;
            }
        }
    }
    $metadata_string =~ s/^://;
    # remove ex.
    $metadata_string =~ s/^ex\.//;

    my @atts = ("name='$metadata_string'");
    my $separator_att = (defined $delim) ? "separator='$delim'" : undef;

    if (defined $select && $select eq "sibling") {
        push @atts, "multiple='true'";
        push @atts, $separator_att if defined $separator_att;
    }
    elsif (defined $select && $select eq "parent") {
        if (!defined $scope) {
            push @atts, "select='parent'";
        }
        elsif ($scope eq "Top") {
            push @atts, "select='root'";
        }
        elsif ($scope eq "All") {
            push @atts, "select='ancestors'";
            push @atts, $separator_att if defined $separator_att;
        }
    }

    # every attribute is followed by a single space, then the tag is closed
    return "<gsf:metadata " . join('', map { "$_ " } @atts) . "/>";
}
821
# Convert the inside of a Greenstone 2 {If}{test,true[,false]} item into a
# <gsf:switch> element string.
#   $if_string - "test,true-text" or "test,true-text,false-text"
#   $node_type - accepted for symmetry with the caller; not used here
# The test is either a bare [metadata] item (an existence test) or a
# comparison "lhs OP rhs" with OP one of eq ne lt gt le ge sw ew.  The
# metadata item is left in [..] form inside the result; write_format converts
# it afterwards.  A test that cannot be parsed produces an empty test type
# instead of a run of undefined-value warnings.
sub format_if {
    my ($if_string, $node_type) = @_;

    my %test_type_for = (
        'eq' => "equals",
        'sw' => "startsWith",
        'ew' => "endsWith",
        'ne' => "notEquals",
        'lt' => "lessThan",
        'gt' => "greaterThan",
        'le' => "lessThanOrEquals",
        'ge' => "greaterThanOrEquals",
    );

    my @parts = split /,/, (defined $if_string ? $if_string : '');
    my $test = (defined $parts[0]) ? $parts[0] : '';
    my $true_option = (defined $parts[1]) ? $parts[1] : '';
    # the false branch is only recognised when there are exactly three parts
    my $false_option = (scalar(@parts) == 3) ? $parts[2] : undef;

    $test =~ s/^\s*//;
    $test =~ s/\s*$//;

    my $test_meta = '';
    my $test_type = '';
    my $test_value;
    if ($test =~ /^(\[.+\])$/) {
        $test_meta = $1;
        $test_type = 'exists';
    } elsif ($test =~ /^(.+)\s+(eq|ne|lt|gt|le|ge|sw|ew)\s+(.+)$/) {
        my ($lhs, $exp, $rhs) = ($1, $2, $3);
        $test_type = $test_type_for{$exp};
        if ($lhs =~ /^\[.+\]$/) {
            $test_meta = $lhs;
            $test_value = $rhs;
        } else {
            # assume rhs has meta
            $test_meta = $rhs;
            $test_value = $lhs;
        }

        #remove beginning and end quotes
        $test_value =~ s/^[\'\"]//;
        $test_value =~ s/[\'\"]$//;
    }

    my $test_atts = "test='$test_type' ";
    if (defined $test_value) {
        $test_atts .= "test-value='$test_value' ";
    }

    my $new_format = "<gsf:switch>$test_meta";
    $new_format .= "<gsf:when $test_atts>$true_option</gsf:when>";
    if (defined $false_option) {
        $new_format .= "<gsf:otherwise>$false_option</gsf:otherwise>";
    }
    $new_format .= "</gsf:switch>";

    return $new_format;
}
888
# Convert the inside of a Greenstone 2 {Or}{[a],[b],default} item into a
# <gsf:choose-metadata> element string.  Each comma separated item that is a
# [metadata] item (surrounding whitespace ignored) becomes a gsf:metadata
# element; the first item that is not is taken as the default text and ends
# the list.  Returns "" for an empty list.
sub format_or {
    my ($or_string) = @_;

    my @meta_list = split(',', (defined $or_string ? $or_string : ''));
    return "" unless scalar (@meta_list);

    my $new_format = "<gsf:choose-metadata>";
    foreach my $m (@meta_list) {
        # tolerate "[a], [b]" - without trimming, " [b]" would be mistaken
        # for the default value and every later item would be dropped
        my $trimmed = $m;
        $trimmed =~ s/^\s+//;
        $trimmed =~ s/\s+$//;
        if ($trimmed =~ /^\[(.*)\]$/) {
            $new_format .= &format_metadata($1);
        } else {
            # a default value (kept exactly as written)
            $new_format .= "<gsf:default>$m</gsf:default>";
            last;
        }
    }
    $new_format .= "</gsf:choose-metadata>";
    return $new_format;
}
906
# Placeholder for opening the collection's GDBM database.  The tie call is
# disabled (see the commented-out lines), so the returned handle is always
# undefined and $db_file is not touched.
sub open_database {
    my ($db_file) = @_;

    my $db_handle;
#    tie (%$db_handle, 'GDBM_File', $db_file, GDBM_READER, 0400) ||
#	die "Couldn't open database $db_file\n";

    return $db_handle;
}
916
# Untie the hash that $database refers to.
sub close_database {
    my $db_handle = shift;
    untie %{$db_handle};
}
# Report whether classifier $name should be shown horizontally at the top.
# The database lookup is disabled, so the answer is always 0; the lookup code
# is kept below the early return for when it can be re-enabled.
sub isHorizontalClassifier {
    my ($database, $name) = @_;

    return 0; # can't get this to work for windows

    my $classifier_record = $database->{$name};
    my ($child_type) = $classifier_record =~ /<childtype>(\w*)/;
    return ($child_type eq "HList") ? 1 : 0;
}
[3688]932#$writer->startTag('');
933#$writer->endTag('');
934#$writer->characters();
935#$writer->emptyTag('');
936
9371;
Note: See TracBrowser for help on using the repository browser.