source: trunk/gsdl3/bin/script/convert_coll_from_gs2.pl@ 9874

Last change on this file since 9874 was 9874, checked in by kjdon, 19 years ago

merged from branch ant-install-branch: merge 1

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 14.4 KB
Line 
1#!/usr/bin/perl -w
2
BEGIN {
    # Refuse to compile any further unless every environment variable this
    # script relies on has been set.
    foreach my $required (qw(GSDLHOME GSDL3HOME GSDLOS)) {
        die "$required not set\n" unless defined $ENV{$required};
    }

    # Put both library directories at the front of the module search path.
    # The cpan directory ends up first and perllib second, which is the same
    # order two successive unshifts (perllib, then cpan) would produce.
    unshift(@INC,
            "$ENV{'GSDL3HOME'}/lib/perl/cpan",
            "$ENV{'GSDLHOME'}/perllib");
}
10
11use colcfg;
12use util;
13use parsargv;
14use FileHandle;
15use XML::Writer;
16
# Script entry point: run the conversion (main is defined further down).
main();
# Print the command-line synopsis and the list of supported options to STDOUT.
# Takes no arguments.
sub print_usage() {
    my @usage_lines = (
        "Usage: convert_coll_from_gs2.pl [options] coll-name",
        "options:",
        " -collectdir Directory where collection lives.",
        " -verbosity Controls the amount of output.",
        " -defaultlang The language that is considered the default (for display text etc). defaults to 'en'",
        "",    # the usage text ends with one blank line
    );

    print STDOUT join("\n", @usage_lines) . "\n";
}
26
# Convert one collection's etc/collect.cfg and index/build.cfg into
# etc/collectionConfig.xml and index/buildConfig.xml.
#
# Input comes from @ARGV: the options understood by parsargv::parse below,
# followed by the collection name.
#   -collectdir   directory containing the collection
#                 (default: $GSDLHOME/collect)
#   -verbosity    parsed, but not used by this routine
#   -defaultlang  language code written on display items whose language key
#                 in collect.cfg is 'default' (default: 'en')
#
# Dies when the options cannot be parsed, no collection is named, the
# collection is "gs2model", either input file is missing, or an output file
# cannot be opened or closed.  Existing output files are overwritten.
sub main {

    my ($defaultlang, $verbosity, $collectdir);
    # note that no defaults are passed for most options as they're set
    # later (after we check the collect.cfg file)
    if (!&parsargv::parse(\@ARGV,
                          'verbosity/\d+/', \$verbosity,
                          'collectdir/.*/', \$collectdir,
                          'defaultlang/.*/', \$defaultlang)) {
        print_usage();
        die "\n";
    }

    # get and check the collection name
    my ($collection) = @ARGV;
    if (!defined($collection) || $collection eq "") {
        die "No collection specified\n";
    }
    if ($collection eq "gs2model") {
        die "You cant convert the model collection\n";
    }

    if (!defined $collectdir || $collectdir eq "") {
        $collectdir = &util::filename_cat($ENV{'GSDLHOME'}, "collect");
    }

    if (!defined $defaultlang || $defaultlang eq "") {
        $defaultlang = 'en';
    }
    # Display items keyed 'default' in collect.cfg are written with the
    # language chosen by -defaultlang; every other key is passed through
    # unchanged for output_display to interpret.
    my $resolve_lang = sub {
        my ($lang) = @_;
        return $lang eq 'default' ? $defaultlang : $lang;
    };

    # add on the coll name
    $collectdir = &util::filename_cat($collectdir, $collection);

    # read the two input configuration files
    my $collconfigfilename = &util::filename_cat($collectdir, "etc", "collect.cfg");
    print STDOUT "coll config=$collconfigfilename\n";
    die "collect.cfg not found!!\n" unless -e $collconfigfilename;
    my $collectcfg = &colcfg::read_collect_cfg($collconfigfilename);

    my $buildconfigfilename = &util::filename_cat($collectdir, "index", "build.cfg");
    die "build.cfg not found!!\n" unless -e $buildconfigfilename;
    my $buildcfg = &colcfg::read_build_cfg($buildconfigfilename);

    # work out the output file names, warning if they are about to be replaced
    my $colloutfile = &util::filename_cat($collectdir, "etc", "collectionConfig.xml");
    if (-e $colloutfile) {
        print STDOUT "collectionConfig file already exists! overwriting it!\n";
    }

    my $buildoutfile = &util::filename_cat($collectdir, "index", "buildConfig.xml");
    if (-e $buildoutfile) {
        print STDOUT "buildConfig file already exists! overwriting it!\n";
    }

    my $buildtype = defined $buildcfg->{'buildtype'} ? $buildcfg->{'buildtype'} : 'mg';

    # Open both outputs before writing anything, and fail loudly: an undef
    # handle must never reach XML::Writer.
    my $buildoutput = IO::File->new($buildoutfile, 'w')
        or die "Cannot open $buildoutfile for writing: $!\n";
    my $colloutput = IO::File->new($colloutfile, 'w')
        or die "Cannot open $colloutfile for writing: $!\n";

    my $buildwriter = XML::Writer->new(OUTPUT => $buildoutput, NEWLINES => 1);
    $buildwriter->startTag('buildConfig', 'xmlns:gsf'=>"http://www.greenstone.org/greenstone3/schema/ConfigFormat");

    my $collwriter = XML::Writer->new(OUTPUT => $colloutput, NEWLINES => 1);
    $collwriter->startTag('collectionConfig', 'xmlns:gsf'=>"http://www.greenstone.org/greenstone3/schema/ConfigFormat", 'xmlns:xsl'=>'http://www.w3.org/1999/XSL/Transform');

    # output the collection metadata to the collectionConfig file
    $collwriter->startTag('metadataList');
    output_metadata($collwriter, 'creator', $collectcfg->{'creator'});
    $collwriter->endTag('metadataList');

    # output the display collectionmeta to collectionConfig.xml
    my $collectionmeta = $collectcfg->{'collectionmeta'};
    if (defined $collectionmeta) {
        # collect.cfg collectionmeta name => displayItem name
        my %name_map = ('collectionname', 'name',
                        'collectionextra', 'description',
                        'iconcollection', 'icon',
                        'iconcollectionsmall', 'smallicon');

        $collwriter->startTag('displayItemList');
        # keys are sorted so that repeated runs produce the same file
        foreach my $entry (sort keys %$collectionmeta) {
            my $name = $name_map{$entry};
            next unless defined $name;
            foreach my $lang (sort keys %{$collectionmeta->{$entry}}) {
                my $value = $collectionmeta->{$entry}->{$lang};
                if ($entry =~ /^icon/) {
                    $value = format_icon_value($value);
                }
                output_display($collwriter, $name, $resolve_lang->($lang), $value);
            }
        }
        $collwriter->endTag('displayItemList');
    }

    # output building metadata to build config file
    $buildwriter->startTag('metadataList');
    output_metadata($buildwriter, 'numDocs', $buildcfg->{'numdocs'});
    output_metadata($buildwriter, 'buildType', $buildtype);
    $buildwriter->endTag('metadataList');

    # indexes: build.cfg lists them as "longname->shortname"
    my $indexmap = {};
    my @indexorder = ();    # long names, in build.cfg order
    if (defined $buildcfg->{'indexmap'}) {
        foreach my $mapping (@{$buildcfg->{'indexmap'}}) {
            my ($longname, $shortname) = $mapping =~ /^(.*)\-\>(.*)$/;
            if (!defined $longname) {
                print STDERR "ignoring malformed indexmap entry '$mapping'\n";
                next;
            }
            push @indexorder, $longname unless exists $indexmap->{$longname};
            $indexmap->{$longname} = $shortname;
        }
    } else {
        print STDERR "indexmap not defined\n";
    }
    my $firstindex = @indexorder ? $indexmap->{$indexorder[0]} : "";

    my $defaultindex;
    if (defined $collectcfg->{'defaultindex'}) {
        $defaultindex = $indexmap->{$collectcfg->{'defaultindex'}};
    }
    # use the first one if none was named, or the named one was never built
    $defaultindex = $firstindex unless defined $defaultindex;

    # format stuff
    my $format = $collectcfg->{'format'};
    $format = {} unless defined $format;

    # output the search stuff to coll cfg
    $collwriter->startTag('search');
    foreach my $longname (@indexorder) {
        $collwriter->startTag('index', 'name'=>$indexmap->{$longname});
        # find the coll meta stuff: index display names are stored under
        # the long index name prefixed with a dot
        my $indexdisplay = defined $collectionmeta ? $collectionmeta->{".$longname"} : undef;
        if (defined $indexdisplay) {
            foreach my $lang (sort keys %$indexdisplay) {
                output_display($collwriter, 'name', $resolve_lang->($lang), $indexdisplay->{$lang});
            }
        }
        $collwriter->endTag('index');
    }

    # add in the format stuff
    if (defined $format->{'SearchVList'}) {
        $collwriter->startTag('format');
        write_format($collwriter, $format->{'SearchVList'});
        $collwriter->endTag('format');
    }

    $collwriter->endTag('search');

    $buildwriter->startTag('serviceRackList');

    my @levels = ();
    my $defaultlevel;

    # do the retrieve service
    # assume mgpp or mg
    if ($buildtype eq 'mgpp') {
        # for each level
        if (defined $buildcfg->{'indexlevels'}) {
            push @levels, @{$buildcfg->{'indexlevels'}};

            if (defined $buildcfg->{'textlevel'}) {
                $defaultlevel = $buildcfg->{'textlevel'};
            } else {
                $defaultlevel = $levels[0];
            }
        } else { # use levels from collect.cfg - must be an old collection
            @levels = ('Document');
            $defaultlevel = 'Document';
            if (defined $collectcfg->{'levels'}) {
                foreach my $level (@{$collectcfg->{'levels'}}) {
                    if ($level eq "Section") {
                        $defaultlevel = 'Section';
                    }
                    push @levels, $level;
                }
            }
        }

        $buildwriter->startTag('serviceRack', 'name'=>'GS2MGPPRetrieve');
        $buildwriter->emptyTag('defaultLevel', 'name'=>$defaultlevel);
    } else {
        $buildwriter->startTag('serviceRack', 'name'=>'GS2MGRetrieve');
        $buildwriter->emptyTag('defaultIndex', 'name'=>$defaultindex);
    }
    # close off the Retrieve service
    $buildwriter->endTag('serviceRack');

    # add in the classifiers if needed
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;
    if (defined $collectcfg->{'classify'}) {
        $collwriter->startTag('browse');
        foreach my $cl (@{$collectcfg->{'classify'}}) {
            # numbered before the phind check, so phind still uses up a CL number
            my $name = "CL$count";
            $count++;
            my $classname = $cl->[0];
            if ($classname =~ /^phind$/i) {
                $phind = 1;
                # should add it into coll config classifiers
                next;
            }

            # there may be others
            my $horizontalAtTop = ($classname eq "AZList" || $classname eq "AZCompactList");

            if (not $started_classifiers) {
                $buildwriter->startTag('serviceRack', 'name'=>'GS2Browse');
                $buildwriter->startTag('classifierList');
                $started_classifiers = 1;
            }

            my $content = ''; # use buttonname first, then metadata
            if ($classname eq "DateList") {
                $content = "Date";
            } else {
                for (my $i = 0; $i < scalar(@$cl); $i++) {
                    my $arg = $cl->[$i];
                    if ($arg eq "-buttonname") {
                        $content = $cl->[$i+1];
                        last;
                    } elsif ($arg eq "-metadata") {
                        $content = $cl->[$i+1];
                    }
                }
                # an option given as the very last argument has no value
                $content = '' unless defined $content;
            }

            if ($horizontalAtTop) {
                $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content, 'horizontalAtTop'=>'true');
            } else {
                $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content);
            }

            $collwriter->startTag('classifier', 'name'=>$name);

            # Gather the format statements that apply to this classifier:
            # the classifier-specific VList/HList if present, otherwise the
            # general one, plus DateList for DateList classifiers.
            my @templates = ();
            foreach my $listtype ("VList", "HList") {
                if (defined $format->{$name.$listtype}) {
                    push @templates, $format->{$name.$listtype};
                } elsif (defined $format->{$listtype}) {
                    push @templates, $format->{$listtype};
                }
            }
            if ($classname eq "DateList" && defined $format->{"DateList"}) {
                push @templates, $format->{"DateList"};
            }
            if (@templates) {
                $collwriter->startTag('format');
                foreach my $template (@templates) {
                    write_format($collwriter, $template);
                }
                $collwriter->endTag('format');
            }
            $collwriter->endTag('classifier');
        } # foreach classifier

        if ($started_classifiers) {
            # end the classifiers
            $buildwriter->endTag('classifierList');
            # close off the Browse service
            $buildwriter->endTag('serviceRack');
        }

        $collwriter->endTag('browse');
    }

    # the phind classifier is a separate service
    if ($phind) {
        $buildwriter->emptyTag('serviceRack', 'name'=>'PhindPhraseBrowse');
    }

    # do the search service
    if ($buildtype eq 'mgpp') {

        $buildwriter->startTag('serviceRack', 'name'=>'GS2MGPPSearch');
        $buildwriter->emptyTag('defaultLevel', 'name'=>$defaultlevel);
        $buildwriter->startTag('levelList');
        foreach my $level (@levels) {
            $buildwriter->emptyTag('level', 'name'=>$level);
        }
        $buildwriter->endTag('levelList');

        # fieldlist
        if (defined $buildcfg->{'indexfields'}) {
            my @fieldlist = @{$buildcfg->{'indexfields'}};
            my $fieldmap = {};
            if (defined $buildcfg->{'indexfieldmap'}) {
                foreach my $mapping (@{$buildcfg->{'indexfieldmap'}}) {
                    my ($longname, $shortname) = $mapping =~ /^(.*)\-\>(.*)$/;
                    next unless defined $longname;
                    $fieldmap->{$longname} = $shortname;
                }
            }
            $buildwriter->startTag('fieldList');
            foreach my $fieldname (@fieldlist) {
                $buildwriter->emptyTag('field', 'shortname'=>$fieldmap->{$fieldname}, 'name'=>$fieldname);
            }
            $buildwriter->endTag('fieldList');
        } else {
            print STDERR "indexfields not defined\n";
        }

        # do the search types if there
        if (defined $collectcfg->{'searchtype'}) {
            $buildwriter->startTag('searchTypeList');
            foreach my $searchtype (@{$collectcfg->{'searchtype'}}) {
                $buildwriter->emptyTag('searchType', 'name'=>$searchtype);
            }
            $buildwriter->endTag('searchTypeList');
        }
    } else {
        # Anything that is not mgpp is handled as mg, exactly as the retrieve
        # service above does.  The rack has to be opened here in every case
        # because it is unconditionally closed below.
        $buildwriter->startTag('serviceRack', 'name'=>'GS2MGSearch');
    }

    $buildwriter->emptyTag('defaultIndex', 'name'=>$defaultindex);
    $buildwriter->startTag('indexList');
    # for each index
    foreach my $longname (@indexorder) {
        $buildwriter->emptyTag('index', 'name'=>$indexmap->{$longname});
    }
    $buildwriter->endTag('indexList');

    $buildwriter->endTag('serviceRack');

    $buildwriter->endTag('serviceRackList');
    $buildwriter->endTag('buildConfig');
    $collwriter->endTag('collectionConfig');
    $collwriter->end();
    $buildwriter->end();
    # buffered write errors only show up when the handle is closed
    $buildoutput->close() or die "Error closing $buildoutfile: $!\n";
    $colloutput->close() or die "Error closing $colloutfile: $!\n";
}
407
408
# Write one <metadata name="..."> element containing the given text.
#
#   $writer    - object providing startTag, characters and endTag
#   $metaname  - value for the element's "name" attribute
#   $metavalue - text content; an undefined value is written as an empty
#                element instead of being handed on as undef
sub output_metadata {
    my ($writer, $metaname, $metavalue) = @_;

    # The value comes straight from a config hash and may be absent.
    $metavalue = '' unless defined $metavalue;

    $writer->startTag('metadata', 'name'=>$metaname);
    $writer->characters($metavalue);
    $writer->endTag('metadata');
}
415
# Write one <displayItem name="..." lang="..."> element containing $value.
#
#   $writer - object providing startTag, characters and endTag
#   $name   - value for the "name" attribute
#   $lang   - language key: 'default' is written as 'en'; a key starting
#             with '[' is reduced to the code inside "[l=...]"; anything
#             else is used as given
#   $value  - text content of the element
sub output_display {
    my ($writer, $name, $lang, $value) = @_;

    if ($lang eq 'default') {
        $lang = 'en';
    }
    elsif ($lang =~ /^\[/) {
        # a bracketed key that is not of the [l=xx] form leaves $lang undefined
        $lang = ($lang =~ /\[l=(.*)\]/) ? $1 : undef;
    }

    my @attributes = ('name' => $name, 'lang' => $lang);
    $writer->startTag('displayItem', @attributes);
    $writer->characters($value);
    $writer->endTag('displayItem');
}
# Reduce an icon value that starts with an underscore to the part after its
# last '/'.
#
#   $value - icon value taken from the collection metadata
#
# Returns the text after the last '/' when $value starts with '_' and that
# text is non-empty; otherwise returns $value unchanged.
sub format_icon_value {
    my ($value) = @_;

    if ($value =~ /^_/) {
        my ($basename) = $value =~ m{/([^/]*)$};
        # Test for emptiness rather than truth so that a file literally
        # named "0" is still accepted.
        return $basename if defined $basename && $basename ne '';
    }
    return $value;
}
436
# Rewrite a format string that uses [..] and {If}/{Or} markup into
# <gsf:...> markup and write it inside a
# <gsf:template match="documentNode"> element.
#
#   $writer     - object providing startTag, charactersXML and endTag
#                 NOTE(review): charactersXML is presumably supplied by the
#                 XML::Writer copy found through the cpan directory added to
#                 @INC at the top of the script - confirm.
#   $old_format - the format string to convert; callers pass a defined value
#
# The substitutions below are order-dependent: the specific [..] tokens must
# be replaced before the catch-all metadata rule, and the select='...'
# clean-ups rely on the text that rule produces.
sub write_format {
    my ($writer, $old_format) = @_;

    # replace \' with '
    $old_format =~ s{\\'}{'}g;

    # convert [] to <gsf:...>
    # remove IFs, for now just make the first option true
    $old_format =~ s/\{If\}\{[^,]*,([^,}]*)(?:,[^}]*)?\}/$1/g;
    # remove ORs, for now just make the first option true
    $old_format =~ s/\{Or\}\{([^,]*),[^}]*\}/$1/g;

    # tokens with a fixed replacement
    my @fixed_tokens = (
        [ '[Text]',     "<gsf:text/>" ],
        [ '[num]',      "<gsf:num/>" ],
        [ '[link]',     "<gsf:link>" ],
        [ '[/link]',    "</gsf:link>" ],
        [ '[srclink]',  "<gsf:link type='source'>" ],
        [ '[/srclink]', "</gsf:link>" ],
        [ '[icon]',     "<gsf:icon/>" ],
        [ '[srcicon]',  "<gsf:icon type='source'/>" ],
    );
    foreach my $token (@fixed_tokens) {
        my ($old_text, $new_text) = @$token;
        $old_format =~ s/\Q$old_text\E/$new_text/g;
    }

    # now do the rest of the [] which are assumed to be metadata.  The
    # optional "prefix:" sits in a group that always takes part in the match,
    # so ${1} is an empty string - never undef - when there is no prefix.
    $old_format =~ s!\[((?:[^\]]*:)?)([^\]:]*)\]!<gsf:metadata name='${2}' select='${1}'/>!g;
    # remove ex.
    $old_format =~ s!<gsf:metadata name='ex\.([^']+)'!<gsf:metadata name='${1}'!g;
    # do the parent stuff
    $old_format =~ s!(select='parent):'!${1}'!g;
    $old_format =~ s!select='parent\(Top\):'!select='root'!g;
    $old_format =~ s!select='parent\(All\):'!select='ancestors'!g;
    $old_format =~ s!select='parent\(All'([^']*)'\):'!select='ancestors' separator='${1}'!g;
    # remove any select=''
    $old_format =~ s!select=''!!g;
    # turn <br> into <br />
    $old_format =~ s!<br>!<br />!g;
    # turn <p> into <p />
    $old_format =~ s!<p>!<p />!g;

    # put quotes around any atts
    $old_format =~ s!=([a-z]+)([> ])!='${1}'${2}!g;

    $writer->startTag('gsf:template', 'match'=>'documentNode');
    $writer->charactersXML($old_format);
    $writer->endTag('gsf:template');
}
481
482#$writer->startTag('');
483#$writer->endTag('');
484#$writer->characters();
485#$writer->emptyTag('');
486
4871;
Note: See TracBrowser for help on using the repository browser.