source: trunk/gsdl3/bin/script/convert_coll_from_gs2.pl@ 7471

Last change on this file since 7471 was 7471, checked in by kjdon, 20 years ago

added in a replace \' with '

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 13.8 KB
Line 
1#!/usr/bin/perl -w
2
BEGIN {
    # Refuse to start unless every environment variable this script relies
    # on is present; the messages name the missing variable.
    foreach my $required_var ('GSDLHOME', 'GSDL3HOME', 'GSDLOS') {
        die "$required_var not set\n" unless defined $ENV{$required_var};
    }

    # Put the two library directories at the front of the module search
    # path, with the GSDL3HOME cpan directory searched first.
    unshift (@INC,
             "$ENV{'GSDL3HOME'}/lib/perl/cpan",
             "$ENV{'GSDLHOME'}/perllib");
}
10
11use colcfg;
12use util;
13use parsargv;
14use FileHandle;
15use XML::Writer;
16
# Script entry point: run the conversion (main takes its input from @ARGV).
main();
# Print the command-line synopsis and the list of supported options to
# standard output.
sub print_usage() {
    my @usage_text = (
        "Usage: convert_coll_from_gs2.pl [options] coll-name\n",
        "options:\n",
        " -collectdir Directory where collection lives.\n",
        " -verbosity Controls the amount of output.\n",
        " -defaultlang The language that is considered the default (for display text etc). defaults to 'en'\n\n",
    );
    print STDOUT @usage_text;
}
26
# Convert a Greenstone 2 collection's configuration into the two XML files
# written by this script:
#   <collectdir>/<coll>/etc/collectionConfig.xml   (from etc/collect.cfg)
#   <collectdir>/<coll>/index/buildConfig.xml      (from index/build.cfg)
#
# Input comes from @ARGV: the -verbosity, -collectdir and -defaultlang
# options followed by the collection name.  Existing output files are
# overwritten.  Dies (with a message on STDERR) when the arguments are
# invalid, when either .cfg file is missing, when the build type is neither
# 'mg' nor 'mgpp', or when an output file cannot be written.
sub main {

    my ($defaultlang, $verbosity, $collectdir);
    # note that no defaults are passed for most options as they're set
    # later (after we check the collect.cfg file)
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/', \$verbosity,
                         'collectdir/.*/', \$collectdir,
                         'defaultlang/.*/', \$defaultlang)) {
        print_usage();
        die "\n";
    }

    # get and check the collection name
    my ($collection) = @ARGV;
    if (!defined($collection) || $collection eq "") {
        die "No collection specified\n";
    }
    if ($collection eq "gs2model") {
        die "You cant convert the model collection\n";
    }

    if (!defined $collectdir || $collectdir eq "") {
        $collectdir = util::filename_cat($ENV{'GSDLHOME'}, "collect");
    }

    if (!defined $defaultlang || $defaultlang eq "") {
        $defaultlang = 'en';
    }
    # add on the coll name
    $collectdir = util::filename_cat($collectdir, $collection);

    # read the two GS2 config files
    my $collconfigfilename = util::filename_cat($collectdir, "etc", "collect.cfg");
    print STDOUT "coll config=$collconfigfilename\n";
    if (!-e $collconfigfilename) {
        die "collect.cfg not found!!\n";
    }
    my $collectcfg = colcfg::read_collect_cfg($collconfigfilename);

    my $buildconfigfilename = util::filename_cat($collectdir, "index", "build.cfg");
    if (!-e $buildconfigfilename) {
        die "build.cfg not found!!\n";
    }
    my $buildcfg = colcfg::read_build_cfg($buildconfigfilename);

    # Work out the build type before touching any output file: only mg and
    # mgpp have a search service below, and any other value used to fail
    # half way through with both output files already truncated.
    my $buildtype = 'mg';
    if (defined $buildcfg->{'buildtype'}) {
        $buildtype = $buildcfg->{'buildtype'};
    }
    if ($buildtype ne 'mg' && $buildtype ne 'mgpp') {
        die "Unsupported buildtype '$buildtype': only mg and mgpp collections can be converted\n";
    }

    my $colloutfile = util::filename_cat($collectdir, "etc", "collectionConfig.xml");
    if (-e $colloutfile) {
        print STDOUT "collectionConfig file already exists! overwriting it!\n";
    }

    my $buildoutfile = util::filename_cat($collectdir, "index", "buildConfig.xml");
    if (-e $buildoutfile) {
        print STDOUT "buildConfig file already exists! overwriting it!\n";
    }

    # The explicit 'w' mode keeps odd characters in the path from being
    # interpreted as part of an open mode.
    my $buildoutput = IO::File->new($buildoutfile, 'w')
        or die "Couldn't open $buildoutfile for writing: $!\n";
    my $colloutput = IO::File->new($colloutfile, 'w')
        or die "Couldn't open $colloutfile for writing: $!\n";

    my $buildwriter = XML::Writer->new(OUTPUT => $buildoutput, NEWLINES => 1);
    $buildwriter->startTag('buildConfig', 'xmlns:gsf'=>"http://www.greenstone.org/configformat");

    my $collwriter = XML::Writer->new(OUTPUT => $colloutput, NEWLINES => 1);
    $collwriter->startTag('collectionConfig', 'xmlns:gsf'=>"http://www.greenstone.org/configformat", 'xmlns:xsl'=>'http://www.w3.org/1999/XSL/Transform');

    #output the collection metadata to the collectionConfig file
    $collwriter->startTag('metadataList');
    output_metadata($collwriter, 'creator', $collectcfg->{'creator'});
    $collwriter->endTag('metadataList');

    #output the display collectionmeta to collectionConfig.xml
    my $collectionmeta = $collectcfg->{'collectionmeta'};
    if (defined $collectionmeta) {
        # GS2 collectionmeta name => displayItem name
        my %name_map = ('collectionname', 'name',
                        'collectionextra', 'description',
                        'iconcollection', 'icon',
                        'iconcollectionsmall', 'smallicon');

        $collwriter->startTag('displayItemList');
        # sorted so that repeated runs produce identical files
        foreach my $entry (sort keys %$collectionmeta) {
            next unless defined $name_map{$entry};
            my $name = $name_map{$entry};
            foreach my $lang (sort keys %{$collectionmeta->{$entry}}) {
                my $value = $collectionmeta->{$entry}->{$lang};
                if ($entry =~ /^icon/) {
                    $value = format_icon_value($value);
                }
                output_display($collwriter, $name, $lang, $value, $defaultlang);
            }
        }
        $collwriter->endTag('displayItemList');
    }

    # output building metadata to build config file
    $buildwriter->startTag('metadataList');
    output_metadata($buildwriter, 'numDocs', $buildcfg->{'numdocs'});
    output_metadata($buildwriter, 'buildType', $buildtype);
    $buildwriter->endTag('metadataList');

    #indexes
    # $indexmap maps each index's long name to its short name; @indexnames
    # remembers the long names in build.cfg order so the output is stable.
    my $indexmap = {};
    my @indexnames = ();
    if (defined $buildcfg->{'indexmap'}) {
        foreach my $mapping (@{$buildcfg->{'indexmap'}}) {
            my ($longname, $shortname) = $mapping =~ /^(.*)\-\>(.*)$/;
            if (!defined $longname) {
                print STDERR "ignoring malformed indexmap entry '$mapping'\n";
                next;
            }
            push @indexnames, $longname unless exists $indexmap->{$longname};
            $indexmap->{$longname} = $shortname;
        }
    } else {
        print STDERR "indexmap not defined\n";
    }
    my $firstindex = "";
    if (scalar(@indexnames) > 0) {
        $firstindex = $indexmap->{$indexnames[0]};
    }

    # default index: the one named in collect.cfg when it is in the
    # indexmap, otherwise the first one
    my $defaultindex = $firstindex;
    if (defined $collectcfg->{'defaultindex'}) {
        my $mapped = $indexmap->{$collectcfg->{'defaultindex'}};
        if (defined $mapped) {
            $defaultindex = $mapped;
        } else {
            print STDERR "defaultindex '$collectcfg->{'defaultindex'}' is not in the indexmap, using the first index\n";
        }
    }

    # format stuff
    my $format = $collectcfg->{'format'};
    $format = {} unless defined $format;

    #output the search stuff to coll cfg
    $collwriter->startTag('search');
    foreach my $indexname (@indexnames) {
        $collwriter->startTag('index', 'name'=>$indexmap->{$indexname});
        #find the coll meta stuff (stored under the index name with a leading '.')
        my $indexdisplay = ".$indexname";
        if (defined $collectionmeta && defined $collectionmeta->{$indexdisplay}) {
            foreach my $lang (sort keys %{$collectionmeta->{$indexdisplay}}) {
                my $value = $collectionmeta->{$indexdisplay}->{$lang};
                output_display($collwriter, 'name', $lang, $value, $defaultlang);
            }
        }
        $collwriter->endTag('index');
    }

    # add in the format stuff
    if (defined $format->{'SearchVList'}) {
        $collwriter->startTag('format');
        write_format($collwriter, $format->{'SearchVList'});
        $collwriter->endTag('format');
    }

    $collwriter->endTag('search');

    $buildwriter->startTag('serviceRackList');

    my @levels = ();
    my $defaultlevel;

    #do the retrieve service
    if ($buildtype eq 'mgpp') {
        #for each level
        if (defined $buildcfg->{'indexlevels'}) {
            push @levels, @{$buildcfg->{'indexlevels'}};

            if (defined $buildcfg->{'textlevel'}) {
                $defaultlevel = $buildcfg->{'textlevel'};
            } else {
                $defaultlevel = $levels[0];
            }
        } else { #use levels from collect.cfg - must be an old collection
            @levels = ('Document');
            $defaultlevel = 'Document';
            if (defined $collectcfg->{'levels'}) {
                foreach my $cfglevel (@{$collectcfg->{'levels'}}) {
                    if ($cfglevel eq "Section") {
                        $defaultlevel = 'Section';
                    }
                    push @levels, $cfglevel;
                }
            }
        }

        $buildwriter->startTag('serviceRack', 'name'=>'GS2MGPPRetrieve');
        $buildwriter->emptyTag('defaultLevel', 'name'=>$defaultlevel);

    } else {
        $buildwriter->startTag('serviceRack', 'name'=>'GS2MGRetrieve');
        $buildwriter->emptyTag('defaultIndex', 'name'=>$defaultindex);
    }

    # add in the classifiers if needed

    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;
    if (defined $collectcfg->{'classify'}) {
        $collwriter->startTag('browse');
        foreach my $cl (@{$collectcfg->{'classify'}}) {
            # every classify line uses up a CLn number, phind included
            my $name = "CL$count";
            $count++;
            my $classname = $cl->[0];
            if ($classname =~ /^phind$/i) {
                $phind = 1;
                #should add it into coll config classifiers
                next;
            }

            #there may be others
            my $horizontal_at_top = ($classname eq "AZList" || $classname eq "AZCompactList");

            if (not $started_classifiers) {
                $buildwriter->startTag('classifierList');
                $started_classifiers = 1;
            }

            my $content = ''; #use buttonname first, then metadata
            if ($classname eq "DateList") {
                $content = "Date";
            } else {
                for (my $i = 0; $i < scalar(@$cl); $i++) {
                    my $arg = $cl->[$i];
                    if ($arg eq "-buttonname") {
                        $content = $cl->[$i+1];
                        last;
                    } elsif ($arg eq "-metadata") {
                        $content = $cl->[$i+1];
                    }
                }
                # an option given without its value must not become an
                # undefined attribute
                $content = '' unless defined $content;
            }

            if ($horizontal_at_top) {
                $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content, 'horizontalAtTop'=>'true');
            } else {
                $buildwriter->emptyTag('classifier', 'name'=>$name, 'content'=>$content);
            }

            # format statements that apply to this classifier, in the
            # order they are written out
            my @format_names = ($name."VList", $name."HList");
            if ($classname eq "DateList") {
                push @format_names, "DateList";
            }
            my @defined_formats = grep { defined $format->{$_} } @format_names;

            $collwriter->startTag('classifier', 'name'=>$name);
            if (scalar(@defined_formats) > 0) {
                $collwriter->startTag('format');
                foreach my $format_name (@defined_formats) {
                    write_format($collwriter, $format->{$format_name});
                }
                $collwriter->endTag('format');
            }
            $collwriter->endTag('classifier');
        } #foreach classifier
        if ($started_classifiers) {
            # end the classifiers
            $buildwriter->endTag('classifierList');
        }

        $collwriter->endTag('browse');
    }
    # close off the Retrieve service
    $buildwriter->endTag('serviceRack');

    # the phind classifier is a separate service
    if ($phind) {
        $buildwriter->emptyTag('serviceRack', 'name'=>'PhindPhraseBrowse');
    }

    # do the search service
    if ($buildtype eq 'mgpp') {

        $buildwriter->startTag('serviceRack', 'name'=>'GS2MGPPSearch');
        $buildwriter->emptyTag('defaultLevel', 'name'=>$defaultlevel);
        $buildwriter->startTag('levelList');
        foreach my $level (@levels) {
            $buildwriter->emptyTag('level', 'name'=>$level);
        }
        $buildwriter->endTag('levelList');

        #fieldlist
        my $fieldmap = {};
        my @fieldlist = ();
        if (defined $buildcfg->{'indexfields'}) {
            push @fieldlist, @{$buildcfg->{'indexfields'}};
            if (defined $buildcfg->{'indexfieldmap'}) {
                foreach my $mapping (@{$buildcfg->{'indexfieldmap'}}) {
                    my ($fieldname, $shortname) = $mapping =~ /^(.*)\-\>(.*)$/;
                    next unless defined $fieldname;
                    $fieldmap->{$fieldname} = $shortname;
                }
            }
            $buildwriter->startTag('fieldList');
            foreach my $fieldname (@fieldlist) {
                $buildwriter->emptyTag('field', 'shortname'=>$fieldmap->{$fieldname}, 'name'=>$fieldname);
            }
            $buildwriter->endTag('fieldList');
        } else {
            print STDERR "indexfields not defined\n";
        }

        # do the search types if there
        if (defined $collectcfg->{'searchtype'}) {
            $buildwriter->startTag('searchTypeList');
            foreach my $searchtype (@{$collectcfg->{'searchtype'}}) {
                $buildwriter->emptyTag('searchType', 'name'=>$searchtype);
            }
            $buildwriter->endTag('searchTypeList');
        }
    } else {
        # buildtype was validated above, so this is 'mg'
        $buildwriter->startTag('serviceRack', 'name'=>'GS2MGSearch');
    }

    $buildwriter->emptyTag('defaultIndex', 'name'=>$defaultindex);
    $buildwriter->startTag('indexList');
    #for each index
    foreach my $indexname (@indexnames) {
        $buildwriter->emptyTag('index', 'name'=>$indexmap->{$indexname});
    }
    $buildwriter->endTag('indexList');

    $buildwriter->endTag('serviceRack');

    $buildwriter->endTag('serviceRackList');
    $buildwriter->endTag('buildConfig');
    $collwriter->endTag('collectionConfig');
    $collwriter->end();
    $buildwriter->end();

    # buffered write errors (e.g. a full disk) only show up at close time
    $buildoutput->close() or die "Error closing $buildoutfile: $!\n";
    $colloutput->close() or die "Error closing $colloutfile: $!\n";
}
397
398
# Write one <metadata name="$metaname">$metavalue</metadata> element.
#
#   $writer    - object providing startTag/characters/endTag
#   $metaname  - value for the element's 'name' attribute
#   $metavalue - text content; an undefined value (for example a setting
#                missing from the config file) is written as an empty
#                element instead of being passed on as undef
sub output_metadata {
    my ($writer, $metaname, $metavalue) = @_;
    $metavalue = '' unless defined $metavalue;

    $writer->startTag('metadata', 'name'=>$metaname);
    $writer->characters($metavalue);
    $writer->endTag('metadata');
}
405
# Write one <displayItem name="$name" lang="...">$value</displayItem>.
#
#   $writer       - object providing startTag/characters/endTag
#   $name         - value for the 'name' attribute
#   $lang         - language key: either 'default', a bracketed key of the
#                   form [l=xx], or a plain language code
#   $value        - text content (undef is written as an empty element)
#   $default_lang - optional; language code used for 'default' and for keys
#                   whose language cannot be determined.  Defaults to 'en'.
sub output_display {
    my ($writer, $name, $lang, $value, $default_lang) = @_;
    if (!defined $default_lang || $default_lang eq '') {
        $default_lang = 'en';
    }

    if (!defined $lang || $lang eq 'default') {
        $lang = $default_lang;
    } elsif ($lang =~ /^\[/) {
        # pull the code out of [l=xx]; fall back to the default rather than
        # emitting an undefined 'lang' attribute when the key is malformed
        my ($code) = $lang =~ /\[l=(.*)\]/;
        $lang = (defined $code && $code ne '') ? $code : $default_lang;
    }
    $value = '' unless defined $value;

    $writer->startTag('displayItem', 'name'=>$name, 'lang'=>$lang);
    $writer->characters($value);
    $writer->endTag('displayItem');
}
# Reduce an icon value to its file name.
#
# A value that starts with '_' and contains a '/' is cut down to the text
# after its last '/'.  Anything else - including a value that ends in '/',
# which has no file name part - is returned unchanged.
sub format_icon_value {
    my ($value) = @_;
    return $value unless defined $value;

    if ($value =~ /^_/) {
        my ($newvalue) = $value =~ /\/([^\/]*)$/;
        # test for a non-empty string rather than truth so that a file
        # literally named "0" is still accepted
        if (defined $newvalue && $newvalue ne '') {
            return $newvalue;
        }
    }
    return $value;
}
426
# Convert a GS2 format string into gsf markup and write it out wrapped in
# a <gsf:template match="documentNode"> element.
#
#   $writer     - object providing startTag/charactersXML/endTag
#   $old_format - the GS2 format string (undef is treated as empty)
#
# The substitutions below are order dependent: the named [..] items are
# replaced first, every remaining [..] is then assumed to be metadata, and
# the select='..' clean-ups work on the output of that metadata step.
sub write_format {
    my ($writer, $old_format) = @_;
    $old_format = '' unless defined $old_format;

    # replace \' with '
    $old_format =~ s/\\\'/\'/g;

    #convert [] to <gsf:...>
    #remove IFs, for now just make the first option true
    $old_format =~ s/\{If\}\{[^,]*,([^,\}]*)(,[^\}]*)?\}/$1/g;
    #remove ORs, for now just make the first option true
    $old_format =~ s/\{Or\}\{([^,]*),[^\}]*\}/$1/g;
    $old_format =~ s/\[Text\]/\<gsf:text\/\>/g;
    $old_format =~ s/\[num\]/\<gsf:num\/\>/g;
    $old_format =~ s/\[link\]/\<gsf:link\>/g;
    $old_format =~ s/\[\/link\]/\<\/gsf:link\>/g;
    $old_format =~ s/\[srclink\]/\<gsf:link type=\'source\'\>/g;
    $old_format =~ s/\[\/srclink\]/\<\/gsf:link\>/g;
    $old_format =~ s/\[icon\]/\<gsf:icon\/\>/g;
    $old_format =~ s/\[srcicon\]/\<gsf:icon type=\'source\'\/\>/g;

    #now do the rest of the [] which are assumed to be metadata
    # The "prefix:" part is optional; its capture is undefined for a plain
    # [Name], so substitute an empty string explicitly instead of
    # interpolating an undefined $1 (which warns under -w).
    $old_format =~ s{\[([^\]]*:)?([^\]:]*)\]}
                    {my $select = defined $1 ? $1 : '';
                     "<gsf:metadata name='$2' select='$select'/>"}ge;
    #do the parent stuff
    $old_format =~ s/(select=\'parent)\:\'/$1\'/g;
    $old_format =~ s/select=\'parent\(Top\)\:\'/select=\'root\'/g;
    $old_format =~ s/select=\'parent\(All\)\:\'/select=\'ancestors\'/g;
    $old_format =~ s/select=\'parent\(All\'([^\']*)\'\)\:\'/select=\'ancestors\' separator=\'$1\'/g;
    #remove any select=''
    $old_format =~ s/select=\'\'//g;
    #turn <br> into <br />
    $old_format =~ s/\<br\>/\<br \/\>/g;
    #turn <p> into <p />
    $old_format =~ s/\<p\>/\<p \/\>/g;

    #put quotes around any atts
    $old_format =~ s/=([a-z]+)([> ])/=\'$1\'$2/g;

    $writer->startTag('gsf:template', 'match'=>'documentNode');
    $writer->charactersXML($old_format);
    $writer->endTag('gsf:template');
}
469
470#$writer->startTag('');
471#$writer->endTag('');
472#$writer->characters();
473#$writer->emptyTag('');
474
4751;
Note: See TracBrowser for help on using the repository browser.