source: trunk/gsdl3/bin/script/convert_coll_from_gs2.pl@ 3983

Last change on this file since 3983 was 3983, checked in by kjdon, 21 years ago

Changed a couple of things. This script is now only for converting existing collections built in gs2. Also, it doesn't create the collectionConfig.xml file properly, so the new collection will have no services. This all needs tidying up.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.9 KB
Line 
1#!/usr/bin/perl -w
2
# Compile-time setup: refuse to run without the GSDLHOME / GSDLOS environment
# variables, then put the perllib directories under GSDLHOME at the front of
# the module search path so the 'use' lines below can be resolved.
BEGIN {
    my $gsdlhome = $ENV{'GSDLHOME'};
    defined $gsdlhome      or die "GSDLHOME not set\n";
    defined $ENV{'GSDLOS'} or die "GSDLOS not set\n";

    # perllib/cpan ends up ahead of perllib in @INC.
    # perllib/plugins and perllib/classify are deliberately not added.
    unshift (@INC, "$gsdlhome/perllib/cpan", "$gsdlhome/perllib");
}
11
12#use arcinfo;
13use colcfg;
14#use plugin;
15use docprint;
16use util;
17use parsargv;
18use FileHandle;
19use XML::Writer;
20
# Script entry point: run the conversion as soon as the file has been compiled.
main();
# print_usage()
#
# Writes the command-line usage summary for this script to STDOUT.
# Takes no arguments.
sub print_usage() {
    my @usage_lines = (
        "Usage: convert_coll_from_gs2.pl [options] coll-name\n",
        "options:\n",
        " -collectdir Directory where collection lives.\n",
    );
    return print STDOUT join('', @usage_lines);
}
29
# main()
#
# Converts the configuration of an existing gs2-built collection into two
# XML files, both located under $ENV{'GSDLCOLLECTDIR'}:
#
#   etc/collectionConfig.xml - collection metadata (name, description, icons)
#   index/buildConfig.xml    - numDocs metadata plus the serviceRackList
#                              (retrieve rack with optional classifierList,
#                              optional PhindPhraseBrowse rack, search rack)
#
# Input is read from etc/collect.cfg and index/build.cfg. Existing output
# files are overwritten (a notice is printed to STDOUT).
#
# Takes no arguments; options and the collection name come from @ARGV.
# Dies on bad options, an unusable collection name, a missing config file,
# or an output file that cannot be opened or closed.
sub main {

    my ($verbosity, $collectdir, $faillog);
    # -verbosity and -faillog are accepted on the command line but are not
    # used anywhere else in this sub.
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/', \$verbosity,
                         'collectdir/.*/', \$collectdir,
                         'faillog/.*/', \$faillog)) {
        print_usage();
        die "\n";
    }

    # get and check the collection name
    my $collection = util::use_collection(@ARGV, $collectdir);
    if ($collection eq "") {
        print_usage();
        die "\n";
    }

    my $collect_root = $ENV{'GSDLCOLLECTDIR'};

    # read the two gs2 configuration files; both are required
    my $collconfigfilename = util::filename_cat($collect_root, "etc", "collect.cfg");
    die "collect.cfg not found!!\n" unless -e $collconfigfilename;
    my $collectcfg = colcfg::read_collect_cfg($collconfigfilename);

    my $buildconfigfilename = util::filename_cat($collect_root, "index", "build.cfg");
    die "build.cfg not found!!\n" unless -e $buildconfigfilename;
    my $buildcfg = colcfg::read_build_cfg($buildconfigfilename);

    my $colloutfile = util::filename_cat($collect_root, "etc", "collectionConfig.xml");
    if (-e $colloutfile) {
        print STDOUT "collectionConfig file already exists! overwriting it!\n";
    }

    my $buildoutfile = util::filename_cat($collect_root, "index", "buildConfig.xml");
    if (-e $buildoutfile) {
        print STDOUT "buildConfig file already exists! overwriting it!\n";
    }

    # anything other than 'mgpp' is handled as 'mg' throughout
    my $buildtype = defined $collectcfg->{'buildtype'}
        ? $collectcfg->{'buildtype'}
        : 'mg';

    # Fail loudly if an output file cannot be created, rather than handing
    # an undefined handle to XML::Writer.
    my $buildoutput = IO::File->new($buildoutfile, 'w')
        or die "Couldn't open $buildoutfile for writing: $!\n";
    my $buildwriter = XML::Writer->new(OUTPUT => $buildoutput);

    $buildwriter->startTag('buildConfig');

    my $colloutput = IO::File->new($colloutfile, 'w')
        or die "Couldn't open $colloutfile for writing: $!\n";
    my $collwriter = XML::Writer->new(OUTPUT => $colloutput);

    $collwriter->startTag('collectionConfig');

    # output the collection metadata to the collectionConfig file
    my $collectionmeta = $collectcfg->{'collectionmeta'};
    $collwriter->startTag('metadataList');

    # gs2 collectionmeta name -> metadata name written to collectionConfig.xml
    my %name_map = (
        'collectionname'      => 'colName',
        'collectionextra'     => 'colDescription',
        'iconcollection'      => 'colIcon',
        'iconcollectionsmall' => 'colIconSmall',
    );

    if (defined $collectionmeta) {
        print STDOUT "coll meta defined \n";
        # sorted so that the generated file is reproducible between runs
        foreach my $entry (sort keys %$collectionmeta) {
            print STDOUT "entry=$entry\n";
            next unless defined $name_map{$entry};

            # NOTE: $name is intentionally left as a package variable (no
            # 'my') in case other code in this file reads it directly.
            $name = $name_map{$entry};
            print STDOUT "name=$name\n";

            my $values_by_lang = $collectionmeta->{$entry};
            foreach my $lang (sort keys %$values_by_lang) {
                print STDOUT "lang=$lang\n";
                outputmetadata($collwriter, $name, $lang, $values_by_lang->{$lang});
            }
        }
    }
    $collwriter->endTag('metadataList');

    # num docs
    my $numdocs = defined $buildcfg->{'numdocs'} ? $buildcfg->{'numdocs'} : '';
    $buildwriter->startTag('metadataList');
    $buildwriter->startTag('metadata', 'name'=>'numDocs');
    $buildwriter->characters($numdocs);
    $buildwriter->endTag('metadata');
    $buildwriter->endTag('metadataList');

    # indexes: each indexmap entry has the form "key->value".
    # @index_order remembers the order the keys appear in build.cfg.
    my $indexmap = {};
    my @index_order = ();
    if (defined $buildcfg->{'indexmap'}) {
        foreach my $mapping (@{$buildcfg->{'indexmap'}}) {
            my ($key, $value) = $mapping =~ /^(.*)\-\>(.*)$/;
            next unless defined $key;    # skip entries without "->"
            push @index_order, $key unless exists $indexmap->{$key};
            $indexmap->{$key} = $value;
        }
    } else {
        print STDERR "indexmap not defined\n";
    }

    # translate the default index through the index map; an unknown or
    # missing default index is written out as an empty name
    my $defaultindex = '';
    if (defined $collectcfg->{'defaultindex'}) {
        my $mapped = $indexmap->{$collectcfg->{'defaultindex'}};
        $defaultindex = $mapped if defined $mapped;
    } else {
        print STDERR "defaultindex not defined\n";
    }

    $buildwriter->startTag('serviceRackList');

    my @levels = ('Document');
    my $defaultlevel = 'Document';

    # do the retrieve service
    # assume mgpp or mg
    if ($buildtype eq 'mgpp') {
        # 'Document' is always present; add the configured levels without
        # repeating any level that is already in the list
        if (defined $collectcfg->{'levels'}) {
            my %seen_level = ('Document' => 1);
            foreach my $level (@{$collectcfg->{'levels'}}) {
                next if $seen_level{$level}++;
                push @levels, $level;
            }
            foreach my $level (@levels) {
                $defaultlevel = 'Section' if $level eq 'Section';
            }
        }

        $buildwriter->startTag('serviceRack', 'name'=>'GS2MGPPRetrieve');
        $buildwriter->emptyTag('defaultLevel', 'name'=>$defaultlevel);
        $buildwriter->startTag('levelList');
        foreach my $level (@levels) {
            $buildwriter->emptyTag('level', 'name'=>$level);
        }
        $buildwriter->endTag('levelList');
    } else {
        $buildwriter->startTag('serviceRack', 'name'=>'GS2MGRetrieve');
        $buildwriter->emptyTag('defaultIndex', 'name'=>$defaultindex);
    }

    # add in the classifiers if needed
    my $count = 1;
    my $phind = 0;
    my $started_classifiers = 0;
    if (defined $collectcfg->{'classify'}) {
        foreach my $cl (@{$collectcfg->{'classify'}}) {
            print STDERR "cl=$cl\n";
            # the counter advances for every classifier, Phind included
            my $classifier_name = "CL$count";
            $count++;

            my $classname = $cl->[0];
            if ($classname eq "Phind") {
                # phind is emitted as its own service rack further down
                $phind = 1;
                next;
            }

            my $document_interleave = "true";
            my $orientation = "vertical";
            if ($classname eq "AZList") { #there may be others
                $document_interleave = "false";
                $orientation = "horizontal";
            }
            if (not $started_classifiers) {
                $buildwriter->startTag('classifierList');
                $started_classifiers = 1;
            }

            # content is the value following the first -metadata argument
            my $content = '';
            foreach my $i (0 .. $#$cl) {
                next unless $cl->[$i] eq "-metadata";
                $content = $cl->[$i+1] if defined $cl->[$i+1];
                last;
            }

            $buildwriter->emptyTag('classifier',
                                   'name'=>$classifier_name,
                                   'content'=>$content,
                                   'orientation'=>$orientation,
                                   'documentInterleave'=>$document_interleave);
        } #foreach classifier

        if ($started_classifiers) {
            # end the classifiers
            $buildwriter->endTag('classifierList');
        }
    }
    # close off the Retrieve service
    $buildwriter->endTag('serviceRack');

    # the phind classifier is a separate service
    if ($phind) {
        $buildwriter->emptyTag('serviceRack', 'name'=>'PhindPhraseBrowse');
    }

    # do the search service
    if ($buildtype eq 'mgpp') {
        $buildwriter->startTag('serviceRack', 'name'=>'GS2MGPPSearch');
        $buildwriter->emptyTag('defaultLevel', 'name'=>$defaultlevel);
        $buildwriter->startTag('levelList');
        foreach my $level (@levels) {
            $buildwriter->emptyTag('level', 'name'=>$level);
        }
        $buildwriter->endTag('levelList');

        # fieldlist: indexfieldmap entries have the same "key->value" form
        print STDOUT "trying fields\n";
        if (defined $buildcfg->{'indexfieldmap'}) {
            print STDOUT "doing fields\n";
            my $fieldmap = {};
            my @field_order = ();
            foreach my $mapping (@{$buildcfg->{'indexfieldmap'}}) {
                my ($key, $value) = $mapping =~ /^(.*)\-\>(.*)$/;
                next unless defined $key;    # skip entries without "->"
                push @field_order, $key unless exists $fieldmap->{$key};
                $fieldmap->{$key} = $value;
            }

            $buildwriter->startTag('fieldList');
            foreach my $fieldname (@field_order) {
                $buildwriter->emptyTag('field',
                                       'shortname'=>$fieldmap->{$fieldname},
                                       'name'=>$fieldname);
            }
            $buildwriter->endTag('fieldList');
        } else {
            print STDERR "indexfieldmap not defined\n";
        }
    } else {
        # Every non-mgpp build type is treated as mg, matching the retrieve
        # service above. This guarantees a serviceRack is open before the
        # defaultIndex/indexList elements and the closing tag below.
        $buildwriter->startTag('serviceRack', 'name'=>'GS2MGSearch');
    }

    $buildwriter->emptyTag('defaultIndex', 'name'=>$defaultindex);
    $buildwriter->startTag('indexList');
    # for each index, in build.cfg order
    foreach my $indexname (@index_order) {
        $buildwriter->emptyTag('index', 'name'=>$indexmap->{$indexname});
    }
    $buildwriter->endTag('indexList');

    $buildwriter->endTag('serviceRack');

    $buildwriter->endTag('serviceRackList');
    $buildwriter->endTag('buildConfig');
    $collwriter->endTag('collectionConfig');

    # finish both documents, then make sure the data actually reached disk
    $buildwriter->end();
    $collwriter->end();
    $buildoutput->close()
        or die "Couldn't close $buildoutfile: $!\n";
    $colloutput->close()
        or die "Couldn't close $colloutfile: $!\n";
}
299
300
# outputmetadata($collwriter, $metaname, $lang, $metavalue)
#
# Writes one <metadata name="..." lang="...">value</metadata> element.
#
#   $collwriter - writer object; startTag, characters and endTag are called on it
#   $metaname   - value for the 'name' attribute
#   $lang       - language spec: 'default' becomes 'en'; a bracketed spec
#                 such as "[l=fr]" is reduced to its two-character code
#   $metavalue  - text content of the element
#
# For metadata names containing "colIcon" the value is reduced to the part
# after ".../images/" when it starts with _httpprefix_/collect/ or
# _httpcollection_/images/; any other value is written unchanged.
# Progress messages are printed to STDOUT.
sub outputmetadata {
    my ($collwriter, $metaname, $lang, $metavalue) = @_;

    $lang = 'en' if $lang eq 'default';
    if ($lang =~ /^\[/) {
        my ($code) = $lang =~ /\[l=(..)\]/;
        # an unparseable spec yields an empty lang attribute, not undef
        $lang = defined $code ? $code : '';
        print STDOUT "new lang = $lang\n";
    }

    my $newvalue = $metavalue;
    # Decide from this sub's own $metaname argument, not from whatever a
    # caller happens to have left in the package variable $name.
    if ($metaname =~ /colIcon/) {
        #may need to translate the value
        my ($image) = $metavalue =~ /^_httpprefix_\/collect\/.*\/images\/(.*)$/;
        ($image) = $metavalue =~ /^_httpcollection_\/images\/(.*)$/ if not defined $image;
        $newvalue = $image if defined $image;
        print STDOUT "old value=$metavalue. new value = $newvalue\n";
    }

    $collwriter->startTag('metadata', 'name'=>$metaname, 'lang'=>$lang);
    $collwriter->characters($newvalue);
    $collwriter->endTag('metadata');
}
322
323#$writer->startTag('');
324#$writer->endTag('');
325#$writer->characters();
326#$writer->emptyTag('');
327
3281;
Note: See TracBrowser for help on using the repository browser.