source: main/tags/2.40/gsdl/bin/script/unbuildv2.pl@ 21085

Last change on this file since 21085 was 3324, checked in by sjboddie, 22 years ago

Removed the really old unindex.pl script and added the not quite so old
unbuildv1.pl and unbuildv2.pl. These scripts can be used to convert built
indexes back to Greenstone Archive format documents. unbuildv1.pl handles
very old Greenstone collections like those on the various HumanInfo CDROMs
while unbuildv2.pl handles more recently created collections. unbuildv1.pl
was altered and now works but unbuildv2.pl probably still needs some work.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.6 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# unindex.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# this program will decompress all the text from a built index
29# and return it to gml format -
30#this is based on the format current in Nov 1999. (version two)
31
32# Stefan updated unbuildv1.pl in August 2002 but unbuildv2.pl was not
33# updated. It probably needs some work done before using.
34
# Compile-time bootstrap: the Greenstone environment variables must be set
# before any of the GSDL perllib modules below are loaded, and perllib is
# pushed onto @INC so the 'use' statements can find them.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    # Seed for generating unique bareword filehandle names (FH001, FH002, ...)
    # via Perl's magic string auto-increment in "++$FileHandle".
    $FileHandle = 'FH000';
}
41
42use doc;
43use docsave;
44use util;
45use parsargv;
46use GDBM_File;
47use FileHandle;
48use English;
49use cfgread;
50use unicode;
51
52select STDERR; $| = 1;
53select STDOUT; $| = 1;
54
55
56# globals
57$collection = ""; # the collection name
58$index = ""; # the selected index (like stt/unu)
59$textdir = ""; # the textdir (like text/unu)
60$toplevelinfo = []; #list of document OIDs
61%infodb = (); #hash into GDBM file
62$classifyinfo = []; # list of classifications
63$doc_classif_info = {}; # hash of OIDs->classifications they belong to
64$collect_cfg = {}; #data for the configuration file
65
66$mgread = ++$FileHandle;
67$mgwrite = ++$FileHandle;
68
69
70
# Print a brief usage summary for this script to STDERR.
sub print_usage {
    print STDERR "\n usage: $0 [options]\n\n";
    print STDERR " options:\n";
    print STDERR " -verbosity number 0=none, 3=lots\n";
    print STDERR " -indexdir directory The index to be decompressed (defaults to ./index)\n";
    # fixed: the original message was missing the closing ")" after ./archives.new
    print STDERR " -archivedir directory Where the converted material ends up (defaults to ./archives.new)\n";
    print STDERR " -removeold Will remove the old contents of the archives\n";
    print STDERR " directory -- use with care\n\n";
}
80
&main ();

# Top-level driver: parse options, prepare the archives and etc directories,
# mine the GDBM info database for classifications and top-level document
# OIDs, then walk every document, pulling its text back out of the mg index
# and writing it as a Greenstone Archive (GML) document.
sub main {
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'indexdir/.*/index', \$indexdir,
                         'archivedir/.*/archives.new', \$archivedir,
                         'removeold', \$removeold)) {
        &print_usage();
        die "\n";
    }

    die "indexdir $indexdir does not exist\n\n" unless (-d $indexdir);
    $indexdir =~ s/\/$//;
    if (-d $archivedir) {
        if ($removeold) {
            print STDERR "Warning - removing current contents of the archives directory $archivedir\n";
            print STDERR " in preparation for the import\n";
            sleep(5); # just in case...
            &util::rm_r ($archivedir);
        }
    } else {
        &util::mk_all_dir ($archivedir);
    }

    # the generated collect.cfg and classifier hfiles go into ./etc
    $etcdir = "./etc";
    if (!(-d $etcdir)) {
        &util::mk_all_dir ($etcdir);
    }


    my $gdbmfile = &get_gdbmfile ($indexdir); # sets $collection and $textdir
    &set_index (); # sets $index (just chooses one index)


    my $buildcfgfile = &util::filename_cat($indexdir, "build.cfg");
    my $colcfgfile = &util::filename_cat($etcdir, "collect.cfg");

    &add_default_cfg();
    &add_index_cfg($buildcfgfile);


    # work out all the classifications from the gdbm file; info for each doc
    # (ie which classifications they belong to) is kept in $doc_classif_info
    &get_classifyinfo ($gdbmfile); # puts a list of classifications into $classifyinfo

    &get_toplevel_OID ($gdbmfile); # puts a list of the top level document OIDs into $toplevelinfo

    # read-only tie to the info database for everything that follows
    tie (%infodb, "GDBM_File", $gdbmfile, 1, 0);

    # this makes the files specifying the hierarchy of subjects, titles etc
    foreach $classify (@$classifyinfo) {

        &make_info_file($classify);

    }

    # write out the collect.cfg
    &output_cfg_file($colcfgfile);

    &openmg ();

    # read the archive information file
    # NOTE(review): arcinfo is not use'd above - presumably pulled in by
    # docsave; verify before relying on this script
    my $archive_info_filename = &util::filename_cat ($archivedir, "archives.inf");
    my $archive_info = new arcinfo ();

    # create a docsave object to process the documents
    my $processor = new docsave ("", $archive_info, $verbosity);
    $processor->setarchivedir ($archivedir);

    # rebuild each top-level document: metadata from the info database,
    # text from mg, then recurse into its subsections
    my ($doc_obj, $hashref, $children);
    print STDERR "processing documents now\n" if $verbosity >=2;
    foreach $oid (@$toplevelinfo) {
        $value = $infodb{$oid};
        $hashref={};
        $children = [];
        &get_metadata($value, $hashref);
        $doc_obj = new doc ();
        $doc_obj->set_OID($oid);
        my ($olddir) = $hashref->{'archivedir'}; # old dir for this doc, where images are stored
        $top = $doc_obj->get_top_section();
        &add_section_content ($doc_obj, $top, $hashref, $olddir);
        &add_classification_metadata($oid, $doc_obj, $top);
        &add_cover_image($doc_obj, $olddir);
        &get_children($hashref, $children);
        &recurse_sections($doc_obj, $children, $oid, $top, $olddir);
        $processor->process($doc_obj);

    }
    print STDERR "\n";

    &closemg();

    # write out the archive information file
    $archive_info->save_info($archive_info_filename);
}
179
180# returns the path to the gdbm info database - also
181# sets the $collection and $textdir global variable
# Locate the GDBM info database beneath $indexdir and return its path.
# Side effects: sets the global $collection (from the database file's
# basename) and $textdir (the text directory, extended with the collection
# name before returning).
sub get_gdbmfile {
    my ($indexdir) = @_;
    my ($gdbmfile);

    # find the text directory (e.g. "text") inside the index directory
    opendir (DIR, $indexdir) || die "Couldn't open directory $indexdir\n\n";
    my @conts = readdir DIR;
    closedir DIR;   # fixed: close() is for filehandles; dirhandles need closedir()

    foreach my $file (@conts) {
        if ($file =~ /text$/) {
            $textdir = $file;
            last;
        }
    }
    die "No text directory found in $indexdir\n\n"
        unless defined $textdir && $textdir =~ /text$/;

    $gdbmfile = &util::filename_cat ($indexdir, $textdir);

    # find the database file (<collection>.ldb or <collection>.bdb) within it
    opendir (DIR, $gdbmfile) || die "Couldn't open directory $gdbmfile\n\n";
    @conts = readdir DIR;
    closedir DIR;   # fixed: was close()

    foreach my $file (@conts) {
        if ($file =~ /^(.*?)\.(?:ldb|bdb)$/) {
            $collection = $1;
            $gdbmfile = &util::filename_cat ($gdbmfile, $file);
            last;
        }
    }

    if (defined $collection && $collection =~ /\w/) {
        $textdir = &util::filename_cat ($textdir, $collection);
    } else {
        die "collection global wasn't set\n";
    }
    return $gdbmfile if (-e $gdbmfile);
    die "Couldn't find gdbm info database in $indexdir\n\n";
}
221
222
# Fill the global $toplevelinfo with the OIDs of all top-level documents by
# scanning a db2txt dump of the GDBM database and discarding subsection,
# classification and housekeeping entries.
sub get_toplevel_OID {
    my ($gdbmfile) = @_;

    open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
    print STDERR "Finding all top level sections from $gdbmfile\n" if $verbosity >= 2;

    # db2txt separates records with a line of 70 dashes
    $/ = '-' x 70;
    my $entry = "";
    while (defined ($entry = <DB2TXT>)) {
        next unless $entry =~ /\w/; # ignore blank entries
        # fold newline runs into literal "\n" so each record is one string
        $entry =~ s/\n+/\\n/g;
        my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/; # record format: [key]value

        next if ($key =~ /\./);          # ignore any lower level (subsection) entries
        next if ($key =~ /^CL/);         # ignore classification entries
        next if ($value =~ /<section>/); # ignore docnum->OID entries
        next if ($key !~ /\d/);          # ignore collection, browse entries

        push( @$toplevelinfo, $key);

    }

    $/ = "\n"; # restore the input record separator
    #print STDERR "toplevel sections are: ", join ("\n", @$toplevelinfo);
}
248
249# gets all the metadata from a gdbm file entry, and puts it into a hashref
# Parse one GDBM record body (lines of the form "<key>value") into the
# supplied hashref.  Values for repeated keys are concatenated with '@'
# separators, matching the multi-valued metadata convention used elsewhere.
sub get_metadata {
    my ($record_text, $meta) = @_;

    for my $line (split /\n/, $record_text) {
        my ($field, $data) = $line =~ /^<([^>]*)>(.*?)$/;
        if (defined $meta->{$field}) {
            $meta->{$field} .= '@' . $data;
        } else {
            $meta->{$field} .= $data;
        }
    }
}
264
265#takes a hashref containing the metadata for a gdbmfile entry, and extracts
266#the childrens numbers (from the 'contains' entry).
267#assumes format is ".1;".2;".3
# Extract a section's child numbers from its 'contains' metadata entry
# (format: ".1;".2;".3, possibly with a trailing @) into @$children.
# Fix: the original's else branch did "$children = []", which only rebinds
# the lexical parameter and never empties the caller's array; we now clear
# the array the caller passed in.
sub get_children {
    my ($hashref, $children) = @_;

    my $childs = $hashref->{'contains'};
    if (defined ($childs)) {
        $childs =~ s/\@$//;   # remove trailing @
        $childs =~ s/^\"\.//; # remove initial ".
        @$children = split /\;\"\./, $childs;
    }
    else {
        @$children = ();      # genuinely empty the caller's array
    }
}
282
283#takes a hashref containing the metadata for a gdbmfile entry, and extracts
284#the childrens numbers (from the 'contains' entry).
285#assumes format is ".1;".2;".3
286#returns a list with the full child name ie HASH0123...ac.1 HASH0123...ac.2
287#etc
288#used for classification stuff
# Like get_children, but returns fully-qualified child OIDs.  The
# 'contains' entry is either ".1;".2 (child suffixes, prefixed with
# $parentoid) or a ';'-separated list of complete OIDs.  Used for the
# classification hierarchy.
# Fix: the original's "no children" branch did "$children = []", a no-op
# that never cleared the caller's array; we now clear it properly.
sub get_whole_children {

    my ($parentoid, $hashref, $children) = @_;

    my $childs = $hashref->{'contains'};
    unless (defined ($childs)) {
        @$children = ();
        return;
    }

    $childs =~ s/\@$//;           # remove trailing @
    if ($childs =~ s/^\"//) {     # remove initial " if there
        # suffix form: split into .1 .2 .3 etc and qualify with the parent
        foreach my $item (split /\;\"/, $childs) {
            push (@$children, "$parentoid$item");
        }
    }
    else {
        # whole id is already present: split on ; only
        foreach my $item (split /\;/, $childs) {
            push (@$children, "$item");
        }
    }
    #print STDERR "children are: ", join ("\n", @$children);
}
# Recursively recreate a document's section tree.  @$children holds the bare
# child numbers of $parentsection; for each one we create the named section
# on $doc_obj, fetch its metadata from the tied %infodb, add its content,
# and recurse into its own children.
sub recurse_sections {
    my ($doc_obj, $children, $parentoid, $parentsection, $olddir) = @_;

    foreach $child (sort numerically @$children) {
        $doc_obj->create_named_section("$parentsection.$child");
        my $value = $infodb{"$parentoid.$child"};
        my $hashref={};
        &get_metadata($value, $hashref); # get child's metadata
        my $newchildren = [];
        &get_children($hashref, $newchildren); # get child's children
        # add content for current section
        &add_section_content($doc_obj, "$parentsection.$child", $hashref, $olddir);
        # process all the children if there are any
        &recurse_sections($doc_obj, $newchildren, "$parentoid.$child", "$parentsection.$child", $olddir)
            if (defined ($newchildren));
    }


}
337
# Add one section's metadata and (via mg) its text to $doc_obj.  Image
# references in the text are rewritten to _httpdocimg_/<name> and the
# original image files are associated with the document so they are copied
# into the new archive.
sub add_section_content {
    my ($doc_obj, $cursection, $hashref, $olddir) = @_;

    foreach $key (keys %$hashref) {
        # these entries are structural/bookkeeping - don't store them as metadata
        next if $key =~ /(contains|docnum|hastxt|doctype|archivedir|classifytype)/i;
        # multi-valued metadata is '@'-separated (see get_metadata)
        my @items = split /@/, $hashref->{$key};
        map {$doc_obj->add_metadata ($cursection, $key, $_); } @items;

    }
    my ($docnum)= $hashref->{'docnum'} =~ /(\d*)/;
    my ($hastext) =$hashref->{'hastxt'} =~ /(0|1)/;

    my $images=[];
    if ($hastext) {
        my $text = &get_text($docnum);

        # rewrite each <img src="..."> to point at _httpdocimg_/<file>;
        # get_img() collects the original paths into @$images as a side effect
        $text =~ s/(<img.*?src=\")([^\"]*)(\"[^>]*>)/
            $1.&get_img($2,$images).$3/sgei;

        $doc_obj->add_text ($cursection, $text);

        if (scalar(@$images)>0) {

            foreach $img (@$images) {
                my ($assoc_file) = $img =~ /([^\/\\]*\..*)$/; # the name of the image
                $img =~ s/_httpcollection_/\./; # replace _httpcollection_ with .
                $img =~ s/_thisOID_/$olddir/;   # replace _thisOID_ with old archivedir name

                $doc_obj->associate_file($img, $assoc_file);
            }
        }
    }
}
377
378
379
# Rewrite an image path for use inside document text: record the original
# path in @$collected (for later file association) and return the
# "_httpdocimg_/<basename>" form that replaces it in the text.
sub get_img {
    my ($src_path, $collected) = @_;

    push (@$collected, $src_path);
    my ($basename) = $src_path =~ /([^\/\\]*\..*)$/;
    return "_httpdocimg_/" . $basename;
}
388
389
# Copy any classification metadata recorded for $oid (in the global
# $doc_classif_info, filled by add_doc_metadata) onto the given section of
# $doc_obj.  Does nothing if the document appears in no classification.
sub add_classification_metadata {
    my ($oid, $doc_obj, $cursection) = @_;

    my $classif = $doc_classif_info->{$oid};
    return unless defined $classif;

    foreach my $meta_name (keys %$classif) {
        foreach my $meta_value (@{$classif->{$meta_name}}) {
            $doc_obj->add_metadata ($cursection, $meta_name, $meta_value);
        }
    }
}
404
405# picks up the cover image "cover.jpg" from the old archives directory.
# Associate the cover image ("cover.jpg") from the document's old archives
# directory with the rebuilt document, if one exists on disk.
sub add_cover_image {
    my ($doc_obj, $olddir) = @_;

    my $cover_name = "cover.jpg";
    my $cover_path = "archives/$olddir/$cover_name";

    $doc_obj->associate_file($cover_path, $cover_name) if -e $cover_path;
}
417
418
419
# Choose an index for text retrieval: the first subdirectory of $indexdir
# that is not the text directory, the assoc directory, or a dot entry.
# Sets the global $index to "<dir>/<collection>".  Requires $collection to
# have been set already (by get_gdbmfile).
sub set_index {
    # check that $collection has been set
    die "collection global was not set\n"
        unless defined $collection && $collection =~ /\w/;

    # find an index (just use first non-text directory we come across in $indexdir)
    opendir (INDEXDIR, $indexdir) || die "couldn't open directory $indexdir\n";
    my @indexes = readdir INDEXDIR;
    closedir INDEXDIR;   # fixed: close() is for filehandles; dirhandles need closedir()

    foreach my $i (@indexes) {
        next if $i =~ /text$/i || $i =~ /\./ || $i =~ /assoc$/i;
        $index = &util::filename_cat ($i, $collection);
        last;
    }
}
435
436
437#########################################################################
438
439################ functions involving mg ################################
440
# Retrieve the decompressed text of one mg document (by document number)
# through the pipe to the mgquery child.  Relies on the '.set terminator
# "</mg>"' configured in openmg() to delimit the document.
sub get_text {
    my ($docnum) = @_;

    print STDERR "." if $verbosity >= 2; # progress dot per document
    &mgcommand ($docnum);

    <$mgread>; # eat the document separator

    my $text = "";
    my $line = "";

    # accumulate lines until the terminator set in openmg() appears
    while (defined ($line = <$mgread>))
    {
        last if $line =~ /^<\/mg>/;
        $text .= $line;
    }

    # Read in the last statement, which should be:
    # "dd documents retrieved."
    <$mgread>;

    return $text;
}
464
465sub numerically {$a <=> $b;}
466
467
468
# Start an mgquery child over a read/write pipe pair and put it into the
# mode needed for text retrieval: expert mode, "</mg>" document terminator
# (relied on by get_text), raw text output, query-by-docnum, no term
# frequencies.  The command order matters - expert mode must come first.
sub openmg {

    #print STDERR "index: $index\n";

    die "Unable to start mgquery." unless
        &openpipe($mgread, $mgwrite,
                  "mgquery -d $indexdir -f $index -t $textdir");

    $mgwrite->autoflush();

    &mgcommand('.set expert true');
    &mgcommand('.set terminator "</mg>\n"');
    &mgcommand('.set mode text');
    &mgcommand('.set query docnums');
    &mgcommand('.set term_freq off');
}
485
# Ask the mgquery child to quit, then close both ends of the pipe.
sub closemg {
    &mgcommand (".quit");
    close($mgread);
    close($mgwrite);
}
491
# Send one command line to the mgquery child and swallow the echoed copy of
# the command from its output stream.
sub mgcommand {
    my ($command) = @_;

    # ignore empty or whitespace-only commands
    return unless $command =~ /\S/;

    #print STDERR "command: $command\n";
    print $mgwrite "$command\n";

    # mgquery echoes every command it executes; discard the echo
    <$mgread>;
}
502
503# openpipe(READ, WRITE, CMD)
504#
505# Like open2, except CMD's stderr is also redirected.
506#
# Like IPC::Open2::open2, except CMD's stderr is also redirected into the
# read pipe.  $read and $write are generated filehandle names; two more are
# generated here for the child's ends.  Returns 1 on success.
sub openpipe
{
    my ($read, $write, $cmd) = @_;
    my ($child_read, $child_write);

    $child_read = ++$FileHandle;
    $child_write = ++$FileHandle;

    pipe($read, $child_write) || die "Failed pipe($read, $child_write): $!";
    pipe($child_read, $write) || die "Failed pipe($child_read, $write): $!";

    my $pid = fork;
    # Fix: Perl's fork returns undef on failure, never a negative number, so
    # the original "(($pid = fork) < 0)" test could never catch a failure.
    if (!defined $pid) {
        die "Failed fork: $!";
    } elsif ($pid == 0) {
        # child: wire the pipe ends to stdin/stdout/stderr, then exec CMD
        close($read);
        close($write);
        open(STDIN, "<&$child_read");
        open(STDOUT, ">&$child_write");
        open(STDERR, ">&$child_write");
        exec($cmd);
        die "Failed exec $cmd: $!";
    }

    # parent: the child's pipe ends are no longer needed here
    close($child_read);
    close($child_write);

    $write->autoflush();
    $read->autoflush();

    return 1;
}
539
540
541
542
543######################################################################
544
545############# functions to do with the classification stuff ##########
546
547#returns the top level classification oids
# Collect the OIDs of all top-level classifications from a db2txt dump of
# the GDBM database into the global $classifyinfo.
sub get_classifyinfo {
    my ($gdbmfile) = @_;

    open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
    print STDERR "Finding all classification sections from $gdbmfile\n" ;

    # db2txt separates records with a line of 70 dashes
    $/ = '-' x 70;
    my $entry = "";
    while (defined ($entry = <DB2TXT>)) {
        next unless $entry =~ /\w/; # ignore blank entries
        # fold newline runs into literal "\n" so each record is one string
        $entry =~ s/\n+/\\n/g;
        my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/; # record format: [key]value

        # top-level classification OIDs look like CL1, CL2, ...; using \d+
        # (the original used \d) also accepts CL10 and above, while the
        # anchors still exclude sub-nodes such as CL1.3
        next unless ($key =~ /^CL\d+$/);

        push( @$classifyinfo, $key);

    }

    $/ = "\n"; # restore the input record separator
    print STDERR "classifications are: ", join(", ", @$classifyinfo);
}
571
572#this creates the classification files needed for the hierarchy classifier
573#used for subjects, titles, orgs etc
574#also adds in entries to the collect_cfg hash
# Build the classifier configuration for one top-level classification
# (CL1, CL2, ...).  If its immediate children are documents it is treated
# as a flat list and an AZList entry is added to $collect_cfg; otherwise a
# Hierarchy hfile (./etc/CL<Title>.txt) is written and a Hierarchy entry
# added.  Reads the tied %infodb.
sub make_info_file {

    my ($classifier) = @_;
    my $info_file = "";

    my $entry = $infodb{$classifier};
    #print STDERR "entry = $entry\n";
    my $hashref = {};
    &get_metadata($entry, $hashref);

    my $classifier_name = "CL".$hashref->{'Title'}; # like CLSubject
    $classifier_name =~ s/\@$//; # remove trailing @

    # check children - if all are documents at this level, we don't need a
    # hierarchy so just put an AZList into the config file
    # NOTE(review): only the first child is inspected; assumes the children
    # are homogeneous - confirm this holds for the collections being rebuilt

    my $children=[];
    &get_whole_children($classifier, $hashref, $children);
    if (&is_document($$children[0])) {
        &add_classify_cfg_list($classifier_name);

    }else { # there is a hierarchy so create a file

        $info_file = "./etc/$classifier_name.txt";

        print STDERR "classification $classifier will be called $classifier_name\n";

        open (OUTDOC, "> $info_file" ) || die "couldn't open file $info_file\n";

        # one hfile line per node, recursing through the hierarchy
        foreach $child (@$children) {
            &process_entry(OUTDOC, $classifier_name, $child);
        }

        close OUTDOC;

        &add_classify_cfg($classifier, $classifier_name, $info_file);
    }
}
613
614
# Emit the hfile line for one classification node and walk its children:
# document children are recorded in $doc_classif_info (so the classifier
# metadata can be attached to the documents later), non-document children
# are processed recursively as deeper hierarchy nodes.  Reads %infodb.
sub process_entry {
    my ($handle, $classifier_name, $classify_id) = @_;

    my $value = $infodb{$classify_id};

    my $hashref={};

    &get_metadata($value, $hashref);
    my $title = $hashref->{'Title'};
    $title =~ s/\@$//; # remove trailing @
    &add_line($handle, $classify_id, $title);

    my $children = [];
    &get_whole_children($classify_id, $hashref, $children);
    foreach $child (@$children) {
        if (&is_document($child)) {
            # this node's title becomes the document's classifier metadata
            &add_doc_metadata($child, $classifier_name, $title);
        }else {
            process_entry($handle, $classifier_name, $child);
        }
    }

}
638
639
640
# Record in the global $doc_classif_info that document $doc_id carries the
# value $classifier_id for classifier $classifier_name (e.g. CLSubject).
# The nested hash/array slots are created on first use.
sub add_doc_metadata {
    my ($doc_id, $classifier_name, $classifier_id) = @_;

    $doc_classif_info->{$doc_id} ||= {};
    my $per_doc = $doc_classif_info->{$doc_id};

    $per_doc->{$classifier_name} ||= [];
    push (@{$per_doc->{$classifier_name}}, $classifier_id);
}
654
655
656
657
# Write one line of a Hierarchy classifier hfile for node $classify_id, in
# the form:  "title"<TAB>number<TAB>"title"
sub add_line {
    my ($handle, $classify_id, $title) = @_;

    #print STDERR "classify id= $classify_id, title= $title\n";
    $title = &unicode::ascii2utf8($title);

    # strip the leading "CLn." so only the hierarchy number remains
    my ($num) = $classify_id =~ /^CL\d\.(.*)$/;

    print $handle "\"$title\"\t$num\t\"$title\"\n";
}
670
# True (1) iff an OID names a document (HASH...) rather than a
# classification node; 0 otherwise.
sub is_document {
    my ($oid) = @_;
    return ($oid =~ /^HASH/i) ? 1 : 0;
}
676
677########################################################################
678
679########## stuff for producing collect.cfg file ###########################
680
# Fill the global $collect_cfg with default collect.cfg settings: creator /
# maintainer (derived from the current user), default plugin pipeline,
# format strings, and collection-level metadata (icon URLs assume the
# nzdl.org layout).  Uses the global $collection.
sub add_default_cfg {

    # Fix: backticks return a *defined* (possibly empty) string even when
    # the command fails, so the original "unless defined" fallbacks could
    # never fire; test for non-blank output instead.
    $username=`whoami`;
    $username=`logname` unless defined $username && $username =~ /\S/;
    $username="a_user" unless defined $username && $username =~ /\S/;
    $username =~ s/\n//;
    $collect_cfg->{'creator'}="$username\@cs.waikato.ac.nz";
    $collect_cfg->{'maintainer'}="$username\@cs.waikato.ac.nz";
    $collect_cfg->{'public'}="false";
    $collect_cfg->{'beta'}="true";

    # default plugin pipeline for rebuilt (GML-format) collections
    $collect_cfg->{'plugin'}=[];
    push (@{$collect_cfg->{'plugin'}}, ["GMLPlug"]);
    push (@{$collect_cfg->{'plugin'}}, ["ArcPlug"]);
    push (@{$collect_cfg->{'plugin'}}, ["RecPlug"]);

    # default format strings
    $collect_cfg->{'format'}={};
    $collect_cfg->{'format'}->{'DocumentImages'}="true";
    $collect_cfg->{'format'}->{'DocumentText'} =
        "\"<h3>[Title]</h3>\\\\n\\\\n<p>[Text]\"";
    $collect_cfg->{'format'}->{'SearchVList'} =
        "\"<td valign=top>[link][icon][/link]</td><td>{If}{[parent(All': '):Title],[parent(All': '):Title]:}[link][Title][/link]</td>\"";

    # collection-level metadata
    $collect_cfg->{'collectionmeta'}={};
    $collect_cfg->{'collectionmeta'}->{'collectionname'}="\"$collection\"";
    $collect_cfg->{'collectionmeta'}->{'iconcollection'}="\"http://www.nzdl.org/gsdl/collect/$collection/images/$collection.gif\"";
    $collect_cfg->{'collectionmeta'}->{'iconcollectionsmall'}="\"http://www.nzdl.org/gsdl/collect/$collection/images/${collection}sm.gif\"";
    $collect_cfg->{'collectionmeta'}->{'collectionextra'} = "\"This is a collection rebuilt from CDROM.\"";
}
714
# Read the indexmap from the old build.cfg and translate it into the
# 'indexes', 'defaultindex' and per-index collectionmeta entries of the
# new configuration ($collect_cfg).
sub add_index_cfg {
    my ($buildfile) = @_;

    my $data={};
    $collect_cfg->{'indexes'}=[];
    if (-e $buildfile) {
        $data=&cfgread::read_cfg_file($buildfile, '^(this)$', '^(indexmap)$');
        # indexmap entries look like "section:text->st"
        foreach $i (@{$data->{'indexmap'}}) {
            ($thisindex, $abbrev)= split ("\-\>", $i);
            push (@{$collect_cfg->{'indexes'}}, $thisindex);
            # first index listed becomes the default
            $collect_cfg->{'defaultindex'} = $thisindex unless defined
                $collect_cfg->{'defaultindex'};
            # a ".index" collectionmeta entry gives the index its display name
            $name=&get_index_name($thisindex);
            $thisindex=".$thisindex";
            $collect_cfg->{'collectionmeta'}->{$thisindex} = "\"$name\"";
        }
    }
    else {
        print STDERR "Couldn't read $buildfile, could not add index data to configuration file\n";
    }

}
737
# Map a raw index specifier (e.g. "section:text") to a human-readable name
# for the collectionmeta entry.  The first matching pattern wins; anything
# unrecognised is called "other".
sub get_index_name {
    my ($thisindex) = @_;

    my @name_map = (
        [ qr/paragraph/,     "paragraphs" ],
        [ qr/section.*text/, "chapters"   ],
        [ qr/Title/,         "titles"     ],
    );
    foreach my $pair (@name_map) {
        return $pair->[1] if $thisindex =~ $pair->[0];
    }
    return "other";
}
745
# Append a Hierarchy classifier entry for $classifier_name (backed by the
# hfile $file) to the global $collect_cfg's classify list.
sub add_classify_cfg {
    my ($classify, $classifier_name, $file) = @_;

    $collect_cfg->{'classify'} = [] unless defined $collect_cfg->{'classify'};

    my ($title)    = $classifier_name =~ /^CL(.*)$/; # e.g. CLSubject -> Subject
    my ($filename) = $file =~ /\/([^\/]*)$/;         # basename of the hfile
    push (@{$collect_cfg->{'classify'}},
          ["Hierarchy hfile=$filename metadata=$classifier_name title=$title sort=Title"]);
}
758
# Append a flat AZList classifier entry for $classifier to the global
# $collect_cfg (used when a classification has no hierarchy beneath it).
sub add_classify_cfg_list {
    my ($classifier) = @_;

    $collect_cfg->{'classify'} = [] unless defined $collect_cfg->{'classify'};

    my ($title) = $classifier =~ /^CL(.*)$/; # e.g. CLTitle -> Title
    push (@{$collect_cfg->{'classify'}}, ["AZList metadata=$classifier title=$title"]);
}
767
# Write the accumulated $collect_cfg out as a collect.cfg file.  An
# existing file is never clobbered: "<name>.new" is written instead.
sub output_cfg_file {
    my ($collfile) = @_;

    $collfile .= ".new" if -e $collfile; # collect.cfg already exists

    &cfgread::write_cfg_file($collfile, $collect_cfg,
                             '^(creator|maintainer|public|beta|defaultindex)$',
                             '^(indexes)$', '^(format|collectionmeta)$',
                             '^(plugin|classify)$');
}
783
784
Note: See TracBrowser for help on using the repository browser.