source: main/trunk/greenstone2/bin/script/unbuildv2.pl@ 26441

Last change on this file since 26441 was 17750, checked in by kjdon, 15 years ago

plugout and plugin name changes. do these scripts still work??

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.8 KB
#!/usr/bin/perl -w

###########################################################################
#
# unbuildv2.pl --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# This program decompresses all the text from a built index and returns it
# to GML format. It is based on the index format current in Nov 1999
# (version two).

# Stefan updated unbuildv1.pl in August 2002 but unbuildv2.pl was not
# updated. It probably needs some work before it can be used.

# Katherine updated this but apparently it still doesn't work.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    $FileHandle = 'FH000';
}

use dbutil;
use doc;
use util;
use parsargv;
use GDBM_File;
use FileHandle;
use English;
use cfgread;
use unicode;
use plugout;
use arcinfo; # needed for the 'new arcinfo ()' call in main

select STDERR; $| = 1;
select STDOUT; $| = 1;


# globals
$collection = "";        # the collection name
$index = "";             # the selected index (like stt/unu)
$textdir = "";           # the textdir (like text/unu)
$toplevelinfo = [];      # list of document OIDs
%infodb = ();            # hash holding the contents of the info database
$classifyinfo = [];      # list of classifications
$doc_classif_info = {};  # hash of OID -> classifications it belongs to
$collect_cfg = {};       # data for the configuration file

$mgread = ++$FileHandle;
$mgwrite = ++$FileHandle;


sub print_usage {
    print STDERR "\n  usage: $0 [options]\n\n";
    print STDERR "  options:\n";
    print STDERR "   -verbosity number      0=none, 3=lots\n";
    print STDERR "   -indexdir directory    The index to be decompressed (defaults to ./index)\n";
    print STDERR "   -archivedir directory  Where the converted material ends up (defaults to ./archives.new)\n";
    print STDERR "   -removeold             Will remove the old contents of the archives\n";
    print STDERR "                          directory -- use with care\n\n";
}

&main ();

sub main {
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'indexdir/.*/index', \$indexdir,
                         'archivedir/.*/archives.new', \$archivedir,
                         'removeold', \$removeold)) {
        &print_usage();
        die "\n";
    }

    die "indexdir $indexdir does not exist\n\n" unless (-d $indexdir);
    $indexdir =~ s/\/$//;
    if (-d $archivedir) {
        if ($removeold) {
            print STDERR "Warning - removing current contents of the archives directory $archivedir\n";
            print STDERR "          in preparation for the import\n";
            sleep(5); # just in case...
            &util::rm_r ($archivedir);
        }
    } else {
        &util::mk_all_dir ($archivedir);
    }

    $etcdir = "./etc";
    if (!(-d $etcdir)) {
        &util::mk_all_dir ($etcdir);
    }

    my $gdbmfile = &get_gdbmfile ($indexdir); # sets $collection and $textdir
    &set_index (); # sets $index (just chooses one index)

    my $buildcfgfile = &util::filename_cat($indexdir, "build.cfg");
    my $colcfgfile = &util::filename_cat($etcdir, "collect.cfg");

    &add_default_cfg();
    &add_index_cfg($buildcfgfile);

    # work out all the classifications from the info database; the info for
    # each document (ie which classifications it belongs to) is kept in
    # $doc_classif_info
    &get_classifyinfo ($gdbmfile); # puts a list of classifications into $classifyinfo

    &get_toplevel_OID ($gdbmfile); # puts a list of the top level document OIDs into $toplevelinfo

    #tie (%infodb, "GDBM_File", $gdbmfile, 1, 0);

    # read the ldb file into %infodb
    &dbutil::read_infodb_file("gdbm", $gdbmfile, \%infodb);

    # this makes the files specifying the hierarchy of subjects, titles etc
    foreach $classify (@$classifyinfo) {
        &make_info_file($classify);
    }

    # write out the collect.cfg
    &output_cfg_file($colcfgfile);

    &openmg ();

    # read the archive information file
    my $archive_info_filename = &util::filename_cat ($archivedir, "archives.inf");
    my $archive_info = new arcinfo ();

    my $opts = [];
    push @$opts, ("-output_info", $archive_info);

    $processor = &plugout::load_plugout("GreenstoneXMLPlugout", $opts);
    $processor->setoutputdir ($archivedir);

    my ($doc_obj, $hashref, $children);
    print STDERR "processing documents now\n" if $verbosity >= 2;
    foreach $oid (@$toplevelinfo) {
        $value = $infodb{$oid};
        $hashref = {};
        $children = [];
        &get_metadata($value, $hashref);
        $doc_obj = new doc ();
        $doc_obj->set_OID($oid);
        my ($olddir) = $hashref->{'archivedir'}; # old dir for this doc, where images are stored
        $top = $doc_obj->get_top_section();
        &add_section_content ($doc_obj, $top, $hashref, $olddir);
        &add_classification_metadata($oid, $doc_obj, $top);
        &add_cover_image($doc_obj, $olddir);
        &get_children($hashref, $children);
        &recurse_sections($doc_obj, $children, $oid, $top, $olddir);
        $processor->process($doc_obj);
    }
    print STDERR "\n";

    &closemg();

    # write out the archive information file
    $archive_info->save_info($archive_info_filename);
}

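# The built index directory is expected to look something like this
# (illustrative layout only; names vary with the collection):
#
#   index/build.cfg
#   index/text/     <- the compressed text and info database, eg index/text/demo.ldb
#   index/stt/      <- one or more index subdirectories (one is chosen as $index)
#   index/assoc/    <- associated files (ignored when choosing an index)
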
# returns the path to the gdbm info database - also
# sets the $collection and $textdir global variables
sub get_gdbmfile {
    my ($indexdir) = @_;
    my ($gdbmfile);

    opendir (DIR, $indexdir) || die "Couldn't open directory $indexdir\n\n";
    my @conts = readdir DIR;
    closedir DIR;

    foreach $file (@conts) {
        if ($file =~ /text$/) {
            $textdir = $file;
            last;
        }
    }
    die "No text directory found in $indexdir\n\n"
        unless defined $textdir && $textdir =~ /text$/;

    $gdbmfile = &util::filename_cat ($indexdir, $textdir);

    opendir (DIR, $gdbmfile) || die "Couldn't open directory $gdbmfile\n\n";
    @conts = readdir DIR;
    closedir DIR;

    foreach $file (@conts) {
        if ($file =~ /^(.*?)\.(?:ldb|bdb)$/) {
            $collection = $1;
            $gdbmfile = &util::filename_cat ($gdbmfile, $file);
            last;
        }
    }

    if (defined $collection && $collection =~ /\w/) {
        $textdir = &util::filename_cat ($textdir, $collection);
    } else {
        die "collection global wasn't set\n";
    }
    return $gdbmfile if (-e $gdbmfile);
    die "Couldn't find gdbm info database in $indexdir\n\n";
}

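# db2txt prints the info database as a series of [key] value records
# separated by a line of 70 dashes, for example (illustrative content only):
#
#   [HASHa7636be5ff3833088eeab2]
#   <doctype>doc
#   <hastxt>1
#   <Title>Farming snails
#   <contains>".1;".2
#   ----------------------------------------------------------------------
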
sub get_toplevel_OID {
    my ($gdbmfile) = @_;

    open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
    print STDERR "Finding all top level sections from $gdbmfile\n" if $verbosity >= 2;

    $/ = '-' x 70;
    my $entry = "";
    while (defined ($entry = <DB2TXT>)) {
        next unless $entry =~ /\w/;  # ignore blank entries
        $entry =~ s/\n+/\\n/g;       # flatten newlines to literal '\n' so the whole record is on one line
        my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/;

        next if ($key =~ /\./);          # ignore any lower level entries
        next if ($key =~ /^CL/);         # ignore classification entries
        next if ($value =~ /<section>/); # ignore docnum->OID entries
        next if ($value =~ /<docoid>/);  # ignore strange s133->OID entries
        next if ($key !~ /\d/);          # ignore collection, browse entries

        push( @$toplevelinfo, $key);
    }

    $/ = "\n";
    #print STDERR "toplevel sections are: ", join ("\n", @$toplevelinfo);
    #print STDERR "\n";
}

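# Each value in %infodb is a block of "<name>value" lines, one metadata
# element per line, for example (illustrative content only):
#
#   <Title>Farming snails
#   <docnum>15
#   <hastxt>1
#   <contains>".1;".2
#
# get_metadata() below splits such a block into a hash of name => value,
# joining repeated names with '@'.
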
# gets all the metadata from a gdbm file entry, and puts it into a hashref
sub get_metadata {
    my ($gdb_str_ref, $hashref) = @_;

    my @entries = split(/\n/, $gdb_str_ref);
    foreach $entry (@entries) {
        my ($key, $value) = ($entry =~ /^<([^>]*)>(.*?)$/);
        $$hashref{$key} .= '@' if defined $$hashref{$key};
        $$hashref{$key} .= $value;
    }
}

# takes a hashref containing the metadata for a gdbmfile entry, and extracts
# the children's numbers (from the 'contains' entry).
# assumes the format is ".1;".2;".3
sub get_children {
    my ($hashref, $children) = @_;

    $childs = $hashref->{'contains'};
    if (defined ($childs)) {
        $childs =~ s/\@$//;   # remove trailing @
        $childs =~ s/^\"\.//; # remove initial ".
        @$children = split /\;\"\./, $childs;
    }
    else {
        @$children = ();
    }
}

# takes a hashref containing the metadata for a gdbmfile entry, and extracts
# the children's numbers (from the 'contains' entry).
# assumes the format is ".1;".2;".3
# returns a list with the full child names, ie HASH0123...ac.1, HASH0123...ac.2
# etc
# used for classification stuff
sub get_whole_children {
    my ($parentoid, $hashref, $children) = @_;

    my $childs = $hashref->{'contains'};
    my @items;
    if (defined ($childs)) {
        $childs =~ s/\@$//;           # remove trailing @
        @items = split /\;/, $childs; # split on ;
        foreach $item (@items) {
            $item =~ s/^\"/$parentoid/; # replace " with parentoid
            push (@$children, "$item");
        }
    }
    else {
        @$children = ();
    }
}

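# recurse_sections() walks the 'contains' tree. For example (illustrative
# OIDs only), a document HASH0123...ac whose top section contains ".1;".2
# gets named subsections "1" and "2" created under the top section, and the
# walk then continues into HASH0123...ac.1, HASH0123...ac.2 and so on.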
sub recurse_sections {
    my ($doc_obj, $children, $parentoid, $parentsection, $olddir) = @_;

    foreach $child (sort numerically @$children) {
        $doc_obj->create_named_section("$parentsection.$child");
        my $value = $infodb{"$parentoid.$child"};
        my $hashref = {};
        &get_metadata($value, $hashref);       # get the child's metadata
        my $newchildren = [];
        &get_children($hashref, $newchildren); # get the child's children
        # add content for the current section
        &add_section_content($doc_obj, "$parentsection.$child", $hashref, $olddir);
        # process all the children if there are any
        &recurse_sections($doc_obj, $newchildren, "$parentoid.$child", "$parentsection.$child", $olddir)
            if (defined ($newchildren));
    }
}

sub add_section_content {
    my ($doc_obj, $cursection, $hashref, $olddir) = @_;

    foreach $key (keys %$hashref) {
        # don't need to store these metadata
        next if $key =~ /(contains|docnum|hastxt|doctype|archivedir|classifytype)/i;
        my @items = split /@/, $hashref->{$key};
        map {$doc_obj->add_metadata ($cursection, $key, $_); } @items;
    }
    my ($docnum) = $hashref->{'docnum'} =~ /(\d*)/;
    my ($hastext) = $hashref->{'hastxt'} =~ /(0|1)/;

    my $images = [];
    if ($hastext) {
        my $text = &get_text($docnum);

        #my (@images) = $text =~ /<img.*?src=\"([^\"]*)\"[^>]*>/g;

        # in the text, replace the path to each image with _httpdocimg_/blah.gif
        #while ($text =~ s/(<img.*?src=\")([^\"]*)(\"[^>]*>)/
        #       $1.&get_img($2, \@images).$3/sgei) {
        $text =~ s/(<img.*?src=\")([^\"]*)(\"[^>]*>)/
            $1.&get_img($2,$images).$3/sgei;

        $doc_obj->add_text ($cursection, $text);

        if (scalar(@$images) > 0) {
            foreach $img (@$images) {
                my ($assoc_file) = $img =~ /([^\/\\]*\..*)$/; # the name of the image
                $img =~ s/_httpcollection_/\./; # replace _httpcollection_ with .
                $img =~ s/_thisOID_/$olddir/;   # replace _thisOID_ with the old archivedir name

                $doc_obj->associate_file($img, $assoc_file);
            }
        }
    }
}


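# get_img() rewrites an image reference for the rebuilt archive. For example
# (illustrative value only), a src of
#   _httpcollection_/_thisOID_/p01.gif
# becomes _httpdocimg_/p01.gif in the text, while the original path is pushed
# onto the $images list so add_section_content() can associate the file with
# the document.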
sub get_img {
    my ($path, $images) = @_;
    my $img = "_httpdocimg_/";
    my ($imgname) = $path =~ /([^\/\\]*\..*)$/;
    push (@$images, $path);
    $img .= $imgname;
    return $img;
}

sub add_classification_metadata {
    my ($oid, $doc_obj, $cursection) = @_;

    if (defined $doc_classif_info->{$oid}) {
        my $hashref = $doc_classif_info->{$oid};

        foreach $key (keys %$hashref) {
            my @items = @{$hashref->{$key}};
            map {$doc_obj->add_metadata ($cursection, $key, $_); } @items;
        }
    }
}

# picks up the cover image "cover.jpg" from the old archives directory.
sub add_cover_image {
    my ($doc_obj, $olddir) = @_;

    $assoc_file = "cover.jpg";
    $img = "archives/$olddir/$assoc_file";

    if (-e $img) {
        $doc_obj->associate_file($img, $assoc_file);
    }
}


sub set_index {
    # check that $collection has been set
    die "collection global was not set\n"
        unless defined $collection && $collection =~ /\w/;

    # find an index (just use the first non-text directory we come across in $indexdir)
    opendir (INDEXDIR, $indexdir) || die "couldn't open directory $indexdir\n";
    my @indexes = readdir INDEXDIR;
    closedir INDEXDIR;
    foreach $i (@indexes) {
        next if $i =~ /text$/i || $i =~ /\./ || $i =~ /assoc$/i;
        $index = &util::filename_cat ($i, $collection);
        last;
    }
}


#########################################################################

################ functions involving mg ################################

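# get_text() sends a document number to the mgquery process started by
# openmg() below. With the settings used there, sending eg "15" should make
# mgquery print the text of document 15, followed by the terminator line
# "</mg>" and a trailing "dd documents retrieved." summary (an illustrative
# description of the expected dialogue).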
sub get_text {
    my ($docnum) = @_;

    print STDERR "." if $verbosity >= 2;
    &mgcommand ($docnum);

    <$mgread>; # eat the document separator

    my $text = "";
    my $line = "";

    while (defined ($line = <$mgread>)) {
        last if $line =~ /^<\/mg>/;
        $text .= $line;
    }

    # Read in the last statement, which should be:
    # "dd documents retrieved."
    <$mgread>;

    return $text;
}

sub numerically {$a <=> $b;}


sub openmg {

    #print STDERR "index: $index\n";

    die "Unable to start mgquery." unless
        &openpipe($mgread, $mgwrite,
                  "mgquery -d $indexdir -f $index -t $textdir");

    $mgwrite->autoflush();

    &mgcommand('.set expert true');
    &mgcommand('.set terminator "</mg>\n"');
    &mgcommand('.set mode text');
    &mgcommand('.set query docnums');
    &mgcommand('.set term_freq off');
    &mgcommand('.set briefstats off');
    &mgcommand('.set memstats off');
    &mgcommand('.set sizestats off');
    &mgcommand('.set timestats off');
}

sub closemg {
    &mgcommand (".quit");
    close($mgread);
    close($mgwrite);
}

sub mgcommand {
    my ($command) = @_;

    return if $command =~ /^\s*$/; # ignore whitespace-only commands
    #print STDERR "command: $command\n";
    print $mgwrite "$command\n";

    # eat up the command executed which is echoed
    <$mgread>;
}

# openpipe(READ, WRITE, CMD)
#
# Like open2, except CMD's stderr is also redirected.
#
sub openpipe {
    my ($read, $write, $cmd) = @_;
    my ($child_read, $child_write);

    $child_read = ++$FileHandle;
    $child_write = ++$FileHandle;

    pipe($read, $child_write) || die "Failed pipe($read, $child_write): $!";
    pipe($child_read, $write) || die "Failed pipe($child_read, $write): $!";
    my $pid;

    if (!defined($pid = fork)) {
        die "Failed fork: $!";
    } elsif ($pid == 0) {
        close($read);
        close($write);
        open(STDIN, "<&$child_read");
        open(STDOUT, ">&$child_write");
        open(STDERR, ">&$child_write");
        exec($cmd);
        die "Failed exec $cmd: $!";
    }

    close($child_read);
    close($child_write);

    $write->autoflush();
    $read->autoflush();

    return 1;
}


######################################################################

############# functions to do with the classification stuff ##########

# returns the top level classification OIDs
sub get_classifyinfo {
    my ($gdbmfile) = @_;

    open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
    print STDERR "Finding all classification sections from $gdbmfile\n";

    $/ = '-' x 70;
    my $entry = "";
    while (defined ($entry = <DB2TXT>)) {
        next unless $entry =~ /\w/; # ignore blank entries
        $entry =~ s/\n+/\\n/g;      # flatten newlines to literal '\n' so the whole record is on one line
        my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/;

        next unless ($key =~ /^CL\d$/); # assumes classification OIDs look like
                                        # CL1 or CL2 etc

        push( @$classifyinfo, $key);
    }

    $/ = "\n";
    #print STDERR "classifications are: ", join(", ", @$classifyinfo);
    #print STDERR "\n";
}

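# make_info_file() writes a Hierarchy classifier file with one line per
# classification node, in the form  "Title"<tab>number<tab>"Title", where the
# number is the node's OID with the leading "CLn." stripped. For example
# (illustrative content only), nodes CL2.1 and CL2.1.2 might produce:
#
#   "Science"	1	"Science"
#   "Physics"	1.2	"Physics"
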
# this creates the classification files needed for the hierarchy classifier
# used for subjects, titles, orgs etc
# also adds in entries to the collect_cfg hash
sub make_info_file {
    my ($classifier) = @_;
    my $info_file = "";
    my $entry = $infodb{$classifier};

    my $hashref = {};
    &get_metadata($entry, $hashref);

    my $classifier_name = "CL".$hashref->{'Title'}; # like CLSubject
    $classifier_name =~ s/\@$//; # remove trailing @

    # check children - if there is a classifier node at this level,
    # use a hierarchy, otherwise use an AZList.

    my $children = [];
    my $hierarchy = 0;
    &get_whole_children($classifier, $hashref, $children); # returns a list of the child ids
    foreach $child (@$children) {
        if (not &is_document($child)) {
            $hierarchy = 1;
            last;
        }
    }

    if (!$hierarchy) {
        &add_classify_cfg_list($classifier_name);
    } else { # there is a hierarchy so create a file
        $info_file = "./etc/$classifier_name.txt";

        print STDERR "classification $classifier will be called $classifier_name\n";

        open (OUTDOC, "> $info_file" ) || die "couldn't open file $info_file\n";

        foreach $child (@$children) {
            &process_entry(OUTDOC, $classifier_name, $child);
        }

        close OUTDOC;

        &add_classify_cfg($classifier, $classifier_name, $info_file);
    }
}


sub process_entry {
    my ($handle, $classifier_name, $classify_id) = @_;
    my $value = $infodb{$classify_id};

    my $hashref = {};
    &get_metadata($value, $hashref);
    my $title = $hashref->{'Title'};
    $title =~ s/\@$//; # remove trailing @
    &add_line($handle, $classify_id, $title);

    my $children = [];
    &get_whole_children($classify_id, $hashref, $children);
    foreach $child (@$children) {
        if (&is_document($child)) {
            &add_doc_metadata($child, $classifier_name, $title);
        } else {
            &process_entry($handle, $classifier_name, $child);
        }
    }
}


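# add_doc_metadata() accumulates classification metadata per document, for
# example (illustrative structure only):
#
#   $doc_classif_info->{'HASH0123...ac'}->{'CLSubject'} = ['Science', 'Physics'];
#
# add_classification_metadata() later copies these values onto the document.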
sub add_doc_metadata {
    my ($doc_id, $classifier_name, $classifier_id) = @_;

    # add entry to doc database
    #print STDERR "at doc level, docnum=$classify_id\n";

    $doc_classif_info->{$doc_id} = {} unless defined $doc_classif_info->{$doc_id};
    $doc_classif_info->{$doc_id}->{$classifier_name} = [] unless
        defined $doc_classif_info->{$doc_id}->{$classifier_name};
    push (@{$doc_classif_info->{$doc_id}->{$classifier_name}}, $classifier_id);
}


sub add_line {
    my ($handle, $classify_id, $title) = @_;

    #print STDERR "classify id= $classify_id, title= $title\n";
    $title = &unicode::ascii2utf8(\$title);
    my ($num) = $classify_id =~ /^CL\d\.(.*)$/; # remove the CL1. from the front

    print $handle "\"$title\"\t$num\t\"$title\"\n";
}

sub is_document {
    my ($oid) = @_;
    return 1 if $oid =~ /^HASH/i;
    return 0;
}

########################################################################

########## stuff for producing collect.cfg file ###########################

sub add_default_cfg {

    $username = `whoami`;
    $username = `logname` unless defined $username && $username =~ /\w/;
    $username = "a_user" unless defined $username && $username =~ /\w/;
    $username =~ s/\n//;
    $collect_cfg->{'creator'} = "$username\@cs.waikato.ac.nz";
    $collect_cfg->{'maintainer'} = "$username\@cs.waikato.ac.nz";
    $collect_cfg->{'public'} = "true";
    $collect_cfg->{'beta'} = "true";

    $collect_cfg->{'plugin'} = [];
    push (@{$collect_cfg->{'plugin'}}, ["GreenstoneXMLPlugin"]);
    push (@{$collect_cfg->{'plugin'}}, ["ArchivesInfPlugin"]);
    push (@{$collect_cfg->{'plugin'}}, ["DirectoryPlugin"]);

    $collect_cfg->{'format'} = {};
    $collect_cfg->{'format'}->{'DocumentImages'} = "true";
    $collect_cfg->{'format'}->{'DocumentText'} =
        "\"<h3>[Title]</h3>\\\\n\\\\n<p>[Text]\"";
    $collect_cfg->{'format'}->{'SearchVList'} =
        "\"<td valign=top>[link][icon][/link]</td><td>{If}{[parent(All': '):Title],[parent(All': '):Title]:}[link][Title][/link]</td>\"";

    $collect_cfg->{'collectionmeta'} = {};
    $collect_cfg->{'collectionmeta'}->{'collectionname'} = "\"$collection\"";
    $collect_cfg->{'collectionmeta'}->{'iconcollection'} = "\"_httpprefix_/collect/$collection/images/$collection.gif\"";
    $collect_cfg->{'collectionmeta'}->{'iconcollectionsmall'} = "\"_httpprefix_/collect/$collection/images/${collection}sm.gif\"";
    $collect_cfg->{'collectionmeta'}->{'collectionextra'} = "\"This is a collection rebuilt from CDROM.\"";
}

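# add_index_cfg() reads the indexmap entries from build.cfg to work out which
# indexes were built. An indexmap line might look like this (illustrative
# values only):
#
#   indexmap section:text->stt section:Title->stb
#
# Each "level:field->shortname" pair becomes an index entry in the new
# collect.cfg, and the first one found becomes the default index.
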
sub add_index_cfg {
    my ($buildfile) = @_;

    my $data = {};
    $collect_cfg->{'indexes'} = [];
    if (-e $buildfile) {
        $data = &cfgread::read_cfg_file($buildfile, '^(this)$', '^(indexmap)$');
        foreach $i (@{$data->{'indexmap'}}) {
            ($thisindex, $abbrev) = split ("\-\>", $i);
            push (@{$collect_cfg->{'indexes'}}, $thisindex);
            $collect_cfg->{'defaultindex'} = $thisindex unless defined
                $collect_cfg->{'defaultindex'};
            $name = &get_index_name($thisindex);
            $thisindex = ".$thisindex";
            $collect_cfg->{'collectionmeta'}->{$thisindex} = "\"$name\"";
        }
    }
    else {
        print STDERR "Couldn't read $buildfile, could not add index data to configuration file\n";
    }
}

sub get_index_name {
    my ($thisindex) = @_;
    return "paragraphs" if $thisindex =~ /paragraph/;
    return "chapters" if $thisindex =~ /section.*text/;
    return "titles" if $thisindex =~ /Title/;
    return "other";
}

sub add_classify_cfg {
    my ($classify, $classifier_name, $file) = @_;

    $collect_cfg->{'classify'} = [] unless defined $collect_cfg->{'classify'};

    my ($title) = $classifier_name =~ /^CL(.*)$/;
    my ($filename) = $file =~ /\/([^\/]*)$/;
    my $entry = "Hierarchy -hfile $filename -metadata $classifier_name -buttonname $title -sort Title";
    push (@{$collect_cfg->{'classify'}}, [$entry]);
}

sub add_classify_cfg_list {
    my ($classifier) = @_;

    $collect_cfg->{'classify'} = [] unless defined $collect_cfg->{'classify'};
    my ($title) = $classifier =~ /^CL(.*)$/;
    my $entry = "AZList -metadata $classifier -buttonname $title";
    push (@{$collect_cfg->{'classify'}}, [$entry]);
}

sub output_cfg_file {
    my ($collfile) = @_;

    if (-e $collfile) { # collect.cfg already exists
        $collfile .= ".new";
    }
    &cfgread::write_cfg_file($collfile, $collect_cfg,
                             '^(creator|maintainer|public|beta|defaultindex)$',
                             '^(indexes)$', '^(format|collectionmeta)$',
                             '^(plugin|classify)$');
}