source: main/trunk/greenstone2/bin/script/unbuildv2.pl@ 26441

Last change on this file since 26441 was 17750, checked in by kjdon, 15 years ago

plugout and plugin name changes. do these scripts still work??

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.8 KB
#!/usr/bin/perl -w

###########################################################################
#
# unbuildv2.pl --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# This program decompresses all the text from a built index and returns it
# to GML format. It is based on the index format current in Nov 1999
# (version two).

# Stefan updated unbuildv1.pl in August 2002 but unbuildv2.pl was not
# updated. It probably needs some work before it can be used.

# Katherine updated this but apparently it still doesn't work.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    $FileHandle = 'FH000';
}

use dbutil;
use doc;
use util;
use parsargv;
use GDBM_File;
use FileHandle;
use English;
use cfgread;
use unicode;
use plugout;
use arcinfo; # needed for the 'new arcinfo ()' call in main

select STDERR; $| = 1;
select STDOUT; $| = 1;


# globals
$collection = "";        # the collection name
$index = "";             # the selected index (like stt/unu)
$textdir = "";           # the textdir (like text/unu)
$toplevelinfo = [];      # list of document OIDs
%infodb = ();            # hash holding the contents of the info database
$classifyinfo = [];      # list of classifications
$doc_classif_info = {};  # hash of OID -> classifications it belongs to
$collect_cfg = {};       # data for the configuration file

$mgread = ++$FileHandle;
$mgwrite = ++$FileHandle;


sub print_usage {
    print STDERR "\n  usage: $0 [options]\n\n";
    print STDERR "  options:\n";
    print STDERR "   -verbosity number      0=none, 3=lots\n";
    print STDERR "   -indexdir directory    The index to be decompressed (defaults to ./index)\n";
    print STDERR "   -archivedir directory  Where the converted material ends up (defaults to ./archives.new)\n";
    print STDERR "   -removeold             Will remove the old contents of the archives\n";
    print STDERR "                          directory -- use with care\n\n";
}

&main ();

sub main {
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'indexdir/.*/index', \$indexdir,
                         'archivedir/.*/archives.new', \$archivedir,
                         'removeold', \$removeold)) {
        &print_usage();
        die "\n";
    }

    die "indexdir $indexdir does not exist\n\n" unless (-d $indexdir);
    $indexdir =~ s/\/$//;
    if (-d $archivedir) {
        if ($removeold) {
            print STDERR "Warning - removing current contents of the archives directory $archivedir\n";
            print STDERR "          in preparation for the import\n";
            sleep(5); # just in case...
            &util::rm_r ($archivedir);
        }
    } else {
        &util::mk_all_dir ($archivedir);
    }

    $etcdir = "./etc";
    if (!(-d $etcdir)) {
        &util::mk_all_dir ($etcdir);
    }

    my $gdbmfile = &get_gdbmfile ($indexdir); # sets $collection and $textdir
    &set_index (); # sets $index (just chooses one index)

    my $buildcfgfile = &util::filename_cat($indexdir, "build.cfg");
    my $colcfgfile = &util::filename_cat($etcdir, "collect.cfg");

    &add_default_cfg();
    &add_index_cfg($buildcfgfile);

    # work out all the classifications from the info database; the info for
    # each document (ie which classifications it belongs to) is kept in
    # $doc_classif_info
    &get_classifyinfo ($gdbmfile); # puts a list of classifications into $classifyinfo

    &get_toplevel_OID ($gdbmfile); # puts a list of the top level document OIDs into $toplevelinfo

    #tie (%infodb, "GDBM_File", $gdbmfile, 1, 0);

    # read the ldb file into %infodb
    &dbutil::read_infodb_file("gdbm", $gdbmfile, \%infodb);

    # this makes the files specifying the hierarchy of subjects, titles etc
    foreach $classify (@$classifyinfo) {
        &make_info_file($classify);
    }

    # write out the collect.cfg
    &output_cfg_file($colcfgfile);

    &openmg ();

    # read the archive information file
    my $archive_info_filename = &util::filename_cat ($archivedir, "archives.inf");
    my $archive_info = new arcinfo ();

    my $opts = [];
    push @$opts, ("-output_info", $archive_info);

    $processor = &plugout::load_plugout("GreenstoneXMLPlugout", $opts);
    $processor->setoutputdir ($archivedir);

    my ($doc_obj, $hashref, $children);
    print STDERR "processing documents now\n" if $verbosity >= 2;
    foreach $oid (@$toplevelinfo) {
        $value = $infodb{$oid};
        $hashref = {};
        $children = [];
        &get_metadata($value, $hashref);
        $doc_obj = new doc ();
        $doc_obj->set_OID($oid);
        my ($olddir) = $hashref->{'archivedir'}; # old dir for this doc, where images are stored
        $top = $doc_obj->get_top_section();
        &add_section_content ($doc_obj, $top, $hashref, $olddir);
        &add_classification_metadata($oid, $doc_obj, $top);
        &add_cover_image($doc_obj, $olddir);
        &get_children($hashref, $children);
        &recurse_sections($doc_obj, $children, $oid, $top, $olddir);
        $processor->process($doc_obj);
    }
    print STDERR "\n";

    &closemg();

    # write out the archive information file
    $archive_info->save_info($archive_info_filename);
}

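# The built index directory is expected to look something like this
# (illustrative layout only; names vary with the collection):
#
#   index/build.cfg
#   index/text/     <- the compressed text and info database, eg index/text/demo.ldb
#   index/stt/      <- one or more index subdirectories (one is chosen as $index)
#   index/assoc/    <- associated files (ignored when choosing an index)
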
# returns the path to the gdbm info database - also
# sets the $collection and $textdir global variables
sub get_gdbmfile {
    my ($indexdir) = @_;
    my ($gdbmfile);

    opendir (DIR, $indexdir) || die "Couldn't open directory $indexdir\n\n";
    my @conts = readdir DIR;
    closedir DIR;

    foreach $file (@conts) {
        if ($file =~ /text$/) {
            $textdir = $file;
            last;
        }
    }
    die "No text directory found in $indexdir\n\n"
        unless defined $textdir && $textdir =~ /text$/;

    $gdbmfile = &util::filename_cat ($indexdir, $textdir);

    opendir (DIR, $gdbmfile) || die "Couldn't open directory $gdbmfile\n\n";
    @conts = readdir DIR;
    closedir DIR;

    foreach $file (@conts) {
        if ($file =~ /^(.*?)\.(?:ldb|bdb)$/) {
            $collection = $1;
            $gdbmfile = &util::filename_cat ($gdbmfile, $file);
            last;
        }
    }

    if (defined $collection && $collection =~ /\w/) {
        $textdir = &util::filename_cat ($textdir, $collection);
    } else {
        die "collection global wasn't set\n";
    }
    return $gdbmfile if (-e $gdbmfile);
    die "Couldn't find gdbm info database in $indexdir\n\n";
}

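# db2txt prints the info database as a series of [key] value records
# separated by a line of 70 dashes, for example (illustrative content only):
#
#   [HASHa7636be5ff3833088eeab2]
#   <doctype>doc
#   <hastxt>1
#   <Title>Farming snails
#   <contains>".1;".2
#   ----------------------------------------------------------------------
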
sub get_toplevel_OID {
    my ($gdbmfile) = @_;

    open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
    print STDERR "Finding all top level sections from $gdbmfile\n" if $verbosity >= 2;

    $/ = '-' x 70;
    my $entry = "";
    while (defined ($entry = <DB2TXT>)) {
        next unless $entry =~ /\w/;  # ignore blank entries
        $entry =~ s/\n+/\\n/g;       # flatten newlines to literal '\n' so the whole record is on one line
        my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/;

        next if ($key =~ /\./);          # ignore any lower level entries
        next if ($key =~ /^CL/);         # ignore classification entries
        next if ($value =~ /<section>/); # ignore docnum->OID entries
        next if ($value =~ /<docoid>/);  # ignore strange s133->OID entries
        next if ($key !~ /\d/);          # ignore collection, browse entries

        push( @$toplevelinfo, $key);
    }

    $/ = "\n";
    #print STDERR "toplevel sections are: ", join ("\n", @$toplevelinfo);
    #print STDERR "\n";
}

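# Each value in %infodb is a block of "<name>value" lines, one metadata
# element per line, for example (illustrative content only):
#
#   <Title>Farming snails
#   <docnum>15
#   <hastxt>1
#   <contains>".1;".2
#
# get_metadata() below splits such a block into a hash of name => value,
# joining repeated names with '@'.
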
# gets all the metadata from a gdbm file entry, and puts it into a hashref
sub get_metadata {
    my ($gdb_str_ref, $hashref) = @_;

    my @entries = split(/\n/, $gdb_str_ref);
    foreach $entry (@entries) {
        my ($key, $value) = ($entry =~ /^<([^>]*)>(.*?)$/);
        $$hashref{$key} .= '@' if defined $$hashref{$key};
        $$hashref{$key} .= $value;
    }
}

# takes a hashref containing the metadata for a gdbmfile entry, and extracts
# the children's numbers (from the 'contains' entry).
# assumes the format is ".1;".2;".3
sub get_children {
    my ($hashref, $children) = @_;

    $childs = $hashref->{'contains'};
    if (defined ($childs)) {
        $childs =~ s/\@$//;   # remove trailing @
        $childs =~ s/^\"\.//; # remove initial ".
        @$children = split /\;\"\./, $childs;
    }
    else {
        @$children = ();
    }
}

# takes a hashref containing the metadata for a gdbmfile entry, and extracts
# the children's numbers (from the 'contains' entry).
# assumes the format is ".1;".2;".3
# returns a list with the full child names, ie HASH0123...ac.1, HASH0123...ac.2
# etc
# used for classification stuff
sub get_whole_children {
    my ($parentoid, $hashref, $children) = @_;

    my $childs = $hashref->{'contains'};
    my @items;
    if (defined ($childs)) {
        $childs =~ s/\@$//;           # remove trailing @
        @items = split /\;/, $childs; # split on ;
        foreach $item (@items) {
            $item =~ s/^\"/$parentoid/; # replace " with parentoid
            push (@$children, "$item");
        }
    }
    else {
        @$children = ();
    }
}

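# recurse_sections() walks the 'contains' tree. For example (illustrative
# OIDs only), a document HASH0123...ac whose top section contains ".1;".2
# gets named subsections "1" and "2" created under the top section, and the
# walk then continues into HASH0123...ac.1, HASH0123...ac.2 and so on.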
sub recurse_sections {
    my ($doc_obj, $children, $parentoid, $parentsection, $olddir) = @_;

    foreach $child (sort numerically @$children) {
        $doc_obj->create_named_section("$parentsection.$child");
        my $value = $infodb{"$parentoid.$child"};
        my $hashref = {};
        &get_metadata($value, $hashref);       # get the child's metadata
        my $newchildren = [];
        &get_children($hashref, $newchildren); # get the child's children
        # add content for the current section
        &add_section_content($doc_obj, "$parentsection.$child", $hashref, $olddir);
        # process all the children if there are any
        &recurse_sections($doc_obj, $newchildren, "$parentoid.$child", "$parentsection.$child", $olddir)
            if (defined ($newchildren));
    }
}

sub add_section_content {
    my ($doc_obj, $cursection, $hashref, $olddir) = @_;

    foreach $key (keys %$hashref) {
        # don't need to store these metadata
        next if $key =~ /(contains|docnum|hastxt|doctype|archivedir|classifytype)/i;
        my @items = split /@/, $hashref->{$key};
        map {$doc_obj->add_metadata ($cursection, $key, $_); } @items;
    }
    my ($docnum) = $hashref->{'docnum'} =~ /(\d*)/;
    my ($hastext) = $hashref->{'hastxt'} =~ /(0|1)/;

    my $images = [];
    if ($hastext) {
        my $text = &get_text($docnum);

        #my (@images) = $text =~ /<img.*?src=\"([^\"]*)\"[^>]*>/g;

        # in the text, replace the path to each image with _httpdocimg_/blah.gif
        #while ($text =~ s/(<img.*?src=\")([^\"]*)(\"[^>]*>)/
        #       $1.&get_img($2, \@images).$3/sgei) {
        $text =~ s/(<img.*?src=\")([^\"]*)(\"[^>]*>)/
            $1.&get_img($2,$images).$3/sgei;

        $doc_obj->add_text ($cursection, $text);

        if (scalar(@$images) > 0) {
            foreach $img (@$images) {
                my ($assoc_file) = $img =~ /([^\/\\]*\..*)$/; # the name of the image
                $img =~ s/_httpcollection_/\./; # replace _httpcollection_ with .
                $img =~ s/_thisOID_/$olddir/;   # replace _thisOID_ with the old archivedir name

                $doc_obj->associate_file($img, $assoc_file);
            }
        }
    }
}


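# get_img() rewrites an image reference for the rebuilt archive. For example
# (illustrative value only), a src of
#   _httpcollection_/_thisOID_/p01.gif
# becomes _httpdocimg_/p01.gif in the text, while the original path is pushed
# onto the $images list so add_section_content() can associate the file with
# the document.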
sub get_img {
    my ($path, $images) = @_;
    my $img = "_httpdocimg_/";
    my ($imgname) = $path =~ /([^\/\\]*\..*)$/;
    push (@$images, $path);
    $img .= $imgname;
    return $img;
}

sub add_classification_metadata {
    my ($oid, $doc_obj, $cursection) = @_;

    if (defined $doc_classif_info->{$oid}) {
        my $hashref = $doc_classif_info->{$oid};

        foreach $key (keys %$hashref) {
            my @items = @{$hashref->{$key}};
            map {$doc_obj->add_metadata ($cursection, $key, $_); } @items;
        }
    }
}

# picks up the cover image "cover.jpg" from the old archives directory.
sub add_cover_image {
    my ($doc_obj, $olddir) = @_;

    $assoc_file = "cover.jpg";
    $img = "archives/$olddir/$assoc_file";

    if (-e $img) {
        $doc_obj->associate_file($img, $assoc_file);
    }
}


sub set_index {
    # check that $collection has been set
    die "collection global was not set\n"
        unless defined $collection && $collection =~ /\w/;

    # find an index (just use the first non-text directory we come across in $indexdir)
    opendir (INDEXDIR, $indexdir) || die "couldn't open directory $indexdir\n";
    my @indexes = readdir INDEXDIR;
    closedir INDEXDIR;
    foreach $i (@indexes) {
        next if $i =~ /text$/i || $i =~ /\./ || $i =~ /assoc$/i;
        $index = &util::filename_cat ($i, $collection);
        last;
    }
}


#########################################################################

################ functions involving mg ################################

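# get_text() sends a document number to the mgquery process started by
# openmg() below. With the settings used there, sending eg "15" should make
# mgquery print the text of document 15, followed by the terminator line
# "</mg>" and a trailing "dd documents retrieved." summary (an illustrative
# description of the expected dialogue).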
sub get_text {
    my ($docnum) = @_;

    print STDERR "." if $verbosity >= 2;
    &mgcommand ($docnum);

    <$mgread>; # eat the document separator

    my $text = "";
    my $line = "";

    while (defined ($line = <$mgread>)) {
        last if $line =~ /^<\/mg>/;
        $text .= $line;
    }

    # Read in the last statement, which should be:
    # "dd documents retrieved."
    <$mgread>;

    return $text;
}

sub numerically {$a <=> $b;}


sub openmg {

    #print STDERR "index: $index\n";

    die "Unable to start mgquery." unless
        &openpipe($mgread, $mgwrite,
                  "mgquery -d $indexdir -f $index -t $textdir");

    $mgwrite->autoflush();

    &mgcommand('.set expert true');
    &mgcommand('.set terminator "</mg>\n"');
    &mgcommand('.set mode text');
    &mgcommand('.set query docnums');
    &mgcommand('.set term_freq off');
    &mgcommand('.set briefstats off');
    &mgcommand('.set memstats off');
    &mgcommand('.set sizestats off');
    &mgcommand('.set timestats off');
}

sub closemg {
    &mgcommand (".quit");
    close($mgread);
    close($mgwrite);
}

sub mgcommand {
    my ($command) = @_;

    return if $command =~ /^\s*$/; # ignore whitespace-only commands
    #print STDERR "command: $command\n";
    print $mgwrite "$command\n";

    # eat up the command executed which is echoed
    <$mgread>;
}

# openpipe(READ, WRITE, CMD)
#
# Like open2, except CMD's stderr is also redirected.
#
sub openpipe {
    my ($read, $write, $cmd) = @_;
    my ($child_read, $child_write);

    $child_read = ++$FileHandle;
    $child_write = ++$FileHandle;

    pipe($read, $child_write) || die "Failed pipe($read, $child_write): $!";
    pipe($child_read, $write) || die "Failed pipe($child_read, $write): $!";
    my $pid;

    if (!defined($pid = fork)) {
        die "Failed fork: $!";
    } elsif ($pid == 0) {
        close($read);
        close($write);
        open(STDIN, "<&$child_read");
        open(STDOUT, ">&$child_write");
        open(STDERR, ">&$child_write");
        exec($cmd);
        die "Failed exec $cmd: $!";
    }

    close($child_read);
    close($child_write);

    $write->autoflush();
    $read->autoflush();

    return 1;
}


######################################################################

############# functions to do with the classification stuff ##########

# returns the top level classification OIDs
sub get_classifyinfo {
    my ($gdbmfile) = @_;

    open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
    print STDERR "Finding all classification sections from $gdbmfile\n";

    $/ = '-' x 70;
    my $entry = "";
    while (defined ($entry = <DB2TXT>)) {
        next unless $entry =~ /\w/; # ignore blank entries
        $entry =~ s/\n+/\\n/g;      # flatten newlines to literal '\n' so the whole record is on one line
        my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/;

        next unless ($key =~ /^CL\d$/); # assumes classification OIDs look like
                                        # CL1 or CL2 etc

        push( @$classifyinfo, $key);
    }

    $/ = "\n";
    #print STDERR "classifications are: ", join(", ", @$classifyinfo);
    #print STDERR "\n";
}

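# make_info_file() writes a Hierarchy classifier file with one line per
# classification node, in the form  "Title"<tab>number<tab>"Title", where the
# number is the node's OID with the leading "CLn." stripped. For example
# (illustrative content only), nodes CL2.1 and CL2.1.2 might produce:
#
#   "Science"	1	"Science"
#   "Physics"	1.2	"Physics"
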
# this creates the classification files needed for the hierarchy classifier
# used for subjects, titles, orgs etc
# also adds in entries to the collect_cfg hash
sub make_info_file {
    my ($classifier) = @_;
    my $info_file = "";
    my $entry = $infodb{$classifier};

    my $hashref = {};
    &get_metadata($entry, $hashref);

    my $classifier_name = "CL".$hashref->{'Title'}; # like CLSubject
    $classifier_name =~ s/\@$//; # remove trailing @

    # check children - if there is a classifier node at this level,
    # use a hierarchy, otherwise use an AZList.

    my $children = [];
    my $hierarchy = 0;
    &get_whole_children($classifier, $hashref, $children); # returns a list of the child ids
    foreach $child (@$children) {
        if (not &is_document($child)) {
            $hierarchy = 1;
            last;
        }
    }

    if (!$hierarchy) {
        &add_classify_cfg_list($classifier_name);
    } else { # there is a hierarchy so create a file
        $info_file = "./etc/$classifier_name.txt";

        print STDERR "classification $classifier will be called $classifier_name\n";

        open (OUTDOC, "> $info_file" ) || die "couldn't open file $info_file\n";

        foreach $child (@$children) {
            &process_entry(OUTDOC, $classifier_name, $child);
        }

        close OUTDOC;

        &add_classify_cfg($classifier, $classifier_name, $info_file);
    }
}


sub process_entry {
    my ($handle, $classifier_name, $classify_id) = @_;
    my $value = $infodb{$classify_id};

    my $hashref = {};
    &get_metadata($value, $hashref);
    my $title = $hashref->{'Title'};
    $title =~ s/\@$//; # remove trailing @
    &add_line($handle, $classify_id, $title);

    my $children = [];
    &get_whole_children($classify_id, $hashref, $children);
    foreach $child (@$children) {
        if (&is_document($child)) {
            &add_doc_metadata($child, $classifier_name, $title);
        } else {
            &process_entry($handle, $classifier_name, $child);
        }
    }
}


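# add_doc_metadata() accumulates classification metadata per document, for
# example (illustrative structure only):
#
#   $doc_classif_info->{'HASH0123...ac'}->{'CLSubject'} = ['Science', 'Physics'];
#
# add_classification_metadata() later copies these values onto the document.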
sub add_doc_metadata {
    my ($doc_id, $classifier_name, $classifier_id) = @_;

    # add entry to doc database
    #print STDERR "at doc level, docnum=$classify_id\n";

    $doc_classif_info->{$doc_id} = {} unless defined $doc_classif_info->{$doc_id};
    $doc_classif_info->{$doc_id}->{$classifier_name} = [] unless
        defined $doc_classif_info->{$doc_id}->{$classifier_name};
    push (@{$doc_classif_info->{$doc_id}->{$classifier_name}}, $classifier_id);
}


sub add_line {
    my ($handle, $classify_id, $title) = @_;

    #print STDERR "classify id= $classify_id, title= $title\n";
    $title = &unicode::ascii2utf8(\$title);
    my ($num) = $classify_id =~ /^CL\d\.(.*)$/; # remove the CL1. from the front

    print $handle "\"$title\"\t$num\t\"$title\"\n";
}

sub is_document {
    my ($oid) = @_;
    return 1 if $oid =~ /^HASH/i;
    return 0;
}

########################################################################

########## stuff for producing collect.cfg file ###########################

sub add_default_cfg {

    $username = `whoami`;
    $username = `logname` unless defined $username && $username =~ /\w/;
    $username = "a_user" unless defined $username && $username =~ /\w/;
    $username =~ s/\n//;
    $collect_cfg->{'creator'} = "$username\@cs.waikato.ac.nz";
    $collect_cfg->{'maintainer'} = "$username\@cs.waikato.ac.nz";
    $collect_cfg->{'public'} = "true";
    $collect_cfg->{'beta'} = "true";

    $collect_cfg->{'plugin'} = [];
    push (@{$collect_cfg->{'plugin'}}, ["GreenstoneXMLPlugin"]);
    push (@{$collect_cfg->{'plugin'}}, ["ArchivesInfPlugin"]);
    push (@{$collect_cfg->{'plugin'}}, ["DirectoryPlugin"]);

    $collect_cfg->{'format'} = {};
    $collect_cfg->{'format'}->{'DocumentImages'} = "true";
    $collect_cfg->{'format'}->{'DocumentText'} =
        "\"<h3>[Title]</h3>\\\\n\\\\n<p>[Text]\"";
    $collect_cfg->{'format'}->{'SearchVList'} =
        "\"<td valign=top>[link][icon][/link]</td><td>{If}{[parent(All': '):Title],[parent(All': '):Title]:}[link][Title][/link]</td>\"";

    $collect_cfg->{'collectionmeta'} = {};
    $collect_cfg->{'collectionmeta'}->{'collectionname'} = "\"$collection\"";
    $collect_cfg->{'collectionmeta'}->{'iconcollection'} = "\"_httpprefix_/collect/$collection/images/$collection.gif\"";
    $collect_cfg->{'collectionmeta'}->{'iconcollectionsmall'} = "\"_httpprefix_/collect/$collection/images/${collection}sm.gif\"";
    $collect_cfg->{'collectionmeta'}->{'collectionextra'} = "\"This is a collection rebuilt from CDROM.\"";
}

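# add_index_cfg() reads the indexmap entries from build.cfg to work out which
# indexes were built. An indexmap line might look like this (illustrative
# values only):
#
#   indexmap section:text->stt section:Title->stb
#
# Each "level:field->shortname" pair becomes an index entry in the new
# collect.cfg, and the first one found becomes the default index.
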
sub add_index_cfg {
    my ($buildfile) = @_;

    my $data = {};
    $collect_cfg->{'indexes'} = [];
    if (-e $buildfile) {
        $data = &cfgread::read_cfg_file($buildfile, '^(this)$', '^(indexmap)$');
        foreach $i (@{$data->{'indexmap'}}) {
            ($thisindex, $abbrev) = split ("\-\>", $i);
            push (@{$collect_cfg->{'indexes'}}, $thisindex);
            $collect_cfg->{'defaultindex'} = $thisindex unless defined
                $collect_cfg->{'defaultindex'};
            $name = &get_index_name($thisindex);
            $thisindex = ".$thisindex";
            $collect_cfg->{'collectionmeta'}->{$thisindex} = "\"$name\"";
        }
    }
    else {
        print STDERR "Couldn't read $buildfile, could not add index data to configuration file\n";
    }
}

sub get_index_name {
    my ($thisindex) = @_;
    return "paragraphs" if $thisindex =~ /paragraph/;
    return "chapters" if $thisindex =~ /section.*text/;
    return "titles" if $thisindex =~ /Title/;
    return "other";
}

sub add_classify_cfg {
    my ($classify, $classifier_name, $file) = @_;

    $collect_cfg->{'classify'} = [] unless defined $collect_cfg->{'classify'};

    my ($title) = $classifier_name =~ /^CL(.*)$/;
    my ($filename) = $file =~ /\/([^\/]*)$/;
    my $entry = "Hierarchy -hfile $filename -metadata $classifier_name -buttonname $title -sort Title";
    push (@{$collect_cfg->{'classify'}}, [$entry]);
}

sub add_classify_cfg_list {
    my ($classifier) = @_;

    $collect_cfg->{'classify'} = [] unless defined $collect_cfg->{'classify'};
    my ($title) = $classifier =~ /^CL(.*)$/;
    my $entry = "AZList -metadata $classifier -buttonname $title";
    push (@{$collect_cfg->{'classify'}}, [$entry]);
}

sub output_cfg_file {
    my ($collfile) = @_;

    if (-e $collfile) { # collect.cfg already exists
        $collfile .= ".new";
    }
    &cfgread::write_cfg_file($collfile, $collect_cfg,
                             '^(creator|maintainer|public|beta|defaultindex)$',
                             '^(indexes)$', '^(format|collectionmeta)$',
                             '^(plugin|classify)$');
}