source: main/tags/2.40/gsdl/bin/script/unbuildv2.pl@ 21085

Last change on this file since 21085 was 3324, checked in by sjboddie, 22 years ago

Removed the really old unindex.pl script and added the not quite so old
unbuildv1.pl and unbuildv2.pl. These scripts can be used to convert built
indexes back to Greenstone Archive format documents. unbuildv1.pl handles
very old Greenstone collections like those on the various HumanInfo CDROMs
while unbuildv2.pl handles more recently created collections. unbuildv1.pl
was altered and now works but unbuildv2.pl probably still needs some work.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.6 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# unindex.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# this program will decompress all the text from a built index
29# and return it to gml format -
30#this is based on the format current in Nov 1999. (version two)
31
32# Stefan updated unbuildv1.pl in August 2002 but unbuildv2.pl was not
33# updated. It probably needs some work done before using.
34
# Compile-time bootstrap: the Greenstone environment variables must be set
# before any of the GSDL perllib modules below are loaded, and perllib is
# pushed onto @INC so the 'use' statements can find them.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    # Seed for generating unique bareword filehandle names (FH001, FH002, ...)
    # via Perl's magic string auto-increment in "++$FileHandle".
    $FileHandle = 'FH000';
}
41
42use doc;
43use docsave;
44use util;
45use parsargv;
46use GDBM_File;
47use FileHandle;
48use English;
49use cfgread;
50use unicode;
51
52select STDERR; $| = 1;
53select STDOUT; $| = 1;
54
55
56# globals
57$collection = ""; # the collection name
58$index = ""; # the selected index (like stt/unu)
59$textdir = ""; # the textdir (like text/unu)
60$toplevelinfo = []; #list of document OIDs
61%infodb = (); #hash into GDBM file
62$classifyinfo = []; # list of classifications
63$doc_classif_info = {}; # hash of OIDs->classifications they belong to
64$collect_cfg = {}; #data for the configuration file
65
66$mgread = ++$FileHandle;
67$mgwrite = ++$FileHandle;
68
69
70
# Print a brief usage summary for this script to STDERR.
sub print_usage {
    print STDERR "\n usage: $0 [options]\n\n";
    print STDERR " options:\n";
    print STDERR " -verbosity number 0=none, 3=lots\n";
    print STDERR " -indexdir directory The index to be decompressed (defaults to ./index)\n";
    # fixed: the original message was missing the closing ")" after ./archives.new
    print STDERR " -archivedir directory Where the converted material ends up (defaults to ./archives.new)\n";
    print STDERR " -removeold Will remove the old contents of the archives\n";
    print STDERR " directory -- use with care\n\n";
}
80
&main ();

# Top-level driver: parse options, prepare the archives and etc directories,
# mine the GDBM info database for classifications and top-level document
# OIDs, then walk every document, pulling its text back out of the mg index
# and writing it as a Greenstone Archive (GML) document.
sub main {
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'indexdir/.*/index', \$indexdir,
                         'archivedir/.*/archives.new', \$archivedir,
                         'removeold', \$removeold)) {
        &print_usage();
        die "\n";
    }

    die "indexdir $indexdir does not exist\n\n" unless (-d $indexdir);
    $indexdir =~ s/\/$//;
    if (-d $archivedir) {
        if ($removeold) {
            print STDERR "Warning - removing current contents of the archives directory $archivedir\n";
            print STDERR " in preparation for the import\n";
            sleep(5); # just in case...
            &util::rm_r ($archivedir);
        }
    } else {
        &util::mk_all_dir ($archivedir);
    }

    # the generated collect.cfg and classifier hfiles go into ./etc
    $etcdir = "./etc";
    if (!(-d $etcdir)) {
        &util::mk_all_dir ($etcdir);
    }


    my $gdbmfile = &get_gdbmfile ($indexdir); # sets $collection and $textdir
    &set_index (); # sets $index (just chooses one index)


    my $buildcfgfile = &util::filename_cat($indexdir, "build.cfg");
    my $colcfgfile = &util::filename_cat($etcdir, "collect.cfg");

    &add_default_cfg();
    &add_index_cfg($buildcfgfile);


    # work out all the classifications from the gdbm file; info for each doc
    # (ie which classifications they belong to) is kept in $doc_classif_info
    &get_classifyinfo ($gdbmfile); # puts a list of classifications into $classifyinfo

    &get_toplevel_OID ($gdbmfile); # puts a list of the top level document OIDs into $toplevelinfo

    # read-only tie to the info database for everything that follows
    tie (%infodb, "GDBM_File", $gdbmfile, 1, 0);

    # this makes the files specifying the hierarchy of subjects, titles etc
    foreach $classify (@$classifyinfo) {

        &make_info_file($classify);

    }

    # write out the collect.cfg
    &output_cfg_file($colcfgfile);

    &openmg ();

    # read the archive information file
    # NOTE(review): arcinfo is not use'd above - presumably pulled in by
    # docsave; verify before relying on this script
    my $archive_info_filename = &util::filename_cat ($archivedir, "archives.inf");
    my $archive_info = new arcinfo ();

    # create a docsave object to process the documents
    my $processor = new docsave ("", $archive_info, $verbosity);
    $processor->setarchivedir ($archivedir);

    # rebuild each top-level document: metadata from the info database,
    # text from mg, then recurse into its subsections
    my ($doc_obj, $hashref, $children);
    print STDERR "processing documents now\n" if $verbosity >=2;
    foreach $oid (@$toplevelinfo) {
        $value = $infodb{$oid};
        $hashref={};
        $children = [];
        &get_metadata($value, $hashref);
        $doc_obj = new doc ();
        $doc_obj->set_OID($oid);
        my ($olddir) = $hashref->{'archivedir'}; # old dir for this doc, where images are stored
        $top = $doc_obj->get_top_section();
        &add_section_content ($doc_obj, $top, $hashref, $olddir);
        &add_classification_metadata($oid, $doc_obj, $top);
        &add_cover_image($doc_obj, $olddir);
        &get_children($hashref, $children);
        &recurse_sections($doc_obj, $children, $oid, $top, $olddir);
        $processor->process($doc_obj);

    }
    print STDERR "\n";

    &closemg();

    # write out the archive information file
    $archive_info->save_info($archive_info_filename);
}
179
180# returns the path to the gdbm info database - also
181# sets the $collection and $textdir global variable
# Locate the GDBM info database beneath $indexdir and return its path.
# Side effects: sets the global $collection (from the database file's
# basename) and $textdir (the text directory, extended with the collection
# name before returning).
sub get_gdbmfile {
    my ($indexdir) = @_;
    my ($gdbmfile);

    # find the text directory (e.g. "text") inside the index directory
    opendir (DIR, $indexdir) || die "Couldn't open directory $indexdir\n\n";
    my @conts = readdir DIR;
    closedir DIR;   # fixed: close() is for filehandles; dirhandles need closedir()

    foreach my $file (@conts) {
        if ($file =~ /text$/) {
            $textdir = $file;
            last;
        }
    }
    die "No text directory found in $indexdir\n\n"
        unless defined $textdir && $textdir =~ /text$/;

    $gdbmfile = &util::filename_cat ($indexdir, $textdir);

    # find the database file (<collection>.ldb or <collection>.bdb) within it
    opendir (DIR, $gdbmfile) || die "Couldn't open directory $gdbmfile\n\n";
    @conts = readdir DIR;
    closedir DIR;   # fixed: was close()

    foreach my $file (@conts) {
        if ($file =~ /^(.*?)\.(?:ldb|bdb)$/) {
            $collection = $1;
            $gdbmfile = &util::filename_cat ($gdbmfile, $file);
            last;
        }
    }

    if (defined $collection && $collection =~ /\w/) {
        $textdir = &util::filename_cat ($textdir, $collection);
    } else {
        die "collection global wasn't set\n";
    }
    return $gdbmfile if (-e $gdbmfile);
    die "Couldn't find gdbm info database in $indexdir\n\n";
}
221
222
# Fill the global $toplevelinfo with the OIDs of all top-level documents by
# scanning a db2txt dump of the GDBM database and discarding subsection,
# classification and housekeeping entries.
sub get_toplevel_OID {
    my ($gdbmfile) = @_;

    open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
    print STDERR "Finding all top level sections from $gdbmfile\n" if $verbosity >= 2;

    # db2txt separates records with a line of 70 dashes
    $/ = '-' x 70;
    my $entry = "";
    while (defined ($entry = <DB2TXT>)) {
        next unless $entry =~ /\w/; # ignore blank entries
        # fold newline runs into literal "\n" so each record is one string
        $entry =~ s/\n+/\\n/g;
        my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/; # record format: [key]value

        next if ($key =~ /\./);          # ignore any lower level (subsection) entries
        next if ($key =~ /^CL/);         # ignore classification entries
        next if ($value =~ /<section>/); # ignore docnum->OID entries
        next if ($key !~ /\d/);          # ignore collection, browse entries

        push( @$toplevelinfo, $key);

    }

    $/ = "\n"; # restore the input record separator
    #print STDERR "toplevel sections are: ", join ("\n", @$toplevelinfo);
}
248
249# gets all the metadata from a gdbm file entry, and puts it into a hashref
# Parse one GDBM record body (lines of the form "<key>value") into the
# supplied hashref.  Values for repeated keys are concatenated with '@'
# separators, matching the multi-valued metadata convention used elsewhere.
sub get_metadata {
    my ($record_text, $meta) = @_;

    for my $line (split /\n/, $record_text) {
        my ($field, $data) = $line =~ /^<([^>]*)>(.*?)$/;
        if (defined $meta->{$field}) {
            $meta->{$field} .= '@' . $data;
        } else {
            $meta->{$field} .= $data;
        }
    }
}
264
265#takes a hashref containing the metadata for a gdbmfile entry, and extracts
266#the childrens numbers (from the 'contains' entry).
267#assumes format is ".1;".2;".3
# Extract a section's child numbers from its 'contains' metadata entry
# (format: ".1;".2;".3, possibly with a trailing @) into @$children.
# Fix: the original's else branch did "$children = []", which only rebinds
# the lexical parameter and never empties the caller's array; we now clear
# the array the caller passed in.
sub get_children {
    my ($hashref, $children) = @_;

    my $childs = $hashref->{'contains'};
    if (defined ($childs)) {
        $childs =~ s/\@$//;   # remove trailing @
        $childs =~ s/^\"\.//; # remove initial ".
        @$children = split /\;\"\./, $childs;
    }
    else {
        @$children = ();      # genuinely empty the caller's array
    }
}
282
283#takes a hashref containing the metadata for a gdbmfile entry, and extracts
284#the childrens numbers (from the 'contains' entry).
285#assumes format is ".1;".2;".3
286#returns a list with the full child name ie HASH0123...ac.1 HASH0123...ac.2
287#etc
288#used for classification stuff
# Like get_children, but returns fully-qualified child OIDs.  The
# 'contains' entry is either ".1;".2 (child suffixes, prefixed with
# $parentoid) or a ';'-separated list of complete OIDs.  Used for the
# classification hierarchy.
# Fix: the original's "no children" branch did "$children = []", a no-op
# that never cleared the caller's array; we now clear it properly.
sub get_whole_children {

    my ($parentoid, $hashref, $children) = @_;

    my $childs = $hashref->{'contains'};
    unless (defined ($childs)) {
        @$children = ();
        return;
    }

    $childs =~ s/\@$//;           # remove trailing @
    if ($childs =~ s/^\"//) {     # remove initial " if there
        # suffix form: split into .1 .2 .3 etc and qualify with the parent
        foreach my $item (split /\;\"/, $childs) {
            push (@$children, "$parentoid$item");
        }
    }
    else {
        # whole id is already present: split on ; only
        foreach my $item (split /\;/, $childs) {
            push (@$children, "$item");
        }
    }
    #print STDERR "children are: ", join ("\n", @$children);
}
# Recursively recreate a document's section tree.  @$children holds the bare
# child numbers of $parentsection; for each one we create the named section
# on $doc_obj, fetch its metadata from the tied %infodb, add its content,
# and recurse into its own children.
sub recurse_sections {
    my ($doc_obj, $children, $parentoid, $parentsection, $olddir) = @_;

    foreach $child (sort numerically @$children) {
        $doc_obj->create_named_section("$parentsection.$child");
        my $value = $infodb{"$parentoid.$child"};
        my $hashref={};
        &get_metadata($value, $hashref); # get child's metadata
        my $newchildren = [];
        &get_children($hashref, $newchildren); # get child's children
        # add content for current section
        &add_section_content($doc_obj, "$parentsection.$child", $hashref, $olddir);
        # process all the children if there are any
        &recurse_sections($doc_obj, $newchildren, "$parentoid.$child", "$parentsection.$child", $olddir)
            if (defined ($newchildren));
    }


}
337
# Add one section's metadata and (via mg) its text to $doc_obj.  Image
# references in the text are rewritten to _httpdocimg_/<name> and the
# original image files are associated with the document so they are copied
# into the new archive.
sub add_section_content {
    my ($doc_obj, $cursection, $hashref, $olddir) = @_;

    foreach $key (keys %$hashref) {
        # these entries are structural/bookkeeping - don't store them as metadata
        next if $key =~ /(contains|docnum|hastxt|doctype|archivedir|classifytype)/i;
        # multi-valued metadata is '@'-separated (see get_metadata)
        my @items = split /@/, $hashref->{$key};
        map {$doc_obj->add_metadata ($cursection, $key, $_); } @items;

    }
    my ($docnum)= $hashref->{'docnum'} =~ /(\d*)/;
    my ($hastext) =$hashref->{'hastxt'} =~ /(0|1)/;

    my $images=[];
    if ($hastext) {
        my $text = &get_text($docnum);

        # rewrite each <img src="..."> to point at _httpdocimg_/<file>;
        # get_img() collects the original paths into @$images as a side effect
        $text =~ s/(<img.*?src=\")([^\"]*)(\"[^>]*>)/
            $1.&get_img($2,$images).$3/sgei;

        $doc_obj->add_text ($cursection, $text);

        if (scalar(@$images)>0) {

            foreach $img (@$images) {
                my ($assoc_file) = $img =~ /([^\/\\]*\..*)$/; # the name of the image
                $img =~ s/_httpcollection_/\./; # replace _httpcollection_ with .
                $img =~ s/_thisOID_/$olddir/;   # replace _thisOID_ with old archivedir name

                $doc_obj->associate_file($img, $assoc_file);
            }
        }
    }
}
377
378
379
# Rewrite an image path for use inside document text: record the original
# path in @$collected (for later file association) and return the
# "_httpdocimg_/<basename>" form that replaces it in the text.
sub get_img {
    my ($src_path, $collected) = @_;

    push (@$collected, $src_path);
    my ($basename) = $src_path =~ /([^\/\\]*\..*)$/;
    return "_httpdocimg_/" . $basename;
}
388
389
# Copy any classification metadata recorded for $oid (in the global
# $doc_classif_info, filled by add_doc_metadata) onto the given section of
# $doc_obj.  Does nothing if the document appears in no classification.
sub add_classification_metadata {
    my ($oid, $doc_obj, $cursection) = @_;

    my $classif = $doc_classif_info->{$oid};
    return unless defined $classif;

    foreach my $meta_name (keys %$classif) {
        foreach my $meta_value (@{$classif->{$meta_name}}) {
            $doc_obj->add_metadata ($cursection, $meta_name, $meta_value);
        }
    }
}
404
405# picks up the cover image "cover.jpg" from the old archives directory.
# Associate the cover image ("cover.jpg") from the document's old archives
# directory with the rebuilt document, if one exists on disk.
sub add_cover_image {
    my ($doc_obj, $olddir) = @_;

    my $cover_name = "cover.jpg";
    my $cover_path = "archives/$olddir/$cover_name";

    $doc_obj->associate_file($cover_path, $cover_name) if -e $cover_path;
}
417
418
419
# Choose an index for text retrieval: the first subdirectory of $indexdir
# that is not the text directory, the assoc directory, or a dot entry.
# Sets the global $index to "<dir>/<collection>".  Requires $collection to
# have been set already (by get_gdbmfile).
sub set_index {
    # check that $collection has been set
    die "collection global was not set\n"
        unless defined $collection && $collection =~ /\w/;

    # find an index (just use first non-text directory we come across in $indexdir)
    opendir (INDEXDIR, $indexdir) || die "couldn't open directory $indexdir\n";
    my @indexes = readdir INDEXDIR;
    closedir INDEXDIR;   # fixed: close() is for filehandles; dirhandles need closedir()

    foreach my $i (@indexes) {
        next if $i =~ /text$/i || $i =~ /\./ || $i =~ /assoc$/i;
        $index = &util::filename_cat ($i, $collection);
        last;
    }
}
435
436
437#########################################################################
438
439################ functions involving mg ################################
440
# Retrieve the decompressed text of one mg document (by document number)
# through the pipe to the mgquery child.  Relies on the '.set terminator
# "</mg>"' configured in openmg() to delimit the document.
sub get_text {
    my ($docnum) = @_;

    print STDERR "." if $verbosity >= 2; # progress dot per document
    &mgcommand ($docnum);

    <$mgread>; # eat the document separator

    my $text = "";
    my $line = "";

    # accumulate lines until the terminator set in openmg() appears
    while (defined ($line = <$mgread>))
    {
        last if $line =~ /^<\/mg>/;
        $text .= $line;
    }

    # Read in the last statement, which should be:
    # "dd documents retrieved."
    <$mgread>;

    return $text;
}
464
465sub numerically {$a <=> $b;}
466
467
468
# Start an mgquery child over a read/write pipe pair and put it into the
# mode needed for text retrieval: expert mode, "</mg>" document terminator
# (relied on by get_text), raw text output, query-by-docnum, no term
# frequencies.  The command order matters - expert mode must come first.
sub openmg {

    #print STDERR "index: $index\n";

    die "Unable to start mgquery." unless
        &openpipe($mgread, $mgwrite,
                  "mgquery -d $indexdir -f $index -t $textdir");

    $mgwrite->autoflush();

    &mgcommand('.set expert true');
    &mgcommand('.set terminator "</mg>\n"');
    &mgcommand('.set mode text');
    &mgcommand('.set query docnums');
    &mgcommand('.set term_freq off');
}
485
# Ask the mgquery child to quit, then close both ends of the pipe.
sub closemg {
    &mgcommand (".quit");
    close($mgread);
    close($mgwrite);
}
491
# Send one command line to the mgquery child and swallow the echoed copy of
# the command from its output stream.
sub mgcommand {
    my ($command) = @_;

    # ignore empty or whitespace-only commands
    return unless $command =~ /\S/;

    #print STDERR "command: $command\n";
    print $mgwrite "$command\n";

    # mgquery echoes every command it executes; discard the echo
    <$mgread>;
}
502
503# openpipe(READ, WRITE, CMD)
504#
505# Like open2, except CMD's stderr is also redirected.
506#
# Like IPC::Open2::open2, except CMD's stderr is also redirected into the
# read pipe.  $read and $write are generated filehandle names; two more are
# generated here for the child's ends.  Returns 1 on success.
sub openpipe
{
    my ($read, $write, $cmd) = @_;
    my ($child_read, $child_write);

    $child_read = ++$FileHandle;
    $child_write = ++$FileHandle;

    pipe($read, $child_write) || die "Failed pipe($read, $child_write): $!";
    pipe($child_read, $write) || die "Failed pipe($child_read, $write): $!";

    my $pid = fork;
    # Fix: Perl's fork returns undef on failure, never a negative number, so
    # the original "(($pid = fork) < 0)" test could never catch a failure.
    if (!defined $pid) {
        die "Failed fork: $!";
    } elsif ($pid == 0) {
        # child: wire the pipe ends to stdin/stdout/stderr, then exec CMD
        close($read);
        close($write);
        open(STDIN, "<&$child_read");
        open(STDOUT, ">&$child_write");
        open(STDERR, ">&$child_write");
        exec($cmd);
        die "Failed exec $cmd: $!";
    }

    # parent: the child's pipe ends are no longer needed here
    close($child_read);
    close($child_write);

    $write->autoflush();
    $read->autoflush();

    return 1;
}
539
540
541
542
543######################################################################
544
545############# functions to do with the classification stuff ##########
546
547#returns the top level classification oids
# Collect the OIDs of all top-level classifications from a db2txt dump of
# the GDBM database into the global $classifyinfo.
sub get_classifyinfo {
    my ($gdbmfile) = @_;

    open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
    print STDERR "Finding all classification sections from $gdbmfile\n" ;

    # db2txt separates records with a line of 70 dashes
    $/ = '-' x 70;
    my $entry = "";
    while (defined ($entry = <DB2TXT>)) {
        next unless $entry =~ /\w/; # ignore blank entries
        # fold newline runs into literal "\n" so each record is one string
        $entry =~ s/\n+/\\n/g;
        my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/; # record format: [key]value

        # top-level classification OIDs look like CL1, CL2, ...; using \d+
        # (the original used \d) also accepts CL10 and above, while the
        # anchors still exclude sub-nodes such as CL1.3
        next unless ($key =~ /^CL\d+$/);

        push( @$classifyinfo, $key);

    }

    $/ = "\n"; # restore the input record separator
    print STDERR "classifications are: ", join(", ", @$classifyinfo);
}
571
572#this creates the classification files needed for the hierarchy classifier
573#used for subjects, titles, orgs etc
574#also adds in entries to the collect_cfg hash
# Build the classifier configuration for one top-level classification
# (CL1, CL2, ...).  If its immediate children are documents it is treated
# as a flat list and an AZList entry is added to $collect_cfg; otherwise a
# Hierarchy hfile (./etc/CL<Title>.txt) is written and a Hierarchy entry
# added.  Reads the tied %infodb.
sub make_info_file {

    my ($classifier) = @_;
    my $info_file = "";

    my $entry = $infodb{$classifier};
    #print STDERR "entry = $entry\n";
    my $hashref = {};
    &get_metadata($entry, $hashref);

    my $classifier_name = "CL".$hashref->{'Title'}; # like CLSubject
    $classifier_name =~ s/\@$//; # remove trailing @

    # check children - if all are documents at this level, we don't need a
    # hierarchy so just put an AZList into the config file
    # NOTE(review): only the first child is inspected; assumes the children
    # are homogeneous - confirm this holds for the collections being rebuilt

    my $children=[];
    &get_whole_children($classifier, $hashref, $children);
    if (&is_document($$children[0])) {
        &add_classify_cfg_list($classifier_name);

    }else { # there is a hierarchy so create a file

        $info_file = "./etc/$classifier_name.txt";

        print STDERR "classification $classifier will be called $classifier_name\n";

        open (OUTDOC, "> $info_file" ) || die "couldn't open file $info_file\n";

        # one hfile line per node, recursing through the hierarchy
        foreach $child (@$children) {
            &process_entry(OUTDOC, $classifier_name, $child);
        }

        close OUTDOC;

        &add_classify_cfg($classifier, $classifier_name, $info_file);
    }
}
613
614
# Emit the hfile line for one classification node and walk its children:
# document children are recorded in $doc_classif_info (so the classifier
# metadata can be attached to the documents later), non-document children
# are processed recursively as deeper hierarchy nodes.  Reads %infodb.
sub process_entry {
    my ($handle, $classifier_name, $classify_id) = @_;

    my $value = $infodb{$classify_id};

    my $hashref={};

    &get_metadata($value, $hashref);
    my $title = $hashref->{'Title'};
    $title =~ s/\@$//; # remove trailing @
    &add_line($handle, $classify_id, $title);

    my $children = [];
    &get_whole_children($classify_id, $hashref, $children);
    foreach $child (@$children) {
        if (&is_document($child)) {
            # this node's title becomes the document's classifier metadata
            &add_doc_metadata($child, $classifier_name, $title);
        }else {
            process_entry($handle, $classifier_name, $child);
        }
    }

}
638
639
640
# Record in the global $doc_classif_info that document $doc_id carries the
# value $classifier_id for classifier $classifier_name (e.g. CLSubject).
# The nested hash/array slots are created on first use.
sub add_doc_metadata {
    my ($doc_id, $classifier_name, $classifier_id) = @_;

    $doc_classif_info->{$doc_id} ||= {};
    my $per_doc = $doc_classif_info->{$doc_id};

    $per_doc->{$classifier_name} ||= [];
    push (@{$per_doc->{$classifier_name}}, $classifier_id);
}
654
655
656
657
# Write one line of a Hierarchy classifier hfile for node $classify_id, in
# the form:  "title"<TAB>number<TAB>"title"
sub add_line {
    my ($handle, $classify_id, $title) = @_;

    #print STDERR "classify id= $classify_id, title= $title\n";
    $title = &unicode::ascii2utf8($title);

    # strip the leading "CLn." so only the hierarchy number remains
    my ($num) = $classify_id =~ /^CL\d\.(.*)$/;

    print $handle "\"$title\"\t$num\t\"$title\"\n";
}
670
# True (1) iff an OID names a document (HASH...) rather than a
# classification node; 0 otherwise.
sub is_document {
    my ($oid) = @_;
    return ($oid =~ /^HASH/i) ? 1 : 0;
}
676
677########################################################################
678
679########## stuff for producing collect.cfg file ###########################
680
# Fill the global $collect_cfg with default collect.cfg settings: creator /
# maintainer (derived from the current user), default plugin pipeline,
# format strings, and collection-level metadata (icon URLs assume the
# nzdl.org layout).  Uses the global $collection.
sub add_default_cfg {

    # Fix: backticks return a *defined* (possibly empty) string even when
    # the command fails, so the original "unless defined" fallbacks could
    # never fire; test for non-blank output instead.
    $username=`whoami`;
    $username=`logname` unless defined $username && $username =~ /\S/;
    $username="a_user" unless defined $username && $username =~ /\S/;
    $username =~ s/\n//;
    $collect_cfg->{'creator'}="$username\@cs.waikato.ac.nz";
    $collect_cfg->{'maintainer'}="$username\@cs.waikato.ac.nz";
    $collect_cfg->{'public'}="false";
    $collect_cfg->{'beta'}="true";

    # default plugin pipeline for rebuilt (GML-format) collections
    $collect_cfg->{'plugin'}=[];
    push (@{$collect_cfg->{'plugin'}}, ["GMLPlug"]);
    push (@{$collect_cfg->{'plugin'}}, ["ArcPlug"]);
    push (@{$collect_cfg->{'plugin'}}, ["RecPlug"]);

    # default format strings
    $collect_cfg->{'format'}={};
    $collect_cfg->{'format'}->{'DocumentImages'}="true";
    $collect_cfg->{'format'}->{'DocumentText'} =
        "\"<h3>[Title]</h3>\\\\n\\\\n<p>[Text]\"";
    $collect_cfg->{'format'}->{'SearchVList'} =
        "\"<td valign=top>[link][icon][/link]</td><td>{If}{[parent(All': '):Title],[parent(All': '):Title]:}[link][Title][/link]</td>\"";

    # collection-level metadata
    $collect_cfg->{'collectionmeta'}={};
    $collect_cfg->{'collectionmeta'}->{'collectionname'}="\"$collection\"";
    $collect_cfg->{'collectionmeta'}->{'iconcollection'}="\"http://www.nzdl.org/gsdl/collect/$collection/images/$collection.gif\"";
    $collect_cfg->{'collectionmeta'}->{'iconcollectionsmall'}="\"http://www.nzdl.org/gsdl/collect/$collection/images/${collection}sm.gif\"";
    $collect_cfg->{'collectionmeta'}->{'collectionextra'} = "\"This is a collection rebuilt from CDROM.\"";
}
714
# Read the indexmap from the old build.cfg and translate it into the
# 'indexes', 'defaultindex' and per-index collectionmeta entries of the
# new configuration ($collect_cfg).
sub add_index_cfg {
    my ($buildfile) = @_;

    my $data={};
    $collect_cfg->{'indexes'}=[];
    if (-e $buildfile) {
        $data=&cfgread::read_cfg_file($buildfile, '^(this)$', '^(indexmap)$');
        # indexmap entries look like "section:text->st"
        foreach $i (@{$data->{'indexmap'}}) {
            ($thisindex, $abbrev)= split ("\-\>", $i);
            push (@{$collect_cfg->{'indexes'}}, $thisindex);
            # first index listed becomes the default
            $collect_cfg->{'defaultindex'} = $thisindex unless defined
                $collect_cfg->{'defaultindex'};
            # a ".index" collectionmeta entry gives the index its display name
            $name=&get_index_name($thisindex);
            $thisindex=".$thisindex";
            $collect_cfg->{'collectionmeta'}->{$thisindex} = "\"$name\"";
        }
    }
    else {
        print STDERR "Couldn't read $buildfile, could not add index data to configuration file\n";
    }

}
737
# Map a raw index specifier (e.g. "section:text") to a human-readable name
# for the collectionmeta entry.  The first matching pattern wins; anything
# unrecognised is called "other".
sub get_index_name {
    my ($thisindex) = @_;

    my @name_map = (
        [ qr/paragraph/,     "paragraphs" ],
        [ qr/section.*text/, "chapters"   ],
        [ qr/Title/,         "titles"     ],
    );
    foreach my $pair (@name_map) {
        return $pair->[1] if $thisindex =~ $pair->[0];
    }
    return "other";
}
745
# Append a Hierarchy classifier entry for $classifier_name (backed by the
# hfile $file) to the global $collect_cfg's classify list.
sub add_classify_cfg {
    my ($classify, $classifier_name, $file) = @_;

    $collect_cfg->{'classify'} = [] unless defined $collect_cfg->{'classify'};

    my ($title)    = $classifier_name =~ /^CL(.*)$/; # e.g. CLSubject -> Subject
    my ($filename) = $file =~ /\/([^\/]*)$/;         # basename of the hfile
    push (@{$collect_cfg->{'classify'}},
          ["Hierarchy hfile=$filename metadata=$classifier_name title=$title sort=Title"]);
}
758
# Append a flat AZList classifier entry for $classifier to the global
# $collect_cfg (used when a classification has no hierarchy beneath it).
sub add_classify_cfg_list {
    my ($classifier) = @_;

    $collect_cfg->{'classify'} = [] unless defined $collect_cfg->{'classify'};

    my ($title) = $classifier =~ /^CL(.*)$/; # e.g. CLTitle -> Title
    push (@{$collect_cfg->{'classify'}}, ["AZList metadata=$classifier title=$title"]);
}
767
# Write the accumulated $collect_cfg out as a collect.cfg file.  An
# existing file is never clobbered: "<name>.new" is written instead.
sub output_cfg_file {
    my ($collfile) = @_;

    $collfile .= ".new" if -e $collfile; # collect.cfg already exists

    &cfgread::write_cfg_file($collfile, $collect_cfg,
                             '^(creator|maintainer|public|beta|defaultindex)$',
                             '^(indexes)$', '^(format|collectionmeta)$',
                             '^(plugin|classify)$');
}
783
784
Note: See TracBrowser for help on using the repository browser.