source: trunk/gsdl/bin/script/unbuildv1.pl@ 3324

Last change on this file since 3324 was 3324, checked in by sjboddie, 22 years ago

Removed the really old unindex.pl script and added the not quite so old
unbuildv1.pl and unbuildv2.pl. These scripts can be used to convert built
indexes back to Greenstone Archive format documents. unbuildv1.pl handles
very old Greenstone collections like those on the various HumanInfo CDROMs
while unbuildv2.pl handles more recently created collections. unbuildv1.pl
was altered and now works but unbuildv2.pl probably still needs some work.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.6 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# unbuildv1.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# this program will decompress all the text from a built index
29# and return it to gml format - this version of the program is designed to
30# work on collections built with the first version of the greenstone software
31# it extracts the browse classifications from the gdbm file, and recreates
32# them. It also creates a default collect.cfg file.
33
34# To run, this program needs the old archive directory (called archives),
35# the index directory with the text and one other index, not paragraphs!! -
36# use one of the section indexes. Also needs the build.cfg file (in the
37# index dir) - used in creating a collect.cfg file. If build.cfg is not
38# present, the parts of the collect.cfg file relating to indexes will not
39# be entered.
40
41# Stefan altered this script slightly August 2002 to use
42# $GSDLHOME/bin/$GSDLOS/mgquery_old instead of the old mgquery in
43# nzdl-1.2. mgquery_old is a statically linked binary compiled from
44# mg-1.3d. It was compiled with SHORT_SUFFIX set so doesn't need the index
45# file extensions to be altered.
46# Also changed the command line options a little.
47
BEGIN {
    # Both environment variables must point at the Greenstone install
    # before the perllib path (and mgquery_old) can be located.
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    # seed for generating unique bareword filehandle names (see openpipe)
    $FileHandle = 'FH000';
}
54
55use doc;
56use docsave;
57use util;
58use parsargv;
59use FileHandle;
60use English;
61use cfgread;
62use unicode;
63select STDERR; $| = 1;
64select STDOUT; $| = 1;
65
66
# globals
$collection = "";        # the collection name
$index = "";             # the selected index (like stt/unu)
$textdir = "";           # the textdir (like text/unu)
$toplevelinfo = [];      # list of top level document OIDs
%infodb = ();            # in-memory copy of the GDBM database (key -> record)
$classifyinfo = [];      # list of classifications (C, T, ...)
$doc_classif_info = {};  # hash of OIDs -> classifications they belong to
$collect_cfg = {};       # data for the new collect.cfg configuration file
$mgread = ++$FileHandle; # filehandle names for the mgquery pipe
$mgwrite = ++$FileHandle;
78
sub print_usage {
    # Emit a short usage summary for the command-line options to STDERR.
    print STDERR <<"EOT";

 usage: $0 [options]

 options:
 -verbosity number 0=none, 3=lots
 -old directory The directory in which the old collection lives
 -new directory The directory in which to put the new collection
EOT
}
86
87&main ();
88
sub main {
    # Drive the whole conversion: parse options, create the new collection
    # directories, mine the GDBM database for classifications and top level
    # document OIDs, write the collect.cfg, then pull each document's text
    # back out of mg and save it as a Greenstone Archive document.
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'old/.*/', \$old,
                         'new/.*/', \$new)) {
        &print_usage();
        die "\n";
    }

    die "$old does not exist\n\n" unless (-d $old);
    my $old_indexdir = &util::filename_cat($old, "index");
    my $old_archivedir = &util::filename_cat($old, "archives");

    my $new_archivedir = &util::filename_cat($new, "archives");
    my $new_etcdir = &util::filename_cat($new, "etc");
    if (!-e $new_archivedir) {
        &util::mk_all_dir($new_archivedir);
    }
    if (!(-d $new_etcdir)) {
        &util::mk_all_dir($new_etcdir);
    }

    my $gdbmfile = &get_gdbmfile($old_indexdir); # sets $collection and $textdir
    &set_index($old_indexdir); # sets $index (just chooses one index)

    my $buildcfgfile = &util::filename_cat($old_indexdir, "build.cfg");
    my $colcfgfile = &util::filename_cat($new_etcdir, "collect.cfg");

    # don't need this but giving it a value stops an annoying undefined
    # value warning
    $ENV{'GSDLCOLLECTDIR'} = 'tmp';

    &add_default_cfg();
    &add_index_cfg($buildcfgfile);

    # work out all the classifications from the gdbm file; info for each doc
    # (ie which classifications it belongs to) is kept in $doc_classif_info
    &get_classifyinfo($gdbmfile); # puts a list of classifications into classifyinfo
    &get_toplevel_OID($gdbmfile); # puts a list of the top level document OIDs into $toplevelinfo

    # read ldb file into %infodb
    &read_gdbm($gdbmfile);

    # this makes the files specifying the hierarchy of subjects, titles etc
    foreach my $classify (@$classifyinfo) {
        &make_info_file($new_etcdir, $classify);
    }

    # write out the collect.cfg
    &output_cfg_file($colcfgfile);

    &openmg($old_indexdir);

    # the archive information file records the documents written
    my $archive_info_filename = &util::filename_cat ($new_archivedir, "archives.inf");
    my $archive_info = new arcinfo ();

    # create a docsave object to process the documents
    my $processor = new docsave ("", $archive_info, $verbosity);
    $processor->setarchivedir ($new_archivedir);

    my ($doc_obj, $hashref, $children);
    print STDERR "processing documents now\n" if $verbosity >=2;
    foreach $oid (@$toplevelinfo) {
        $value = $infodb{$oid};
        $hashref={};
        $children = [];
        &get_metadata($value, $hashref);
        $doc_obj = new doc ();
        $doc_obj->set_OID($oid);
        my ($olddir) = $hashref->{'archivedir'}; # old dir for this doc, where images are stored
        $top = $doc_obj->get_top_section();
        &add_section_content ($doc_obj, $top, $hashref, $olddir);
        &add_classification_metadata($oid, $doc_obj, $top);
        &add_cover_image($doc_obj, $old_archivedir, $olddir);
        &get_children($hashref, $children);
        &recurse_sections($doc_obj, $children, $oid, $top, $olddir) if (defined ($children));

        # change OID to new (hashed) format
        $doc_obj->delete_OID();
        $doc_obj->set_OID();
        $processor->process($doc_obj);
#       last;
    }
    print STDERR "\n";

    &closemg();

    # write out the archive information file
    $archive_info->save_info($archive_info_filename);
}
182
183# returns the path to the gdbm info database - also
184# sets the $collection and $textdir global variable
# Returns the path to the gdbm info database.  As a side effect sets the
# globals $collection (from the database's basename) and $textdir (the
# text directory, e.g. "text/unu").  Dies if either cannot be found.
sub get_gdbmfile {
    my ($indexdir) = @_;
    my ($gdbmfile);

    opendir (DIR, $indexdir) || die "Couldn't open directory $indexdir\n\n";
    my @conts = readdir DIR;
    # was close(), which silently fails on directory handles
    closedir DIR;

    foreach my $file (@conts) {
        if ($file =~ /text$/) {
            $textdir = $file;
            last;
        }
    }
    die "No text directory found in $indexdir\n\n"
        unless defined $textdir && $textdir =~ /text$/;

    $gdbmfile = &util::filename_cat ($indexdir, $textdir);

    opendir (DIR, $gdbmfile) || die "Couldn't open directory $gdbmfile\n\n";
    @conts = readdir DIR;
    closedir DIR;

    # the database file is named <collection>.ldb (or .bdb)
    foreach my $file (@conts) {
        if ($file =~ /^(.*?)\.(?:ldb|bdb)$/) {
            $collection = $1;
            $gdbmfile = &util::filename_cat ($gdbmfile, $file);
            last;
        }
    }

    if (defined $collection && $collection =~ /\w/) {
        $textdir = &util::filename_cat ($textdir, $collection);
    } else {
        die "collection global wasn't set\n";
    }
    return $gdbmfile if (-e $gdbmfile);
    die "Couldn't find gdbm info database in $indexdir\n\n";
}
224
225
226#this function will probably need to be changed for any different file formats
# Scan the GDBM dump for top level document OIDs (keys like "B.12") and
# push them onto the global @$toplevelinfo list.
# This function will probably need to be changed for different file formats.
sub get_toplevel_OID {
    my ($gdbmfile) = @_;

    open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
    print STDERR "Finding all top level sections from $gdbmfile\n" if $verbosity >= 2;

    # db2txt separates records with a line of 70 dashes; read one record
    # at a time.  local() restores $/ automatically on exit.
    local $/ = '-' x 70;

    while (defined (my $entry = <DB2TXT>)) {
        next unless $entry =~ /\w/;  # ignore blank entries
        $entry =~ s/\n+/\\n/g;       # fold real newlines into literal \n
        my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/;

        # assumes a top level OID looks like B.12 (exactly one dot)
        next unless ($key =~ /B\.\d*$/);

        push (@$toplevelinfo, $key);
    }

    # was never closed, leaking the pipe (and a zombie db2txt)
    close DB2TXT;
}
249
250# gets all the metadata from a gdbm file entry, and puts it into a hashref
251#this has changed for old style formats
# Unpack one db2txt record (a string of <key>value lines) into $hashref,
# renaming the terse v1 keys to their archive-format names.  Repeated keys
# are concatenated.  Also sets $$hashref{'hastxt'}: 1 for a leaf section
# with text, 0 for an internal node (one with a "contains" entry).
sub get_metadata {
    my ($gdb_str_ref, $hashref) = @_;

    my $has_text = 1;

    for my $pair (split /\n/, $gdb_str_ref) {
        my ($key, $value) = $pair =~ /^<([^>]*)>(.*?)$/;

        next if $key eq "p";   # not wanted
        next if $key eq "j";   # not wanted

        if ($key eq "t") {
            $key = "Title";
        } elsif ($key eq "d") {
            $key = "docnum";
        } elsif ($key eq "o") {
            $key = "archivedir";
            $value =~ s/HASH/hash/;  # dir names are lower case on disk
        } elsif ($key eq "c") {
            $key = "contains";
            $has_text = 0;           # internal sections carry no text
        }

        $$hashref{$key} .= $value;
    }

    $$hashref{'hastxt'} = $has_text;
}
283
284# takes a hashref containing the metadata for a gdbmfile entry, and
285# extracts the childrens numbers (from the 'contains' entry). assumes
286# format is like B.14.1:B.24.2:B.24.3 returns a list like 1 2 3 4
# Pull the child section numbers out of a node's "contains" metadata and
# push them onto @$children.  "contains" looks like B.14.1:B.14.2:B.14.3;
# the pushed values are the bare trailing numbers (1, 2, 3).  Entries may
# end in a literal "\n" left over from record munging.  Does nothing when
# there is no "contains" entry.
sub get_children {
    my ($hashref, $children) = @_;

    my $contains = $hashref->{'contains'};
    return unless defined $contains;

    for my $id (split /:/, $contains) {
        # keep the component after the last dot
        my ($num) = $id =~ /\.([^\.]*)(?:\\n|$)/;
        push (@$children, $num);
    }
}
303
304#takes a hashref containing the metadata for a gdbmfile entry, and extracts
305#the childrens numbers (from the 'contains' entry).
306#assumes format is like B.14.1:B.24.2:B.24.3
307#returns a list with the full child name ie B.14.1 B.14.2 etc
308#used for classification stuff
# Like get_children(), but fills @$children with the full child OIDs
# (e.g. B.14.1 B.14.2) instead of just the trailing numbers; assumes the
# "contains" entry looks like B.14.1:B.24.2:B.24.3.  Used for the
# classification stuff.  Does nothing when there is no "contains" entry.
sub get_whole_children {
    my ($hashref, $children) = @_;

    my $contains = $hashref->{'contains'};
    @$children = split /:/, $contains if defined $contains;
}
322
# Recursively rebuild the section tree below $parentsection, pulling each
# child's record from the global %infodb and adding its metadata and text
# to $doc_obj via add_section_content().
sub recurse_sections {
    my ($doc_obj, $children, $parentoid, $parentsection, $olddir) = @_;

    for my $child (sort { $a <=> $b } @$children) {
        my $section = "$parentsection.$child";
        my $oid = "$parentoid.$child";

        $doc_obj->create_named_section($section);

        my $meta = {};
        &get_metadata($infodb{$oid}, $meta);      # child's metadata

        my $grandchildren = [];
        &get_children($meta, $grandchildren);     # child's children

        # add content for the current section
        &add_section_content($doc_obj, $section, $meta, $olddir);

        # then descend into any children of this section
        &recurse_sections($doc_obj, $grandchildren, $oid, $section, $olddir)
            if defined $grandchildren;
    }
}
340
sub add_section_content {
    # Add one section's metadata and text to $doc_obj.  $hashref holds the
    # section's metadata (from get_metadata); $olddir is the old archive
    # directory for this document, used to resolve image paths embedded in
    # the text.
    my ($doc_obj, $cursection, $hashref, $olddir) = @_;

    foreach $key (keys %$hashref) {
        # these keys are structural bookkeeping, not real metadata
        next if $key =~ /(contains|docnum|hastxt|doctype|archivedir|classifytype)/i;
        # multi-valued metadata is @-separated in the GDBM record
        my @items = split /@/, $hashref->{$key};
        map {$doc_obj->add_metadata ($cursection, $key, $_); } @items;

    }

    my ($hastext) = $hashref->{'hastxt'} =~ /(0|1)/;
    if ($hastext) {

        # pull the compressed text for this section out of mg
        my ($docnum)= $hashref->{'docnum'} =~ /(\d*)/;
        my $text = &get_text($docnum);
        $doc_obj->add_text ($cursection, $text);

        # re-associate any images referenced from <img> tags in the text
        my (@images) = $text =~ /<img.*?src=\"([^\"]*)\"[^>]*>/g;

        if (scalar(@images)>0) {

            foreach $img (@images) {
                my ($assoc_file) = $img =~ /([^\/\\]*\..*)$/; # the name of the image
                $img =~ s/_httpcollection_/$old/; # replace _httpcollection_ with path to old collection
                $olddir =~ s/HASH/hash/;
                $img =~ s/_thisOID_/$olddir/; # replace _thisOID_ with old archivedir name (the path to the image)
                $img = lc $img; # doc names and path upper case in file, lower
                                # case in directory
                if (-e $img) {
                    $doc_obj->associate_file($img, $assoc_file);
                } else {
                    print STDERR "WARNING: $img does not exist\n";
                }
            }

        }
    }
}
380
381
# Copy any classification entries recorded for $oid (by add_doc_metadata,
# via the global $doc_classif_info) onto the document as ordinary metadata.
sub add_classification_metadata {
    my ($oid, $doc_obj, $cursection) = @_;

    my $classifications = $doc_classif_info->{$oid};
    return unless defined $classifications;

    while (my ($meta_name, $values) = each %$classifications) {
        $doc_obj->add_metadata($cursection, $meta_name, $_) for @$values;
    }
}
396# picks up the cover image "cover.jpg" from the old archives directory.
# Picks up the cover image "cover.jpg" from the old archives directory and
# attaches it to the document as an associated file; warns if missing.
sub add_cover_image {
    my ($doc_obj, $archivedir, $olddir) = @_;

    my $cover_name = "cover.jpg";
    my $cover_path = &util::filename_cat($archivedir, $olddir, $cover_name);

    if (-e $cover_path) {
        $doc_obj->associate_file($cover_path, $cover_name);
    } else {
        print STDERR "WARNING: couldn't find $cover_path\n";
    }
}
408
# Choose an index for mgquery to use: the first directory in $indexdir
# that is not the text directory, not a dot entry, and not a paragraph
# index.  Sets the global $index (e.g. "stt/unu").  Requires the
# $collection global to have been set by get_gdbmfile().
sub set_index {
    my ($indexdir) = @_;

    # check that $collection has been set
    die "collection global was not set\n"
        unless defined $collection && $collection =~ /\w/;

    # find an index (just use first non-text and non-paragraph directory
    # we come across in $indexdir)
    opendir (INDEXDIR, $indexdir) || die "couldn't open directory $indexdir\n";
    my @indexes = readdir INDEXDIR;
    # was close(), which silently fails on directory handles
    closedir INDEXDIR;
    foreach my $i (@indexes) {
        next if $i =~ /text$/i || $i =~ /\./ || $i =~ /^p/i;
        $index = &util::filename_cat ($i, $collection);
        last;
    }
}
427
428
429#########################################################################
430
431################ functions involving mg ################################
sub get_text {
    # Fetch the decompressed text of mg document $docnum through the
    # mgquery pipe, reading until the "</mg>" terminator configured in
    # openmg().  Returns the text as a single string.
    my ($docnum) = @_;

    print STDERR "." if $verbosity >= 2;
    &mgcommand ($docnum);

    <$mgread>; # eat the document separator

    my $text = "";
    my $line = "";

    while (defined ($line = <$mgread>))
    {
        last if $line =~ /^<\/mg>/;
        $text .= $line;
    }

    # Read in the last statement, which should be:
    # "dd documents retrieved."
    <$mgread>;

    return $text;
}
455
456
457sub numerically {$a <=> $b;}
458
459
460
461# mg stuff
462
sub openmg {
    # Start mgquery_old talking to the chosen index ($index) and text
    # directory ($textdir), and configure it so that docnum queries return
    # raw document text terminated by "</mg>".
    my ($indexdir) = @_;

    die "Unable to start mgquery." unless
        &openpipe($mgread, $mgwrite, "mgquery_old -d $indexdir -f $index -t $textdir");

    $mgwrite->autoflush();
    &mgcommand('.set expert true');
    &mgcommand('.set terminator "</mg>\n"');
    &mgcommand('.set mode text');
    &mgcommand('.set query docnums');
    &mgcommand('.set term_freq off');
}
476
sub closemg {
    # Tell mgquery to quit and close both ends of the pipe.
    &mgcommand (".quit");
    close($mgread);
    close($mgwrite);
}
482
sub mgcommand {
    # Send one command line to mgquery and swallow the echoed command.
    my ($command) = @_;

    return if $command =~ /^\s*$/; # ignore whitespace-only commands
    #print STDERR "command: $command\n";
    print $mgwrite "$command\n";

    # eat up the command executed which is echoed
    <$mgread>;
}
493
494# openpipe(READ, WRITE, CMD)
495#
496# Like open2, except CMD's stderr is also redirected.
497#
# openpipe(READ, WRITE, CMD)
#
# Like open2, except CMD's stderr is also redirected to the write end.
# Dies on pipe, fork or exec failure; returns 1 on success.
sub openpipe
{
    my ($read, $write, $cmd) = @_;
    my ($child_read, $child_write);

    $child_read = ++$FileHandle;
    $child_write = ++$FileHandle;

    pipe($read, $child_write) || die "Failed pipe($read, $child_write): $!";
    pipe($child_read, $write) || die "Failed pipe($child_read, $write): $!";

    # Perl's fork returns undef on failure, NOT a negative value as in C;
    # the old "< 0" test never fired and the child branch could run in the
    # parent when fork failed.
    my $pid = fork;
    die "Failed fork: $!" unless defined $pid;

    if ($pid == 0) {
        # child: wire the pipe ends to stdin/stdout/stderr and exec CMD
        close($read);
        close($write);
        open(STDIN, "<&$child_read");
        open(STDOUT, ">&$child_write");
        open(STDERR, ">&$child_write");
        exec($cmd);
        die "Failed exec $cmd: $!";
    }

    # parent: close the child's ends and unbuffer ours
    close($child_read);
    close($child_write);

    $write->autoflush();
    $read->autoflush();

    return 1;
}
530
531
532######################################################################
533
############# functions to do with the classification stuff ##########
# Scan the GDBM dump for classification OIDs (keys that are a single
# capital letter, like C or T) and push them onto the global
# @$classifyinfo list.
sub get_classifyinfo {
    my ($gdbmfile) = @_;

    open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
    print STDERR "Finding all classification sections from $gdbmfile\n" ;

    # db2txt separates records with a line of 70 dashes; read one record
    # at a time.  local() restores $/ automatically on exit.
    local $/ = '-' x 70;

    while (defined (my $entry = <DB2TXT>)) {
        next unless $entry =~ /\w/;  # ignore blank entries
        $entry =~ s/\n+/\\n/g;       # fold real newlines into literal \n
        my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/;

        # assumes a classification OID is a single letter, like C or T
        next unless ($key =~/^[A-Z]$/);

        push (@$classifyinfo, $key);
    }

    # was never closed, leaking the pipe (and a zombie db2txt)
    close DB2TXT;
}
557
558#this creates the classification files needed for the hierarchy classifier
559#used for subjects, titles, orgs etc
560#also adds in entries to the collect_cfg hash
sub make_info_file {
    # Write the Hierarchy classifier hfile (etc/CL.<classifier>.txt) for
    # one classification tree, walking the tree via process_entry(), and
    # register the classifier in the collect.cfg data via
    # add_classify_cfg().  Used for subjects, titles, orgs etc.
    my ($etcdir, $classifier) = @_;
    my $info_file = "";

    $info_file = &util::filename_cat($etcdir, "CL.${classifier}.txt");
    $classifier_name = "CL.$classifier";
    print STDERR "classification $classifier will be called CL.$classifier\n";

    open (OUTDOC, ">$info_file" ) || die "couldn't open file $info_file\n";

    my $entry = $infodb{$classifier};
    #print STDERR "entry = $entry\n";
    my $hashref = {};
    &get_metadata($entry, $hashref);
    my $children=[];
    &get_whole_children($hashref, $children);
    foreach $child (@$children) {
        &process_entry(OUTDOC, $classifier_name, $child);
    }

    close OUTDOC;

    &add_classify_cfg($classifier, $classifier_name, $info_file);
}
586
587
sub process_entry {
    # Recursively write one classification node to the hfile ($handle),
    # descending into sub-classifications; document-level leaves are
    # recorded against their OID with add_doc_metadata() instead.
    my ($handle, $classifier_name, $classify_id) = @_;

    my $value = $infodb{$classify_id};
    my $hashref={};
    &get_metadata($value, $hashref);
    my $title = $hashref->{'Title'};

    &add_line($handle, $classify_id, $title);

    my $children = [];
    &get_whole_children($hashref, $children);
    foreach $child (@$children) {
        if (&is_document($child)) {
            # leaf: a document hangs off this classification node
            &add_doc_metadata($child, $classifier_name, $classify_id);
        }
        else {
            &process_entry($handle, $classifier_name, $child);
        }
    }
}
609
# Record, against a document OID, which node of which classifier it hangs
# off (in the global $doc_classif_info).  Consulted later by
# add_classification_metadata().
sub add_doc_metadata {
    my ($doc_id, $classifier_name, $classifier_id) = @_;

    # push autovivifies the per-document, per-classifier list as needed
    push (@{ $doc_classif_info->{$doc_id}->{$classifier_name} }, $classifier_id);
}
620
# Write one row of a Hierarchy hfile: "<id>\t<num>\t"<title>"".
sub add_line {
    my ($handle, $classify_id, $title) = @_;

    $title = &unicode::ascii2utf8(\$title);

    # strip the leading classifier letter (e.g. the "C." from "C.1.2")
    my ($node_num) = $classify_id =~ /^[A-Z]\.(.*)$/;
    print $handle "$classify_id\t$node_num\t\"$title\"\n";
}
627
# True (1) for document OIDs ("B.<digit>..."), false (0) for
# classification OIDs.
sub is_document {
    my ($oid) = @_;
    return ($oid =~ /^B\.\d/) ? 1 : 0;
}
633
634########################################################################
635
636########## stuff for producing collect.cfg file ###########################
637
# Fill the global $collect_cfg with default settings for a rebuilt
# collection: creator/maintainer, standard plugins, format strings, and
# collection metadata derived from the global $collection name.
sub add_default_cfg {

    # Backticks return "" (still defined) when the command merely produces
    # no output or fails inside the shell, so the old "unless defined"
    # fallbacks could never fire; test for a non-blank result instead.
    $username = `whoami`;
    $username = `logname` unless defined $username && $username =~ /\S/;
    $username = "a_user"  unless defined $username && $username =~ /\S/;
    $username =~ s/\n//;

    $collect_cfg->{'creator'}="$username\@cs.waikato.ac.nz";
    $collect_cfg->{'maintainer'}="$username\@cs.waikato.ac.nz";
    $collect_cfg->{'public'}="true";

    $collect_cfg->{'plugin'}=[];
    push (@{$collect_cfg->{'plugin'}}, ["GAPlug"]);
    push (@{$collect_cfg->{'plugin'}}, ["ArcPlug"]);
    push (@{$collect_cfg->{'plugin'}}, ["RecPlug"]);

    $collect_cfg->{'format'}={};
    $collect_cfg->{'format'}->{'DocumentImages'}="true";
    $collect_cfg->{'format'}->{'DocumentText'} =
        "\"<h3>[Title]</h3>\\\\n\\\\n<p>[Text]\"";
    $collect_cfg->{'format'}->{'SearchVList'} =
        "\"<td valign=top>[link][icon][/link]</td><td>{If}{[parent(All': '):Title],[parent(All': '):Title]:}[link][Title][/link]</td>\"";

    $collect_cfg->{'collectionmeta'}={};
    $collect_cfg->{'collectionmeta'}->{'collectionname'}="\"$collection\"";
    $collect_cfg->{'collectionmeta'}->{'iconcollection'}="\"_httpprefix_/collect/$collection/images/$collection.gif\"";
    $collect_cfg->{'collectionmeta'}->{'iconcollectionsmall'}="\"_httpprefix_/collect/$collection/images/${collection}sm.gif\"";
    $collect_cfg->{'collectionmeta'}->{'collectionextra'} = "\"This is a collection rebuilt from CDROM.\"";
}
667
# Read the old build.cfg (if present) and copy its indexmap entries into
# the new collect.cfg data: the index list, a default index (the first
# one seen), and a human-readable collectionmeta label for each index.
# Warns and leaves the index list empty when build.cfg cannot be found.
sub add_index_cfg {
    my ($buildfile) = @_;

    $collect_cfg->{'indexes'} = [];

    unless (-e $buildfile) {
        print STDERR "Couldn't read $buildfile, could not add index data to configuration file\n";
        return;
    }

    my $data = &cfgread::read_cfg_file($buildfile, '^(this)$', '^(indexmap)$');
    foreach my $map_entry (@{ $data->{'indexmap'} }) {
        my ($thisindex, $abbrev) = split (/\-\>/, $map_entry);

        push (@{ $collect_cfg->{'indexes'} }, $thisindex);

        # the first index listed becomes the default
        $collect_cfg->{'defaultindex'} = $thisindex
            unless defined $collect_cfg->{'defaultindex'};

        my $label = &get_index_name($thisindex);
        $collect_cfg->{'collectionmeta'}->{".$thisindex"} = "\"$label\"";
    }
}
690
# Map an mg index name (e.g. "section:text") to a human-readable
# collectionmeta label for the collect.cfg file.
sub get_index_name {
    my ($thisindex) = @_;

    my @label_map = (
        [ qr/paragraph/,     "paragraphs" ],
        [ qr/section.*text/, "chapters"   ],
        [ qr/Title/,         "titles"     ],
    );

    for my $entry (@label_map) {
        my ($pattern, $label) = @$entry;
        return $label if $thisindex =~ $pattern;
    }
    return "other";
}
698
# Append a Hierarchy classifier line for classification $classify (e.g.
# 'T') to the collect.cfg data, pointing at the hfile just written by
# make_info_file().
sub add_classify_cfg {
    my ($classify, $metadata, $file) = @_;

    $collect_cfg->{'classify'} = [] unless defined $collect_cfg->{'classify'};

    my %button_for = (
        'H' => "Howto",
        'C' => "Subject",
        'O' => "Organization",
        'T' => "Title",
    );
    my $title = $button_for{$classify};
    $title = "Other" unless defined $title;

    # the hfile is referenced by filename only (it lives in the etc dir)
    my ($filename) = $file =~ /\/([^\/]*)$/;

    push (@{$collect_cfg->{'classify'}},
          ["Hierarchy hfile=$filename metadata=$metadata buttonname=$title sort=Title"]);
}
717
sub output_cfg_file {
    # Write the accumulated $collect_cfg data out as a collect.cfg file.
    # The regexps tell cfgread which keys are scalars, lists, hashes and
    # lists-of-lists respectively.
    my ($collfile) = @_;
    &cfgread::write_cfg_file($collfile, $collect_cfg,
                             '^(creator|maintainer|public|beta|defaultindex)$',
                             '^(indexes)$', '^(format|collectionmeta)$',
                             '^(plugin|classify)$');
}
726
# Slurp the whole GDBM database (via db2txt) into the global %infodb,
# mapping each [key] header to the record text that follows it.  Records
# are terminated by a line of 70 dashes.
sub read_gdbm {
    my ($filename) = @_;

    # list-form pipe open passes $filename as a real argv element instead
    # of interpolating it into a shell command line (the old form broke on
    # metacharacters even though it quoted the name)
    open (PIPEIN, "-|", "db2txt", $filename) || die "couldn't open pipe from db2txt\n";

    my $line = ""; my $key = ""; my $value = "";
    while (defined ($line = <PIPEIN>)) {
        if ($line =~ /^\[([^\]]+)\]$/) {
            $key = $1;                  # start of a new record
        } elsif ($line =~ /^-{70}$/) {
            $infodb{$key} = $value;     # record separator: store and reset
            $value = "";
            $key = "";
        } else {
            $value .= $line;
        }
    }
    close PIPEIN;
}
Note: See TracBrowser for help on using the repository browser.