Context Navigation

source: gsdl/trunk/bin/script/unbuildv1.pl@ 17750

Last change on this file since 17750 was 17750, checked in by kjdon, 15 years ago
plugout and plugin name changes. do these scripts still work??
Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 21.8 KB

Line
1	#!/usr/bin/perl -w
2
3	###########################################################################
4	#
5	# unbuildv1.pl --
6	# A component of the Greenstone digital library software
7	# from the New Zealand Digital Library Project at the
8	# University of Waikato, New Zealand.
9	#
10	# Copyright (C) 1999 New Zealand Digital Library Project
11	#
12	# This program is free software; you can redistribute it and/or modify
13	# it under the terms of the GNU General Public License as published by
14	# the Free Software Foundation; either version 2 of the License, or
15	# (at your option) any later version.
16	#
17	# This program is distributed in the hope that it will be useful,
18	# but WITHOUT ANY WARRANTY; without even the implied warranty of
19	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20	# GNU General Public License for more details.
21	#
22	# You should have received a copy of the GNU General Public License
23	# along with this program; if not, write to the Free Software
24	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25	#
26	###########################################################################
27
28	# this program will decompress all the text from a built index
29	# and return it to gml format - this version of the program is designed to
30	# work on collections built with the first version of the greenstone software
31	# it extracts the browse classifications from the gdbm file, and recreates
32	# them. It also creates a default collect.cfg file.
33
34	# To run, this program needs the old archive directory (called archives),
35	# the index directory with the text and one other index, not paragraphs!! -
36	# use one of the section indexes. Also needs the build.cfg file (in the
37	# index dir) - used in creating a collect.cfg file. If build.cfg is not
38	# present, the parts of the collect.cfg file relating to indexes will not
39	# be entered.
40
41	# Stefan altered this script slightly August 2002 to use
42	# $GSDLHOME/bin/$GSDLOS/mgquery_old instead of the old mgquery in
43	# nzdl-1.2. mgquery_old is a statically linked binary compiled from
44	# mg-1.3d. It was compiled with SHORT_SUFFIX set so doesn't need the index
45	# file extensions to be altered.
46	# Also changed the command line options a little.
47
48	BEGIN {
49	die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
50	die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
51	unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
52	$FileHandle = 'FH000';
53	}
54
55	use doc;
56	use util;
57	use parsargv;
58	use FileHandle;
59	use English;
60	use cfgread;
61	use unicode;
62	use plugout;
63	select STDERR; $\| = 1;
64	select STDOUT; $\| = 1;
65
66
67	# globals
68	$collection = ""; # the collection name
69	$index = ""; # the selected index (like stt/unu)
70	$textdir = ""; # the textdir (like text/unu)
71	$toplevelinfo = []; #list of document OIDs
72	%infodb = (); #hash into GDBM file
73	$classifyinfo = []; # list of classifications
74	$doc_classif_info = {}; # hash of OIDs->classifications they belong to
75	$collect_cfg = {}; #data for the configuration file
76	$mgread = ++$FileHandle;
77	$mgwrite = ++$FileHandle;
78
79	sub print_usage {
80	print STDERR "\n usage: $0 [options]\n\n";
81	print STDERR " options:\n";
82	print STDERR " -verbosity number 0=none, 3=lots\n";
83	print STDERR " -old directory The directory in which the old collection lives\n";
84	print STDERR " -new directory The directory in which to put the new collection\n";
85	}
86
87	&main ();
88
89	sub main {
90	if (!parsargv::parse(\@ARGV,
91	'verbosity/\d+/2', \$verbosity,
92	'old/.*/', \$old,
93	'new/.*/', \$new)) {
94	&print_usage();
95	die "\n";
96	}
97
98	die "$old does not exist\n\n" unless (-d $old);
99	my $old_indexdir = &util::filename_cat($old, "index");
100	my $old_archivedir = &util::filename_cat($old, "archives");
101
102	my $new_archivedir = &util::filename_cat($new, "archives");
103	my $new_etcdir = &util::filename_cat($new, "etc");
104	if (!-e $new_archivedir) {
105	&util::mk_all_dir($new_archivedir);
106	}
107	if (!(-d $new_etcdir)) {
108	&util::mk_all_dir($new_etcdir);
109	}
110
111	my $gdbmfile = &get_gdbmfile($old_indexdir); # sets $collection and $textdir
112	&set_index($old_indexdir); # sets $index (just chooses one index)
113
114	my $buildcfgfile = &util::filename_cat($old_indexdir, "build.cfg");
115	my $colcfgfile = &util::filename_cat($new_etcdir, "collect.cfg");
116
117	# don't need this but giving it a value stops an annoying undefined
118	# value warning
119	$ENV{'GSDLCOLLECTDIR'} = 'tmp';
120
121	&add_default_cfg();
122	&add_index_cfg($buildcfgfile);
123
124	# work out all the classifications from the gdbm file, info for each doc
125	# (ie which classifications they belong to, are kept in $doc_classif_info
126	&get_classifyinfo($gdbmfile); #puts a list of classifications into classifyinfo
127	&get_toplevel_OID($gdbmfile); # puts a list of the top level document OIDs into $toplevelinfo
128
129	# read ldb file into %infodb
130	&read_gdbm($gdbmfile);
131
132	#this makes the files specifying the hierarchy of subjects, titles etc
133	foreach my $classify (@$classifyinfo) {
134	&make_info_file($new_etcdir, $classify);
135	}
136
137	# write out the collect.cfg
138	&output_cfg_file($colcfgfile);
139
140	&openmg($old_indexdir);
141
142	# read the archive information file
143	my $archive_info_filename = &util::filename_cat ($new_archivedir, "archives.inf");
144	my $archive_info = new arcinfo ();
145
146	my $opts = [];
147	push @$opts,("-output_info",$archive_info);
148
149	$processor = &plugout::load_plugout("GreenstoneXMLPlugout",$opts);
150	$processor->setoutputdir ($new_archivedir);
151
152	my ($doc_obj, $hashref, $children);
153	print STDERR "processing documents now\n" if $verbosity >=2;
154	foreach $oid (@$toplevelinfo) {
155	$value = $infodb{$oid};
156	$hashref={};
157	$children = [];
158	&get_metadata($value, $hashref);
159	$doc_obj = new doc ();
160	$doc_obj->set_OID($oid);
161	my ($olddir) = $hashref->{'archivedir'}; # old dir for this doc, where images are stored
162	$top = $doc_obj->get_top_section();
163	&add_section_content ($doc_obj, $top, $hashref, $olddir);
164	&add_classification_metadata($oid, $doc_obj, $top);
165	&add_cover_image($doc_obj, $old_archivedir, $olddir);
166	&get_children($hashref, $children);
167	&recurse_sections($doc_obj, $children, $oid, $top, $olddir) if (defined ($children));
168
169	#change OID to new format
170	$doc_obj->delete_OID();
171	$doc_obj->set_OID();
172	$processor->process($doc_obj);
173	# last;
174	}
175	print STDERR "\n";
176
177	&closemg();
178
179	# write out the archive information file
180	$archive_info->save_info($archive_info_filename);
181
182
183	}
184
185	# returns the path to the gdbm info database - also
186	# sets the $collection and $textdir global variable
187	sub get_gdbmfile {
188	my ($indexdir) = @_;
189	my ($gdbmfile);
190
191	opendir (DIR, $indexdir) \|\| die "Couldn't open directory $indexdir\n\n";
192	my @conts = readdir DIR;
193	close DIR;
194
195	foreach $file (@conts) {
196	if ($file =~ /text$/) {
197	$textdir = $file;
198	last;
199	}
200	}
201	die "No text directory found in $indexdir\n\n"
202	unless defined $textdir && $textdir =~ /text$/;
203
204	$gdbmfile = &util::filename_cat ($indexdir, $textdir);
205
206	opendir (DIR, $gdbmfile) \|\| die "Couldn't open directory $gdbmfile\n\n";
207	@conts = readdir DIR;
208	close DIR;
209
210	foreach my $file (@conts) {
211	if ($file =~ /^(.*?)\.(?:ldb\|bdb)$/) {
212	$collection = $1;
213	$gdbmfile = &util::filename_cat ($gdbmfile, $file);
214	last;
215	}
216	}
217
218	if (defined $collection && $collection =~ /\w/) {
219	$textdir = &util::filename_cat ($textdir, $collection);
220	} else {
221	die "collection global wasn't set\n";
222	}
223	return $gdbmfile if (-e $gdbmfile);
224	die "Couldn't find gdbm info database in $indexdir\n\n";
225	}
226
227
228	#this function will probably need to be changed for any different file formats
229	sub get_toplevel_OID {
230	my ($gdbmfile) = @_;
231
232	open (DB2TXT, "db2txt $gdbmfile \|") \|\| die "couldn't open pipe to db2txt\n";
233	print STDERR "Finding all top level sections from $gdbmfile\n" if $verbosity >= 2;
234
235	$/ = '-' x 70;
236	my $entry = "";
237	while (defined ($entry = <DB2TXT>)) {
238	next unless $entry =~ /\w/; #ignore blank entries
239	$entry =~ s/\n+/\\n/g; # replace multiple \n with single \n
240	my ($key, $value) = $entry =~ /\[([^\]])\](.)/;
241
242	next unless ($key =~ /B\.\d*$/); # assumes top level OID is like
243	# B.12
244
245	push( @$toplevelinfo, $key);
246
247	}
248
249	$/ = "\n";
250	}
251
252	# gets all the metadata from a gdbm file entry, and puts it into a hashref
253	#this has changed for old style formats
254	sub get_metadata {
255
256	my ($gdb_str_ref, $hashref) = @_;
257	# print STDERR $gdb_str_ref;
258	my @entries = split(/\n/, $gdb_str_ref);
259
260	my $hastext = 1;
261	foreach $entry (@entries) {
262
263	my($key, $value) = ($entry =~ /^<([^>])>(.?)$/ );
264
265	next if ($key eq "p");
266	next if ($key eq "j");
267
268	$key = "Title" if $key eq "t";
269	$key = "docnum" if $key eq "d";
270	if ($key eq "o") {
271	$key = "archivedir";
272	$value =~ s/HASH/hash/;
273	}
274	if ($key eq "c") {
275	$key = "contains";
276	$hastext=0;}
277
278	$$hashref{$key} .= $value;
279
280	}
281
282	$$hashref{'hastxt'} = $hastext;
283
284	}
285
286	# takes a hashref containing the metadata for a gdbmfile entry, and
287	# extracts the childrens numbers (from the 'contains' entry). assumes
288	# format is like B.14.1:B.24.2:B.24.3 returns a list like 1 2 3 4
289	sub get_children {
290	my ($hashref, $children) = @_;
291
292	my $childs = $hashref->{'contains'};
293	if (defined ($childs)) {
294	@xchildren = split /\:/, $childs;
295	foreach $kid (@xchildren) {
296	my ($child)= $kid =~ /\.([^\.]*)(?:\\n\|$)/;
297	push (@$children, $child);
298	}
299	# print STDERR "children are: ", @$children;
300	}
301	else {
302	$children = [];
303	}
304	}
305
306	#takes a hashref containing the metadata for a gdbmfile entry, and extracts
307	#the childrens numbers (from the 'contains' entry).
308	#assumes format is like B.14.1:B.24.2:B.24.3
309	#returns a list with the full child name ie B.14.1 B.14.2 etc
310	#used for classification stuff
311	sub get_whole_children {
312
313	my ($hashref, $children) = @_;
314
315	my $childs = $hashref->{'contains'};
316	if (defined ($childs)) {
317	@$children = split /\:/, $childs;
318	#print STDERR "children are: ", @$children;
319	}
320	else {
321	$children = [];
322	}
323	}
324
325	sub recurse_sections {
326	my ($doc_obj, $children, $parentoid, $parentsection, $olddir) = @_;
327
328	foreach $child (sort numerically @$children) {
329	$doc_obj->create_named_section("$parentsection.$child");
330	my $value = $infodb{"$parentoid.$child"};
331	my $hashref={};
332	&get_metadata($value, $hashref); # get childs metadata
333	my $newchildren = [];
334	&get_children($hashref, $newchildren); # get childs children
335	#add content fo rcurrent section
336	&add_section_content($doc_obj, "$parentsection.$child", $hashref, $olddir);
337	# process all the children if there are any
338	&recurse_sections($doc_obj, $newchildren, "$parentoid.$child", "$parentsection.$child", $olddir)
339	if (defined ($newchildren));
340	}
341	}
342
343	sub add_section_content {
344	my ($doc_obj, $cursection, $hashref, $olddir) = @_;
345
346	foreach $key (keys %$hashref) {
347	#dont need to store these metadata
348	next if $key =~ /(contains\|docnum\|hastxt\|doctype\|archivedir\|classifytype)/i;
349	my @items = split /@/, $hashref->{$key};
350	map {$doc_obj->add_metadata ($cursection, $key, $_); } @items;
351
352	}
353
354	my ($hastext) = $hashref->{'hastxt'} =~ /(0\|1)/;
355	if ($hastext) {
356
357	my ($docnum)= $hashref->{'docnum'} =~ /(\d*)/;
358	my $text = &get_text($docnum);
359	$doc_obj->add_text ($cursection, $text);
360
361	my (@images) = $text =~ /<img.?src=\"([^\"])\"[^>]*>/g;
362
363	if (scalar(@images)>0) {
364
365	foreach $img (@images) {
366	my ($assoc_file) = $img =~ /([^\/\\]\..)$/; #the name of the image
367	$img =~ s/_httpcollection_/$old/; #replace _httpcollection_ with path to old collection
368	$olddir =~ s/HASH/hash/;
369	$img =~ s/_thisOID_/$olddir/; #replace _thisOID_ with old archivedir name #the path to the image
370	$img = lc $img; # doc names and path upper case in file, lower
371	# case in directory
372	if (-e $img) {
373	$doc_obj->associate_file($img, $assoc_file);
374	} else {
375	print STDERR "WARNING: $img does not exist\n";
376	}
377	}
378
379	}
380	}
381	}
382
383
384	sub add_classification_metadata {
385
386	my ($oid, $doc_obj, $cursection) = @_;
387
388	if (defined $doc_classif_info->{$oid}) {
389
390	$hashref = $doc_classif_info->{$oid};
391
392	foreach $key (keys %$hashref) {
393	my @items = @{$hashref->{$key}};
394	map {$doc_obj->add_metadata ($cursection, $key, $_); } @items;
395	}
396	}
397	}
398	# picks up the cover image "cover.jpg" from the old archives directory.
399	sub add_cover_image {
400	my ($doc_obj, $archivedir, $olddir) = @_;
401	$assoc_file = "cover.jpg";
402	$img = &util::filename_cat($archivedir, $olddir, $assoc_file);
403
404	if (-e $img) {
405	$doc_obj->associate_file($img, $assoc_file);
406	} else {
407	print STDERR "WARNING: couldn't find $img\n";
408	}
409	}
410
411	sub set_index {
412	my ($indexdir) = @_;
413
414	# check that $collection has been set
415	die "collection global was not set\n"
416	unless defined $collection && $collection =~ /\w/;
417
418	# find an index (just use first non-text and non-paragraph directory we
419	# come across in $indexdir)
420	opendir (INDEXDIR, $indexdir) \|\| die "couldn't open directory $indexdir\n";
421	my @indexes = readdir INDEXDIR;
422	close INDEXDIR;
423	foreach $i (@indexes) {
424	next if $i =~ /text$/i \|\| $i =~ /\./ \|\| $i =~ /^p/i;
425	$index = &util::filename_cat ($i, $collection);
426	last;
427	}
428	}
429
430
431	#########################################################################
432
433	################ functions involving mg ################################
434	sub get_text {
435	my ($docnum) = @_;
436
437	print STDERR "." if $verbosity >= 2;
438	&mgcommand ($docnum);
439
440	<$mgread>; # eat the document separator
441
442	my $text = "";
443	my $line = "";
444
445	while (defined ($line = <$mgread>))
446	{
447	last if $line =~ /^<\/mg>/;
448	$text .= $line;
449	}
450
451	# Read in the last statement, which should be:
452	# "dd documents retrieved."
453	<$mgread>;
454
455	return $text;
456	}
457
458
459	sub numerically {$a <=> $b;}
460
461
462
463	# mg stuff
464
465	sub openmg {
466	my ($indexdir) = @_;
467
468	die "Unable to start mgquery." unless
469	&openpipe($mgread, $mgwrite, "mgquery_old -d $indexdir -f $index -t $textdir");
470
471	$mgwrite->autoflush();
472	&mgcommand('.set expert true');
473	&mgcommand('.set terminator "</mg>\n"');
474	&mgcommand('.set mode text');
475	&mgcommand('.set query docnums');
476	&mgcommand('.set term_freq off');
477	}
478
479	sub closemg {
480	&mgcommand (".quit");
481	close($mgread);
482	close($mgwrite);
483	}
484
485	sub mgcommand {
486	my ($command) = @_;
487
488	return if $command =~ /^\s*$/; #whitespace
489	#print STDERR "command: $command\n";
490	print $mgwrite "$command\n";
491
492	# eat up the command executed which is echoed
493	<$mgread>;
494	}
495
496	# openpipe(READ, WRITE, CMD)
497	#
498	# Like open2, except CMD's stderr is also redirected.
499	#
500	sub openpipe
501	{
502	my ($read, $write, $cmd) = @_;
503	my ($child_read, $child_write);
504
505	$child_read = ++$FileHandle;
506	$child_write = ++$FileHandle;
507
508	pipe($read, $child_write) \|\| die "Failed pipe($read, $child_write): $!";
509	pipe($child_read, $write) \|\| die "Failed pipe($child_read, $write): $!";
510	my $pid;
511
512	if (($pid = fork) < 0) {
513	die "Failed fork: $!";
514	} elsif ($pid == 0) {
515	close($read);
516	close($write);
517	open(STDIN, "<&$child_read");
518	open(STDOUT, ">&$child_write");
519	open(STDERR, ">&$child_write");
520	exec($cmd);
521	die "Failed exec $cmd: $!";
522	}
523
524	close($child_read);
525	close($child_write);
526
527	$write->autoflush();
528	$read->autoflush();
529
530	return 1;
531	}
532
533
534	######################################################################
535
536	############# functions to do with the classificaiton stuff ##########
537	sub get_classifyinfo {
538	my ($gdbmfile) = @_;
539
540	open (DB2TXT, "db2txt $gdbmfile \|") \|\| die "couldn't open pipe to db2txt\n";
541	print STDERR "Finding all classification sections from $gdbmfile\n" ;
542
543	$/ = '-' x 70;
544	my $entry = "";
545	while (defined ($entry = <DB2TXT>)) {
546	next unless $entry =~ /\w/; #ignore blank entries
547	$entry =~ s/\n+/\\n/g; # replace multiple \n with single \n
548	my ($key, $value) = $entry =~ /\[([^\]])\](.)/;
549
550	next unless ($key =~/^[A-Z]$/); # assumes classification OID is like
551	# C or T etc
552
553	push( @$classifyinfo, $key);
554
555	}
556
557	$/ = "\n";
558	}
559
560	#this creates the classification files needed for the hierarchy classifier
561	#used for subjects, titles, orgs etc
562	#also adds in entries to the collect_cfg hash
563	sub make_info_file {
564	my ($etcdir, $classifier) = @_;
565	my $info_file = "";
566
567	$info_file = &util::filename_cat($etcdir, "CL.${classifier}.txt");
568	$classifier_name = "CL.$classifier";
569	print STDERR "classification $classifier will be called CL.$classifier\n";
570
571	open (OUTDOC, ">$info_file" ) \|\| die "couldn't open file $info_file\n";
572
573	my $entry = $infodb{$classifier};
574	#print STDERR "entry = $entry\n";
575	my $hashref = {};
576	&get_metadata($entry, $hashref);
577	my $children=[];
578	&get_whole_children($hashref, $children);
579	foreach $child (@$children) {
580	&process_entry(OUTDOC, $classifier_name, $child);
581	}
582
583	close OUTDOC;
584
585	&add_classify_cfg($classifier, $classifier_name, $info_file);
586
587	}
588
589
590	sub process_entry {
591	my ($handle, $classifier_name, $classify_id) = @_;
592
593	my $value = $infodb{$classify_id};
594	my $hashref={};
595	&get_metadata($value, $hashref);
596	my $title = $hashref->{'Title'};
597
598	&add_line($handle, $classify_id, $title);
599
600	my $children = [];
601	&get_whole_children($hashref, $children);
602	foreach $child (@$children) {
603	if (&is_document($child)) {
604	&add_doc_metadata($child, $classifier_name, $classify_id);
605	}
606	else {
607	&process_entry($handle, $classifier_name, $child);
608	}
609	}
610	}
611
612	sub add_doc_metadata {
613	my ($doc_id, $classifier_name, $classifier_id) = @_;
614
615	#add entry to doc database
616	#print STDERR "at doc level, docnum=$classify_id\n";
617	$doc_classif_info->{$doc_id}={} unless defined $doc_classif_info->{$doc_id};
618	$doc_classif_info->{$doc_id}->{$classifier_name}=[] unless
619	defined $doc_classif_info->{$doc_id}->{$classifier_name};
620	push (@{$doc_classif_info->{$doc_id}->{$classifier_name}}, $classifier_id);
621	}
622
623	sub add_line {
624	my ($handle, $classify_id, $title) = @_;
625	$title = &unicode::ascii2utf8(\$title);
626	my ($num) = $classify_id =~ /^[A-Z]\.(.*)$/; #remove the C. from the front
627	print $handle "$classify_id\t$num\t\"$title\"\n";
628	}
629
630	sub is_document {
631	my ($oid) = @_;
632	return 1 if $oid =~ /^B\.\d/;
633	return 0;
634	}
635
636	########################################################################
637
638	########## stuff for producing collect.cfg file ###########################
639
640	sub add_default_cfg {
641
642	$username=`whoami`;
643	$username=`logname` unless defined $username;
644	$username="a_user" unless defined $username;
645	$username =~ s/\n//;
646	$collect_cfg->{'creator'}="$username\@cs.waikato.ac.nz";
647	$collect_cfg->{'maintainer'}="$username\@cs.waikato.ac.nz";
648	$collect_cfg->{'public'}="true";
649
650	$collect_cfg->{'plugin'}=[];
651	push (@{$collect_cfg->{'plugin'}}, ["GreenstoneXMLPlugin"]);
652	push (@{$collect_cfg->{'plugin'}}, ["ArchivesInfPlugin"]);
653	push (@{$collect_cfg->{'plugin'}}, ["DirectoryPlugin"]);
654
655	$collect_cfg->{'format'}={};
656	$collect_cfg->{'format'}->{'DocumentImages'}="true";
657	$collect_cfg->{'format'}->{'DocumentText'} =
658	"\"<h3>[Title]</h3>\\\\n\\\\n<p>[Text]\"";
659	$collect_cfg->{'format'}->{'SearchVList'} =
660	"\"<td valign=top>[link][icon][/link]</td><td>{If}{[parent(All': '):Title],[parent(All': '):Title]:}[link][Title][/link]</td>\"";
661
662	$collect_cfg->{'collectionmeta'}={};
663	$collect_cfg->{'collectionmeta'}->{'collectionname'}="\"$collection\"";
664	$collect_cfg->{'collectionmeta'}->{'iconcollection'}="\"_httpprefix_/collect/$collection/images/$collection.gif\"";
665	$collect_cfg->{'collectionmeta'}->{'iconcollectionsmall'}="\"_httpprefix_/collect/$collection/images/${collection}sm.gif\"";
666	$collect_cfg->{'collectionmeta'}->{'collectionextra'} = "\"This is a collection rebuilt from CDROM.\"";
667
668	}
669
670	sub add_index_cfg {
671	my ($buildfile) = @_;
672
673	my $data={};
674	$collect_cfg->{'indexes'}=[];
675	if (-e $buildfile) {
676	$data=&cfgread::read_cfg_file($buildfile, '^(this)$', '^(indexmap)$');
677	foreach my $i (@{$data->{'indexmap'}}) {
678	my ($thisindex, $abbrev)= split (/\-\>/, $i);
679	push (@{$collect_cfg->{'indexes'}}, $thisindex);
680	$collect_cfg->{'defaultindex'} = $thisindex unless defined
681	$collect_cfg->{'defaultindex'};
682	$name=&get_index_name($thisindex);
683	$thisindex=".$thisindex";
684	$collect_cfg->{'collectionmeta'}->{$thisindex} = "\"$name\"";
685	}
686	}
687	else {
688	print STDERR "Couldn't read $buildfile, could not add index data to configuration file\n";
689	}
690
691	}
692
693	sub get_index_name {
694	my ($thisindex) = @_;
695	return "paragraphs" if $thisindex =~ /paragraph/;
696	return "chapters" if $thisindex =~ /section.*text/;
697	return "titles" if $thisindex =~ /Title/;
698	return "other";
699	}
700
701	sub add_classify_cfg {
702
703	my ($classify, $metadata, $file) = @_;
704	$collect_cfg->{'classify'} = [] unless defined $collect_cfg->{'classify'};
705
706	my ($title);
707	$title = "Howto" if $classify eq 'H';
708	$title = "Subject" if $classify eq 'C';
709	$title = "Organization" if $classify eq 'O';
710	$title = "Title" if $classify eq 'T';
711	$title = "Other" unless defined $title;
712
713	my ($filename) = $file =~ /\/([^\/]*)$/;
714	my $entry = "Hierarchy -hfile $filename -metadata $metadata -buttonname $title -sort Title";
715	$entry .= " -hlist_at_top" if $title eq "Title";
716	push (@{$collect_cfg->{'classify'}},[$entry]);
717
718
719	}
720
721	sub output_cfg_file {
722
723	my ($collfile) = @_;
724	&cfgread::write_cfg_file($collfile, $collect_cfg,
725	'^(creator\|maintainer\|public\|beta\|defaultindex)$',
726	'^(indexes)$', '^(format\|collectionmeta)$',
727	'^(plugin\|classify)$');
728	}
729
730	sub read_gdbm {
731	my ($filename) = @_;
732
733	open (PIPEIN, "db2txt \"$filename\" \|") \|\| die "couldn't open pipe from db2txt\n";
734	my $line = ""; my $key = ""; my $value = "";
735	while (defined ($line = <PIPEIN>)) {
736	if ($line =~ /^\[([^\]]+)\]$/) {
737	$key = $1;
738	} elsif ($line =~ /^-{70}$/) {
739	$infodb{$key} = $value;
740	$value = "";
741	$key = "";
742	} else {
743	$value .= $line;
744	}
745	}
746	close PIPEIN;
747	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: