Context Navigation

source: main/tags/2.40/gsdl/bin/script/unbuildv1.pl@ 31150

Last change on this file since 31150 was 3326, checked in by sjboddie, 22 years ago
* empty log message *
Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 21.7 KB

Line
1	#!/usr/bin/perl -w
2
3	###########################################################################
4	#
5	# unbuildv1.pl --
6	# A component of the Greenstone digital library software
7	# from the New Zealand Digital Library Project at the
8	# University of Waikato, New Zealand.
9	#
10	# Copyright (C) 1999 New Zealand Digital Library Project
11	#
12	# This program is free software; you can redistribute it and/or modify
13	# it under the terms of the GNU General Public License as published by
14	# the Free Software Foundation; either version 2 of the License, or
15	# (at your option) any later version.
16	#
17	# This program is distributed in the hope that it will be useful,
18	# but WITHOUT ANY WARRANTY; without even the implied warranty of
19	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20	# GNU General Public License for more details.
21	#
22	# You should have received a copy of the GNU General Public License
23	# along with this program; if not, write to the Free Software
24	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25	#
26	###########################################################################
27
28	# this program will decompress all the text from a built index
29	# and return it to gml format - this version of the program is designed to
30	# work on collections built with the first version of the greenstone software
31	# it extracts the browse classifications from the gdbm file, and recreates
32	# them. It also creates a default collect.cfg file.
33
34	# To run, this program needs the old archive directory (called archives),
35	# the index directory with the text and one other index, not paragraphs!! -
36	# use one of the section indexes. Also needs the build.cfg file (in the
37	# index dir) - used in creating a collect.cfg file. If build.cfg is not
38	# present, the parts of the collect.cfg file relating to indexes will not
39	# be entered.
40
41	# Stefan altered this script slightly August 2002 to use
42	# $GSDLHOME/bin/$GSDLOS/mgquery_old instead of the old mgquery in
43	# nzdl-1.2. mgquery_old is a statically linked binary compiled from
44	# mg-1.3d. It was compiled with SHORT_SUFFIX set so doesn't need the index
45	# file extensions to be altered.
46	# Also changed the command line options a little.
47
48	BEGIN {
49	die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
50	die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
51	unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
52	$FileHandle = 'FH000';
53	}
54
55	use doc;
56	use docsave;
57	use util;
58	use parsargv;
59	use FileHandle;
60	use English;
61	use cfgread;
62	use unicode;
63	select STDERR; $\| = 1;
64	select STDOUT; $\| = 1;
65
66
67	# globals
68	$collection = ""; # the collection name
69	$index = ""; # the selected index (like stt/unu)
70	$textdir = ""; # the textdir (like text/unu)
71	$toplevelinfo = []; #list of document OIDs
72	%infodb = (); #hash into GDBM file
73	$classifyinfo = []; # list of classifications
74	$doc_classif_info = {}; # hash of OIDs->classifications they belong to
75	$collect_cfg = {}; #data for the configuration file
76	$mgread = ++$FileHandle;
77	$mgwrite = ++$FileHandle;
78
79	sub print_usage {
80	print STDERR "\n usage: $0 [options]\n\n";
81	print STDERR " options:\n";
82	print STDERR " -verbosity number 0=none, 3=lots\n";
83	print STDERR " -old directory The directory in which the old collection lives\n";
84	print STDERR " -new directory The directory in which to put the new collection\n";
85	}
86
87	&main ();
88
89	sub main {
90	if (!parsargv::parse(\@ARGV,
91	'verbosity/\d+/2', \$verbosity,
92	'old/.*/', \$old,
93	'new/.*/', \$new)) {
94	&print_usage();
95	die "\n";
96	}
97
98	die "$old does not exist\n\n" unless (-d $old);
99	my $old_indexdir = &util::filename_cat($old, "index");
100	my $old_archivedir = &util::filename_cat($old, "archives");
101
102	my $new_archivedir = &util::filename_cat($new, "archives");
103	my $new_etcdir = &util::filename_cat($new, "etc");
104	if (!-e $new_archivedir) {
105	&util::mk_all_dir($new_archivedir);
106	}
107	if (!(-d $new_etcdir)) {
108	&util::mk_all_dir($new_etcdir);
109	}
110
111	my $gdbmfile = &get_gdbmfile($old_indexdir); # sets $collection and $textdir
112	&set_index($old_indexdir); # sets $index (just chooses one index)
113
114	my $buildcfgfile = &util::filename_cat($old_indexdir, "build.cfg");
115	my $colcfgfile = &util::filename_cat($new_etcdir, "collect.cfg");
116
117	# don't need this but giving it a value stops an annoying undefined
118	# value warning
119	$ENV{'GSDLCOLLECTDIR'} = 'tmp';
120
121	&add_default_cfg();
122	&add_index_cfg($buildcfgfile);
123
124	# work out all the classifications from the gdbm file, info for each doc
125	# (ie which classifications they belong to, are kept in $doc_classif_info
126	&get_classifyinfo($gdbmfile); #puts a list of classifications into classifyinfo
127	&get_toplevel_OID($gdbmfile); # puts a list of the top level document OIDs into $toplevelinfo
128
129	# read ldb file into %infodb
130	&read_gdbm($gdbmfile);
131
132	#this makes the files specifying the hierarchy of subjects, titles etc
133	foreach my $classify (@$classifyinfo) {
134	&make_info_file($new_etcdir, $classify);
135	}
136
137	# write out the collect.cfg
138	&output_cfg_file($colcfgfile);
139
140	&openmg($old_indexdir);
141
142	# read the archive information file
143	my $archive_info_filename = &util::filename_cat ($new_archivedir, "archives.inf");
144	my $archive_info = new arcinfo ();
145
146	# create a docsave object to process the documents
147	my $processor = new docsave ("", $archive_info, $verbosity);
148	$processor->setarchivedir ($new_archivedir);
149
150	my ($doc_obj, $hashref, $children);
151	print STDERR "processing documents now\n" if $verbosity >=2;
152	foreach $oid (@$toplevelinfo) {
153	$value = $infodb{$oid};
154	$hashref={};
155	$children = [];
156	&get_metadata($value, $hashref);
157	$doc_obj = new doc ();
158	$doc_obj->set_OID($oid);
159	my ($olddir) = $hashref->{'archivedir'}; # old dir for this doc, where images are stored
160	$top = $doc_obj->get_top_section();
161	&add_section_content ($doc_obj, $top, $hashref, $olddir);
162	&add_classification_metadata($oid, $doc_obj, $top);
163	&add_cover_image($doc_obj, $old_archivedir, $olddir);
164	&get_children($hashref, $children);
165	&recurse_sections($doc_obj, $children, $oid, $top, $olddir) if (defined ($children));
166
167	#change OID to new format
168	$doc_obj->delete_OID();
169	$doc_obj->set_OID();
170	$processor->process($doc_obj);
171	# last;
172	}
173	print STDERR "\n";
174
175	&closemg();
176
177	# write out the archive information file
178	$archive_info->save_info($archive_info_filename);
179
180
181	}
182
183	# returns the path to the gdbm info database - also
184	# sets the $collection and $textdir global variable
185	sub get_gdbmfile {
186	my ($indexdir) = @_;
187	my ($gdbmfile);
188
189	opendir (DIR, $indexdir) \|\| die "Couldn't open directory $indexdir\n\n";
190	my @conts = readdir DIR;
191	close DIR;
192
193	foreach $file (@conts) {
194	if ($file =~ /text$/) {
195	$textdir = $file;
196	last;
197	}
198	}
199	die "No text directory found in $indexdir\n\n"
200	unless defined $textdir && $textdir =~ /text$/;
201
202	$gdbmfile = &util::filename_cat ($indexdir, $textdir);
203
204	opendir (DIR, $gdbmfile) \|\| die "Couldn't open directory $gdbmfile\n\n";
205	@conts = readdir DIR;
206	close DIR;
207
208	foreach my $file (@conts) {
209	if ($file =~ /^(.*?)\.(?:ldb\|bdb)$/) {
210	$collection = $1;
211	$gdbmfile = &util::filename_cat ($gdbmfile, $file);
212	last;
213	}
214	}
215
216	if (defined $collection && $collection =~ /\w/) {
217	$textdir = &util::filename_cat ($textdir, $collection);
218	} else {
219	die "collection global wasn't set\n";
220	}
221	return $gdbmfile if (-e $gdbmfile);
222	die "Couldn't find gdbm info database in $indexdir\n\n";
223	}
224
225
226	#this function will probably need to be changed for any different file formats
227	sub get_toplevel_OID {
228	my ($gdbmfile) = @_;
229
230	open (DB2TXT, "db2txt $gdbmfile \|") \|\| die "couldn't open pipe to db2txt\n";
231	print STDERR "Finding all top level sections from $gdbmfile\n" if $verbosity >= 2;
232
233	$/ = '-' x 70;
234	my $entry = "";
235	while (defined ($entry = <DB2TXT>)) {
236	next unless $entry =~ /\w/; #ignore blank entries
237	$entry =~ s/\n+/\\n/g; # replace multiple \n with single \n
238	my ($key, $value) = $entry =~ /\[([^\]])\](.)/;
239
240	next unless ($key =~ /B\.\d*$/); # assumes top level OID is like
241	# B.12
242
243	push( @$toplevelinfo, $key);
244
245	}
246
247	$/ = "\n";
248	}
249
250	# gets all the metadata from a gdbm file entry, and puts it into a hashref
251	#this has changed for old style formats
252	sub get_metadata {
253
254	my ($gdb_str_ref, $hashref) = @_;
255	# print STDERR $gdb_str_ref;
256	my @entries = split(/\n/, $gdb_str_ref);
257
258	my $hastext = 1;
259	foreach $entry (@entries) {
260
261	my($key, $value) = ($entry =~ /^<([^>])>(.?)$/ );
262
263	next if ($key eq "p");
264	next if ($key eq "j");
265
266	$key = "Title" if $key eq "t";
267	$key = "docnum" if $key eq "d";
268	if ($key eq "o") {
269	$key = "archivedir";
270	$value =~ s/HASH/hash/;
271	}
272	if ($key eq "c") {
273	$key = "contains";
274	$hastext=0;}
275
276	$$hashref{$key} .= $value;
277
278	}
279
280	$$hashref{'hastxt'} = $hastext;
281
282	}
283
284	# takes a hashref containing the metadata for a gdbmfile entry, and
285	# extracts the childrens numbers (from the 'contains' entry). assumes
286	# format is like B.14.1:B.24.2:B.24.3 returns a list like 1 2 3 4
287	sub get_children {
288	my ($hashref, $children) = @_;
289
290	my $childs = $hashref->{'contains'};
291	if (defined ($childs)) {
292	@xchildren = split /\:/, $childs;
293	foreach $kid (@xchildren) {
294	my ($child)= $kid =~ /\.([^\.]*)(?:\\n\|$)/;
295	push (@$children, $child);
296	}
297	# print STDERR "children are: ", @$children;
298	}
299	else {
300	$children = [];
301	}
302	}
303
304	#takes a hashref containing the metadata for a gdbmfile entry, and extracts
305	#the childrens numbers (from the 'contains' entry).
306	#assumes format is like B.14.1:B.24.2:B.24.3
307	#returns a list with the full child name ie B.14.1 B.14.2 etc
308	#used for classification stuff
309	sub get_whole_children {
310
311	my ($hashref, $children) = @_;
312
313	my $childs = $hashref->{'contains'};
314	if (defined ($childs)) {
315	@$children = split /\:/, $childs;
316	#print STDERR "children are: ", @$children;
317	}
318	else {
319	$children = [];
320	}
321	}
322
323	sub recurse_sections {
324	my ($doc_obj, $children, $parentoid, $parentsection, $olddir) = @_;
325
326	foreach $child (sort numerically @$children) {
327	$doc_obj->create_named_section("$parentsection.$child");
328	my $value = $infodb{"$parentoid.$child"};
329	my $hashref={};
330	&get_metadata($value, $hashref); # get childs metadata
331	my $newchildren = [];
332	&get_children($hashref, $newchildren); # get childs children
333	#add content fo rcurrent section
334	&add_section_content($doc_obj, "$parentsection.$child", $hashref, $olddir);
335	# process all the children if there are any
336	&recurse_sections($doc_obj, $newchildren, "$parentoid.$child", "$parentsection.$child", $olddir)
337	if (defined ($newchildren));
338	}
339	}
340
341	sub add_section_content {
342	my ($doc_obj, $cursection, $hashref, $olddir) = @_;
343
344	foreach $key (keys %$hashref) {
345	#dont need to store these metadata
346	next if $key =~ /(contains\|docnum\|hastxt\|doctype\|archivedir\|classifytype)/i;
347	my @items = split /@/, $hashref->{$key};
348	map {$doc_obj->add_metadata ($cursection, $key, $_); } @items;
349
350	}
351
352	my ($hastext) = $hashref->{'hastxt'} =~ /(0\|1)/;
353	if ($hastext) {
354
355	my ($docnum)= $hashref->{'docnum'} =~ /(\d*)/;
356	my $text = &get_text($docnum);
357	$doc_obj->add_text ($cursection, $text);
358
359	my (@images) = $text =~ /<img.?src=\"([^\"])\"[^>]*>/g;
360
361	if (scalar(@images)>0) {
362
363	foreach $img (@images) {
364	my ($assoc_file) = $img =~ /([^\/\\]\..)$/; #the name of the image
365	$img =~ s/_httpcollection_/$old/; #replace _httpcollection_ with path to old collection
366	$olddir =~ s/HASH/hash/;
367	$img =~ s/_thisOID_/$olddir/; #replace _thisOID_ with old archivedir name #the path to the image
368	$img = lc $img; # doc names and path upper case in file, lower
369	# case in directory
370	if (-e $img) {
371	$doc_obj->associate_file($img, $assoc_file);
372	} else {
373	print STDERR "WARNING: $img does not exist\n";
374	}
375	}
376
377	}
378	}
379	}
380
381
382	sub add_classification_metadata {
383
384	my ($oid, $doc_obj, $cursection) = @_;
385
386	if (defined $doc_classif_info->{$oid}) {
387
388	$hashref = $doc_classif_info->{$oid};
389
390	foreach $key (keys %$hashref) {
391	my @items = @{$hashref->{$key}};
392	map {$doc_obj->add_metadata ($cursection, $key, $_); } @items;
393	}
394	}
395	}
396	# picks up the cover image "cover.jpg" from the old archives directory.
397	sub add_cover_image {
398	my ($doc_obj, $archivedir, $olddir) = @_;
399	$assoc_file = "cover.jpg";
400	$img = &util::filename_cat($archivedir, $olddir, $assoc_file);
401
402	if (-e $img) {
403	$doc_obj->associate_file($img, $assoc_file);
404	} else {
405	print STDERR "WARNING: couldn't find $img\n";
406	}
407	}
408
409	sub set_index {
410	my ($indexdir) = @_;
411
412	# check that $collection has been set
413	die "collection global was not set\n"
414	unless defined $collection && $collection =~ /\w/;
415
416	# find an index (just use first non-text and non-paragraph directory we
417	# come across in $indexdir)
418	opendir (INDEXDIR, $indexdir) \|\| die "couldn't open directory $indexdir\n";
419	my @indexes = readdir INDEXDIR;
420	close INDEXDIR;
421	foreach $i (@indexes) {
422	next if $i =~ /text$/i \|\| $i =~ /\./ \|\| $i =~ /^p/i;
423	$index = &util::filename_cat ($i, $collection);
424	last;
425	}
426	}
427
428
429	#########################################################################
430
431	################ functions involving mg ################################
432	sub get_text {
433	my ($docnum) = @_;
434
435	print STDERR "." if $verbosity >= 2;
436	&mgcommand ($docnum);
437
438	<$mgread>; # eat the document separator
439
440	my $text = "";
441	my $line = "";
442
443	while (defined ($line = <$mgread>))
444	{
445	last if $line =~ /^<\/mg>/;
446	$text .= $line;
447	}
448
449	# Read in the last statement, which should be:
450	# "dd documents retrieved."
451	<$mgread>;
452
453	return $text;
454	}
455
456
457	sub numerically {$a <=> $b;}
458
459
460
461	# mg stuff
462
463	sub openmg {
464	my ($indexdir) = @_;
465
466	die "Unable to start mgquery." unless
467	&openpipe($mgread, $mgwrite, "mgquery_old -d $indexdir -f $index -t $textdir");
468
469	$mgwrite->autoflush();
470	&mgcommand('.set expert true');
471	&mgcommand('.set terminator "</mg>\n"');
472	&mgcommand('.set mode text');
473	&mgcommand('.set query docnums');
474	&mgcommand('.set term_freq off');
475	}
476
477	sub closemg {
478	&mgcommand (".quit");
479	close($mgread);
480	close($mgwrite);
481	}
482
483	sub mgcommand {
484	my ($command) = @_;
485
486	return if $command =~ /^\s*$/; #whitespace
487	#print STDERR "command: $command\n";
488	print $mgwrite "$command\n";
489
490	# eat up the command executed which is echoed
491	<$mgread>;
492	}
493
494	# openpipe(READ, WRITE, CMD)
495	#
496	# Like open2, except CMD's stderr is also redirected.
497	#
498	sub openpipe
499	{
500	my ($read, $write, $cmd) = @_;
501	my ($child_read, $child_write);
502
503	$child_read = ++$FileHandle;
504	$child_write = ++$FileHandle;
505
506	pipe($read, $child_write) \|\| die "Failed pipe($read, $child_write): $!";
507	pipe($child_read, $write) \|\| die "Failed pipe($child_read, $write): $!";
508	my $pid;
509
510	if (($pid = fork) < 0) {
511	die "Failed fork: $!";
512	} elsif ($pid == 0) {
513	close($read);
514	close($write);
515	open(STDIN, "<&$child_read");
516	open(STDOUT, ">&$child_write");
517	open(STDERR, ">&$child_write");
518	exec($cmd);
519	die "Failed exec $cmd: $!";
520	}
521
522	close($child_read);
523	close($child_write);
524
525	$write->autoflush();
526	$read->autoflush();
527
528	return 1;
529	}
530
531
532	######################################################################
533
534	############# functions to do with the classificaiton stuff ##########
535	sub get_classifyinfo {
536	my ($gdbmfile) = @_;
537
538	open (DB2TXT, "db2txt $gdbmfile \|") \|\| die "couldn't open pipe to db2txt\n";
539	print STDERR "Finding all classification sections from $gdbmfile\n" ;
540
541	$/ = '-' x 70;
542	my $entry = "";
543	while (defined ($entry = <DB2TXT>)) {
544	next unless $entry =~ /\w/; #ignore blank entries
545	$entry =~ s/\n+/\\n/g; # replace multiple \n with single \n
546	my ($key, $value) = $entry =~ /\[([^\]])\](.)/;
547
548	next unless ($key =~/^[A-Z]$/); # assumes classification OID is like
549	# C or T etc
550
551	push( @$classifyinfo, $key);
552
553	}
554
555	$/ = "\n";
556	}
557
558	#this creates the classification files needed for the hierarchy classifier
559	#used for subjects, titles, orgs etc
560	#also adds in entries to the collect_cfg hash
561	sub make_info_file {
562	my ($etcdir, $classifier) = @_;
563	my $info_file = "";
564
565	$info_file = &util::filename_cat($etcdir, "CL.${classifier}.txt");
566	$classifier_name = "CL.$classifier";
567	print STDERR "classification $classifier will be called CL.$classifier\n";
568
569	open (OUTDOC, ">$info_file" ) \|\| die "couldn't open file $info_file\n";
570
571	my $entry = $infodb{$classifier};
572	#print STDERR "entry = $entry\n";
573	my $hashref = {};
574	&get_metadata($entry, $hashref);
575	my $children=[];
576	&get_whole_children($hashref, $children);
577	foreach $child (@$children) {
578	&process_entry(OUTDOC, $classifier_name, $child);
579	}
580
581	close OUTDOC;
582
583	&add_classify_cfg($classifier, $classifier_name, $info_file);
584
585	}
586
587
588	sub process_entry {
589	my ($handle, $classifier_name, $classify_id) = @_;
590
591	my $value = $infodb{$classify_id};
592	my $hashref={};
593	&get_metadata($value, $hashref);
594	my $title = $hashref->{'Title'};
595
596	&add_line($handle, $classify_id, $title);
597
598	my $children = [];
599	&get_whole_children($hashref, $children);
600	foreach $child (@$children) {
601	if (&is_document($child)) {
602	&add_doc_metadata($child, $classifier_name, $classify_id);
603	}
604	else {
605	&process_entry($handle, $classifier_name, $child);
606	}
607	}
608	}
609
610	sub add_doc_metadata {
611	my ($doc_id, $classifier_name, $classifier_id) = @_;
612
613	#add entry to doc database
614	#print STDERR "at doc level, docnum=$classify_id\n";
615	$doc_classif_info->{$doc_id}={} unless defined $doc_classif_info->{$doc_id};
616	$doc_classif_info->{$doc_id}->{$classifier_name}=[] unless
617	defined $doc_classif_info->{$doc_id}->{$classifier_name};
618	push (@{$doc_classif_info->{$doc_id}->{$classifier_name}}, $classifier_id);
619	}
620
621	sub add_line {
622	my ($handle, $classify_id, $title) = @_;
623	$title = &unicode::ascii2utf8(\$title);
624	my ($num) = $classify_id =~ /^[A-Z]\.(.*)$/; #remove the C. from the front
625	print $handle "$classify_id\t$num\t\"$title\"\n";
626	}
627
628	sub is_document {
629	my ($oid) = @_;
630	return 1 if $oid =~ /^B\.\d/;
631	return 0;
632	}
633
634	########################################################################
635
636	########## stuff for producing collect.cfg file ###########################
637
638	sub add_default_cfg {
639
640	$username=`whoami`;
641	$username=`logname` unless defined $username;
642	$username="a_user" unless defined $username;
643	$username =~ s/\n//;
644	$collect_cfg->{'creator'}="$username\@cs.waikato.ac.nz";
645	$collect_cfg->{'maintainer'}="$username\@cs.waikato.ac.nz";
646	$collect_cfg->{'public'}="true";
647
648	$collect_cfg->{'plugin'}=[];
649	push (@{$collect_cfg->{'plugin'}}, ["GAPlug"]);
650	push (@{$collect_cfg->{'plugin'}}, ["ArcPlug"]);
651	push (@{$collect_cfg->{'plugin'}}, ["RecPlug"]);
652
653	$collect_cfg->{'format'}={};
654	$collect_cfg->{'format'}->{'DocumentImages'}="true";
655	$collect_cfg->{'format'}->{'DocumentText'} =
656	"\"<h3>[Title]</h3>\\\\n\\\\n<p>[Text]\"";
657	$collect_cfg->{'format'}->{'SearchVList'} =
658	"\"<td valign=top>[link][icon][/link]</td><td>{If}{[parent(All': '):Title],[parent(All': '):Title]:}[link][Title][/link]</td>\"";
659
660	$collect_cfg->{'collectionmeta'}={};
661	$collect_cfg->{'collectionmeta'}->{'collectionname'}="\"$collection\"";
662	$collect_cfg->{'collectionmeta'}->{'iconcollection'}="\"_httpprefix_/collect/$collection/images/$collection.gif\"";
663	$collect_cfg->{'collectionmeta'}->{'iconcollectionsmall'}="\"_httpprefix_/collect/$collection/images/${collection}sm.gif\"";
664	$collect_cfg->{'collectionmeta'}->{'collectionextra'} = "\"This is a collection rebuilt from CDROM.\"";
665
666	}
667
668	sub add_index_cfg {
669	my ($buildfile) = @_;
670
671	my $data={};
672	$collect_cfg->{'indexes'}=[];
673	if (-e $buildfile) {
674	$data=&cfgread::read_cfg_file($buildfile, '^(this)$', '^(indexmap)$');
675	foreach my $i (@{$data->{'indexmap'}}) {
676	my ($thisindex, $abbrev)= split (/\-\>/, $i);
677	push (@{$collect_cfg->{'indexes'}}, $thisindex);
678	$collect_cfg->{'defaultindex'} = $thisindex unless defined
679	$collect_cfg->{'defaultindex'};
680	$name=&get_index_name($thisindex);
681	$thisindex=".$thisindex";
682	$collect_cfg->{'collectionmeta'}->{$thisindex} = "\"$name\"";
683	}
684	}
685	else {
686	print STDERR "Couldn't read $buildfile, could not add index data to configuration file\n";
687	}
688
689	}
690
691	sub get_index_name {
692	my ($thisindex) = @_;
693	return "paragraphs" if $thisindex =~ /paragraph/;
694	return "chapters" if $thisindex =~ /section.*text/;
695	return "titles" if $thisindex =~ /Title/;
696	return "other";
697	}
698
699	sub add_classify_cfg {
700
701	my ($classify, $metadata, $file) = @_;
702	$collect_cfg->{'classify'} = [] unless defined $collect_cfg->{'classify'};
703
704	my ($title);
705	$title = "Howto" if $classify eq 'H';
706	$title = "Subject" if $classify eq 'C';
707	$title = "Organization" if $classify eq 'O';
708	$title = "Title" if $classify eq 'T';
709	$title = "Other" unless defined $title;
710
711	my ($filename) = $file =~ /\/([^\/]*)$/;
712	my $entry = "Hierarchy -hfile $filename -metadata $metadata -buttonname $title -sort Title";
713	$entry .= " -hlist_at_top" if $title eq "Title";
714	push (@{$collect_cfg->{'classify'}},[$entry]);
715
716
717	}
718
719	sub output_cfg_file {
720
721	my ($collfile) = @_;
722	&cfgread::write_cfg_file($collfile, $collect_cfg,
723	'^(creator\|maintainer\|public\|beta\|defaultindex)$',
724	'^(indexes)$', '^(format\|collectionmeta)$',
725	'^(plugin\|classify)$');
726	}
727
728	sub read_gdbm {
729	my ($filename) = @_;
730
731	open (PIPEIN, "db2txt \"$filename\" \|") \|\| die "couldn't open pipe from db2txt\n";
732	my $line = ""; my $key = ""; my $value = "";
733	while (defined ($line = <PIPEIN>)) {
734	if ($line =~ /^\[([^\]]+)\]$/) {
735	$key = $1;
736	} elsif ($line =~ /^-{70}$/) {
737	$infodb{$key} = $value;
738	$value = "";
739	$key = "";
740	} else {
741	$value .= $line;
742	}
743	}
744	close PIPEIN;
745	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: