#!/usr/bin/perl -w

###########################################################################
#
# unbuildv1.pl --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# this program will decompress all the text from a built index
# and return it to gml format - this version of the program is designed to
# work on collections built with the first version of the greenstone software
# it extracts the browse classifications from the gdbm file, and recreates
# them. It also creates a default collect.cfg file.

# To run, this program needs the old archive directory (called archives),
# the index directory with the text and one other index, not paragraphs!! -
# use one of the section indexes. Also needs the build.cfg file (in the
# index dir) - used in creating a collect.cfg file. If build.cfg is not
# present, the parts of the collect.cfg file relating to indexes will not
# be entered.

# Stefan altered this script slightly August 2002 to use
# $GSDLHOME/bin/$GSDLOS/mgquery_old instead of the old mgquery in
# nzdl-1.2. mgquery_old is a statically linked binary compiled from
# mg-1.3d. It was compiled with SHORT_SUFFIX set so doesn't need the index
# file extensions to be altered.
# Also changed the command line options a little.
BEGIN {
    # Both environment variables must point at a Greenstone installation.
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    # Seed for generating unique bareword filehandle names (FH001, FH002, ...).
    $FileHandle = 'FH000';
}

use doc;
use util;
use parsargv;
use FileHandle;
use English;
use cfgread;
use unicode;
use plugout;

# Unbuffer both output streams so progress dots and warnings interleave sanely.
select STDERR; $| = 1;
select STDOUT; $| = 1;

# globals
$collection = "";          # the collection name
$index = "";               # the selected index (like stt/unu)
$textdir = "";             # the textdir (like text/unu)
$toplevelinfo = [];        # list of top-level document OIDs
%infodb = ();              # in-memory copy of the GDBM file
$classifyinfo = [];        # list of classifications
$doc_classif_info = {};    # hash of OID -> classifications it belongs to
$collect_cfg = {};         # data for the configuration file
$mgread = ++$FileHandle;   # filehandle names for the mgquery pipe
$mgwrite = ++$FileHandle;

# Prints the command-line usage summary to STDERR.
sub print_usage {
    print STDERR "\n  usage: $0 [options]\n\n";
    print STDERR "  options:\n";
    print STDERR "   -verbosity number      0=none, 3=lots\n";
    print STDERR "   -old directory         The directory in which the old collection lives\n";
    print STDERR "   -new directory         The directory in which to put the new collection\n";
}

main();

# Drives the whole conversion: reads the v1 index/gdbm data, writes a new
# archives directory in GML form plus hierarchy files and a collect.cfg.
sub main {
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'old/.*/', \$old,
                         'new/.*/', \$new)) {
        &print_usage();
        die "\n";
    }

    die "$old does not exist\n\n" unless -d $old;

    # Source (v1) collection layout.
    my $old_indexdir = &util::filename_cat($old, "index");
    my $old_archivedir = &util::filename_cat($old, "archives");

    # Destination (rebuilt) collection layout.
    my $new_archivedir = &util::filename_cat($new, "archives");
    my $new_etcdir = &util::filename_cat($new, "etc");
    &util::mk_all_dir($new_archivedir) unless -e $new_archivedir;
    &util::mk_all_dir($new_etcdir) unless -d $new_etcdir;

    my $gdbmfile = &get_gdbmfile($old_indexdir); # sets $collection and $textdir
    &set_index($old_indexdir);                   # sets $index (just chooses one index)

    my $buildcfgfile = &util::filename_cat($old_indexdir, "build.cfg");
    my $colcfgfile = &util::filename_cat($new_etcdir, "collect.cfg");

    # Don't need this, but giving it a value stops an annoying
    # undefined-value warning.
    $ENV{'GSDLCOLLECTDIR'} = 'tmp';

    &add_default_cfg();
    &add_index_cfg($buildcfgfile);

    # Work out all the classifications from the gdbm file; which
    # classifications each doc belongs to ends up in $doc_classif_info.
    &get_classifyinfo($gdbmfile);  # fills @$classifyinfo
    &get_toplevel_OID($gdbmfile);  # fills @$toplevelinfo

    # Read the ldb file into %infodb.
    &read_gdbm($gdbmfile);

    # Write the files specifying the hierarchy of subjects, titles etc.
    foreach my $classify (@$classifyinfo) {
        &make_info_file($new_etcdir, $classify);
    }

    # Write out the collect.cfg.
    &output_cfg_file($colcfgfile);

    &openmg($old_indexdir);

    # The archive information file for the rebuilt collection.
    my $archive_info_filename = &util::filename_cat ($new_archivedir, "archives.inf");
    my $archive_info = arcinfo->new();

    my $opts = [];
    push @$opts, ("-output_info", $archive_info);

    $processor = &plugout::load_plugout("GreenstoneXMLPlugout", $opts);
    $processor->setoutputdir ($new_archivedir);

    my ($doc_obj, $hashref, $children);
    print STDERR "processing documents now\n" if $verbosity >= 2;
    foreach $oid (@$toplevelinfo) {
        $value = $infodb{$oid};
        $hashref = {};
        $children = [];
        &get_metadata($value, $hashref);
        $doc_obj = new doc ();
        $doc_obj->set_OID($oid);
        # Old dir for this doc, where its images are stored.
        my ($olddir) = $hashref->{'archivedir'};
        $top = $doc_obj->get_top_section();
        &add_section_content ($doc_obj, $top, $hashref, $olddir);
        &add_classification_metadata($oid, $doc_obj, $top);
        &add_cover_image($doc_obj, $old_archivedir, $olddir);
        &get_children($hashref, $children);
        &recurse_sections($doc_obj, $children, $oid, $top, $olddir)
            if defined $children;

        # Change OID to the new (hash-based) format before output.
        $doc_obj->delete_OID();
        $doc_obj->set_OID();
        $processor->process($doc_obj);
    }
    print STDERR "\n";

    &closemg();

    # Write out the archive information file.
    $archive_info->save_info($archive_info_filename);
}

# Returns the path to the gdbm info database.  Also sets the $collection
# and $textdir global variables as a side effect.
sub get_gdbmfile {
    my ($indexdir) = @_;
    my ($gdbmfile);

    # Find the text directory (a name ending in "text") inside the index dir.
    opendir (DIR, $indexdir) || die "Couldn't open directory $indexdir\n\n";
    my @conts = readdir DIR;
    closedir DIR;   # was close(); dirhandles need closedir

    foreach my $file (@conts) {
        if ($file =~ /text$/) {
            $textdir = $file;
            last;
        }
    }
    die "No text directory found in $indexdir\n\n"
        unless defined $textdir && $textdir =~ /text$/;

    $gdbmfile = &util::filename_cat ($indexdir, $textdir);

    # Find the database file (<collection>.ldb or <collection>.bdb) inside it.
    opendir (DIR, $gdbmfile) || die "Couldn't open directory $gdbmfile\n\n";
    @conts = readdir DIR;
    closedir DIR;   # was close(); dirhandles need closedir

    foreach my $file (@conts) {
        if ($file =~ /^(.*?)\.(?:ldb|bdb)$/) {
            $collection = $1;
            $gdbmfile = &util::filename_cat ($gdbmfile, $file);
            last;
        }
    }

    if (defined $collection && $collection =~ /\w/) {
        $textdir = &util::filename_cat ($textdir, $collection);
    } else {
        die "collection global wasn't set\n";
    }
    return $gdbmfile if (-e $gdbmfile);
    die "Couldn't find gdbm info database in $indexdir\n\n";
}


# Scans the gdbm dump for top-level document OIDs (keys like "B.12") and
# pushes them onto the global @$toplevelinfo list.
# This function will probably need to be changed for any different file format.
sub get_toplevel_OID {
    my ($gdbmfile) = @_;

    open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
    print STDERR "Finding all top level sections from $gdbmfile\n" if $verbosity >= 2;

    # db2txt separates records with a line of 70 dashes.
    $/ = '-' x 70;
    while (defined (my $entry = <DB2TXT>)) {
        next unless $entry =~ /\w/;   # ignore blank entries
        $entry =~ s/\n+/\\n/g;        # collapse newlines to literal "\n"
        my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/;

        # Assumes a top level OID looks like "B.12".
        next unless $key =~ /B\.\d*$/;

        push (@$toplevelinfo, $key);
    }

    $/ = "\n";
}


# Parses one gdbm entry (a string of "<key>value" lines) into %$hashref.
# Old-style single-letter keys are mapped to their long names (t -> Title,
# d -> docnum, o -> archivedir, c -> contains); "p" and "j" entries are
# dropped.  Also sets $hashref->{'hastxt'} to 0 when the entry has a
# "contains" list (i.e. it is an internal node with no text of its own).
sub get_metadata {

    my ($gdb_str_ref, $hashref) = @_;
    my @entries = split(/\n/, $gdb_str_ref);

    my $hastext = 1;
    foreach my $entry (@entries) {

        my ($key, $value) = ($entry =~ /^<([^>]*)>(.*?)$/);

        # Lines not in "<key>value" form used to leave $key undefined,
        # producing warnings and a bogus '' hash key - skip them.
        next unless defined $key;

        next if ($key eq "p");
        next if ($key eq "j");

        $key = "Title" if $key eq "t";
        $key = "docnum" if $key eq "d";
        if ($key eq "o") {
            $key = "archivedir";
            # Doc dir names are upper case in the file, lower case on disk.
            $value =~ s/HASH/hash/;
        }
        if ($key eq "c") {
            $key = "contains";
            $hastext = 0;
        }

        $$hashref{$key} .= $value;
    }

    $$hashref{'hastxt'} = $hastext;
}


# Takes a hashref containing the metadata for a gdbm entry and extracts the
# children's final numbers from the 'contains' entry.  Assumes the format
# is like "B.14.1:B.24.2:B.24.3" and pushes "1", "2", "3" onto @$children.
# Leaves @$children empty when there is no 'contains' entry.
sub get_children {
    my ($hashref, $children) = @_;

    my $childs = $hashref->{'contains'};
    return unless defined $childs;   # no children: caller's list stays empty

    # was a package global (@xchildren); now lexical
    my @child_ids = split /\:/, $childs;
    foreach my $kid (@child_ids) {
        # Keep only the last dot-separated component (up to a literal "\n").
        my ($child) = $kid =~ /\.([^\.]*)(?:\\n|$)/;
        push (@$children, $child);
    }
}


# Takes a hashref containing the metadata for a gdbm entry and splits the
# 'contains' entry (format "B.14.1:B.24.2:B.24.3") into the FULL child
# names, e.g. ("B.14.1", "B.24.2", ...), stored into @$children.
# Used for the classification hierarchy.
sub get_whole_children {

    my ($hashref, $children) = @_;

    my $contained = $hashref->{'contains'};
    if (defined $contained) {
        @$children = split /\:/, $contained;
    }
    # otherwise @$children is left as the caller passed it (empty list)
}

# Recursively builds the section tree of $doc_obj: for each child number,
# creates the named section, attaches its metadata/text, then descends.
sub recurse_sections {
    my ($doc_obj, $children, $parentoid, $parentsection, $olddir) = @_;

    foreach my $child (sort numerically @$children) {
        $doc_obj->create_named_section("$parentsection.$child");
        my $value = $infodb{"$parentoid.$child"};
        my $hashref = {};
        &get_metadata($value, $hashref);          # child's metadata
        my $newchildren = [];
        &get_children($hashref, $newchildren);    # child's children
        # Add content for the current section.
        &add_section_content($doc_obj, "$parentsection.$child", $hashref, $olddir);
        # Then process the grandchildren, if any.
        &recurse_sections($doc_obj, $newchildren, "$parentoid.$child",
                          "$parentsection.$child", $olddir)
            if defined $newchildren;
    }
}


# Attaches the metadata, text and referenced images of one gdbm entry to
# section $cursection of $doc_obj.
sub add_section_content {
    my ($doc_obj, $cursection, $hashref, $olddir) = @_;

    foreach my $meta_key (keys %$hashref) {
        # These entries are structural, not metadata to be stored.
        next if $meta_key =~ /(contains|docnum|hastxt|doctype|archivedir|classifytype)/i;
        foreach my $item (split /@/, $hashref->{$meta_key}) {
            $doc_obj->add_metadata ($cursection, $meta_key, $item);
        }
    }

    my ($hastext) = $hashref->{'hastxt'} =~ /(0|1)/;
    return unless $hastext;

    # Pull the section text out of the mg text index by document number.
    my ($docnum) = $hashref->{'docnum'} =~ /(\d*)/;
    my $text = &get_text($docnum);
    $doc_obj->add_text ($cursection, $text);

    # Associate every image the text refers to.
    my (@images) = $text =~ /<img.*?src=\"([^\"]*)\"[^>]*>/g;
    foreach my $img (@images) {
        my ($assoc_file) = $img =~ /([^\/\\]*\..*)$/;   # the image's file name
        $img =~ s/_httpcollection_/$old/;  # _httpcollection_ -> old collection path
        $olddir =~ s/HASH/hash/;
        $img =~ s/_thisOID_/$olddir/;      # _thisOID_ -> old archivedir name
        # Doc names and paths are upper case in the file, lower case on disk.
        $img = lc $img;
        if (-e $img) {
            $doc_obj->associate_file($img, $assoc_file);
        } else {
            print STDERR "WARNING: $img does not exist\n";
        }
    }
}

# Copies the classification memberships recorded for $oid (in the global
# $doc_classif_info) onto section $cursection of $doc_obj as metadata.
sub add_classification_metadata {

    my ($oid, $doc_obj, $cursection) = @_;

    return unless defined $doc_classif_info->{$oid};

    my $classif = $doc_classif_info->{$oid};
    foreach my $classifier_name (keys %$classif) {
        foreach my $id (@{$classif->{$classifier_name}}) {
            $doc_obj->add_metadata ($cursection, $classifier_name, $id);
        }
    }
}
# Picks up the cover image "cover.jpg" from the old archives directory
# and associates it with the document.
sub add_cover_image {
    my ($doc_obj, $archivedir, $olddir) = @_;

    my $assoc_file = "cover.jpg";
    my $img = &util::filename_cat($archivedir, $olddir, $assoc_file);

    if (-e $img) {
        $doc_obj->associate_file($img, $assoc_file);
    } else {
        print STDERR "WARNING: couldn't find $img\n";
    }
}


# Chooses an index to use and stores it in the $index global: the first
# directory in $indexdir that is not the text directory, not a dotted name,
# and not a paragraph index (those start with "p").
sub set_index {
    my ($indexdir) = @_;

    # $collection must already have been set (by get_gdbmfile).
    die "collection global was not set\n"
        unless defined $collection && $collection =~ /\w/;

    opendir (INDEXDIR, $indexdir) || die "couldn't open directory $indexdir\n";
    my @indexes = readdir INDEXDIR;
    closedir INDEXDIR;   # was close(); dirhandles need closedir
    foreach my $i (@indexes) {
        next if $i =~ /text$/i || $i =~ /\./ || $i =~ /^p/i;
        $index = &util::filename_cat ($i, $collection);
        last;
    }
}


#########################################################################

################ functions involving mg ################################

# Retrieves the decompressed text of document $docnum via the mgquery pipe.
sub get_text {
    my ($docnum) = @_;

    print STDERR "." if $verbosity >= 2;
    &mgcommand ($docnum);

    <$mgread>;   # eat the document separator

    my $text = "";
    while (my $line = <$mgread>) {
        # The terminator string is configured in openmg().
        last if $line =~ /^<\/mg>/;
        $text .= $line;
    }

    # Read in the last statement, which should be:
    # "dd documents retrieved."
    <$mgread>;

    return $text;
}


# Numeric sort comparator for use as "sort numerically @list".
sub numerically { $a <=> $b }

# mg stuff

# Starts an mgquery_old child talking to the chosen index and configures
# it for raw text retrieval by document number.
sub openmg {
    my ($indexdir) = @_;

    &openpipe($mgread, $mgwrite, "mgquery_old -d $indexdir -f $index -t $textdir")
        or die "Unable to start mgquery.";

    $mgwrite->autoflush();
    &mgcommand('.set expert true');
    &mgcommand('.set terminator "</mg>\n"');
    &mgcommand('.set mode text');
    &mgcommand('.set query docnums');
    &mgcommand('.set term_freq off');
}


# Tells the mgquery child to quit and closes both pipe ends.
sub closemg {
    &mgcommand (".quit");
    close($mgread);
    close($mgwrite);
}


# Sends one command line to the mgquery child and consumes its echo.
sub mgcommand {
    my ($command) = @_;

    # Ignore blank / whitespace-only commands.
    return if $command =~ /^\s*$/;
    print $mgwrite "$command\n";

    # mgquery echoes the command it executed; eat that line.
    <$mgread>;
}


# openpipe(READ, WRITE, CMD)
#
# Like open2, except CMD's stderr is also redirected to the write side.
# READ and WRITE are bareword filehandle names generated from $FileHandle.
sub openpipe
{
    my ($read, $write, $cmd) = @_;
    my ($child_read, $child_write);

    # Generate two fresh filehandle names for the child's pipe ends.
    $child_read = ++$FileHandle;
    $child_write = ++$FileHandle;

    pipe($read, $child_write) || die "Failed pipe($read, $child_write): $!";
    pipe($child_read, $write) || die "Failed pipe($child_read, $write): $!";

    # Perl's fork returns undef on failure (not a negative value as in C).
    # The old "($pid = fork) < 0" test never fired, so a failed fork fell
    # through to the "child" branch and exec'd in the parent.
    my $pid = fork;
    if (!defined $pid) {
        die "Failed fork: $!";
    } elsif ($pid == 0) {
        # Child: wire the pipe ends to stdin/stdout/stderr, then exec.
        close($read);
        close($write);
        open(STDIN, "<&$child_read");
        open(STDOUT, ">&$child_write");
        open(STDERR, ">&$child_write");
        exec($cmd);
        die "Failed exec $cmd: $!";
    }

    # Parent: close the child's ends and unbuffer our side of the pipes.
    close($child_read);
    close($child_write);

    $write->autoflush();
    $read->autoflush();

    return 1;
}


######################################################################

############# functions to do with the classification stuff ##########

# Scans the gdbm dump for classification OIDs (single capital letters like
# "C" or "T") and pushes them onto the global @$classifyinfo list.
sub get_classifyinfo {
    my ($gdbmfile) = @_;

    open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
    print STDERR "Finding all classification sections from $gdbmfile\n" ;

    # db2txt separates records with a line of 70 dashes.
    $/ = '-' x 70;
    while (defined (my $entry = <DB2TXT>)) {
        next unless $entry =~ /\w/;   # ignore blank entries
        $entry =~ s/\n+/\\n/g;        # collapse newlines to literal "\n"
        my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/;

        # Assumes a classification OID is a single capital letter (C, T, ...).
        next unless $key =~ /^[A-Z]$/;

        push (@$classifyinfo, $key);
    }

    $/ = "\n";
}


# Creates the classification file needed for the Hierarchy classifier
# (used for subjects, titles, orgs etc) and registers it in the
# collect_cfg hash.
sub make_info_file {
    my ($etcdir, $classifier) = @_;

    my $info_file = &util::filename_cat($etcdir, "CL.${classifier}.txt");
    my $classifier_name = "CL.$classifier";
    print STDERR "classification $classifier will be called CL.$classifier\n";

    open (OUTDOC, ">$info_file" ) || die "couldn't open file $info_file\n";

    my $hashref = {};
    &get_metadata($infodb{$classifier}, $hashref);
    my $children = [];
    &get_whole_children($hashref, $children);
    foreach my $child (@$children) {
        &process_entry(OUTDOC, $classifier_name, $child);
    }

    close OUTDOC;

    &add_classify_cfg($classifier, $classifier_name, $info_file);
}


# Writes one classification node to the hierarchy file, records which
# documents belong to it, and recurses into sub-classifications.
sub process_entry {
    my ($handle, $classifier_name, $classify_id) = @_;

    my $hashref = {};
    &get_metadata($infodb{$classify_id}, $hashref);

    &add_line($handle, $classify_id, $hashref->{'Title'});

    my $children = [];
    &get_whole_children($hashref, $children);
    foreach my $child (@$children) {
        if (&is_document($child)) {
            # Leaf: a document - record its classification membership.
            &add_doc_metadata($child, $classifier_name, $classify_id);
        } else {
            # Interior node: another classification level.
            &process_entry($handle, $classifier_name, $child);
        }
    }
}


# Records in the $doc_classif_info global that document $doc_id belongs to
# classification $classifier_id under classifier $classifier_name.
sub add_doc_metadata {
    my ($doc_id, $classifier_name, $classifier_id) = @_;

    $doc_classif_info->{$doc_id} = {}
        unless defined $doc_classif_info->{$doc_id};
    $doc_classif_info->{$doc_id}->{$classifier_name} = []
        unless defined $doc_classif_info->{$doc_id}->{$classifier_name};
    push (@{$doc_classif_info->{$doc_id}->{$classifier_name}}, $classifier_id);
}


# Writes one hierarchy-file line: "<id>\t<number>\t\"<title>\"".
sub add_line {
    my ($handle, $classify_id, $title) = @_;
    $title = &unicode::ascii2utf8(\$title);
    # Strip the single-letter prefix (e.g. "C.") to get the bare number.
    my ($num) = $classify_id =~ /^[A-Z]\.(.*)$/;
    print $handle "$classify_id\t$num\t\"$title\"\n";
}


# True iff $oid names a document (like "B.12") rather than a classification.
sub is_document {
    my ($oid) = @_;
    return ($oid =~ /^B\.\d/) ? 1 : 0;
}


########################################################################

########## stuff for producing collect.cfg file ###########################

# Fills the $collect_cfg global with default configuration values
# (creator/maintainer from the current user, plugin list, display formats
# and collection metadata).
sub add_default_cfg {

    # Backticks return "" (not undef) when the command produces no output,
    # so the old "unless defined" tests never fired; test for real content.
    $username = `whoami`;
    $username = `logname` unless defined $username && $username =~ /\S/;
    $username = "a_user" unless defined $username && $username =~ /\S/;
    $username =~ s/\n//;
    $collect_cfg->{'creator'} = "$username\@cs.waikato.ac.nz";
    $collect_cfg->{'maintainer'} = "$username\@cs.waikato.ac.nz";
    $collect_cfg->{'public'} = "true";

    $collect_cfg->{'plugin'} = [];
    push (@{$collect_cfg->{'plugin'}}, ["GreenstoneXMLPlugin"]);
    push (@{$collect_cfg->{'plugin'}}, ["ArchivesInfPlugin"]);
    push (@{$collect_cfg->{'plugin'}}, ["DirectoryPlugin"]);

    $collect_cfg->{'format'} = {};
    $collect_cfg->{'format'}->{'DocumentImages'} = "true";
    $collect_cfg->{'format'}->{'DocumentText'} =
        "\"<h3>[Title]</h3>\\\\n\\\\n<p>[Text]\"";
    $collect_cfg->{'format'}->{'SearchVList'} =
        "\"<td valign=top>[link][icon][/link]</td><td>{If}{[parent(All': '):Title],[parent(All': '):Title]:}[link][Title][/link]</td>\"";

    $collect_cfg->{'collectionmeta'} = {};
    $collect_cfg->{'collectionmeta'}->{'collectionname'} = "\"$collection\"";
    $collect_cfg->{'collectionmeta'}->{'iconcollection'} = "\"_httpprefix_/collect/$collection/images/$collection.gif\"";
    $collect_cfg->{'collectionmeta'}->{'iconcollectionsmall'} = "\"_httpprefix_/collect/$collection/images/${collection}sm.gif\"";
    $collect_cfg->{'collectionmeta'}->{'collectionextra'} = "\"This is a collection rebuilt from CDROM.\"";

}


# Reads the indexmap entries from the old build.cfg and mirrors them into
# $collect_cfg (indexes list, default index and collectionmeta names).
# If the build.cfg is missing, only an empty indexes list is recorded.
sub add_index_cfg {
    my ($buildfile) = @_;

    my $data = {};
    $collect_cfg->{'indexes'} = [];
    if (!-e $buildfile) {
        print STDERR "Couldn't read $buildfile, could not add index data to configuration file\n";
        return;
    }

    $data = &cfgread::read_cfg_file($buildfile, '^(this)$', '^(indexmap)$');
    foreach my $mapping (@{$data->{'indexmap'}}) {
        # Each mapping looks like "<index spec>-><abbreviation>".
        my ($thisindex, $abbrev) = split (/\-\>/, $mapping);
        push (@{$collect_cfg->{'indexes'}}, $thisindex);
        # The first index seen becomes the default.
        $collect_cfg->{'defaultindex'} = $thisindex
            unless defined $collect_cfg->{'defaultindex'};
        my $name = &get_index_name($thisindex);
        $collect_cfg->{'collectionmeta'}->{".$thisindex"} = "\"$name\"";
    }
}


# Maps an index specifier from build.cfg to a human-readable name.
# Order matters: "paragraph" is checked before the section/text pattern.
sub get_index_name {
    my ($thisindex) = @_;
    my @rules = (
        [ qr/paragraph/,     "paragraphs" ],
        [ qr/section.*text/, "chapters"   ],
        [ qr/Title/,         "titles"     ],
    );
    foreach my $rule (@rules) {
        return $rule->[1] if $thisindex =~ $rule->[0];
    }
    return "other";
}


# Adds a Hierarchy classifier entry for one classification to $collect_cfg.
# $classify is the single-letter OID, $metadata the classifier name
# (e.g. "CL.T"), $file the hierarchy file written by make_info_file.
sub add_classify_cfg {
    my ($classify, $metadata, $file) = @_;
    $collect_cfg->{'classify'} = [] unless defined $collect_cfg->{'classify'};

    # Map the single-letter classification OID to a button name.
    my %button_for = (
        'H' => "Howto",
        'C' => "Subject",
        'O' => "Organization",
        'T' => "Title",
    );
    my $title = $button_for{$classify};
    $title = "Other" unless defined $title;

    my ($filename) = $file =~ /\/([^\/]*)$/;
    my $entry = "Hierarchy -hfile $filename -metadata $metadata -buttonname $title -sort Title";
    $entry .= " -hlist_at_top" if $title eq "Title";
    push (@{$collect_cfg->{'classify'}}, [$entry]);
}


# Writes the accumulated $collect_cfg data out as a collect.cfg file,
# grouping keys by how cfgread serialises them (scalars, lists, maps).
sub output_cfg_file {
    my ($collfile) = @_;
    &cfgread::write_cfg_file($collfile, $collect_cfg,
                             '^(creator|maintainer|public|beta|defaultindex)$',
                             '^(indexes)$', '^(format|collectionmeta)$',
                             '^(plugin|classify)$');
}


# Loads the whole gdbm database into the %infodb global by parsing db2txt
# output: "[key]" lines start a record, a line of 70 dashes ends it, and
# everything in between is the record's value.
sub read_gdbm {
    my ($filename) = @_;

    # NOTE(review): 2-arg open interpolates $filename into a shell command;
    # assumes the path contains no shell metacharacters beyond spaces.
    open (PIPEIN, "db2txt \"$filename\" |") || die "couldn't open pipe from db2txt\n";
    my $key = "";
    my $value = "";
    while (my $line = <PIPEIN>) {
        if ($line =~ /^\[([^\]]+)\]$/) {
            $key = $1;
        } elsif ($line =~ /^-{70}$/) {
            # End of record: commit and reset.
            $infodb{$key} = $value;
            $value = "";
            $key = "";
        } else {
            $value .= $line;
        }
    }
    close PIPEIN;
}