#!/usr/bin/perl -w

###########################################################################
#
# unbuildv2.pl --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# This program will decompress all the text from a built index
# and return it to GML format --
# this is based on the format current in Nov 1999 (version two).

# Stefan updated unbuildv1.pl in August 2002 but unbuildv2.pl was not
# updated. It probably needs some work done before using.

# Katherine updated this but apparently it still doesn't work.

BEGIN {
    # Fail early if the Greenstone environment is not set up, and make the
    # Greenstone perllib modules findable before the "use" statements below
    # are compiled.
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");

    # Seed for generating unique symbolic filehandle names ('FH001',
    # 'FH002', ...) via Perl's magical string auto-increment; used below
    # for the mgquery pipe handles and in openpipe().
    $FileHandle = 'FH000';
}
|
---|
| 43 |
|
---|
[15706] | 44 | use dbutil;
|
---|
[3324] | 45 | use doc;
|
---|
| 46 | use util;
|
---|
| 47 | use parsargv;
|
---|
| 48 | use GDBM_File;
|
---|
| 49 | use FileHandle;
|
---|
| 50 | use English;
|
---|
| 51 | use cfgread;
|
---|
| 52 | use unicode;
|
---|
[12355] | 53 | use plugout;
|
---|
[3324] | 54 |
|
---|
# Unbuffer STDERR and STDOUT so progress output interleaves correctly with
# output from the mgquery child process.
select STDERR; $| = 1;
select STDOUT; $| = 1;


# globals (this script predates "use strict"; these are package variables)
$collection = "";        # the collection name
$index = "";             # the selected index (like stt/unu)
$textdir = "";           # the textdir (like text/unu)
$toplevelinfo = [];      # list of document OIDs
%infodb = ();            # hash into GDBM file
$classifyinfo = [];      # list of classifications
$doc_classif_info = {};  # hash of OIDs -> classifications they belong to
$collect_cfg = {};       # data for the configuration file

# Two symbolic filehandle names ('FH001', 'FH002') for the bidirectional
# pipe to mgquery; $FileHandle itself is initialised in the BEGIN block.
$mgread = ++$FileHandle;
$mgwrite = ++$FileHandle;
|
---|
| 71 |
|
---|
| 72 |
|
---|
| 73 |
|
---|
# Print command-line help for this script to STDERR.
sub print_usage {
    print STDERR "\n usage: $0 [options]\n\n";
    print STDERR " options:\n";
    print STDERR " -verbosity number 0=none, 3=lots\n";
    print STDERR " -indexdir directory The index to be decompressed (defaults to ./index)\n";
    # Fix: the original message was missing the closing parenthesis after
    # "./archives.new".
    print STDERR " -archivedir directory Where the converted material ends up (defaults to ./archives.new)\n";
    print STDERR " -removeold Will remove the old contents of the archives\n";
    print STDERR " directory -- use with care\n\n";
}
|
---|
| 83 |
|
---|
# Entry point.  (Was "&main ();" -- the leading "&" call syntax is an
# anti-pattern; with parentheses it changes nothing here, so drop it.)
main();
|
---|
| 85 |
|
---|
# Drives the whole un-build: parses options, reconstructs a collect.cfg,
# rebuilds the classification hfiles, then walks every top-level document
# in the GDBM info database, re-inflating its text from the mg text index
# and handing the resulting doc object to a GreenstoneArchive plugout.
sub main {
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'indexdir/.*/index', \$indexdir,
                         'archivedir/.*/archives.new', \$archivedir,
                         'removeold', \$removeold)) {
        &print_usage();
        die "\n";
    }

    die "indexdir $indexdir does not exist\n\n" unless (-d $indexdir);
    $indexdir =~ s/\/$//;   # normalise away a trailing slash

    if (-d $archivedir) {
        if ($removeold) {
            print STDERR "Warning - removing current contents of the archives directory $archivedir\n";
            print STDERR " in preparation for the import\n";
            sleep(5); # just in case...
            &util::rm_r ($archivedir);
        }
    } else {
        &util::mk_all_dir ($archivedir);
    }

    $etcdir = "./etc";
    if (!(-d $etcdir)) {
        &util::mk_all_dir ($etcdir);
    }

    my $gdbmfile = &get_gdbmfile ($indexdir); # sets $collection and $textdir
    &set_index (); # sets $index (just chooses one index)

    my $buildcfgfile = &util::filename_cat($indexdir, "build.cfg");
    my $colcfgfile = &util::filename_cat($etcdir, "collect.cfg");

    &add_default_cfg();
    &add_index_cfg($buildcfgfile);

    # work out all the classifications from the gdbm file; info for each doc
    # (ie which classifications they belong to) is kept in $doc_classif_info
    &get_classifyinfo ($gdbmfile); # puts a list of classifications into $classifyinfo

    &get_toplevel_OID ($gdbmfile); # puts the top level document OIDs into $toplevelinfo

    # read the ldb file into %infodb
    &dbutil::read_infodb_file("gdbm", $gdbmfile, \%infodb);

    # this makes the files specifying the hierarchy of subjects, titles etc
    foreach $classify (@$classifyinfo) {
        &make_info_file($classify);
    }

    # write out the collect.cfg
    &output_cfg_file($colcfgfile);

    &openmg ();

    # read the archive information file
    my $archive_info_filename = &util::filename_cat ($archivedir, "archives.inf");
    # (was indirect-object "new arcinfo ()"; arrow syntax is unambiguous)
    my $archive_info = arcinfo->new ();

    my $opts = [];
    push @$opts,("-output_info",$archive_info);

    $processor = &plugout::load_plugout("GAPlugout",$opts);
    # Fix: was $new_archivedir, a variable never assigned anywhere in this
    # script -- the plugout output directory was always undef.
    $processor->setoutputdir ($archivedir);

    my ($doc_obj, $hashref, $children);
    print STDERR "processing documents now\n" if $verbosity >=2;
    foreach $oid (@$toplevelinfo) {
        $value = $infodb{$oid};
        $hashref={};
        $children = [];
        &get_metadata($value, $hashref);
        $doc_obj = doc->new ();
        $doc_obj->set_OID($oid);
        my ($olddir) = $hashref->{'archivedir'}; # old dir for this doc, where images are stored
        $top = $doc_obj->get_top_section();
        &add_section_content ($doc_obj, $top, $hashref, $olddir);
        &add_classification_metadata($oid, $doc_obj, $top);
        &add_cover_image($doc_obj, $olddir);
        &get_children($hashref, $children);
        &recurse_sections($doc_obj, $children, $oid, $top, $olddir);
        $processor->process($doc_obj);
    }
    print STDERR "\n";

    &closemg();

    # write out the archive information file
    $archive_info->save_info($archive_info_filename);
}
|
---|
| 186 |
|
---|
| 187 | # returns the path to the gdbm info database - also
|
---|
| 188 | # sets the $collection and $textdir global variable
|
---|
# Returns the path to the gdbm info database inside $indexdir.
# Side effects: sets the $collection and $textdir globals.
# Dies if no text directory or no .ldb/.bdb database can be found.
sub get_gdbmfile {
    my ($indexdir) = @_;
    my ($gdbmfile);

    opendir (DIR, $indexdir) || die "Couldn't open directory $indexdir\n\n";
    my @conts = readdir DIR;
    # Fix: was "close DIR" -- close() does not close a handle opened with
    # opendir(); closedir() is required.
    closedir DIR;

    # the text directory is the entry whose name ends in "text"
    foreach $file (@conts) {
        if ($file =~ /text$/) {
            $textdir = $file;
            last;
        }
    }
    die "No text directory found in $indexdir\n\n"
        unless defined $textdir && $textdir =~ /text$/;

    $gdbmfile = &util::filename_cat ($indexdir, $textdir);

    opendir (DIR, $gdbmfile) || die "Couldn't open directory $gdbmfile\n\n";
    @conts = readdir DIR;
    closedir DIR;   # Fix: same closedir issue as above

    # the database file is "<collection>.ldb" (or .bdb); its basename is
    # the collection name
    foreach $file (@conts) {
        if ($file =~ /^(.*?)\.(?:ldb|bdb)$/) {
            $collection = $1;
            $gdbmfile = &util::filename_cat ($gdbmfile, $file);
            last;
        }
    }

    if (defined $collection && $collection =~ /\w/) {
        $textdir = &util::filename_cat ($textdir, $collection);
    } else {
        die "collection global wasn't set\n";
    }
    return $gdbmfile if (-e $gdbmfile);
    die "Couldn't find gdbm info database in $indexdir\n\n";
}
|
---|
| 228 |
|
---|
| 229 |
|
---|
# Collects the OIDs of all top-level documents from the gdbm database
# (via the external db2txt tool) into the $toplevelinfo global.
sub get_toplevel_OID {
    my ($gdbmfile) = @_;

    open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
    print STDERR "Finding all top level sections from $gdbmfile\n" if $verbosity >= 2;

    # db2txt separates records with a line of 70 dashes
    $/ = '-' x 70;
    my $entry = "";
    while (defined ($entry = <DB2TXT>)) {
        next unless $entry =~ /\w/;  # ignore blank entries
        $entry =~ s/\n+/\\n/g;       # replace multiple \n with single \n
        my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/;

        next if ($key =~ /\./);          # ignore any lower level entries
        next if ($key =~ /^CL/);         # ignore classification entries
        next if ($value =~ /<section>/); # ignore docnum->OID entries
        next if ($value =~ /<docoid>/);  # ignore strange s133->OID entries
        next if ($key !~ /\d/);          # ignore collection, browse entries

        push( @$toplevelinfo, $key);
    }

    # Fix: the db2txt pipe was never closed, leaking a filehandle (and a
    # zombie child process) per call.
    close DB2TXT;

    $/ = "\n";  # restore the default input record separator
}
|
---|
| 257 |
|
---|
| 258 | # gets all the metadata from a gdbm file entry, and puts it into a hashref
|
---|
# Parses one gdbm record (a string of "<key>value" lines) into $hashref.
# Repeated keys have their values joined with '@', matching the separator
# that add_section_content() later splits on.
sub get_metadata {
    my ($gdb_str_ref, $hashref) = @_;

    foreach $meta_line (split(/\n/, $gdb_str_ref)) {
        my ($key, $value) = ($meta_line =~ /^<([^>]*)>(.*?)$/);
        $$hashref{$key} .= '@' if defined $$hashref{$key};
        $$hashref{$key} .= $value;
    }
}
|
---|
| 272 |
|
---|
| 273 | #takes a hashref containing the metadata for a gdbmfile entry, and extracts
|
---|
| 274 | #the childrens numbers (from the 'contains' entry).
|
---|
| 275 | #assumes format is ".1;".2;".3
|
---|
# Extracts the child section numbers from a record's 'contains' entry into
# the caller's @$children array.  Assumes the format ".1;".2;".3 .
sub get_children {
    my ($hashref, $children) = @_;

    $childs = $hashref->{'contains'};
    if (defined ($childs)) {
        $childs =~ s/\@$//;   # remove trailing @
        $childs =~ s/^\"\.//; # remove initial ".
        @$children = split /\;\"\./, $childs;
    }
    else {
        # Fix: the original did "$children = []", which only rebinds the
        # local alias and leaves the caller's array untouched; clear the
        # caller's array instead.
        @$children = ();
    }
}
|
---|
| 290 |
|
---|
| 291 | #takes a hashref containing the metadata for a gdbmfile entry, and extracts
|
---|
| 292 | #the childrens numbers (from the 'contains' entry).
|
---|
| 293 | #assumes format is ".1;".2;".3
|
---|
| 294 | #returns a list with the full child name ie HASH0123...ac.1 HASH0123...ac.2
|
---|
| 295 | #etc
|
---|
| 296 | #used for classification stuff
|
---|
# Like get_children(), but returns fully-qualified child OIDs, e.g.
# HASH0123...ac.1, HASH0123...ac.2 (the leading '"' in each 'contains'
# item is shorthand for the parent OID).  Used for classification nodes.
sub get_whole_children {
    my ($parentoid, $hashref, $children) = @_;

    my $childs = $hashref->{'contains'};
    my @items;
    if (defined ($childs)) {
        $childs =~ s/\@$//;           # remove trailing @
        @items = split /\;/, $childs; # split on ;
        foreach $item (@items) {
            $item =~ s/^\"/$parentoid/; # replace " with parentoid
            push (@$children, "$item");
        }
    }
    else {
        # Fix: was "$children = []", which rebinds the local alias without
        # clearing the caller's array.
        @$children = ();
    }
}
|
---|
[3324] | 315 |
|
---|
[7588] | 316 |
|
---|
# Walks the section tree below one document node: for each child (in
# numeric order) creates a named section in $doc_obj, adds its metadata
# and text, then recurses into the child's own children.
sub recurse_sections {
    my ($doc_obj, $children, $parentoid, $parentsection, $olddir) = @_;

    foreach $child (sort numerically @$children) {
        $doc_obj->create_named_section("$parentsection.$child");
        # gdbm record for this subsection
        my $value = $infodb{"$parentoid.$child"};
        my $hashref={};
        &get_metadata($value, $hashref); # get child's metadata
        my $newchildren = [];
        &get_children($hashref, $newchildren); # get child's children
        # add content for current section
        &add_section_content($doc_obj, "$parentsection.$child", $hashref, $olddir);
        # process all the children if there are any
        # (NOTE(review): $newchildren is always defined here -- the guard
        # only matters because an empty list simply ends the recursion)
        &recurse_sections($doc_obj, $newchildren, "$parentoid.$child", "$parentsection.$child", $olddir)
            if (defined ($newchildren));
    }
}
|
---|
| 336 |
|
---|
# Adds one section's metadata and (if present) its decompressed text to
# $doc_obj.  Image references inside the text are rewritten to
# _httpdocimg_/<name> and the original files associated with the document.
sub add_section_content {
    my ($doc_obj, $cursection, $hashref, $olddir) = @_;

    foreach $key (keys %$hashref) {
        # don't need to store these structural entries as metadata
        next if $key =~ /(contains|docnum|hastxt|doctype|archivedir|classifytype)/i;
        # multiple values for a key were joined with '@' by get_metadata()
        my @items = split /@/, $hashref->{$key};
        map {$doc_obj->add_metadata ($cursection, $key, $_); } @items;
    }
    # docnum is the mg document number used to fetch the text;
    # hastxt is "0" or "1" (assumed -- the regexes just grab the first match)
    my ($docnum)= $hashref->{'docnum'} =~ /(\d*)/;
    my ($hastext) =$hashref->{'hastxt'} =~ /(0|1)/;

    my $images=[];
    if ($hastext) {
        my $text = &get_text($docnum);

        # rewrite each <img src="..."> to _httpdocimg_/<basename>, while
        # collecting the original paths in @$images via get_img()
        $text =~ s/(<img.*?src=\")([^\"]*)(\"[^>]*>)/
            $1.&get_img($2,$images).$3/sgei;

        $doc_obj->add_text ($cursection, $text);

        if (scalar(@$images)>0) {
            foreach $img (@$images) {
                my ($assoc_file) = $img =~ /([^\/\\]*\..*)$/; # the name of the image
                $img =~ s/_httpcollection_/\./;  # replace _httpcollection_ with .
                $img =~ s/_thisOID_/$olddir/;    # replace _thisOID_ with old archivedir name

                $doc_obj->associate_file($img, $assoc_file);
            }
        }
    }
}
|
---|
| 376 |
|
---|
| 377 |
|
---|
| 378 |
|
---|
# Records the original image path in @$images and returns the rewritten
# in-text URL "_httpdocimg_/<basename>".
sub get_img {
    my ($path, $images) = @_;

    push (@$images, $path);
    # basename: everything after the last / or \ that contains a dot
    my ($imgname) = $path =~ /([^\/\\]*\..*)$/;
    return "_httpdocimg_/" . $imgname;
}
|
---|
| 387 |
|
---|
| 388 |
|
---|
# Copies any classification metadata recorded for $oid (by
# add_doc_metadata) onto the document object's given section.
sub add_classification_metadata {
    my ($oid, $doc_obj, $cursection) = @_;

    my $classif = $doc_classif_info->{$oid};
    return unless defined $classif;

    foreach $meta_name (keys %$classif) {
        foreach $meta_value (@{$classif->{$meta_name}}) {
            $doc_obj->add_metadata ($cursection, $meta_name, $meta_value);
        }
    }
}
|
---|
| 403 |
|
---|
| 404 | # picks up the cover image "cover.jpg" from the old archives directory.
|
---|
# Picks up the cover image "cover.jpg" from the old archives directory and
# associates it with the document, when it exists.
sub add_cover_image {
    my ($doc_obj, $olddir) = @_;

    $assoc_file = "cover.jpg";
    $img = "archives/$olddir/$assoc_file";

    # only associate the file when it actually exists on disk
    $doc_obj->associate_file($img, $assoc_file) if -e $img;
}
|
---|
| 416 |
|
---|
| 417 |
|
---|
| 418 |
|
---|
# Sets the $index global: just the first entry in $indexdir that is not
# the text directory, the assoc directory, or a dotted name.
# Requires $collection to have been set by get_gdbmfile().
sub set_index {
    # check that $collection has been set
    die "collection global was not set\n"
        unless defined $collection && $collection =~ /\w/;

    # find an index (just use first non-text directory we come across in $indexdir)
    opendir (INDEXDIR, $indexdir) || die "couldn't open directory $indexdir\n";
    my @indexes = readdir INDEXDIR;
    # Fix: was "close INDEXDIR" -- close() does not close a handle opened
    # with opendir(); closedir() is required.
    closedir INDEXDIR;

    foreach $i (@indexes) {
        next if $i =~ /text$/i || $i =~ /\./ || $i =~ /assoc$/i;
        $index = &util::filename_cat ($i, $collection);
        last;
    }
}
|
---|
| 434 |
|
---|
| 435 |
|
---|
| 436 | #########################################################################
|
---|
| 437 |
|
---|
| 438 | ################ functions involving mg ################################
|
---|
| 439 |
|
---|
# Fetches the full text of mg document $docnum through the mgquery pipe
# opened by openmg().  Relies on the ".set terminator" configured there:
# output runs until a line starting with "</mg>".
sub get_text {
    my ($docnum) = @_;

    print STDERR "." if $verbosity >= 2;
    &mgcommand ($docnum);

    <$mgread>; # eat the document separator

    my $text = "";
    my $line = "";

    while (defined ($line = <$mgread>))
    {
        last if $line =~ /^<\/mg>/;   # terminator set in openmg()
        $text .= $line;
    }

    # Read in the last statement, which should be:
    # "dd documents retrieved."
    <$mgread>;

    return $text;
}
|
---|
| 463 |
|
---|
# Numeric sort comparator for use as "sort numerically @list".
sub numerically { return $a <=> $b; }
|
---|
| 465 |
|
---|
| 466 |
|
---|
| 467 |
|
---|
# Launches an mgquery child over a bidirectional pipe ($mgread/$mgwrite)
# and configures it for machine-readable text retrieval by docnum, with
# "</mg>" as the record terminator and all statistics output disabled.
sub openmg {

    #print STDERR "index: $index\n";

    die "Unable to start mgquery." unless
        &openpipe($mgread, $mgwrite,
                  "mgquery -d $indexdir -f $index -t $textdir");

    $mgwrite->autoflush();

    &mgcommand('.set expert true');
    &mgcommand('.set terminator "</mg>\n"');
    &mgcommand('.set mode text');
    &mgcommand('.set query docnums');
    &mgcommand('.set term_freq off');
    &mgcommand('.set briefstats off');
    &mgcommand('.set memstats off');
    &mgcommand('.set sizestats off');
    &mgcommand('.set timestats off');
}
|
---|
| 488 |
|
---|
# Asks the mgquery child to exit, then shuts down both ends of the pipe.
sub closemg {
    &mgcommand (".quit");
    close($mgread);
    close($mgwrite);
}
|
---|
| 494 |
|
---|
# Sends one command line to the mgquery child and consumes the echo that
# mgquery prints for every command it executes.
sub mgcommand {
    my ($command) = @_;

    # ignore empty / whitespace-only commands
    return unless $command =~ /\S/;

    print $mgwrite "$command\n";

    # eat up the command executed which is echoed
    <$mgread>;
}
|
---|
| 505 |
|
---|
| 506 | # openpipe(READ, WRITE, CMD)
|
---|
| 507 | #
|
---|
| 508 | # Like open2, except CMD's stderr is also redirected.
|
---|
| 509 | #
|
---|
# openpipe(READ, WRITE, CMD)
#
# Like open2, except CMD's stderr is also redirected to the write side.
# READ/WRITE are symbolic filehandle names; returns 1 on success, dies on
# pipe/fork/exec failure.
sub openpipe
{
    my ($read, $write, $cmd) = @_;
    my ($child_read, $child_write);

    $child_read = ++$FileHandle;
    $child_write = ++$FileHandle;

    pipe($read, $child_write) || die "Failed pipe($read, $child_write): $!";
    pipe($child_read, $write) || die "Failed pipe($child_read, $write): $!";
    my $pid;

    # Fix: Perl's fork returns undef on failure, never a negative value
    # (unlike C), so the original "< 0" test could not detect a failed fork.
    if (!defined($pid = fork)) {
        die "Failed fork: $!";
    } elsif ($pid == 0) {
        # child: wire the pipe ends to stdin/stdout/stderr and exec CMD
        close($read);
        close($write);
        open(STDIN, "<&$child_read");
        open(STDOUT, ">&$child_write");
        open(STDERR, ">&$child_write");
        exec($cmd);
        die "Failed exec $cmd: $!";
    }

    # parent: close the child's ends so EOF propagates correctly
    close($child_read);
    close($child_write);

    $write->autoflush();
    $read->autoflush();

    return 1;
}
|
---|
| 542 |
|
---|
| 543 |
|
---|
| 544 |
|
---|
| 545 |
|
---|
| 546 | ######################################################################
|
---|
| 547 |
|
---|
| 548 | ############# functions to do with the classification stuff ##########
|
---|
| 549 |
|
---|
| 550 | #returns the top level classification oids
|
---|
# Collects the top-level classification OIDs (CL1, CL2, ...) from the gdbm
# database (via db2txt) into the $classifyinfo global.
sub get_classifyinfo {
    my ($gdbmfile) = @_;

    open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
    print STDERR "Finding all classification sections from $gdbmfile\n" ;

    # db2txt separates records with a line of 70 dashes
    $/ = '-' x 70;
    my $entry = "";
    while (defined ($entry = <DB2TXT>)) {
        next unless $entry =~ /\w/; # ignore blank entries
        $entry =~ s/\n+/\\n/g;      # replace multiple \n with single \n
        my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/;

        # assumes classification OID is like CL1 or CL2 etc
        # NOTE(review): this pattern misses CL10 and above -- confirm no
        # collection ever has more than nine classifiers before relying on it
        next unless ($key =~/^CL\d$/);

        push( @$classifyinfo, $key);
    }

    # Fix: the db2txt pipe was never closed, leaking a filehandle (and a
    # zombie child process) per call.
    close DB2TXT;

    $/ = "\n";  # restore the default input record separator
}
|
---|
| 576 |
|
---|
| 577 | #this creates the classification files needed for the hierarchy classifier
|
---|
| 578 | #used for subjects, titles, orgs etc
|
---|
| 579 | #also adds in entries to the collect_cfg hash
|
---|
# Creates the classification file needed by the Hierarchy classifier for
# one top-level classification node (subjects, titles, orgs etc) and adds
# the matching "classify" entry to $collect_cfg.  If none of the node's
# children is itself a classifier node, an AZList classifier is configured
# instead and no hfile is written.
sub make_info_file {

    my ($classifier) = @_;
    my $info_file = "";
    my $entry = $infodb{$classifier};

    my $hashref = {};
    &get_metadata($entry, $hashref);

    my $classifier_name = "CL".$hashref->{'Title'}; # like CLSubject
    $classifier_name =~ s/\@$//; # remove trailing @

    # check children - if there is a classifier node at this level,
    # use a hierarchy, otherwise use an AZList.

    my $children=[];
    my $hierarchy = 0;
    &get_whole_children($classifier, $hashref, $children); # returns a list of the child ids
    foreach $child(@$children) {
        if(not &is_document($child)) {
            $hierarchy = 1;
            last;
        }
    }

    if (!$hierarchy) {
        &add_classify_cfg_list($classifier_name);
    }else { # there is a hierarchy so create a file
        $info_file = "./etc/$classifier_name.txt";

        print STDERR "classification $classifier will be called $classifier_name\n";

        open (OUTDOC, "> $info_file" ) || die "couldn't open file $info_file\n";

        # one line per hierarchy node, written recursively by process_entry()
        foreach $child (@$children) {
            &process_entry(OUTDOC, $classifier_name, $child);
        }

        close OUTDOC;

        &add_classify_cfg($classifier, $classifier_name, $info_file);
    }
}
|
---|
| 623 |
|
---|
| 624 |
|
---|
# Writes one hierarchy-file line for classification node $classify_id,
# then recurses: document children get their classification recorded via
# add_doc_metadata(), classifier children are processed recursively.
sub process_entry {
    my ($handle, $classifier_name, $classify_id) = @_;
    my $value = $infodb{$classify_id};

    my $hashref={};
    &get_metadata($value, $hashref);
    my $title = $hashref->{'Title'};
    $title =~ s/\@$//; # remove trailing @
    &add_line($handle, $classify_id, $title);

    my $children = [];
    &get_whole_children($classify_id, $hashref, $children);
    foreach $child (@$children) {
        if (&is_document($child)) {
            # leaf: remember that this document belongs to this node
            &add_doc_metadata($child, $classifier_name, $title);
        }else {
            &process_entry($handle, $classifier_name, $child);
        }
    }

}
|
---|
| 646 |
|
---|
| 647 |
|
---|
| 648 |
|
---|
# Records, in the $doc_classif_info global, that document $doc_id belongs
# to classification node $classifier_id under $classifier_name; consumed
# later by add_classification_metadata().
sub add_doc_metadata {
    my ($doc_id, $classifier_name, $classifier_id) = @_;

    my $per_doc = ($doc_classif_info->{$doc_id} ||= {});
    my $values  = ($per_doc->{$classifier_name} ||= []);
    push (@$values, $classifier_id);
}
|
---|
| 662 |
|
---|
| 663 |
|
---|
| 664 |
|
---|
| 665 |
|
---|
# Writes one hierarchy-file line: "Title"<tab>position<tab>"Title",
# where position is the classify id with the leading "CLn." stripped
# (e.g. CL1.2.3 -> 2.3).
sub add_line {
    my ($handle, $classify_id, $title) = @_;

    $title = &unicode::ascii2utf8(\$title);
    my ($num) = $classify_id =~ /^CL\d\.(.*)$/;

    print $handle "\"$title\"\t$num\t\"$title\"\n";
}
|
---|
| 677 |
|
---|
# True iff $oid names a document (HASH...) rather than a classification
# node (CL1, CL1.2, ...).
sub is_document {
    my ($oid) = @_;
    return ($oid =~ /^HASH/i) ? 1 : 0;
}
|
---|
| 683 |
|
---|
| 684 | ########################################################################
|
---|
| 685 |
|
---|
| 686 | ########## stuff for producing collect.cfg file ###########################
|
---|
| 687 |
|
---|
# Fills $collect_cfg with the boilerplate collect.cfg entries: creator /
# maintainer (from the current user), plugin list, format strings and
# collection metadata.
sub add_default_cfg {

    $username=`whoami`;
    # Fix: backticks return a defined (possibly empty) string when the
    # command merely fails, so the original "unless defined" fallbacks were
    # dead code; fall back whenever we got no usable name.
    $username=`logname` unless defined $username && $username =~ /\S/;
    $username="a_user" unless defined $username && $username =~ /\S/;
    $username =~ s/\n//;
    $collect_cfg->{'creator'}="$username\@cs.waikato.ac.nz";
    $collect_cfg->{'maintainer'}="$username\@cs.waikato.ac.nz";
    $collect_cfg->{'public'}="true";
    $collect_cfg->{'beta'}="true";

    $collect_cfg->{'plugin'}=[];
    push (@{$collect_cfg->{'plugin'}}, ["GAPlug"]);
    push (@{$collect_cfg->{'plugin'}}, ["ArcPlug"]);
    push (@{$collect_cfg->{'plugin'}}, ["RecPlug"]);

    $collect_cfg->{'format'}={};
    $collect_cfg->{'format'}->{'DocumentImages'}="true";
    $collect_cfg->{'format'}->{'DocumentText'} =
        "\"<h3>[Title]</h3>\\\\n\\\\n<p>[Text]\"";
    $collect_cfg->{'format'}->{'SearchVList'} =
        "\"<td valign=top>[link][icon][/link]</td><td>{If}{[parent(All': '):Title],[parent(All': '):Title]:}[link][Title][/link]</td>\"";

    $collect_cfg->{'collectionmeta'}={};
    $collect_cfg->{'collectionmeta'}->{'collectionname'}="\"$collection\"";
    $collect_cfg->{'collectionmeta'}->{'iconcollection'}="\"_httpprefix_/collect/$collection/images/$collection.gif\"";
    $collect_cfg->{'collectionmeta'}->{'iconcollectionsmall'}="\"_httpprefix_/collect/$collection/images/${collection}sm.gif\"";
    $collect_cfg->{'collectionmeta'}->{'collectionextra'} = "\"This is a collection rebuilt from CDROM.\"";

}
|
---|
| 718 |
|
---|
# Reads the indexmap from build.cfg and mirrors it into $collect_cfg:
# the index list, the default index, and a human-readable collectionmeta
# name for each index.
sub add_index_cfg {
    my ($buildfile) = @_;

    my $data={};
    $collect_cfg->{'indexes'}=[];
    if (-e $buildfile) {
        $data=&cfgread::read_cfg_file($buildfile, '^(this)$', '^(indexmap)$');
        foreach $mapping (@{$data->{'indexmap'}}) {
            # entries look like "section:text->stt"
            ($thisindex, $abbrev)= split ("\-\>", $mapping);
            push (@{$collect_cfg->{'indexes'}}, $thisindex);
            # first index listed becomes the default
            $collect_cfg->{'defaultindex'} = $thisindex
                unless defined $collect_cfg->{'defaultindex'};
            $name=&get_index_name($thisindex);
            $thisindex=".$thisindex";
            $collect_cfg->{'collectionmeta'}->{$thisindex} = "\"$name\"";
        }
    }
    else {
        print STDERR "Couldn't read $buildfile, could not add index data to configuration file\n";
    }
}
|
---|
| 741 |
|
---|
# Maps a build.cfg index specifier to a human-readable name for the
# collectionmeta section.  Order matters: first matching pattern wins.
sub get_index_name {
    my ($thisindex) = @_;

    my @name_map = (
        [ qr/paragraph/     , "paragraphs" ],
        [ qr/section.*text/ , "chapters"   ],
        [ qr/Title/         , "titles"     ],
    );
    foreach my $pair (@name_map) {
        return $pair->[1] if $thisindex =~ $pair->[0];
    }
    return "other";
}
|
---|
| 749 |
|
---|
# Adds a Hierarchy classifier entry to $collect_cfg for the given
# classification, using $file as its hfile.
sub add_classify_cfg {
    my ($classify, $classifier_name, $file) = @_;

    $collect_cfg->{'classify'} = [] unless defined $collect_cfg->{'classify'};

    my ($title) = $classifier_name =~ /^CL(.*)$/;  # e.g. CLSubject -> Subject
    my ($filename) = $file =~ /\/([^\/]*)$/;       # basename of the hfile
    push (@{$collect_cfg->{'classify'}},
          ["Hierarchy -hfile $filename -metadata $classifier_name -buttonname $title -sort Title"]);
}
|
---|
| 762 |
|
---|
# Adds an AZList classifier entry to $collect_cfg (used when a
# classification has no sub-hierarchy).
sub add_classify_cfg_list {
    my ($classifier) = @_;

    $collect_cfg->{'classify'} = [] unless defined $collect_cfg->{'classify'};
    my ($title) = $classifier =~ /^CL(.*)$/;   # e.g. CLTitle -> Title
    push (@{$collect_cfg->{'classify'}},
          ["AZList -metadata $classifier -buttonname $title"]);
}
|
---|
| 771 |
|
---|
# Writes $collect_cfg out as a collect.cfg file.  An existing file is
# never clobbered: the new configuration goes to "<file>.new" instead.
sub output_cfg_file {
    my ($collfile) = @_;

    $collfile .= ".new" if -e $collfile;   # collect.cfg already exists

    &cfgread::write_cfg_file($collfile, $collect_cfg,
                             '^(creator|maintainer|public|beta|defaultindex)$',
                             '^(indexes)$', '^(format|collectionmeta)$',
                             '^(plugin|classify)$');
}
|
---|
| 787 |
|
---|
| 788 |
|
---|