source: main/tags/2.40/gsdl/bin/script/unbuildv1.pl@ 31150

Last change on this file since 31150 was 3326, checked in by sjboddie, 22 years ago

* empty log message *

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.7 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# unbuildv1.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# this program will decompress all the text from a built index
29# and return it to gml format - this version of the program is designed to
30# work on collections built with the first version of the greenstone software
31# it extracts the browse classifications from the gdbm file, and recreates
32# them. It also creates a default collect.cfg file.
33
34# To run, this program needs the old archive directory (called archives),
35# the index directory with the text and one other index, not paragraphs!! -
36# use one of the section indexes. Also needs the build.cfg file (in the
37# index dir) - used in creating a collect.cfg file. If build.cfg is not
38# present, the parts of the collect.cfg file relating to indexes will not
39# be entered.
40
41# Stefan altered this script slightly August 2002 to use
42# $GSDLHOME/bin/$GSDLOS/mgquery_old instead of the old mgquery in
43# nzdl-1.2. mgquery_old is a statically linked binary compiled from
44# mg-1.3d. It was compiled with SHORT_SUFFIX set so doesn't need the index
45# file extensions to be altered.
46# Also changed the command line options a little.
47
48BEGIN {
49 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
50 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
51 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
52 $FileHandle = 'FH000';
53}
54
55use doc;
56use docsave;
57use util;
58use parsargv;
59use FileHandle;
60use English;
61use cfgread;
62use unicode;
63select STDERR; $| = 1;
64select STDOUT; $| = 1;
65
66
67# globals
68$collection = ""; # the collection name
69$index = ""; # the selected index (like stt/unu)
70$textdir = ""; # the textdir (like text/unu)
71$toplevelinfo = []; #list of document OIDs
72%infodb = (); #hash into GDBM file
73$classifyinfo = []; # list of classifications
74$doc_classif_info = {}; # hash of OIDs->classifications they belong to
75$collect_cfg = {}; #data for the configuration file
76$mgread = ++$FileHandle;
77$mgwrite = ++$FileHandle;
78
79sub print_usage {
80 print STDERR "\n usage: $0 [options]\n\n";
81 print STDERR " options:\n";
82 print STDERR " -verbosity number 0=none, 3=lots\n";
83 print STDERR " -old directory The directory in which the old collection lives\n";
84 print STDERR " -new directory The directory in which to put the new collection\n";
85}
86
87&main ();
88
89sub main {
90 if (!parsargv::parse(\@ARGV,
91 'verbosity/\d+/2', \$verbosity,
92 'old/.*/', \$old,
93 'new/.*/', \$new)) {
94 &print_usage();
95 die "\n";
96 }
97
98 die "$old does not exist\n\n" unless (-d $old);
99 my $old_indexdir = &util::filename_cat($old, "index");
100 my $old_archivedir = &util::filename_cat($old, "archives");
101
102 my $new_archivedir = &util::filename_cat($new, "archives");
103 my $new_etcdir = &util::filename_cat($new, "etc");
104 if (!-e $new_archivedir) {
105 &util::mk_all_dir($new_archivedir);
106 }
107 if (!(-d $new_etcdir)) {
108 &util::mk_all_dir($new_etcdir);
109 }
110
111 my $gdbmfile = &get_gdbmfile($old_indexdir); # sets $collection and $textdir
112 &set_index($old_indexdir); # sets $index (just chooses one index)
113
114 my $buildcfgfile = &util::filename_cat($old_indexdir, "build.cfg");
115 my $colcfgfile = &util::filename_cat($new_etcdir, "collect.cfg");
116
117 # don't need this but giving it a value stops an annoying undefined
118 # value warning
119 $ENV{'GSDLCOLLECTDIR'} = 'tmp';
120
121 &add_default_cfg();
122 &add_index_cfg($buildcfgfile);
123
124 # work out all the classifications from the gdbm file, info for each doc
125 # (ie which classifications they belong to, are kept in $doc_classif_info
126 &get_classifyinfo($gdbmfile); #puts a list of classifications into classifyinfo
127 &get_toplevel_OID($gdbmfile); # puts a list of the top level document OIDs into $toplevelinfo
128
129 # read ldb file into %infodb
130 &read_gdbm($gdbmfile);
131
132 #this makes the files specifying the hierarchy of subjects, titles etc
133 foreach my $classify (@$classifyinfo) {
134 &make_info_file($new_etcdir, $classify);
135 }
136
137 # write out the collect.cfg
138 &output_cfg_file($colcfgfile);
139
140 &openmg($old_indexdir);
141
142 # read the archive information file
143 my $archive_info_filename = &util::filename_cat ($new_archivedir, "archives.inf");
144 my $archive_info = new arcinfo ();
145
146 # create a docsave object to process the documents
147 my $processor = new docsave ("", $archive_info, $verbosity);
148 $processor->setarchivedir ($new_archivedir);
149
150 my ($doc_obj, $hashref, $children);
151 print STDERR "processing documents now\n" if $verbosity >=2;
152 foreach $oid (@$toplevelinfo) {
153 $value = $infodb{$oid};
154 $hashref={};
155 $children = [];
156 &get_metadata($value, $hashref);
157 $doc_obj = new doc ();
158 $doc_obj->set_OID($oid);
159 my ($olddir) = $hashref->{'archivedir'}; # old dir for this doc, where images are stored
160 $top = $doc_obj->get_top_section();
161 &add_section_content ($doc_obj, $top, $hashref, $olddir);
162 &add_classification_metadata($oid, $doc_obj, $top);
163 &add_cover_image($doc_obj, $old_archivedir, $olddir);
164 &get_children($hashref, $children);
165 &recurse_sections($doc_obj, $children, $oid, $top, $olddir) if (defined ($children));
166
167 #change OID to new format
168 $doc_obj->delete_OID();
169 $doc_obj->set_OID();
170 $processor->process($doc_obj);
171# last;
172 }
173 print STDERR "\n";
174
175 &closemg();
176
177 # write out the archive information file
178 $archive_info->save_info($archive_info_filename);
179
180
181}
182
183# returns the path to the gdbm info database - also
184# sets the $collection and $textdir global variable
185sub get_gdbmfile {
186 my ($indexdir) = @_;
187 my ($gdbmfile);
188
189 opendir (DIR, $indexdir) || die "Couldn't open directory $indexdir\n\n";
190 my @conts = readdir DIR;
191 close DIR;
192
193 foreach $file (@conts) {
194 if ($file =~ /text$/) {
195 $textdir = $file;
196 last;
197 }
198 }
199 die "No text directory found in $indexdir\n\n"
200 unless defined $textdir && $textdir =~ /text$/;
201
202 $gdbmfile = &util::filename_cat ($indexdir, $textdir);
203
204 opendir (DIR, $gdbmfile) || die "Couldn't open directory $gdbmfile\n\n";
205 @conts = readdir DIR;
206 close DIR;
207
208 foreach my $file (@conts) {
209 if ($file =~ /^(.*?)\.(?:ldb|bdb)$/) {
210 $collection = $1;
211 $gdbmfile = &util::filename_cat ($gdbmfile, $file);
212 last;
213 }
214 }
215
216 if (defined $collection && $collection =~ /\w/) {
217 $textdir = &util::filename_cat ($textdir, $collection);
218 } else {
219 die "collection global wasn't set\n";
220 }
221 return $gdbmfile if (-e $gdbmfile);
222 die "Couldn't find gdbm info database in $indexdir\n\n";
223}
224
225
226#this function will probably need to be changed for any different file formats
227sub get_toplevel_OID {
228 my ($gdbmfile) = @_;
229
230 open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
231 print STDERR "Finding all top level sections from $gdbmfile\n" if $verbosity >= 2;
232
233 $/ = '-' x 70;
234 my $entry = "";
235 while (defined ($entry = <DB2TXT>)) {
236 next unless $entry =~ /\w/; #ignore blank entries
237 $entry =~ s/\n+/\\n/g; # replace multiple \n with single \n
238 my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/;
239
240 next unless ($key =~ /B\.\d*$/); # assumes top level OID is like
241 # B.12
242
243 push( @$toplevelinfo, $key);
244
245 }
246
247 $/ = "\n";
248}
249
250# gets all the metadata from a gdbm file entry, and puts it into a hashref
251#this has changed for old style formats
252sub get_metadata {
253
254 my ($gdb_str_ref, $hashref) = @_;
255# print STDERR $gdb_str_ref;
256 my @entries = split(/\n/, $gdb_str_ref);
257
258 my $hastext = 1;
259 foreach $entry (@entries) {
260
261 my($key, $value) = ($entry =~ /^<([^>]*)>(.*?)$/ );
262
263 next if ($key eq "p");
264 next if ($key eq "j");
265
266 $key = "Title" if $key eq "t";
267 $key = "docnum" if $key eq "d";
268 if ($key eq "o") {
269 $key = "archivedir";
270 $value =~ s/HASH/hash/;
271 }
272 if ($key eq "c") {
273 $key = "contains";
274 $hastext=0;}
275
276 $$hashref{$key} .= $value;
277
278 }
279
280 $$hashref{'hastxt'} = $hastext;
281
282}
283
284# takes a hashref containing the metadata for a gdbmfile entry, and
285# extracts the childrens numbers (from the 'contains' entry). assumes
286# format is like B.14.1:B.24.2:B.24.3 returns a list like 1 2 3 4
287sub get_children {
288 my ($hashref, $children) = @_;
289
290 my $childs = $hashref->{'contains'};
291 if (defined ($childs)) {
292 @xchildren = split /\:/, $childs;
293 foreach $kid (@xchildren) {
294 my ($child)= $kid =~ /\.([^\.]*)(?:\\n|$)/;
295 push (@$children, $child);
296 }
297# print STDERR "children are: ", @$children;
298 }
299 else {
300 $children = [];
301 }
302}
303
304#takes a hashref containing the metadata for a gdbmfile entry, and extracts
305#the childrens numbers (from the 'contains' entry).
306#assumes format is like B.14.1:B.24.2:B.24.3
307#returns a list with the full child name ie B.14.1 B.14.2 etc
308#used for classification stuff
309sub get_whole_children {
310
311 my ($hashref, $children) = @_;
312
313 my $childs = $hashref->{'contains'};
314 if (defined ($childs)) {
315 @$children = split /\:/, $childs;
316 #print STDERR "children are: ", @$children;
317 }
318 else {
319 $children = [];
320 }
321}
322
323sub recurse_sections {
324 my ($doc_obj, $children, $parentoid, $parentsection, $olddir) = @_;
325
326 foreach $child (sort numerically @$children) {
327 $doc_obj->create_named_section("$parentsection.$child");
328 my $value = $infodb{"$parentoid.$child"};
329 my $hashref={};
330 &get_metadata($value, $hashref); # get childs metadata
331 my $newchildren = [];
332 &get_children($hashref, $newchildren); # get childs children
333 #add content fo rcurrent section
334 &add_section_content($doc_obj, "$parentsection.$child", $hashref, $olddir);
335 # process all the children if there are any
336 &recurse_sections($doc_obj, $newchildren, "$parentoid.$child", "$parentsection.$child", $olddir)
337 if (defined ($newchildren));
338 }
339}
340
341sub add_section_content {
342 my ($doc_obj, $cursection, $hashref, $olddir) = @_;
343
344 foreach $key (keys %$hashref) {
345 #dont need to store these metadata
346 next if $key =~ /(contains|docnum|hastxt|doctype|archivedir|classifytype)/i;
347 my @items = split /@/, $hashref->{$key};
348 map {$doc_obj->add_metadata ($cursection, $key, $_); } @items;
349
350 }
351
352 my ($hastext) = $hashref->{'hastxt'} =~ /(0|1)/;
353 if ($hastext) {
354
355 my ($docnum)= $hashref->{'docnum'} =~ /(\d*)/;
356 my $text = &get_text($docnum);
357 $doc_obj->add_text ($cursection, $text);
358
359 my (@images) = $text =~ /<img.*?src=\"([^\"]*)\"[^>]*>/g;
360
361 if (scalar(@images)>0) {
362
363 foreach $img (@images) {
364 my ($assoc_file) = $img =~ /([^\/\\]*\..*)$/; #the name of the image
365 $img =~ s/_httpcollection_/$old/; #replace _httpcollection_ with path to old collection
366 $olddir =~ s/HASH/hash/;
367 $img =~ s/_thisOID_/$olddir/; #replace _thisOID_ with old archivedir name #the path to the image
368 $img = lc $img; # doc names and path upper case in file, lower
369 # case in directory
370 if (-e $img) {
371 $doc_obj->associate_file($img, $assoc_file);
372 } else {
373 print STDERR "WARNING: $img does not exist\n";
374 }
375 }
376
377 }
378 }
379}
380
381
382sub add_classification_metadata {
383
384 my ($oid, $doc_obj, $cursection) = @_;
385
386 if (defined $doc_classif_info->{$oid}) {
387
388 $hashref = $doc_classif_info->{$oid};
389
390 foreach $key (keys %$hashref) {
391 my @items = @{$hashref->{$key}};
392 map {$doc_obj->add_metadata ($cursection, $key, $_); } @items;
393 }
394 }
395}
396# picks up the cover image "cover.jpg" from the old archives directory.
397sub add_cover_image {
398 my ($doc_obj, $archivedir, $olddir) = @_;
399 $assoc_file = "cover.jpg";
400 $img = &util::filename_cat($archivedir, $olddir, $assoc_file);
401
402 if (-e $img) {
403 $doc_obj->associate_file($img, $assoc_file);
404 } else {
405 print STDERR "WARNING: couldn't find $img\n";
406 }
407}
408
409sub set_index {
410 my ($indexdir) = @_;
411
412 # check that $collection has been set
413 die "collection global was not set\n"
414 unless defined $collection && $collection =~ /\w/;
415
416 # find an index (just use first non-text and non-paragraph directory we
417 # come across in $indexdir)
418 opendir (INDEXDIR, $indexdir) || die "couldn't open directory $indexdir\n";
419 my @indexes = readdir INDEXDIR;
420 close INDEXDIR;
421 foreach $i (@indexes) {
422 next if $i =~ /text$/i || $i =~ /\./ || $i =~ /^p/i;
423 $index = &util::filename_cat ($i, $collection);
424 last;
425 }
426}
427
428
429#########################################################################
430
431################ functions involving mg ################################
432sub get_text {
433 my ($docnum) = @_;
434
435 print STDERR "." if $verbosity >= 2;
436 &mgcommand ($docnum);
437
438 <$mgread>; # eat the document separator
439
440 my $text = "";
441 my $line = "";
442
443 while (defined ($line = <$mgread>))
444 {
445 last if $line =~ /^<\/mg>/;
446 $text .= $line;
447 }
448
449 # Read in the last statement, which should be:
450 # "dd documents retrieved."
451 <$mgread>;
452
453 return $text;
454}
455
456
457sub numerically {$a <=> $b;}
458
459
460
461# mg stuff
462
463sub openmg {
464 my ($indexdir) = @_;
465
466 die "Unable to start mgquery." unless
467 &openpipe($mgread, $mgwrite, "mgquery_old -d $indexdir -f $index -t $textdir");
468
469 $mgwrite->autoflush();
470 &mgcommand('.set expert true');
471 &mgcommand('.set terminator "</mg>\n"');
472 &mgcommand('.set mode text');
473 &mgcommand('.set query docnums');
474 &mgcommand('.set term_freq off');
475}
476
477sub closemg {
478 &mgcommand (".quit");
479 close($mgread);
480 close($mgwrite);
481}
482
483sub mgcommand {
484 my ($command) = @_;
485
486 return if $command =~ /^\s*$/; #whitespace
487 #print STDERR "command: $command\n";
488 print $mgwrite "$command\n";
489
490 # eat up the command executed which is echoed
491 <$mgread>;
492}
493
494# openpipe(READ, WRITE, CMD)
495#
496# Like open2, except CMD's stderr is also redirected.
497#
498sub openpipe
499{
500 my ($read, $write, $cmd) = @_;
501 my ($child_read, $child_write);
502
503 $child_read = ++$FileHandle;
504 $child_write = ++$FileHandle;
505
506 pipe($read, $child_write) || die "Failed pipe($read, $child_write): $!";
507 pipe($child_read, $write) || die "Failed pipe($child_read, $write): $!";
508 my $pid;
509
510 if (($pid = fork) < 0) {
511 die "Failed fork: $!";
512 } elsif ($pid == 0) {
513 close($read);
514 close($write);
515 open(STDIN, "<&$child_read");
516 open(STDOUT, ">&$child_write");
517 open(STDERR, ">&$child_write");
518 exec($cmd);
519 die "Failed exec $cmd: $!";
520 }
521
522 close($child_read);
523 close($child_write);
524
525 $write->autoflush();
526 $read->autoflush();
527
528 return 1;
529}
530
531
532######################################################################
533
534############# functions to do with the classificaiton stuff ##########
535sub get_classifyinfo {
536 my ($gdbmfile) = @_;
537
538 open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
539 print STDERR "Finding all classification sections from $gdbmfile\n" ;
540
541 $/ = '-' x 70;
542 my $entry = "";
543 while (defined ($entry = <DB2TXT>)) {
544 next unless $entry =~ /\w/; #ignore blank entries
545 $entry =~ s/\n+/\\n/g; # replace multiple \n with single \n
546 my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/;
547
548 next unless ($key =~/^[A-Z]$/); # assumes classification OID is like
549 # C or T etc
550
551 push( @$classifyinfo, $key);
552
553 }
554
555 $/ = "\n";
556}
557
558#this creates the classification files needed for the hierarchy classifier
559#used for subjects, titles, orgs etc
560#also adds in entries to the collect_cfg hash
561sub make_info_file {
562 my ($etcdir, $classifier) = @_;
563 my $info_file = "";
564
565 $info_file = &util::filename_cat($etcdir, "CL.${classifier}.txt");
566 $classifier_name = "CL.$classifier";
567 print STDERR "classification $classifier will be called CL.$classifier\n";
568
569 open (OUTDOC, ">$info_file" ) || die "couldn't open file $info_file\n";
570
571 my $entry = $infodb{$classifier};
572 #print STDERR "entry = $entry\n";
573 my $hashref = {};
574 &get_metadata($entry, $hashref);
575 my $children=[];
576 &get_whole_children($hashref, $children);
577 foreach $child (@$children) {
578 &process_entry(OUTDOC, $classifier_name, $child);
579 }
580
581 close OUTDOC;
582
583 &add_classify_cfg($classifier, $classifier_name, $info_file);
584
585}
586
587
588sub process_entry {
589 my ($handle, $classifier_name, $classify_id) = @_;
590
591 my $value = $infodb{$classify_id};
592 my $hashref={};
593 &get_metadata($value, $hashref);
594 my $title = $hashref->{'Title'};
595
596 &add_line($handle, $classify_id, $title);
597
598 my $children = [];
599 &get_whole_children($hashref, $children);
600 foreach $child (@$children) {
601 if (&is_document($child)) {
602 &add_doc_metadata($child, $classifier_name, $classify_id);
603 }
604 else {
605 &process_entry($handle, $classifier_name, $child);
606 }
607 }
608}
609
610sub add_doc_metadata {
611 my ($doc_id, $classifier_name, $classifier_id) = @_;
612
613 #add entry to doc database
614 #print STDERR "at doc level, docnum=$classify_id\n";
615 $doc_classif_info->{$doc_id}={} unless defined $doc_classif_info->{$doc_id};
616 $doc_classif_info->{$doc_id}->{$classifier_name}=[] unless
617 defined $doc_classif_info->{$doc_id}->{$classifier_name};
618 push (@{$doc_classif_info->{$doc_id}->{$classifier_name}}, $classifier_id);
619}
620
621sub add_line {
622 my ($handle, $classify_id, $title) = @_;
623 $title = &unicode::ascii2utf8(\$title);
624 my ($num) = $classify_id =~ /^[A-Z]\.(.*)$/; #remove the C. from the front
625 print $handle "$classify_id\t$num\t\"$title\"\n";
626}
627
628sub is_document {
629 my ($oid) = @_;
630 return 1 if $oid =~ /^B\.\d/;
631 return 0;
632}
633
634########################################################################
635
636########## stuff for producing collect.cfg file ###########################
637
638sub add_default_cfg {
639
640 $username=`whoami`;
641 $username=`logname` unless defined $username;
642 $username="a_user" unless defined $username;
643 $username =~ s/\n//;
644 $collect_cfg->{'creator'}="$username\@cs.waikato.ac.nz";
645 $collect_cfg->{'maintainer'}="$username\@cs.waikato.ac.nz";
646 $collect_cfg->{'public'}="true";
647
648 $collect_cfg->{'plugin'}=[];
649 push (@{$collect_cfg->{'plugin'}}, ["GAPlug"]);
650 push (@{$collect_cfg->{'plugin'}}, ["ArcPlug"]);
651 push (@{$collect_cfg->{'plugin'}}, ["RecPlug"]);
652
653 $collect_cfg->{'format'}={};
654 $collect_cfg->{'format'}->{'DocumentImages'}="true";
655 $collect_cfg->{'format'}->{'DocumentText'} =
656 "\"<h3>[Title]</h3>\\\\n\\\\n<p>[Text]\"";
657 $collect_cfg->{'format'}->{'SearchVList'} =
658 "\"<td valign=top>[link][icon][/link]</td><td>{If}{[parent(All': '):Title],[parent(All': '):Title]:}[link][Title][/link]</td>\"";
659
660 $collect_cfg->{'collectionmeta'}={};
661 $collect_cfg->{'collectionmeta'}->{'collectionname'}="\"$collection\"";
662 $collect_cfg->{'collectionmeta'}->{'iconcollection'}="\"_httpprefix_/collect/$collection/images/$collection.gif\"";
663 $collect_cfg->{'collectionmeta'}->{'iconcollectionsmall'}="\"_httpprefix_/collect/$collection/images/${collection}sm.gif\"";
664 $collect_cfg->{'collectionmeta'}->{'collectionextra'} = "\"This is a collection rebuilt from CDROM.\"";
665
666}
667
668sub add_index_cfg {
669 my ($buildfile) = @_;
670
671 my $data={};
672 $collect_cfg->{'indexes'}=[];
673 if (-e $buildfile) {
674 $data=&cfgread::read_cfg_file($buildfile, '^(this)$', '^(indexmap)$');
675 foreach my $i (@{$data->{'indexmap'}}) {
676 my ($thisindex, $abbrev)= split (/\-\>/, $i);
677 push (@{$collect_cfg->{'indexes'}}, $thisindex);
678 $collect_cfg->{'defaultindex'} = $thisindex unless defined
679 $collect_cfg->{'defaultindex'};
680 $name=&get_index_name($thisindex);
681 $thisindex=".$thisindex";
682 $collect_cfg->{'collectionmeta'}->{$thisindex} = "\"$name\"";
683 }
684 }
685 else {
686 print STDERR "Couldn't read $buildfile, could not add index data to configuration file\n";
687 }
688
689}
690
691sub get_index_name {
692 my ($thisindex) = @_;
693 return "paragraphs" if $thisindex =~ /paragraph/;
694 return "chapters" if $thisindex =~ /section.*text/;
695 return "titles" if $thisindex =~ /Title/;
696 return "other";
697}
698
699sub add_classify_cfg {
700
701 my ($classify, $metadata, $file) = @_;
702 $collect_cfg->{'classify'} = [] unless defined $collect_cfg->{'classify'};
703
704 my ($title);
705 $title = "Howto" if $classify eq 'H';
706 $title = "Subject" if $classify eq 'C';
707 $title = "Organization" if $classify eq 'O';
708 $title = "Title" if $classify eq 'T';
709 $title = "Other" unless defined $title;
710
711 my ($filename) = $file =~ /\/([^\/]*)$/;
712 my $entry = "Hierarchy -hfile $filename -metadata $metadata -buttonname $title -sort Title";
713 $entry .= " -hlist_at_top" if $title eq "Title";
714 push (@{$collect_cfg->{'classify'}},[$entry]);
715
716
717}
718
719sub output_cfg_file {
720
721 my ($collfile) = @_;
722 &cfgread::write_cfg_file($collfile, $collect_cfg,
723 '^(creator|maintainer|public|beta|defaultindex)$',
724 '^(indexes)$', '^(format|collectionmeta)$',
725 '^(plugin|classify)$');
726}
727
728sub read_gdbm {
729 my ($filename) = @_;
730
731 open (PIPEIN, "db2txt \"$filename\" |") || die "couldn't open pipe from db2txt\n";
732 my $line = ""; my $key = ""; my $value = "";
733 while (defined ($line = <PIPEIN>)) {
734 if ($line =~ /^\[([^\]]+)\]$/) {
735 $key = $1;
736 } elsif ($line =~ /^-{70}$/) {
737 $infodb{$key} = $value;
738 $value = "";
739 $key = "";
740 } else {
741 $value .= $line;
742 }
743 }
744 close PIPEIN;
745}
Note: See TracBrowser for help on using the repository browser.