source: gsdl/trunk/bin/script/unbuildv1.pl@ 17750

Last change on this file since 17750 was 17750, checked in by kjdon, 15 years ago

plugout and plugin name changes. do these scripts still work??

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 21.8 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# unbuildv1.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# this program will decompress all the text from a built index
29# and return it to gml format - this version of the program is designed to
30# work on collections built with the first version of the greenstone software
31# it extracts the browse classifications from the gdbm file, and recreates
32# them. It also creates a default collect.cfg file.
33
34# To run, this program needs the old archive directory (called archives),
35# the index directory with the text and one other index, not paragraphs!! -
36# use one of the section indexes. Also needs the build.cfg file (in the
37# index dir) - used in creating a collect.cfg file. If build.cfg is not
38# present, the parts of the collect.cfg file relating to indexes will not
39# be entered.
40
41# Stefan altered this script slightly August 2002 to use
42# $GSDLHOME/bin/$GSDLOS/mgquery_old instead of the old mgquery in
43# nzdl-1.2. mgquery_old is a statically linked binary compiled from
44# mg-1.3d. It was compiled with SHORT_SUFFIX set so doesn't need the index
45# file extensions to be altered.
46# Also changed the command line options a little.
47
48BEGIN {
49 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
50 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
51 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
52 $FileHandle = 'FH000';
53}
54
55use doc;
56use util;
57use parsargv;
58use FileHandle;
59use English;
60use cfgread;
61use unicode;
62use plugout;
63select STDERR; $| = 1;
64select STDOUT; $| = 1;
65
66
67# globals
68$collection = ""; # the collection name
69$index = ""; # the selected index (like stt/unu)
70$textdir = ""; # the textdir (like text/unu)
71$toplevelinfo = []; #list of document OIDs
72%infodb = (); #hash into GDBM file
73$classifyinfo = []; # list of classifications
74$doc_classif_info = {}; # hash of OIDs->classifications they belong to
75$collect_cfg = {}; #data for the configuration file
76$mgread = ++$FileHandle;
77$mgwrite = ++$FileHandle;
78
79sub print_usage {
80 print STDERR "\n usage: $0 [options]\n\n";
81 print STDERR " options:\n";
82 print STDERR " -verbosity number 0=none, 3=lots\n";
83 print STDERR " -old directory The directory in which the old collection lives\n";
84 print STDERR " -new directory The directory in which to put the new collection\n";
85}
86
87&main ();
88
89sub main {
90 if (!parsargv::parse(\@ARGV,
91 'verbosity/\d+/2', \$verbosity,
92 'old/.*/', \$old,
93 'new/.*/', \$new)) {
94 &print_usage();
95 die "\n";
96 }
97
98 die "$old does not exist\n\n" unless (-d $old);
99 my $old_indexdir = &util::filename_cat($old, "index");
100 my $old_archivedir = &util::filename_cat($old, "archives");
101
102 my $new_archivedir = &util::filename_cat($new, "archives");
103 my $new_etcdir = &util::filename_cat($new, "etc");
104 if (!-e $new_archivedir) {
105 &util::mk_all_dir($new_archivedir);
106 }
107 if (!(-d $new_etcdir)) {
108 &util::mk_all_dir($new_etcdir);
109 }
110
111 my $gdbmfile = &get_gdbmfile($old_indexdir); # sets $collection and $textdir
112 &set_index($old_indexdir); # sets $index (just chooses one index)
113
114 my $buildcfgfile = &util::filename_cat($old_indexdir, "build.cfg");
115 my $colcfgfile = &util::filename_cat($new_etcdir, "collect.cfg");
116
117 # don't need this but giving it a value stops an annoying undefined
118 # value warning
119 $ENV{'GSDLCOLLECTDIR'} = 'tmp';
120
121 &add_default_cfg();
122 &add_index_cfg($buildcfgfile);
123
124 # work out all the classifications from the gdbm file, info for each doc
125 # (ie which classifications they belong to, are kept in $doc_classif_info
126 &get_classifyinfo($gdbmfile); #puts a list of classifications into classifyinfo
127 &get_toplevel_OID($gdbmfile); # puts a list of the top level document OIDs into $toplevelinfo
128
129 # read ldb file into %infodb
130 &read_gdbm($gdbmfile);
131
132 #this makes the files specifying the hierarchy of subjects, titles etc
133 foreach my $classify (@$classifyinfo) {
134 &make_info_file($new_etcdir, $classify);
135 }
136
137 # write out the collect.cfg
138 &output_cfg_file($colcfgfile);
139
140 &openmg($old_indexdir);
141
142 # read the archive information file
143 my $archive_info_filename = &util::filename_cat ($new_archivedir, "archives.inf");
144 my $archive_info = new arcinfo ();
145
146 my $opts = [];
147 push @$opts,("-output_info",$archive_info);
148
149 $processor = &plugout::load_plugout("GreenstoneXMLPlugout",$opts);
150 $processor->setoutputdir ($new_archivedir);
151
152 my ($doc_obj, $hashref, $children);
153 print STDERR "processing documents now\n" if $verbosity >=2;
154 foreach $oid (@$toplevelinfo) {
155 $value = $infodb{$oid};
156 $hashref={};
157 $children = [];
158 &get_metadata($value, $hashref);
159 $doc_obj = new doc ();
160 $doc_obj->set_OID($oid);
161 my ($olddir) = $hashref->{'archivedir'}; # old dir for this doc, where images are stored
162 $top = $doc_obj->get_top_section();
163 &add_section_content ($doc_obj, $top, $hashref, $olddir);
164 &add_classification_metadata($oid, $doc_obj, $top);
165 &add_cover_image($doc_obj, $old_archivedir, $olddir);
166 &get_children($hashref, $children);
167 &recurse_sections($doc_obj, $children, $oid, $top, $olddir) if (defined ($children));
168
169 #change OID to new format
170 $doc_obj->delete_OID();
171 $doc_obj->set_OID();
172 $processor->process($doc_obj);
173# last;
174 }
175 print STDERR "\n";
176
177 &closemg();
178
179 # write out the archive information file
180 $archive_info->save_info($archive_info_filename);
181
182
183}
184
185# returns the path to the gdbm info database - also
186# sets the $collection and $textdir global variable
187sub get_gdbmfile {
188 my ($indexdir) = @_;
189 my ($gdbmfile);
190
191 opendir (DIR, $indexdir) || die "Couldn't open directory $indexdir\n\n";
192 my @conts = readdir DIR;
193 close DIR;
194
195 foreach $file (@conts) {
196 if ($file =~ /text$/) {
197 $textdir = $file;
198 last;
199 }
200 }
201 die "No text directory found in $indexdir\n\n"
202 unless defined $textdir && $textdir =~ /text$/;
203
204 $gdbmfile = &util::filename_cat ($indexdir, $textdir);
205
206 opendir (DIR, $gdbmfile) || die "Couldn't open directory $gdbmfile\n\n";
207 @conts = readdir DIR;
208 close DIR;
209
210 foreach my $file (@conts) {
211 if ($file =~ /^(.*?)\.(?:ldb|bdb)$/) {
212 $collection = $1;
213 $gdbmfile = &util::filename_cat ($gdbmfile, $file);
214 last;
215 }
216 }
217
218 if (defined $collection && $collection =~ /\w/) {
219 $textdir = &util::filename_cat ($textdir, $collection);
220 } else {
221 die "collection global wasn't set\n";
222 }
223 return $gdbmfile if (-e $gdbmfile);
224 die "Couldn't find gdbm info database in $indexdir\n\n";
225}
226
227
228#this function will probably need to be changed for any different file formats
229sub get_toplevel_OID {
230 my ($gdbmfile) = @_;
231
232 open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
233 print STDERR "Finding all top level sections from $gdbmfile\n" if $verbosity >= 2;
234
235 $/ = '-' x 70;
236 my $entry = "";
237 while (defined ($entry = <DB2TXT>)) {
238 next unless $entry =~ /\w/; #ignore blank entries
239 $entry =~ s/\n+/\\n/g; # replace multiple \n with single \n
240 my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/;
241
242 next unless ($key =~ /B\.\d*$/); # assumes top level OID is like
243 # B.12
244
245 push( @$toplevelinfo, $key);
246
247 }
248
249 $/ = "\n";
250}
251
252# gets all the metadata from a gdbm file entry, and puts it into a hashref
253#this has changed for old style formats
254sub get_metadata {
255
256 my ($gdb_str_ref, $hashref) = @_;
257# print STDERR $gdb_str_ref;
258 my @entries = split(/\n/, $gdb_str_ref);
259
260 my $hastext = 1;
261 foreach $entry (@entries) {
262
263 my($key, $value) = ($entry =~ /^<([^>]*)>(.*?)$/ );
264
265 next if ($key eq "p");
266 next if ($key eq "j");
267
268 $key = "Title" if $key eq "t";
269 $key = "docnum" if $key eq "d";
270 if ($key eq "o") {
271 $key = "archivedir";
272 $value =~ s/HASH/hash/;
273 }
274 if ($key eq "c") {
275 $key = "contains";
276 $hastext=0;}
277
278 $$hashref{$key} .= $value;
279
280 }
281
282 $$hashref{'hastxt'} = $hastext;
283
284}
285
286# takes a hashref containing the metadata for a gdbmfile entry, and
287# extracts the childrens numbers (from the 'contains' entry). assumes
288# format is like B.14.1:B.24.2:B.24.3 returns a list like 1 2 3 4
289sub get_children {
290 my ($hashref, $children) = @_;
291
292 my $childs = $hashref->{'contains'};
293 if (defined ($childs)) {
294 @xchildren = split /\:/, $childs;
295 foreach $kid (@xchildren) {
296 my ($child)= $kid =~ /\.([^\.]*)(?:\\n|$)/;
297 push (@$children, $child);
298 }
299# print STDERR "children are: ", @$children;
300 }
301 else {
302 $children = [];
303 }
304}
305
306#takes a hashref containing the metadata for a gdbmfile entry, and extracts
307#the childrens numbers (from the 'contains' entry).
308#assumes format is like B.14.1:B.24.2:B.24.3
309#returns a list with the full child name ie B.14.1 B.14.2 etc
310#used for classification stuff
311sub get_whole_children {
312
313 my ($hashref, $children) = @_;
314
315 my $childs = $hashref->{'contains'};
316 if (defined ($childs)) {
317 @$children = split /\:/, $childs;
318 #print STDERR "children are: ", @$children;
319 }
320 else {
321 $children = [];
322 }
323}
324
325sub recurse_sections {
326 my ($doc_obj, $children, $parentoid, $parentsection, $olddir) = @_;
327
328 foreach $child (sort numerically @$children) {
329 $doc_obj->create_named_section("$parentsection.$child");
330 my $value = $infodb{"$parentoid.$child"};
331 my $hashref={};
332 &get_metadata($value, $hashref); # get childs metadata
333 my $newchildren = [];
334 &get_children($hashref, $newchildren); # get childs children
335 #add content fo rcurrent section
336 &add_section_content($doc_obj, "$parentsection.$child", $hashref, $olddir);
337 # process all the children if there are any
338 &recurse_sections($doc_obj, $newchildren, "$parentoid.$child", "$parentsection.$child", $olddir)
339 if (defined ($newchildren));
340 }
341}
342
343sub add_section_content {
344 my ($doc_obj, $cursection, $hashref, $olddir) = @_;
345
346 foreach $key (keys %$hashref) {
347 #dont need to store these metadata
348 next if $key =~ /(contains|docnum|hastxt|doctype|archivedir|classifytype)/i;
349 my @items = split /@/, $hashref->{$key};
350 map {$doc_obj->add_metadata ($cursection, $key, $_); } @items;
351
352 }
353
354 my ($hastext) = $hashref->{'hastxt'} =~ /(0|1)/;
355 if ($hastext) {
356
357 my ($docnum)= $hashref->{'docnum'} =~ /(\d*)/;
358 my $text = &get_text($docnum);
359 $doc_obj->add_text ($cursection, $text);
360
361 my (@images) = $text =~ /<img.*?src=\"([^\"]*)\"[^>]*>/g;
362
363 if (scalar(@images)>0) {
364
365 foreach $img (@images) {
366 my ($assoc_file) = $img =~ /([^\/\\]*\..*)$/; #the name of the image
367 $img =~ s/_httpcollection_/$old/; #replace _httpcollection_ with path to old collection
368 $olddir =~ s/HASH/hash/;
369 $img =~ s/_thisOID_/$olddir/; #replace _thisOID_ with old archivedir name #the path to the image
370 $img = lc $img; # doc names and path upper case in file, lower
371 # case in directory
372 if (-e $img) {
373 $doc_obj->associate_file($img, $assoc_file);
374 } else {
375 print STDERR "WARNING: $img does not exist\n";
376 }
377 }
378
379 }
380 }
381}
382
383
384sub add_classification_metadata {
385
386 my ($oid, $doc_obj, $cursection) = @_;
387
388 if (defined $doc_classif_info->{$oid}) {
389
390 $hashref = $doc_classif_info->{$oid};
391
392 foreach $key (keys %$hashref) {
393 my @items = @{$hashref->{$key}};
394 map {$doc_obj->add_metadata ($cursection, $key, $_); } @items;
395 }
396 }
397}
398# picks up the cover image "cover.jpg" from the old archives directory.
399sub add_cover_image {
400 my ($doc_obj, $archivedir, $olddir) = @_;
401 $assoc_file = "cover.jpg";
402 $img = &util::filename_cat($archivedir, $olddir, $assoc_file);
403
404 if (-e $img) {
405 $doc_obj->associate_file($img, $assoc_file);
406 } else {
407 print STDERR "WARNING: couldn't find $img\n";
408 }
409}
410
411sub set_index {
412 my ($indexdir) = @_;
413
414 # check that $collection has been set
415 die "collection global was not set\n"
416 unless defined $collection && $collection =~ /\w/;
417
418 # find an index (just use first non-text and non-paragraph directory we
419 # come across in $indexdir)
420 opendir (INDEXDIR, $indexdir) || die "couldn't open directory $indexdir\n";
421 my @indexes = readdir INDEXDIR;
422 close INDEXDIR;
423 foreach $i (@indexes) {
424 next if $i =~ /text$/i || $i =~ /\./ || $i =~ /^p/i;
425 $index = &util::filename_cat ($i, $collection);
426 last;
427 }
428}
429
430
431#########################################################################
432
433################ functions involving mg ################################
434sub get_text {
435 my ($docnum) = @_;
436
437 print STDERR "." if $verbosity >= 2;
438 &mgcommand ($docnum);
439
440 <$mgread>; # eat the document separator
441
442 my $text = "";
443 my $line = "";
444
445 while (defined ($line = <$mgread>))
446 {
447 last if $line =~ /^<\/mg>/;
448 $text .= $line;
449 }
450
451 # Read in the last statement, which should be:
452 # "dd documents retrieved."
453 <$mgread>;
454
455 return $text;
456}
457
458
459sub numerically {$a <=> $b;}
460
461
462
463# mg stuff
464
465sub openmg {
466 my ($indexdir) = @_;
467
468 die "Unable to start mgquery." unless
469 &openpipe($mgread, $mgwrite, "mgquery_old -d $indexdir -f $index -t $textdir");
470
471 $mgwrite->autoflush();
472 &mgcommand('.set expert true');
473 &mgcommand('.set terminator "</mg>\n"');
474 &mgcommand('.set mode text');
475 &mgcommand('.set query docnums');
476 &mgcommand('.set term_freq off');
477}
478
479sub closemg {
480 &mgcommand (".quit");
481 close($mgread);
482 close($mgwrite);
483}
484
485sub mgcommand {
486 my ($command) = @_;
487
488 return if $command =~ /^\s*$/; #whitespace
489 #print STDERR "command: $command\n";
490 print $mgwrite "$command\n";
491
492 # eat up the command executed which is echoed
493 <$mgread>;
494}
495
496# openpipe(READ, WRITE, CMD)
497#
498# Like open2, except CMD's stderr is also redirected.
499#
500sub openpipe
501{
502 my ($read, $write, $cmd) = @_;
503 my ($child_read, $child_write);
504
505 $child_read = ++$FileHandle;
506 $child_write = ++$FileHandle;
507
508 pipe($read, $child_write) || die "Failed pipe($read, $child_write): $!";
509 pipe($child_read, $write) || die "Failed pipe($child_read, $write): $!";
510 my $pid;
511
512 if (($pid = fork) < 0) {
513 die "Failed fork: $!";
514 } elsif ($pid == 0) {
515 close($read);
516 close($write);
517 open(STDIN, "<&$child_read");
518 open(STDOUT, ">&$child_write");
519 open(STDERR, ">&$child_write");
520 exec($cmd);
521 die "Failed exec $cmd: $!";
522 }
523
524 close($child_read);
525 close($child_write);
526
527 $write->autoflush();
528 $read->autoflush();
529
530 return 1;
531}
532
533
534######################################################################
535
536############# functions to do with the classificaiton stuff ##########
537sub get_classifyinfo {
538 my ($gdbmfile) = @_;
539
540 open (DB2TXT, "db2txt $gdbmfile |") || die "couldn't open pipe to db2txt\n";
541 print STDERR "Finding all classification sections from $gdbmfile\n" ;
542
543 $/ = '-' x 70;
544 my $entry = "";
545 while (defined ($entry = <DB2TXT>)) {
546 next unless $entry =~ /\w/; #ignore blank entries
547 $entry =~ s/\n+/\\n/g; # replace multiple \n with single \n
548 my ($key, $value) = $entry =~ /\[([^\]]*)\](.*)/;
549
550 next unless ($key =~/^[A-Z]$/); # assumes classification OID is like
551 # C or T etc
552
553 push( @$classifyinfo, $key);
554
555 }
556
557 $/ = "\n";
558}
559
560#this creates the classification files needed for the hierarchy classifier
561#used for subjects, titles, orgs etc
562#also adds in entries to the collect_cfg hash
563sub make_info_file {
564 my ($etcdir, $classifier) = @_;
565 my $info_file = "";
566
567 $info_file = &util::filename_cat($etcdir, "CL.${classifier}.txt");
568 $classifier_name = "CL.$classifier";
569 print STDERR "classification $classifier will be called CL.$classifier\n";
570
571 open (OUTDOC, ">$info_file" ) || die "couldn't open file $info_file\n";
572
573 my $entry = $infodb{$classifier};
574 #print STDERR "entry = $entry\n";
575 my $hashref = {};
576 &get_metadata($entry, $hashref);
577 my $children=[];
578 &get_whole_children($hashref, $children);
579 foreach $child (@$children) {
580 &process_entry(OUTDOC, $classifier_name, $child);
581 }
582
583 close OUTDOC;
584
585 &add_classify_cfg($classifier, $classifier_name, $info_file);
586
587}
588
589
590sub process_entry {
591 my ($handle, $classifier_name, $classify_id) = @_;
592
593 my $value = $infodb{$classify_id};
594 my $hashref={};
595 &get_metadata($value, $hashref);
596 my $title = $hashref->{'Title'};
597
598 &add_line($handle, $classify_id, $title);
599
600 my $children = [];
601 &get_whole_children($hashref, $children);
602 foreach $child (@$children) {
603 if (&is_document($child)) {
604 &add_doc_metadata($child, $classifier_name, $classify_id);
605 }
606 else {
607 &process_entry($handle, $classifier_name, $child);
608 }
609 }
610}
611
612sub add_doc_metadata {
613 my ($doc_id, $classifier_name, $classifier_id) = @_;
614
615 #add entry to doc database
616 #print STDERR "at doc level, docnum=$classify_id\n";
617 $doc_classif_info->{$doc_id}={} unless defined $doc_classif_info->{$doc_id};
618 $doc_classif_info->{$doc_id}->{$classifier_name}=[] unless
619 defined $doc_classif_info->{$doc_id}->{$classifier_name};
620 push (@{$doc_classif_info->{$doc_id}->{$classifier_name}}, $classifier_id);
621}
622
623sub add_line {
624 my ($handle, $classify_id, $title) = @_;
625 $title = &unicode::ascii2utf8(\$title);
626 my ($num) = $classify_id =~ /^[A-Z]\.(.*)$/; #remove the C. from the front
627 print $handle "$classify_id\t$num\t\"$title\"\n";
628}
629
630sub is_document {
631 my ($oid) = @_;
632 return 1 if $oid =~ /^B\.\d/;
633 return 0;
634}
635
636########################################################################
637
638########## stuff for producing collect.cfg file ###########################
639
640sub add_default_cfg {
641
642 $username=`whoami`;
643 $username=`logname` unless defined $username;
644 $username="a_user" unless defined $username;
645 $username =~ s/\n//;
646 $collect_cfg->{'creator'}="$username\@cs.waikato.ac.nz";
647 $collect_cfg->{'maintainer'}="$username\@cs.waikato.ac.nz";
648 $collect_cfg->{'public'}="true";
649
650 $collect_cfg->{'plugin'}=[];
651 push (@{$collect_cfg->{'plugin'}}, ["GreenstoneXMLPlugin"]);
652 push (@{$collect_cfg->{'plugin'}}, ["ArchivesInfPlugin"]);
653 push (@{$collect_cfg->{'plugin'}}, ["DirectoryPlugin"]);
654
655 $collect_cfg->{'format'}={};
656 $collect_cfg->{'format'}->{'DocumentImages'}="true";
657 $collect_cfg->{'format'}->{'DocumentText'} =
658 "\"<h3>[Title]</h3>\\\\n\\\\n<p>[Text]\"";
659 $collect_cfg->{'format'}->{'SearchVList'} =
660 "\"<td valign=top>[link][icon][/link]</td><td>{If}{[parent(All': '):Title],[parent(All': '):Title]:}[link][Title][/link]</td>\"";
661
662 $collect_cfg->{'collectionmeta'}={};
663 $collect_cfg->{'collectionmeta'}->{'collectionname'}="\"$collection\"";
664 $collect_cfg->{'collectionmeta'}->{'iconcollection'}="\"_httpprefix_/collect/$collection/images/$collection.gif\"";
665 $collect_cfg->{'collectionmeta'}->{'iconcollectionsmall'}="\"_httpprefix_/collect/$collection/images/${collection}sm.gif\"";
666 $collect_cfg->{'collectionmeta'}->{'collectionextra'} = "\"This is a collection rebuilt from CDROM.\"";
667
668}
669
670sub add_index_cfg {
671 my ($buildfile) = @_;
672
673 my $data={};
674 $collect_cfg->{'indexes'}=[];
675 if (-e $buildfile) {
676 $data=&cfgread::read_cfg_file($buildfile, '^(this)$', '^(indexmap)$');
677 foreach my $i (@{$data->{'indexmap'}}) {
678 my ($thisindex, $abbrev)= split (/\-\>/, $i);
679 push (@{$collect_cfg->{'indexes'}}, $thisindex);
680 $collect_cfg->{'defaultindex'} = $thisindex unless defined
681 $collect_cfg->{'defaultindex'};
682 $name=&get_index_name($thisindex);
683 $thisindex=".$thisindex";
684 $collect_cfg->{'collectionmeta'}->{$thisindex} = "\"$name\"";
685 }
686 }
687 else {
688 print STDERR "Couldn't read $buildfile, could not add index data to configuration file\n";
689 }
690
691}
692
693sub get_index_name {
694 my ($thisindex) = @_;
695 return "paragraphs" if $thisindex =~ /paragraph/;
696 return "chapters" if $thisindex =~ /section.*text/;
697 return "titles" if $thisindex =~ /Title/;
698 return "other";
699}
700
701sub add_classify_cfg {
702
703 my ($classify, $metadata, $file) = @_;
704 $collect_cfg->{'classify'} = [] unless defined $collect_cfg->{'classify'};
705
706 my ($title);
707 $title = "Howto" if $classify eq 'H';
708 $title = "Subject" if $classify eq 'C';
709 $title = "Organization" if $classify eq 'O';
710 $title = "Title" if $classify eq 'T';
711 $title = "Other" unless defined $title;
712
713 my ($filename) = $file =~ /\/([^\/]*)$/;
714 my $entry = "Hierarchy -hfile $filename -metadata $metadata -buttonname $title -sort Title";
715 $entry .= " -hlist_at_top" if $title eq "Title";
716 push (@{$collect_cfg->{'classify'}},[$entry]);
717
718
719}
720
721sub output_cfg_file {
722
723 my ($collfile) = @_;
724 &cfgread::write_cfg_file($collfile, $collect_cfg,
725 '^(creator|maintainer|public|beta|defaultindex)$',
726 '^(indexes)$', '^(format|collectionmeta)$',
727 '^(plugin|classify)$');
728}
729
730sub read_gdbm {
731 my ($filename) = @_;
732
733 open (PIPEIN, "db2txt \"$filename\" |") || die "couldn't open pipe from db2txt\n";
734 my $line = ""; my $key = ""; my $value = "";
735 while (defined ($line = <PIPEIN>)) {
736 if ($line =~ /^\[([^\]]+)\]$/) {
737 $key = $1;
738 } elsif ($line =~ /^-{70}$/) {
739 $infodb{$key} = $value;
740 $value = "";
741 $key = "";
742 } else {
743 $value .= $line;
744 }
745 }
746 close PIPEIN;
747}
Note: See TracBrowser for help on using the repository browser.