source: trunk/gsdl/perllib/arcinfo.pm@ 2018

Last change on this file since 2018 was 1287, checked in by sjboddie, 24 years ago

Implemented a -sortmeta option for import.pl to sort archives.inf file
(generated at end of import process) alphabetically by the given
metadata element. This may be useful for some collections as boolean
queries currently return matches in build (fairly random) order. Changing
the order of archives.inf changes the order that documents are built.
This option has a couple of important limitations:

  1. Can't be used in conjunction with the groupsize option as it would then only change the build order of groups of documents which doesn't seem very useful.
  2. Is of limited use when building indexes at a section level as the build order is only sorted by document, not by section.
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 3.7 KB
Line 
1###########################################################################
2#
3# arcinfo.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26
27# This module stores information about the archives. At the moment
28# this information just consists of the file name (relative to the
29# directory the archives information file is in) and its OID.
30
31# This module assumes there is a one-to-one correspondence between
32# a file in the archives directory and an OID.
33
34
35package arcinfo;
36
# Constructor. Creates an empty archive-information object with two fields:
#   'info'  - hash ref mapping an OID to an array ref [doc_file]
#   'order' - array ref of [OID, sortmeta] pairs, in insertion order
sub new {
    my $class = shift (@_);

    my %state = (
        'info'  => {},
        'order' => [],
    );

    return bless \%state, $class;
}
44
# Replaces the contents of this object with the entries read from the
# archives information file $filename.
#
# Each line of the file is a tab separated record: OID, doc_file and an
# optional sort metadata value. Lines with fewer than two fields are
# ignored. If $filename does not exist the object is simply left empty.
# Dies if the file exists but cannot be opened.
sub load_info {
    my $self = shift (@_);
    my ($filename) = @_;

    # Reset both structures together. Clearing only 'info' would leave
    # stale entries in 'order', and add_info could not remove them (it
    # only cleans 'order' for OIDs still present in 'info'), so a reload
    # would produce duplicate OIDs.
    $self->{'info'} = {};
    $self->{'order'} = [];

    if (-e $filename) {
        # Three argument open with a lexical handle: the file name is
        # never interpreted as a mode or a pipe, and no global handle is
        # shared with other code.
        open (my $infile, '<', $filename) ||
            die "arcinfo::load_info couldn't read $filename\n";

        while (defined (my $line = <$infile>)) {
            $line =~ s/\cM|\cJ//g; # remove end-of-line characters
            my @fields = split ("\t", $line); # OID, doc_file [, sortmeta]
            if (scalar(@fields) >= 2) {
                $self->add_info (@fields);
            }
        }

        close ($infile);
    }
}
67
# Writes the archives information to $filename, overwriting any existing
# file. One line is written per document, in the order given by
# get_OID_list: the OID and the document file name separated by a tab.
# Dies if the file cannot be opened, or if closing it reports an error.
sub save_info {
    my $self = shift (@_);
    my ($filename) = @_;

    # Three argument open with a lexical handle: the file name is never
    # interpreted as part of the open mode.
    open (my $outfile, '>', $filename) ||
        die "arcinfo::save_info couldn't write $filename\n";

    foreach my $info (@{$self->get_OID_list()}) {
        if (defined $info) {
            print $outfile join("\t", @$info), "\n";
        }
    }

    # Output is buffered, so a failed write may only be reported here.
    close ($outfile) ||
        die "arcinfo::save_info couldn't finish writing $filename: $!\n";
}
85
# Removes the document identified by $OID, both from the 'info' hash and
# from the 'order' list. Does nothing if no information is stored for $OID.
sub delete_info {
    my ($self, $OID) = @_;

    if (defined $self->{'info'}->{$OID}) {
        delete $self->{'info'}->{$OID};

        # only the first matching entry in the ordering is removed
        my $order = $self->{'order'};
        foreach my $pos (0 .. $#{$order}) {
            next unless $order->[$pos]->[0] eq $OID;
            splice (@$order, $pos, 1);
            last;
        }
    }
}
104
# Records the document $OID as being stored in $doc_file. $sortmeta is the
# optional value the document is ordered by; it defaults to the empty
# string. Any entry already held for $OID is replaced, and the document is
# placed at the end of the insertion order.
sub add_info {
    my ($self, $OID, $doc_file, $sortmeta) = @_;
    my $sort_key = defined ($sortmeta) ? $sortmeta : "";

    # drop any earlier record for this OID before adding the new one
    $self->delete_info ($OID);

    $self->{'info'}->{$OID} = [$doc_file];
    push (@{$self->{'order'}}, [$OID, $sort_key]);
}
114
# Returns a reference to an array of the form [[OID, doc_file], ...].
# Entries are sorted as strings on their sort metadata value; entries
# with equal values keep the order in which they were added.
sub get_OID_list {
    my $self = shift (@_);

    my $order = $self->{'order'};

    # Sort positions rather than entries so that insertion position can
    # break ties. Perl's sort is not guaranteed to be stable, and when no
    # sort metadata is supplied every key is the empty string.
    my @positions = sort {
        $order->[$a]->[1] cmp $order->[$b]->[1] or $a <=> $b
    } (0 .. $#{$order});

    my @list = ();
    foreach my $pos (@positions) {
        my $OID = $order->[$pos]->[0];
        push (@list, [$OID, $self->{'info'}->{$OID}->[0]]);
    }

    return \@list;
}
128
# Returns a reference to an array of the form [[doc_file, OID], ...].
# Entries are sorted as strings on their sort metadata value; entries
# with equal values keep the order in which they were added.
sub get_file_list {
    my $self = shift (@_);

    my $order = $self->{'order'};

    # Sort positions rather than entries so that insertion position can
    # break ties. Perl's sort is not guaranteed to be stable, and when no
    # sort metadata is supplied every key is the empty string.
    my @positions = sort {
        $order->[$a]->[1] cmp $order->[$b]->[1] or $a <=> $b
    } (0 .. $#{$order});

    my @list = ();
    foreach my $pos (@positions) {
        my $OID = $order->[$pos]->[0];
        push (@list, [$self->{'info'}->{$OID}->[0], $OID]);
    }

    return \@list;
}
142
143
# Looks up the information stored for $OID. Returns the stored array
# reference, of the form [doc_file], or undef if nothing is held for $OID.
sub get_info {
    my ($self, $OID) = @_;

    my $entry = $self->{'info'}->{$OID};

    return (defined ($entry) ? $entry : undef);
}
155
156
# Returns the number of documents currently held, i.e. the number of
# entries in the 'order' list.
sub size {
    my ($self) = @_;

    my $count = @{$self->{'order'}};

    return $count;
}
162
1631;
164
Note: See TracBrowser for help on using the repository browser.