source: trunk/gsdl/perllib/plugins/TOCPlug.pm@ 229

Last change on this file since 229 was 4, checked in by sjboddie, 26 years ago

Initial revision

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 3.7 KB
Line 
1# This plugin processes all books in a table of contents file.
2# A toc.txt file contains two things: a subject classification and a
3# list of file names.
4
5package TOCPlug;
6
7use plugin;
8use BasPlug;
9use lang;
10use doc;
11
# Set up inheritance at compile time: TOCPlug derives from BasPlug.
BEGIN {
    @ISA = qw(BasPlug);
}
15
# Constructor.
# Builds a BasPlug object and re-blesses it into $class, so that
# subclasses of TOCPlug get an object of their own class.
sub new {
    my ($class) = @_;

    # Keep the object in a lexical: assigning to an undeclared $self would
    # write to the package global $TOCPlug::self on every construction.
    my $self = BasPlug->new();

    return bless $self, $class;
}
22
# Reports whether this class might recurse using $pluginfo.
# TOCPlug always answers 1.
sub is_recursive {
    my ($self) = @_;

    return 1;
}
29
# Builds the subject classification document from a table of contents
# file and passes it to the processor.
#
# Arguments (after $self): $pluginfo, $base_dir, $file, $metadata,
# $processor.  Only $base_dir, $file and $processor are used here; the toc
# file is opened at "$base_dir$file".
#
# Every tab-separated line whose first two fields are both non-empty is
# treated as a classification entry: field 0 is the classification and
# field 1 its title.
#
# Dies if the toc file cannot be opened.  Returns whatever
# $processor->process returns.
sub read_toc_subject {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;

    # A lexical handle is used (rather than a shared bareword one) because
    # this plugin is recursive and nested calls must not reuse the handle.
    my $tocpath = "$base_dir$file";
    open (my $tocfh, '<', $tocpath)
        or die "TOCPlug::read_toc_subject couldn't open $tocpath: $!\n";

    scalar (<$tocfh>); # first line is an info line; read it and discard it

    my $doc_obj = doc->new ($file, "classification");
    $doc_obj->set_OID ("CLSU");

    while (defined (my $line = <$tocfh>)) {
        $line =~ s/^\#.*$//;   # remove comments
        $line =~ s/\cM|\cJ//g; # remove end-of-line characters
        my @fields = split (/\t/, $line);

        # remove spaces at the start and end of each field
        s/^\s+|\s+$//g for @fields;

        # if this is a classification entry add it to the classification file
        if ((scalar (@fields) >= 2) && ($fields[0] ne "") && ($fields[1] ne "")) {
            # convert leading letter to int, e.g. C.2.4 becomes 67.2.4
            my $classifier = $self->int_classification ($fields[0]);
            $doc_obj->create_named_section ($classifier);
            $doc_obj->add_metadata ($classifier, "Title", $fields[1]);
        }
    }

    close ($tocfh);

    # process the classification file
    $processor->process ($doc_obj);
}
63
# Replaces the first character of a classification with its numeric
# character code, e.g. "C.2.4" becomes "67.2.4".
# Takes the classification string and returns the converted copy.
sub int_classification {
    my ($self, $classification) = @_;

    my $leading_code = ord ($classification);
    $classification =~ s/^./$leading_code/;

    return $classification;
}
74
# Processes every file entry listed in a table of contents file by
# handing each one to plugin::read.
#
# Arguments (after $self): $pluginfo, $base_dir, $file, $metadata,
# $processor.  The toc file is opened at "$base_dir$file"; the incoming
# $metadata is not used, each entry gets fresh metadata built from its line.
#
# The fields of a line, separated by tabs, are used as follows:
#   0 Classification (stored as Subject)
#   1 Classification title (must be empty for a file entry)
#   2 Title
#   3 Language (converted with lang::english_to_iso639)
#   4 file name, relative to $base_dir (must be non-empty)
#   5 Creator (optional)
#
# Dies if the toc file cannot be opened.
sub read_toc_files {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;

    # A lexical handle is essential here: plugin::read below may recurse
    # into another toc.txt, and a shared bareword handle would be reopened
    # and closed underneath this loop.
    my $tocpath = "$base_dir$file";
    open (my $tocfh, '<', $tocpath)
        or die "TOCPlug::read_toc_files couldn't open $tocpath: $!\n";

    scalar (<$tocfh>); # first line is an info line; read it and discard it

    while (defined (my $line = <$tocfh>)) {
        # skip comment lines and lines with no word characters
        next if ($line =~ /^\#/) || ($line !~ /\w/);
        $line =~ s/\cM|\cJ//g; # remove end-of-line characters
        my @fields = split (/\t/, $line);

        # remove spaces at the start and end of each field
        s/^\s+|\s+$//g for @fields;

        # if this is a file entry process it
        if ((scalar (@fields) >= 5) && ($fields[1] eq "") && ($fields[4] ne "")) {
            my $file_metadata = {'Subject'  => $fields[0],
                                 'Title'    => $fields[2],
                                 'Language' => lang::english_to_iso639 ($fields[3])};

            $file_metadata->{'Creator'} = $fields[5] if defined $fields[5];

            plugin::read ($pluginfo, $base_dir, $fields[4], $file_metadata, $processor);
        }
    }

    close ($tocfh);
}
113
114
# Entry point called for each candidate file or directory.
# Returns 1 if $file was processed here, 0 if it was not.
# $base_dir may be "" and $file may itself include directories.
# $file is handled only when "$base_dir$file/toc.txt" is a plain file.
sub read {
    my ($self, $pluginfo, $base_dir, $file, $metadata, $processor) = @_;

    my $tocfile = "$file/toc.txt";

    # not a directory containing a toc file
    return 0 unless -f "$base_dir$tocfile";

    # found a toc.txt file
    print STDERR "TOCPlug: processing $tocfile\n";

    # First create the subject classification document, then process each
    # file within this table of contents file.  Each pass receives its own
    # empty metadata hash.
    for my $pass ('read_toc_subject', 'read_toc_files') {
        $self->$pass ($pluginfo, $base_dir, $tocfile, {}, $processor);
    }

    return 1; # was processed
}
140
141
1421;
Note: See TracBrowser for help on using the repository browser.