# This plugin processes all books in a table of contents file.
# A toc.txt file contains two things: a subject classification and a
# list of file names.
4 |
|
---|
5 | package TOCPlug;
|
---|
6 |
|
---|
7 | use plugin;
|
---|
8 | use BasPlug;
|
---|
9 | use lang;
|
---|
10 | use doc;
|
---|
11 |
|
---|
# Set up inheritance at compile time: this package derives from BasPlug.
BEGIN {
    @ISA = qw(BasPlug);
}
15 |
|
---|
# Constructor: creates a BasPlug object and re-blesses it into the
# requested class.
#   $class - name of the class the new object is blessed into
# Returns the newly blessed object.
sub new {
    my ($class) = @_;

    # Keep the object in a lexical; assigning to an undeclared $self would
    # store it in a package global that outlives this call.
    # Arrow syntax is used because "new BasPlug ()" is ambiguous inside a
    # sub that is itself named new.
    my $self = BasPlug->new();

    return bless $self, $class;
}
22 |
|
---|
# Tells the caller whether this class might recurse using $pluginfo.
# Always returns 1.
sub is_recursive {
    my ($self) = @_;

    return 1;
}
29 |
|
---|
# Builds the subject classification document from a toc file and hands it
# to the processor.
#
# The first line of the file is an info line and is skipped.  Every later
# line has comments and end-of-line characters removed and is split on tabs.
# A line whose first two fields are both non-empty is a classification
# entry: field 0 is the classification, field 1 its title.
#
#   $pluginfo  - not used in this routine
#   $base_dir  - prefix prepended to $file to form the path (might be "")
#   $file      - toc file name, relative to $base_dir
#   $metadata  - not used in this routine
#   $processor - object whose process() method receives the document
#
# Dies if the toc file cannot be opened.
# Returns whatever $processor->process() returns.
sub read_toc_subject {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;

    my $toc_path = "$base_dir$file";

    # three-argument open so the path is never interpreted as a mode or pipe
    open (my $toc_fh, '<', $toc_path)
        or die "TOCPlug::read_toc_subject couldn't open $toc_path: $!\n";

    my $infoline = <$toc_fh>; # first line is an info line; read and discard

    my $doc_obj = doc->new ($file, "classification");
    $doc_obj->set_OID ("CLSU");

    while (defined (my $line = <$toc_fh>)) {
        $line =~ s/^\#.*$//;   # remove comments
        $line =~ s/\cM|\cJ//g; # remove end-of-line characters
        my @fields = split (/\t/, $line);

        # remove spaces at the start and end of each field
        s/^\s+|\s+$//g for @fields;

        # only classification entries go into the classification document
        next unless scalar (@fields) >= 2;
        next if $fields[0] eq "" || $fields[1] eq "";

        # convert the leading character of the classification to its number
        my $classifier = $self->int_classification ($fields[0]);
        $doc_obj->create_named_section ($classifier);
        $doc_obj->add_metadata ($classifier, "Title", $fields[1]);
    }

    close ($toc_fh);

    # process the classification document
    return $processor->process ($doc_obj);
}
63 |
|
---|
# Replaces the leading character of a classification with its numeric
# character code, i.e. C.2.4 becomes 67.2.4
#   $classification - classification string to convert
# Returns the converted string.
sub int_classification {
    my ($self, $classification) = @_;

    # /e evaluates ord() on the captured first character
    (my $converted = $classification) =~ s/^(.)/ord($1)/e;

    return $converted;
}
74 |
|
---|
# Processes every file entry listed in a toc file.
#
# The first line of the file is an info line and is skipped, as are comment
# lines (starting with '#') and lines without any word character.  The
# remaining lines are split on tabs; going by the indices used below the
# fields are, in order:
#   0 Classification, 1 Classification title, 2 Title, 3 Language,
#   4 File name, 5 Creator (optional)
# A line is a file entry when it has at least five fields, an empty
# classification title and a non-empty file name.  Each file entry is passed
# to plugin::read with Subject, Title, Language and (if present) Creator
# metadata.
#
#   $pluginfo  - passed through to plugin::read
#   $base_dir  - prefix prepended to $file to form the path (might be "")
#   $file      - toc file name, relative to $base_dir
#   $metadata  - ignored; each entry gets freshly built metadata
#   $processor - passed through to plugin::read
#
# Dies if the toc file cannot be opened.
# Returns the result of closing the toc file.
sub read_toc_files {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;

    my $toc_path = "$base_dir$file";

    # three-argument open so the path is never interpreted as a mode or pipe
    open (my $toc_fh, '<', $toc_path)
        or die "TOCPlug::read_toc_files couldn't open $toc_path: $!\n";

    my $infoline = <$toc_fh>; # first line is an info line; read and discard

    while (defined (my $line = <$toc_fh>)) {
        # skip comments and lines with no content
        next if $line =~ /^\#/ || $line !~ /\w/;

        $line =~ s/\cM|\cJ//g; # remove end-of-line characters
        my @fields = split (/\t/, $line);

        # remove spaces at the start and end of each field
        s/^\s+|\s+$//g for @fields;

        # only file entries are processed
        next unless scalar (@fields) >= 5;
        next unless $fields[1] eq "" && $fields[4] ne "";

        my $file_metadata = {'Subject'  => $fields[0],
                             'Title'    => $fields[2],
                             'Language' => lang::english_to_iso639 ($fields[3])};

        $file_metadata->{'Creator'} = $fields[5] if defined $fields[5];

        plugin::read ($pluginfo, $base_dir, $fields[4], $file_metadata, $processor);
    }

    return close ($toc_fh);
}
113 |
|
---|
114 |
|
---|
# Looks for a toc.txt file directly under $file and, when one exists,
# builds the subject classification document and then processes each file
# listed in the table of contents.
# Returns 1 if processed, 0 if not processed.
# Note that $base_dir might be "" and that $file might include directories.
sub read {
    my ($self, $pluginfo, $base_dir, $file, $metadata, $processor) = @_;

    my $tocfile = "$file/toc.txt";

    # not a directory containing a toc file
    return 0 unless -f "$base_dir$tocfile";

    print STDERR "TOCPlug: processing $tocfile\n";

    # subject classification first, then the individual files; each step
    # is given its own empty metadata hash
    for my $step (qw(read_toc_subject read_toc_files)) {
        $self->$step ($pluginfo, $base_dir, $tocfile, {}, $processor);
    }

    return 1; # was processed
}
140 |
|
---|
141 |
|
---|
142 | 1;
|
---|