source: trunk/gsdl/perllib/plugins/PSPlug.pm@ 10254

Last change on this file since 10254 was 10254, checked in by kjdon, 19 years ago

added 'use strict' to all plugins, and made modifications (mostly adding 'my') to make them compile

  • Property svn:keywords set to Author Date Id Revision
File size: 5.4 KB
Line 
1###########################################################################
2#
3# PSPlug.pm -- this might look VERY similar to the PDF plugin...
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# 12/05/02 Added usage datastructure - John Thompson
27
28package PSPlug;
29
30use ConvertToPlug;
31use sorttools;
32
33use strict;
34no strict 'refs'; # allow filehandles to be variables and viceversa
35
# Set up inheritance at compile time: PSPlug derives from ConvertToPlug.
BEGIN {
    @PSPlug::ISA = qw(ConvertToPlug);
}
39
# Descriptions of the arguments this plugin adds or overrides.  Each entry
# is a hash with 'name', 'desc' and 'type', plus an optional default
# ('deft') and required flag ('reqd').  The '{Plugin.key}' description
# strings are presumably resource-bundle keys resolved elsewhere -- confirm.
my $arguments = [
    {
        'name' => 'process_exp',
        'desc' => '{BasPlug.process_exp}',
        'type' => 'regexp',
        'deft' => get_default_process_exp(),
        'reqd' => 'no',
    },
    {
        'name' => 'block_exp',
        'desc' => '{BasPlug.block_exp}',
        'type' => 'regexp',
        'deft' => get_default_block_exp(),
    },
    {
        'name' => 'extract_date',
        'desc' => '{PSPlug.extract_date}',
        'type' => 'flag',
    },
    {
        'name' => 'extract_pages',
        'desc' => '{PSPlug.extract_pages}',
        'type' => 'flag',
    },
    {
        'name' => 'extract_title',
        'desc' => '{PSPlug.extract_title}',
        'type' => 'flag',
    },
];

# Plugin-level option record; new() appends it to the caller's "OptList".
my $options = {
    'name'     => 'PSPlug',
    'desc'     => '{PSPlug.desc}',
    'abstract' => 'no',
    'inherits' => 'yes',
    'args'     => $arguments,
};
65
# Constructor.
#
# Parameters:
#   $class           - class name to bless the new object into
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - array ref of plugin arguments; PSPlug appends its own
#                      fixed "-convert_to text" and "-title_sub <regex>" pairs
#   $hashArgOptLists - hash ref (created here if not supplied) whose "ArgList"
#                      and "OptList" arrays receive this plugin's argument and
#                      option descriptions
#
# Returns the object built by ConvertToPlug's constructor, re-blessed into
# $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    # PostScript is always converted to text.  The -title_sub pattern matches
    # an optional leading "Page <n>" and/or a lone "1"; how it is applied is
    # up to ConvertToPlug (presumably stripped from extracted titles).
    push(@$inputargs, "-convert_to", "text");
    push(@$inputargs, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');

    # These pushes autovivify $hashArgOptLists when the caller passed none, so
    # it is always defined afterwards and can be handed on unconditionally
    # (the old two-argument fallback call could never be reached).
    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Arrow syntax rather than "new ConvertToPlug(...)": indirect-object calls
    # are parsed heuristically, and this package defines its own new().
    my $self = ConvertToPlug->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
83
84
# Default 'block_exp' regular expression: file names ending in ".eps",
# matched case-insensitively.
sub get_default_block_exp {
    return '(?i)\.(eps)$';
}
90
# Default 'process_exp' regular expression: file names ending in ".ps",
# matched case-insensitively.
sub get_default_process_exp {
    return '(?i)\.ps$';
}
96
# Scan a PostScript file line by line for "Title: ...", "Creation..." and
# "Pages: ..." text and record what is found as metadata on the document's
# top section.
#
# Parameters:
#   $self     - plugin object; the 'extract_title', 'extract_date' and
#               'extract_pages' flags select what is looked for, and
#               'verbosity' > 1 enables a progress message on STDERR
#   $filename - path of the PostScript file to scan
#   $doc      - document object; must provide get_top_section() and
#               add_utf8_metadata($section, $name, $value)
#
# Returns nothing.  Extraction is best-effort: if the file cannot be opened
# a message is printed to STDERR and no metadata is added.
sub extract_metadata_from_postscript {
    my $self     = shift (@_);
    my $filename = shift (@_);
    my $doc      = shift (@_);
    my $section  = $doc->get_top_section();

    my $title_found = 0;
    my $pages_found = 0;
    my $date_found  = 0;

    print STDERR "PSPlug: extracting PostScript metadata from \"$filename\"\n"
        if $self->{'verbosity'} > 1;

    # Three-argument open on a lexical handle: the file name is never
    # interpreted as part of the mode, and the handle cannot clash with
    # another INPUT handle elsewhere in the program.
    my $input;
    if (!open($input, '<', $filename)) {
        print STDERR "PSPlug: could not open \"$filename\" to extract metadata: $!\n";
        return;
    }

    while (my $line = <$input>) {
        if ($self->{'extract_title'} && !$title_found) {
            foreach my $word ($line =~ m|Title: ([-A-Za-z0-9@/\/\(\):,. ]*)|g) {
                # Drop placeholder titles, application names and the
                # surrounding parentheses.
                my $new_word = $word;
                $new_word =~ s/\(Untitled\)//i;
                $new_word =~ s/\(Microsoft Word\)//i;
                $new_word =~ s/Microsoft Word//i;
                $new_word =~ s/^\(//i;
                $new_word =~ s/\)$//i;
                $new_word =~ s/^ - //i;
                if ($new_word ne "") {
                    $doc->add_utf8_metadata($section, "Title", $new_word);
                    $title_found = 1;
                }
            }
        }

        if ($self->{'extract_date'} && !$date_found) {
            foreach my $word ($line =~ m/(Creation[-A-Za-z0-9@\/\(\):,. ]*)/g) {
                # "... Mon DD hh:mm:ss YYYY"
                if ($word =~ m/ ([A-Za-z][A-Za-z][A-Za-z]) ([0-9 ][0-9]) ?[0-9: ]+ ([0-9]{4})/) {
                    my $date = sorttools::format_date($2, $1, $3);
                    if (defined $date) {
                        $doc->add_utf8_metadata($section, "Date", $date);
                    }
                }
                # "D:YYYYMMDDhhmmss)"
                if ($word =~ m/D:([0-9]{4})([0-9]{2})([0-9]{2})[0-9]{6}\)/) {
                    my $date = sorttools::format_date($3, $2, $1);
                    if (defined $date) {
                        $doc->add_utf8_metadata($section, "Date", $date);
                    }
                }
                # "CreationDate: YYYY Mon DD hh:mm:ss"
                if ($word =~ m/CreationDate: ([0-9]{4}) ([A-Za-z][A-Za-z][A-Za-z]) ([0-9 ][0-9]) [0-9:]*/) {
                    my $date = sorttools::format_date($3, $2, $1);
                    if (defined $date) {
                        $doc->add_utf8_metadata($section, "Date", $date);
                    }
                }
                # The first "Creation..." text ends the date search even if
                # none of the formats above could be parsed.
                $date_found = 1;
            }
        }

        if ($self->{'extract_pages'} && !$pages_found) {
            foreach my $word ($line =~ m/(Pages: [0-9]*)/g) {
                my $digits = $word;
                $digits =~ s/[^0-9]//g;
                # An empty or zero count is ignored; keep looking.
                if ($digits ne "" && $digits ne "0") {
                    $doc->add_utf8_metadata($section, "Pages", $digits);
                    $pages_found = 1;
                }
            }
        }

        # Stop reading once nothing requested is still outstanding.
        last unless ($self->{'extract_title'} && !$title_found)
                 || ($self->{'extract_date'}  && !$date_found)
                 || ($self->{'extract_pages'} && !$pages_found);
    }

    close($input);
    return;
}
164
# Plugin-specific processing for a PostScript document: report progress,
# pull metadata straight out of the PostScript source, then hand the file
# on to ConvertToPlug::process_type with type "ps".
sub process {
    my $self = shift (@_);

    # Only the path, file name, document object and GLI flag are used here.
    # @_ itself is left untouched because it is forwarded unchanged below.
    my ($path, $file, $doc, $gli) = @_[2, 3, 5, 6];

    if ($self->{'verbosity'} > 1) {
        my $outhandle = $self->{'outhandle'};
        print {$outhandle} "PSPlug: passing $_[3] on to $self->{'converted_to'}Plug\n";
    }
    if ($gli) {
        print STDERR "<Processing n='$_[3]' p='PSPlug'>\n";
    }

    extract_metadata_from_postscript($self, "$path/$file", $doc);

    return ConvertToPlug::process_type($self, "ps", @_);
}
179
180
1811;
182
Note: See TracBrowser for help on using the repository browser.