source: trunk/gsdl/perllib/plugins/SRCPlug.pm@ 5845

Last change on this file since 5845 was 5680, checked in by mdewsnip, 21 years ago

Moved plugin descriptions into the resource bundle (perllib/strings.rb), for translation by UNESCO.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 7.6 KB
Line 
1###########################################################################
2#
3# SRCPlug.pm -- source code plugin
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26# John McPherson Nov 2000
27# originally based on TEXTPlug
28
29# filename is currently used for Title ( optionally minus some prefix )
30
31# Current languages:
32# text: READMEs/Makefiles
33# C/C++ (currently extracts #include statements and C++ class decls)
34# Perl (currently only done as text)
35# Shell (currently only done as text)
36
37# 12/05/02 Added usage datastructure - John Thompson
38
39package SRCPlug;
40
41use BasPlug;
42use parsargv;
43
44sub BEGIN {
45 @ISA = ('BasPlug');
46}
47
48my $arguments =
49 [ { 'name' => "process_exp",
50 'desc' => "{BasPlug.process_exp}",
51 'type' => "string",
52 'deft' => &get_default_process_exp(),
53 'reqd' => "no" } ,
54 { 'name' => "block_exp",
55 'desc' => "{BasPlug.block_exp}",
56 'type' => 'string',
57 'deft' => &get_default_block_exp(),
58 'reqd' => "no" },
59 { 'name' => "remove_prefix",
60 'desc' => "{SRCPlug.remove_prefix}",
61 'type' => 'string',
62 'deft' => "^.*[/\\]",
63 'reqd' => "no" } ];
64
65my $options = { 'name' => "SRCPlug",
66 'desc' => "{SRCPlug.desc}",
67 'inherits' => "yes",
68 'args' => $arguments };
69
70# sub print_usage {
71# print STDERR "\n usage: plugin SRCPlug [options]\n";
72# print STDERR "Try to import C and C++ source code. Adds \"class\"";
73# print STDERR " metadata.\n\n";
74# print STDERR " options:\n";
75# print STDERR " -remove_prefix <pattern> Remove this leading pattern from the filename\n";
76# print STDERR " (eg -remove_prefix /tmp/XX/src/). The default is to\n";
77# print STDERR " remove the whole path from the filename.\n";
78# print STDERR "\n";
79# }
80
81sub new {
82 my ($class) = @_;
83 my $self = new BasPlug ($class, @_);
84
85 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
86 my $option_list = $self->{'option_list'};
87 push( @{$option_list}, $options );
88
89 if (!parsargv::parse(\@_,
90 q^remove_prefix/(\S+)/^, \$self->{'remove_prefix'},
91 "allow_extra_options"
92 )
93 ) {
94 print STDERR "\nIncorrect options passed to SRCPlug, ";
95 print STDERR "check your collect.cfg configuration file\n";
96 $self->print_txt_usage(""); # Use default resource bundle
97 die "\n";
98 }
99 return bless $self, $class;
100}
101
102sub get_default_block_exp {
103 my $self = shift (@_);
104
105 return q^(?i)\.(o|obj|a|so|dll)$^;
106}
107
108sub get_default_process_exp {
109 my $self = shift (@_);
110
111# return q^(?i)\.te?xt$^;
112 return q^(Makefile.*|README.*|(?i)\.(c|cc|cpp|C|h|hpp|pl|pm|sh))$^;
113}
114
115
116
117# do plugin specific processing of doc_obj
118sub process {
119 my $self = shift (@_);
120 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
121 my $outhandle = $self->{'outhandle'};
122
123 print $outhandle "SRCPlug: processing $file\n"
124 if $self->{'verbosity'} > 1;
125
126 my $cursection = $doc_obj->get_top_section();
127
128 my $filetype="text"; # Makefiles, READMEs, ...
129 if ($file =~ /\.(cc|h|cpp|C)$/) {$filetype="C++";} # assume all .h files...
130 elsif ($file =~ /\.c$/) {$filetype="C";}
131 elsif ($file =~ /\.p(l|m)$/) {$filetype="perl";}
132 elsif ($file =~ /\.sh$/) {$filetype="sh";}
133
134 # modify '<' and '>' for GML... (even though inside <pre> tags!!)
135 $$textref =~ s/</&lt;/g;
136 $$textref =~ s/>/&gt;/g;
137 $$textref =~ s/_/&#95;/g;
138 # try _escape_text($text) from doc.pm....
139
140 # don't want mg to turn escape chars into actual values
141 $$textref =~ s/\\/\\\\/g;
142
143 # use filename (minus any prefix) as the title.
144 my $title;
145 if ($self->{'remove_prefix' ne ""}) {
146 ($title = $file) =~ s/^$self->{'remove_prefix'}//;
147 } else {
148 ($title = $file) =~ s@^.*[/\\]@@; # remove pathname by default
149 }
150 $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
151 # remove the gsdl prefix from the filename
152 my $relative_filename=$file;
153 $relative_filename =~ s@^.*?gsdl[/\\]@@;
154 $doc_obj->add_utf8_metadata ($cursection, "filename", $relative_filename);
155
156 # class information from .h and .cc and .C and .cpp files
157 if ($filetype eq "C++")
158 {
159 process_c_plus_plus($textref,$pluginfo, $base_dir,
160 $file, $metadata, $doc_obj);
161 } elsif ($filetype eq "C")
162 {
163 get_includes_metadata($textref, $doc_obj);
164 }
165
166
167 # default operation...
168 # insert preformat tags and add text to document object
169 $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
170
171 return 1;
172}
173
174
175
176
177sub get_includes_metadata {
178 my ($textref, $doc_obj) = @_;
179
180 my $topsection = $doc_obj->get_top_section();
181
182 # Get '#include' directives for metadata
183 if ($$textref !~ /\#\s*include\b/) {
184 return;
185 }
186
187 my @includes =
188 ($$textref =~ m/^\s*\#\s*include\s*(?:\"|&lt;)(.*?)(?:\"|&gt;)/mg);
189
190 my $incs_done_ref=$doc_obj->get_metadata($section, "includes");
191 my @incs_done;
192 if (defined($incs_done_ref)) {
193 @incs_done=@$incs_done_ref;
194 } else {
195 @incs_done=();
196 }
197
198 foreach my $inc (@includes) {
199 # add entries, but only if they don't already exist
200 if (!join('', map {$_ eq "$inc"?1:""} @incs_done)) {
201 push @incs_done, $inc;
202 $doc_obj->add_utf8_metadata($topsection, "includes", $inc);
203 }
204 }
205}
206
207
208
209sub process_c_plus_plus {
210 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
211
212 my $topsection = $doc_obj->get_top_section();
213
214
215 # Check for include metadata
216 get_includes_metadata($textref, $doc_obj);
217
218
219
220 # Get class declarations (but not forward declarations...) as metadata
221 if ($$textref =~ /\bclass\b/ ) {
222 my $classnames=$$textref;
223
224 # remove commented lines
225 $classnames =~ s@/\*.*?\*/@@sg;
226 $classnames =~ s@//.*$@@mg;
227 while ($classnames =~ /\bclass\b/) {
228
229 # delete all lines up to the next "class"
230 while ($classnames !~ /^[^\n]*\bclass\b[^\n]*\n/)
231 {$classnames =~ s/.*\n//;}
232
233# $classnames =~ s/^([^c][^l])*(.)?$//mg; # delete unneccessary lines
234
235 # get the line including the next "class" and remove it from
236 # our tmp text.
237 $classnames =~ s/^(.*\bclass\b.*)$//m;
238
239 # don't index if merely a reference/fwd decl. of another class
240 if ($1 !~ /(friend\Wclass)|(class\W\w+\W?\;)|(\/\/.*class)/) {
241 # $1 is still the whole line - eg:
242 # "class StaffSystem: public BaseStaffSystem"
243 my $wholeline=$1;
244 my $classname=$1;
245 $classname =~ s/.*class\W(\w+).*/$1/;
246 my $classes=$doc_obj->get_metadata($section, "class");
247 foreach my $elem (@$classes) {
248 if ("$elem" eq "$classname") {goto class_done;}
249 }
250 $doc_obj->add_utf8_metadata($topsection, "class", $classname);
251 class_done:
252 $doc_obj->add_utf8_metadata($topsection, "classdecl", $wholeline);
253 }
254 }
255 } # end of "class"
256
257 return 1;
258}
259
2601;
261
Note: See TracBrowser for help on using the repository browser.