source: trunk/gsdl/perllib/plugins/SRCPlug.pm@ 4842

Last change on this file since 4842 was 4785, checked in by mdewsnip, 21 years ago

Commented out print_usage functions - plugins should now call $self->print_txt_usage() to display their usage text. Updates to the options of a plugin should be made in the $options and $arguments data structures at the top of the plugin.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.4 KB
Line 
1###########################################################################
2#
3# SRCPlug.pm -- source code plugin
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26# John McPherson Nov 2000
27# originally based on TEXTPlug
28
29# filename is currently used for Title ( optionally minus some prefix )
30
31# Current languages:
32# text: READMEs/Makefiles
33# C/C++ (currently extracts #include statements and C++ class decls)
34# Perl (currently only done as text)
35# Shell (currently only done as text)
36
37# 12/05/02 Added usage datastructure - John Thompson
38
39package SRCPlug;
40
41use BasPlug;
42use parsargv;
43
44sub BEGIN {
45 @ISA = ('BasPlug');
46}
47
48my $arguments =
49 [ { 'name' => "process_exp",
50 'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",
51 'type' => "string",
52 'deft' => &get_default_process_exp(),
53 'reqd' => "no" } ,
54 { 'name' => "block_exp",
55 'desc' => "Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",
56 'type' => 'string',
57 'deft' => &get_default_block_exp(),
58 'reqd' => "no" } ,
59 { 'name' => "remove_prefix",
60 'desc' => "Remove this leading pattern from the filename (eg -remove_prefix /tmp/XX/src/). The default is to remove the whole path from the filename.",
61 'type' => 'string',
62 'deft' => "",
63 'reqd' => "no" } ];
64
65my $options = { 'name' => "SRCPlug",
66 'desc' => "Filename is currently used for Title ( optionally minus some prefix ). Current languages:\ntext: READMEs/Makefiles\nC/C++ (currently extracts #include statements and C++ class decls)\nPerl (currently only done as text)\nShell (currently only done as text)",
67 'inherits' => "yes",
68 'args' => $arguments };
69
70# sub print_usage {
71# print STDERR "\n usage: plugin SRCPlug [options]\n";
72# print STDERR "Try to import C and C++ source code. Adds \"class\"";
73# print STDERR " metadata.\n\n";
74# print STDERR " options:\n";
75# print STDERR " -remove_prefix <pattern> Remove this leading pattern from the filename\n";
76# print STDERR " (eg -remove_prefix /tmp/XX/src/). The default is to\n";
77# print STDERR " remove the whole path from the filename.\n";
78# print STDERR "\n";
79# }
80
81sub new {
82 my ($class) = @_;
83 my $self = new BasPlug ($class, @_);
84
85 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
86 my $option_list = $self->{'option_list'};
87 push( @{$option_list}, $options );
88
89 if (!parsargv::parse(\@_,
90 q^remove_prefix/(\S+)/^, \$self->{'remove_prefix'},
91 "allow_extra_options"
92 )
93 ) {
94 print STDERR "\nIncorrect options passed to SRCPlug, ";
95 print STDERR "check your collect.cfg configuration file\n";
96 $self->print_txt_usage();
97 die "\n";
98 }
99 return bless $self, $class;
100}
101
102sub get_default_block_exp {
103 my $self = shift (@_);
104
105 return q^(?i)\.(o|obj|a|so|dll)$^;
106}
107
108sub get_default_process_exp {
109 my $self = shift (@_);
110
111# return q^(?i)\.te?xt$^;
112 return q^(Makefile.*|README.*|(?i)\.(c|cc|cpp|C|h|hpp|pl|pm|sh))$^;
113}
114
115
116
117# do plugin specific processing of doc_obj
118sub process {
119 my $self = shift (@_);
120 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
121 my $outhandle = $self->{'outhandle'};
122
123 print $outhandle "SRCPlug: processing $file\n"
124 if $self->{'verbosity'} > 1;
125
126 my $cursection = $doc_obj->get_top_section();
127
128 my $filetype="text"; # Makefiles, READMEs, ...
129 if ($file =~ /\.(cc|h|cpp|C)$/) {$filetype="C++";} # assume all .h files...
130 elsif ($file =~ /\.c$/) {$filetype="C";}
131 elsif ($file =~ /\.p(l|m)$/) {$filetype="perl";}
132 elsif ($file =~ /\.sh$/) {$filetype="sh";}
133
134 # modify '<' and '>' for GML... (even though inside <pre> tags!!)
135 $$textref =~ s/</&lt;/g;
136 $$textref =~ s/>/&gt;/g;
137 $$textref =~ s/_/&#95;/g;
138 # try _escape_text($text) from doc.pm....
139
140 # don't want mg to turn escape chars into actual values
141 $$textref =~ s/\\/\\\\/g;
142
143 # use filename (minus any prefix) as the title.
144 my $title;
145 if ($self->{'remove_prefix' ne ""}) {
146 ($title = $file) =~ s/^$self->{'remove_prefix'}//;
147 } else {
148 ($title = $file) =~ s@^.*[/\\]@@; # remove pathname by default
149 }
150 $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
151 # remove the gsdl prefix from the filename
152 my $relative_filename=$file;
153 $relative_filename =~ s@^.*?gsdl[/\\]@@;
154 $doc_obj->add_utf8_metadata ($cursection, "filename", $relative_filename);
155
156 # class information from .h and .cc and .C and .cpp files
157 if ($filetype eq "C++")
158 {
159 process_c_plus_plus($textref,$pluginfo, $base_dir,
160 $file, $metadata, $doc_obj);
161 } elsif ($filetype eq "C")
162 {
163 get_includes_metadata($textref, $doc_obj);
164 }
165
166
167 # default operation...
168 # insert preformat tags and add text to document object
169 $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
170
171 return 1;
172}
173
174
175
176
177sub get_includes_metadata {
178 my ($textref, $doc_obj) = @_;
179
180 my $topsection = $doc_obj->get_top_section();
181
182 # Get '#include' directives for metadata
183 if ($$textref !~ /\#\s*include\b/) {
184 return;
185 }
186
187 my @includes =
188 ($$textref =~ m/^\s*\#\s*include\s*(?:\"|&lt;)(.*?)(?:\"|&gt;)/mg);
189
190 my $incs_done_ref=$doc_obj->get_metadata($section, "includes");
191 my @incs_done;
192 if (defined($incs_done_ref)) {
193 @incs_done=@$incs_done_ref;
194 } else {
195 @incs_done=();
196 }
197
198 foreach my $inc (@includes) {
199 # add entries, but only if they don't already exist
200 if (!join('', map {$_ eq "$inc"?1:""} @incs_done)) {
201 push @incs_done, $inc;
202 $doc_obj->add_utf8_metadata($topsection, "includes", $inc);
203 }
204 }
205}
206
207
208
209sub process_c_plus_plus {
210 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
211
212 my $topsection = $doc_obj->get_top_section();
213
214
215 # Check for include metadata
216 get_includes_metadata($textref, $doc_obj);
217
218
219
220 # Get class declarations (but not forward declarations...) as metadata
221 if ($$textref =~ /\bclass\b/ ) {
222 my $classnames=$$textref;
223
224 # remove commented lines
225 $classnames =~ s@/\*.*?\*/@@sg;
226 $classnames =~ s@//.*$@@mg;
227 while ($classnames =~ /\bclass\b/) {
228
229 # delete all lines up to the next "class"
230 while ($classnames !~ /^[^\n]*\bclass\b[^\n]*\n/)
231 {$classnames =~ s/.*\n//;}
232
233# $classnames =~ s/^([^c][^l])*(.)?$//mg; # delete unneccessary lines
234
235 # get the line including the next "class" and remove it from
236 # our tmp text.
237 $classnames =~ s/^(.*\bclass\b.*)$//m;
238
239 # don't index if merely a reference/fwd decl. of another class
240 if ($1 !~ /(friend\Wclass)|(class\W\w+\W?\;)|(\/\/.*class)/) {
241 # $1 is still the whole line - eg:
242 # "class StaffSystem: public BaseStaffSystem"
243 my $wholeline=$1;
244 my $classname=$1;
245 $classname =~ s/.*class\W(\w+).*/$1/;
246 my $classes=$doc_obj->get_metadata($section, "class");
247 foreach my $elem (@$classes) {
248 if ("$elem" eq "$classname") {goto class_done;}
249 }
250 $doc_obj->add_utf8_metadata($topsection, "class", $classname);
251 class_done:
252 $doc_obj->add_utf8_metadata($topsection, "classdecl", $wholeline);
253 }
254 }
255 } # end of "class"
256
257 return 1;
258}
259
2601;
261
Note: See TracBrowser for help on using the repository browser.