source: trunk/gsdl/perllib/plugins/SRCPlug.pm@ 7243

Last change on this file since 7243 was 6408, checked in by jmt12, 20 years ago

Added two new attributes for script arguments. HiddenGLI controls whether the argument will be visible at all in GLI, while ModeGLI defines the lowest detail mode under which the argument will be visible (only really for import and buildcol). Also ensured that the scripts were reporting their correct default process expressions, and further refined argument types by adding the catagory regexp for any regular expression (which can then be hidden under lower detail modes in GLI)

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 7.7 KB
Line 
1###########################################################################
2#
3# SRCPlug.pm -- source code plugin
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26# John McPherson Nov 2000
27# originally based on TEXTPlug
28
29# filename is currently used for Title ( optionally minus some prefix )
30
31# Current languages:
32# text: READMEs/Makefiles
33# C/C++ (currently extracts #include statements and C++ class decls)
34# Perl (currently only done as text)
35# Shell (currently only done as text)
36
37# 12/05/02 Added usage datastructure - John Thompson
38
39package SRCPlug;
40
41use BasPlug;
42use parsargv;
43
44sub BEGIN {
45 @ISA = ('BasPlug');
46}
47
48my $arguments =
49 [ { 'name' => "process_exp",
50 'desc' => "{BasPlug.process_exp}",
51 'type' => "regexp",
52 'deft' => &get_default_process_exp(),
53 'reqd' => "no" } ,
54 { 'name' => "block_exp",
55 'desc' => "{BasPlug.block_exp}",
56 'type' => "regexp",
57 'deft' => &get_default_block_exp(),
58 'reqd' => "no" },
59 { 'name' => "remove_prefix",
60 'desc' => "{SRCPlug.remove_prefix}",
61 'type' => "regexp",
62 'deft' => "^.*[/\\]",
63 'reqd' => "no" } ];
64
65my $options = { 'name' => "SRCPlug",
66 'desc' => "{SRCPlug.desc}",
67 'abstract' => "no",
68 'inherits' => "yes",
69 'args' => $arguments };
70
71# sub print_usage {
72# print STDERR "\n usage: plugin SRCPlug [options]\n";
73# print STDERR "Try to import C and C++ source code. Adds \"class\"";
74# print STDERR " metadata.\n\n";
75# print STDERR " options:\n";
76# print STDERR " -remove_prefix <pattern> Remove this leading pattern from the filename\n";
77# print STDERR " (eg -remove_prefix /tmp/XX/src/). The default is to\n";
78# print STDERR " remove the whole path from the filename.\n";
79# print STDERR "\n";
80# }
81
82sub new {
83 my ($class) = @_;
84 my $self = new BasPlug ($class, @_);
85 $self->{'plugin_type'} = "SRCPlug";
86 # 14-05-02 To allow for proper inheritance of arguments - John Thompson
87 my $option_list = $self->{'option_list'};
88 push( @{$option_list}, $options );
89
90 if (!parsargv::parse(\@_,
91 q^remove_prefix/(\S+)/^, \$self->{'remove_prefix'},
92 "allow_extra_options"
93 )
94 ) {
95 print STDERR "\nIncorrect options passed to SRCPlug, ";
96 print STDERR "check your collect.cfg configuration file\n";
97 $self->print_txt_usage(""); # Use default resource bundle
98 die "\n";
99 }
100 return bless $self, $class;
101}
102
103sub get_default_block_exp {
104 my $self = shift (@_);
105
106 return q^(?i)\.(o|obj|a|so|dll)$^;
107}
108
109sub get_default_process_exp {
110 my $self = shift (@_);
111
112# return q^(?i)\.te?xt$^;
113 return q^(Makefile.*|README.*|(?i)\.(c|cc|cpp|C|h|hpp|pl|pm|sh))$^;
114}
115
116
117
118# do plugin specific processing of doc_obj
119sub process {
120 my $self = shift (@_);
121 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
122 my $outhandle = $self->{'outhandle'};
123
124 print STDERR "<Processing n='$file' p='SRCPlug'>\n" if ($gli);
125 print $outhandle "SRCPlug: processing $file\n"
126 if $self->{'verbosity'} > 1;
127
128 my $cursection = $doc_obj->get_top_section();
129
130 my $filetype="text"; # Makefiles, READMEs, ...
131 if ($file =~ /\.(cc|h|cpp|C)$/) {$filetype="C++";} # assume all .h files...
132 elsif ($file =~ /\.c$/) {$filetype="C";}
133 elsif ($file =~ /\.p(l|m)$/) {$filetype="perl";}
134 elsif ($file =~ /\.sh$/) {$filetype="sh";}
135
136 # modify '<' and '>' for GML... (even though inside <pre> tags!!)
137 $$textref =~ s/</&lt;/g;
138 $$textref =~ s/>/&gt;/g;
139 $$textref =~ s/_/&#95;/g;
140 # try _escape_text($text) from doc.pm....
141
142 # don't want mg to turn escape chars into actual values
143 $$textref =~ s/\\/\\\\/g;
144
145 # use filename (minus any prefix) as the title.
146 my $title;
147 if ($self->{'remove_prefix' ne ""}) {
148 ($title = $file) =~ s/^$self->{'remove_prefix'}//;
149 } else {
150 ($title = $file) =~ s@^.*[/\\]@@; # remove pathname by default
151 }
152 $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
153 # remove the gsdl prefix from the filename
154 my $relative_filename=$file;
155 $relative_filename =~ s@^.*?gsdl[/\\]@@;
156 $doc_obj->add_utf8_metadata ($cursection, "filename", $relative_filename);
157
158 # class information from .h and .cc and .C and .cpp files
159 if ($filetype eq "C++")
160 {
161 process_c_plus_plus($textref,$pluginfo, $base_dir,
162 $file, $metadata, $doc_obj);
163 } elsif ($filetype eq "C")
164 {
165 get_includes_metadata($textref, $doc_obj);
166 }
167
168
169 # default operation...
170 # insert preformat tags and add text to document object
171 $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
172
173 return 1;
174}
175
176
177
178
179sub get_includes_metadata {
180 my ($textref, $doc_obj) = @_;
181
182 my $topsection = $doc_obj->get_top_section();
183
184 # Get '#include' directives for metadata
185 if ($$textref !~ /\#\s*include\b/) {
186 return;
187 }
188
189 my @includes =
190 ($$textref =~ m/^\s*\#\s*include\s*(?:\"|&lt;)(.*?)(?:\"|&gt;)/mg);
191
192 my $incs_done_ref=$doc_obj->get_metadata($section, "includes");
193 my @incs_done;
194 if (defined($incs_done_ref)) {
195 @incs_done=@$incs_done_ref;
196 } else {
197 @incs_done=();
198 }
199
200 foreach my $inc (@includes) {
201 # add entries, but only if they don't already exist
202 if (!join('', map {$_ eq "$inc"?1:""} @incs_done)) {
203 push @incs_done, $inc;
204 $doc_obj->add_utf8_metadata($topsection, "includes", $inc);
205 }
206 }
207}
208
209
210
211sub process_c_plus_plus {
212 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
213
214 my $topsection = $doc_obj->get_top_section();
215
216
217 # Check for include metadata
218 get_includes_metadata($textref, $doc_obj);
219
220
221
222 # Get class declarations (but not forward declarations...) as metadata
223 if ($$textref =~ /\bclass\b/ ) {
224 my $classnames=$$textref;
225
226 # remove commented lines
227 $classnames =~ s@/\*.*?\*/@@sg;
228 $classnames =~ s@//.*$@@mg;
229 while ($classnames =~ /\bclass\b/) {
230
231 # delete all lines up to the next "class"
232 while ($classnames !~ /^[^\n]*\bclass\b[^\n]*\n/)
233 {$classnames =~ s/.*\n//;}
234
235# $classnames =~ s/^([^c][^l])*(.)?$//mg; # delete unneccessary lines
236
237 # get the line including the next "class" and remove it from
238 # our tmp text.
239 $classnames =~ s/^(.*\bclass\b.*)$//m;
240
241 # don't index if merely a reference/fwd decl. of another class
242 if ($1 !~ /(friend\Wclass)|(class\W\w+\W?\;)|(\/\/.*class)/) {
243 # $1 is still the whole line - eg:
244 # "class StaffSystem: public BaseStaffSystem"
245 my $wholeline=$1;
246 my $classname=$1;
247 $classname =~ s/.*class\W(\w+).*/$1/;
248 my $classes=$doc_obj->get_metadata($section, "class");
249 foreach my $elem (@$classes) {
250 if ("$elem" eq "$classname") {goto class_done;}
251 }
252 $doc_obj->add_utf8_metadata($topsection, "class", $classname);
253 class_done:
254 $doc_obj->add_utf8_metadata($topsection, "classdecl", $wholeline);
255 }
256 }
257 } # end of "class"
258
259 return 1;
260}
261
2621;
263
Note: See TracBrowser for help on using the repository browser.