source: main/tags/2.37/gsdl/perllib/plugins/SRCPlug.pm@ 33178

Last change on this file since 33178 was 2657, checked in by jrm21, 23 years ago

fixed a bug when #including a macro (ie no "... or <... on the line)

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 7.2 KB
Line 
1###########################################################################
2#
3# SRCPlug.pm -- source code plugin
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26# John McPherson Nov 2000
27# originally based on TEXTPlug
28
29# filename is currently used for Title ( optionally minus some prefix )
30
31# Current languages:
32# text: READMEs/Makefiles
33# C/C++ (currently extracts #include statements and C++ class decls)
34# Perl (currently only done as text)
35# Shell (currently only done as text)
36
37
38package SRCPlug;
39
40use BasPlug;
41use parsargv;
42
# Compile-time setup: SRCPlug inherits its plugin behaviour from BasPlug.
BEGIN {
    @ISA = qw(BasPlug);
}
46
47
# Write the SRCPlug usage summary (what the plugin does and its
# -remove_prefix option) to STDERR.
sub print_usage {
    my $usage_text = join('',
        "\n usage: plugin SRCPlug [options]\n",
        "Try to import C and C++ source code. Adds \"class\"",
        " metadata.\n\n",
        " options:\n",
        " -remove_prefix <pattern> Remove this leading pattern from the filename\n",
        " (eg -remove_prefix /tmp/XX/src/). The default is to\n",
        " remove the whole path from the filename.\n",
        "\n",
    );

    print STDERR $usage_text;
}
58
# Constructor.  Builds the underlying BasPlug object, then parses the
# SRCPlug-specific option (-remove_prefix) out of the argument list.
# Unknown options are tolerated ("allow_extra_options").  On a parse
# failure the usage text is printed to STDERR and the constructor dies.
sub new {
    my ($class) = @_;
    my $self = BasPlug->new($class, @_);

    my $parsed_ok = parsargv::parse(
        \@_,
        q^remove_prefix/(\S+)/^, \$self->{'remove_prefix'},
        "allow_extra_options"
    );

    unless ($parsed_ok) {
        print STDERR "\nIncorrect options passed to SRCPlug, ";
        print STDERR "check your collect.cfg configuration file\n";
        print_usage();
        die "\n";
    }

    return bless $self, $class;
}
75
# Default "block" expression: a case-insensitive pattern (as a string)
# matching compiled artefacts -- object files, static/shared libraries
# and DLLs.
sub get_default_block_exp {
    my ($self) = @_;

    my $block_exp = q^(?i)\.(o|obj|a|so|dll)$^;
    return $block_exp;
}
81
# Default "process" expression: a pattern (as a string) matching
# Makefiles, READMEs and, case-insensitively, the C/C++, Perl and shell
# file extensions.
sub get_default_process_exp {
    my ($self) = @_;

    my $process_exp = q^(Makefile.*|README.*|(?i)\.(c|cc|cpp|C|h|hpp|pl|pm|sh))$^;
    return $process_exp;
}
88
89
90
# do plugin specific processing of doc_obj
#
# Escapes the source text for GML, records "Title" and "filename"
# metadata, extracts C/C++ specific metadata where applicable and stores
# the text (wrapped in <pre> tags) in the document object.
#
# Arguments (after $self):
#   $textref   - reference to the file's text; it is escaped IN PLACE
#   $pluginfo, $base_dir, $metadata
#              - not used here; passed through to process_c_plus_plus
#   $file      - filename; drives file type detection, Title and filename
#   $doc_obj   - document object receiving the metadata and text
#
# Returns 1.
sub process {
    my $self = shift (@_);
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
    my $outhandle = $self->{'outhandle'};

    print $outhandle "SRCPlug: processing $file\n"
        if $self->{'verbosity'} > 1;

    my $cursection = $doc_obj->get_top_section();

    my $filetype = "text"; # Makefiles, READMEs, ...
    if ($file =~ /\.(cc|h|hpp|cpp|C)$/) {
        # assume all .h files are C++; .hpp is accepted by the default
        # process expression, so classify it as C++ here as well.
        $filetype = "C++";
    }
    elsif ($file =~ /\.c$/)      { $filetype = "C"; }
    elsif ($file =~ /\.p(l|m)$/) { $filetype = "perl"; }
    elsif ($file =~ /\.sh$/)     { $filetype = "sh"; }

    # modify '<' and '>' for GML... (even though inside <pre> tags!!)
    $$textref =~ s/</&lt;/g;
    $$textref =~ s/>/&gt;/g;
    $$textref =~ s/_/&#95;/g;
    # try _escape_text($text) from doc.pm....

    # don't want mg to turn escape chars into actual values
    $$textref =~ s/\\/\\\\/g;

    # use filename (minus any prefix) as the title.
    # The comparison must be made on the option's VALUE.  It used to sit
    # inside the hash subscript ($self->{'remove_prefix' ne ""}), which
    # looked up key "1" and so always ignored -remove_prefix.
    my $title = $file;
    my $remove_prefix = $self->{'remove_prefix'};
    if (defined $remove_prefix && $remove_prefix ne "") {
        # -remove_prefix is documented as a pattern, so it is used as one
        $title =~ s/^$remove_prefix//;
    } else {
        $title =~ s@^.*[/\\]@@; # remove pathname by default
    }
    $doc_obj->add_utf8_metadata ($cursection, "Title", $title);

    # remove the gsdl prefix from the filename
    my $relative_filename = $file;
    $relative_filename =~ s@^.*?gsdl[/\\]@@;
    $doc_obj->add_utf8_metadata ($cursection, "filename", $relative_filename);

    # class information from .h and .cc and .C and .cpp files
    if ($filetype eq "C++") {
        process_c_plus_plus($textref, $pluginfo, $base_dir,
                            $file, $metadata, $doc_obj);
    }
    elsif ($filetype eq "C") {
        get_includes_metadata($textref, $doc_obj);
    }

    # default operation...
    # insert preformat tags and add text to document object
    $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");

    return 1;
}
147
148
149
150
# Extract the filenames named by '#include' directives and store each
# distinct one as "includes" metadata on the document's top section.
#
# Arguments:
#   $textref - reference to the source text (not modified; a copy is used).
#              process() escapes '<' and '>' before calling, which is why
#              the patterns below look for &lt; and &gt;.
#   $doc_obj - document object providing get_top_section, get_metadata
#              and add_utf8_metadata
#
# No pre-processing is done, so '#include SOME_MACRO' is skipped.
sub get_includes_metadata {
    my ($textref, $doc_obj) = @_;

    my $topsection = $doc_obj->get_top_section();

    # Cheap pre-check: nothing to do without an include directive
    return unless $$textref =~ /\#\W?include\b/;

    my $includes = $$textref;

    # remove commented lines
    $includes =~ s@/\*.*?\*/@@sg; # /s: block comments may span lines;
                                  # .*? stops at the first closing */
    $includes =~ s@//.*$@@mg;     # /m: strip to the end of each line

    # remove non- include lines (well, lines without a '#')
    $includes =~ s/^[^\#]*$//mg;

    # lines don't always start '#include "'.... we have to allow (eg)
    # '# include ...'
    # ' #include ...'
    # also make sure it contains an actual filename.
    #
    # The substitution is itself the loop condition: each pass removes
    # one complete include line, so the loop must terminate.  Previously
    # a separate, looser test guarded the loop and a line the
    # substitution could not match (eg an unterminated '#include "foo')
    # made it spin forever.
    while ($includes =~ s/^.*?\#\s*include.*?(?:\"|&lt;)(.*)(?:\"|&gt;).*$//m) {
        my $include = $1;

        # remove leading pathname
        $include =~ s@^.*(/|\\)@@;

        # skip headers already recorded on the top section (this used to
        # query an undeclared $section, so duplicates were never found)
        my $incs_done = $doc_obj->get_metadata($topsection, "includes") || [];
        next if grep { $_ eq $include } @$incs_done;

        $doc_obj->add_utf8_metadata($topsection, "includes", $include);
    }

    return;
}
188
189
190
# Extract C++ specific metadata: the '#include' filenames (via
# get_includes_metadata) and class declarations.
#
# For every line that declares a class -- as opposed to a 'friend class'
# line or a forward declaration such as 'class Foo;' -- the class name is
# stored as "class" metadata (once per distinct name) and the whole line
# is stored as "classdecl" metadata, both on the top section.
#
# Arguments:
#   $textref - reference to the (already escaped) source text; not modified
#   $pluginfo, $base_dir, $file, $metadata - unused here
#   $doc_obj - document object receiving the metadata
#
# Returns 1.
sub process_c_plus_plus {
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;

    my $topsection = $doc_obj->get_top_section();

    # Check for include metadata
    get_includes_metadata($textref, $doc_obj);

    # Get class declarations (but not forward declarations...) as metadata
    if ($$textref =~ /\bclass\b/) {
        my $classnames = $$textref;

        # remove commented lines
        $classnames =~ s@/\*.*?\*/@@sg;
        $classnames =~ s@//.*$@@mg;

        # Walk the text one line at a time.  The previous approach deleted
        # leading lines until one containing "class" AND a newline came
        # first, which never terminated when the last "class" line had no
        # trailing newline.
        foreach my $wholeline (split /\n/, $classnames) {
            next unless $wholeline =~ /\bclass\b/;

            # don't index if merely a reference/fwd decl. of another class
            next if $wholeline =~ /(friend\Wclass)|(class\W\w+\W?\;)|(\/\/.*class)/;

            # $wholeline is eg "class StaffSystem: public BaseStaffSystem";
            # keep only the identifier following the last "class"
            my $classname = $wholeline;
            $classname =~ s/.*class\W(\w+).*/$1/;

            # record each class name once (this used to query an
            # undeclared $section, so duplicates were never detected)
            my $classes = $doc_obj->get_metadata($topsection, "class") || [];
            unless (grep { $_ eq $classname } @$classes) {
                $doc_obj->add_utf8_metadata($topsection, "class", $classname);
            }

            # the declaration line itself is always recorded
            $doc_obj->add_utf8_metadata($topsection, "classdecl", $wholeline);
        }
    } # end of "class"

    return 1;
}
241
2421;
243
Note: See TracBrowser for help on using the repository browser.