source: gsdl/trunk/perllib/plugins/SourceCodePlugin.pm@ 15918

Last change on this file since 15918 was 15872, checked in by kjdon, 16 years ago

plugin overhaul: plugins renamed to xxPlugin, and in some cases the names are made more sensible. They now use the new base plugins. Hopefully we have better code reuse. Some of the plugins still need work done as I didn't want to spend another month doing this before committing it. Alos, I haven't really tested anything yet...

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 7.1 KB
Line 
1###########################################################################
2#
3# SourceCodePlugin.pm -- source code plugin
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26# John McPherson Nov 2000
27# originally based on TEXTPlug
28
29# filename is currently used for Title ( optionally minus some prefix )
30
31# Current languages:
32# text: READMEs/Makefiles
33# C/C++ (currently extracts #include statements and C++ class decls)
34# Perl (currently only done as text)
35# Shell (currently only done as text)
36
37# 12/05/02 Added usage datastructure - John Thompson
38
39package SourceCodePlugin;
40
41use ReadTextFile;
42
43use strict;
44no strict 'refs'; # allow filehandles to be variables and viceversa
45
46sub BEGIN {
47 @SourceCodePlugin::ISA = ('ReadTextFile');
48}
49
50my $arguments =
51 [ { 'name' => "process_exp",
52 'desc' => "{ReadTextFile.process_exp}",
53 'type' => "regexp",
54 'deft' => &get_default_process_exp(),
55 'reqd' => "no" } ,
56 { 'name' => "block_exp",
57 'desc' => "{ReadTextFile.block_exp}",
58 'type' => "regexp",
59 'deft' => &get_default_block_exp(),
60 'reqd' => "no" },
61 { 'name' => "remove_prefix",
62 'desc' => "{SourceCodePlugin.remove_prefix}",
63 'type' => "regexp",
64 'deft' => "^.*[/\\]",
65 'reqd' => "no" } ];
66
67my $options = { 'name' => "SourceCodePlugin",
68 'desc' => "{SourceCodePlugin.desc}",
69 'abstract' => "no",
70 'inherits' => "yes",
71 'args' => $arguments };
72
73
74sub new {
75 my ($class) = shift (@_);
76 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
77 push(@$pluginlist, $class);
78
79 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
80 push(@{$hashArgOptLists->{"OptList"}},$options);
81
82 my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
83
84 return bless $self, $class;
85}
86
87sub get_default_block_exp {
88 my $self = shift (@_);
89
90 return q^(?i)\.(o|obj|a|so|dll)$^;
91}
92
93sub get_default_process_exp {
94 my $self = shift (@_);
95
96 return q^(Makefile.*|README.*|(?i)\.(c|cc|cpp|C|h|hpp|pl|pm|sh))$^;
97}
98
99
100
101# do plugin specific processing of doc_obj
102sub process {
103 my $self = shift (@_);
104 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
105 my $outhandle = $self->{'outhandle'};
106
107 print STDERR "<Processing n='$file' p='SourceCodePlugin'>\n" if ($gli);
108 print $outhandle "SourceCodePlugin: processing $file\n"
109 if $self->{'verbosity'} > 1;
110
111 my $cursection = $doc_obj->get_top_section();
112
113 my $filetype="text"; # Makefiles, READMEs, ...
114 if ($file =~ /\.(cc|h|cpp|C)$/) {$filetype="C++";} # assume all .h files...
115 elsif ($file =~ /\.c$/) {$filetype="C";}
116 elsif ($file =~ /\.p(l|m)$/) {$filetype="perl";}
117 elsif ($file =~ /\.sh$/) {$filetype="sh";}
118
119 # modify '<' and '>' for GML... (even though inside <pre> tags!!)
120 $$textref =~ s/</&lt;/g;
121 $$textref =~ s/>/&gt;/g;
122 $$textref =~ s/_/&#95;/g;
123 # try _escape_text($text) from doc.pm....
124
125 # don't want mg to turn escape chars into actual values
126 $$textref =~ s/\\/\\\\/g;
127
128 # use filename (minus any prefix) as the title.
129 my $title;
130 if ($self->{'remove_prefix' ne ""}) {
131 ($title = $file) =~ s/^$self->{'remove_prefix'}//;
132 } else {
133 ($title = $file) =~ s@^.*[/\\]@@; # remove pathname by default
134 }
135 $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
136 $doc_obj->add_metadata ($cursection, "FileFormat", "SRC");
137
138 # remove the gsdl prefix from the filename
139 my $relative_filename=$file;
140 $relative_filename =~ s@^.*?gsdl[/\\]@@;
141 $doc_obj->add_utf8_metadata ($cursection, "filename", $relative_filename);
142
143 # class information from .h and .cc and .C and .cpp files
144 if ($filetype eq "C++")
145 {
146 process_c_plus_plus($textref,$pluginfo, $base_dir,
147 $file, $metadata, $doc_obj);
148 } elsif ($filetype eq "C")
149 {
150 get_includes_metadata($textref, $doc_obj);
151 }
152
153
154 # default operation...
155 # insert preformat tags and add text to document object
156 $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
157
158 return 1;
159}
160
161
162
163
164sub get_includes_metadata {
165 my ($textref, $doc_obj) = @_;
166
167 my $topsection = $doc_obj->get_top_section();
168
169 # Get '#include' directives for metadata
170 if ($$textref !~ /\#\s*include\b/) {
171 return;
172 }
173
174 my @includes =
175 ($$textref =~ m/^\s*\#\s*include\s*(?:\"|&lt;)(.*?)(?:\"|&gt;)/mg);
176
177 my $incs_done_ref=$doc_obj->get_metadata($topsection, "includes");
178 my @incs_done;
179 if (defined($incs_done_ref)) {
180 @incs_done=@$incs_done_ref;
181 } else {
182 @incs_done=();
183 }
184
185 foreach my $inc (@includes) {
186 # add entries, but only if they don't already exist
187 if (!join('', map {$_ eq "$inc"?1:""} @incs_done)) {
188 push @incs_done, $inc;
189 $doc_obj->add_utf8_metadata($topsection, "includes", $inc);
190 }
191 }
192}
193
194
195
196sub process_c_plus_plus {
197 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
198
199 my $topsection = $doc_obj->get_top_section();
200
201
202 # Check for include metadata
203 get_includes_metadata($textref, $doc_obj);
204
205
206
207 # Get class declarations (but not forward declarations...) as metadata
208 if ($$textref =~ /\bclass\b/ ) {
209 my $classnames=$$textref;
210
211 # remove commented lines
212 $classnames =~ s@/\*.*?\*/@@sg;
213 $classnames =~ s@//.*$@@mg;
214 while ($classnames =~ /\bclass\b/) {
215
216 # delete all lines up to the next "class"
217 while ($classnames !~ /^[^\n]*\bclass\b[^\n]*\n/)
218 {$classnames =~ s/.*\n//;}
219
220# $classnames =~ s/^([^c][^l])*(.)?$//mg; # delete unneccessary lines
221
222 # get the line including the next "class" and remove it from
223 # our tmp text.
224 $classnames =~ s/^(.*\bclass\b.*)$//m;
225
226 # don't index if merely a reference/fwd decl. of another class
227 if ($1 !~ /(friend\Wclass)|(class\W\w+\W?\;)|(\/\/.*class)/) {
228 # $1 is still the whole line - eg:
229 # "class StaffSystem: public BaseStaffSystem"
230 my $wholeline=$1;
231 my $classname=$1;
232 $classname =~ s/.*class\W(\w+).*/$1/;
233 my $classes=$doc_obj->get_metadata($topsection, "class");
234 foreach my $elem (@$classes) {
235 if ("$elem" eq "$classname") {goto class_done;}
236 }
237 $doc_obj->add_utf8_metadata($topsection, "class", $classname);
238 class_done:
239 $doc_obj->add_utf8_metadata($topsection, "classdecl", $wholeline);
240 }
241 }
242 } # end of "class"
243
244 return 1;
245}
246
2471;
248
Note: See TracBrowser for help on using the repository browser.