source: main/trunk/greenstone2/perllib/plugins/SourceCodePlugin.pm@ 31492

Last change on this file since 31492 was 31492, checked in by kjdon, 7 years ago

renamed EncodingUtil to CommonUtil, BasePlugin to BaseImporter. The idea is that only top level plugins that you can specify in your collection get to have plugin in their name. Modified all other plugins to reflect these name changes

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.9 KB
Line 
1###########################################################################
2#
3# SourceCodePlugin.pm -- source code plugin
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26# John McPherson Nov 2000
27# originally based on TEXTPlug
28
29# filename is currently used for Title ( optionally minus some prefix )
30
31# Current languages:
32# text: READMEs/Makefiles
33# C/C++ (currently extracts #include statements and C++ class decls)
34# Perl (currently only done as text)
35# Shell (currently only done as text)
36
37package SourceCodePlugin;
38
39use ReadTextFile;
40use MetadataRead;
41
42use strict;
43no strict 'refs'; # allow filehandles to be variables and viceversa
44
45sub BEGIN {
46 @SourceCodePlugin::ISA = ('MetadataRead', 'ReadTextFile');
47}
48
49my $arguments =
50 [ { 'name' => "process_exp",
51 'desc' => "{BaseImporter.process_exp}",
52 'type' => "regexp",
53 'deft' => &get_default_process_exp(),
54 'reqd' => "no" } ,
55 { 'name' => "block_exp",
56 'desc' => "{BaseImporter.block_exp}",
57 'type' => "regexp",
58 'deft' => &get_default_block_exp(),
59 'reqd' => "no" },
60 { 'name' => "remove_prefix",
61 'desc' => "{SourceCodePlugin.remove_prefix}",
62 'type' => "regexp",
63 'deft' => "^.*[/\\]",
64 'reqd' => "no" } ];
65
66my $options = { 'name' => "SourceCodePlugin",
67 'desc' => "{SourceCodePlugin.desc}",
68 'abstract' => "no",
69 'inherits' => "yes",
70 'args' => $arguments };
71
72
73sub new {
74 my ($class) = shift (@_);
75 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
76 push(@$pluginlist, $class);
77
78 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
79 push(@{$hashArgOptLists->{"OptList"}},$options);
80
81 my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
82
83 return bless $self, $class;
84}
85
86sub get_default_block_exp {
87 my $self = shift (@_);
88
89 return q^(?i)\.(o|obj|a|so|dll)$^;
90}
91
92sub get_default_process_exp {
93 my $self = shift (@_);
94
95 return q^(Makefile.*|README.*|(?i)\.(c|cc|cpp|C|h|hpp|pl|pm|sh))$^;
96}
97
98
99
100# do plugin specific processing of doc_obj
101sub process {
102 my $self = shift (@_);
103 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
104 my $outhandle = $self->{'outhandle'};
105
106 my $cursection = $doc_obj->get_top_section();
107
108 my $filetype="text"; # Makefiles, READMEs, ...
109 if ($file =~ /\.(cc|h|cpp|C)$/) {$filetype="C++";} # assume all .h files...
110 elsif ($file =~ /\.c$/) {$filetype="C";}
111 elsif ($file =~ /\.p(l|m)$/) {$filetype="perl";}
112 elsif ($file =~ /\.sh$/) {$filetype="sh";}
113
114 # modify '<' and '>' for GML... (even though inside <pre> tags!!)
115 $$textref =~ s/</&lt;/g;
116 $$textref =~ s/>/&gt;/g;
117 $$textref =~ s/_/&#95;/g;
118 # try _escape_text($text) from doc.pm....
119
120 # don't want mg to turn escape chars into actual values
121 $$textref =~ s/\\/\\\\/g;
122
123 # use filename (minus any prefix) as the title.
124 my $title;
125 if ($self->{'remove_prefix' ne ""}) {
126 ($title = $file) =~ s/^$self->{'remove_prefix'}//;
127 } else {
128 ($title = $file) =~ s@^.*[/\\]@@; # remove pathname by default
129 }
130 $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
131 $doc_obj->add_metadata ($cursection, "FileFormat", "SRC");
132
133 # remove the gsdl prefix from the filename
134 my $relative_filename=$file;
135 $relative_filename =~ s@^.*?gsdl[/\\]@@;
136 $doc_obj->add_utf8_metadata ($cursection, "filename", $relative_filename);
137
138 # class information from .h and .cc and .C and .cpp files
139 if ($filetype eq "C++")
140 {
141 process_c_plus_plus($textref,$pluginfo, $base_dir,
142 $file, $metadata, $doc_obj);
143 } elsif ($filetype eq "C")
144 {
145 get_includes_metadata($textref, $doc_obj);
146 }
147
148
149 # default operation...
150 # insert preformat tags and add text to document object
151 $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
152
153 return 1;
154}
155
156
157
158
159sub get_includes_metadata {
160 my ($textref, $doc_obj) = @_;
161
162 my $topsection = $doc_obj->get_top_section();
163
164 # Get '#include' directives for metadata
165 if ($$textref !~ /\#\s*include\b/) {
166 return;
167 }
168
169 my @includes =
170 ($$textref =~ m/^\s*\#\s*include\s*(?:\"|&lt;)(.*?)(?:\"|&gt;)/mg);
171
172 my $incs_done_ref=$doc_obj->get_metadata($topsection, "includes");
173 my @incs_done;
174 if (defined($incs_done_ref)) {
175 @incs_done=@$incs_done_ref;
176 } else {
177 @incs_done=();
178 }
179
180 foreach my $inc (@includes) {
181 # add entries, but only if they don't already exist
182 if (!join('', map {$_ eq "$inc"?1:""} @incs_done)) {
183 push @incs_done, $inc;
184 $doc_obj->add_utf8_metadata($topsection, "includes", $inc);
185 }
186 }
187}
188
189
190
191sub process_c_plus_plus {
192 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
193
194 my $topsection = $doc_obj->get_top_section();
195
196
197 # Check for include metadata
198 get_includes_metadata($textref, $doc_obj);
199
200
201
202 # Get class declarations (but not forward declarations...) as metadata
203 if ($$textref =~ /\bclass\b/ ) {
204 my $classnames=$$textref;
205
206 # remove commented lines
207 $classnames =~ s@/\*.*?\*/@@sg;
208 $classnames =~ s@//.*$@@mg;
209 while ($classnames =~ /\bclass\b/) {
210
211 # delete all lines up to the next "class"
212 while ($classnames !~ /^[^\n]*\bclass\b[^\n]*\n/)
213 {$classnames =~ s/.*\n//;}
214
215# $classnames =~ s/^([^c][^l])*(.)?$//mg; # delete unneccessary lines
216
217 # get the line including the next "class" and remove it from
218 # our tmp text.
219 $classnames =~ s/^(.*\bclass\b.*)$//m;
220
221 # don't index if merely a reference/fwd decl. of another class
222 if ($1 !~ /(friend\Wclass)|(class\W\w+\W?\;)|(\/\/.*class)/) {
223 # $1 is still the whole line - eg:
224 # "class StaffSystem: public BaseStaffSystem"
225 my $wholeline=$1;
226 my $classname=$1;
227 $classname =~ s/.*class\W(\w+).*/$1/;
228 my $classes=$doc_obj->get_metadata($topsection, "class");
229 foreach my $elem (@$classes) {
230 if ("$elem" eq "$classname") {goto class_done;}
231 }
232 $doc_obj->add_utf8_metadata($topsection, "class", $classname);
233 class_done:
234 $doc_obj->add_utf8_metadata($topsection, "classdecl", $wholeline);
235 }
236 }
237 } # end of "class"
238
239 return 1;
240}
241
2421;
243
Note: See TracBrowser for help on using the repository browser.