source: gsdl/trunk/perllib/plugins/SourceCodePlugin.pm@ 16104

Last change on this file since 16104 was 16104, checked in by kjdon, 16 years ago

tried to make the 'xxxplugin processing file' print statements more consistent. They are now done in read (or read_into_doc_obj) and not process

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.9 KB
RevLine 
[1707]1###########################################################################
2#
[15872]3# SourceCodePlugin.pm -- source code plugin
[1731]4#
[1707]5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26# John McPherson Nov 2000
27# originally based on TEXTPlug
[1731]28
[1707]29# filename is currently used for Title ( optionally minus some prefix )
30
[1731]31# Current languages:
32# text: READMEs/Makefiles
33# C/C++ (currently extracts #include statements and C++ class decls)
34# Perl (currently only done as text)
35# Shell (currently only done as text)
36
[3540]37# 12/05/02 Added usage datastructure - John Thompson
[1731]38
[15872]39package SourceCodePlugin;
[1707]40
[15872]41use ReadTextFile;
[1707]42
[10254]43use strict;
44no strict 'refs'; # allow filehandles to be variables and viceversa
45
[1707]46sub BEGIN {
[15872]47 @SourceCodePlugin::ISA = ('ReadTextFile');
[1707]48}
49
[4744]50my $arguments =
51 [ { 'name' => "process_exp",
[16019]52 'desc' => "{BasePlugin.process_exp}",
[6408]53 'type' => "regexp",
[4744]54 'deft' => &get_default_process_exp(),
55 'reqd' => "no" } ,
56 { 'name' => "block_exp",
[16019]57 'desc' => "{BasePlugin.block_exp}",
[6408]58 'type' => "regexp",
[4744]59 'deft' => &get_default_block_exp(),
[4873]60 'reqd' => "no" },
[4744]61 { 'name' => "remove_prefix",
[15872]62 'desc' => "{SourceCodePlugin.remove_prefix}",
[6408]63 'type' => "regexp",
[4873]64 'deft' => "^.*[/\\]",
[4744]65 'reqd' => "no" } ];
[1707]66
[15872]67my $options = { 'name' => "SourceCodePlugin",
68 'desc' => "{SourceCodePlugin.desc}",
[6408]69 'abstract' => "no",
[4744]70 'inherits' => "yes",
71 'args' => $arguments };
[3540]72
[1707]73
74sub new {
[10218]75 my ($class) = shift (@_);
76 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
77 push(@$pluginlist, $class);
[1707]78
[15872]79 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
80 push(@{$hashArgOptLists->{"OptList"}},$options);
[10218]81
[15872]82 my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
[10218]83
[1707]84 return bless $self, $class;
85}
86
87sub get_default_block_exp {
88 my $self = shift (@_);
89
90 return q^(?i)\.(o|obj|a|so|dll)$^;
91}
92
93sub get_default_process_exp {
94 my $self = shift (@_);
95
96 return q^(Makefile.*|README.*|(?i)\.(c|cc|cpp|C|h|hpp|pl|pm|sh))$^;
97}
98
[1731]99
100
[1707]101# do plugin specific processing of doc_obj
102sub process {
103 my $self = shift (@_);
[6332]104 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
[1707]105 my $outhandle = $self->{'outhandle'};
106
107 my $cursection = $doc_obj->get_top_section();
108
109 my $filetype="text"; # Makefiles, READMEs, ...
110 if ($file =~ /\.(cc|h|cpp|C)$/) {$filetype="C++";} # assume all .h files...
111 elsif ($file =~ /\.c$/) {$filetype="C";}
112 elsif ($file =~ /\.p(l|m)$/) {$filetype="perl";}
113 elsif ($file =~ /\.sh$/) {$filetype="sh";}
114
115 # modify '<' and '>' for GML... (even though inside <pre> tags!!)
116 $$textref =~ s/</&lt;/g;
117 $$textref =~ s/>/&gt;/g;
[1740]118 $$textref =~ s/_/&#95;/g;
[1707]119 # try _escape_text($text) from doc.pm....
120
[2085]121 # don't want mg to turn escape chars into actual values
122 $$textref =~ s/\\/\\\\/g;
[1707]123
124 # use filename (minus any prefix) as the title.
125 my $title;
126 if ($self->{'remove_prefix' ne ""}) {
127 ($title = $file) =~ s/^$self->{'remove_prefix'}//;
128 } else {
[2657]129 ($title = $file) =~ s@^.*[/\\]@@; # remove pathname by default
[1707]130 }
131 $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
[8121]132 $doc_obj->add_metadata ($cursection, "FileFormat", "SRC");
133
[2657]134 # remove the gsdl prefix from the filename
135 my $relative_filename=$file;
136 $relative_filename =~ s@^.*?gsdl[/\\]@@;
137 $doc_obj->add_utf8_metadata ($cursection, "filename", $relative_filename);
[1707]138
139 # class information from .h and .cc and .C and .cpp files
140 if ($filetype eq "C++")
141 {
[1731]142 process_c_plus_plus($textref,$pluginfo, $base_dir,
[1707]143 $file, $metadata, $doc_obj);
[1731]144 } elsif ($filetype eq "C")
145 {
146 get_includes_metadata($textref, $doc_obj);
[1707]147 }
[1731]148
149
[1707]150 # default operation...
151 # insert preformat tags and add text to document object
152 $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
153
154 return 1;
155}
156
157
158
[1731]159
160sub get_includes_metadata {
161 my ($textref, $doc_obj) = @_;
162
[1707]163 my $topsection = $doc_obj->get_top_section();
164
165 # Get '#include' directives for metadata
[3919]166 if ($$textref !~ /\#\s*include\b/) {
167 return;
168 }
[1707]169
[3919]170 my @includes =
171 ($$textref =~ m/^\s*\#\s*include\s*(?:\"|&lt;)(.*?)(?:\"|&gt;)/mg);
172
[10254]173 my $incs_done_ref=$doc_obj->get_metadata($topsection, "includes");
[3919]174 my @incs_done;
175 if (defined($incs_done_ref)) {
176 @incs_done=@$incs_done_ref;
177 } else {
178 @incs_done=();
179 }
[1707]180
[3919]181 foreach my $inc (@includes) {
182 # add entries, but only if they don't already exist
183 if (!join('', map {$_ eq "$inc"?1:""} @incs_done)) {
184 push @incs_done, $inc;
185 $doc_obj->add_utf8_metadata($topsection, "includes", $inc);
[1707]186 }
187 }
[1731]188}
[1707]189
190
191
[1731]192sub process_c_plus_plus {
193 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
[1707]194
[1731]195 my $topsection = $doc_obj->get_top_section();
196
197
198 # Check for include metadata
199 get_includes_metadata($textref, $doc_obj);
200
201
202
[1707]203 # Get class declarations (but not forward declarations...) as metadata
204 if ($$textref =~ /\bclass\b/ ) {
205 my $classnames=$$textref;
206
207 # remove commented lines
208 $classnames =~ s@/\*.*?\*/@@sg;
209 $classnames =~ s@//.*$@@mg;
210 while ($classnames =~ /\bclass\b/) {
211
212 # delete all lines up to the next "class"
213 while ($classnames !~ /^[^\n]*\bclass\b[^\n]*\n/)
214 {$classnames =~ s/.*\n//;}
215
216# $classnames =~ s/^([^c][^l])*(.)?$//mg; # delete unneccessary lines
217
218 # get the line including the next "class" and remove it from
219 # our tmp text.
220 $classnames =~ s/^(.*\bclass\b.*)$//m;
221
222 # don't index if merely a reference/fwd decl. of another class
223 if ($1 !~ /(friend\Wclass)|(class\W\w+\W?\;)|(\/\/.*class)/) {
224 # $1 is still the whole line - eg:
225 # "class StaffSystem: public BaseStaffSystem"
226 my $wholeline=$1;
227 my $classname=$1;
228 $classname =~ s/.*class\W(\w+).*/$1/;
[10254]229 my $classes=$doc_obj->get_metadata($topsection, "class");
[1707]230 foreach my $elem (@$classes) {
231 if ("$elem" eq "$classname") {goto class_done;}
232 }
233 $doc_obj->add_utf8_metadata($topsection, "class", $classname);
234 class_done:
235 $doc_obj->add_utf8_metadata($topsection, "classdecl", $wholeline);
236 }
237 }
238 } # end of "class"
239
240 return 1;
241}
242
2431;
244
Note: See TracBrowser for help on using the repository browser.