source: main/trunk/greenstone2/perllib/plugins/SourceCodePlugin.pm@ 32783

Last change on this file since 32783 was 31494, checked in by kjdon, 7 years ago

updated text string keys based on new plugin names

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.9 KB
RevLine 
[1707]1###########################################################################
2#
[15872]3# SourceCodePlugin.pm -- source code plugin
[1731]4#
[1707]5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26# John McPherson Nov 2000
27# originally based on TEXTPlug
[1731]28
[1707]29# filename is currently used for Title ( optionally minus some prefix )
30
[1731]31# Current languages:
32# text: READMEs/Makefiles
33# C/C++ (currently extracts #include statements and C++ class decls)
34# Perl (currently only done as text)
35# Shell (currently only done as text)
36
[15872]37package SourceCodePlugin;
[1707]38
[15872]39use ReadTextFile;
[24548]40use MetadataRead;
[1707]41
[10254]42use strict;
43no strict 'refs'; # allow filehandles to be variables and viceversa
44
[1707]45sub BEGIN {
[24548]46 @SourceCodePlugin::ISA = ('MetadataRead', 'ReadTextFile');
[1707]47}
48
[4744]49my $arguments =
50 [ { 'name' => "process_exp",
[31492]51 'desc' => "{BaseImporter.process_exp}",
[6408]52 'type' => "regexp",
[4744]53 'deft' => &get_default_process_exp(),
54 'reqd' => "no" } ,
55 { 'name' => "block_exp",
[31494]56 'desc' => "{CommonUtil.block_exp}",
[6408]57 'type' => "regexp",
[4744]58 'deft' => &get_default_block_exp(),
[4873]59 'reqd' => "no" },
[4744]60 { 'name' => "remove_prefix",
[15872]61 'desc' => "{SourceCodePlugin.remove_prefix}",
[6408]62 'type' => "regexp",
[4873]63 'deft' => "^.*[/\\]",
[4744]64 'reqd' => "no" } ];
[1707]65
[15872]66my $options = { 'name' => "SourceCodePlugin",
67 'desc' => "{SourceCodePlugin.desc}",
[6408]68 'abstract' => "no",
[4744]69 'inherits' => "yes",
70 'args' => $arguments };
[3540]71
[1707]72
73sub new {
[10218]74 my ($class) = shift (@_);
75 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
76 push(@$pluginlist, $class);
[1707]77
[15872]78 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
79 push(@{$hashArgOptLists->{"OptList"}},$options);
[10218]80
[15872]81 my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
[10218]82
[1707]83 return bless $self, $class;
84}
85
86sub get_default_block_exp {
87 my $self = shift (@_);
88
89 return q^(?i)\.(o|obj|a|so|dll)$^;
90}
91
92sub get_default_process_exp {
93 my $self = shift (@_);
94
95 return q^(Makefile.*|README.*|(?i)\.(c|cc|cpp|C|h|hpp|pl|pm|sh))$^;
96}
97
[1731]98
99
[1707]100# do plugin specific processing of doc_obj
101sub process {
102 my $self = shift (@_);
[6332]103 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
[1707]104 my $outhandle = $self->{'outhandle'};
105
106 my $cursection = $doc_obj->get_top_section();
107
108 my $filetype="text"; # Makefiles, READMEs, ...
109 if ($file =~ /\.(cc|h|cpp|C)$/) {$filetype="C++";} # assume all .h files...
110 elsif ($file =~ /\.c$/) {$filetype="C";}
111 elsif ($file =~ /\.p(l|m)$/) {$filetype="perl";}
112 elsif ($file =~ /\.sh$/) {$filetype="sh";}
113
114 # modify '<' and '>' for GML... (even though inside <pre> tags!!)
115 $$textref =~ s/</&lt;/g;
116 $$textref =~ s/>/&gt;/g;
[1740]117 $$textref =~ s/_/&#95;/g;
[1707]118 # try _escape_text($text) from doc.pm....
119
[2085]120 # don't want mg to turn escape chars into actual values
121 $$textref =~ s/\\/\\\\/g;
[1707]122
123 # use filename (minus any prefix) as the title.
124 my $title;
125 if ($self->{'remove_prefix' ne ""}) {
126 ($title = $file) =~ s/^$self->{'remove_prefix'}//;
127 } else {
[2657]128 ($title = $file) =~ s@^.*[/\\]@@; # remove pathname by default
[1707]129 }
130 $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
[8121]131 $doc_obj->add_metadata ($cursection, "FileFormat", "SRC");
132
[2657]133 # remove the gsdl prefix from the filename
134 my $relative_filename=$file;
135 $relative_filename =~ s@^.*?gsdl[/\\]@@;
136 $doc_obj->add_utf8_metadata ($cursection, "filename", $relative_filename);
[1707]137
138 # class information from .h and .cc and .C and .cpp files
139 if ($filetype eq "C++")
140 {
[1731]141 process_c_plus_plus($textref,$pluginfo, $base_dir,
[1707]142 $file, $metadata, $doc_obj);
[1731]143 } elsif ($filetype eq "C")
144 {
145 get_includes_metadata($textref, $doc_obj);
[1707]146 }
[1731]147
148
[1707]149 # default operation...
150 # insert preformat tags and add text to document object
151 $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
152
153 return 1;
154}
155
156
157
[1731]158
159sub get_includes_metadata {
160 my ($textref, $doc_obj) = @_;
161
[1707]162 my $topsection = $doc_obj->get_top_section();
163
164 # Get '#include' directives for metadata
[3919]165 if ($$textref !~ /\#\s*include\b/) {
166 return;
167 }
[1707]168
[3919]169 my @includes =
170 ($$textref =~ m/^\s*\#\s*include\s*(?:\"|&lt;)(.*?)(?:\"|&gt;)/mg);
171
[10254]172 my $incs_done_ref=$doc_obj->get_metadata($topsection, "includes");
[3919]173 my @incs_done;
174 if (defined($incs_done_ref)) {
175 @incs_done=@$incs_done_ref;
176 } else {
177 @incs_done=();
178 }
[1707]179
[3919]180 foreach my $inc (@includes) {
181 # add entries, but only if they don't already exist
182 if (!join('', map {$_ eq "$inc"?1:""} @incs_done)) {
183 push @incs_done, $inc;
184 $doc_obj->add_utf8_metadata($topsection, "includes", $inc);
[1707]185 }
186 }
[1731]187}
[1707]188
189
190
[1731]191sub process_c_plus_plus {
192 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
[1707]193
[1731]194 my $topsection = $doc_obj->get_top_section();
195
196
197 # Check for include metadata
198 get_includes_metadata($textref, $doc_obj);
199
200
201
[1707]202 # Get class declarations (but not forward declarations...) as metadata
203 if ($$textref =~ /\bclass\b/ ) {
204 my $classnames=$$textref;
205
206 # remove commented lines
207 $classnames =~ s@/\*.*?\*/@@sg;
208 $classnames =~ s@//.*$@@mg;
209 while ($classnames =~ /\bclass\b/) {
210
211 # delete all lines up to the next "class"
212 while ($classnames !~ /^[^\n]*\bclass\b[^\n]*\n/)
213 {$classnames =~ s/.*\n//;}
214
215# $classnames =~ s/^([^c][^l])*(.)?$//mg; # delete unneccessary lines
216
217 # get the line including the next "class" and remove it from
218 # our tmp text.
219 $classnames =~ s/^(.*\bclass\b.*)$//m;
220
221 # don't index if merely a reference/fwd decl. of another class
222 if ($1 !~ /(friend\Wclass)|(class\W\w+\W?\;)|(\/\/.*class)/) {
223 # $1 is still the whole line - eg:
224 # "class StaffSystem: public BaseStaffSystem"
225 my $wholeline=$1;
226 my $classname=$1;
227 $classname =~ s/.*class\W(\w+).*/$1/;
[10254]228 my $classes=$doc_obj->get_metadata($topsection, "class");
[1707]229 foreach my $elem (@$classes) {
230 if ("$elem" eq "$classname") {goto class_done;}
231 }
232 $doc_obj->add_utf8_metadata($topsection, "class", $classname);
233 class_done:
234 $doc_obj->add_utf8_metadata($topsection, "classdecl", $wholeline);
235 }
236 }
237 } # end of "class"
238
239 return 1;
240}
241
2421;
243
Note: See TracBrowser for help on using the repository browser.