source: trunk/gsdl/perllib/plugins/SRCPlug.pm@ 2207

Last change on this file since 2207 was 2085, checked in by jrm21, 23 years ago

When importing, we need to escape any escape codes otherwise mg(?)
actually converts them. eg "\n" -> "
n"

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.9 KB
Line 
1###########################################################################
2#
3# SRCPlug.pm -- source code plugin
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26# John McPherson Nov 2000
27# originally based on TEXTPlug
28
29# filename is currently used for Title ( optionally minus some prefix )
30
31# Current languages:
32# text: READMEs/Makefiles
33# C/C++ (currently extracts #include statements and C++ class decls)
34# Perl (currently only done as text)
35# Shell (currently only done as text)
36
37
38package SRCPlug;
39
40use BasPlug;
41use parsargv;
42
43sub BEGIN {
44 @ISA = ('BasPlug');
45}
46
47
48sub print_usage {
49 print STDERR "\n usage: plugin SRCPlug [options]\n";
50 print STDERR "Try to import C and C++ source code. Adds \"class\"";
51 print STDERR " metadata.\n\n";
52 print STDERR " options:\n";
53 print STDERR " -remove_prefix <pattern> Remove this leading pattern from the filename\n";
54 print STDERR " (eg -remove_prefix /tmp/XX/src/). The default is to\n";
55 print STDERR " remove the whole path from the filename.\n";
56 print STDERR "\n";
57}
58
59sub new {
60 my ($class) = @_;
61 my $self = new BasPlug ($class, @_);
62
63 if (!parsargv::parse(\@_,
64 q^remove_prefix/(\S+)/^, \$self->{'remove_prefix'},
65 "allow_extra_options"
66 )
67 ) {
68 print STDERR "\nIncorrect options passed to SRCPlug, ";
69 print STDERR "check your collect.cfg configuration file\n";
70 &print_usage();
71 die "\n";
72 }
73 return bless $self, $class;
74}
75
76sub get_default_block_exp {
77 my $self = shift (@_);
78
79 return q^(?i)\.(o|obj|a|so|dll)$^;
80}
81
82sub get_default_process_exp {
83 my $self = shift (@_);
84
85# return q^(?i)\.te?xt$^;
86 return q^(Makefile.*|README.*|(?i)\.(c|cc|cpp|C|h|hpp|pl|pm|sh))$^;
87}
88
89
90
91# do plugin specific processing of doc_obj
92sub process {
93 my $self = shift (@_);
94 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
95 my $outhandle = $self->{'outhandle'};
96
97 print $outhandle "SRCPlug: processing $file\n"
98 if $self->{'verbosity'} > 1;
99
100 my $cursection = $doc_obj->get_top_section();
101
102 my $filetype="text"; # Makefiles, READMEs, ...
103 if ($file =~ /\.(cc|h|cpp|C)$/) {$filetype="C++";} # assume all .h files...
104 elsif ($file =~ /\.c$/) {$filetype="C";}
105 elsif ($file =~ /\.p(l|m)$/) {$filetype="perl";}
106 elsif ($file =~ /\.sh$/) {$filetype="sh";}
107
108 # modify '<' and '>' for GML... (even though inside <pre> tags!!)
109 $$textref =~ s/</&lt;/g;
110 $$textref =~ s/>/&gt;/g;
111 $$textref =~ s/_/&#95;/g;
112 # try _escape_text($text) from doc.pm....
113
114 # don't want mg to turn escape chars into actual values
115 $$textref =~ s/\\/\\\\/g;
116
117 # use filename (minus any prefix) as the title.
118 my $title;
119 if ($self->{'remove_prefix' ne ""}) {
120 ($title = $file) =~ s/^$self->{'remove_prefix'}//;
121 } else {
122 ($title = $file) =~ s@^.*(/|\\)@@; # remove pathname by default
123 }
124 $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
125 $doc_obj->add_utf8_metadata ($cursection, "filename", $file);
126
127 # class information from .h and .cc and .C and .cpp files
128 if ($filetype eq "C++")
129 {
130 process_c_plus_plus($textref,$pluginfo, $base_dir,
131 $file, $metadata, $doc_obj);
132 } elsif ($filetype eq "C")
133 {
134 get_includes_metadata($textref, $doc_obj);
135 }
136
137
138 # default operation...
139 # insert preformat tags and add text to document object
140 $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
141
142 return 1;
143}
144
145
146
147
148sub get_includes_metadata {
149 my ($textref, $doc_obj) = @_;
150
151 my $topsection = $doc_obj->get_top_section();
152
153 # Get '#include' directives for metadata
154 if ($$textref =~ /\#\W?include\b/) {
155 my $includes=$$textref;
156
157 # remove commented lines
158 $includes =~ s@/\*.*?\*/@@sg; # treat string as single line
159 # ? means match smallest instead of longest !!!
160 $includes =~ s@//.*$@@mg; # treat string as multiple lines
161
162 # remove non- include lines (well, lines without a '#')
163 $includes =~ s/^[^\#]*$//mg;
164
165 # lines don't always start '#include "'.... we have to allow (eg)
166 # '# include ...'
167 # ' #include ...'
168 while ($includes =~ /\#\W?include/) {
169 $includes =~ s/^.*?include.*?(\"|&lt;)(.*)(\"|&gt;).*$//m;
170 # $1 is now the filename between the "s.
171 my $include=$2;
172 # remove leading pathname
173 $include =~ s@^.*(/|\\)@@;
174
175 my $incs_done=$doc_obj->get_metadata($section, "includes");
176 foreach my $elem (@$incs_done) {
177 if ("$elem" eq "$include") {goto header_done;}
178 }
179 $doc_obj->add_utf8_metadata($topsection, "includes", $include);
180 header_done:
181 }
182 }
183}
184
185
186
187sub process_c_plus_plus {
188 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
189
190 my $topsection = $doc_obj->get_top_section();
191
192
193 # Check for include metadata
194 get_includes_metadata($textref, $doc_obj);
195
196
197
198 # Get class declarations (but not forward declarations...) as metadata
199 if ($$textref =~ /\bclass\b/ ) {
200 my $classnames=$$textref;
201
202 # remove commented lines
203 $classnames =~ s@/\*.*?\*/@@sg;
204 $classnames =~ s@//.*$@@mg;
205 while ($classnames =~ /\bclass\b/) {
206
207 # delete all lines up to the next "class"
208 while ($classnames !~ /^[^\n]*\bclass\b[^\n]*\n/)
209 {$classnames =~ s/.*\n//;}
210
211# $classnames =~ s/^([^c][^l])*(.)?$//mg; # delete unneccessary lines
212
213 # get the line including the next "class" and remove it from
214 # our tmp text.
215 $classnames =~ s/^(.*\bclass\b.*)$//m;
216
217 # don't index if merely a reference/fwd decl. of another class
218 if ($1 !~ /(friend\Wclass)|(class\W\w+\W?\;)|(\/\/.*class)/) {
219 # $1 is still the whole line - eg:
220 # "class StaffSystem: public BaseStaffSystem"
221 my $wholeline=$1;
222 my $classname=$1;
223 $classname =~ s/.*class\W(\w+).*/$1/;
224 my $classes=$doc_obj->get_metadata($section, "class");
225 foreach my $elem (@$classes) {
226 if ("$elem" eq "$classname") {goto class_done;}
227 }
228 $doc_obj->add_utf8_metadata($topsection, "class", $classname);
229 class_done:
230 $doc_obj->add_utf8_metadata($topsection, "classdecl", $wholeline);
231 }
232 }
233 } # end of "class"
234
235 return 1;
236}
237
2381;
239
Note: See TracBrowser for help on using the repository browser.