Context Navigation

source: trunk/gsdl/perllib/plugins/SRCPlug.pm@ 4842

Last change on this file since 4842 was 4785, checked in by mdewsnip, 21 years ago
Commented out print_usage functions - plugins should now call $self->print_txt_usage() to display their usage text. Updates to the options of a plugin should be made in the $options and $arguments data structures at the top of the plugin.
Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 8.4 KB

Line
1	###########################################################################
2	#
3	# SRCPlug.pm -- source code plugin
4	#
5	# A component of the Greenstone digital library software
6	# from the New Zealand Digital Library Project at the
7	# University of Waikato, New Zealand.
8	#
9	# Copyright (C) 1999 New Zealand Digital Library Project
10	#
11	# This program is free software; you can redistribute it and/or modify
12	# it under the terms of the GNU General Public License as published by
13	# the Free Software Foundation; either version 2 of the License, or
14	# (at your option) any later version.
15	#
16	# This program is distributed in the hope that it will be useful,
17	# but WITHOUT ANY WARRANTY; without even the implied warranty of
18	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19	# GNU General Public License for more details.
20	#
21	# You should have received a copy of the GNU General Public License
22	# along with this program; if not, write to the Free Software
23	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24	#
25	###########################################################################
26	# John McPherson Nov 2000
27	# originally based on TEXTPlug
28
29	# filename is currently used for Title ( optionally minus some prefix )
30
31	# Current languages:
32	# text: READMEs/Makefiles
33	# C/C++ (currently extracts #include statements and C++ class decls)
34	# Perl (currently only done as text)
35	# Shell (currently only done as text)
36
37	# 12/05/02 Added usage datastructure - John Thompson
38
39	package SRCPlug;
40
41	use BasPlug;
42	use parsargv;
43
44	sub BEGIN {
45	@ISA = ('BasPlug');
46	}
47
48	my $arguments =
49	[ { 'name' => "process_exp",
50	'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",
51	'type' => "string",
52	'deft' => &get_default_process_exp(),
53	'reqd' => "no" } ,
54	{ 'name' => "block_exp",
55	'desc' => "Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",
56	'type' => 'string',
57	'deft' => &get_default_block_exp(),
58	'reqd' => "no" } ,
59	{ 'name' => "remove_prefix",
60	'desc' => "Remove this leading pattern from the filename (eg -remove_prefix /tmp/XX/src/). The default is to remove the whole path from the filename.",
61	'type' => 'string',
62	'deft' => "",
63	'reqd' => "no" } ];
64
65	my $options = { 'name' => "SRCPlug",
66	'desc' => "Filename is currently used for Title ( optionally minus some prefix ). Current languages:\ntext: READMEs/Makefiles\nC/C++ (currently extracts #include statements and C++ class decls)\nPerl (currently only done as text)\nShell (currently only done as text)",
67	'inherits' => "yes",
68	'args' => $arguments };
69
70	# sub print_usage {
71	# print STDERR "\n usage: plugin SRCPlug [options]\n";
72	# print STDERR "Try to import C and C++ source code. Adds \"class\"";
73	# print STDERR " metadata.\n\n";
74	# print STDERR " options:\n";
75	# print STDERR " -remove_prefix <pattern> Remove this leading pattern from the filename\n";
76	# print STDERR " (eg -remove_prefix /tmp/XX/src/). The default is to\n";
77	# print STDERR " remove the whole path from the filename.\n";
78	# print STDERR "\n";
79	# }
80
81	sub new {
82	my ($class) = @_;
83	my $self = new BasPlug ($class, @_);
84
85	# 14-05-02 To allow for proper inheritance of arguments - John Thompson
86	my $option_list = $self->{'option_list'};
87	push( @{$option_list}, $options );
88
89	if (!parsargv::parse(\@_,
90	q^remove_prefix/(\S+)/^, \$self->{'remove_prefix'},
91	"allow_extra_options"
92	)
93	) {
94	print STDERR "\nIncorrect options passed to SRCPlug, ";
95	print STDERR "check your collect.cfg configuration file\n";
96	$self->print_txt_usage();
97	die "\n";
98	}
99	return bless $self, $class;
100	}
101
102	sub get_default_block_exp {
103	my $self = shift (@_);
104
105	return q^(?i)\.(o\|obj\|a\|so\|dll)$^;
106	}
107
108	sub get_default_process_exp {
109	my $self = shift (@_);
110
111	# return q^(?i)\.te?xt$^;
112	return q^(Makefile.\|README.\|(?i)\.(c\|cc\|cpp\|C\|h\|hpp\|pl\|pm\|sh))$^;
113	}
114
115
116
117	# do plugin specific processing of doc_obj
118	sub process {
119	my $self = shift (@_);
120	my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
121	my $outhandle = $self->{'outhandle'};
122
123	print $outhandle "SRCPlug: processing $file\n"
124	if $self->{'verbosity'} > 1;
125
126	my $cursection = $doc_obj->get_top_section();
127
128	my $filetype="text"; # Makefiles, READMEs, ...
129	if ($file =~ /\.(cc\|h\|cpp\|C)$/) {$filetype="C++";} # assume all .h files...
130	elsif ($file =~ /\.c$/) {$filetype="C";}
131	elsif ($file =~ /\.p(l\|m)$/) {$filetype="perl";}
132	elsif ($file =~ /\.sh$/) {$filetype="sh";}
133
134	# modify '<' and '>' for GML... (even though inside <pre> tags!!)
135	$$textref =~ s/</</g;
136	$$textref =~ s/>/>/g;
137	$$textref =~ s/_/_/g;
138	# try _escape_text($text) from doc.pm....
139
140	# don't want mg to turn escape chars into actual values
141	$$textref =~ s/\\/\\\\/g;
142
143	# use filename (minus any prefix) as the title.
144	my $title;
145	if ($self->{'remove_prefix' ne ""}) {
146	($title = $file) =~ s/^$self->{'remove_prefix'}//;
147	} else {
148	($title = $file) =~ s@^.*[/\\]@@; # remove pathname by default
149	}
150	$doc_obj->add_utf8_metadata ($cursection, "Title", $title);
151	# remove the gsdl prefix from the filename
152	my $relative_filename=$file;
153	$relative_filename =~ s@^.*?gsdl[/\\]@@;
154	$doc_obj->add_utf8_metadata ($cursection, "filename", $relative_filename);
155
156	# class information from .h and .cc and .C and .cpp files
157	if ($filetype eq "C++")
158	{
159	process_c_plus_plus($textref,$pluginfo, $base_dir,
160	$file, $metadata, $doc_obj);
161	} elsif ($filetype eq "C")
162	{
163	get_includes_metadata($textref, $doc_obj);
164	}
165
166
167	# default operation...
168	# insert preformat tags and add text to document object
169	$doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
170
171	return 1;
172	}
173
174
175
176
177	sub get_includes_metadata {
178	my ($textref, $doc_obj) = @_;
179
180	my $topsection = $doc_obj->get_top_section();
181
182	# Get '#include' directives for metadata
183	if ($$textref !~ /\#\s*include\b/) {
184	return;
185	}
186
187	my @includes =
188	($$textref =~ m/^\s\#\sinclude\s(?:\"\|<)(.?)(?:\"\|>)/mg);
189
190	my $incs_done_ref=$doc_obj->get_metadata($section, "includes");
191	my @incs_done;
192	if (defined($incs_done_ref)) {
193	@incs_done=@$incs_done_ref;
194	} else {
195	@incs_done=();
196	}
197
198	foreach my $inc (@includes) {
199	# add entries, but only if they don't already exist
200	if (!join('', map {$_ eq "$inc"?1:""} @incs_done)) {
201	push @incs_done, $inc;
202	$doc_obj->add_utf8_metadata($topsection, "includes", $inc);
203	}
204	}
205	}
206
207
208
209	sub process_c_plus_plus {
210	my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
211
212	my $topsection = $doc_obj->get_top_section();
213
214
215	# Check for include metadata
216	get_includes_metadata($textref, $doc_obj);
217
218
219
220	# Get class declarations (but not forward declarations...) as metadata
221	if ($$textref =~ /\bclass\b/ ) {
222	my $classnames=$$textref;
223
224	# remove commented lines
225	$classnames =~ s@/\.?\*/@@sg;
226	$classnames =~ s@//.*$@@mg;
227	while ($classnames =~ /\bclass\b/) {
228
229	# delete all lines up to the next "class"
230	while ($classnames !~ /^[^\n]\bclass\b[^\n]\n/)
231	{$classnames =~ s/.*\n//;}
232
233	# $classnames =~ s/^([^c][^l])*(.)?$//mg; # delete unneccessary lines
234
235	# get the line including the next "class" and remove it from
236	# our tmp text.
237	$classnames =~ s/^(.\bclass\b.)$//m;
238
239	# don't index if merely a reference/fwd decl. of another class
240	if ($1 !~ /(friend\Wclass)\|(class\W\w+\W?\;)\|(\/\/.*class)/) {
241	# $1 is still the whole line - eg:
242	# "class StaffSystem: public BaseStaffSystem"
243	my $wholeline=$1;
244	my $classname=$1;
245	$classname =~ s/.class\W(\w+)./$1/;
246	my $classes=$doc_obj->get_metadata($section, "class");
247	foreach my $elem (@$classes) {
248	if ("$elem" eq "$classname") {goto class_done;}
249	}
250	$doc_obj->add_utf8_metadata($topsection, "class", $classname);
251	class_done:
252	$doc_obj->add_utf8_metadata($topsection, "classdecl", $wholeline);
253	}
254	}
255	} # end of "class"
256
257	return 1;
258	}
259
260	1;
261

Note: See TracBrowser for help on using the repository browser.

Download in other formats: