Context Navigation

source: trunk/gsdl/perllib/plugins/SRCPlug.pm@ 2207

Last change on this file since 2207 was 2085, checked in by jrm21, 23 years ago
When importing, we need to escape any escape codes otherwise mg(?) actually converts them. eg "\n" -> " n"
Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 6.9 KB

Line
1	###########################################################################
2	#
3	# SRCPlug.pm -- source code plugin
4	#
5	# A component of the Greenstone digital library software
6	# from the New Zealand Digital Library Project at the
7	# University of Waikato, New Zealand.
8	#
9	# Copyright (C) 1999 New Zealand Digital Library Project
10	#
11	# This program is free software; you can redistribute it and/or modify
12	# it under the terms of the GNU General Public License as published by
13	# the Free Software Foundation; either version 2 of the License, or
14	# (at your option) any later version.
15	#
16	# This program is distributed in the hope that it will be useful,
17	# but WITHOUT ANY WARRANTY; without even the implied warranty of
18	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19	# GNU General Public License for more details.
20	#
21	# You should have received a copy of the GNU General Public License
22	# along with this program; if not, write to the Free Software
23	# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24	#
25	###########################################################################
26	# John McPherson Nov 2000
27	# originally based on TEXTPlug
28
29	# filename is currently used for Title ( optionally minus some prefix )
30
31	# Current languages:
32	# text: READMEs/Makefiles
33	# C/C++ (currently extracts #include statements and C++ class decls)
34	# Perl (currently only done as text)
35	# Shell (currently only done as text)
36
37
38	package SRCPlug;
39
40	use BasPlug;
41	use parsargv;
42
43	sub BEGIN {
44	@ISA = ('BasPlug');
45	}
46
47
48	sub print_usage {
49	print STDERR "\n usage: plugin SRCPlug [options]\n";
50	print STDERR "Try to import C and C++ source code. Adds \"class\"";
51	print STDERR " metadata.\n\n";
52	print STDERR " options:\n";
53	print STDERR " -remove_prefix <pattern> Remove this leading pattern from the filename\n";
54	print STDERR " (eg -remove_prefix /tmp/XX/src/). The default is to\n";
55	print STDERR " remove the whole path from the filename.\n";
56	print STDERR "\n";
57	}
58
59	sub new {
60	my ($class) = @_;
61	my $self = new BasPlug ($class, @_);
62
63	if (!parsargv::parse(\@_,
64	q^remove_prefix/(\S+)/^, \$self->{'remove_prefix'},
65	"allow_extra_options"
66	)
67	) {
68	print STDERR "\nIncorrect options passed to SRCPlug, ";
69	print STDERR "check your collect.cfg configuration file\n";
70	&print_usage();
71	die "\n";
72	}
73	return bless $self, $class;
74	}
75
76	sub get_default_block_exp {
77	my $self = shift (@_);
78
79	return q^(?i)\.(o\|obj\|a\|so\|dll)$^;
80	}
81
82	sub get_default_process_exp {
83	my $self = shift (@_);
84
85	# return q^(?i)\.te?xt$^;
86	return q^(Makefile.\|README.\|(?i)\.(c\|cc\|cpp\|C\|h\|hpp\|pl\|pm\|sh))$^;
87	}
88
89
90
91	# do plugin specific processing of doc_obj
92	sub process {
93	my $self = shift (@_);
94	my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
95	my $outhandle = $self->{'outhandle'};
96
97	print $outhandle "SRCPlug: processing $file\n"
98	if $self->{'verbosity'} > 1;
99
100	my $cursection = $doc_obj->get_top_section();
101
102	my $filetype="text"; # Makefiles, READMEs, ...
103	if ($file =~ /\.(cc\|h\|cpp\|C)$/) {$filetype="C++";} # assume all .h files...
104	elsif ($file =~ /\.c$/) {$filetype="C";}
105	elsif ($file =~ /\.p(l\|m)$/) {$filetype="perl";}
106	elsif ($file =~ /\.sh$/) {$filetype="sh";}
107
108	# modify '<' and '>' for GML... (even though inside <pre> tags!!)
109	$$textref =~ s/</</g;
110	$$textref =~ s/>/>/g;
111	$$textref =~ s/_/_/g;
112	# try _escape_text($text) from doc.pm....
113
114	# don't want mg to turn escape chars into actual values
115	$$textref =~ s/\\/\\\\/g;
116
117	# use filename (minus any prefix) as the title.
118	my $title;
119	if ($self->{'remove_prefix' ne ""}) {
120	($title = $file) =~ s/^$self->{'remove_prefix'}//;
121	} else {
122	($title = $file) =~ s@^.*(/\|\\)@@; # remove pathname by default
123	}
124	$doc_obj->add_utf8_metadata ($cursection, "Title", $title);
125	$doc_obj->add_utf8_metadata ($cursection, "filename", $file);
126
127	# class information from .h and .cc and .C and .cpp files
128	if ($filetype eq "C++")
129	{
130	process_c_plus_plus($textref,$pluginfo, $base_dir,
131	$file, $metadata, $doc_obj);
132	} elsif ($filetype eq "C")
133	{
134	get_includes_metadata($textref, $doc_obj);
135	}
136
137
138	# default operation...
139	# insert preformat tags and add text to document object
140	$doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
141
142	return 1;
143	}
144
145
146
147
148	sub get_includes_metadata {
149	my ($textref, $doc_obj) = @_;
150
151	my $topsection = $doc_obj->get_top_section();
152
153	# Get '#include' directives for metadata
154	if ($$textref =~ /\#\W?include\b/) {
155	my $includes=$$textref;
156
157	# remove commented lines
158	$includes =~ s@/\.?\*/@@sg; # treat string as single line
159	# ? means match smallest instead of longest !!!
160	$includes =~ s@//.*$@@mg; # treat string as multiple lines
161
162	# remove non- include lines (well, lines without a '#')
163	$includes =~ s/^[^\#]*$//mg;
164
165	# lines don't always start '#include "'.... we have to allow (eg)
166	# '# include ...'
167	# ' #include ...'
168	while ($includes =~ /\#\W?include/) {
169	$includes =~ s/^.?include.?(\"\|<)(.)(\"\|>).$//m;
170	# $1 is now the filename between the "s.
171	my $include=$2;
172	# remove leading pathname
173	$include =~ s@^.*(/\|\\)@@;
174
175	my $incs_done=$doc_obj->get_metadata($section, "includes");
176	foreach my $elem (@$incs_done) {
177	if ("$elem" eq "$include") {goto header_done;}
178	}
179	$doc_obj->add_utf8_metadata($topsection, "includes", $include);
180	header_done:
181	}
182	}
183	}
184
185
186
187	sub process_c_plus_plus {
188	my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
189
190	my $topsection = $doc_obj->get_top_section();
191
192
193	# Check for include metadata
194	get_includes_metadata($textref, $doc_obj);
195
196
197
198	# Get class declarations (but not forward declarations...) as metadata
199	if ($$textref =~ /\bclass\b/ ) {
200	my $classnames=$$textref;
201
202	# remove commented lines
203	$classnames =~ s@/\.?\*/@@sg;
204	$classnames =~ s@//.*$@@mg;
205	while ($classnames =~ /\bclass\b/) {
206
207	# delete all lines up to the next "class"
208	while ($classnames !~ /^[^\n]\bclass\b[^\n]\n/)
209	{$classnames =~ s/.*\n//;}
210
211	# $classnames =~ s/^([^c][^l])*(.)?$//mg; # delete unneccessary lines
212
213	# get the line including the next "class" and remove it from
214	# our tmp text.
215	$classnames =~ s/^(.\bclass\b.)$//m;
216
217	# don't index if merely a reference/fwd decl. of another class
218	if ($1 !~ /(friend\Wclass)\|(class\W\w+\W?\;)\|(\/\/.*class)/) {
219	# $1 is still the whole line - eg:
220	# "class StaffSystem: public BaseStaffSystem"
221	my $wholeline=$1;
222	my $classname=$1;
223	$classname =~ s/.class\W(\w+)./$1/;
224	my $classes=$doc_obj->get_metadata($section, "class");
225	foreach my $elem (@$classes) {
226	if ("$elem" eq "$classname") {goto class_done;}
227	}
228	$doc_obj->add_utf8_metadata($topsection, "class", $classname);
229	class_done:
230	$doc_obj->add_utf8_metadata($topsection, "classdecl", $wholeline);
231	}
232	}
233	} # end of "class"
234
235	return 1;
236	}
237
238	1;
239

Note: See TracBrowser for help on using the repository browser.

Download in other formats: