source: trunk/gsdl/perllib/plugins/SRCPlug.pm@ 1707

Last change on this file since 1707 was 1707, checked in by jrm21, 23 years ago

Plugin for source code (primarily for putting Greenstone src into a
collection!). Currently does special tricks for C++ (and C), and plain
text for Makefiles, READMEs and Perl code.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.4 KB
Line 
1###########################################################################
2#
3# SRCPlug.pm -- source code plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25# John McPherson Nov 2000
26# originally based on TEXTPlug
27# filename is currently used for Title ( optionally minus some prefix )
28
29package SRCPlug;
30
31use BasPlug;
32use parsargv;
33
34sub BEGIN {
35 @ISA = ('BasPlug');
36}
37
38
39sub print_usage {
40 print STDERR "\n usage: plugin SRCPlug [options]\n";
41 print STDERR "Try to import C and C++ source code. Adds \"class\"";
42 print STDERR " metadata.\n\n";
43 print STDERR " options:\n";
44 print STDERR " -remove_prefix <pattern> Remove this leading pattern from the filename\n";
45 print STDERR " (eg -remove_prefix /tmp/XX/src/). The default is to\n";
46 print STDERR " remove the whole path from the filename.\n";
47 print STDERR "\n";
48}
49
50sub new {
51 my ($class) = @_;
52 my $self = new BasPlug ($class, @_);
53
54 if (!parsargv::parse(\@_,
55 q^remove_prefix/(\S+)/^, \$self->{'remove_prefix'})) {
56 print STDERR "\nIncorrect options passed to SRCPlug, ";
57 print STDERR "check your collect.cfg configuration file\n";
58 &print_usage();
59 die "\n";
60 }
61 return bless $self, $class;
62}
63
64sub get_default_block_exp {
65 my $self = shift (@_);
66
67 return q^(?i)\.(o|obj|a|so|dll)$^;
68}
69
70sub get_default_process_exp {
71 my $self = shift (@_);
72
73# return q^(?i)\.te?xt$^;
74 return q^(Makefile.*|README.*|(?i)\.(c|cc|cpp|C|h|hpp|pl|pm|sh))$^;
75}
76
77# do plugin specific processing of doc_obj
78sub process {
79 my $self = shift (@_);
80 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
81 my $outhandle = $self->{'outhandle'};
82
83 print $outhandle "SRCPlug: processing $file\n"
84 if $self->{'verbosity'} > 1;
85
86 my $cursection = $doc_obj->get_top_section();
87
88 my $filetype="text"; # Makefiles, READMEs, ...
89 if ($file =~ /\.(cc|h|cpp|C)$/) {$filetype="C++";} # assume all .h files...
90 elsif ($file =~ /\.c$/) {$filetype="C";}
91 elsif ($file =~ /\.p(l|m)$/) {$filetype="perl";}
92 elsif ($file =~ /\.sh$/) {$filetype="sh";}
93
94 # modify '<' and '>' for GML... (even though inside <pre> tags!!)
95 $$textref =~ s/</&lt;/g;
96 $$textref =~ s/>/&gt;/g;
97 # try _escape_text($text) from doc.pm....
98
99
100 # use filename (minus any prefix) as the title.
101 my $title;
102 if ($self->{'remove_prefix' ne ""}) {
103 ($title = $file) =~ s/^$self->{'remove_prefix'}//;
104 } else {
105 ($title = $file) =~ s@^.*(/|\\)@@; # remove pathname by default
106 }
107 $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
108 $doc_obj->add_utf8_metadata ($cursection, "filename", $file);
109
110 # class information from .h and .cc and .C and .cpp files
111 if ($filetype eq "C++")
112 {
113 return process_c_plus_plus($textref,$pluginfo, $base_dir,
114 $file, $metadata, $doc_obj);
115 }
116 # default operation...
117 # insert preformat tags and add text to document object
118 $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
119
120 return 1;
121}
122
123
124sub process_c_plus_plus {
125 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
126
127 my $topsection = $doc_obj->get_top_section();
128
129
130
131 # Get '#include' directives for metadata
132 if ($$textref =~ /\#\W?include\b/) {
133 my $includes=$$textref;
134
135 # remove commented lines
136 $includes =~ s@/\*.*?\*/@@sg; # treat string as single line
137 # ? means match smallest instead of longest !!!
138 $includes =~ s@//.*$@@mg; # treat string as multiple lines
139
140 # remove non- include lines (well, lines without a '#')
141 $includes =~ s/^[^\#]*$//mg;
142
143 # lines don't always start '#include "'.... we have to allow (eg)
144 # '# include ...'
145 # ' #include ...'
146 while ($includes =~ /\#\W?include/) {
147 $includes =~ s/^.*?include.*?(\"|&lt;)(.*)(\"|&gt;).*$//m;
148 # $1 is now the filename between the "s.
149 my $include=$2;
150 # remove leading pathname
151 $include =~ s@^.*(/|\\)@@;
152
153 my $incs_done=$doc_obj->get_metadata($section, "includes");
154 foreach my $elem (@$incs_done) {
155 if ("$elem" eq "$include") {goto header_done;}
156 }
157 $doc_obj->add_utf8_metadata($topsection, "includes", $include);
158 header_done:
159 }
160 }
161
162
163
164
165 # Get class declarations (but not forward declarations...) as metadata
166 if ($$textref =~ /\bclass\b/ ) {
167 my $classnames=$$textref;
168
169 # remove commented lines
170 $classnames =~ s@/\*.*?\*/@@sg;
171 $classnames =~ s@//.*$@@mg;
172 while ($classnames =~ /\bclass\b/) {
173
174 # delete all lines up to the next "class"
175 while ($classnames !~ /^[^\n]*\bclass\b[^\n]*\n/)
176 {$classnames =~ s/.*\n//;}
177
178# $classnames =~ s/^([^c][^l])*(.)?$//mg; # delete unneccessary lines
179
180 # get the line including the next "class" and remove it from
181 # our tmp text.
182 $classnames =~ s/^(.*\bclass\b.*)$//m;
183
184 # don't index if merely a reference/fwd decl. of another class
185 if ($1 !~ /(friend\Wclass)|(class\W\w+\W?\;)|(\/\/.*class)/) {
186 # $1 is still the whole line - eg:
187 # "class StaffSystem: public BaseStaffSystem"
188 my $wholeline=$1;
189 my $classname=$1;
190 $classname =~ s/.*class\W(\w+).*/$1/;
191 my $classes=$doc_obj->get_metadata($section, "class");
192 foreach my $elem (@$classes) {
193 if ("$elem" eq "$classname") {goto class_done;}
194 }
195 $doc_obj->add_utf8_metadata($topsection, "class", $classname);
196 class_done:
197 $doc_obj->add_utf8_metadata($topsection, "classdecl", $wholeline);
198 }
199 }
200 } # end of "class"
201
202
203
204
205 # insert preformat tags and add text to document object
206 $doc_obj->add_utf8_text($topsection, "<pre>\n$$textref\n</pre>");
207
208 return 1;
209}
210
2111;
212
Note: See TracBrowser for help on using the repository browser.