root/gsdl/trunk/perllib/plugins/SourceCodePlugin.pm @ 16104

Revision 16104, 6.9 KB (checked in by kjdon, 12 years ago)

tried to make the 'xxxplugin processing file' print statements more consistent. They are now done in read (or read_into_doc_obj) and not process

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# SourceCodePlugin.pm -- source code plugin
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26# John McPherson Nov 2000
27# originally based on TEXTPlug
28
29# filename is currently used for Title ( optionally minus some prefix )
30
31# Current languages:
32#   text: READMEs/Makefiles
33#   C/C++   (currently extracts #include statements and C++ class decls)
34#   Perl    (currently only done as text)
35#   Shell   (currently only done as text)
36
37# 12/05/02 Added usage datastructure - John Thompson
38
39package SourceCodePlugin;
40
41use ReadTextFile;
42
43use strict;
44no strict 'refs'; # allow filehandles to be variables and viceversa
45
46sub BEGIN {
47    @SourceCodePlugin::ISA = ('ReadTextFile');
48}
49
50my $arguments =
51    [ { 'name' => "process_exp",
52    'desc' => "{BasePlugin.process_exp}",
53    'type' => "regexp",
54    'deft' => &get_default_process_exp(),
55    'reqd' => "no" } ,
56      { 'name' => "block_exp",
57    'desc' => "{BasePlugin.block_exp}",
58    'type' => "regexp",
59    'deft' => &get_default_block_exp(),
60    'reqd' => "no" },
61      { 'name' => "remove_prefix",
62    'desc' => "{SourceCodePlugin.remove_prefix}",
63    'type' => "regexp",
64    'deft' => "^.*[/\\]",
65    'reqd' => "no" } ];
66
67my $options = { 'name'     => "SourceCodePlugin",
68        'desc'     => "{SourceCodePlugin.desc}",
69        'abstract' => "no",
70        'inherits' => "yes",
71        'args'     => $arguments };
72
73
74sub new {
75    my ($class) = shift (@_);
76    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
77    push(@$pluginlist, $class);
78
79    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
80    push(@{$hashArgOptLists->{"OptList"}},$options);
81
82    my $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
83
84    return bless $self, $class;
85}
86
87sub get_default_block_exp {
88    my $self = shift (@_);
89
90    return q^(?i)\.(o|obj|a|so|dll)$^;
91}
92
93sub get_default_process_exp {
94    my $self = shift (@_);
95
96    return q^(Makefile.*|README.*|(?i)\.(c|cc|cpp|C|h|hpp|pl|pm|sh))$^;
97}
98
99
100
101# do plugin specific processing of doc_obj
102sub process {
103    my $self = shift (@_);
104    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
105    my $outhandle = $self->{'outhandle'};
106   
107    my $cursection = $doc_obj->get_top_section();
108
109    my $filetype="text";  # Makefiles, READMEs, ...
110    if ($file =~ /\.(cc|h|cpp|C)$/) {$filetype="C++";} # assume all .h files...
111    elsif ($file =~ /\.c$/)         {$filetype="C";}
112    elsif ($file =~ /\.p(l|m)$/)    {$filetype="perl";}
113    elsif ($file =~ /\.sh$/)        {$filetype="sh";}
114
115    # modify '<' and '>' for GML... (even though inside <pre> tags!!)
116    $$textref =~ s/</&lt;/g;
117    $$textref =~ s/>/&gt;/g;
118    $$textref =~ s/_/&#95;/g;
119    # try _escape_text($text) from doc.pm....
120
121    # don't want mg to turn escape chars into actual values
122    $$textref =~ s/\\/\\\\/g;
123
124    # use filename (minus any prefix) as the title.
125    my $title;
126    if ($self->{'remove_prefix' ne ""}) {
127    ($title = $file) =~ s/^$self->{'remove_prefix'}//;
128    } else {
129    ($title = $file) =~ s@^.*[/\\]@@; # remove pathname by default
130    }
131    $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
132    $doc_obj->add_metadata ($cursection, "FileFormat", "SRC");
133
134    # remove the gsdl prefix from the filename
135    my $relative_filename=$file;
136    $relative_filename =~ s@^.*?gsdl[/\\]@@;
137    $doc_obj->add_utf8_metadata ($cursection, "filename", $relative_filename);
138
139    # class information from .h and .cc and .C and .cpp files
140    if ($filetype eq "C++")
141    {
142    process_c_plus_plus($textref,$pluginfo, $base_dir,
143                   $file, $metadata, $doc_obj);
144    } elsif ($filetype eq "C")
145    {
146    get_includes_metadata($textref, $doc_obj);
147    }
148
149
150     # default operation...
151     # insert preformat tags and add text to document object
152    $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
153   
154    return 1;
155}
156
157
158
159
160sub get_includes_metadata {
161    my ($textref, $doc_obj) = @_;
162   
163    my $topsection = $doc_obj->get_top_section();
164
165    # Get '#include' directives for metadata
166    if ($$textref !~ /\#\s*include\b/) {
167    return;
168    }
169
170    my @includes =
171    ($$textref =~ m/^\s*\#\s*include\s*(?:\"|&lt;)(.*?)(?:\"|&gt;)/mg);
172   
173    my $incs_done_ref=$doc_obj->get_metadata($topsection, "includes");
174    my @incs_done;
175    if (defined($incs_done_ref)) {
176    @incs_done=@$incs_done_ref;
177    } else {
178    @incs_done=();
179    }
180
181    foreach my $inc (@includes) {
182    # add entries, but only if they don't already exist
183    if (!join('', map {$_ eq "$inc"?1:""} @incs_done)) {
184        push @incs_done, $inc;
185        $doc_obj->add_utf8_metadata($topsection, "includes", $inc);
186    }
187    }
188}
189
190
191
192sub process_c_plus_plus {
193    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
194
195    my $topsection = $doc_obj->get_top_section();
196
197
198    # Check for include metadata
199    get_includes_metadata($textref, $doc_obj);
200
201
202
203    # Get class declarations (but not forward declarations...) as metadata
204    if ($$textref =~ /\bclass\b/ ) {
205    my $classnames=$$textref;
206   
207    # remove commented lines
208    $classnames =~ s@/\*.*?\*/@@sg;
209    $classnames =~ s@//.*$@@mg;
210    while ($classnames =~ /\bclass\b/) {
211
212        # delete all lines up to the next "class"
213        while ($classnames !~ /^[^\n]*\bclass\b[^\n]*\n/)
214        {$classnames =~ s/.*\n//;}
215       
216#       $classnames =~ s/^([^c][^l])*(.)?$//mg; # delete unneccessary lines
217
218        # get the line including the next "class" and remove it from
219        # our tmp text.
220        $classnames =~ s/^(.*\bclass\b.*)$//m;
221
222        # don't index if merely a reference/fwd decl. of another class
223        if ($1 !~ /(friend\Wclass)|(class\W\w+\W?\;)|(\/\/.*class)/) {
224        # $1 is still the whole line - eg:
225        # "class StaffSystem: public BaseStaffSystem"
226        my $wholeline=$1;
227        my $classname=$1;
228        $classname =~ s/.*class\W(\w+).*/$1/;
229        my $classes=$doc_obj->get_metadata($topsection, "class");
230        foreach my $elem (@$classes) {
231            if ("$elem" eq "$classname") {goto class_done;}
232        }
233        $doc_obj->add_utf8_metadata($topsection, "class", $classname);
234          class_done:
235        $doc_obj->add_utf8_metadata($topsection, "classdecl", $wholeline);
236        }
237    }
238    } # end of "class"
239
240    return 1;
241}
242
2431;
244
Note: See TracBrowser for help on using the browser.