root/gsdl/trunk/perllib/plugins/SourceCodePlugin.pm @ 17739

Revision 17739, 6.8 KB (checked in by kjdon, 12 years ago)

removed unnecessary comment

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# SourceCodePlugin.pm -- source code plugin
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 1999 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26# John McPherson Nov 2000
27# originally based on TEXTPlug
28
29# filename is currently used for Title ( optionally minus some prefix )
30
31# Current languages:
32#   text: READMEs/Makefiles
33#   C/C++   (currently extracts #include statements and C++ class decls)
34#   Perl    (currently only done as text)
35#   Shell   (currently only done as text)
36
37package SourceCodePlugin;
38
39use ReadTextFile;
40
41use strict;
42no strict 'refs'; # allow filehandles to be variables and viceversa
43
# Set up inheritance at compile time: SourceCodePlugin derives from
# ReadTextFile, so methods not defined in this package are looked up there.
BEGIN {
    @SourceCodePlugin::ISA = qw(ReadTextFile);
}
47
# Argument table for this plugin.  Each entry describes one option:
#   name - option name
#   desc - description key in braces (presumably resolved against a
#          strings resource elsewhere -- TODO confirm)
#   type - kind of value expected
#   deft - default value
#   reqd - whether the option is required
#
# The process_exp and block_exp defaults are taken from the
# get_default_*_exp subs defined later in this file.
my $arguments =
    [ { 'name' => "process_exp",
        'desc' => "{BasePlugin.process_exp}",
        'type' => "regexp",
        'deft' => get_default_process_exp(),
        'reqd' => "no" },
      { 'name' => "block_exp",
        'desc' => "{BasePlugin.block_exp}",
        'type' => "regexp",
        'deft' => get_default_block_exp(),
        'reqd' => "no" },
      { 'name' => "remove_prefix",
        'desc' => "{SourceCodePlugin.remove_prefix}",
        'type' => "regexp",
        # The regex must read  ^.*[/\\]  (anything up to the last '/' or
        # '\').  The previous double-quoted "^.*[/\\]" collapsed to
        # ^.*[/\]  which is an unterminated character class, i.e. not a
        # valid regex.  In a single-quoted string each "\\" yields one
        # backslash, hence the four backslashes below.
        'deft' => '^.*[/\\\\]',
        'reqd' => "no" } ];

# Option block describing the plugin itself; 'args' points at the
# argument table above.
my $options = { 'name'     => "SourceCodePlugin",
                'desc'     => "{SourceCodePlugin.desc}",
                'abstract' => "no",
                'inherits' => "yes",
                'args'     => $arguments };
70
71
# Constructor.
#
# Arguments:
#   $class           - class name to bless the new object into
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through untouched to ReadTextFile->new
#   $hashArgOptLists - hash ref; this plugin's argument table is appended
#                      to its "ArgList" entry and its option block to its
#                      "OptList" entry
#
# Returns the object built by ReadTextFile->new, re-blessed into $class.
sub new {
    my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;

    push @{$pluginlist}, $class;

    push @{ $hashArgOptLists->{"ArgList"} }, @{$arguments};
    push @{ $hashArgOptLists->{"OptList"} }, $options;

    # Direct method call instead of the indirect-object form
    # "new ReadTextFile(...)", which Perl can mis-parse.
    my $self = ReadTextFile->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
84
# Default value for the block_exp argument: a case-insensitive pattern
# matching names that end in .o, .obj, .a, .so or .dll.
# Takes no meaningful arguments (it is called without any when the
# argument table is built); any receiver passed in is ignored.
sub get_default_block_exp {
    return q{(?i)\.(o|obj|a|so|dll)$};
}
90
# Default value for the process_exp argument: matches names ending in
# "Makefile..." or "README..." (case-sensitive), or ending in one of the
# extensions c, cc, cpp, C, h, hpp, pl, pm, sh (case-insensitive).
# Takes no meaningful arguments; any receiver passed in is ignored.
sub get_default_process_exp {
    return q{(Makefile.*|README.*|(?i)\.(c|cc|cpp|C|h|hpp|pl|pm|sh))$};
}
96
97
98
# Do the plugin-specific processing of one source file.
#
# Arguments (after $self):
#   $textref   - reference to the file's text; the text is escaped IN PLACE
#   $pluginfo, $base_dir, $metadata
#              - not used here except to be handed on to
#                process_c_plus_plus
#   $file      - file name; drives file-type detection and supplies the
#                "Title" and "filename" metadata
#   $doc_obj   - document object that receives metadata and text
#   $gli       - unused in this sub
#
# Reads $self->{'remove_prefix'}: when set to a non-empty, valid regex it
# is stripped from the start of $file to form the Title; otherwise
# everything up to the last '/' or '\' is stripped.
#
# Always returns 1.
sub process {
    my $self = shift (@_);
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    my $cursection = $doc_obj->get_top_section();

    # Classify by extension.  Anything unrecognised (Makefiles, READMEs,
    # ...) is plain "text".  All .h/.hpp headers are assumed to be C++;
    # .hpp is listed because the default process expression accepts it.
    my $filetype = "text";
    if    ($file =~ /\.(cc|h|hpp|cpp|C)$/) { $filetype = "C++";  }
    elsif ($file =~ /\.c$/)                { $filetype = "C";    }
    elsif ($file =~ /\.p(l|m)$/)           { $filetype = "perl"; }
    elsif ($file =~ /\.sh$/)               { $filetype = "sh";   }

    # modify '<' and '>' for GML... (even though inside <pre> tags!!)
    $$textref =~ s/</&lt;/g;
    $$textref =~ s/>/&gt;/g;
    $$textref =~ s/_/&#95;/g;

    # don't want mg to turn escape chars into actual values
    $$textref =~ s/\\/\\\\/g;

    # Use the filename (minus any prefix) as the title.
    # The original test was written $self->{'remove_prefix' ne ""}, which
    # looks up hash key "1" and therefore never honoured the option.
    my $title  = $file;
    my $prefix = $self->{'remove_prefix'};
    my $prefix_re;
    if (defined $prefix && $prefix ne "") {
        # Compile separately so that a malformed user-supplied regex
        # cannot abort processing; we fall back to the default instead.
        $prefix_re = do { local $@; eval { qr/^$prefix/ } };
        if (!defined $prefix_re && defined $outhandle) {
            print {$outhandle} "SourceCodePlugin: ignoring invalid remove_prefix \"$prefix\"\n";
        }
    }
    if (defined $prefix_re) {
        $title =~ s/$prefix_re//;
    }
    else {
        $title =~ s@^.*[/\\]@@; # remove pathname by default
    }
    $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
    $doc_obj->add_metadata ($cursection, "FileFormat", "SRC");

    # remove the gsdl prefix from the filename
    my $relative_filename = $file;
    $relative_filename =~ s@^.*?gsdl[/\\]@@;
    $doc_obj->add_utf8_metadata ($cursection, "filename", $relative_filename);

    # Extra metadata: includes + class declarations for C++ sources,
    # includes only for C sources.  Note this runs on the already-escaped
    # text, which is why the include pattern looks for &lt; / &gt;.
    if ($filetype eq "C++") {
        process_c_plus_plus($textref, $pluginfo, $base_dir,
                            $file, $metadata, $doc_obj);
    }
    elsif ($filetype eq "C") {
        get_includes_metadata($textref, $doc_obj);
    }

    # default operation...
    # insert preformat tags and add text to document object
    $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");

    return 1;
}
154
155
156
157
# Record each distinct #include target of the text as "includes" metadata
# on the document's top section.
#
# Arguments:
#   $textref - reference to the source text.  The pattern accepts either
#              "..." or &lt;...&gt; around the target, i.e. it expects
#              angle brackets to have been HTML-escaped already.
#   $doc_obj - document object; queried for existing "includes" metadata
#              and given any new entries.
#
# Targets already present in the document's "includes" metadata, or seen
# earlier in the same text, are not added again.  Returns nothing useful.
sub get_includes_metadata {
    my ($textref, $doc_obj) = @_;

    my $topsection = $doc_obj->get_top_section();

    # Cheap pre-check: nothing to do if there is no include directive.
    return if $$textref !~ /\#\s*include\b/;

    my @includes =
        ($$textref =~ m/^\s*\#\s*include\s*(?:\"|&lt;)(.*?)(?:\"|&gt;)/mg);

    # Seed the "seen" set with whatever the document already holds.
    # A hash lookup replaces re-scanning the whole list for every include.
    my %seen;
    my $incs_done_ref = $doc_obj->get_metadata($topsection, "includes");
    if (defined $incs_done_ref) {
        $seen{$_} = 1 foreach @{$incs_done_ref};
    }

    foreach my $inc (@includes) {
        # add entries, but only if they don't already exist
        next if $seen{$inc}++;
        $doc_obj->add_utf8_metadata($topsection, "includes", $inc);
    }

    return;
}
187
188
189
# Extract C++-specific metadata from a source text: include targets (via
# get_includes_metadata) and class declarations.
#
# Arguments:
#   $textref - reference to the source text (not modified here; a copy is
#              used for the class scan)
#   $pluginfo, $base_dir, $file, $metadata - accepted but unused here
#   $doc_obj - document object that receives the metadata
#
# For every line (after comment removal) that contains the word "class"
# and is not a friend declaration or a forward declaration:
#   - the whole line is added as "classdecl" metadata, and
#   - the class name is added as "class" metadata unless the document
#     already has that name.
#
# Always returns 1.
sub process_c_plus_plus {
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;

    my $topsection = $doc_obj->get_top_section();

    # Check for include metadata
    get_includes_metadata($textref, $doc_obj);

    # Get class declarations (but not forward declarations...) as metadata
    return 1 if $$textref !~ /\bclass\b/;

    my $classnames = $$textref;

    # remove comments (block comments may span lines, hence /s)
    $classnames =~ s@/\*.*?\*/@@sg;
    $classnames =~ s@//.*$@@mg;

    # Walk the text line by line.  The earlier implementation repeatedly
    # chopped leading lines off the text until one containing "class" AND
    # a trailing newline was first; a final "class" line without a newline
    # made that loop spin forever.
    foreach my $line (split /\n/, $classnames) {
        next if $line !~ /\bclass\b/;

        # don't index if merely a reference/fwd decl. of another class
        next if $line =~ /(friend\Wclass)|(class\W\w+\W?\;)|(\/\/.*class)/;

        # $line is the whole declaration line - eg:
        # "class StaffSystem: public BaseStaffSystem"
        # Reduce it to the identifier following "class".  If no identifier
        # follows on this line the substitution does not match and the
        # whole line is used as the name (behaviour kept from the
        # original).
        my $classname = $line;
        $classname =~ s/.*class\W(\w+).*/$1/;

        # Only record a class name once per document...
        my $classes = $doc_obj->get_metadata($topsection, "class");
        my $already_known = grep { "$_" eq "$classname" } @{ $classes || [] };
        if (!$already_known) {
            $doc_obj->add_utf8_metadata($topsection, "class", $classname);
        }

        # ...but always record the declaration line itself.
        $doc_obj->add_utf8_metadata($topsection, "classdecl", $line);
    }

    return 1;
}
240
2411;
242
Note: See TracBrowser for help on using the browser.