source: gs2-extensions/parallel-building/trunk/src/perllib/plugins/LoremTextPlugin.pm@ 25113

Last change on this file since 25113 was 25113, checked in by jmt12, 12 years ago

A TextPlugin extended to do a little more processing in order to (hopefully) strain the CPU

File size: 2.9 KB
Line 
1package LoremTextPlugin;
2
3use TextFile;
4
5use strict;
6no strict 'refs'; # allow filehandles to be variables and viceversa
7no strict 'subs';
8
# Set up inheritance at compile time: TextFile becomes the sole parent
# class of LoremTextPlugin.
BEGIN
{
  @LoremTextPlugin::ISA = qw(TextFile);
}
12
# Argument descriptors contributed by this plugin; new() appends them to the
# "ArgList" entry of the hash it is given.
#
# The default for process_exp is obtained with a class-method call so that
# the lookup goes through @ISA. This package defines no
# get_default_process_exp of its own, and a plain function call
# (&get_default_process_exp()) searches only the current package, so it
# cannot find an inherited implementation.
# NOTE(review): assumes a parent class provides get_default_process_exp and
# accepts the class name as its invocant - confirm against TextFile.
my $arguments =
  [ { 'name' => "process_exp",
      'desc' => "{BasePlugin.process_exp}",
      'type' => "regexp",
      'deft' => LoremTextPlugin->get_default_process_exp(),
      'reqd' => "no" } ];
19
# Option descriptor for this plugin; new() pushes it onto the "OptList"
# entry of the hash it is given. The 'args' entry points at the argument
# descriptor list held in $arguments.
my $options = {
  'name'           => "LoremTextPlugin",
  'desc'           => "TextPlugin extended to do a little more processing (to stress CPU rather than IO)",
  'abstract'       => "no",
  'inherits'       => "yes",
  # Source docs in regular txt format can be replaced with GS-generated html
  'srcreplaceable' => "yes",
  'args'           => $arguments,
};
26
27
# Constructor.
#
# Parameters (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to TextFile's constructor
#   $hashArgOptLists - hash ref; this plugin's argument descriptors are
#                      appended to its "ArgList" entry and its option
#                      descriptor to its "OptList" entry
#
# Returns the object built by TextFile's constructor, re-blessed into $class
# and carrying empty 'lexicon' and 'word_lengths' hashes.
sub new
{
  my $class = shift (@_);
  my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

  push(@$pluginlist, $class);

  push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
  push(@{$hashArgOptLists->{"OptList"}}, $options);

  # Explicit arrow call rather than the indirect-object form
  # "new TextFile(...)": the indirect form is parsed heuristically and is
  # ambiguous inside a package that itself defines a sub called new.
  my $self = TextFile->new($pluginlist, $inputargs, $hashArgOptLists);

  # Per-document tallies; process() rebuilds both for every document.
  $self->{'lexicon'} = {};
  $self->{'word_lengths'} = {};

  return bless $self, $class;
}
44
# Do plugin specific processing of doc_obj.
#
# Besides adding Title/FileFormat metadata and the document text, this
# tallies two word statistics from the raw text and records them as metadata
# on the top section of the document:
#   Lexicon     - "word:count" pairs for each distinct lower-cased word,
#                 sorted by word
#   WordLengths - "length:count" pairs, sorted numerically by length
# The same tallies are left in $self->{'lexicon'} and
# $self->{'word_lengths'}; both are rebuilt from scratch on every call.
#
# Parameters (after $self):
#   $textref  - reference to the document text; also handed to
#               text_to_html(), which modifies the text
#   $metadata - hash ref; when it already holds a defined 'Title' no title
#               is extracted from the text
#   $doc_obj  - document object that receives the metadata and text
#   $pluginfo, $base_dir, $file, $gli - accepted but not used here
#
# Returns 1.
sub process
{
  my $self = shift (@_);
  my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

  my $cursection = $doc_obj->get_top_section();

  # get title metadata
  # (don't need to get title if it has been passed
  # in from another plugin)
  if (!defined $metadata->{'Title'})
  {
    my $title = $self->get_title_metadata($textref);
    $doc_obj->add_utf8_metadata($cursection, "Title", $title);
  }
  # Add FileFormat metadata
  $doc_obj->add_metadata($cursection, "FileFormat", "Text");

  # Process the text to determine
  # - a) the lexicon used, and
  # - b) the frequency of words of various lengths
  my %lexicon;
  my %word_lengths;

  # The separator must be a real regex. Passing it as a quoted string
  # ('/[\,\.\s]+/') makes the slashes part of the pattern, so text would
  # only be split where it literally contains "/ ... /".
  foreach my $raw_word (split(/[,.\s]+/, $$textref))
  {
    # split yields an empty leading field when the text starts with a
    # separator; that is not a word.
    next if ($raw_word eq '');

    my $word = lc($raw_word);
    $lexicon{$word}++;
    $word_lengths{length($word)}++;
  }
  $self->{'lexicon'} = \%lexicon;
  $self->{'word_lengths'} = \%word_lengths;

  my @lexicon = map { $_ . ':' . $lexicon{$_} } sort keys %lexicon;
  $doc_obj->add_metadata($cursection, "Lexicon", join(', ', @lexicon));

  # Lengths are numbers, so sort them numerically (a plain sort would put
  # 10 before 2).
  my @word_lengths = map { $_ . ':' . $word_lengths{$_} }
                     sort { $a <=> $b } keys %word_lengths;
  $doc_obj->add_metadata($cursection, "WordLengths", join(', ', @word_lengths));

  # insert preformat tags and add text to document object
  $self->text_to_html($textref); # modifies the text
  $doc_obj->add_utf8_text($cursection, $$textref);

  return 1;
}
114
1151;
Note: See TracBrowser for help on using the repository browser.