source: gs2-extensions/parallel-building/trunk/src/perllib/plugins/LoremTextPlugin.pm@ 27039

Last change on this file since 27039 was 26939, checked in by jmt12, 11 years ago

Replacing uses of TextFile with TextPlugin... don't know how the former ever worked... maybe it didn't

File size: 3.0 KB
Line 
1package LoremTextPlugin;
2
3use TextPlugin;
4
5use strict;
6no strict 'refs'; # allow filehandles to be variables and viceversa
7no strict 'subs';
8
# Establish inheritance at compile time: LoremTextPlugin is-a TextPlugin.
BEGIN
{
    @LoremTextPlugin::ISA = qw(TextPlugin);
}
12
# Argument descriptors contributed by this plugin. Only the file-matching
# expression is declared here; its default comes from
# get_default_process_exp().
my $arguments = [
    {
        name => "process_exp",
        desc => "{BasePlugin.process_exp}",
        type => "regexp",
        deft => get_default_process_exp(),
        reqd => "no",
    },
];

# Plugin descriptor that new() appends to the shared option list.
my $options = {
    name           => "LoremTextPlugin",
    desc           => "TextPlugin extended to do a little more processing (to stress CPU rather than IO)",
    abstract       => "no",
    inherits       => "yes",
    # Source docs in regular txt format can be replaced with GS-generated html
    srcreplaceable => "yes",
    args           => $arguments,
};
26
27
# Return the default regular expression (as a string) used to select the
# files this plugin processes: names ending in ".txt" or ".text",
# matched case-insensitively.
sub get_default_process_exp
{
    my ($self) = @_;    # accepted for method-style calls; not used

    return '(?i)\.te?xt$';
}
33
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the TextPlugin constructor
#   $hashArgOptLists - hash ref; this plugin's argument descriptors are pushed
#                      onto its "ArgList" and its option block onto "OptList"
#
# Returns the object built by TextPlugin, re-blessed into $class, with empty
# 'lexicon' and 'word_lengths' hashes ready for process() to fill in.
sub new
{
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Use an explicit class-method call rather than indirect object syntax
    # ("new TextPlugin(...)"), which Perl can misparse - particularly inside
    # a package that defines its own new().
    my $self = TextPlugin->new($pluginlist, $inputargs, $hashArgOptLists);

    # Per-document statistics; process() resets these for every document.
    $self->{'lexicon'} = {};
    $self->{'word_lengths'} = {};

    return bless $self, $class;
}
50
# Do plugin specific processing of doc_obj.
#
# Besides attaching Title / FileFormat metadata and the text itself, this
# tallies the words in the text and records two extra metadata values on the
# top section:
#   Lexicon     - "word:count" pairs, sorted by word, joined with ", "
#   WordLengths - "length:count" pairs, sorted numerically by length,
#                 joined with ", "
#
# Arguments (after $self):
#   $textref  - reference to the document text
#   $metadata - hash ref; a Title is only generated when it has no 'Title'
#   $doc_obj  - document object that receives the metadata and text
#   $pluginfo, $base_dir, $file, $gli - not used by this method
#
# The tallies are also left in $self->{'lexicon'} and
# $self->{'word_lengths'}. Returns 1.
sub process
{
    my $self = shift (@_);
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    my $cursection = $doc_obj->get_top_section();

    # get title metadata
    # (don't need to get title if it has been passed
    # in from another plugin)
    if (!defined $metadata->{'Title'})
    {
        my $title = $self->get_title_metadata($textref);
        $doc_obj->add_utf8_metadata($cursection, "Title", $title);
    }
    # Add FileFormat metadata
    $doc_obj->add_metadata($cursection, "FileFormat", "Text");

    # Process the text to determine
    # - a) the lexicon used, and
    # - b) the frequency of words of various lengths
    # Both tallies are reset so nothing leaks over from a previous document.
    my %lexicon;
    my %word_lengths;

    # The separator must be a real regex: a quoted '/.../' string would make
    # the slashes part of the pattern and the text would never be split.
    foreach my $token (split(/[,.\s]+/, $$textref))
    {
        # split() yields an empty leading field when the text starts with a
        # separator; that is not a word.
        next if ($token eq '');

        my $word = lc($token);
        $lexicon{$word}++;
        $word_lengths{length($word)}++;
    }

    $self->{'lexicon'} = \%lexicon;
    $self->{'word_lengths'} = \%word_lengths;

    my @lexicon_entries;
    foreach my $word (sort keys %lexicon)
    {
        push(@lexicon_entries, $word . ':' . $lexicon{$word});
    }
    $doc_obj->add_metadata($cursection, "Lexicon", join(', ', @lexicon_entries));

    # Lengths are numbers, so sort numerically (a plain sort puts 10 before 2).
    my @length_entries;
    foreach my $word_length (sort { $a <=> $b } keys %word_lengths)
    {
        push(@length_entries, $word_length . ':' . $word_lengths{$word_length});
    }
    $doc_obj->add_metadata($cursection, "WordLengths", join(', ', @length_entries));

    # insert preformat tags and add text to document object
    $self->text_to_html($textref); # modifies the text
    $doc_obj->add_utf8_text($cursection, $$textref);

    return 1;
}
120
1211;
Note: See TracBrowser for help on using the repository browser.