Changeset 10404 for trunk/gsdl
- Timestamp:
- 2005-08-03T13:26:08+12:00 (19 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/StructuredHTMLPlug.pm
r10356 r10404 2 2 # 3 3 # StructuredHTMLPlug.pm -- html plugin with extra facilities for teasing out 4 # hierarchical structure (such as h1, h2, h3 tags) in an HTML document 4 # hierarchical structure (such as h1, h2, h3, or user-defined tags) in an 5 # HTML document 5 6 # 6 7 # A component of the Greenstone digital library software … … 25 26 # 26 27 ########################################################################### 28 # This plugin is to process an HTML file where sections are divided by 29 # user-defined headings tags. As it is difficult to predict what user's definition 30 # this plugin allows to detect the user-defined titles up to three levels (level1, level2, level3...) 31 # as well as allows to get rid of user-defined Table of Content (TOC)... 32 # format:e.g. level1 (Abstract_title|ChapterTitle|Referencing Heading) level2(SectionHeading)... 33 27 34 package StructuredHTMLPlug; 28 35 … … 30 37 use ImagePlug; 31 38 39 #use strict; # every perl program should have this! 40 #no strict 'refs'; # make an exception so we can use variables as filehandles 41 32 42 sub BEGIN { 33 @ISA = ('HTMLPlug'); 34 } 35 36 my $arguments = 37 [ { 'name' => "check_toc", 38 'desc' => "StructuredHTMLPlug.check_toc'}", 39 'type' => "flag", 40 'reqd' => "no"}, 41 { 'name' => "title_header", 42 'desc' => "{StructuredHTMLPlug.title_header}", 43 'type' => "regexp", 44 'reqd' => "no"}, 45 { 'name' => "level1_header", 46 'desc' => "{StructuredHTMLPlug.level1_header}", 47 'type' => "regexp", 48 'reqd' => "no"}, 49 { 'name' => "level2_header", 50 'desc' => "{StructuredHTMLPlug.level2_header}", 51 'type' => "regexp", 52 'reqd' => "no"}, 53 { 'name' => "level3_header", 54 'desc' => "{StructuredHTMLPlug.level3_header}", 55 'type' => "regexp", 56 'reqd' => "no"}, 57 { 'name' => "toc_header", 58 'desc' => "{StructuredHTMLPlug.toc_header}", 59 'type' => "regexp", 60 'reqd' => "no"}, 61 { 'name' => "tof_header", 62 'desc' => "{StructuredHTMLPlug.tof_header}", 63 'type' => "regexp", 64 'reqd' => "no"}]; 43 @StructuredHTMLPlug::ISA = ('HTMLPlug'); 44 } 45 46 my $arguments = []; 65 47 66 48 my $options = { 'name' => "StructuredHTMLPlug", … … 105 87 sub process { 106 88 my $self = shift (@_); 107 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; 89 #my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; 90 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_; 108 91 my $outhandle = $self->{'outhandle'}; 109 92 … … 114 97 my $head = shift(@head_and_body); 115 98 my $body_text = join("<body", @head_and_body); 116 99 100 # If checkout_toc is enables, it means to get rid of toc and tof contents. 117 101 # get rid of TOC and TOF sections and their title 118 # If check_toc is enables, it means to get rid of toc and tof contents. 119 if ($self->{'check_toc'}){ 102 if ($self->{'checkout_toc'}){ 120 103 #line-height:150%;mso-ansi-language:FR'>Contents<o:p></o:p></span></b></p> 121 104 # get rid of Table of Contents title and Table of Figures 122 $body_text =~ s/<p[^>]*><b><span[^>]*>(Table of Content.|Content.)<o:p><\/o:p><\/span><\/b><\/p>//isg;123 $body_text =~ s/<p[^>]*><b><span[^>]*>(Table of Figure.|Figure.)<o:p><\/o:p><\/span><\/b><\/p>//isg;105 #$body_text =~ s/<p[^>]*><b><span[^>]*>(Table of Content.|Content.)<o:p><\/o:p><\/span><\/b><\/p>//isg; 106 #$body_text =~ s/<p[^>]*><b><span[^>]*>(Table of Figure.|Figure.)<o:p><\/o:p><\/span><\/b><\/p>//isg; 124 107 $body_text =~ s/<p class=(($self->{'toc_header'})[^>]*)>(.+?)<\/p>//isg; 125 108 $body_text =~ s/<p class=(($self->{'tof_header'})[^>]*)>(.+?)<\/p>//isg;
Note:
See TracChangeset
for help on using the changeset viewer.