Changeset 10121


Ignore:
Timestamp:
2005-06-16T13:44:32+12:00 (19 years ago)
Author:
mdewsnip
Message:

Added the "sectionalise_using_h_tags" option to HTMLPlug, which automatically creates a sectioned document based on h1, h2, ... hX tags. Many thanks to Emanuel Dejanu for this code.

Location:
trunk/gsdl/perllib
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/HTMLPlug.pm

    r9747 r10121  
    9696    'type' => "string",
    9797    'deft' => "",
    98     'reqd' => "no"}
     98    'reqd' => "no"},
     99      { 'name' => "sectionalise_using_h_tags",
     100    'desc' => "{HTMLPlug.sectionalise_using_h_tags}",
     101    'type' => "flag" }
    99102      ];
    100103
     
    126129             q^description_tags^, \$self->{'description_tags'},
    127130             q^no_strip_metadata_html/.*/^, \$self->{'no_strip_metadata_html'},
     131             q^sectionalise_using_h_tags^, \$self->{'sectionalise_using_h_tags'},
    128132             "allow_extra_options")) {
    129133
     
    228232    $self->{'dir_num'} = 0;
    229233    $self->{'file_num'} = 0;
     234
     235    # process an HTML file where sections are divided by heading tags (H1, H2 ...)
     236    # you can also include metadata in the format (X can be any number)
     237    # <hX>Title<!--gsdl-metadata
     238    #   <Metadata name="name1">value1</Metadata>
     239    #   ...
     240    #   <Metadata name="nameN">valueN</Metadata>
     241    #--></hX>
     242    if ($self->{'sectionalise_using_h_tags'}) {
     243    # description_tags should always be activated because we convert headings to description tags
     244    $self->{'description_tags'} = 1;
     245
     246    my $arrSections = [];
     247    $$textref =~ s/<h([0-9]+)[^>]*>(.*?)<\/h[0-9]+>/$self->process_heading($1, $2, $arrSections, $file)/isge;
     248
     249    if (scalar(@$arrSections)) {
     250        my $strMetadata = $self->update_section_data($arrSections, -1);
     251        if (length($strMetadata)) {
     252        $strMetadata = '<!--' . $strMetadata . "\n-->\n</body>";
     253        $$textref =~ s/<\/body>/$strMetadata/ig;
     254        }
     255    }
     256    }
    230257
    231258    my $cursection = $doc_obj->get_top_section();
     
    389416}
    390417
     418
     419sub process_heading
     420{
     421    my ($self, $nHeadNo, $strHeadingText, $arrSections, $file) = @_;
     422    $strHeadingText = '' if (!defined($strHeadingText));
     423
     424    my $strMetadata = $self->update_section_data($arrSections, int($nHeadNo));
     425
     426    my $strSecMetadata = '';
     427    while ($strHeadingText =~ s/<!--gsdl-metadata(.*?)-->//is)
     428    {
     429    $strSecMetadata .= $1;
     430    }
     431
     432    $strHeadingText =~ s/^\s+//g;
     433    $strHeadingText =~ s/\s+$//g;
     434    $strSecMetadata =~ s/^\s+//g;
     435    $strSecMetadata =~ s/\s+$//g;
     436
     437    $strMetadata .= "\n<Section>\n\t<Description>\n\t\t<Metadata name=\"Title\">" . $strHeadingText . "</Metadata>\n";
     438
     439    if (length($strSecMetadata)) {
     440    $strMetadata .= "\t\t" . $strSecMetadata . "\n";
     441    }
     442
     443    $strMetadata .= "\t</Description>\n";
     444
     445    return "<!--" . $strMetadata . "-->";
     446}
     447
     448
     449sub update_section_data
     450{
     451    my ($self, $arrSections, $nCurTocNo) = @_;
     452    my ($strBuffer, $nLast, $nSections) = ('', 0, scalar(@$arrSections));
     453
     454    if ($nSections == 0) {
     455    push @$arrSections, $nCurTocNo;
     456    return $strBuffer;
     457    }
     458    $nLast = $arrSections->[$nSections - 1];
     459    if ($nCurTocNo > $nLast) {
     460    push @$arrSections, $nCurTocNo;
     461    return $strBuffer;
     462    }
     463    for(my $i = $nSections - 1; $i >= 0; $i--) {
     464    if ($nCurTocNo <= $arrSections->[$i]) {
     465        $strBuffer .= "\n</Section>";
     466        pop @$arrSections;
     467    }
     468    }
     469    push @$arrSections, $nCurTocNo;
     470    return $strBuffer;
     471}
     472
     473
    391474# note that process_section may be called multiple times for a single
    392475# section (relying on the fact that add_utf8_text appends the text to any
  • trunk/gsdl/perllib/strings.rb

    r9964 r10121  
    647647HTMLPlug.rename_assoc_files:Renames files associated with documents (e.g. images). Also creates much shallower directory structure (useful when creating collections to go on cd-rom).
    648648
     649HTMLPlug.sectionalise_using_h_tags:Automatically create a sectioned document using h1, h2, ... hX tags.
     650
    649651HTMLPlug.title_sub:Substitution expression to modify string stored as Title. Used by, for example, PDFPlug to remove "Page 1", etc from text used as the title.
    650652
Note: See TracChangeset for help on using the changeset viewer.