Changeset 11893


Ignore:
Timestamp:
2006-05-31T10:33:25+12:00 (18 years ago)
Author:
kjdon
Message:

Deleted some code relating to the original Word documents - this plugin shouldn't need to know where the HTML has come from. Also commented out the print statements, as they were interfering with GLI's processing of the output.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/StructuredHTMLPlug.pm

    r11884 r11893  
    9696}
    9797
    98 sub read {
    99     my $self = shift (@_); 
    100     my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
    101 
    102     my $filename = $file;
    103     $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
    104 
    105     if ($filename =~ m/\.html?$/) {
    106     my $poss_doc_filename = $filename;
    107     $poss_doc_filename =~ s/\.html?$/.doc/;
    108 
    109     if (-e $poss_doc_filename) {
    110         # this file has already been processed by Word plugin
    111         return 0;
    112     }
    113     }
    114     return $self->SUPER::read(@_);
    115 }
    11698
    11799sub process {
    118100    my $self = shift (@_);
    119     #my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
    120101    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    121102    my $outhandle = $self->{'outhandle'};
     
    139120    }
    140121   
    141     # If delete_toc is enables, it means to get rid of toc and tof contents.
     122    # set the title here if we haven't found it yet
     123    if (!defined $doc_obj->get_metadata_element ($doc_obj->get_top_section(), "Title")) {
     124    if (defined $doctitle && $doctitle =~ /\S/) {
     125        $doc_obj->add_metadata($doc_obj->get_top_section(), "Title", $doctitle);
     126    } else {
     127        $self->title_fallback($doc_obj,$doc_obj->get_top_section(),$file);
     128    }
     129    }
     130
     131    # If delete_toc is enabled, it means to get rid of toc and tof contents.
    142132    # get rid of TOC and TOF sections and their title
    143133    if (defined $self->{'delete_toc'} && ($self->{'delete_toc'} == 1)){
     
    149139    if (defined $self->{'title_header'} && $self->{'title_header'}=~ /\S/){
    150140    $self->{'title_header'} =~ s/^(\()(.*)(\))/$2/is;
    151     #$body_text =~ s/<p class=(($self->{'title_header'})[^>]*)>(.+?)<\/p>/<p class=$1><title>$3<\/title><\/p>/isg;
    152     #$doctitle = $3;
    153141    $body_text =~ s/<p class=(($self->{'title_header'})[^>]*)>(.+?)<\/p>/<p class=$1><h1>$3<\/h1><\/p>/isg;
    154     #$body_text =~ m/<p class=(($self->{'title_header'})[^>]*)>(.+?)<\/p>/isg;
    155     #$doctitle = "<h1>".$3."<\/h1>" if defined $3;
    156142    }
    157143
     
    170156    $body_text =~ s/<p class=(($self->{'level3_header'})[^>]*)>(.+?)<\/p>/<p class=$1><h3>$3<\/h3><\/p>/isg;
    171157    }
     158   
    172159    # Tidy up extra new lines
    173160    $body_text =~ s/(<p[^>]*><span[^>]*><o:p>&nbsp;<\/o:p><\/span><\/p>)//isg;
     
    246233            $section_text .= "-->\n";
    247234           
    248             print $outhandle $spacing."$h_text\n"
    249             if $self->{'verbosity'} > 2;
     235            #print $outhandle $spacing."$h_text\n"
     236            #   if $self->{'verbosity'} > 2;
    250237           
    251238            $sectionh1++ if ($hnum==1);
     
    276263    $$textref = $section_text;
    277264   
    278     # should be textref not testref???
    279     #$$testref =~ s/<h(\d+)>(.*?)<\/h$1>/<Section><Metadata name=\"Title\">$1<\/Metadata></Section><h$1><\/h$1>/gi;
    280    
    281     if ($sectionh1>0)
    282     {
    283     print $outhandle "  Located section headings ..."
    284         if $self->{'verbosity'} > 1;
    285     }
    286     print $outhandle "  Passing on the HTMLPlug\n"
    287     if $self->{'verbosity'} > 1;
     265#    if ($sectionh1>0)
     266#    {
     267#   print $outhandle "  Located section headings ..."
     268#       if $self->{'verbosity'} > 1;
     269#    }
    288270   
    289271    $$textref =~ s/<!\[if !vml\]>/<![if vml]>/g;
     
    295277    $self->SUPER::process(@_);
    296278   
    297     # associate original file with doc object
    298     my $cursection = $doc_obj->get_top_section();
    299     my $filename = &util::filename_cat($base_dir, $file);
    300     if (-e $filename)
    301     {
    302     print $outhandle "  Adding associated Word document\n"
    303         if $self->{'verbosity'} > 1;
    304    
    305     $doc_obj->associate_file($filename, "doc.doc", undef, $cursection);
    306    
    307     my $doclink = "<a href=_httpprefix_/collect/[collection]/index/assoc/[archivedir]/doc.doc>";
    308     $doc_obj->add_utf8_metadata ($cursection, "srclink",  $doclink);
    309     $doc_obj->add_utf8_metadata ($cursection, "srcicon",  "_icondoc_");
    310     $doc_obj->add_utf8_metadata ($cursection, "/srclink", "</a>");
    311     $doc_obj->add_utf8_metadata ($cursection, "Title", $doctitle);
    312     my $file_size = -s $filename;
    313     if ($file_size>1024)
    314     {
    315         my $fs_kbytes = sprintf("%d",$file_size/1024);
    316         $doc_obj->add_utf8_metadata ($cursection, "filesize", "$fs_kbytes Kb");
    317     }
    318     else
    319     {
    320         $doc_obj->add_utf8_metadata ($cursection, "filesize", "$file_size bytes");
    321     }
    322 
    323     if ($file_size > 200000)
    324     {
    325         $doc_obj->add_utf8_metadata ($cursection, "fswarning", "1");
    326     }
    327     }
    328279}
    329280
     
    363314
    364315    if (($img_width < $actual_width) || ($img_height < $actual_height)) {
    365         print $outhandle "Resizing $img_filename\n" if ($verbosity > 0);
     316        #print $outhandle "Resizing $img_filename\n" if ($verbosity > 0);
    366317       
    367318        # derive new image name based on current image
     
    378329        my $command = "convert -interlace plane -verbose "
    379330        ."-geometry $newsize \"img_$filename\" \"$resized_filename\"";
    380         print $outhandle "ImageResize: $command\n" if ($verbosity > 2);
    381         my $result = '';
    382         print $outhandle "ImageResize result: $result\n" if ($verbosity > 2);
     331        #print $outhandle "ImageResize: $command\n" if ($verbosity > 2);
     332        #my $result = '';
     333        #print $outhandle "ImageResize result: $result\n" if ($verbosity > 2);
    383334    }
    384335    }
     
    438389        $value = $1;
    439390        if (!defined $value || !defined $tag){
    440         print $outhandle "StructuredHTMLPlug: can't find VALUE in \"$tag\"\n";
     391        #print $outhandle "StructuredHTMLPlug: can't find VALUE in \"$tag\"\n";
    441392        next;
    442393        } else {
     
    444395        chomp($value); # remove trailing \n, if any
    445396        $tag = $find_fields{lc($tag)};
    446         print $outhandle " extracted \"$tag\" metadata \"$value\"\n"
    447             if ($self->{'verbosity'} > 2);
     397        #print $outhandle " extracted \"$tag\" metadata \"$value\"\n"
     398        #    if ($self->{'verbosity'} > 2);
    448399        $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), $tag, $value);
    449400        }
Note: See TracChangeset for help on using the changeset viewer.