Context Navigation

← Previous Changeset
Next Changeset →

Changeset 11893

Timestamp:

2006-05-31T10:33:25+12:00 (18 years ago)

Author:

kjdon

Message:

Deleted some stuff to do with original Word documents - this plugin shouldn't know where the HTML has come from. Also commented out the print statements - they were interfering with GLI processing of output.

File:

1 edited

trunk/gsdl/perllib/plugins/StructuredHTMLPlug.pm (modified) (11 diffs)

Legend:

Unmodified
Added
Removed

trunk/gsdl/perllib/plugins/StructuredHTMLPlug.pm

-              r11884
+              r11893
+}
-sub read {
-    my $self = shift (@_);
-    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
-    my $filename = $file;
-    $filename = &util::filename_cat ($base_dir, $file) if $base_dir =~ /\w/;
-    if ($filename =~ m/\.html?$/) {
-    my $poss_doc_filename = $filename;
-    $poss_doc_filename =~ s/\.html?$/.doc/;
-    if (-e $poss_doc_filename) {
-        # this file has already been processed by Word plugin
-        return 0;
+    }
+    }
-    return $self->SUPER::read(@_);
+}
 sub process {
     my $self = shift (@_);
-    #my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
     my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
     my $outhandle = $self->{'outhandle'};
 …
+    }
+    # If delete_toc is enables, it means to get rid of toc and tof contents.
+    # set the title here if we haven't found it yet
+    if (!defined $doc_obj->get_metadata_element ($doc_obj->get_top_section(), "Title")) {
+    if (defined $doctitle && $doctitle =~ /\S/) {
+        $doc_obj->add_metadata($doc_obj->get_top_section(), "Title", $doctitle);
+    } else {
+        $self->title_fallback($doc_obj,$doc_obj->get_top_section(),$file);
+    }
+    }
+    # If delete_toc is enabled, it means to get rid of toc and tof contents.
     # get rid of TOC and TOF sections and their title
     if (defined $self->{'delete_toc'} && ($self->{'delete_toc'} == 1)){
 …
     if (defined $self->{'title_header'} && $self->{'title_header'}=~ /\S/){
     $self->{'title_header'} =~ s/^(\()(.*)(\))/$2/is;
-    #$body_text =~ s/<p class=(($self->{'title_header'})[^>]*)>(.+?)<\/p>/<p class=$1><title>$3<\/title><\/p>/isg;
-    #$doctitle = $3;
     $body_text =~ s/<p class=(($self->{'title_header'})[^>]*)>(.+?)<\/p>/<p class=$1><h1>$3<\/h1><\/p>/isg;
-    #$body_text =~ m/<p class=(($self->{'title_header'})[^>]*)>(.+?)<\/p>/isg;
-    #$doctitle = "<h1>".$3."<\/h1>" if defined $3;
+    }
 …
     $body_text =~ s/<p class=(($self->{'level3_header'})[^>]*)>(.+?)<\/p>/<p class=$1><h3>$3<\/h3><\/p>/isg;
+    }
     # Tidy up extra new lines
     $body_text =~ s/(<p[^>]*><span[^>]*><o:p>&nbsp;<\/o:p><\/span><\/p>)//isg;
 …
             $section_text .= "-->\n";
             print $outhandle $spacing."$h_text\n"
             if $self->{'verbosity'} > 2;
+            #print $outhandle $spacing."$h_text\n"
+            #   if $self->{'verbosity'} > 2;
             $sectionh1++ if ($hnum==1);
 …
     $$textref = $section_text;
+    # should be textref not testref???
+    #$$testref =~ s/<h(\d+)>(.*?)<\/h$1>/<Section><Metadata name=\"Title\">$1<\/Metadata></Section><h$1><\/h$1>/gi;
+    if ($sectionh1>0)
+    {
+    print $outhandle "  Located section headings ..."
+        if $self->{'verbosity'} > 1;
+    }
+    print $outhandle "  Passing on the HTMLPlug\n"
+    if $self->{'verbosity'} > 1;
+#    if ($sectionh1>0)
+#    {
+#   print $outhandle "  Located section headings ..."
+#       if $self->{'verbosity'} > 1;
+#    }
     $$textref =~ s/<!\[if !vml\]>/<![if vml]>/g;
 …
     $self->SUPER::process(@_);
-    # associate original file with doc object
-    my $cursection = $doc_obj->get_top_section();
-    my $filename = &util::filename_cat($base_dir, $file);
-    if (-e $filename)
+    {
-    print $outhandle "  Adding associated Word document\n"
-        if $self->{'verbosity'} > 1;
-    $doc_obj->associate_file($filename, "doc.doc", undef, $cursection);
-    my $doclink = "<a href=_httpprefix_/collect/[collection]/index/assoc/[archivedir]/doc.doc>";
-    $doc_obj->add_utf8_metadata ($cursection, "srclink",  $doclink);
-    $doc_obj->add_utf8_metadata ($cursection, "srcicon",  "_icondoc_");
-    $doc_obj->add_utf8_metadata ($cursection, "/srclink", "</a>");
-    $doc_obj->add_utf8_metadata ($cursection, "Title", $doctitle);
-    my $file_size = -s $filename;
-    if ($file_size>1024)
+    {
-        my $fs_kbytes = sprintf("%d",$file_size/1024);
-        $doc_obj->add_utf8_metadata ($cursection, "filesize", "$fs_kbytes Kb");
+    }
-    else
+    {
-        $doc_obj->add_utf8_metadata ($cursection, "filesize", "$file_size bytes");
+    }
-    if ($file_size > 200000)
+    {
-        $doc_obj->add_utf8_metadata ($cursection, "fswarning", "1");
+    }
+    }
+}
 …
     if (($img_width < $actual_width) || ($img_height < $actual_height)) {
         print $outhandle "Resizing $img_filename\n" if ($verbosity > 0);
+        #print $outhandle "Resizing $img_filename\n" if ($verbosity > 0);
         # derive new image name based on current image
 …
         my $command = "convert -interlace plane -verbose "
         ."-geometry $newsize \"img_$filename\" \"$resized_filename\"";
         print $outhandle "ImageResize: $command\n" if ($verbosity > 2);
         my $result = '';
         print $outhandle "ImageResize result: $result\n" if ($verbosity > 2);
+        #print $outhandle "ImageResize: $command\n" if ($verbosity > 2);
+        #my $result = '';
+        #print $outhandle "ImageResize result: $result\n" if ($verbosity > 2);
+    }
+    }
 …
         $value = $1;
         if (!defined $value || !defined $tag){
         print $outhandle "StructuredHTMLPlug: can't find VALUE in \"$tag\"\n";
+        #print $outhandle "StructuredHTMLPlug: can't find VALUE in \"$tag\"\n";
         next;
         } else {
 …
         chomp($value); # remove trailing \n, if any
         $tag = $find_fields{lc($tag)};
         print $outhandle " extracted \"$tag\" metadata \"$value\"\n"
             if ($self->{'verbosity'} > 2);
+        #print $outhandle " extracted \"$tag\" metadata \"$value\"\n"
+        #    if ($self->{'verbosity'} > 2);
         $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), $tag, $value);
+        }

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 11893

Legend:

trunk/gsdl/perllib/plugins/StructuredHTMLPlug.pm

Download in other formats: