Changeset 3614


Ignore:
Timestamp:
2002-12-03T14:41:17+13:00 (21 years ago)
Author:
jrm21
Message:

Modified the section-handling code to work with output from v0.34 of pdftohtml.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/PDFPlug.pm

    r3590 r3614  
    118118    my $textref=$_[0];
    119119
    120     # This might be specific to the version of pdftohtml in <=  gsdl-2.38
    121     my @sections = split('<a name=\d+>', $$textref);
     120    # we have "<a name=1></a>" etc for each page
     121    my @sections = split('<a name=', $$textref);
    122122
    123123    shift @sections; # don't need HTML header, etc
     
    141141    # add metadata per section...
    142142    foreach my $section (@sections) {
    143         $section =~ m@^<b>Page (\d+)</b>@;
     143        $section =~ s@^(\d+)></a>@@; # leftover from split expression...
     144
    144145        $title = $1; # Greenstone does magic if sections are titled digits
    145146        if (! defined($title) ) {
Note: See TracChangeset for help on using the changeset viewer.