Changeset 3142


Ignore:
Timestamp:
2002-06-10T18:14:45+12:00 (22 years ago)
Author:
jrm21
Message:

1) We can't use "Date" for the year metadata, as greenstone assumes Date is YYYYMMDD, so we now use "Year" instead.

2) Tidied up the single-entry-per-line code, as it was missing some entries.

3) bibtex allows 3-letter month abbreviations, so we now expand those.

4) some more tidying up of latex (and bibtex) commands.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/BibTexPlug.pm

    r3112 r3142  
    105105         'title', 'Title',
    106106         'volume', 'Volume',
    107          'year', 'Date',
     107         'year', 'Year', # Can't use "Date" as this implies YYYYMMDD!
    108108
    109109         'keywords', 'Keywords',
     
    119119    $verbosity = 0 unless $verbosity;
    120120
    121     my $lines=$$textref;
    122 
    123     # Make sure the text has exactly one entry per line
    124 
    125     $lines  =~ s/^\s*(\@[^,]+,)\s*\n/$1=====/; #splitting key in entry
    126     $lines =~ s/([\"\}]\s*,)\s*\n/$1=====/g; #splitting by comma, followed by \n (assuming end of lines are " or })
    127     $lines =~ s/(\d+\s*\,)\s*\n/$1=====/g; #for the case, when we have number entry without closing "
    128     $lines =~ s/\n\s*\n/%%%%%/g; #this was simply added in order to allow to process newline inside  quoted strings,
    129                                  #that continues for several lines
    130     $lines =~ s/\s+/ /g;
    131     $lines =~ s/\s*=====\s*/\n/g;
    132 
    133     my @all_lines = split(/\n+/, $lines);
     121    # Make sure the text has exactly one entry per line -
     122    # append line to previous if it doesn't start with "  <key> = "
     123
     124    my @input_lines=split('\n', $$textref);
     125    my @all_lines;
     126    my $entry_line=shift @input_lines;
     127    foreach my $input_line (@input_lines) {
     128    if ($input_line =~ m/^\s*\w+\s*=\s*/) {
     129        # this is a new key
     130        push(@all_lines, $entry_line);
     131        $entry_line=$input_line;
     132    } else {
     133        # this is a continuation of previous line
     134        $entry_line .= $input_line;
     135    }
     136   
     137    }
     138    # add final line, removing trailing '}'
     139    $entry_line =~ s/\}\s*$//;
     140    push(@all_lines, $entry_line);
     141    push(@all_lines, "}");
     142
     143
    134144
    135145    # Read and process each line in the bib file.
     
    189199    $value = $metadata{$entryname};
    190200
     201    if ($name =~ /^Month/) {
     202        $value=expand_month($value);
     203    }
    191204    # Add the various fields as metadata   
    192205    my $html_value = &text_into_html($value);
     
    338351
    339352
     353sub expand_month {
     354    my $text=shift;
     355
     356    # bibtex style files expand abbreviations for months.
     357    # Entries can contain more than one month (eg ' month = jun # "-" # aug, ')
     358    $text =~ s/jan/_textmonth01_/g;
     359    $text =~ s/feb/_textmonth02_/g;
     360    $text =~ s/mar/_textmonth03_/g;
     361    $text =~ s/apr/_textmonth04_/g;
     362    $text =~ s/may/_textmonth05_/g;
     363    $text =~ s/jun/_textmonth06_/g;
     364    $text =~ s/jul/_textmonth07_/g;
     365    $text =~ s/aug/_textmonth08_/g;
     366    $text =~ s/sep/_textmonth09_/g;
     367    $text =~ s/oct/_textmonth10_/g;
     368    $text =~ s/nov/_textmonth11_/g;
     369    $text =~ s/dec/_textmonth12_/g;
     370
     371    return $text;
     372}
    340373
    341374
     
    416449     '"Y' => chr(0xc3).chr(0xb8),
    417450     # tilde
     451     '~A' => chr(0xc3).chr(0x83),
     452     '~N' => chr(0xc3).chr(0x91),
     453     '~O' => chr(0xc3).chr(0x95),
     454     '~a' => chr(0xc3).chr(0xa3),
     455     '~n' => chr(0xc3).chr(0xb1),
     456     '~o' => chr(0xc3).chr(0xb5),
    418457     # caron - handled specially
    419458#      ',s' => chr(0xc5).chr(0xa1),
     
    520559      # only do the change if immediately followed by a space, }, {, or \
    521560      for $latex_code (keys %special_utf8_chars) {
    522       $text =~ s/\\${latex_code}([\\\s\{\}])/$special_utf8_chars{$latex_code}$1/g;
     561      $text =~ s/\\${latex_code}([\\\s{}])/$special_utf8_chars{$latex_code}$1/g;
    523562      }
    524563    }
    525 
    526564    # remove latex commands
    527     $text =~ s@\\\w+{([^}]*)}@$1@g;
    528 
     565    $text =~ s@\\noopsort{[^}]+\}@@g;
     566    $text =~ s@\\\w+{([^}]*)}@$1@g; # all other commands
     567   
    529568    # remove latex groupings { } (but not \{ or \} )
    530     $text =~ s/([^\\])\{/$1/g;
    531     $text =~ s/([^\\])\}/$1/g;
    532     $text =~ s/^{//; # remove { if first char
    533 
     569    while ($text =~ s/([^\\])[\{\}]/$1/g) {;}
     570    $text =~ s/^\{//; # remove { if first char
     571   
    534572    # maths mode $...$ - this is not interpreted in any way at the moment...
    535573    $text =~ s@\$(.*)\$@$1@g;
    536 
     574   
     575    # latex characters
     576    # spaces - nobr space (~), opt break (\-), append ("#" - bibtex only)
     577    $text =~ s/([^\\])~+/$1/g; # non-breaking space  "~"
     578    # optional break "\-"
     579    if ($text =~ m/\#/) { # concat macros (bibtex)
     580    # the non-macro bits have quotes around them - we just remove quotes
     581    $text =~ s/[\"\#]//g;
     582    }
    537583    # quoted { } chars
    538584    $text =~ s@\\{@{@g;
Note: See TracChangeset for help on using the changeset viewer.