Changeset 73
- Timestamp:
- 1998-12-11T20:59:16+13:00 (25 years ago)
- Location:
- trunk/gsdl/perllib
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/doc.pm
r65 r73 3 3 package doc; 4 4 5 use unicode; 5 6 use util; 6 7 use html; … … 463 464 # are for metadata which can have more than one value. 464 465 466 # set_metadata_element assumes the value is in (extended) ascii form. 467 # For text which hash been already converted to the UTF-8 format use 468 # set_utf8_metadata_element. 465 469 sub set_metadata_element { 466 470 my $self = shift (@_); 467 471 my ($section, $field, $value) = @_; 468 472 473 $self->set_utf8_metadata_element ($section, $field, 474 &unicode::ascii2utf8($value)); 475 } 476 477 # set_utf8_metadata_element assumes the text has already been 478 # converted to the UTF-8 encoding. 479 sub set_utf8_metadata_element { 480 my $self = shift (@_); 481 my ($section, $field, $value) = @_; 482 469 483 $self->delete_metadata ($section, $field); 470 $self->add_ metadata ($section, $field, $value);484 $self->add_utf8_metadata ($section, $field, $value); 471 485 } 472 486 … … 491 505 } 492 506 507 # add_metadata assumes the text is in (extended) ascii form. For 508 # text which hash been already converted to the UTF-8 format use 509 # add_utf8_metadata. 493 510 sub add_metadata { 494 511 my $self = shift (@_); 495 512 my ($section, $field, $value) = @_; 496 513 497 my $section_ptr = $self->_lookup_section($section); 498 if (!defined $section_ptr) { 499 print STDERR "doc::add_metadata couldn't find section " . 514 $self->add_utf8_metadata ($section, $field, 515 &unicode::ascii2utf8($value)); 516 } 517 518 # add_utf8_metadata assumes the text has already been converted 519 # to the UTF-8 encoding. 520 sub add_utf8_metadata { 521 my $self = shift (@_); 522 my ($section, $field, $value) = @_; 523 524 my $section_ptr = $self->_lookup_section($section); 525 if (!defined $section_ptr) { 526 print STDERR "doc::add_utf8_metadata couldn't find section " . 500 527 "$section\n"; 501 528 return; … … 618 645 # methods for dealing with text 619 646 647 # add_text assumes the text is in (extended) ascii form. For 648 # text which has been already converted to the UTF-8 format 649 # use add_utf8_text. 620 650 sub add_text { 621 651 my $self = shift (@_); 622 652 my ($section, $text) = @_; 623 653 624 my $section_ptr = $self->_lookup_section($section); 625 if (!defined $section_ptr) { 626 print STDERR "doc::add_text couldn't find section " . 654 # convert the text to UTF-8 encoded unicode characters 655 # and add the text 656 $self->add_utf8_text($section, &unicode::ascii2utf8($text)); 657 } 658 659 660 # add_utf8_text assumes the text to be added has already 661 # been converted to the UTF-8 encoding. For ascii text use 662 # add_text 663 sub add_utf8_text { 664 my $self = shift (@_); 665 my ($section, $text) = @_; 666 667 my $section_ptr = $self->_lookup_section($section); 668 if (!defined $section_ptr) { 669 print STDERR "doc::add_utf8_text couldn't find section " . 627 670 "$section\n"; 628 671 return; … … 647 690 } 648 691 649 # returns the length of the text for a section692 # returns the (utf-8 encoded) length of the text for a section 650 693 sub get_text_length { 651 694 my $self = shift (@_); -
trunk/gsdl/perllib/gb.pm
r71 r73 62 62 63 63 while ($i < $len) { 64 if (($c1 = ord(substr($intext, $i, 1))) > 127) { 65 if ($i+1 < $len && ($c2 = ord(substr($intext, $i+1, 1))) > 127) { 66 # found a GB character, put black square if cannot translate 67 # printf "in: %x\n", (($c1-128)*256+$c2-128); 68 my $c = &transchar ($encodename, ($c1-128)*256+$c2-128); 69 $c = 0x25a1 if $c == 0; 70 push (@outtext, $c); 71 # printf "out: %x\n", $c; 72 $i += 2; 73 64 if (($c1 = ord(substr($intext, $i, 1))) >= 0xa0) { 65 if ($i+1 < $len) { 66 if (($c2 = ord(substr($intext, $i+1, 1))) >= 0xa0) { 67 # found a GB character, put black square if cannot translate 68 my $c = &transchar ($encodename, ($c1-128)*256+$c2-128); 69 $c = 0x25a1 if $c == 0; 70 push (@outtext, $c); 71 $i += 2; 72 73 } else { 74 # error second character not > 127 75 print STDERR "gb: ERROR second GB character not >= 0xa0\n"; 76 $i++; 77 78 } 74 79 } else { 75 80 # error 76 print STDERR "gb: ERROR in gb encoding\n";81 print STDERR "gb: ERROR missing second half of GB character\n"; 77 82 $i++; 78 83 } … … 80 85 } else { 81 86 # normal ascii character 82 push (@outtext, $c1) ;87 push (@outtext, $c1) if ($c1 < 0x80); 83 88 $i++; 84 89 } -
trunk/gsdl/perllib/unicode.pm
r71 r73 5 5 6 6 package unicode; 7 8 9 10 # ascii2unicode takes a (extended) ascii string and 11 # returns a unicode array. 12 sub ascii2unicode { 13 my ($in) = @_; 14 my $out = []; 15 16 my $i = 0; 17 my $len = length($in); 18 while ($i < $len) { 19 push (@$out, ord(substr ($in, $i, 1))); 20 $i++; 21 } 22 23 return $out; 24 } 25 26 27 # ascii2utf8 takes a (extended) ascii string and 28 # returns a UTF-8 encoded string. This is just 29 # a faster version of "&unicode2utf8(&ascii2unicode($str));" 30 sub ascii2utf8 { 31 my ($in) = @_; 32 my $out = ""; 33 34 my ($c); 35 my $i = 0; 36 my $len = length($in); 37 while ($i < $len) { 38 $c = ord (substr ($in, $i, 1)); 39 if ($c < 0x80) { 40 # ascii character 41 $out .= chr ($c); 42 43 } else { 44 # extended ascii character 45 $out .= chr (0xc0 + (($c >> 6) & 0x1f)); 46 $out .= chr (0x80 + ($c & 0x3f)); 47 } 48 $i++; 49 } 50 51 return $out; 52 } 7 53 8 54
Note:
See TracChangeset
for help on using the changeset viewer.