- Timestamp:
- 2018-11-29T21:23:49+13:00 (5 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/plugins/GreenstoneSQLPlugin.pm
r32595 r32640 37 37 use gsmysql; 38 38 39 39 #use unicode; 40 #use Encode; 40 41 41 42 ######################################################################################## … … 370 371 # browse the rebuilt collection by files/titles. So unless you set the OID here, the deleted 371 372 # doc oids will still be listed in the index. 372 $self->{'doc_obj'}->set_OID($oid);373 373 374 374 print STDERR " GreenstoneSQLPlugin processing doc $oid (reading into docobj from SQL db)\n" … … 385 385 my ($primary_key, $did, $sid, $metaname, $metaval) = @$row; 386 386 387 # don't allow duplicate toplevel OID, as can happen when a doc has been renamed and requires reindexing 388 # TODO: any other meta that should not be duplicated, but can have been changed between rebuilds so that we need to call set_meta instead of add_meta? 389 # e.g. FileSize, SourceFile. But Language can be multiple, so gs meta is not guaranteed to be unique either. Whereas for dc metadata we know 390 # that some if not all should allow multiple entries for the same meta name, e.g. dc.Author/dc.Creator 391 if($sid =~ m@^root@ && $metaname eq "Identifier") { 392 # doc_obj's toplevel Identifier metadata is a special case: 393 # it should have only one value, so instead of add_meta() that will allow old Identifier meta to linger 394 # Need to do set_meta(). We then skip to the next row, to prevent duplicates (older values from DB) from being inserted for Identifier into doc_obj 395 # Handles the case where a doc was renamed and rebuilding triggers re-indexing case: old identifier is now overwritten with new one 396 $self->{'doc_obj'}->set_OID($oid); # calls doc_obj->set_metadata_element(top_section, Identifier, $oid). Sets OID if one doesn't exist. 397 next; # ensures Identifier set only once, and ensure Identifier is set to current docOID for the doc, a.o.t. allowing it to be set to any expired docOID from before a doc got renamed. 
398 } 399 400 # process all other metadata the normal way: 401 387 402 # get rid of the artificial "root" introduced in section id when saving to sql db 388 403 $sid =~ s@^root@@; … … 395 410 $doc_obj->add_utf8_metadata($sid, $metaname, $metaval); 396 411 } 412 413 # UTF8 debugging, e.g. if we have a with macron in dc.Title 414 ##binmode(STDERR, ":utf8"); # not necessary when calling utf8decomp to print wide characters as unicode codept, avoiding 'wide character' warnings. 415 #my $dcTitle = $doc_obj->get_metadata_element($doc_obj->get_top_section(), "dc.Title"); 416 #print STDERR "###### dc.Title: ".&unicode::utf8decomp($dcTitle)."\n"; 417 #print STDERR "###### DECODED dc.Title: ".&unicode::utf8decomp(&Encode::decode_utf8($dcTitle))."\n"; # decoding was needed for perl 5.18 418 397 419 print $outhandle "----------FIN READING DOC's META FROM SQL DB------------\n" 398 420 if $self->{'verbosity'} > 2; 399 421 } 400 422 401 423 if($proc_mode eq "all" || $proc_mode eq "text_only") { 402 424 # read in fulltxt for the collection (i.e. select * from <col>_fulltxt table … … 429 451 430 452 # done reading into docobj from SQL db 431 453 432 454 # don't forget to clean up on close() in superclass 433 455 # It will get the doc_obj indexed then make it undef
Note:
See TracChangeset
for help on using the changeset viewer.