Changeset 34878


Ignore:
Timestamp:
2021-02-16T01:09:13+13:00 (3 years ago)
Author:
davidb
Message:

Further changes to work more smoothly with JSONSparqlResultsPlugin, which makes use of OIDtype metadata for each record to provide nice IDs

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/plugins/SplitTextFile.pm--for-gs311

    r34840 r34878  
    144144    # can we process this file??
    145145    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
     146
    146147    return undef unless $self->can_process_this_file($filename_full_path);
    147148
     
    273274    #$doc_obj->add_metadata($doc_obj->get_top_section(), "FileFormat", "Split");
    274275
     276#   # include any metadata passed in from previous plugins
     277#   # note that this metadata is associated with the top level section
     278#   $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata);
     279
     280#   # Calculate a "base" document ID.
     281#   if (!defined $id) {
     282#       $id = $self->get_base_OID($doc_obj);
     283#   }
     284
    275285    # include any metadata passed in from previous plugins
    276286    # note that this metadata is associated with the top level section
    277287    $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata);
    278    
    279     # Calculate a "base" document ID.
    280     if (!defined $id) {
    281         $id = $self->get_base_OID($doc_obj);
    282     }
    283    
    284 #   # include any metadata passed in from previous plugins
    285 #   # note that this metadata is associated with the top level section
    286 #   $self->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $metadata);
    287288
    288289    # do plugin specific processing of doc_obj
     
    305306    $self->auto_extract_metadata ($doc_obj);
    306307
     308    # This used to be done earlier in the routine; however, the $id generated
     309    # isn't used until here!
     310    # Calculate a "base" document ID.
     311    if (!defined $id) {
     312        $id = $self->get_base_OID($doc_obj);
     313    }
     314   
    307315    # add an OID
    308     $self->add_OID($doc_obj, $id, $segment);
     316    $self->add_segment_OID($doc_obj, $id, $segment);
    309317
    310318    # process the document
     
    312320
    313321    $self->{'num_processed'} ++;
     322
     323    if ($self->{'num_processed'} >= $maxdocs) {
     324        last;
     325    }
    314326    }
    315327
     
    324336    my ($doc_obj) = @_;
    325337
     338    my $identifier = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), $self->{'OIDmetadata'});
     339    # print STDERR "**** get_baseOID identifier = $identifier\n";
     340   
    326341    $self->SUPER::add_OID($doc_obj);
    327342    return $doc_obj->get_OID();
    328343}
    329344
    330 sub add_OID {
     345sub add_segment_OID {
    331346    my $self = shift (@_);
    332347    my ($doc_obj, $id, $segment) = @_;
Note: See TracChangeset for help on using the changeset viewer.