Changeset 34823


Ignore:
Timestamp:
2021-02-11T18:13:01+13:00 (3 years ago)
Author:
davidb
Message:

Merging of records introduced

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/perllib/plugins/JSONSPARQLResultPlugin.pm

    r34695 r34823  
    3232package JSONSPARQLResultPlugin;
    3333
     34
     35
    3436use strict;
    3537no strict 'refs'; # allow filehandles to be variables and viceversa
     
    5254    'type' => "regexp",
    5355    'reqd' => "no",
    54     'deft' => &get_default_process_exp() }
     56    'deft' => &get_default_process_exp() },
     57      { 'name' => "decode_json_with_backslash_u",
     58    'desc' => "{JSONSPARQLResultPlugin.decode_json_with_backslash_u}",
     59    'type' => "flag",
     60    'reqd' => "no" },
     61      { 'name' => "metadata_merge_on_concat_fields",
     62    'desc' => "{JSONSPARQLResultPlugin.metadata_merge_on_concat_fields}",
     63    'type' => "string",
     64    'reqd' => "no" }
    5565      ];
    5666
     
    8393
    8494
     # Decide whether two SPARQL result items represent the same value.
     #
     # Arguments ($self is shifted off first):
     #   $lhs_entry, $rhs_entry - hash refs; only their 'type' and 'value'
     #                            keys are read here.
     # Returns 0 when the two 'type' strings differ; otherwise the result of
     # a string (eq) comparison of the two 'value' fields.
     # NOTE(review): any other keys carried by an entry are not compared --
     # confirm that is intended for the "append only if not already present"
     # check that calls this during record merging.
     95sub sparqlresult_items_equal
     96{
     97    my $self = shift (@_);
     98    my ($lhs_entry,$rhs_entry) = @_;
     99
            # Default answer: not equal (also the result when the types differ)
     100    my $is_equal = 0;
     101   
     102    my $lhs_type = $lhs_entry->{'type'};
     103    my $rhs_type = $rhs_entry->{'type'};
     104
            # Values are only compared when both items have the same type
     105    if ($lhs_type eq $rhs_type) {
     106##  print STDERR " checking $lhs_entry->{'value'} == $rhs_entry->{'value'}\n";
     107
     108    $is_equal = ($lhs_entry->{'value'} eq $rhs_entry->{'value'});
     109    }
     110
     111    return $is_equal;
     112}
     113
     114   
    85115sub split_text_into_segments {
    86116    my $self = shift (@_);
     
    90120
    91121    # Convert textref (a json-string) into nested object
    92    
    93     my $results_hashmap = decode_json($$textref);
     122
     123    my $results_hashmap;
     124   
     125    if ($self->{'decode_json_with_backslash_u'}) {
     126    $results_hashmap = decode_json($$textref);
     127    }
     128    else {
     129    $results_hashmap = from_json($$textref);
     130    }
    94131   
    95132    # To get each result record returned:
     
    98135    my $results_array = $results_hashmap->{'results'}->{'bindings'};
    99136
     137    my $merge_on = $self->{'metadata_merge_on_concat_fields'};
     138
     139    ## print STDERR "**** merge_on = '$merge_on'\n";
     140
     141    if ($merge_on ne "") {
     142    # build hashmap on merged metadata fields, and then regenerate @results_array
     143
     144    my $merged_hashmap = {};
     145   
     146    my @merge_fields = split(/\s*,\s*/,$merge_on);
     147   
     148    foreach my $result_entry (@$results_array) {
     149        my $merged_key = "";
     150        foreach my $field (@merge_fields) {
     151        $merged_key .= $result_entry->{$field}->{'value'};
     152        }
     153
     154        if (!defined $merged_hashmap->{$merged_key}) {
     155       
     156        $merged_hashmap->{$merged_key} = {};
     157        foreach my $md_key (keys %$result_entry) {         
     158            my $md_val = $result_entry->{$md_key};
     159            $merged_hashmap->{$merged_key}->{$md_key} = [ $md_val ];
     160        }
     161       
     162        }
     163        else {
     164        foreach my $md_key (keys %$result_entry) {
     165            my $existing_md_vals = \@{$merged_hashmap->{$merged_key}->{$md_key}};
     166            my $md_val = $result_entry->{$md_key};
     167           
     168            # append only if not already present
     169            my $found_val = 0;
     170            foreach my $existing_md_val (@$existing_md_vals) {
     171
     172##          print STDERR "$merged_key: ";
     173            my $is_match = $self->sparqlresult_items_equal($md_val,$existing_md_val);
     174           
     175            if ($is_match) {
     176                $found_val = 1;
     177                last;
     178            }
     179            }
     180
     181            if (!$found_val) {
     182            push(@$existing_md_vals, $md_val);
     183            }
     184        }
     185       
     186        }
     187    }
     188
     189    # Iterate through merged_hashmap to regenerate results_array with the newly merged record entries
     190   
     191    $results_array = [];
     192   
     193    foreach my $merged_key (keys %$merged_hashmap) {
     194        push(@$results_array,$merged_hashmap->{$merged_key});
     195    }
     196    }
     197
     198   
    100199    foreach my $result_entry (@$results_array) {
    101     my $result_entry_stringified = encode_json($result_entry);
     200    my $result_entry_stringified;
     201    if ($self->{'decode_json_with_backslash_u'}) { 
     202        $result_entry_stringified = encode_json($result_entry);
     203    }
     204    else {
     205        $result_entry_stringified = to_json($result_entry);
     206    }
    102207    push(@segments,$result_entry_stringified);
    103208   
    104209    #print STDERR "*** result_entry = $result_entry_stringified\n";
    105210    }
    106 
     211   
    107212    return \@segments;
    108213}
     
    116221
    117222    my $section = $doc_obj->get_top_section();
    118    
     223
     224    my $result_entry;
    119225    #my $result_entry = decode_json($$textref);
    120     # $textref is UTF8 byte encoded, not "Unicode-aware", so need to specify this in optional param to from_json()
    121     my $result_entry = from_json($$textref, {utf8 => 1} );
     226    if ($self->{'decode_json_with_backslash_u'}) { 
     227    # $textref is UTF8 byte encoded, not "Unicode-aware", so need to specify this in optional param to from_json()
     228    $result_entry = from_json($$textref, {utf8 => 1} );
     229    }
     230    else {
     231    $result_entry = from_json($$textref);
     232    }
    122233
    123234    # In JSON syntax, it seems permissible to represent Unicode characters in the form \uNNNN
     
    132243
    133244    # Set the "text document" to be a pre-formatted, HTML friendly version of the JSON syntax
    134     $self->text_to_html(\$text_utf8);
    135     $doc_obj->add_utf8_text($section, $text_utf8);
     245    #
     246    # **** Now done later on, as pretty-print text, $pp_text
     247    #$self->text_to_html(\$text_utf8);
     248    #$doc_obj->add_utf8_text($section, $text_utf8);
    136249
    137250    # entrant{value,type=uri}
     
    139252    # country{value,xml:lang=en,type=literal}
    140253
     254    my $pp_text = "";
     255
     256    print STDERR "[";
     257   
    141258    foreach my $md_name (keys %$result_entry)
    142259    {
    143     print STDERR "*** md_name = $md_name\n";
    144     my $md_name_lod_rec = $result_entry->{$md_name};
    145     if ($md_name_lod_rec->{'type'} eq "literal") {
    146         my $md_val = $md_name_lod_rec->{'value'};
    147         $doc_obj->add_utf8_metadata($section, $md_name, $md_val);
    148     }
    149     elsif ($md_name_lod_rec->{'type'} eq "uri") {
    150         my $md_val = $md_name_lod_rec->{'value'};
    151         $doc_obj->add_utf8_metadata($section, $md_name."_uri", $md_val);
    152     }
    153    
    154     }
     260    print STDERR " $md_name ";
     261
     262    my $md_name_lod_generic_entry = $result_entry->{$md_name};
     263    my $md_name_lod_array;
     264   
     265    if (ref($md_name_lod_generic_entry) eq "ARRAY") {
     266        $md_name_lod_array = $md_name_lod_generic_entry;
     267    }
     268    else {
     269        $md_name_lod_array = [ $md_name_lod_generic_entry ];
     270    }
     271
     272    foreach my $md_name_lod_rec (@$md_name_lod_array) {
     273
     274        ## the original line to pick up on record
     275        # my $md_name_lod_rec = $result_entry->{$md_name};
     276       
     277        if ($md_name_lod_rec->{'type'} eq "literal") {
     278        my $md_val = $md_name_lod_rec->{'value'};
     279       
     280        # Deal with any %XX encodings
     281        $md_val =~ s/%([A-Fa-f\d]{2})/chr hex $1/eg;
     282       
     283        $doc_obj->add_utf8_metadata($section, $md_name, $md_val);
     284        $pp_text .= "$md_name: $md_val\n";
     285        }
     286        elsif ($md_name_lod_rec->{'type'} eq "uri") {
     287        my $md_val = $md_name_lod_rec->{'value'};
     288        $doc_obj->add_utf8_metadata($section, $md_name."_uri", $md_val);
     289        }
     290    }
     291   
     292    }
     293    print STDERR "]\n";
     294   
     295    $doc_obj->add_utf8_text($section, $pp_text);
    155296   
    156297    return 1;
Note: See TracChangeset for help on using the changeset viewer.