Changeset 34823
- Timestamp: 2021-02-11T18:13:01+13:00 (3 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/model-sites-dev/eurovision-lod/collect/eurovision/perllib/plugins/JSONSPARQLResultPlugin.pm
r34695 r34823 32 32 package JSONSPARQLResultPlugin; 33 33 34 35 34 36 use strict; 35 37 no strict 'refs'; # allow filehandles to be variables and viceversa … … 52 54 'type' => "regexp", 53 55 'reqd' => "no", 54 'deft' => &get_default_process_exp() } 56 'deft' => &get_default_process_exp() }, 57 { 'name' => "decode_json_with_backslash_u", 58 'desc' => "{JSONSPARQLResultPlugin.decode_json_with_backslash_u}", 59 'type' => "flag", 60 'reqd' => "no" }, 61 { 'name' => "metadata_merge_on_concat_fields", 62 'desc' => "{JSONSPARQLResultPlugin.metadata_merge_on_concat_fields}", 63 'type' => "string", 64 'reqd' => "no" } 55 65 ]; 56 66 … … 83 93 84 94 95 sub sparqlresult_items_equal 96 { 97 my $self = shift (@_); 98 my ($lhs_entry,$rhs_entry) = @_; 99 100 my $is_equal = 0; 101 102 my $lhs_type = $lhs_entry->{'type'}; 103 my $rhs_type = $rhs_entry->{'type'}; 104 105 if ($lhs_type eq $rhs_type) { 106 ## print STDERR " checking $lhs_entry->{'value'} == $rhs_entry->{'value'}\n"; 107 108 $is_equal = ($lhs_entry->{'value'} eq $rhs_entry->{'value'}); 109 } 110 111 return $is_equal; 112 } 113 114 85 115 sub split_text_into_segments { 86 116 my $self = shift (@_); … … 90 120 91 121 # Convert textref (a json-string) into nested object 92 93 my $results_hashmap = decode_json($$textref); 122 123 my $results_hashmap; 124 125 if ($self->{'decode_json_with_backslash_u'}) { 126 $results_hashmap = decode_json($$textref); 127 } 128 else { 129 $results_hashmap = from_json($$textref); 130 } 94 131 95 132 # To get each result record returned: … … 98 135 my $results_array = $results_hashmap->{'results'}->{'bindings'}; 99 136 137 my $merge_on = $self->{'metadata_merge_on_concat_fields'}; 138 139 ## print STDERR "**** merge_on = '$merge_on'\n"; 140 141 if ($merge_on ne "") { 142 # build hashmap on merged metadata fields, and then regenerate @results_array 143 144 my $merged_hashmap = {}; 145 146 my @merge_fields = split(/\s*,\s*/,$merge_on); 147 148 foreach my $result_entry (@$results_array) { 149 
my $merged_key = ""; 150 foreach my $field (@merge_fields) { 151 $merged_key .= $result_entry->{$field}->{'value'}; 152 } 153 154 if (!defined $merged_hashmap->{$merged_key}) { 155 156 $merged_hashmap->{$merged_key} = {}; 157 foreach my $md_key (keys %$result_entry) { 158 my $md_val = $result_entry->{$md_key}; 159 $merged_hashmap->{$merged_key}->{$md_key} = [ $md_val ]; 160 } 161 162 } 163 else { 164 foreach my $md_key (keys %$result_entry) { 165 my $existing_md_vals = \@{$merged_hashmap->{$merged_key}->{$md_key}}; 166 my $md_val = $result_entry->{$md_key}; 167 168 # append only if not already present 169 my $found_val = 0; 170 foreach my $existing_md_val (@$existing_md_vals) { 171 172 ## print STDERR "$merged_key: "; 173 my $is_match = $self->sparqlresult_items_equal($md_val,$existing_md_val); 174 175 if ($is_match) { 176 $found_val = 1; 177 last; 178 } 179 } 180 181 if (!$found_val) { 182 push(@$existing_md_vals, $md_val); 183 } 184 } 185 186 } 187 } 188 189 # Iterate through merged_hashmap to egenerate result_array with newly merge record entires 190 191 $results_array = []; 192 193 foreach my $merged_key (keys %$merged_hashmap) { 194 push(@$results_array,$merged_hashmap->{$merged_key}); 195 } 196 } 197 198 100 199 foreach my $result_entry (@$results_array) { 101 my $result_entry_stringified = encode_json($result_entry); 200 my $result_entry_stringified; 201 if ($self->{'decode_json_with_backslash_u'}) { 202 $result_entry_stringified = encode_json($result_entry); 203 } 204 else { 205 $result_entry_stringified = to_json($result_entry); 206 } 102 207 push(@segments,$result_entry_stringified); 103 208 104 209 #print STDERR "*** result_entry = $result_entry_stringified\n"; 105 210 } 106 211 107 212 return \@segments; 108 213 } … … 116 221 117 222 my $section = $doc_obj->get_top_section(); 118 223 224 my $result_entry; 119 225 #my $result_entry = decode_json($$textref); 120 # $textref is UTF8 byte encoded, not "Unicode-aware", so need to specify this in optional 
param to from_json() 121 my $result_entry = from_json($$textref, {utf8 => 1} ); 226 if ($self->{'decode_json_with_backslash_u'}) { 227 # $textref is UTF8 byte encoded, not "Unicode-aware", so need to specify this in optional param to from_json() 228 $result_entry = from_json($$textref, {utf8 => 1} ); 229 } 230 else { 231 $result_entry = from_json($$textref); 232 } 122 233 123 234 # In JSON syntax, it seems permissible to represent Unicode characters in the form \uNNNN … … 132 243 133 244 # Set the "text document" to be a pre-formatted, HTML friendly version of the JSON syntax 134 $self->text_to_html(\$text_utf8); 135 $doc_obj->add_utf8_text($section, $text_utf8); 245 # 246 # **** Now done later on, as pretty-print text, pp_txt 247 #$self->text_to_html(\$text_utf8); 248 #$doc_obj->add_utf8_text($section, $text_utf8); 136 249 137 250 # entrant{value,type=uri} … … 139 252 # country{value,xml:lang=en,type=literal} 140 253 254 my $pp_text = ""; 255 256 print STDERR "["; 257 141 258 foreach my $md_name (keys %$result_entry) 142 259 { 143 print STDERR "*** md_name = $md_name\n"; 144 my $md_name_lod_rec = $result_entry->{$md_name}; 145 if ($md_name_lod_rec->{'type'} eq "literal") { 146 my $md_val = $md_name_lod_rec->{'value'}; 147 $doc_obj->add_utf8_metadata($section, $md_name, $md_val); 148 } 149 elsif ($md_name_lod_rec->{'type'} eq "uri") { 150 my $md_val = $md_name_lod_rec->{'value'}; 151 $doc_obj->add_utf8_metadata($section, $md_name."_uri", $md_val); 152 } 153 154 } 260 print STDERR " $md_name "; 261 262 my $md_name_lod_generic_entry = $result_entry->{$md_name}; 263 my $md_name_lod_array; 264 265 if (ref($md_name_lod_generic_entry) eq "ARRAY") { 266 $md_name_lod_array = $md_name_lod_generic_entry; 267 } 268 else { 269 $md_name_lod_array = [ $md_name_lod_generic_entry ]; 270 } 271 272 foreach my $md_name_lod_rec (@$md_name_lod_array) { 273 274 ## the original line to pick up on record 275 # my $md_name_lod_rec = $result_entry->{$md_name}; 276 277 if 
($md_name_lod_rec->{'type'} eq "literal") { 278 my $md_val = $md_name_lod_rec->{'value'}; 279 280 # Deal with any %XX encodings 281 $md_val =~ s/%([A-Fa-f\d]{2})/chr hex $1/eg; 282 283 $doc_obj->add_utf8_metadata($section, $md_name, $md_val); 284 $pp_text .= "$md_name: $md_val\n"; 285 } 286 elsif ($md_name_lod_rec->{'type'} eq "uri") { 287 my $md_val = $md_name_lod_rec->{'value'}; 288 $doc_obj->add_utf8_metadata($section, $md_name."_uri", $md_val); 289 } 290 } 291 292 } 293 print STDERR "]\n"; 294 295 $doc_obj->add_utf8_text($section, $pp_text); 155 296 156 297 return 1;
Note: See TracChangeset for help on using the changeset viewer.