source: gs3-extensions/structured-image/trunk/src/js/document_extra.js@ 37709

Last change on this file since 37709 was 37709, checked in by davidb, 12 months ago

Further coding developments. Given that Mirador3 has an API call for opening a document with search terms already active, it is likely that some of this code is now superfluous.

File size: 17.9 KB
Line 
/**
 * Fetch a JSON file from the current document's associated-files area and
 * pass the response on to `callback`.
 *
 * The URL is assembled from gs.collectionMetadata["httpPath"], the fixed
 * "/index/assoc/" segment, gs.documentMetadata["assocfilepath"] and json_file.
 *
 * @param {string} json_file - Name of the JSON file inside the assoc directory.
 * @param {string} doc_sec_id - Handed back unchanged as the callback's second argument.
 * @param {function} callback - Invoked as callback(response, doc_sec_id) on success.
 *     On failure it is not invoked; the URL and jQuery's textStatus are logged instead.
 */
function load_gv_dococr_json(json_file,doc_sec_id,callback) {
    const assoc_dir_url = gs.collectionMetadata["httpPath"] + "/index/assoc/" + gs.documentMetadata["assocfilepath"];
    const ocr_json_url = assoc_dir_url + "/" + json_file;

    const on_failure = function(jqXHR, textStatus) {
        console.error("load_gv_dococr_json(): failed to retrieve url '" + ocr_json_url +"'");
        console.error(textStatus);
    };

    const on_success = function(ocr_json_response) {
        callback(ocr_json_response, doc_sec_id);
    };

    const request = $.ajax({ method: "GET", url: ocr_json_url });
    request.fail(on_failure);
    request.done(on_success);
}
20
/**
 * Flatten Google Vision OCR paragraphs into plain text.
 *
 * Walks paragraphs -> words -> symbols, appending each symbol's text and then
 * the whitespace implied by the symbol's detected break (if it has one):
 *   SPACE, EOL_SURE_SPACE -> ' '
 *   SURE_SPACE            -> '\t'   (very wide space)
 *   LINE_BREAK            -> '\n\n'
 * Any other break type (e.g. HYPHEN, UNKNOWN) appends nothing.
 *
 * Enumerated break types are listed at:
 *   http://googleapis.github.io/googleapis/java/grpc-google-cloud-vision-v1/0.1.5/apidocs/com/google/cloud/vision/v1/TextAnnotation.DetectedBreak.BreakType.html
 *
 * @param {Array} paras - Paragraph objects, each with a `words` array whose
 *     entries have a `symbols` array. A missing/empty level is treated as empty.
 * @returns {string} The concatenated text.
 */
function gv_ocr_paras_to_text(paras)
{
    var ocr_text = "";

    for (const para of (paras || [])) {
        for (const word of (para.words || [])) {
            for (const symbol of (word.symbols || [])) {

                ocr_text += symbol.text;

                // A symbol's `property` may be present without a detectedBreak
                // (for example when it only carries detectedLanguages), so the
                // break has to be checked separately from the property itself.
                const detected_break = symbol.property && symbol.property.detectedBreak;
                if (!detected_break) {
                    continue;
                }

                const break_type = detected_break.type;

                if (break_type == "SPACE" || break_type == "EOL_SURE_SPACE") {
                    ocr_text += ' ';
                }
                else if (break_type == "SURE_SPACE") { // very wide space
                    ocr_text += '\t';
                }
                // "HYPHEN" at the end of a line => assume word continues onto next line
                else if (break_type == "LINE_BREAK") {
                    ocr_text += '\n\n';
                }
            }
        }
    }

    return ocr_text;
}
65
/**
 * Convert the block-level bounding boxes of a single-page Google Vision OCR
 * result into axis-aligned rectangles (with their OCR text) and display them
 * via display_scaled_div_bounding_boxes().
 *
 * Each rectangle has the form { x_org, y_org, x_dim, y_dim, text }, where the
 * origin is the minimum x/y over the block's vertices, the dimensions are
 * (max - min + 1), and text comes from gv_ocr_paras_to_text(block.paragraphs).
 *
 * Nothing is displayed when the result has no fullTextAnnotation. A result
 * whose page count is not exactly 1 is reported with console.error.
 *
 * @param {Object} gv_ocr_json - Parsed Google Vision OCR response.
 * @param {string} doc_sec_id - Passed through to display_scaled_div_bounding_boxes().
 */
function display_gv_ocr_bounding_boxes(gv_ocr_json,doc_sec_id)
{
    var full_text_annotation = gv_ocr_json.fullTextAnnotation;

    if (!full_text_annotation) {
        // Encountered an entry where no OCR'd text was found
        return;
    }

    var pages = full_text_annotation.pages || [];
    var num_pages = pages.length;

    if (num_pages != 1) {
        console.error("display_gv_ocr_bounding_boxes(): incorrect number of pages found.");
        console.error("    Expected 1 page, found " + num_pages +" page(s)");
        return;
    }

    var page = pages[0];
    var page_x_dim = page.width;
    var page_y_dim = page.height;

    var bounding_box_rects = [];
    var blocks = page.blocks || [];

    for (var b=0; b<blocks.length; b++) {
        var block = blocks[b];

        var vertices = (block.boundingBox && block.boundingBox.vertices) || [];
        if (vertices.length == 0) {
            // No geometry to draw for this block
            continue;
        }

        var min_x = Number.MAX_SAFE_INTEGER;
        var min_y = Number.MAX_SAFE_INTEGER;
        var max_x = Number.MIN_SAFE_INTEGER;
        var max_y = Number.MIN_SAFE_INTEGER;

        for (var v=0; v<vertices.length; v++) {
            // A coordinate equal to 0 can be left out of the JSON altogether;
            // treat a missing x/y as 0 so the min/max does not become NaN.
            var x = vertices[v].x || 0;
            var y = vertices[v].y || 0;

            min_x = Math.min(min_x,x);
            min_y = Math.min(min_y,y);
            max_x = Math.max(max_x,x);
            max_y = Math.max(max_y,y);
        }

        var rect = {
            "x_org": min_x,
            "y_org": min_y,
            "x_dim": max_x - min_x +1,
            "y_dim": max_y - min_y +1
        };

        rect.text = gv_ocr_paras_to_text(block.paragraphs);

        bounding_box_rects.push(rect);
    }

    display_scaled_div_bounding_boxes(doc_sec_id, bounding_box_rects, page_x_dim,page_y_dim);
}
138
139
/**
 * Overlay a set of bounding boxes on the screen-sized image of a document section.
 *
 * An overlay <div> (id "ocr-boundingbox-overlay-<doc_sec_id>", class
 * "ocr-boundingbox-overlay") is appended to the element whose id is
 * "small<doc_sec_id>". One child <div class="ocr-boundingbox"> is added per
 * rectangle, positioned and sized by scaling the rectangle from full-size image
 * coordinates to the screen image size held in gs.variables.screenImageWidth /
 * gs.variables.screenImageHeight. The rectangle's text is stored in a "text"
 * attribute on its <div>.
 *
 * @param {string} doc_sec_id - Section id used to locate the image container.
 * @param {Array} bounding_box_rects - Objects of the form { x_org, y_org, x_dim, y_dim, text }.
 * @param {number} fullsize_x_dim - Width of the full-size image the rectangles refer to.
 * @param {number} fullsize_y_dim - Height of the full-size image the rectangles refer to.
 */
function display_scaled_div_bounding_boxes(doc_sec_id, bounding_box_rects,fullsize_x_dim,fullsize_y_dim)
{
    var scale_x = gs.variables.screenImageWidth / fullsize_x_dim;
    var scale_y = gs.variables.screenImageHeight / fullsize_y_dim;

    var screen_image_id = "small"+doc_sec_id;

    var $boundingbox_overlay = $("<div>")
        .attr("id","ocr-boundingbox-overlay-"+doc_sec_id)
        .attr("class","ocr-boundingbox-overlay");

    // Look the container up by its literal id rather than through a '#...'
    // selector: a section id can contain '.' (e.g. "HASH0123.2"), which a
    // selector would interpret as "id followed by class" and so match nothing.
    var $screen_div = $(document.getElementById(screen_image_id));

    $screen_div.append($boundingbox_overlay);

    var num_bb_rects = bounding_box_rects.length;

    for (var r=0; r<num_bb_rects; r++) {
        var rect = bounding_box_rects[r];

        var $boundingbox_div = $("<div>")
            .attr("class","ocr-boundingbox")
            .css("left",  rect.x_org * scale_x)
            .css("top",   rect.y_org * scale_y)
            .css("width", rect.x_dim * scale_x)
            .css("height",rect.y_dim * scale_y);

        // Kept as an explicit element attribute (rather than jQuery .data())
        // so that the text is visible on the element itself
        $boundingbox_div.attr("text",rect.text);

        $boundingbox_overlay.append($boundingbox_div);
    }
}
192
193/*
194
195https://stackoverflow.com/questions/68395710/building-a-bounding-box-surrounding-text-in-google-vision-api-to-extract-the-tex
196
197def get_text_within(document, x1, y1, x2, y2):
198text = ""
199for page in document.pages:
200 for block in page.blocks:
201 for paragraph in block.paragraphs:
202 for word in paragraph.words:
203 for symbol in word.symbols:
204 min_x = min(symbol.bounding_box.vertices[0].x, symbol.bounding_box.vertices[1].x,
205 symbol.bounding_box.vertices[2].x, symbol.bounding_box.vertices[3].x)
206 max_x = max(symbol.bounding_box.vertices[0].x, symbol.bounding_box.vertices[1].x,
207 symbol.bounding_box.vertices[2].x, symbol.bounding_box.vertices[3].x)
208 min_y = min(symbol.bounding_box.vertices[0].y, symbol.bounding_box.vertices[1].y,
209 symbol.bounding_box.vertices[2].y, symbol.bounding_box.vertices[3].y)
210 max_y = max(symbol.bounding_box.vertices[0].y, symbol.bounding_box.vertices[1].y,
211 symbol.bounding_box.vertices[2].y, symbol.bounding_box.vertices[3].y)
212 if (min_x >= x1 and max_x <= x2 and min_y >= y1 and max_y <= y2):
213 text += symbol.text
214 if (symbol.property.detected_break.type == 1 or
215 symbol.property.detected_break.type == 3):
216 text += ' '
217 if (symbol.property.detected_break.type == 2):
218 text += '\t'
219 if (symbol.property.detected_break.type == 5):
220 text += '\n'
221return text
222
223*/
224
225
226
227
/**
 * Build the Mirador 3 configuration for the current document and start the
 * viewer in the element with id 'mirador3-viewer'.
 *
 * The document id is read from gs.cgiParams.d. If it contains a '.', the part
 * before the first '.' is the root document id and the part after it selects
 * the page to open; otherwise the page comes from the 'gotoPage' URL query
 * parameter (default 1). Pages are 1-based and are converted to Mirador's
 * 0-based canvasIndex.
 *
 * Annotation support is switched on when either
 * gs.variables.optionIncludeFileAssocOpenAnnotations or
 * gs.variables.optionIncludeEditableOpenAnnotations is the string 'true'.
 * Editable annotations additionally require a logged-in user belonging to one
 * of the groups: "administrator", "all-collections-editor",
 * "<collection>-collection-editor" or "<site_name>-editor".
 *
 * @returns The value returned by mirador3WithAnnotations.initMirador3Viewer().
 */
function createMirador3Viewer()
{
    var doc_id = gs.cgiParams.d;
    var opt_dot_pos = doc_id.indexOf(".");

    var goto_page;
    var doc_id_root;

    if (opt_dot_pos>=0) {
        goto_page = doc_id.substring(opt_dot_pos+1);
        doc_id_root = doc_id.substring(0,opt_dot_pos);
    }
    else {
        var doc_url = new URL(window.location);
        goto_page = doc_url.searchParams.get('gotoPage') || 1;
        doc_id_root = doc_id;
    }

    // goto_page is normally a string (section suffix or query parameter), so
    // parse it explicitly; anything that is not a positive integer falls back
    // to the first page instead of producing a NaN/negative/fractional index.
    var page_number = Number.parseInt(goto_page, 10);
    if (Number.isNaN(page_number) || page_number < 1) {
        page_number = 1;
    }
    var canvas_index = page_number - 1;

    var httpDocument = gs.xsltParams.library_name + "/collection/" + gs.cgiParams.c + "/document/" + doc_id_root;
    var httpDocumentIIIFManifest = httpDocument + "?sa=iiif-manifest&ed=1&excerptid-text=iiif-manifest";

    var mirador3_config = {
        "manifests": {
            // Computed key: the entry must be keyed by the manifest URL itself,
            // not by the literal text "httpDocumentIIIFManifest"
            [httpDocumentIIIFManifest]: {
                "provider": "InterMusE"
            }
        },
        "window" : { "panels": { "attribution": true, "search": true } },

        "windows": [
            {
                "loadedManifest": httpDocumentIIIFManifest,
                "canvasIndex": canvas_index,
                "thumbnailNavigationPosition": 'far-bottom'
            }
        ]
    };

    var includeFileAssocOpenAnnotations = gs.variables.optionIncludeFileAssocOpenAnnotations;
    var includeEditableOpenAnnotations  = gs.variables.optionIncludeEditableOpenAnnotations;

    if ((includeFileAssocOpenAnnotations == 'true') || (includeEditableOpenAnnotations == 'true')) {
        // Activate the annotations plugin

        mirador3_config.window.panels.annotations = true;

        mirador3_config.window.defaultSideBarPanel = "annotations";
        mirador3_config.window.sideBarOpenByDefault = true;

        // In addition to the usual group checks, allow the user to edit annotations if they
        // are in a group that matches site_name

        var logged_in_user_can_edit = false;

        var userinfo_groups = gs.userInformation && gs.userInformation.groups;
        if (userinfo_groups) {
            var site_name = gs.xsltParams.site_name;
            var coll_name = gs.cgiParams.c;
            if (userinfo_groups.includes("administrator")
                || userinfo_groups.includes("all-collections-editor")
                || userinfo_groups.includes(coll_name+"-collection-editor")
                || userinfo_groups.includes(site_name+"-editor")) {
                logged_in_user_can_edit = true;
            }
        }

        if ((includeEditableOpenAnnotations == 'true') && (logged_in_user_can_edit)) {

            var endpointURL = gs.variables.optionIncludeEditableOpenAnnotationsEndpointURL;

            mirador3_config.annotation = {};

            if ((endpointURL == "") || (endpointURL == "localStorage")) {
                mirador3_config.annotation.adapter = (canvasId) => new mirador3WithAnnotations.LocalStorageAdapter(`localStorage://?canvasId=${canvasId}`);
            }
            else {
                mirador3_config.annotation.adapter = (canvasId) => new mirador3WithAnnotations.SimpleAnnotationServerV2Adapter(canvasId, endpointURL);
            }

            // TODO: consider making this a configurable option
            mirador3_config.annotation.exportLocalStorageAnnotations = true; // display annotation JSON export button
        }
    }

    var mirador3_viewer = mirador3WithAnnotations.initMirador3Viewer('mirador3-viewer',mirador3_config);

    return mirador3_viewer;
}
341
/**
 * Replace the current document's content with a hard-wired example of a IIIF
 * (Presentation 2 / Open Annotation) annotation list, serialised as JSON.
 *
 * The example holds a single "oa:commenting" annotation targeting a rectangular
 * fragment of a canvas; further resources would follow the same pattern.
 */
function initIIIFAnnotations()
{
    // What the annotation is attached to: a region of a canvas within a manifest
    const annotation_target = {
        "@type": "oa:SpecificResource",
        "full": "https://iiif.harvardartmuseums.org/manifests/object/299843/canvas/canvas-47174896",
        "selector": {
            "@type": "oa:FragmentSelector",
            "value": "xywh=622,591,642,940"
        },
        "within": {
            "@id": "https://iiif.harvardartmuseums.org/manifests/object/299843",
            "@type": "sc:Manifest"
        }
    };

    // The annotation's content
    const annotation_body = {
        "@type": "dctypes:Text",
        "chars": "<p>age: 35-52<br/>gender: Female(66.337677%)<br/>CALM: 55.438412%<br/>CONFUSED: 3.949288%<br/>SURPRISED: 2.33092%<br/>DISGUSTED: 0.545727%<br/>HAPPY: 1.549943%<br/>ANGRY: 2.082294%<br/>SAD: 34.103416%<br/></p><p>Generated by AWS Rekognition</p>",
        "format": "text/html"
    };

    const example_annotation = {
        "@context": "http://iiif.io/api/presentation/2/context.json",
        "@id": "https://iiif.harvardartmuseums.org/annotations/9641482",
        "@type": "oa:Annotation",
        "motivation": [ "oa:commenting" ],
        "on": annotation_target,
        "resource": [ annotation_body ]
    };

    const example_annotation_list = {
        "@context": "http://www.shared-canvas.org/ns/context.json",
        "@id": "https://iiif.harvardartmuseums.org/manifests/object/299843/list/47174896",
        "@type": "sc:AnnotationList",
        "resources": [ example_annotation ]
    };

    const serialised_list = JSON.stringify(example_annotation_list);

    document.open("application/json");
    document.write(serialised_list);
    document.close();
}
384
385
/**
 * Build a IIIF Presentation 2 manifest (as a plain object) for a document made
 * up of a flat sequence of page images.
 *
 * The manifest has a single sequence containing one canvas per entry of
 * iiif_doc_images. Canvas i (0-based) is given the id
 * "<iiifpres_root_id>/canvas/<i+1>" and carries one "sc:painting" image
 * annotation plus a thumbnail, both derived from the image's IIIF Image API
 * base URL.
 *
 * Developed with reference to the following manifest example:
 *   https://iiif.archivelab.org/iiif/McGillLibrary-rbsc_ms-medieval-073-18802/manifest.json
 *
 * Annotation lists are not currently attached. They could be added per canvas
 * as an "otherContent" array of { "@id": <url>, "@type": "sc:AnnotationList" }
 * entries, e.g. pointing at an openannotation-list<section>.json or
 * webannotation-list<section>.json file in the document's assoc directory.
 *
 * @param {string} iiifpres_root_id - Base URI used to mint manifest/sequence/canvas/annotation ids.
 * @param {string} iiifpres_label - Label for the manifest.
 * @param {Array} iiif_doc_images - One object per page: { id, x_dim, y_dim, chosen_title },
 *     where id is the image's IIIF Image API base URL.
 * @returns {Object} The manifest object.
 */
function initIIIFManifest(iiifpres_root_id,iiifpres_label, iiif_doc_images)
{
    console.log("IIIF_Doc_Images");
    console.log(iiif_doc_images);

    var iiif_manifest = {
        "@context": "http://iiif.io/api/presentation/2/context.json",
        "@id": iiifpres_root_id + "/manifest",
        "@type": "sc:Manifest",
        "label": iiifpres_label,
        "attribution": "The Internet of Musical Events (InterMusE): Digital Scholarship, Community, and the Archiving of Performance",
        "license": "https://**** To Be Determined ****",
        "logo": "https://intermuse.datatodata.org/wp-content/uploads/sites/4/2021/03/cropped-cropped-cropped-1.png"
    };

    // Trivial IIIF sequence: the only one in the manifest, holding every canvas
    var iiif_sequence = {
        "@context": "http://iiif.io/api/image/2/context.json",
        "@id": iiifpres_root_id + "/sequence/normal",
        "@type": "sc:Sequence"
    };

    var iiif_canvases = [];

    for (var i=0; i<iiif_doc_images.length; i++) {
        var section = i+1; // assumes flat (i.e. non-hierarchical) image document structure

        var iiif_doc_image = iiif_doc_images[i];
        console.log(iiif_doc_image);

        var iiifimg_root_id = iiif_doc_image.id;
        var iiifimg_x_dim   = iiif_doc_image.x_dim;
        var iiifimg_y_dim   = iiif_doc_image.y_dim;
        var iiifimg_label   = iiif_doc_image.chosen_title;

        var iiif_canvas_id = iiifpres_root_id +"/canvas/" + section;

        var iiif_canvas = {
            "@context": "http://iiif.io/api/presentation/2/context.json",
            "@id": iiif_canvas_id,
            "@type": "sc:Canvas",
            "width": iiifimg_x_dim,
            "height": iiifimg_y_dim,
            "label": iiifimg_label
        };

        var iiif_image = {
            "@context": "http://iiif.io/api/image/2/context.json",
            "@id": iiifpres_root_id +"/annotation/" + section,
            "@type": "oa:Annotation",
            "motivation": "sc:painting",
            // Must be the id of the canvas this image is painted on
            "on": iiif_canvas_id,
            "resource": {
                "@id": iiifimg_root_id+"/full/full/0/default.jpg",
                "@type": "dctypes:Image",
                "format": "image/jpeg",
                "width": iiifimg_x_dim,
                "height": iiifimg_y_dim,
                "service": {
                    "@context": "http://iiif.io/api/image/2/context.json",
                    "@id": iiifimg_root_id,
                    "profile": "https://iiif.io/api/image/2/profiles/level2.json"
                }
            }
        };

        var iiif_thumbnail = {
            // IIIF Image API size ",150" => scaled to a height of 150 pixels
            "@id": iiifimg_root_id + "/full/,150/0/default.jpg",
            "@type": "dctypes:Image"
        };

        iiif_canvas.images = [ iiif_image ];
        iiif_canvas.thumbnail = iiif_thumbnail;

        iiif_canvases.push(iiif_canvas);
    }

    iiif_sequence.canvases = iiif_canvases;
    iiif_manifest.sequences = [ iiif_sequence ];

    return iiif_manifest;
}
515
516
517// https://chuckconway.com/changing-a-react-input-value-from-vanilla-javascript/
518
/**
 * Set the value of an input element from outside React and announce the
 * change with a bubbling "input" event.
 *
 * Two framework-specific touches are applied before the event is dispatched
 * (labelled "React 15" and "React 16" in the snippet this is based on):
 *   - the event is flagged with `simulated = true`
 *   - if the element has a `_valueTracker`, it is told the previous value
 *
 * @param {Object} element - The input element to update.
 * @param {*} value - The new value to assign to element.value.
 */
function setNativeValue(element, value) {
    const previous_value = element.value;
    element.value = value;

    const input_event = new Event("input", { target: element, bubbles: true });

    // React 15
    input_event.simulated = true;

    // React 16
    const value_tracker = element._valueTracker;
    if (value_tracker) {
        value_tracker.setValue(previous_value);
    }

    element.dispatchEvent(input_event);
}
532
533//var input = document.getElementById("ID OF ELEMENT");
534//setNativeValue(input, "VALUE YOU WANT TO SET");
535
536
/**
 * Drive Mirador's search panel from outside the viewer: open the "Search"
 * side panel, enter query_term into its text box and press "Submit search".
 *
 * If the search panel's text input cannot be found once the delay has passed
 * (for example because no Search button exists to open the panel), a warning
 * is logged and nothing further is done.
 *
 * @param {string} query_term - The text to search for.
 */
function miradorSearch(query_term)
{
    var $select_search_tab = $('button[aria-label="Search"]');
    $select_search_tab.trigger("click");

    // While triggering a vanilla Javascript "click" event happens synchronously,
    // it would appear that things are more complex when React is involved, and
    // the panel is not necessarily present as soon as the call returns.
    //
    // Because jQuery expects the call to be synchronous, it does not provide any
    // way of attaching a callback
    //   => Need to use setTimeout to cause a small delay

    setTimeout(function() {

        var $search_aside = $('aside[aria-label="Search"]');

        var search_input_text = $search_aside.find('input[type="text"]')[0];

        if (!search_input_text) {
            // Without this guard setNativeValue() would be handed 'undefined'
            // and throw from inside the timer callback
            console.warn("miradorSearch(): search input not found; unable to search for '" + query_term + "'");
            return;
        }

        setNativeValue(search_input_text,query_term);

        var $search_button = $search_aside.find('button[aria-label="Submit search"]');
        $search_button.trigger("click");
    }, 500);
}
562
563
Note: See TracBrowser for help on using the repository browser.