source: gs3-installations/intermuse/trunk/sites/intermuse/collect/programmes/js/document_viewer.js@ 37105

Last change on this file since 37105 was 37105, checked in by davidb, 16 months ago

A series of changes getting the code and XSLT up to the point where the Mirador annotation viewer can be operated without annotations, with annotations from a static manifest, or with annotations from the store (editable)

File size: 16.0 KB
Line 
/**
 * Fetch a Google Vision OCR JSON file from the current document's
 * associated-files directory and hand the result to a callback.
 *
 * The URL is built from gs.collectionMetadata["httpPath"], the fixed
 * "/index/assoc/" segment, gs.documentMetadata["assocfilepath"] and json_file.
 *
 * @param {string}   json_file   File name of the OCR JSON, relative to the assoc directory.
 * @param {string}   doc_sec_id  Passed through unchanged as the callback's second argument.
 * @param {function} callback    Called as callback(parsed_json, doc_sec_id) on success.
 * @returns {object} The jqXHR chain returned by $.ajax, so callers may attach further handlers.
 */
function load_gv_dococr_json(json_file,doc_sec_id,callback) {

    var http_assocfilepath = gs.collectionMetadata["httpPath"]+"/index/assoc/"+gs.documentMetadata["assocfilepath"];
    var json_url = http_assocfilepath +"/"+json_file;

    return $.ajax({
        method: "GET",
        url: json_url,
        // Ask jQuery to parse the body as JSON whatever Content-Type the server
        // sends, so the callback always receives an object rather than a raw string
        dataType: "json"
    })
    .fail(function(jqXHR,textStatus,errorThrown) {
        console.error("load_gv_dococr_json(): failed to retrieve url '" + json_url +"'");
        console.error(textStatus);
        if (errorThrown) {
            console.error(errorThrown);
        }
    })
    .done(function(gv_ocr_json_result) {
        callback(gv_ocr_json_result,doc_sec_id);
    });
}
20
/**
 * Flatten Google Vision OCR paragraphs into plain text.
 *
 * Walks paragraphs -> words -> symbols, appending each symbol's text followed
 * by whitespace chosen from the symbol's detected break type:
 *   "SPACE" / "EOL_SURE_SPACE" -> ' '
 *   "SURE_SPACE" (very wide space) -> '\t'
 *   "LINE_BREAK" -> '\n\n'
 * Any other break type (e.g. "HYPHEN") appends nothing, so a word hyphenated
 * at the end of a line is assumed to continue on the next line.
 *
 * Break type names are listed at:
 * http://googleapis.github.io/googleapis/java/grpc-google-cloud-vision-v1/0.1.5/apidocs/com/google/cloud/vision/v1/TextAnnotation.DetectedBreak.BreakType.html
 *
 * @param {Array} paras  Paragraph objects, each with a `words` array whose entries
 *                       have a `symbols` array. Missing arrays are treated as empty.
 * @returns {string} The concatenated text ("" when there is nothing to read).
 */
function gv_ocr_paras_to_text(paras)
{
    var break_to_text = {
        "SPACE":          ' ',
        "EOL_SURE_SPACE": ' ',
        "SURE_SPACE":     '\t',
        "LINE_BREAK":     '\n\n'
    };

    var ocr_text = "";

    var para_list = paras || [];
    for (var p=0; p<para_list.length; p++) {
        var words = para_list[p].words || [];

        for (var w=0; w<words.length; w++) {
            var symbols = words[w].symbols || [];

            for (var s=0; s<symbols.length; s++) {
                var symbol = symbols[s];

                ocr_text += (symbol.text != null) ? symbol.text : "";

                // `property` can be present without a `detectedBreak` (for example when
                // it only carries detected languages), so both levels must be checked
                var detected_break = symbol.property && symbol.property.detectedBreak;
                if (detected_break && Object.prototype.hasOwnProperty.call(break_to_text, detected_break.type)) {
                    ocr_text += break_to_text[detected_break.type];
                }
            }
        }
    }

    return ocr_text;
}
65
/**
 * Turn the block-level bounding boxes of a single-page Google Vision OCR
 * result into axis-aligned rectangles (with their OCR text) and display them
 * via display_scaled_div_bounding_boxes().
 *
 * For each block the rectangle is the min/max extent of the block's
 * boundingBox vertices; its text comes from gv_ocr_paras_to_text() applied to
 * the block's paragraphs. Nothing is displayed (an error is logged instead)
 * when the JSON has no fullTextAnnotation pages or does not have exactly one page.
 *
 * @param {object} gv_ocr_json  Parsed OCR JSON; reads fullTextAnnotation.pages[0]
 *                              (width, height, blocks[].boundingBox.vertices, blocks[].paragraphs).
 * @param {string} doc_sec_id   Passed through to display_scaled_div_bounding_boxes().
 */
function display_gv_ocr_bounding_boxes(gv_ocr_json,doc_sec_id)
{
    var full_text_annotation = gv_ocr_json && gv_ocr_json.fullTextAnnotation;
    if (!full_text_annotation || !Array.isArray(full_text_annotation.pages)) {
        console.error("display_gv_ocr_bounding_boxes(): no fullTextAnnotation pages found in OCR JSON");
        return;
    }

    var pages = full_text_annotation.pages;
    var num_pages = pages.length;

    if (num_pages != 1) {
        console.error("display_gv_ocr_bounding_boxes(): incorrect number of pages found.");
        console.error(" Expected 1 page, found " + num_pages +" page(s)");
        return;
    }

    var page = pages[0];
    var page_x_dim = page.width;
    var page_y_dim = page.height;

    var bounding_box_rects = [];
    var blocks = page.blocks || [];

    for (var b=0; b<blocks.length; b++) {
        var block = blocks[b];
        var vertices = (block.boundingBox && block.boundingBox.vertices) || [];

        if (vertices.length == 0) {
            // No geometry to draw for this block
            continue;
        }

        var min_x = Infinity;
        var min_y = Infinity;
        var max_x = -Infinity;
        var max_y = -Infinity;

        for (var v=0; v<vertices.length; v++) {
            // A coordinate may be omitted from the JSON when its value is 0
            // (proto3 JSON drops zero-valued fields); without this default the
            // min/max computation would collapse to NaN
            var x = vertices[v].x || 0;
            var y = vertices[v].y || 0;

            min_x = Math.min(min_x,x);
            min_y = Math.min(min_y,y);
            max_x = Math.max(max_x,x);
            max_y = Math.max(max_y,y);
        }

        var rect = {
            "x_org": min_x,
            "y_org": min_y,
            "x_dim": max_x - min_x +1, // inclusive pixel extent
            "y_dim": max_y - min_y +1
        };

        rect.text = gv_ocr_paras_to_text(block.paragraphs);

        bounding_box_rects.push(rect);
    }

    display_scaled_div_bounding_boxes(doc_sec_id, bounding_box_rects, page_x_dim,page_y_dim);
}
132
133
/**
 * Overlay a set of bounding-box rectangles, scaled from full-size image
 * coordinates to the on-screen image size, on top of a document's screen image.
 *
 * An overlay <div> (id "ocr-boundingbox-overlay-<doc_sec_id>", class
 * "ocr-boundingbox-overlay") is appended to the element whose id is
 * "small<doc_sec_id>"; one "ocr-boundingbox" <div> per rectangle is appended to
 * that overlay, positioned via CSS left/top/width/height and carrying the
 * rectangle's text in a "text" attribute.
 *
 * The on-screen size is read from gs.variables.screenImageWidth / screenImageHeight.
 *
 * @param {string} doc_sec_id          Document/section id used to build the element ids.
 * @param {Array}  bounding_box_rects  Objects with x_org, y_org, x_dim, y_dim and text.
 * @param {number} fullsize_x_dim      Width of the full-size image the rectangles refer to.
 * @param {number} fullsize_y_dim      Height of the full-size image the rectangles refer to.
 */
function display_scaled_div_bounding_boxes(doc_sec_id, bounding_box_rects,fullsize_x_dim,fullsize_y_dim)
{
    var scale_x = gs.variables.screenImageWidth  / fullsize_x_dim;
    var scale_y = gs.variables.screenImageHeight / fullsize_y_dim;

    if (!Number.isFinite(scale_x) || !Number.isFinite(scale_y)) {
        // Missing or zero dimensions would otherwise produce NaN/Infinity CSS values
        console.error("display_scaled_div_bounding_boxes(): unable to compute scale factors"
                      + " (scale x = " + scale_x + ", scale y = " + scale_y + ")");
        return;
    }

    var screen_image_id = "small"+doc_sec_id;

    // Look the element up by id directly rather than through a '#id' selector:
    // an id containing '.' (or other selector meta-characters) would be
    // mis-parsed as a selector and match nothing
    var $screen_div = $(document.getElementById(screen_image_id));

    var $boundingbox_overlay = $("<div>")
        .attr("id","ocr-boundingbox-overlay-"+doc_sec_id)
        .attr("class","ocr-boundingbox-overlay");

    $screen_div.append($boundingbox_overlay);

    var rects = bounding_box_rects || [];

    for (var r=0; r<rects.length; r++) {
        var rect = rects[r];

        var $boundingbox_div = $("<div>")
            .attr("class","ocr-boundingbox")
            .css("left",  rect.x_org * scale_x)
            .css("top",   rect.y_org * scale_y)
            .css("width", rect.x_dim * scale_x)
            .css("height",rect.y_dim * scale_y);

        // Stored as an explicit attribute (not jQuery data) so that the text
        // appears on the element itself
        $boundingbox_div.attr("text",rect.text);

        $boundingbox_overlay.append($boundingbox_div);
    }
}
186
/*

Reference (Python) for extracting the text that lies within a rectangle, from:
https://stackoverflow.com/questions/68395710/building-a-bounding-box-surrounding-text-in-google-vision-api-to-extract-the-tex

def get_text_within(document, x1, y1, x2, y2):
    text = ""
    for page in document.pages:
        for block in page.blocks:
            for paragraph in block.paragraphs:
                for word in paragraph.words:
                    for symbol in word.symbols:
                        min_x = min(symbol.bounding_box.vertices[0].x, symbol.bounding_box.vertices[1].x,
                                    symbol.bounding_box.vertices[2].x, symbol.bounding_box.vertices[3].x)
                        max_x = max(symbol.bounding_box.vertices[0].x, symbol.bounding_box.vertices[1].x,
                                    symbol.bounding_box.vertices[2].x, symbol.bounding_box.vertices[3].x)
                        min_y = min(symbol.bounding_box.vertices[0].y, symbol.bounding_box.vertices[1].y,
                                    symbol.bounding_box.vertices[2].y, symbol.bounding_box.vertices[3].y)
                        max_y = max(symbol.bounding_box.vertices[0].y, symbol.bounding_box.vertices[1].y,
                                    symbol.bounding_box.vertices[2].y, symbol.bounding_box.vertices[3].y)
                        if (min_x >= x1 and max_x <= x2 and min_y >= y1 and max_y <= y2):
                            text += symbol.text
                            if (symbol.property.detected_break.type == 1 or
                                symbol.property.detected_break.type == 3):
                                text += ' '
                            if (symbol.property.detected_break.type == 2):
                                text += '\t'
                            if (symbol.property.detected_break.type == 5):
                                text += '\n'
    return text

*/
218
219
220
221
/**
 * Configure and start the Mirador 3 viewer for the current document.
 *
 * The IIIF manifest URL is derived from gs.xsltParams.library_name and
 * gs.cgiParams.c / gs.cgiParams.d. The initially shown canvas is taken from the
 * page URL's 'gotoPage' query parameter (1-based; defaults to page 1).
 *
 * Annotation support is switched on when either
 * gs.variables.optionIncludeFileAssocOpenAnnotations or
 * gs.variables.optionIncludeEditableOpenAnnotations is 'true'. For editable
 * annotations the adapter is chosen from
 * gs.variables.optionIncludeEditableOpenAnnotationsEndpointURL: an empty/unset
 * value or "localStorage" selects the local-storage adapter, anything else is
 * used as the endpoint of a SimpleAnnotationServer (v2) adapter.
 *
 * @returns {*} Whatever mirador3WithAnnotations.initMirador3Viewer() returns.
 */
function createMirador3Viewer()
{
    var doc_url = new URL(window.location);

    // 'gotoPage' arrives as a string (or null); fall back to the first page for
    // anything that is not a positive integer so canvasIndex is never NaN/negative
    var goto_page = parseInt(doc_url.searchParams.get('gotoPage'), 10);
    if (!(goto_page >= 1)) {
        goto_page = 1;
    }
    var canvas_index = goto_page -1;

    var httpDocument = gs.xsltParams.library_name + "/collection/" + gs.cgiParams.c + "/document/" + gs.cgiParams.d;
    var httpDocumentIIIFManifest = httpDocument + "?sa=iiif-manifest&ed=1&excerptid-text=iiif-manifest";

    var mirador3_config = {
        "manifests": {
            // Computed key: the entry must be keyed by the manifest URL itself.
            // A plain `httpDocumentIIIFManifest:` key would be the literal
            // property name and never match "loadedManifest" below.
            [httpDocumentIIIFManifest]: {
                "provider": "InterMusE"
            }
        },
        "window" : { "panels": { "attribution": true, "search": true } },
        "windows": [
            {
                "loadedManifest": httpDocumentIIIFManifest,
                "canvasIndex": canvas_index,
                "thumbnailNavigationPosition": 'far-bottom'
            }
        ]
    };

    var includeFileAssocOpenAnnotations = gs.variables.optionIncludeFileAssocOpenAnnotations;
    var includeEditableOpenAnnotations  = gs.variables.optionIncludeEditableOpenAnnotations;

    console.log("**** includeFileAssocOpenAnnotations = " + includeFileAssocOpenAnnotations);
    console.log("**** includeEditableOpenAnnotations = " + includeEditableOpenAnnotations);

    // String() so that both the string 'true' and a boolean true enable the option
    var fileAssocEnabled = (String(includeFileAssocOpenAnnotations) === 'true');
    var editableEnabled  = (String(includeEditableOpenAnnotations) === 'true');

    if (fileAssocEnabled || editableEnabled) {
        // Activate the annotations plugin
        mirador3_config.window.panels.annotations = true;

        mirador3_config.window.defaultSideBarPanel = "annotations";
        mirador3_config.window.sideBarOpenByDefault = true;

        if (editableEnabled) {
            var endpointURL = gs.variables.optionIncludeEditableOpenAnnotationsEndpointURL;
            console.log("**** endpointURL = '" + endpointURL + "'");

            mirador3_config.annotation = {};

            if (!endpointURL || (endpointURL == "localStorage")) {
                // No server endpoint configured => keep annotations in the browser
                mirador3_config.annotation.adapter = (canvasId) => new mirador3WithAnnotations.LocalStorageAdapter(`localStorage://?canvasId=${canvasId}`);
            }
            else {
                mirador3_config.annotation.adapter = (canvasId) => new mirador3WithAnnotations.SimpleAnnotationServerV2Adapter(canvasId, endpointURL);
            }

            mirador3_config.annotation.exportLocalStorageAnnotations = true; // display annotation JSON export button
        }
    }

    var mirador3_viewer = mirador3WithAnnotations.initMirador3Viewer('mirador3-viewer',mirador3_config);

    return mirador3_viewer;
}
314
/**
 * Replace the current document's content with a hard-wired example IIIF
 * annotation list (an "sc:AnnotationList" holding one "oa:commenting"
 * annotation), serialised as JSON.
 *
 * The output is produced with document.open() / document.write() /
 * document.close(), so calling this discards whatever the page showed before.
 *
 * NOTE(review): the "application/json" argument to document.open() is
 * presumably ignored by current browsers (the written text is then parsed as
 * HTML) -- confirm before relying on this to serve real JSON.
 */
function initIIIFAnnotations()
{
    var iiif_annotations_example = {
        "@context": "http://www.shared-canvas.org/ns/context.json",
        "@id": "https://iiif.harvardartmuseums.org/manifests/object/299843/list/47174896",
        "@type": "sc:AnnotationList",
        "resources": [
            {
                "@context": "http://iiif.io/api/presentation/2/context.json",
                "@id": "https://iiif.harvardartmuseums.org/annotations/9641482",
                "@type": "oa:Annotation",
                "motivation": [
                    "oa:commenting"
                ],
                "on": {
                    "@type": "oa:SpecificResource",
                    "full": "https://iiif.harvardartmuseums.org/manifests/object/299843/canvas/canvas-47174896",
                    "selector": {
                        "@type": "oa:FragmentSelector",
                        "value": "xywh=622,591,642,940"
                    },
                    "within": {
                        "@id": "https://iiif.harvardartmuseums.org/manifests/object/299843",
                        "@type": "sc:Manifest"
                    }
                },
                "resource": [
                    {
                        "@type": "dctypes:Text",
                        "chars": "<p>age: 35-52<br/>gender: Female(66.337677%)<br/>CALM: 55.438412%<br/>CONFUSED: 3.949288%<br/>SURPRISED: 2.33092%<br/>DISGUSTED: 0.545727%<br/>HAPPY: 1.549943%<br/>ANGRY: 2.082294%<br/>SAD: 34.103416%<br/></p><p>Generated by AWS Rekognition</p>",
                        "format": "text/html"
                    }
                ]
            }
            // More ...
        ]
    };

    document.open("application/json");
    document.write(JSON.stringify(iiif_annotations_example));
    document.close();
}
357
358
/**
 * Build a IIIF Presentation API 2 manifest object for a document made up of a
 * flat (non-hierarchical) list of page images.
 *
 * The manifest has a single sequence ("<root>/sequence/normal") holding one
 * canvas per image. Canvas i (1-based) gets the id "<root>/canvas/<i>", the
 * image's dimensions and label, one "sc:painting" image annotation backed by
 * the image's IIIF Image API service, and a 150-pixel-high thumbnail.
 *
 * Developed with reference to the following manifest example:
 *   https://iiif.archivelab.org/iiif/McGillLibrary-rbsc_ms-medieval-073-18802/manifest.json
 *
 * Static annotation lists are not attached. To add them, give each canvas an
 * "otherContent" array whose entries are
 *   { "@id": <URL of the list JSON>, "@type": "sc:AnnotationList" }
 * (earlier experiments pointed at "openannotation-list<section>.json" and
 * "webannotation-list<section>.json" in the document's assoc directory).
 *
 * @param {string} iiifpres_root_id  Base URI used to mint the manifest, sequence,
 *                                   canvas and annotation ids.
 * @param {string} iiifpres_label    Manifest label.
 * @param {Array}  iiif_doc_images   Objects with id (IIIF Image API base URI),
 *                                   x_dim, y_dim and chosen_title. A null/undefined
 *                                   value is treated as an empty list.
 * @returns {object} The manifest, ready to be serialised as JSON.
 */
function initIIIFManifest(iiifpres_root_id,iiifpres_label, iiif_doc_images)
{
    console.log("IIIF_Doc_Images");
    console.log(iiif_doc_images);

    var iiif_manifest = {
        "@context": "http://iiif.io/api/presentation/2/context.json",
        "@id": iiifpres_root_id + "/manifest",
        "@type": "sc:Manifest",
        "label": iiifpres_label,
        "attribution": "The Internet of Musical Events (InterMusE): Digital Scholarship, Community, and the Archiving of Performance",
        "license": "https://**** To Be Determined ****",
        "logo": "https://intermuse.datatodata.org/wp-content/uploads/sites/4/2021/03/cropped-cropped-cropped-1.png"
    };

    // Trivial IIIF sequence: the only one in the manifest
    // NOTE(review): this and the image annotation below carry the Image API
    // context although they are Presentation API resources -- confirm whether
    // that is intentional
    var iiif_sequence = {
        "@context": "http://iiif.io/api/image/2/context.json",
        "@id": iiifpres_root_id + "/sequence/normal",
        "@type": "sc:Sequence"
    };

    var iiif_canvases = [];
    var doc_images = iiif_doc_images || [];

    for (var i=0; i<doc_images.length; i++) {
        var section = i+1; // assumes flat (i.e. non-hierarchical) image document structure

        var iiif_doc_image = doc_images[i];
        console.log(iiif_doc_image);

        var iiifimg_root_id = iiif_doc_image.id;
        var iiifimg_x_dim   = iiif_doc_image.x_dim;
        var iiifimg_y_dim   = iiif_doc_image.y_dim;
        var iiifimg_label   = iiif_doc_image.chosen_title;

        var iiif_canvas_id = iiifpres_root_id +"/canvas/" + section;

        var iiif_canvas = {
            "@context": "http://iiif.io/api/presentation/2/context.json",
            "@id": iiif_canvas_id,
            "@type": "sc:Canvas",
            "width": iiifimg_x_dim,
            "height": iiifimg_y_dim,
            "label": iiifimg_label
        };

        var iiif_image = {
            "@context": "http://iiif.io/api/image/2/context.json",
            "@id": iiifpres_root_id +"/annotation/" + section,
            "@type": "oa:Annotation",
            "motivation": "sc:painting",
            // Must be the URI of the canvas this image paints, i.e. exactly the
            // canvas "@id" above (not the bare "<root>/canvas" prefix)
            "on": iiif_canvas_id,
            "resource": {
                "@id": iiifimg_root_id+"/full/full/0/default.jpg",
                "@type": "dctypes:Image",
                "format": "image/jpeg",
                "width": iiifimg_x_dim,
                "height": iiifimg_y_dim,
                "service": {
                    "@context": "http://iiif.io/api/image/2/context.json",
                    "@id": iiifimg_root_id,
                    "profile": "https://iiif.io/api/image/2/profiles/level2.json"
                }
            }
        };

        var iiif_thumbnail = {
            "@id": iiifimg_root_id + "/full/,150/0/default.jpg",
            "@type": "dctypes:Image"
        };

        iiif_canvas.images = [ iiif_image ];
        iiif_canvas.thumbnail = iiif_thumbnail;

        iiif_canvases.push(iiif_canvas);
    }

    iiif_sequence.canvases = iiif_canvases;
    iiif_manifest.sequences = [ iiif_sequence ];

    return iiif_manifest;
}
Note: See TracBrowser for help on using the repository browser.