source: gs3-installations/intermuse/trunk/sites/intermuse/collect/programmes/js/document_viewer.js@ 37075

Last change on this file since 37075 was 37075, checked in by davidb, 16 months ago

Changes that sync with the introduction of the SimpleAnnotationServer Storage backend

File size: 18.0 KB
Line 
/**
 * Fetch an OCR JSON file stored alongside the current document's associated
 * files and pass the response on to a callback.
 *
 * The URL is built from gs.collectionMetadata["httpPath"], the fixed
 * "/index/assoc/" segment, gs.documentMetadata["assocfilepath"] and json_file.
 *
 * @param json_file   name of the JSON file within the document's assoc directory
 * @param doc_sec_id  section identifier, handed back to the callback untouched
 * @param callback    invoked as callback(json_result, doc_sec_id) on success;
 *                    it is NOT invoked when the request fails (the failure is
 *                    only reported through console.error)
 */
function load_gv_dococr_json(json_file,doc_sec_id,callback) {

    const assoc_dir_url = gs.collectionMetadata["httpPath"]+"/index/assoc/"+gs.documentMetadata["assocfilepath"];
    const json_url = assoc_dir_url +"/"+json_file;

    // Report the URL that could not be fetched, followed by jQuery's status text
    const report_failure = function(jqXHR,textStatus) {
        console.error("load_gv_dococr_json(): failed to retrieve url '" + json_url +"'");
        console.error(textStatus);
    };

    // Forward the response together with the section it belongs to
    const forward_result = function(gv_ocr_json_result) {
        callback(gv_ocr_json_result,doc_sec_id);
    };

    const request_settings = { method: "GET", url: json_url };

    $.ajax(request_settings)
        .fail(report_failure)
        .done(forward_result);
}
20
/**
 * Flatten a list of OCR paragraphs (paragraph -> words -> symbols) into a
 * single plain-text string.
 *
 * Each symbol's text is appended in order.  When a symbol carries a
 * property.detectedBreak, a separator is appended after it according to the
 * break type:
 *   "SPACE", "EOL_SURE_SPACE"  => a single space
 *   "SURE_SPACE"               => a tab (very wide space)
 *   "LINE_BREAK"               => a blank line ("\n\n")
 * Any other break type (including "HYPHEN") adds nothing.
 *
 * Symbols whose property has no detectedBreak (for example when only
 * detected languages are reported) are handled without error, as are
 * paragraphs/words that lack a words/symbols array.
 *
 * @param paras  array of paragraph objects; a missing value is treated as empty
 * @returns the concatenated text
 */
function gv_ocr_paras_to_text(paras)
{
    // Enumerated fields listed at:
    //   http://googleapis.github.io/googleapis/java/grpc-google-cloud-vision-v1/0.1.5/apidocs/com/google/cloud/vision/v1/TextAnnotation.DetectedBreak.BreakType.html
    //
    // "HYPHEN" at the end of a line is deliberately left out
    //   => assume the word continues onto the next line
    var break_separators = {
        "SPACE":          ' ',
        "EOL_SURE_SPACE": ' ',
        "SURE_SPACE":     '\t', // very wide space
        "LINE_BREAK":     '\n\n'
    };

    var ocr_text = "";

    (paras || []).forEach(function(para) {
        (para.words || []).forEach(function(word) {
            (word.symbols || []).forEach(function(symbol) {

                ocr_text += symbol.text;

                // 'property' may be present without a 'detectedBreak' inside it
                var detected_break = symbol.property && symbol.property.detectedBreak;

                if (detected_break && Object.prototype.hasOwnProperty.call(break_separators,detected_break.type)) {
                    ocr_text += break_separators[detected_break.type];
                }
            });
        });
    });

    return ocr_text;
}
65
/**
 * Turn the block-level bounding boxes of a single-page OCR result into
 * axis-aligned rectangles and hand them to display_scaled_div_bounding_boxes().
 *
 * For every block on the page the smallest rectangle enclosing all of the
 * block's boundingBox vertices is computed, and the block's text (via
 * gv_ocr_paras_to_text()) is attached as rect.text.  Each rect has the form
 *   { x_org, y_org, x_dim, y_dim, text }
 * in full-size image coordinates; page.width/page.height are passed along as
 * the full-size dimensions.
 *
 * Nothing is displayed (a console message is written instead) when the JSON
 * has no fullTextAnnotation, or when it does not contain exactly one page.
 *
 * @param gv_ocr_json  parsed OCR JSON (expects a 'fullTextAnnotation' member)
 * @param doc_sec_id   section identifier, passed through to the display routine
 */
function display_gv_ocr_bounding_boxes(gv_ocr_json,doc_sec_id)
{
    var full_text_annotation = gv_ocr_json && gv_ocr_json.fullTextAnnotation;

    if (!full_text_annotation) {
        // e.g. an image in which no text was recognised
        console.warn("display_gv_ocr_bounding_boxes(): no fullTextAnnotation found, nothing to display");
        return;
    }

    var pages = full_text_annotation.pages || [];
    var num_pages = pages.length;

    if (num_pages != 1) {
        console.error("display_gv_ocr_bounding_boxes(): incorrect number of pages found.");
        console.error(" Expected 1 page, found " + num_pages +" page(s)");
        return;
    }

    var page = pages[0];
    var bounding_box_rects = [];

    (page.blocks || []).forEach(function(block) {

        var vertices = (block.boundingBox && block.boundingBox.vertices) || [];
        if (vertices.length == 0) {
            // No geometry => nothing sensible to draw for this block
            return;
        }

        // A vertex may lack 'x' or 'y' (zero-valued coordinates can be left
        // out of the JSON); treat a missing coordinate as 0 rather than
        // letting 'undefined' turn the min/max calculations into NaN
        var xs = vertices.map(function(vertex) { return vertex.x || 0; });
        var ys = vertices.map(function(vertex) { return vertex.y || 0; });

        var min_x = Math.min.apply(null,xs);
        var min_y = Math.min.apply(null,ys);
        var max_x = Math.max.apply(null,xs);
        var max_y = Math.max.apply(null,ys);

        bounding_box_rects.push({
            "x_org": min_x,
            "y_org": min_y,
            "x_dim": max_x - min_x +1, // +1 => dimensions are inclusive of both edges
            "y_dim": max_y - min_y +1,
            "text":  gv_ocr_paras_to_text(block.paragraphs || [])
        });
    });

    display_scaled_div_bounding_boxes(doc_sec_id, bounding_box_rects, page.width,page.height);
}
132
133
/**
 * Overlay a set of rectangles, given in full-size image coordinates, on top
 * of the on-screen version of a document image.
 *
 * An overlay <div> (id "ocr-boundingbox-overlay-<doc_sec_id>", class
 * "ocr-boundingbox-overlay") is appended to the element with id
 * "small<doc_sec_id>".  One child <div class="ocr-boundingbox"> is then added
 * per rectangle, positioned and sized by scaling the rectangle by the ratio
 * of gs.variables.screenImageWidth/Height to the full-size dimensions.
 *
 * @param doc_sec_id          section identifier used to build the element ids
 * @param bounding_box_rects  array of { x_org, y_org, x_dim, y_dim, text }
 * @param fullsize_x_dim      width of the full-size image
 * @param fullsize_y_dim      height of the full-size image
 */
function display_scaled_div_bounding_boxes(doc_sec_id, bounding_box_rects,fullsize_x_dim,fullsize_y_dim)
{
    // Ratio of the on-screen image size to the full-size image
    const x_ratio = gs.variables.screenImageWidth / fullsize_x_dim;
    const y_ratio = gs.variables.screenImageHeight / fullsize_y_dim;

    const $overlay = $("<div>");
    $overlay.attr("id","ocr-boundingbox-overlay-"+doc_sec_id);
    $overlay.attr("class","ocr-boundingbox-overlay");

    $("#small"+doc_sec_id).append($overlay);

    for (const bb_rect of bounding_box_rects) {

        // CSS property => scaled value, in the order they are applied
        const geometry = [
            [ "left",   bb_rect.x_org * x_ratio ],
            [ "top",    bb_rect.y_org * y_ratio ],
            [ "width",  bb_rect.x_dim * x_ratio ],
            [ "height", bb_rect.y_dim * y_ratio ]
        ];

        const $box = $("<div>").attr("class","ocr-boundingbox");
        for (const [css_name, css_value] of geometry) {
            $box.css(css_name, css_value);
        }

        // The text is stored in an explicit 'text' attribute so that it
        // appears in the element itself (which jQuery 'data' would not do).
        // A 'data-text' attribute was the previously tried alternative.
        $box.attr("text",bb_rect.text);

        $overlay.append($box);
    }
}
186
187/*
188
189https://stackoverflow.com/questions/68395710/building-a-bounding-box-surrounding-text-in-google-vision-api-to-extract-the-tex
190
191def get_text_within(document, x1, y1, x2, y2):
192text = ""
193for page in document.pages:
194 for block in page.blocks:
195 for paragraph in block.paragraphs:
196 for word in paragraph.words:
197 for symbol in word.symbols:
198 min_x = min(symbol.bounding_box.vertices[0].x, symbol.bounding_box.vertices[1].x,
199 symbol.bounding_box.vertices[2].x, symbol.bounding_box.vertices[3].x)
200 max_x = max(symbol.bounding_box.vertices[0].x, symbol.bounding_box.vertices[1].x,
201 symbol.bounding_box.vertices[2].x, symbol.bounding_box.vertices[3].x)
202 min_y = min(symbol.bounding_box.vertices[0].y, symbol.bounding_box.vertices[1].y,
203 symbol.bounding_box.vertices[2].y, symbol.bounding_box.vertices[3].y)
204 max_y = max(symbol.bounding_box.vertices[0].y, symbol.bounding_box.vertices[1].y,
205 symbol.bounding_box.vertices[2].y, symbol.bounding_box.vertices[3].y)
206 if (min_x >= x1 and max_x <= x2 and min_y >= y1 and max_y <= y2):
207 text += symbol.text
208 if (symbol.property.detected_break.type == 1 or
209 symbol.property.detected_break.type == 3):
210 text += ' '
211 if (symbol.property.detected_break.type == 2):
212 text += '\t'
213 if (symbol.property.detected_break.type == 5):
214 text += '\n'
215return text
216
217*/
218
219
220/*
221
222https://stackoverflow.com/questions/57071788/google-vision-api-text-detection-display-words-by-block
223
224
225https://gist.github.com/UBISOFT-1/f00e4d22790f4af378d70b237fa56ca9
226
227 response = client.text_detection(image=image)
228 # The actual response for the first page of the input file.
229 breaks = vision.enums.TextAnnotation.DetectedBreak.BreakType
230 paragraphs = []
231 lines = []
232 # extract text by block of detection
233 for page in response.full_text_annotation.pages:
234 for block in page.blocks:
235 for paragraph in block.paragraphs:
236 para = ""
237 line = ""
238 suppose = str(paragraph.bounding_box)
239 suppose = suppose.replace('vertices ','')
240 print(suppose)
241 for word in paragraph.words:
242 for symbol in word.symbols:
243 line += symbol.text
244 if symbol.property.detected_break.type == breaks.SPACE:
245 line += ' '
246 if symbol.property.detected_break.type == breaks.EOL_SURE_SPACE:
247 line += ' '
248 lines.append(line)
249 para += line
250 line = ''
251 if symbol.property.detected_break.type == breaks.LINE_BREAK:
252 lines.append(line)
253 para += line
254 line = ''
255 paragraphs.append(para)
256
257
258 return "\n".join(paragraphs)
259
260
261
262
263https://blog.searce.com/tips-tricks-for-using-google-vision-api-for-text-detection-2d6d1e0c6361
264
265def draw_boxes(image, bounds, color,width=5):
266 draw = ImageDraw.Draw(image)
267 for bound in bounds:
268 draw.line([
269 bound.vertices[0].x, bound.vertices[0].y,
270 bound.vertices[1].x, bound.vertices[1].y,
271 bound.vertices[2].x, bound.vertices[2].y,
272 bound.vertices[3].x, bound.vertices[3].y,
273 bound.vertices[0].x, bound.vertices[0].y],fill=color, width=width)
274 return image
275def get_document_bounds(response, feature):
276 for i,page in enumerate(document.pages):
277 for block in page.blocks:
278 if feature==FeatureType.BLOCK:
279 bounds.append(block.bounding_box)
280 for paragraph in block.paragraphs:
281 if feature==FeatureType.PARA:
282 bounds.append(paragraph.bounding_box)
283 for word in paragraph.words:
284 for symbol in word.symbols:
285 if (feature == FeatureType.SYMBOL):
286 bounds.append(symbol.bounding_box)
287 if (feature == FeatureType.WORD):
288 bounds.append(word.bounding_box)
289 return bounds
290bounds=get_document_bounds(response, FeatureType.WORD)
291draw_boxes(image,bounds, 'yellow')
292
293*/
294
295
296
/**
 * Create the Mirador 3 viewer (with annotation support) inside the element
 * with id 'mirador3-viewer'.
 *
 * The canvas shown first is taken from the 1-based 'gotoPage' URL query
 * parameter of the current page.  A missing, non-numeric or less-than-1
 * value falls back to page 1 (canvas index 0).
 *
 * The manifest URL given to Mirador acts as a key: whenever Mirador receives
 * a manifest, the 'postprocessors' hook below substitutes the 'iiif_manifest'
 * variable for the fetched JSON.
 * NOTE(review): 'iiif_manifest' is not defined in this function; presumably it
 * is a page-level global built with initIIIFManifest() -- confirm.
 *
 * @returns whatever mirador3WithAnnotations.initMirador3Viewer() returns
 */
function createMirador3Viewer()
{
    var manifest_url = "https://intermuse.sowemustthink.space/greenstone3/sample-manifest.json";

    var doc_url = new URL(window.location);

    // Parse explicitly: relying on string-minus-number coercion would yield
    // NaN for 'gotoPage=abc' and a negative index for 'gotoPage=0'
    var goto_page = parseInt(doc_url.searchParams.get('gotoPage'),10);
    if (!Number.isFinite(goto_page) || goto_page < 1) {
        goto_page = 1;
    }

    var canvas_index = goto_page -1;

    var mirador3_config = {
        "manifests": {
            [manifest_url]: {
                "provider": "InterMusE"
            }
        },
        "window" : { "panels": { "annotations": true, "attribution": true },
                     "defaultSideBarPanel": "annotations",
                     "sideBarOpenByDefault": true
                   },

        //"annotations": {
        //    "htmlSanitizationRuleSet": 'iiif', // See src/lib/htmlRules.js for acceptable values
        //    "filteredMotivations": ['oa:commenting', 'oa:tagging', 'sc:painting', 'commenting', 'tagging'],
        //},
        "requests": {
            "postprocessors": [
                (url, action) => {
                    if (action.type === "mirador/RECEIVE_MANIFEST") {
                        // Swap in the locally built manifest
                        return {
                            ...action ,
                            "manifestJson": iiif_manifest
                        };
                    }
                }
            ]
        },

        "windows": [
            {
                "loadedManifest": manifest_url,
                "canvasIndex": canvas_index,
                "thumbnailNavigationPosition": 'far-bottom'
            }
        ]
    };

    // An earlier version called Mirador.viewer() directly, passing
    // "id": "mirador3-viewer" as part of an otherwise similar config
    // (attribution panel off, canvasIndex fixed at 0).
    var mirador3_viewer = mirador3WithAnnotations.initMirador3Viewer('mirador3-viewer',mirador3_config);

    return mirador3_viewer;
}
386
/**
 * Replace the current document with a hard-wired example IIIF annotation
 * list, serialised as JSON.
 *
 * The list ("sc:AnnotationList") holds a single "oa:commenting" annotation
 * that targets the region xywh=622,591,642,940 of a canvas and carries an
 * HTML text body.  The JSON is emitted with document.open() /
 * document.write() / document.close().
 */
function initIIIFAnnotations()
{
    // What the annotation is attached to: a rectangular region of a canvas
    const annotation_target = {
        "@type": "oa:SpecificResource",
        "full": "https://iiif.harvardartmuseums.org/manifests/object/299843/canvas/canvas-47174896",
        "selector": {
            "@type": "oa:FragmentSelector",
            "value": "xywh=622,591,642,940"
        },
        "within": {
            "@id": "https://iiif.harvardartmuseums.org/manifests/object/299843",
            "@type": "sc:Manifest"
        }
    };

    // The content of the annotation
    const annotation_body = {
        "@type": "dctypes:Text",
        "chars": "<p>age: 35-52<br/>gender: Female(66.337677%)<br/>CALM: 55.438412%<br/>CONFUSED: 3.949288%<br/>SURPRISED: 2.33092%<br/>DISGUSTED: 0.545727%<br/>HAPPY: 1.549943%<br/>ANGRY: 2.082294%<br/>SAD: 34.103416%<br/></p><p>Generated by AWS Rekognition</p>",
        "format": "text/html"
    };

    const example_annotation = {
        "@context": "http://iiif.io/api/presentation/2/context.json",
        "@id": "https://iiif.harvardartmuseums.org/annotations/9641482",
        "@type": "oa:Annotation",
        "motivation": [ "oa:commenting" ],
        "on": annotation_target,
        "resource": [ annotation_body ]
    };

    const example_annotation_list = {
        "@context": "http://www.shared-canvas.org/ns/context.json",
        "@id": "https://iiif.harvardartmuseums.org/manifests/object/299843/list/47174896",
        "@type": "sc:AnnotationList",
        "resources": [
            example_annotation
            // More ...
        ]
    };

    const json_text = JSON.stringify(example_annotation_list);

    document.open("application/json");
    document.write(json_text);
    document.close();
}
429
430
/**
 * Build an IIIF Presentation (version 2 style) manifest for a document made
 * up of a flat list of page images.
 *
 * The manifest contains a single sequence with one canvas per entry of
 * iiif_doc_images.  Canvas i (0-based) gets the id
 * "<iiifpres_root_id>/canvas/<i+1>", one painting annotation whose image
 * resource/service point at the entry's IIIF image id, and a thumbnail
 * ("/full/,150/0/default.jpg").
 *
 * Developed with reference to the following manifest example:
 *   https://iiif.archivelab.org/iiif/McGillLibrary-rbsc_ms-medieval-073-18802/manifest.json
 *
 * @param iiifpres_root_id  base URI used for the manifest, sequence, canvas
 *                          and annotation ids
 * @param iiifpres_label    label of the manifest
 * @param iiif_doc_images   array of { id, x_dim, y_dim, chosen_title }, one
 *                          per page image; a missing value is treated as empty
 * @returns the manifest as a plain object
 */
function initIIIFManifest(iiifpres_root_id,iiifpres_label, iiif_doc_images)
{
    console.log("IIIF_Doc_Images");
    console.log(iiif_doc_images);

    var iiif_manifest = {
        "@context": "http://iiif.io/api/presentation/2/context.json",
        "@id": iiifpres_root_id + "/manifest",
        "@type": "sc:Manifest",
        "label": iiifpres_label,
        "attribution": "The Internet of Musical Events (InterMusE): Digital Scholarship, Community, and the Archiving of Performance",
        "license": "https://**** To Be Determined ****",
        "logo": "https://intermuse.datatodata.org/wp-content/uploads/sites/4/2021/03/cropped-cropped-cropped-1.png"
    };

    /*
      var iiif_metadata = [
          { "label": "creator", "value": ["aaa1","aaa2"] },
          { "label": "title",   "value": ["ttt"] }
      ];

      iiif_manifest.metadata = iiif_metadata;
    */

    // trivial IIIF sequences => just the one sequence, holding every canvas
    // NOTE(review): this is the Image API context on a Presentation API
    // resource; left as found -- confirm whether it is intentional
    var iiif_sequence = {
        "@context": "http://iiif.io/api/image/2/context.json",
        "@id": iiifpres_root_id + "/sequence/normal",
        "@type": "sc:Sequence"
    };

    var iiif_canvases = (iiif_doc_images || []).map(function(iiif_doc_image,i) {
        var section = i+1; // assumes flat (i.e. non-hierarchical) image document structure

        console.log(iiif_doc_image);

        var iiifimg_root_id = iiif_doc_image.id;
        var iiifimg_x_dim   = iiif_doc_image.x_dim;
        var iiifimg_y_dim   = iiif_doc_image.y_dim;
        var iiifimg_label   = iiif_doc_image.chosen_title;

        var iiif_canvas_id = iiifpres_root_id +"/canvas/" + section;

        // Per-canvas annotation lists ("otherContent") are currently disabled.
        // When enabled, each canvas carried one "sc:AnnotationList" entry whose
        // "@id" was
        //   https://intermuse.sowemustthink.space/greenstone3/library/sites/intermuse/collect/programmes/index/assoc/<assocfilepath>/openannotation-list<section>.json
        // (or webannotation-list<section>.json), where <assocfilepath> is
        // gs.documentMetadata['assocfilepath'] and <section> is left empty
        // when gs.documentMetadata.docType == "simple"
        var iiif_canvas = {
            "@context": "http://iiif.io/api/presentation/2/context.json",
            "@id": iiif_canvas_id,
            "@type": "sc:Canvas",
            "width": iiifimg_x_dim,
            "height": iiifimg_y_dim,
            "label": iiifimg_label
        };

        var iiif_image = {
            "@context": "http://iiif.io/api/image/2/context.json",
            "@id": iiifpres_root_id +"/annotation/" + section,
            "@type": "oa:Annotation",
            "motivation": "sc:painting",
            // 'on' must be the URI of the canvas this image is painted onto,
            // i.e. identical to the canvas's own "@id"
            "on": iiif_canvas_id,
            "resource": {
                // e.g. "https://intermuse.sowemustthink.space/gs-cantaloupe/iiif/2/intermuse:programmes:HASH012cd965c3e83d504f4a78cd/full/full/0/default.jpg"
                "@id": iiifimg_root_id+"/full/full/0/default.jpg",
                "@type": "dctypes:Image",
                "format": "image/jpeg",
                "width": iiifimg_x_dim,
                "height": iiifimg_y_dim,
                "service": {
                    "@context": "http://iiif.io/api/image/2/context.json",
                    "@id": iiifimg_root_id,
                    "profile": "https://iiif.io/api/image/2/profiles/level2.json"
                }
            }
        };

        var iiif_thumbnail = {
            "@id": iiifimg_root_id + "/full/,150/0/default.jpg",
            "@type": "dctypes:Image"
        };

        iiif_canvas.images    = [ iiif_image ];
        iiif_canvas.thumbnail = iiif_thumbnail;

        return iiif_canvas;
    });

    iiif_sequence.canvases  = iiif_canvases;
    iiif_manifest.sequences = [ iiif_sequence ];

    return iiif_manifest;
}
Note: See TracBrowser for help on using the repository browser.