source: gs3-installations/intermuse/trunk/sites/intermuse/collect/programmes/js/document_viewer.js@ 37055

Last change on this file since 37055 was 37055, checked in by davidb, 16 months ago

A range of changes, mostly to do with the display of the Greenstone doc-view as Mirador3: regular Greenstone view still operational (whether single-page simple doc, or paged); VList statements simplified so it always takes the user to the Mirador version

File size: 17.2 KB
Line 
// Fetch an OCR JSON file from this document's associated-files area and
// hand the parsed result to `callback`, along with the section id it
// belongs to.  Failures are logged to the console and the callback is
// not invoked.
function load_gv_dococr_json(json_file,doc_sec_id,callback)
{
    var assoc_dir = gs.collectionMetadata["httpPath"] + "/index/assoc/"
	          + gs.documentMetadata["assocfilepath"];
    var ocr_json_url = assoc_dir + "/" + json_file;

    $.ajax({ method: "GET", url: ocr_json_url })
	.done(function(gv_ocr_json_result) {
	    callback(gv_ocr_json_result,doc_sec_id);
	})
	.fail(function(jqXHR,textStatus) {
	    console.error("load_gv_dococr_json(): failed to retrieve url '" + ocr_json_url +"'");
	    console.error(textStatus);
	});
}
20
// Flatten a Google Vision OCR paragraph list into a plain-text string.
//
// paras: array of paragraph objects (each with .words[].symbols[]), as
//        found inside a block of a Vision API fullTextAnnotation page.
//
// Returns the concatenation of every symbol's text, with whitespace
// re-inserted according to each symbol's detectedBreak annotation.
function gv_ocr_paras_to_text(paras)
{
    var ocr_text = "";

    var num_paras = paras.length;
    for (var p=0; p<num_paras; p++) {

	var para = paras[p];
	var words = para.words;
	var num_words = words.length;

	for (var w=0; w<num_words; w++) {
	    var word = words[w];

	    var symbols = word.symbols;
	    var num_symbols = symbols.length;

	    for (var s=0; s<num_symbols; s++) {

		var symbol = symbols[s];

		ocr_text += symbol.text;

		// Enumerated fields listed at:
		// http://googleapis.github.io/googleapis/java/grpc-google-cloud-vision-v1/0.1.5/apidocs/com/google/cloud/vision/v1/TextAnnotation.DetectedBreak.BreakType.html
		//
		// Guard both levels: a symbol may carry a 'property'
		// object that has no 'detectedBreak' entry, in which
		// case no break character is emitted
		if (symbol.property && symbol.property.detectedBreak) {
		    var detected_break = symbol.property.detectedBreak;
		    if ((detected_break.type == "SPACE" || detected_break.type == "EOL_SURE_SPACE")) {
			ocr_text += ' ';
		    }
		    else if (detected_break.type == "SURE_SPACE") { // very wide space
			ocr_text += '\t';
		    }
		    // "HYPHEN" at end of line => assume word continues onto
		    // the next line, so deliberately emit nothing for it
		    else if (detected_break.type == "LINE_BREAK" ) {
			ocr_text += '\n\n';
		    }
		}
	    }
	}
    }

    return ocr_text;
}
65
// Convert a Google Vision OCR result (expected to describe exactly one
// page) into a list of axis-aligned bounding-box rectangles -- one per
// text block, each tagged with that block's OCR text -- and pass them
// to display_scaled_div_bounding_boxes() for on-screen display.
function display_gv_ocr_bounding_boxes(gv_ocr_json,doc_sec_id)
{
    var full_text_annotation = gv_ocr_json.fullTextAnnotation;
    var pages = full_text_annotation.pages;
    var num_pages = pages.length;

    if (num_pages != 1) {
	console.error("display_gv_ocr_bounding_boxes(): incorrect number of pages found.")
	console.error(" Expected 1 page, found " + num_pages +" page(s)");
	return;
    }

    var page = pages[0];
    var page_x_dim = page.width;
    var page_y_dim = page.height;

    var bounding_box_rects = page.blocks.map(function(block) {
	// Reduce the block's (possibly rotated) quadrilateral to the
	// tightest axis-aligned rectangle containing all its vertices
	var xs = block.boundingBox.vertices.map(function(v) { return v.x; });
	var ys = block.boundingBox.vertices.map(function(v) { return v.y; });

	var min_x = Math.min.apply(null,xs);
	var min_y = Math.min.apply(null,ys);
	var max_x = Math.max.apply(null,xs);
	var max_y = Math.max.apply(null,ys);

	return {
	    "x_org": min_x,
	    "y_org": min_y,
	    "x_dim": max_x - min_x +1,
	    "y_dim": max_y - min_y +1,
	    "text":  gv_ocr_paras_to_text(block.paragraphs)
	};
    });

    display_scaled_div_bounding_boxes(doc_sec_id, bounding_box_rects, page_x_dim,page_y_dim);
}
132
133
// Overlay OCR bounding boxes on the screen-sized page image for the
// given document section.  Rectangles arrive in full-size image
// coordinates and are scaled down to the on-screen image dimensions
// (taken from gs.variables.screenImageWidth/Height).  Each box <div>
// carries its OCR text in a 'text' attribute.
function display_scaled_div_bounding_boxes(doc_sec_id, bounding_box_rects,fullsize_x_dim,fullsize_y_dim)
{
    var scale_x = gs.variables.screenImageWidth  / fullsize_x_dim;
    var scale_y = gs.variables.screenImageHeight / fullsize_y_dim;

    var $boundingbox_overlay = $("<div>")
	.attr("id","ocr-boundingbox-overlay-"+doc_sec_id)
	.attr("class","ocr-boundingbox-overlay");

    // The screen-sized image lives in a div with id "small<doc_sec_id>"
    $('#'+"small"+doc_sec_id).append($boundingbox_overlay);

    bounding_box_rects.forEach(function(rect) {
	var $boundingbox_div = $("<div>")
	    .attr("class","ocr-boundingbox")
	    .css("left",  rect.x_org * scale_x)
	    .css("top",   rect.y_org * scale_y)
	    .css("width", rect.x_dim * scale_x)
	    .css("height",rect.y_dim * scale_y)
	    // Could use jQuery 'data' here, but then the value would not
	    // appear as an attribute on the element itself
	    .attr("text",rect.text);

	$boundingbox_overlay.append($boundingbox_div);
    });
}
186
187/*
188
189https://stackoverflow.com/questions/68395710/building-a-bounding-box-surrounding-text-in-google-vision-api-to-extract-the-tex
190
191def get_text_within(document, x1, y1, x2, y2):
192text = ""
193for page in document.pages:
194 for block in page.blocks:
195 for paragraph in block.paragraphs:
196 for word in paragraph.words:
197 for symbol in word.symbols:
198 min_x = min(symbol.bounding_box.vertices[0].x, symbol.bounding_box.vertices[1].x,
199 symbol.bounding_box.vertices[2].x, symbol.bounding_box.vertices[3].x)
200 max_x = max(symbol.bounding_box.vertices[0].x, symbol.bounding_box.vertices[1].x,
201 symbol.bounding_box.vertices[2].x, symbol.bounding_box.vertices[3].x)
202 min_y = min(symbol.bounding_box.vertices[0].y, symbol.bounding_box.vertices[1].y,
203 symbol.bounding_box.vertices[2].y, symbol.bounding_box.vertices[3].y)
204 max_y = max(symbol.bounding_box.vertices[0].y, symbol.bounding_box.vertices[1].y,
205 symbol.bounding_box.vertices[2].y, symbol.bounding_box.vertices[3].y)
206 if (min_x >= x1 and max_x <= x2 and min_y >= y1 and max_y <= y2):
207 text += symbol.text
208 if (symbol.property.detected_break.type == 1 or
209 symbol.property.detected_break.type == 3):
210 text += ' '
211 if (symbol.property.detected_break.type == 2):
212 text += '\t'
213 if (symbol.property.detected_break.type == 5):
214 text += '\n'
215return text
216
217*/
218
219
220/*
221
222https://stackoverflow.com/questions/57071788/google-vision-api-text-detection-display-words-by-block
223
224
225https://gist.github.com/UBISOFT-1/f00e4d22790f4af378d70b237fa56ca9
226
227 response = client.text_detection(image=image)
228 # The actual response for the first page of the input file.
229 breaks = vision.enums.TextAnnotation.DetectedBreak.BreakType
230 paragraphs = []
231 lines = []
232 # extract text by block of detection
233 for page in response.full_text_annotation.pages:
234 for block in page.blocks:
235 for paragraph in block.paragraphs:
236 para = ""
237 line = ""
238 suppose = str(paragraph.bounding_box)
239 suppose = suppose.replace('vertices ','')
240 print(suppose)
241 for word in paragraph.words:
242 for symbol in word.symbols:
243 line += symbol.text
244 if symbol.property.detected_break.type == breaks.SPACE:
245 line += ' '
246 if symbol.property.detected_break.type == breaks.EOL_SURE_SPACE:
247 line += ' '
248 lines.append(line)
249 para += line
250 line = ''
251 if symbol.property.detected_break.type == breaks.LINE_BREAK:
252 lines.append(line)
253 para += line
254 line = ''
255 paragraphs.append(para)
256
257
258 return "\n".join(paragraphs)
259
260
261
262
263https://blog.searce.com/tips-tricks-for-using-google-vision-api-for-text-detection-2d6d1e0c6361
264
265def draw_boxes(image, bounds, color,width=5):
266 draw = ImageDraw.Draw(image)
267 for bound in bounds:
268 draw.line([
269 bound.vertices[0].x, bound.vertices[0].y,
270 bound.vertices[1].x, bound.vertices[1].y,
271 bound.vertices[2].x, bound.vertices[2].y,
272 bound.vertices[3].x, bound.vertices[3].y,
273 bound.vertices[0].x, bound.vertices[0].y],fill=color, width=width)
274 return image
275def get_document_bounds(response, feature):
276 for i,page in enumerate(document.pages):
277 for block in page.blocks:
278 if feature==FeatureType.BLOCK:
279 bounds.append(block.bounding_box)
280 for paragraph in block.paragraphs:
281 if feature==FeatureType.PARA:
282 bounds.append(paragraph.bounding_box)
283 for word in paragraph.words:
284 for symbol in word.symbols:
285 if (feature == FeatureType.SYMBOL):
286 bounds.append(symbol.bounding_box)
287 if (feature == FeatureType.WORD):
288 bounds.append(word.bounding_box)
289 return bounds
290bounds=get_document_bounds(response, FeatureType.WORD)
291draw_boxes(image,bounds, 'yellow')
292
293*/
294
295
296
// Instantiate a Mirador3 viewer (with annotation support) inside the
// 'mirador3-viewer' element.
//
// The manifest URL in the config acts as a placeholder key: the
// RECEIVE_MANIFEST postprocessor intercepts the manifest response and
// substitutes the locally generated 'iiif_manifest' object in its place.
function createMirador3Viewer()
{
    var manifest_url = "https://intermuse.sowemustthink.space/greenstone3/sample-manifest.json";

    var mirador3_config = {
	"manifests": {
	    [manifest_url]: {
		"provider": "InterMusE"
	    }
	},
	"window" : { "panels" : { "annotations": true, "attribution": true } },

	//"annotations": {
	//  "htmlSanitizationRuleSet": 'iiif', // See src/lib/htmlRules.js for acceptable values
	//  "filteredMotivations": ['oa:commenting', 'oa:tagging', 'sc:painting', 'commenting', 'tagging'],
	//},
	"requests": {
	    "postprocessors": [
		(url, action) => {
		    // Swap in the locally built manifest JSON; any other
		    // action type falls through (undefined => unchanged)
		    if (action.type === "mirador/RECEIVE_MANIFEST") {
			return {
			    ...action ,
			    "manifestJson": iiif_manifest
			};
		    }
		}
	    ]
	},

	"windows": [
	    {
		"loadedManifest": manifest_url,
		"canvasIndex": 0,
		"thumbnailNavigationPosition": 'far-bottom'
	    }
	]
    };

    return mirador3WithAnnotations.initMirador3Viewer('mirador3-viewer',mirador3_config);
}
379
// Replace the current document's content with an example IIIF
// sc:AnnotationList JSON response, modelled on a Harvard Art Museums
// annotation list.  Side effect only: rewrites the page via
// document.open()/write()/close().
function initIIIFAnnotations()
{
    // A single example oa:Annotation attached to a region of a canvas
    var example_annotation = {
	"@context": "http://iiif.io/api/presentation/2/context.json",
	"@id": "https://iiif.harvardartmuseums.org/annotations/9641482",
	"@type": "oa:Annotation",
	"motivation": [
	    "oa:commenting"
	],
	"on": {
	    "@type": "oa:SpecificResource",
	    "full": "https://iiif.harvardartmuseums.org/manifests/object/299843/canvas/canvas-47174896",
	    "selector": {
		"@type": "oa:FragmentSelector",
		"value": "xywh=622,591,642,940"
	    },
	    "within": {
		"@id": "https://iiif.harvardartmuseums.org/manifests/object/299843",
		"@type": "sc:Manifest"
	    }
	},
	"resource": [
	    {
		"@type": "dctypes:Text",
		"chars": "<p>age: 35-52<br/>gender: Female(66.337677%)<br/>CALM: 55.438412%<br/>CONFUSED: 3.949288%<br/>SURPRISED: 2.33092%<br/>DISGUSTED: 0.545727%<br/>HAPPY: 1.549943%<br/>ANGRY: 2.082294%<br/>SAD: 34.103416%<br/></p><p>Generated by AWS Rekognition</p>",
		"format": "text/html"
	    }
	]
    };

    var iiif_annotations_example = {
	"@context": "http://www.shared-canvas.org/ns/context.json",
	"@id": "https://iiif.harvardartmuseums.org/manifests/object/299843/list/47174896",
	"@type": "sc:AnnotationList",
	"resources": [
	    example_annotation
	    // More ...
	]
    };

    document.open("application/json");
    document.write(JSON.stringify(iiif_annotations_example));
    document.close();
}
422
423
// Build a IIIF Presentation API (v2) manifest for a paged Greenstone
// document.
//
// iiifpres_root_id: base URI used to mint the presentation-level ids
//                   (manifest/sequence/canvas/annotation).
// iiifpres_label:   human-readable label for the manifest.
// iiif_doc_images:  array of page-image descriptors, each assumed to be
//                   { id, x_dim, y_dim, chosen_title } where 'id' is the
//                   IIIF Image API root URI for that page -- TODO confirm
//                   against caller.
//
// Returns the manifest as a plain JS object (not yet serialized).
//
// Developed with reference to the following manifest example:
//   https://iiif.archivelab.org/iiif/McGillLibrary-rbsc_ms-medieval-073-18802/manifest.json
function initIIIFManifest(iiifpres_root_id,iiifpres_label, iiif_doc_images)
{
    console.log("IIIF_Doc_Images");
    console.log(iiif_doc_images);

    var iiif_manifest = {
	"@context": "http://iiif.io/api/presentation/2/context.json",
	"@id": iiifpres_root_id + "/manifest",
	"@type": "sc:Manifest",
	"label": iiifpres_label,
	"attribution": "The Internet of Musical Events (InterMusE): Digital Scholarship, Community, and the Archiving of Performance",
	"license": "https://**** To Be Determined ****",
	"logo": "https://intermuse.datatodata.org/wp-content/uploads/sites/4/2021/03/cropped-cropped-cropped-1.png"
    };

    // trivial IIIF sequence => holds one canvas per page image
    var iiif_sequence = {
	"@context": "http://iiif.io/api/image/2/context.json",
	"@id": iiifpres_root_id + "/sequence/normal",
	"@type": "sc:Sequence"
    };

    var iiif_canvases = [];

    var assocfilepath = gs.documentMetadata['assocfilepath'];

    for (var i=0; i<iiif_doc_images.length; i++) {
	var section = i+1; // assumes flat (i.e. non-hierarchical) image document structure

	var iiif_doc_image = iiif_doc_images[i];
	console.log(iiif_doc_image);
	var iiifimg_root_id = iiif_doc_image.id;

	var iiifimg_x_dim = iiif_doc_image.x_dim;
	var iiifimg_y_dim = iiif_doc_image.y_dim;
	var iiifimg_label = iiif_doc_image.chosen_title;

	var canvas_id = iiifpres_root_id +"/canvas/" + section;

	var iiif_canvas = {
	    "@context": "http://iiif.io/api/presentation/2/context.json",
	    "@id": canvas_id,
	    "@type": "sc:Canvas",
	    "width": iiifimg_x_dim,
	    "height": iiifimg_y_dim,
	    "label": iiifimg_label,

	    // Annotation JSON file stored alongside the document's
	    // associated files
	    "otherContent": [
		{
		    "@id": "https://intermuse.sowemustthink.space/greenstone3/library/sites/intermuse/collect/programmes/index/assoc/"+assocfilepath+"/openannotation-list.json",
		    "@type": "sc:AnnotationList"
		}
	    ]
	};

	var iiif_image = {
	    "@context": "http://iiif.io/api/image/2/context.json",
	    "@id": iiifpres_root_id +"/annotation/" + section,
	    "@type": "oa:Annotation",
	    "motivation": "sc:painting",
	    // The painting annotation's 'on' must reference the specific
	    // canvas it paints, matching the canvas @id built above
	    // (previously the section number was omitted)
	    "on": canvas_id,
	    "resource": {
		"@id": iiifimg_root_id+"/full/full/0/default.jpg",
		"@type": "dctypes:Image",
		"format": "image/jpeg",
		"width": iiifimg_x_dim,
		"height": iiifimg_y_dim,
		"service": {
		    "@context": "http://iiif.io/api/image/2/context.json",
		    "@id": iiifimg_root_id,
		    "profile": "https://iiif.io/api/image/2/profiles/level2.json"
		}
	    }
	};

	// Height-constrained (150px) thumbnail via the IIIF Image API
	var iiif_thumbnail = {
	    "@id": iiifimg_root_id + "/full/,150/0/default.jpg",
	    "@type": "dctypes:Image"
	};

	iiif_canvas.images = [ iiif_image ];
	iiif_canvas.thumbnail = iiif_thumbnail;

	iiif_canvases.push(iiif_canvas);
    }

    iiif_sequence.canvases = iiif_canvases;
    iiif_manifest.sequences = [ iiif_sequence ];

    return iiif_manifest;
}
Note: See TracBrowser for help on using the repository browser.