function load_gv_dococr_json(json_file, doc_sec_id, callback)
{
    var http_assocfilepath = gs.collectionMetadata["httpPath"]
        + "/index/assoc/" + gs.documentMetadata["assocfilepath"];
    var json_url = http_assocfilepath + "/" + json_file;

    $.ajax({ method: "GET", url: json_url })
        .fail(function(jqXHR, textStatus) {
            console.error("load_gv_dococr_json(): failed to retrieve url '" + json_url + "'");
            console.error(textStatus);
        })
        .done(function(gv_ocr_json_result) {
            callback(gv_ocr_json_result, doc_sec_id);
        });
}

function gv_ocr_paras_to_text(paras)
{
    var ocr_text = "";

    var num_paras = paras.length;
    for (var p = 0; p < num_paras; p++) {
        var words = paras[p].words;
        var num_words = words.length;

        for (var w = 0; w < num_words; w++) {
            var symbols = words[w].symbols;
            var num_symbols = symbols.length;

            for (var s = 0; s < num_symbols; s++) {
                var symbol = symbols[s];
                ocr_text += symbol.text;

                if (symbol.property && symbol.property.detectedBreak) {
                    var detected_break = symbol.property.detectedBreak;

                    if ((detected_break.type == "SPACE") || (detected_break.type == "EOL_SURE_SPACE")) {
                        ocr_text += ' ';
                    }
                    // "HYPHEN" => assume word continues onto next line
                    else if (detected_break.type == "LINE_BREAK") {
                        ocr_text += '\n\n';
                    }
                }
            }
        }
    }

    return ocr_text;
}

function display_gv_ocr_bounding_boxes(gv_ocr_json, doc_sec_id)
{
    //console.log(gv_ocr_json);

    var full_text_annotation = gv_ocr_json.fullTextAnnotation;
    var pages = full_text_annotation.pages;
    var num_pages = pages.length;

    if (num_pages == 1) {
        var page = pages[0];
        var bounding_box_rects = [];

        var page_x_dim = page.width;
        var page_y_dim = page.height;

        var blocks = page.blocks;
        var num_blocks = blocks.length;

        for (var b = 0; b < num_blocks; b++) {
            var block = blocks[b];

            // Reduce the block's four bounding-box vertices to an axis-aligned rect
            var xs = block.boundingBox.vertices.map(function(v) { return v.x; });
            var ys = block.boundingBox.vertices.map(function(v) { return v.y; });
            var x_org = Math.min.apply(null, xs);
            var y_org = Math.min.apply(null, ys);

            bounding_box_rects.push({
                "x_org": x_org,
                "y_org": y_org,
                "x_dim": Math.max.apply(null, xs) - x_org,
                "y_dim": Math.max.apply(null, ys) - y_org,
                "text":  gv_ocr_paras_to_text(block.paragraphs)
            });
        }

        // NOTE: the id scheme for the on-screen page image div is an assumption
        var screen_image_id = "small-image-" + doc_sec_id;

        var $boundingbox_overlay = $("<div>")
            .attr("id", "ocr-boundingbox-overlay-" + doc_sec_id)
            .attr("class", "ocr-boundingbox-overlay");

        var $screen_div = $('#' + screen_image_id);
        $screen_div.append($boundingbox_overlay);

        // Scale factors from OCR page space to the displayed image's space
        var x_scale = $screen_div.width()  / page_x_dim;
        var y_scale = $screen_div.height() / page_y_dim;

        var num_bb_rects = bounding_box_rects.length;
        //console.log("Block bounding boxes:")
        for (var r = 0; r < num_bb_rects; r++) {
            var rect = bounding_box_rects[r];

            var scaled_x_org = rect.x_org * x_scale;
            var scaled_y_org = rect.y_org * y_scale;
            var scaled_x_dim = rect.x_dim * x_scale;
            var scaled_y_dim = rect.y_dim * y_scale;

            var $boundingbox_div = $("<div>")
                .attr("class", "ocr-boundingbox")
                .css("left",   scaled_x_org)
                .css("top",    scaled_y_org)
                .css("width",  scaled_x_dim)
                .css("height", scaled_y_dim);

            var text = rect.text;
            // //console.log("text = " + text);
            //
            // // Could use 'data' here, but then does not appear in the element itself
            // // => for now do this 'old school' with an explicit 'data-text' attribute
            // $boundingbox_div.attr("data-text",text);
            $boundingbox_div.attr("text", text);

            $boundingbox_overlay.append($boundingbox_div);
        }
    }
}

/*
https://stackoverflow.com/questions/68395710/building-a-bounding-box-surrounding-text-in-google-vision-api-to-extract-the-tex

def get_text_within(document, x1, y1, x2, y2):
    text = ""
    for page in document.pages:
        for block in page.blocks:
            for paragraph in block.paragraphs:
                for word in paragraph.words:
                    for symbol in word.symbols:
                        min_x = min(symbol.bounding_box.vertices[0].x, symbol.bounding_box.vertices[1].x,
                                    symbol.bounding_box.vertices[2].x, symbol.bounding_box.vertices[3].x)
                        max_x = max(symbol.bounding_box.vertices[0].x, symbol.bounding_box.vertices[1].x,
                                    symbol.bounding_box.vertices[2].x, symbol.bounding_box.vertices[3].x)
                        min_y = min(symbol.bounding_box.vertices[0].y, symbol.bounding_box.vertices[1].y,
                                    symbol.bounding_box.vertices[2].y, symbol.bounding_box.vertices[3].y)
                        max_y = max(symbol.bounding_box.vertices[0].y, symbol.bounding_box.vertices[1].y,
                                    symbol.bounding_box.vertices[2].y, symbol.bounding_box.vertices[3].y)
                        if (min_x >= x1 and max_x <= x2 and min_y >= y1 and max_y <= y2):
                            text += symbol.text
                            if (symbol.property.detected_break.type == 1 or
                                symbol.property.detected_break.type == 3):
                                text += ' '
                            if (symbol.property.detected_break.type == 2):
                                text += '\t'
                            if (symbol.property.detected_break.type == 5):
                                text += '\n'
    return text
*/
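// A JavaScript take on the Python get_text_within() snippet quoted above.
// This is a sketch, not part of the original file: it gathers the OCR text
// whose symbol bounding boxes lie wholly inside a page-space rectangle.
// The function name is new here; the field names follow the camelCase REST
// JSON already used by gv_ocr_paras_to_text().
function gv_ocr_get_text_within(page, x1, y1, x2, y2)
{
    var text = "";

    page.blocks.forEach(function(block) {
        block.paragraphs.forEach(function(paragraph) {
            paragraph.words.forEach(function(word) {
                word.symbols.forEach(function(symbol) {
                    var xs = symbol.boundingBox.vertices.map(function(v) { return v.x; });
                    var ys = symbol.boundingBox.vertices.map(function(v) { return v.y; });

                    // Keep the symbol only if its box falls fully inside the query rect
                    if (Math.min.apply(null, xs) >= x1 && Math.max.apply(null, xs) <= x2 &&
                        Math.min.apply(null, ys) >= y1 && Math.max.apply(null, ys) <= y2) {
                        text += symbol.text;

                        if (symbol.property && symbol.property.detectedBreak) {
                            var break_type = symbol.property.detectedBreak.type;
                            if (break_type == "SPACE" || break_type == "EOL_SURE_SPACE") {
                                text += ' ';
                            }
                            else if (break_type == "LINE_BREAK") {
                                text += '\n';
                            }
                        }
                    }
                });
            });
        });
    });

    return text;
}

// Possible use, assuming a loaded single-page OCR result 'gv_ocr_json'
// (the rectangle echoes the xywh=622,591,642,940 fragment further below):
//   var region_text = gv_ocr_get_text_within(
//       gv_ocr_json.fullTextAnnotation.pages[0], 622, 591, 1264, 1531);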
/*
https://stackoverflow.com/questions/57071788/google-vision-api-text-detection-display-words-by-block
https://gist.github.com/UBISOFT-1/f00e4d22790f4af378d70b237fa56ca9

response = client.text_detection(image=image)
# The actual response for the first page of the input file.
breaks = vision.enums.TextAnnotation.DetectedBreak.BreakType
paragraphs = []
lines = []

# extract text by block of detection
for page in response.full_text_annotation.pages:
    for block in page.blocks:
        for paragraph in block.paragraphs:
            para = ""
            line = ""
            suppose = str(paragraph.bounding_box)
            suppose = suppose.replace('vertices ','')
            print(suppose)
            for word in paragraph.words:
                for symbol in word.symbols:
                    line += symbol.text
                    if symbol.property.detected_break.type == breaks.SPACE:
                        line += ' '
                    if symbol.property.detected_break.type == breaks.EOL_SURE_SPACE:
                        line += ' '
                        lines.append(line)
                        para += line
                        line = ''
                    if symbol.property.detected_break.type == breaks.LINE_BREAK:
                        lines.append(line)
                        para += line
                        line = ''
            paragraphs.append(para)

return "\n".join(paragraphs)

https://blog.searce.com/tips-tricks-for-using-google-vision-api-for-text-detection-2d6d1e0c6361

def draw_boxes(image, bounds, color, width=5):
    draw = ImageDraw.Draw(image)
    for bound in bounds:
        draw.line([
            bound.vertices[0].x, bound.vertices[0].y,
            bound.vertices[1].x, bound.vertices[1].y,
            bound.vertices[2].x, bound.vertices[2].y,
            bound.vertices[3].x, bound.vertices[3].y,
            bound.vertices[0].x, bound.vertices[0].y], fill=color, width=width)
    return image

def get_document_bounds(response, feature):
    bounds = []
    for i, page in enumerate(response.full_text_annotation.pages):
        for block in page.blocks:
            if feature == FeatureType.BLOCK:
                bounds.append(block.bounding_box)
            for paragraph in block.paragraphs:
                if feature == FeatureType.PARA:
                    bounds.append(paragraph.bounding_box)
                for word in paragraph.words:
                    for symbol in word.symbols:
                        if (feature == FeatureType.SYMBOL):
                            bounds.append(symbol.bounding_box)
                    if (feature == FeatureType.WORD):
                        bounds.append(word.bounding_box)
    return bounds

bounds = get_document_bounds(response, FeatureType.WORD)
draw_boxes(image, bounds, 'yellow')
*/
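// A hedged JavaScript counterpart to the get_document_bounds() snippet quoted
// above (an illustrative sketch, not part of the original file): collect the
// word-level bounding boxes from one OCR'd page, e.g. to overlay finer-grained
// rects than the per-block ones built by display_gv_ocr_bounding_boxes().
function gv_ocr_get_word_bounding_boxes(page)
{
    var bounds = [];

    page.blocks.forEach(function(block) {
        block.paragraphs.forEach(function(paragraph) {
            paragraph.words.forEach(function(word) {
                bounds.push(word.boundingBox);
            });
        });
    });

    return bounds;
}

// e.g.:
//   var word_bounds = gv_ocr_get_word_bounding_boxes(gv_ocr_json.fullTextAnnotation.pages[0]);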
function createMirador3Viewer()
{
    var doc_url = new URL(window.location);
    var goto_page = doc_url.searchParams.get('gotoPage') || 1;
    var canvas_index = goto_page - 1;

    var mirador3_config = {
        "manifests": {
            "https://intermuse.sowemustthink.space/greenstone3/sample-manifest.json": {
                "provider": "InterMusE"
            }
        },
        "window": {
            "panels": {
                "annotations": true,
                "attribution": true
            },
            "defaultSideBarPanel": "annotations",
            "sideBarOpenByDefault": true
        },
        //"annotations": {
        //    "htmlSanitizationRuleSet": 'iiif', // See src/lib/htmlRules.js for acceptable values
        //    "filteredMotivations": ['oa:commenting', 'oa:tagging', 'sc:painting', 'commenting', 'tagging'],
        //},
        "requests": {
            "postprocessors": [
                (url, action) => {
                    if (action.type === "mirador/RECEIVE_MANIFEST") {
                        //console.log(action)
                        //console.log(iiif_manifest);
                        return { ...action, "manifestJson": iiif_manifest };
                    }
                }
            ]
        },
        "windows": [
            {
                "loadedManifest": "https://intermuse.sowemustthink.space/greenstone3/sample-manifest.json",
                "canvasIndex": canvas_index,
                "thumbnailNavigationPosition": 'far-bottom'
            }
        ]
    };

    var mirador3_viewer = mirador3WithAnnotations.initMirador3Viewer('mirador3-viewer', mirador3_config);

    /*
    var mirador3_viewer = Mirador.viewer({
        "id": "mirador3-viewer",
        "manifests": {
            "https://intermuse.sowemustthink.space/greenstone3/sample-manifest.json": {
                "provider": "IntermusE"
            }
        },
        "window": {
            "panels": {
                "annotations": true,
                "attribution": false
            }
        },
        //"annotations": {
        //    "htmlSanitizationRuleSet": 'iiif', // See src/lib/htmlRules.js for acceptable values
        //    "filteredMotivations": ['oa:commenting', 'oa:tagging', 'sc:painting', 'commenting', 'tagging'],
        //},
        "requests": {
            "postprocessors": [
                (url, action) => {
                    if (action.type === "mirador/RECEIVE_MANIFEST") {
                        //console.log(action)
                        //console.log(iiif_manifest);
                        return { ...action, "manifestJson": iiif_manifest };
                    }
                }
            ]
        },
        "windows": [
            {
                "loadedManifest": "https://intermuse.sowemustthink.space/greenstone3/sample-manifest.json",
                "canvasIndex": 0,
                "thumbnailNavigationPosition": 'far-bottom'
            }
        ]
    });
    */

    return mirador3_viewer;
}

function initIIIFAnnotations()
{
    var iiif_annotations_example = {
        "@context": "http://www.shared-canvas.org/ns/context.json",
        "@id": "https://iiif.harvardartmuseums.org/manifests/object/299843/list/47174896",
        "@type": "sc:AnnotationList",
        "resources": [
            {
                "@context": "http://iiif.io/api/presentation/2/context.json",
                "@id": "https://iiif.harvardartmuseums.org/annotations/9641482",
                "@type": "oa:Annotation",
                "motivation": [ "oa:commenting" ],
                "on": {
                    "@type": "oa:SpecificResource",
                    "full": "https://iiif.harvardartmuseums.org/manifests/object/299843/canvas/canvas-47174896",
                    "selector": {
                        "@type": "oa:FragmentSelector",
                        "value": "xywh=622,591,642,940"
                    },
                    "within": {
                        "@id": "https://iiif.harvardartmuseums.org/manifests/object/299843",
                        "@type": "sc:Manifest"
                    }
                },
                "resource": [
                    {
                        "@type": "dctypes:Text",
                        // NOTE: the exact markup in 'chars' is an assumption, kept
                        // minimal and consistent with the "text/html" format below
                        "chars": "<p>age: 35-52<br/>"
                               + "gender: Female(66.337677%)<br/>"
                               + "CALM: 55.438412%<br/>"
                               + "CONFUSED: 3.949288%<br/>"
                               + "SURPRISED: 2.33092%<br/>"
                               + "DISGUSTED: 0.545727%<br/>"
                               + "HAPPY: 1.549943%<br/>"
                               + "ANGRY: 2.082294%<br/>"
                               + "SAD: 34.103416%</p>"
                               + "<p>Generated by AWS Rekognition</p>",
                        "format": "text/html"
                    }
                ]
            }
            // More ...
        ]
    };

    document.open("application/json");
    document.write(JSON.stringify(iiif_annotations_example));
    document.close();
}

function initIIIFManifest(iiifpres_root_id, iiifpres_label, iiif_doc_images)
{
    console.log("IIIF_Doc_Images");
    console.log(iiif_doc_images);

    // Developed with reference to the following manifest example:
    //   https://iiif.archivelab.org/iiif/McGillLibrary-rbsc_ms-medieval-073-18802/manifest.json

    var iiif_manifest = {
        "@context": "http://iiif.io/api/presentation/2/context.json",
        "@id": iiifpres_root_id + "/manifest",
        "@type": "sc:Manifest",
        "label": iiifpres_label,
        "attribution": "The Internet of Musical Events (InterMusE): Digital Scholarship, Community, and the Archiving of Performance",
        "license": "https://**** To Be Determined ****",
        "logo": "https://intermuse.datatodata.org/wp-content/uploads/sites/4/2021/03/cropped-cropped-cropped-1.png"
    };

    /*
    var iiif_metadata = [
        { "label": "creator", "value": ["aaa1","aaa2"] },
        { "label": "title",   "value": ["ttt"] }
    ];
    iiif_manifest.metadata = iiif_metadata;
    */

    // trivial IIIF sequence => has one IIIF canvas in it
    var iiif_sequence = {
        "@context": "http://iiif.io/api/image/2/context.json",
        "@id": iiifpres_root_id + "/sequence/normal",
        "@type": "sc:Sequence"
    };

    var iiif_canvases = [];

    var assocfilepath = gs.documentMetadata['assocfilepath'];
    var simple_doc_type = (gs.documentMetadata.docType == "simple");

    for (var i = 0; i < iiif_doc_images.length; i++) {