Changeset 37017
- Timestamp: 2022-12-10T22:04:37+13:00 (12 months ago)
- Location: gs3-installations/intermuse/trunk/sites/intermuse/collect/programmes
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-installations/intermuse/trunk/sites/intermuse/collect/programmes/js/document_viewer.js
r37011 r37017 19 19 } 20 20 21 function gv_ocr_paras_to_text(paras) 22 { 23 var ocr_text = ""; 24 25 var num_paras = paras.length; 26 for (var p=0; p<num_paras; p++) { 27 28 var para = paras[p]; 29 var words = para.words; 30 var num_words = words.length; 31 32 for (var w=0; w<num_words; w++) { 33 var word = words[w]; 34 35 var symbols = word.symbols; 36 var num_symbols = symbols.length; 37 38 for (var s=0; s<num_symbols; s++) { 39 40 var symbol = symbols[s]; 41 42 ocr_text += symbol.text; 43 44 // Enumerated fields listed at: 45 // http://googleapis.github.io/googleapis/java/grpc-google-cloud-vision-v1/0.1.5/apidocs/com/google/cloud/vision/v1/TextAnnotation.DetectedBreak.BreakType.html 46 if (symbol.property) { 47 var detected_break = symbol.property.detectedBreak; 48 if ((detected_break.type == "SPACE" || detected_break.type == "EOL_SURE_SPACE")) { 49 ocr_text += ' '; 50 } 51 else if (detected_break.type == "SURE_SPACE") { // very wide space 52 ocr_text += '\t'; 53 } 54 // "HYPHEN" at end end of line => assume word continues onto next line 55 else if (detected_break.type == "LINE_BREAK" ) { 56 ocr_text += '\n\n'; 57 } 58 } 59 } 60 } 61 } 62 63 return ocr_text; 64 } 65 21 66 function display_gv_ocr_bounding_boxes(gv_ocr_json,doc_sec_id) 22 67 { 23 console.log(gv_ocr_json);68 //console.log(gv_ocr_json); 24 69 25 70 … … 38 83 var num_blocks = blocks.length; 39 84 40 for ( b=0; b<num_blocks; b++) {85 for (var b=0; b<num_blocks; b++) { 41 86 var block = blocks[b]; 42 87 … … 51 96 var num_vertices = vertices.length; 52 97 53 for (v =0; v<num_vertices; v++) {98 for (var v=0; v<num_vertices; v++) { 54 99 var x = vertices[v].x; 55 100 var y = vertices[v].y; … … 67 112 68 113 var rect = { "x_org": x_org, "y_org": y_org, "x_dim": x_dim, "y_dim": y_dim}; 69 bounding_box_rects.push(rect); 114 115 var paras = block.paragraphs; 116 var ocr_text = gv_ocr_paras_to_text(paras); 117 rect.text = ocr_text; 118 119 bounding_box_rects.push(rect); 70 120 } 71 121 72 122 73 123 
display_scaled_div_bounding_boxes(doc_sec_id, bounding_box_rects, page_x_dim,page_y_dim); … … 100 150 101 151 var $screen_div = $('#'+screen_image_id); 102 //var $screen_img = $screen_div.find("img");103 152 104 153 $screen_div.append($boundingbox_overlay); 105 //$screen_img.append($boundingbox_overlay);106 107 154 108 155 var num_bb_rects = bounding_box_rects.length; 109 156 110 console.log("Block bounding boxes:")111 112 for ( r=0; r<num_bb_rects; r++) {157 //console.log("Block bounding boxes:") 158 159 for (var r=0; r<num_bb_rects; r++) { 113 160 var rect = bounding_box_rects[r]; 114 console.log(" " + JSON.stringify(rect));161 //console.log(" " + JSON.stringify(rect)); 115 162 116 163 var scaled_x_org = rect.x_org * scale_x; … … 125 172 .css("width", scaled_x_dim) 126 173 .css("height",scaled_y_dim); 127 128 $boundingbox_overlay.append($boundingbox_div) 174 175 var text = rect.text; 176 // //console.log("text = " + text); 177 // // Could use 'data' here, but then does not appear in the element itself 178 // // => for now do this 'old school' with an explicit 'data-text' attribute 179 // $boundingbox_div.attr("data-text",text); 180 181 $boundingbox_div.attr("text",text); 182 183 $boundingbox_overlay.append($boundingbox_div) 129 184 } 130 185 } -
gs3-installations/intermuse/trunk/sites/intermuse/collect/programmes/transform/pages/document.xsl
r37011 r37017 33 33 <xsl:param name="assocfilepath"/> 34 34 35 <gsf:variable name="GVDocumentOCRJSON"><gsf:metadata name="GVDocumentOCRJSON"/></gsf:variable> 36 <gsf:variable name="docSectionID"><xsl:value-of select="util:replace(@nodeID, '.', '_')}"/></gsf:variable> 37 35 38 <div style="padding-top: 10px; padding-bottom: 10px;"> 36 39 Google Vision OCR JSON: … … 38 41 <xsl:attribute name="href"> 39 42 <xsl:value-of disable-output-escaping="yes" select="/page/pageResponse/collection/metadataList/metadata[@name = 'httpPath']"/> 40 <xsl:text>/index/assoc/</xsl:text><xsl:value-of disable-output-escaping="yes" select="$assocfilepath" /><xsl:text>/</xsl:text> gv_document_ocrgoogle-vision-output.json43 <xsl:text>/index/assoc/</xsl:text><xsl:value-of disable-output-escaping="yes" select="$assocfilepath" /><xsl:text>/</xsl:text><xsl:value-of select="$GVDocumentOCRJSON"/> 41 44 </xsl:attribute> 42 gv_document_ocrgoogle-vision-output.json45 <xsl:value-of select="$GVDocumentOCRJSON"/> 43 46 </a> 44 47 </div> 45 <gsf:variable name="GVDocumentOCRJSON"><gsf:metadata name="GVDocumentOCRJSON"/></gsf:variable>46 <gsf:variable name="docSectionID"><xsl:value-of select="util:replace(@nodeID, '.', '_')}"/></gsf:variable>47 48 <gsf:script> 48 49 load_gv_dococr_json(gs.variables.GVDocumentOCRJSON,gs.variables.docSectionID,display_gv_ocr_bounding_boxes);
Note: See TracChangeset for help on using the changeset viewer.