Changeset 36997 for gs3-installations


Ignore:
Timestamp:
2022-12-09T00:10:37+13:00 (17 months ago)
Author:
davidb
Message:

Collection now developed to the point where Google Vision OCR bounding boxes are displayed over the screen-size image for a simple doc

Location:
gs3-installations/intermuse/trunk/sites/intermuse/collect/programmes
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • gs3-installations/intermuse/trunk/sites/intermuse/collect/programmes/js/document_viewer.js

    r36993 r36997  
     1function load_gv_dococr_json(json_file, callback) {
     2
     3    var http_assocfilepath = gs.collectionMetadata["httpPath"]+"/index/assoc/"+gs.documentMetadata["assocfilepath"];
     4    var json_url = http_assocfilepath +"/"+json_file;
     5
     6    $.ajax({
     7    method: "GET",
     8    url: json_url
     9    })
     10        .fail(function(jqXHR,textStatus) {
     11            console.error("load_gv_dococr_json(): failed to retrieve url '" + json_url +"'");
     12        console.error(textStatus);
     13        })
     14        .done(function(gv_ocr_json_result) {
     15        callback(gv_ocr_json_result);
     16
     17        });
     18   
     19}
     20
     21function display_gv_ocr_bounding_boxes(gv_ocr_json)
     22{
     23    console.log(gv_ocr_json);
     24
     25   
     26    var full_text_annotation = gv_ocr_json.fullTextAnnotation;
     27    var pages = full_text_annotation.pages;
     28    var num_pages = pages.length;
     29   
     30    if (num_pages == 1) {
     31    var page = pages[0];
     32
     33    var bounding_box_rects = [];
     34    var page_x_dim = page.width;
     35    var page_y_dim = page.height;
     36
     37    var blocks = page.blocks;
     38    var num_blocks = blocks.length;
     39
     40    for (b=0; b<num_blocks; b++) {
     41        var block = blocks[b];
     42
     43        var boundingBox = block.boundingBox;
     44
     45        var min_x = Number.MAX_SAFE_INTEGER;
     46        var min_y = Number.MAX_SAFE_INTEGER;
     47        var max_x = Number.MIN_SAFE_INTEGER;
     48        var max_y = Number.MIN_SAFE_INTEGER;
     49
     50        var vertices = boundingBox.vertices;
     51        var num_vertices = vertices.length;
     52
     53        for (v=0; v<num_vertices; v++) {
     54        var x = vertices[v].x;
     55        var y = vertices[v].y;
     56       
     57        min_x = Math.min(min_x,x);
     58        min_y = Math.min(min_y,y);
     59        max_x = Math.max(max_x,x);
     60        max_y = Math.max(max_y,y);
     61        }
     62
     63        var x_org = min_x;
     64        var y_org = min_y;
     65        var x_dim = max_x - min_x +1;
     66        var y_dim = max_y - min_y +1;
     67
     68        var rect = { "x_org": x_org, "y_org": y_org, "x_dim": x_dim, "y_dim": y_dim};
     69        bounding_box_rects.push(rect);
     70    }
     71
     72       
     73    display_scaled_div_bounding_boxes(bounding_box_rects, page_x_dim,page_y_dim);
     74   
     75    }
     76    else {
     77    console.error("display_gv_ocr_bounding_boxes(): incorrect number of pages found.")
     78    console.error("  Expected 1 page, found " + num_pages +" page(s)");
     79    }
     80
     81}
     82
     83
     84function display_scaled_div_bounding_boxes(bounding_box_rects,fullsize_x_dim,fullsize_y_dim)
     85{
     86    var screen_x_dim = gs.variables.screenImageWidth;
     87    var screen_y_dim = gs.variables.screenImageHeight;
     88
     89    var scale_x = screen_x_dim / fullsize_x_dim;
     90    var scale_y = screen_y_dim / fullsize_y_dim;
     91
     92    //console.log("scale x = " + scale_x);
     93    //console.log("scale y = " + scale_y);
     94   
     95    var docID = gs.variables.d;   
     96    var screen_image_id = "small"+docID;
     97
     98    var $boundingbox_overlay = $("<div>")
     99    .attr("id","ocr-boundingbox-overlay-"+docID)
     100    .attr("class","ocr-boundingbox-overlay");
     101
     102    var $screen_div = $('#'+screen_image_id);
     103    //var $screen_img = $screen_div.find("img");
     104   
     105    $screen_div.append($boundingbox_overlay);
     106    //$screen_img.append($boundingbox_overlay);
     107
     108   
     109    var num_bb_rects = bounding_box_rects.length;
     110
     111    console.log("Block bounding boxes:")
     112
     113    for (r=0; r<num_bb_rects; r++) {
     114    var rect = bounding_box_rects[r];
     115        console.log("  " + JSON.stringify(rect));
     116
     117    var scaled_x_org = rect.x_org * scale_x;
     118    var scaled_y_org = rect.y_org * scale_y;
     119    var scaled_x_dim = rect.x_dim * scale_x;
     120    var scaled_y_dim = rect.y_dim * scale_y;
     121       
     122    var $boundingbox_div = $("<div>")
     123        .attr("class","ocr-boundingbox")
     124        .css("left",  scaled_x_org)
     125        .css("top",   scaled_y_org)
     126        .css("width", scaled_x_dim)
     127        .css("height",scaled_y_dim);
     128   
     129    $boundingbox_overlay.append($boundingbox_div)
     130    }
     131}
     132
    1133/*
    2134
  • gs3-installations/intermuse/trunk/sites/intermuse/collect/programmes/transform/pages/document.xsl

    r36994 r36997  
    99    exclude-result-prefixes="java util gsf">
    1010
     11
     <!-- Named template "iiif-links": outputs two links for the image identified by
          the "identifier" param, one to its IIIF info.json and one to its full-size
          default.jpg. Both URLs have the form
          /gs-cantaloupe/iiif/2/{site_name}:{collNameChecked}:{identifier}/...
          where $site_name and $collNameChecked are defined outside this template. -->
     12  <xsl:template name="iiif-links">
     13    <xsl:param name="identifier"/>
     14   
     15    <div style="padding-top: 10px; padding-bottom: 10px;">
     16      IIIF Server Info URL:
     17      <a href="/gs-cantaloupe/iiif/2/{$site_name}:{$collNameChecked}:{$identifier}/info.json">
     18    /gs-cantaloupe/iiif/2/<xsl:value-of select="$site_name"/>:<xsl:value-of select="$collNameChecked"/>:<xsl:value-of select="$identifier"/>/info.json
     19    </a>
     20    </div>
     21
     22    <div style="padding-top: 10px; padding-bottom: 10px;">
     23      IIIF Server Image URL:
     24      <a href="/gs-cantaloupe/iiif/2/{$site_name}:{$collNameChecked}:{$identifier}/full/full/0/default.jpg">
     25    /gs-cantaloupe/iiif/2/<xsl:value-of select="$site_name"/>:<xsl:value-of select="$collNameChecked"/>:<xsl:value-of select="$identifier"/>/full/full/0/default.jpg
     26    </a> <br />
     27    </div>
     28   
     29  </xsl:template>
     30
     31
     <!-- Named template "googlevision-ocr-json": outputs a link to the Google Vision
          OCR JSON file of a document, then a script that loads that file and draws
          its bounding boxes.
          The href is the collection's 'httpPath' metadata, followed by /index/assoc/,
          the "assocfilepath" param, and the fixed file name
          enable_document_ocrgoogle-vision-output.json.
          The script calls load_gv_dococr_json() with the same file name and
          display_gv_ocr_bounding_boxes as the callback.
          NOTE(review): the file name inside xsl:attribute is literal text followed by
          a line break and indentation, so that whitespace presumably ends up at the
          end of the href value; confirm, and wrap it in xsl:text if it does. -->
     32  <xsl:template name="googlevision-ocr-json">
     33    <xsl:param name="assocfilepath"/>
     34   
     35    <div style="padding-top: 10px; padding-bottom: 10px;">
     36      Google Vision OCR JSON:
     37      <a>
     38    <xsl:attribute name="href">
     39      <xsl:value-of disable-output-escaping="yes" select="/page/pageResponse/collection/metadataList/metadata[@name = 'httpPath']"/>
     40      <xsl:text>/index/assoc/</xsl:text><xsl:value-of disable-output-escaping="yes" select="$assocfilepath" /><xsl:text>/</xsl:text>enable_document_ocrgoogle-vision-output.json
     41    </xsl:attribute>
     42    enable_document_ocrgoogle-vision-output.json
     43      </a>
     44    </div>
     45    <gsf:script>
     46      load_gv_dococr_json("enable_document_ocrgoogle-vision-output.json",display_gv_ocr_bounding_boxes);
     47    </gsf:script>
     48  </xsl:template>
     49
    1150<!--
    12     <xsl:template name="documentHeading">
    13       <span style="font-weight:bold; font-size: 120%;">
    14         <xsl:call-template name="choose-title"/>
    15       </span>
    16       <gsf:metadata-table>
    17         <gsf:metadata name="ex.im.Work"/>
    18         <gsf:metadata name="ex.im.Artist"/>
    19         <gsf:metadata name="ex.im.Venue"/>
    20         <gsf:metadata name="ex.im.Composer"/>
    21       </gsf:metadata-table>
    22     </xsl:template>
     51  <xsl:template name="sectionPre">
     52    OCR'd Text:
     53  </xsl:template>
     54
     55  <xsl:template name="documentPre">
     56    OCR'd Text:
     57  </xsl:template>
    2358-->
     59
     <!-- Named template "sectionPost": for the current section node, calls
          "iiif-links" with the node's @nodeID as identifier, then calls
          "googlevision-ocr-json" with the assocfilepath metadata selected from the
          root (gsf:metadata with select="root"). -->
     60  <xsl:template name="sectionPost">
     61    <!-- add in IIIF Server link for section image -->
     62    <xsl:variable name="nodeID" select="@nodeID"/>
     63    <xsl:call-template name="iiif-links">
     64      <xsl:with-param name="identifier" select="$nodeID" />
     65    </xsl:call-template>
     66    <!-- add in Google Vision OCR JSON link for section image -->
     67    <xsl:call-template name="googlevision-ocr-json">
     68      <xsl:with-param name="assocfilepath"><gsf:metadata name="assocfilepath" select="root"/></xsl:with-param>
     69    </xsl:call-template>   
     70  </xsl:template>
     71
     <!-- Named template "documentPre": includes the collection's
          js/document_viewer.js script, addressed relative to
          library/sites/{site_name}/collect/{collection name}, and emits the inline
          CSS for the OCR overlay. The overlay div is absolutely positioned at the
          top-left (left: 0, top: 0) and each bounding box div is absolutely
          positioned with a 1px solid black border. -->
     72  <xsl:template name="documentPre">
     73     <xsl:variable name="httpCollection">library/sites/<xsl:value-of select="$site_name"/>/collect/<xsl:value-of select="/page/pageResponse/collection/@name"/></xsl:variable>
     74     <gsf:script src="{$httpCollection}/js/document_viewer.js"/>
     75     <style>
     76       div.ocr-boundingbox-overlay {
     77         position: absolute;
     78         left: 0;
     79         top: 0;
     80       }
     81
     82       div.ocr-boundingbox {
     83         position: absolute;
     84         border: solid black 1px;
     85       }
     86
     87     </style>
     88  </xsl:template>
     89 
     <!-- Named template "documentPost": only when the context node has
          @docType='simple', calls "iiif-links" with $docID as identifier and then
          "googlevision-ocr-json" with the first assocfilepath metadata value
          (gsf:metadata with pos="first"). Nothing is output for other document types.
          NOTE(review): $docID is not defined in this template; it is presumably a
          global variable or parameter declared elsewhere, to be confirmed. -->
     90  <xsl:template name="documentPost">
     91    <!-- add in IIIF Server link for simple page image -->
     92    <xsl:if test="@docType='simple'">
     93      <xsl:call-template name="iiif-links">
     94    <xsl:with-param name="identifier" select="$docID" />
     95      </xsl:call-template>
     96      <!-- add in Google Vision OCR JSON link for simple page image -->     
     97      <xsl:call-template name="googlevision-ocr-json">
     98    <xsl:with-param name="assocfilepath"><gsf:metadata name="assocfilepath" pos="first"/></xsl:with-param>
     99      </xsl:call-template>
     100    </xsl:if>   
     101  </xsl:template>
     102
    24103 
    25104    <!-- content of a simple document. Will not be used for editing mode -->
    26105    <xsl:template name="documentPostInProgress">
    27106
    28             <div style="padding-top: 10px; padding-bottom: 10px;">
    29               <gsf:variable name="OID"><xsl:value-of select="@nodeID"/></gsf:variable>
    30               IIIF Server Info URL:
    31               <a href="/gs-cantaloupe/iiif/2/{$site_name}:{$collNameChecked}:{$OID}/info.json">
    32                 /gs-cantaloupe/iiif/2/<xsl:value-of select="$site_name"/>:<xsl:value-of select="$collNameChecked"/>:<xsl:value-of select="$OID"/>/info.json
    33               </a>
    34             </div>
    35107
    36108            <!--
Note: See TracChangeset for help on using the changeset viewer.