Changeset 35768 for gs3-extensions


Ignore:
Timestamp:
2021-12-07T15:41:21+13:00 (2 years ago)
Author:
cstephen
Message:

implement thresholded image retrieval

Location:
gs3-extensions/atea-nlp-tools/trunk/src/ocr
Files:
11 added
3 deleted
5 edited

Legend:

Unmodified
Added
Removed
  • gs3-extensions/atea-nlp-tools/trunk/src/ocr/src/main/java/org/atea/nlptools/ocr/abstractions/objects/HttpStatusCode.java

    r35733 r35768  
    44{
    55    /**
     6     * 400 Bad Request
     7     * The server could not understand the request due to invalid syntax.
     8     */
     9    public static final int BadRequest = 400;
     10
     11    /**
    612     * 403 Forbidden
    713     * The client does not have access rights to the requested content.
    814     */
    9     public static final int ClientForbidden = 403;
     15    public static final int Forbidden = 403;
     16
     17    /**
     18     * 404 Not Found
     19     * The server cannot find the requested resource.
     20     */
     21    public static final int NotFound = 404;
     22
     23    /**
     24     * 405 Method Not Allowed
     25     * The request method is known by the server but not supported by the target resource.
     26     */
     27    public static final int MethodNotAllowed = 405;
    1028
    1129    /**
     
    1331     * The request is larger than limits defined by the server.
    1432     */
    15     public static final int ClientPayloadTooLarge = 413;
     33    public static final int PayloadTooLarge = 413;
     34
     35    /**
     36     * 415 Unsupported Media Type
     37     * The media format of the requested data is not supported by the server.
     38     */
     39    public static final int UnsupportedMediaType = 415;
    1640
    1741    /**
     
    1943     * The server has encountered a situation that it does not know how to handle.
    2044     */
    21     public static final int ServerInternalServerError = 500;
     45    public static final int InternalServerError = 500;
    2246}
  • gs3-extensions/atea-nlp-tools/trunk/src/ocr/src/main/java/org/atea/nlptools/ocr/abstractions/services/IOcrService.java

    r35733 r35768  
    44
    55import org.atea.nlptools.ocr.abstractions.objects.IOcrOptions;
     6import org.atea.nlptools.ocr.abstractions.objects.IOcrOutput;
    67
    78/**
     
    1617     * @return The recognised content of the file.
    1718     */
    18     String run(File file, IOcrOptions options) throws Exception;
     19    IOcrOutput run(File file, IOcrOptions options) throws Exception;
    1920}
  • gs3-extensions/atea-nlp-tools/trunk/src/ocr/src/main/java/org/atea/nlptools/ocr/services/TesseractOcrService.java

    r35733 r35768  
    22
    33import java.io.File;
     4import java.util.UUID;
    45
    56import org.atea.nlptools.ocr.Exceptions.LeptonicaException;
    67import org.atea.nlptools.ocr.Exceptions.TesseractException;
    78import org.atea.nlptools.ocr.abstractions.objects.IOcrOptions;
     9import org.atea.nlptools.ocr.abstractions.objects.IOcrOutput;
    810import org.atea.nlptools.ocr.abstractions.services.IOcrService;
     11import org.atea.nlptools.ocr.objects.OcrOutput;
    912import org.atea.nlptools.ocr.objects.TesseractOptions;
    1013
     
    2023{
    2124    private final String tessData;
     25    private final File thresholdOutputPath;
    2226
    2327    /**
     
    2529     * @param tesseractDataPath Path to the tesseract data directory.
    2630     */
    27     public TesseractOcrService(String tesseractDataPath)
     31    public TesseractOcrService(String tesseractDataPath, File thresholdOutputPath)
    2832    {
    2933        this.tessData = tesseractDataPath;
     34        this.thresholdOutputPath = thresholdOutputPath;
    3035    }
    3136
    3237    @Override
    33     public String run(File file, IOcrOptions options)
     38    public IOcrOutput run(File file, IOcrOptions options)
    3439        throws Exception, IllegalArgumentException
    3540    {
     
    4146        TesseractOptions tOptions = (TesseractOptions)options;
    4247        PIX inputImage = null;
     48        PIX thresholdedImage = null;
    4349        TessBaseAPI api = null;
    4450        BytePointer outputTextPtr = null;
     
    5965            api.SetPageSegMode(tOptions.pageSegmentationMode);
    6066            api.SetImage(inputImage);
     67            api.ReadConfigFile("get.images");
     68           
    6169            outputTextPtr = api.GetUTF8Text();
     70           
     71            String fileName = UUID.randomUUID().toString() + ".webp";
     72            File thresholdOutput = new File(this.thresholdOutputPath, fileName);
     73            thresholdedImage = api.GetThresholdedImage();
     74            // lept.pixWriteWebP("temp", thresholdedImage, 75, 100);
     75            lept.pixWrite(thresholdOutput.getAbsolutePath(), thresholdedImage, lept.IFF_WEBP);
    6276
    63             return outputTextPtr.getString();
     77            return new OcrOutput(outputTextPtr.getString(), thresholdOutput);
    6478        }
    6579        finally
    6680        {
    67             if (inputImage != null)
    68             {
     81            if (inputImage != null) {
    6982                lept.pixDestroy(inputImage);
     83            }
     84
     85            if (thresholdedImage != null) {
     86                lept.pixDestroy(thresholdedImage);
    7087            }
    7188
  • gs3-extensions/atea-nlp-tools/trunk/src/ocr/src/main/java/org/atea/nlptools/ocr/servlets/TesseractServlet.java

    r35733 r35768  
    1212import java.util.UUID;
    1313
     14import javax.servlet.ServletConfig;
    1415import javax.servlet.ServletException;
    1516import javax.servlet.annotation.MultipartConfig;
     
    2930import org.atea.nlptools.ocr.abstractions.objects.IOcrFile;
    3031import org.atea.nlptools.ocr.abstractions.objects.IOcrOptions;
     32import org.atea.nlptools.ocr.abstractions.objects.IOcrOutput;
    3133import org.atea.nlptools.ocr.abstractions.services.IOcrService;
    3234import org.atea.nlptools.ocr.objects.OcrFile;
     
    6264
    6365    @Override
    64     public void init()
    65     {
     66    public void init(ServletConfig config)
     67        throws ServletException
     68    {
     69        super.init(config);
     70
    6671        Properties prop = new Properties();
    6772
     
    7883
    7984        String tessBin = prop.getProperty("tesseract.data.path");
    80         this.ocrService = new TesseractOcrService(tessBin);
     85        File tempDir = new File((String)config.getServletContext().getAttribute("tmpdir"));
     86        this.ocrService = new TesseractOcrService(tessBin, tempDir);
    8187
    8288        logger.info("Initialised!");
     
    8793        throws IOException
    8894    {
    89         response.sendError(HttpStatusCode.ClientForbidden, "POST Multipart request expected.");
     95        response.sendError(HttpStatusCode.Forbidden, "POST Multipart request expected.");
    9096    }
    9197   
     
    118124                }
    119125
    120                 String ocrOutput = ocrService.run(file.getTempFile(), actualOptions);
     126                IOcrOutput ocrOutput = ocrService.run(file.getTempFile(), actualOptions);
    121127
    122128                writer.beginObject();
     
    129135
    130136                writer.name("text");
    131                 writer.value(ocrOutput);
     137                writer.value(ocrOutput.getText());
     138
     139                writer.name("thresholdedImageKey");
     140                writer.value(ocrOutput.getThresholdedImage().getName());
    132141
    133142                writer.endObject();
     
    139148        catch (Exception ex)
    140149        {
    141             response.sendError(HttpStatusCode.ServerInternalServerError, "Failed to process the request.");
     150            response.sendError(HttpStatusCode.InternalServerError, "Failed to process the request.");
    142151            logger.error("Failed to complete API call", ex);
    143152        }
     
    158167        if (parts.size() > MaxParts)
    159168        {
    160             response.sendError(HttpStatusCode.ClientPayloadTooLarge, "No more than " + MaxParts + " parts may be submitted.");
     169            response.sendError(HttpStatusCode.PayloadTooLarge, "No more than " + MaxParts + " parts may be submitted.");
    161170            return null;
    162171        }
     
    172181            if (p.getSize() > MaxPartSize)
    173182            {
    174                 response.sendError(HttpStatusCode.ClientPayloadTooLarge, "A submitted part must be no more than " + MaxPartSize + " bytes");
     183                response.sendError(HttpStatusCode.PayloadTooLarge, "A submitted part must be no more than " + MaxPartSize + " bytes");
    175184                return null;
    176185            }
  • gs3-extensions/atea-nlp-tools/trunk/src/ocr/src/main/webapp/WEB-INF/web.xml

    r35733 r35768  
    2929         parameters, including zero.
    3030    -->
     31
     32    <listener>
     33      <listener-class>org.atea.nlptools.ocr.listeners.MyServletContextListener</listener-class>
     34    </listener>
    3135
    3236    <filter>
     
    6771      <servlet-name>tesseract</servlet-name>
    6872      <servlet-class>org.atea.nlptools.ocr.servlets.TesseractServlet</servlet-class>
    69       <load-on-startup>0</load-on-startup>
     73    </servlet>
     74
     75    <servlet>
     76      <servlet-name>imageRetrieval</servlet-name>
     77      <servlet-class>org.atea.nlptools.ocr.servlets.ImageRetrievalServlet</servlet-class>
    7078    </servlet>
    7179
     
    98106    </servlet-mapping>
    99107
     108    <servlet-mapping>
     109      <servlet-name>imageRetrieval</servlet-name>
     110      <url-pattern>/image</url-pattern>
     111    </servlet-mapping>
     112
    100113    <!-- Define the default session timeout for your application,
    101114         in minutes.  From a servlet or JSP page, you can modify
Note: See TracChangeset for help on using the changeset viewer.