Changeset 35768 for gs3-extensions
- Timestamp:
- 2021-12-07T15:41:21+13:00 (2 years ago)
- Location:
- gs3-extensions/atea-nlp-tools/trunk/src/ocr
- Files:
-
- 11 added
- 3 deleted
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/atea-nlp-tools/trunk/src/ocr/src/main/java/org/atea/nlptools/ocr/abstractions/objects/HttpStatusCode.java
r35733 r35768 4 4 { 5 5 /** 6 * 400 Bad Request 7 * The server could not understand the request due to invalid syntax. 8 */ 9 public static final int BadRequest = 400; 10 11 /** 6 12 * 403 Forbidden 7 13 * The client does not have access rights to the requested content. 8 14 */ 9 public static final int ClientForbidden = 403; 15 public static final int Forbidden = 403; 16 17 /** 18 * 404 Not Found 19 * The server cannot find the requested resource. 20 */ 21 public static final int NotFound = 404; 22 23 /** 24 * 405 Method Not Allowed 25 * The request method is known by the server but not supported by the target resource. 26 */ 27 public static final int MethodNotAllowed = 405; 10 28 11 29 /** … … 13 31 * The request is larger than limits defined by the server. 14 32 */ 15 public static final int ClientPayloadTooLarge = 413; 33 public static final int PayloadTooLarge = 413; 34 35 /** 36 * 415 Unsupported Media Type 37 * The media format of the requested data is not supported by the server. 38 */ 39 public static final int UnsupportedMediaType = 415; 16 40 17 41 /** … … 19 43 * The server has encountered a situation that it does not know how to handle. 20 44 */ 21 public static final int ServerInternalServerError = 500; 45 public static final int InternalServerError = 500; 22 46 } -
gs3-extensions/atea-nlp-tools/trunk/src/ocr/src/main/java/org/atea/nlptools/ocr/abstractions/services/IOcrService.java
r35733 r35768 4 4 5 5 import org.atea.nlptools.ocr.abstractions.objects.IOcrOptions; 6 import org.atea.nlptools.ocr.abstractions.objects.IOcrOutput; 6 7 7 8 /** … … 16 17 * @return The recognised content of the file. 17 18 */ 18 String run(File file, IOcrOptions options) throws Exception; 19 IOcrOutput run(File file, IOcrOptions options) throws Exception; 19 20 } -
gs3-extensions/atea-nlp-tools/trunk/src/ocr/src/main/java/org/atea/nlptools/ocr/services/TesseractOcrService.java
r35733 r35768 2 2 3 3 import java.io.File; 4 import java.util.UUID; 4 5 5 6 import org.atea.nlptools.ocr.Exceptions.LeptonicaException; 6 7 import org.atea.nlptools.ocr.Exceptions.TesseractException; 7 8 import org.atea.nlptools.ocr.abstractions.objects.IOcrOptions; 9 import org.atea.nlptools.ocr.abstractions.objects.IOcrOutput; 8 10 import org.atea.nlptools.ocr.abstractions.services.IOcrService; 11 import org.atea.nlptools.ocr.objects.OcrOutput; 9 12 import org.atea.nlptools.ocr.objects.TesseractOptions; 10 13 … … 20 23 { 21 24 private final String tessData; 25 private final File thresholdOutputPath; 22 26 23 27 /** … … 25 29 * @param tesseractDataPath Path to the tesseract data directory. 26 30 */ 27 public TesseractOcrService(String tesseractDataPath) 31 public TesseractOcrService(String tesseractDataPath, File thresholdOutputPath) 28 32 { 29 33 this.tessData = tesseractDataPath; 34 this.thresholdOutputPath = thresholdOutputPath; 30 35 } 31 36 32 37 @Override 33 public String run(File file, IOcrOptions options) 38 public IOcrOutput run(File file, IOcrOptions options) 34 39 throws Exception, IllegalArgumentException 35 40 { … … 41 46 TesseractOptions tOptions = (TesseractOptions)options; 42 47 PIX inputImage = null; 48 PIX thresholdedImage = null; 43 49 TessBaseAPI api = null; 44 50 BytePointer outputTextPtr = null; … … 59 65 api.SetPageSegMode(tOptions.pageSegmentationMode); 60 66 api.SetImage(inputImage); 67 api.ReadConfigFile("get.images"); 68 61 69 outputTextPtr = api.GetUTF8Text(); 70 71 String fileName = UUID.randomUUID().toString() + ".webp"; 72 File thresholdOutput = new File(this.thresholdOutputPath, fileName); 73 thresholdedImage = api.GetThresholdedImage(); 74 // lept.pixWriteWebP("temp", thresholdedImage, 75, 100); 75 lept.pixWrite(thresholdOutput.getAbsolutePath(), thresholdedImage, lept.IFF_WEBP); 62 76 63 return outputTextPtr.getString(); 77 return new OcrOutput(outputTextPtr.getString(), thresholdOutput); 64 78 } 65 79 finally 66 80 { 67 if (inputImage != null) 68 { 81 if (inputImage != null) { 69 82 lept.pixDestroy(inputImage); 83 } 84 85 if (thresholdedImage != null) { 86 lept.pixDestroy(thresholdedImage); 70 87 } 71 88 -
gs3-extensions/atea-nlp-tools/trunk/src/ocr/src/main/java/org/atea/nlptools/ocr/servlets/TesseractServlet.java
r35733 r35768 12 12 import java.util.UUID; 13 13 14 import javax.servlet.ServletConfig; 14 15 import javax.servlet.ServletException; 15 16 import javax.servlet.annotation.MultipartConfig; … … 29 30 import org.atea.nlptools.ocr.abstractions.objects.IOcrFile; 30 31 import org.atea.nlptools.ocr.abstractions.objects.IOcrOptions; 32 import org.atea.nlptools.ocr.abstractions.objects.IOcrOutput; 31 33 import org.atea.nlptools.ocr.abstractions.services.IOcrService; 32 34 import org.atea.nlptools.ocr.objects.OcrFile; … … 62 64 63 65 @Override 64 public void init() 65 { 66 public void init(ServletConfig config) 67 throws ServletException 68 { 69 super.init(config); 70 66 71 Properties prop = new Properties(); 67 72 … … 78 83 79 84 String tessBin = prop.getProperty("tesseract.data.path"); 80 this.ocrService = new TesseractOcrService(tessBin); 85 File tempDir = new File((String)config.getServletContext().getAttribute("tmpdir")); 86 this.ocrService = new TesseractOcrService(tessBin, tempDir); 81 87 82 88 logger.info("Initialised!"); … … 87 93 throws IOException 88 94 { 89 response.sendError(HttpStatusCode.ClientForbidden, "POST Multipart request expected."); 95 response.sendError(HttpStatusCode.Forbidden, "POST Multipart request expected."); 90 96 } 91 97 … … 118 124 } 119 125 120 String ocrOutput = ocrService.run(file.getTempFile(), actualOptions); 126 IOcrOutput ocrOutput = ocrService.run(file.getTempFile(), actualOptions); 121 127 122 128 writer.beginObject(); … … 129 135 130 136 writer.name("text"); 131 writer.value(ocrOutput); 137 writer.value(ocrOutput.getText()); 138 139 writer.name("thresholdedImageKey"); 140 writer.value(ocrOutput.getThresholdedImage().getName()); 132 141 133 142 writer.endObject(); … … 139 148 catch (Exception ex) 140 149 { 141 response.sendError(HttpStatusCode.ServerInternalServerError, "Failed to process the request."); 150 response.sendError(HttpStatusCode.InternalServerError, "Failed to process the request."); 142 151 logger.error("Failed to complete API call", ex); 143 152 } … … 158 167 if (parts.size() > MaxParts) 159 168 { 160 response.sendError(HttpStatusCode.ClientPayloadTooLarge, "No more than " + MaxParts + " parts may be submitted."); 169 response.sendError(HttpStatusCode.PayloadTooLarge, "No more than " + MaxParts + " parts may be submitted."); 161 170 return null; 162 171 } … … 172 181 if (p.getSize() > MaxPartSize) 173 182 { 174 response.sendError(HttpStatusCode.ClientPayloadTooLarge, "A submitted part must be no more than " + MaxPartSize + " bytes"); 183 response.sendError(HttpStatusCode.PayloadTooLarge, "A submitted part must be no more than " + MaxPartSize + " bytes"); 175 184 return null; 176 185 } -
gs3-extensions/atea-nlp-tools/trunk/src/ocr/src/main/webapp/WEB-INF/web.xml
r35733 r35768 29 29 parameters, including zero. 30 30 --> 31 32 <listener> 33 <listener-class>org.atea.nlptools.ocr.listeners.MyServletContextListener</listener-class> 34 </listener> 31 35 32 36 <filter> … … 67 71 <servlet-name>tesseract</servlet-name> 68 72 <servlet-class>org.atea.nlptools.ocr.servlets.TesseractServlet</servlet-class> 69 <load-on-startup>0</load-on-startup> 73 </servlet> 74 75 <servlet> 76 <servlet-name>imageRetrieval</servlet-name> 77 <servlet-class>org.atea.nlptools.ocr.servlets.ImageRetrievalServlet</servlet-class> 70 78 </servlet> 71 79 … … 98 106 </servlet-mapping> 99 107 108 <servlet-mapping> 109 <servlet-name>imageRetrieval</servlet-name> 110 <url-pattern>/image</url-pattern> 111 </servlet-mapping> 112 100 113 <!-- Define the default session timeout for your application, 101 114 in minutes. From a servlet or JSP page, you can modify
Note:
See TracChangeset
for help on using the changeset viewer.