Last change
on this file since 33466 was 33466, checked in by ak19, 5 years ago |
- WETProcessor.main() now processes a folder of *.warc.wet(.gz) files. Each of a file's WET records is written out into an individual file and put into either the keep folder or the discard folder, based on the amount of content (number of lines and/or content-length). 2. Moved unzipFile() from NZTLDProcessor.java into the new Utility.java class as a static method.
|
File size:
1.1 KB
|
Line | |
---|
1 | package org.greenstone.atea;
|
---|
2 |
|
---|
3 | import java.io.*;
|
---|
4 | import java.util.zip.GZIPInputStream;
|
---|
5 |
|
---|
/**
 * Static helper methods shared by the processing classes in this package.
 */
public class Utility {

    /** Size in bytes of the buffer used to copy decompressed data. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Decompresses a gzip file into {@code outFile}.
     *
     * Done in Java rather than by launching a {@code gunzip} process, to avoid
     * making this linux specific.
     * See https://www.mkyong.com/java/how-to-decompress-file-from-gzip-file/
     *
     * If decompression fails after the output file has been opened, the
     * incomplete output file is removed so that a truncated result is not
     * mistaken for a successful unzip.
     *
     * @param inZipFile the gzip-compressed file to read
     * @param outFile   the file to write the decompressed bytes to; it is
     *                  overwritten if it already exists
     * @return {@code true} if the whole file was decompressed, {@code false}
     *         if either argument is {@code null} or an I/O error occurred
     *         (the error is reported on {@code System.err})
     */
    public static boolean unzipFile(File inZipFile, File outFile) {

        if (inZipFile == null || outFile == null) {
            System.err.println("Failed to unzip " + inZipFile
                               + ": input and output files must not be null");
            return false;
        }

        byte[] buffer = new byte[BUFFER_SIZE];

        // Records whether outFile was opened (and so created/truncated) by this
        // call, so that a pre-existing file is never deleted when the failure
        // happened before the output was touched.
        boolean outputOpened = false;

        // try-with-resources closes the streams on success and on error.
        // The FileInputStream is its own resource: the GZIPInputStream
        // constructor reads the gzip header and can throw, in which case a
        // stream created inline as its argument would never be closed.
        try (
            FileInputStream fis = new FileInputStream(inZipFile);
            GZIPInputStream gzis = new GZIPInputStream(fis);
            FileOutputStream out = new FileOutputStream(outFile)
        ) {
            outputOpened = true;

            int len;
            // read() returns -1 at end of stream
            while ((len = gzis.read(buffer)) != -1) {
                out.write(buffer, 0, len);
            }

        } catch (IOException ex) {
            System.err.println("Failed to unzip " + inZipFile);
            ex.printStackTrace();

            // The streams are already closed at this point, so the partial
            // output can be removed.
            if (outputOpened && outFile.exists() && !outFile.delete()) {
                System.err.println("Could not remove incomplete output file " + outFile);
            }
            return false;
        }

        return true;
    }

}
|
---|
Note:
See
TracBrowser
for help on using the repository browser.