1 | package org.greenstone.gatherer.remote;
|
---|
2 |
|
---|
3 | import java.io.*;
|
---|
4 | import java.util.zip.*;
|
---|
5 | import org.greenstone.gatherer.util.ZipTools;
|
---|
6 |
|
---|
7 |
|
---|
8 | /**
|
---|
9 | */
|
---|
10 | public class ZipCollectionArchives
|
---|
11 | {
|
---|
12 | static public void main(String[] args)
|
---|
13 | {
|
---|
14 | if (args.length != 3) {
|
---|
15 | System.err.println("Usage: ZipCollectionArchives <zip-file> <collect-directory-path> <collection-name>");
|
---|
16 | return;
|
---|
17 | }
|
---|
18 |
|
---|
19 | String zip_file_path = args[0];
|
---|
20 | String collect_directory_path = args[1];
|
---|
21 | String collection_name = args[2];
|
---|
22 |
|
---|
23 | if (!collect_directory_path.endsWith(File.separator)) {
|
---|
24 | collect_directory_path += File.separator;
|
---|
25 | }
|
---|
26 |
|
---|
27 | try {
|
---|
28 | ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(zip_file_path));
|
---|
29 |
|
---|
30 | // Add archives directory, with doc.xml files only
|
---|
31 | String archives_relative_path = collection_name + File.separator + "archives";
|
---|
32 | ZipTools.ZipFilter collection_archives_zip_filter = new CollectionArchivesZipFilter();
|
---|
33 | ZipTools.addFileToZip(zos, collect_directory_path, archives_relative_path, collection_archives_zip_filter);
|
---|
34 |
|
---|
35 | zos.close();
|
---|
36 | }
|
---|
37 | catch (Exception exception) {
|
---|
38 | exception.printStackTrace();
|
---|
39 | }
|
---|
40 | }
|
---|
41 |
|
---|
42 |
|
---|
43 | static public class CollectionArchivesZipFilter
|
---|
44 | extends ZipTools.NullZipFilter
|
---|
45 | {
|
---|
46 | private byte[] bytes_remaining = new byte[0];
|
---|
47 | private int section_depth = 0;
|
---|
48 | private boolean in_section_content = false;
|
---|
49 |
|
---|
50 |
|
---|
51 | public boolean shouldIncludeFile(String relative_file_path)
|
---|
52 | {
|
---|
53 | // Only doc.xml files are included
|
---|
54 | return (relative_file_path.equals("doc.xml") || relative_file_path.endsWith(File.separator + "doc.xml"));
|
---|
55 | }
|
---|
56 |
|
---|
57 |
|
---|
58 | public int filterFileContent(BufferedInputStream bis, byte[] data, int bytes_to_read)
|
---|
59 | {
|
---|
60 | try {
|
---|
61 | // Read the next chunk of bytes from the file
|
---|
62 | byte[] bytes_read_this_time = new byte[bytes_to_read];
|
---|
63 | int num_bytes_read_this_time = bis.read(bytes_read_this_time, 0, bytes_to_read);
|
---|
64 | if (num_bytes_read_this_time == -1) {
|
---|
65 | // We must be at the end of the file
|
---|
66 | return -1;
|
---|
67 | }
|
---|
68 |
|
---|
69 | // Add this chunk to the bytes remaining from last time
|
---|
70 | byte[] bytes_unprocessed = new byte[bytes_remaining.length + num_bytes_read_this_time];
|
---|
71 | for (int i = 0; i < bytes_remaining.length; i++) {
|
---|
72 | bytes_unprocessed[i] = bytes_remaining[i];
|
---|
73 | }
|
---|
74 | for (int i = 0; i < num_bytes_read_this_time; i++) {
|
---|
75 | bytes_unprocessed[bytes_remaining.length + i] = bytes_read_this_time[i];
|
---|
76 | }
|
---|
77 |
|
---|
78 | // Split the chunk into lines and filter them
|
---|
79 | int num_bytes_processed = 0;
|
---|
80 | int num_bytes_unfiltered = 0;
|
---|
81 | int last_line_start = 0;
|
---|
82 | for (int i = 0; (i < bytes_unprocessed.length && num_bytes_unfiltered < bytes_to_read); i++) {
|
---|
83 | // Found the end of a line
|
---|
84 | if (bytes_unprocessed[i] == '\n') {
|
---|
85 | int line_length = i - last_line_start + 1;
|
---|
86 | String line = new String(bytes_unprocessed, last_line_start, line_length);
|
---|
87 |
|
---|
88 | // If this line isn't filtered, write it to the data array
|
---|
89 | if (!filterFileLine(line)) {
|
---|
90 | for (int j = 0; (j < line_length && num_bytes_unfiltered < bytes_to_read); j++) {
|
---|
91 | data[num_bytes_unfiltered] = bytes_unprocessed[last_line_start + j];
|
---|
92 | num_bytes_unfiltered++;
|
---|
93 | num_bytes_processed++;
|
---|
94 | }
|
---|
95 | }
|
---|
96 | // Otherwise ignore it
|
---|
97 | else {
|
---|
98 | num_bytes_processed = i + 1;
|
---|
99 | }
|
---|
100 |
|
---|
101 | last_line_start = i + 1;
|
---|
102 | }
|
---|
103 | }
|
---|
104 |
|
---|
105 | // Create a new byte array containing any bytes remaining (these will be processed next time)
|
---|
106 | int num_bytes_remaining = bytes_unprocessed.length - num_bytes_processed;
|
---|
107 | bytes_remaining = new byte[num_bytes_remaining];
|
---|
108 | for (int i = 0; i < num_bytes_remaining; i++) {
|
---|
109 | bytes_remaining[i] = bytes_unprocessed[num_bytes_processed + i];
|
---|
110 | }
|
---|
111 |
|
---|
112 | return num_bytes_unfiltered;
|
---|
113 | }
|
---|
114 | catch (Exception exception) {
|
---|
115 | exception.printStackTrace();
|
---|
116 | }
|
---|
117 |
|
---|
118 | return -1;
|
---|
119 | }
|
---|
120 |
|
---|
121 |
|
---|
122 | private boolean filterFileLine(String line)
|
---|
123 | {
|
---|
124 | boolean filter_line = false;
|
---|
125 |
|
---|
126 | if (line.indexOf("<Section>") != -1) {
|
---|
127 | section_depth++;
|
---|
128 | }
|
---|
129 | if (line.indexOf("<Content>") != -1) {
|
---|
130 | in_section_content = true;
|
---|
131 | }
|
---|
132 |
|
---|
133 | // If we're in a subsection or in a content element, filter this line
|
---|
134 | if (section_depth > 1 || in_section_content) {
|
---|
135 | filter_line = true;
|
---|
136 | }
|
---|
137 |
|
---|
138 | if (line.indexOf("</Content>") != -1) {
|
---|
139 | in_section_content = false;
|
---|
140 | }
|
---|
141 | if (line.indexOf("</Section>") != -1) {
|
---|
142 | section_depth--;
|
---|
143 | }
|
---|
144 |
|
---|
145 | return filter_line;
|
---|
146 | }
|
---|
147 | }
|
---|
148 | }
|
---|