1 | package org.greenstone.gatherer.remote;
|
---|
2 |
|
---|
3 | import java.io.*;
|
---|
4 | import java.util.zip.*;
|
---|
5 |
|
---|
6 |
|
---|
7 | /**
|
---|
8 | */
|
---|
9 | public class ZipCollectionArchives
|
---|
10 | {
|
---|
11 | static public void main(String[] args)
|
---|
12 | {
|
---|
13 | if (args.length != 3) {
|
---|
14 | System.err.println("Usage: ZipCollectionArchives <zip-file> <collect-directory-path> <collection-name>");
|
---|
15 | return;
|
---|
16 | }
|
---|
17 |
|
---|
18 | String zip_file_path = args[0];
|
---|
19 | String collect_directory_path = args[1];
|
---|
20 | String collection_name = args[2];
|
---|
21 |
|
---|
22 | if (!collect_directory_path.endsWith(File.separator)) {
|
---|
23 | collect_directory_path += File.separator;
|
---|
24 | }
|
---|
25 |
|
---|
26 | try {
|
---|
27 | ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(zip_file_path));
|
---|
28 |
|
---|
29 | // Add archives directory, with doc.xml files only
|
---|
30 | String archives_relative_path = collection_name + File.separator + "archives";
|
---|
31 | ZipTools.ZipFilter collection_archives_zip_filter = new CollectionArchivesZipFilter();
|
---|
32 | ZipTools.addFileToZip(zos, collect_directory_path, archives_relative_path, collection_archives_zip_filter);
|
---|
33 |
|
---|
34 | zos.close();
|
---|
35 | }
|
---|
36 | catch (Exception exception) {
|
---|
37 | exception.printStackTrace();
|
---|
38 | }
|
---|
39 | }
|
---|
40 |
|
---|
41 |
|
---|
42 | static public class CollectionArchivesZipFilter
|
---|
43 | extends ZipTools.NullZipFilter
|
---|
44 | {
|
---|
45 | private byte[] bytes_remaining = new byte[0];
|
---|
46 | private int section_depth = 0;
|
---|
47 | private boolean in_section_content = false;
|
---|
48 |
|
---|
49 |
|
---|
50 | public boolean shouldIncludeFile(String relative_file_path)
|
---|
51 | {
|
---|
52 | // Only doc.xml files are strictly required, but we include archives.inf as well to prevent
|
---|
53 | // errors when zipping up the archives of a collection where no files were imported
|
---|
54 | // (in this case the zip file would contain nothing at all)
|
---|
55 | return (relative_file_path.equals("archives.inf") || relative_file_path.endsWith(File.separator + "archives.inf") || relative_file_path.equals("doc.xml") || relative_file_path.endsWith(File.separator + "doc.xml"));
|
---|
56 | }
|
---|
57 |
|
---|
58 |
|
---|
59 | public boolean shouldIncludeFileContent(String relative_file_path)
|
---|
60 | {
|
---|
61 | // Only content for doc.xml files is included
|
---|
62 | return (relative_file_path.equals("doc.xml") || relative_file_path.endsWith(File.separator + "doc.xml"));
|
---|
63 | }
|
---|
64 |
|
---|
65 |
|
---|
66 | public void filterFileContent(String relative_file_path, BufferedInputStream bis, ZipOutputStream zos)
|
---|
67 | {
|
---|
68 | // Reset the status in case there were errors in previous doc.xml files
|
---|
69 | section_depth = 0;
|
---|
70 | in_section_content = false;
|
---|
71 |
|
---|
72 | // Filter out the <Content>...</Content> of the doc.xml files
|
---|
73 | try {
|
---|
74 | BufferedReader reader = new BufferedReader(new InputStreamReader(bis, "UTF-8"));
|
---|
75 | String line = null;
|
---|
76 | while ((line = reader.readLine()) != null) {
|
---|
77 | // If this line isn't filtered, write it out to zos
|
---|
78 | if (!filterFileLine(line)) {
|
---|
79 | line += "\n";
|
---|
80 | byte[] bytes = line.getBytes("UTF-8");
|
---|
81 | zos.write(bytes, 0, bytes.length);
|
---|
82 | }
|
---|
83 | }
|
---|
84 | }
|
---|
85 | catch (Exception exception) {
|
---|
86 | exception.printStackTrace();
|
---|
87 | }
|
---|
88 | }
|
---|
89 |
|
---|
90 |
|
---|
91 | private boolean filterFileLine(String line)
|
---|
92 | {
|
---|
93 | boolean filter_line = false;
|
---|
94 |
|
---|
95 | if (line.indexOf("<Section>") != -1) {
|
---|
96 | section_depth++;
|
---|
97 | }
|
---|
98 | if (line.indexOf("<Content>") != -1) {
|
---|
99 | in_section_content = true;
|
---|
100 | }
|
---|
101 |
|
---|
102 | // If we're in a subsection or in a content element, filter this line
|
---|
103 | if (section_depth > 1 || in_section_content) {
|
---|
104 | filter_line = true;
|
---|
105 | }
|
---|
106 |
|
---|
107 | if (line.indexOf("</Content>") != -1) {
|
---|
108 | in_section_content = false;
|
---|
109 | }
|
---|
110 | if (line.indexOf("</Section>") != -1) {
|
---|
111 | section_depth--;
|
---|
112 | }
|
---|
113 |
|
---|
114 | return filter_line;
|
---|
115 | }
|
---|
116 | }
|
---|
117 | }
|
---|