source: main/trunk/gli/src/org/greenstone/gatherer/remote/ZipCollectionArchives.java@ 21787

Last change on this file since 21787 was 13406, checked in by mdewsnip, 17 years ago

(Remote building) Hugely simplified the zip file filtering in the process of fixing a nasty bug with zipping up collection archives.

  • Property svn:keywords set to Author Date Id Revision
File size: 3.3 KB
Line 
1package org.greenstone.gatherer.remote;
2
3import java.io.*;
4import java.util.zip.*;
5
6
7/**
8 */
9public class ZipCollectionArchives
10{
11 static public void main(String[] args)
12 {
13 if (args.length != 3) {
14 System.err.println("Usage: ZipCollectionArchives <zip-file> <collect-directory-path> <collection-name>");
15 return;
16 }
17
18 String zip_file_path = args[0];
19 String collect_directory_path = args[1];
20 String collection_name = args[2];
21
22 if (!collect_directory_path.endsWith(File.separator)) {
23 collect_directory_path += File.separator;
24 }
25
26 try {
27 ZipOutputStream zos = new ZipOutputStream(new FileOutputStream(zip_file_path));
28
29 // Add archives directory, with doc.xml files only
30 String archives_relative_path = collection_name + File.separator + "archives";
31 ZipTools.ZipFilter collection_archives_zip_filter = new CollectionArchivesZipFilter();
32 ZipTools.addFileToZip(zos, collect_directory_path, archives_relative_path, collection_archives_zip_filter);
33
34 zos.close();
35 }
36 catch (Exception exception) {
37 exception.printStackTrace();
38 }
39 }
40
41
42 static public class CollectionArchivesZipFilter
43 extends ZipTools.NullZipFilter
44 {
45 private byte[] bytes_remaining = new byte[0];
46 private int section_depth = 0;
47 private boolean in_section_content = false;
48
49
50 public boolean shouldIncludeFile(String relative_file_path)
51 {
52 // Only doc.xml files are strictly required, but we include archives.inf as well to prevent
53 // errors when zipping up the archives of a collection where no files were imported
54 // (in this case the zip file would contain nothing at all)
55 return (relative_file_path.equals("archives.inf") || relative_file_path.endsWith(File.separator + "archives.inf") || relative_file_path.equals("doc.xml") || relative_file_path.endsWith(File.separator + "doc.xml"));
56 }
57
58
59 public boolean shouldIncludeFileContent(String relative_file_path)
60 {
61 // Only content for doc.xml files is included
62 return (relative_file_path.equals("doc.xml") || relative_file_path.endsWith(File.separator + "doc.xml"));
63 }
64
65
66 public void filterFileContent(String relative_file_path, BufferedInputStream bis, ZipOutputStream zos)
67 {
68 // Reset the status in case there were errors in previous doc.xml files
69 section_depth = 0;
70 in_section_content = false;
71
72 // Filter out the <Content>...</Content> of the doc.xml files
73 try {
74 BufferedReader reader = new BufferedReader(new InputStreamReader(bis, "UTF-8"));
75 String line = null;
76 while ((line = reader.readLine()) != null) {
77 // If this line isn't filtered, write it out to zos
78 if (!filterFileLine(line)) {
79 line += "\n";
80 byte[] bytes = line.getBytes("UTF-8");
81 zos.write(bytes, 0, bytes.length);
82 }
83 }
84 }
85 catch (Exception exception) {
86 exception.printStackTrace();
87 }
88 }
89
90
91 private boolean filterFileLine(String line)
92 {
93 boolean filter_line = false;
94
95 if (line.indexOf("<Section>") != -1) {
96 section_depth++;
97 }
98 if (line.indexOf("<Content>") != -1) {
99 in_section_content = true;
100 }
101
102 // If we're in a subsection or in a content element, filter this line
103 if (section_depth > 1 || in_section_content) {
104 filter_line = true;
105 }
106
107 if (line.indexOf("</Content>") != -1) {
108 in_section_content = false;
109 }
110 if (line.indexOf("</Section>") != -1) {
111 section_depth--;
112 }
113
114 return filter_line;
115 }
116 }
117}
Note: See TracBrowser for help on using the repository browser.