Changeset 13406


Ignore:
Timestamp:
2006-12-01T16:01:40+13:00 (17 years ago)
Author:
mdewsnip
Message:

(Remote building) Hugely simplified the zip file filtering in the process of fixing a nasty bug with zipping up collection archives.

Location:
trunk/gli/src/org/greenstone/gatherer/remote
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/gli/src/org/greenstone/gatherer/remote/ZipCollectionArchives.java

    r13390 r13406  
    5757
    5858
    59     public int filterFileContent(BufferedInputStream bis, byte[] data, int bytes_to_read)
     59    public boolean shouldIncludeFileContent(String relative_file_path)
    6060    {
     61        // Only content for doc.xml files is included
     62        return (relative_file_path.equals("doc.xml") || relative_file_path.endsWith(File.separator + "doc.xml"));
     63    }
     64
     65
     66    public void filterFileContent(String relative_file_path, BufferedInputStream bis, ZipOutputStream zos)
     67    {
     68        // Reset the status in case there were errors in previous doc.xml files
     69        section_depth = 0;
     70        in_section_content = false;
     71
     72        // Filter out the <Content>...</Content> of the doc.xml files
    6173        try {
    62         // Read the next chunk of bytes from the file
    63         byte[] bytes_read_this_time = new byte[bytes_to_read];
    64         int num_bytes_read_this_time = bis.read(bytes_read_this_time, 0, bytes_to_read);
    65         if (num_bytes_read_this_time == -1) {
    66             // We must be at the end of the file
    67             return -1;
    68         }
    69 
    70         // Add this chunk to the bytes remaining from last time
    71         byte[] bytes_unprocessed = new byte[bytes_remaining.length + num_bytes_read_this_time];
    72         for (int i = 0; i < bytes_remaining.length; i++) {
    73             bytes_unprocessed[i] = bytes_remaining[i];
    74         }
    75         for (int i = 0; i < num_bytes_read_this_time; i++) {
    76             bytes_unprocessed[bytes_remaining.length + i] = bytes_read_this_time[i];
    77         }
    78 
    79         // Split the chunk into lines and filter them
    80         int num_bytes_processed = 0;
    81         int num_bytes_unfiltered = 0;
    82         int last_line_start = 0;
    83         for (int i = 0; (i < bytes_unprocessed.length && num_bytes_unfiltered < bytes_to_read); i++) {
    84             // Found the end of a line
    85             if (bytes_unprocessed[i] == '\n') {
    86             int line_length = i - last_line_start + 1;
    87             String line = new String(bytes_unprocessed, last_line_start, line_length);
    88 
    89             // If this line isn't filtered, write it to the data array
    90             if (!filterFileLine(line)) {
    91                 for (int j = 0; (j < line_length && num_bytes_unfiltered < bytes_to_read); j++) {
    92                 data[num_bytes_unfiltered] = bytes_unprocessed[last_line_start + j];
    93                 num_bytes_unfiltered++;
    94                 num_bytes_processed++;
    95                 }
    96             }
    97             // Otherwise ignore it
    98             else {
    99                 num_bytes_processed = i + 1;
    100             }
    101 
    102             last_line_start = i + 1;
     74        BufferedReader reader = new BufferedReader(new InputStreamReader(bis, "UTF-8"));
     75        String line = null;
     76        while ((line = reader.readLine()) != null) {
     77            // If this line isn't filtered, write it out to zos
     78            if (!filterFileLine(line)) {
     79            line += "\n";
     80            byte[] bytes = line.getBytes("UTF-8");
     81            zos.write(bytes, 0, bytes.length);
    10382            }
    10483        }
    105 
    106         // Create a new byte array containing any bytes remaining (these will be processed next time)
    107         int num_bytes_remaining = bytes_unprocessed.length - num_bytes_processed;
    108         bytes_remaining = new byte[num_bytes_remaining];
    109         for (int i = 0; i < num_bytes_remaining; i++) {
    110             bytes_remaining[i] = bytes_unprocessed[num_bytes_processed + i];
    111         }
    112 
    113         return num_bytes_unfiltered;
    11484        }
    11585        catch (Exception exception) {
    11686        exception.printStackTrace();
    11787        }
    118 
    119         return -1;
    12088    }
    12189
  • trunk/gli/src/org/greenstone/gatherer/remote/ZipTools.java

    r13328 r13406  
    8787            if (zip_filter.shouldIncludeFileContent(relative_file_path)) {
    8888            BufferedInputStream bis = new BufferedInputStream(new FileInputStream(file));
    89             byte[] data = new byte[1024];
    90             int bytes_read;
    91             while ((bytes_read = zip_filter.filterFileContent(bis, data, 1024)) > -1) {
    92                 zos.write(data, 0, bytes_read);
    93             }
     89            zip_filter.filterFileContent(relative_file_path, bis, zos);
    9490            bis.close();
    9591            }
     
    109105    public boolean shouldIncludeFileContent(String relative_file_path);
    110106
    111     public int filterFileContent(BufferedInputStream bis, byte[] data, int bytes_to_read);
     107    public void filterFileContent(String relative_file_path, BufferedInputStream bis, ZipOutputStream zos);
    112108    }
    113109
     
    130126
    131127
    132     public int filterFileContent(BufferedInputStream bis, byte[] data, int bytes_to_read)
     128    public void filterFileContent(String relative_file_path, BufferedInputStream bis, ZipOutputStream zos)
    133129    {
     130        // No filtering: just read the file and write it directly out to zip
    134131        try {
    135         return bis.read(data, 0, bytes_to_read);
     132        byte[] data = new byte[1024];
     133        int bytes_read;
     134        while ((bytes_read = bis.read(data, 0, 1024)) > -1) {
     135            zos.write(data, 0, bytes_read);
     136        }
    136137        }
    137138        catch (Exception exception) {
     139        // We can't use DebugStream here
    138140        exception.printStackTrace();
    139141        }
    140 
    141         return -1;
    142142    }
    143143    }
Note: See TracChangeset for help on using the changeset viewer.