/**
 *#########################################################################
 *
 * A component of the Gatherer application, part of the Greenstone digital
 * library suite from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * <BR><BR>
 *
 * Author: Greenstone Digital Library, University of Waikato
 *
 * <BR><BR>
 *
 * Copyright (C) 2020 New Zealand Digital Library Project
 *
 * <BR><BR>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * <BR><BR>
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * <BR><BR>
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *########################################################################
 */
|
---|
37 | package org.greenstone.gatherer.metadata;
|
---|
38 |
|
---|
39 | import java.io.*;
|
---|
40 | import java.util.*;
|
---|
41 | import javax.swing.filechooser.FileNameExtensionFilter;
|
---|
42 | import javax.swing.JFileChooser;
|
---|
43 | import javax.swing.JFrame;
|
---|
44 | import javax.swing.JOptionPane;
|
---|
45 |
|
---|
46 | import org.apache.commons.csv.*;
|
---|
47 |
|
---|
48 | import org.greenstone.gatherer.util.SafeProcess;
|
---|
49 | import org.greenstone.gatherer.DebugStream;
|
---|
50 | import org.greenstone.gatherer.Dictionary;
|
---|
51 | import org.greenstone.gatherer.metadata.MetadataElement;
|
---|
52 | import org.greenstone.gatherer.metadata.MetadataValue;
|
---|
53 | import org.greenstone.gatherer.metadata.MetadataXMLFileManager;
|
---|
54 |
|
---|
55 |
|
---|
56 |
|
---|
57 | /**
|
---|
58 | * Class to export GLI metadata of a collection to a metadata.csv file.
|
---|
59 | * This class can also merge GLI meta for the collection onto an existing metadata.csv file.
|
---|
60 | * Merging is a cumulative process.
|
---|
61 | * Duplicate entries and values are not preserved.
|
---|
62 | * Uses TreeMap and TreeSet to keep everything alphabetically ordered.
|
---|
63 | * TODO: What about ordering by unicode. Is that the natural ordering for Java Strings?
|
---|
64 | * If so, this would support keeping metadata values ordered regardless of script used.
|
---|
65 | */
|
---|
66 | public class MetadataToCSV implements FileFilter {
|
---|
67 | private char meta_field_sep = ','; // comma is default field separator for CSV, comma separated values
|
---|
68 | private String meta_value_sep_re = "\\|"; // must escape | to get regex
|
---|
69 | private char meta_value_sep_char = '|'; // when written out to file
|
---|
70 | private String collection_directory_path = "";
|
---|
71 | private String coll_importdir_path = "";
|
---|
72 | private final int import_path_length;
|
---|
73 |
|
---|
74 | /** The CSV metadata file to be read and rewritten. */
|
---|
75 | //private String metadataCSVFilename = "metadata.csv";
|
---|
76 | private File metadataCSVFile;
|
---|
77 |
|
---|
78 | /** TODO: Is this useful?
|
---|
79 | * Not yet implemented: if this flag is true, then if a file mentioned in metadata.csv does not exist,
|
---|
80 | * its entry is dropped and won't appear again when the metadata.csv is written out again.
|
---|
81 | */
|
---|
82 | //private boolean removeMetaForFilesThatDoNotExist = false;
|
---|
83 |
|
---|
84 | private final String IMPORT_DIRNAME = "import";
|
---|
85 |
|
---|
86 | /** A Map of all files/docs in this collection and their metadata,
|
---|
87 | * itself tuples of metadata field names and their (possibly multiple) metadata values. */
|
---|
88 | TreeMap<File, TreeMap<String,TreeSet<String>>> collMetaMap = new TreeMap<File, TreeMap<String,TreeSet<String>>>();
|
---|
89 |
|
---|
90 | public MetadataToCSV(String collDirPath) {
|
---|
91 | this.collection_directory_path = collDirPath;
|
---|
92 | this.coll_importdir_path = collDirPath + IMPORT_DIRNAME + File.separator; //new File(collDirPath, IMPORT_DIRNAME).getAbsolutePath();
|
---|
93 | import_path_length = this.coll_importdir_path.length();
|
---|
94 | this.metadataCSVFile = new File(coll_importdir_path, "metadata.csv");
|
---|
95 | }
|
---|
96 |
|
---|
97 | public MetadataToCSV(String collDirPath, File metadataCSV) {
|
---|
98 | this(collDirPath);
|
---|
99 | this.metadataCSVFile = metadataCSVFile;
|
---|
100 | }
|
---|
101 |
|
---|
102 | public MetadataToCSV(String collDirPath, File metadataCSVFile, char metafieldSepChar, String readMetaValSepExpression, char writeMetaValSepChar) {
|
---|
103 | this(collDirPath, metadataCSVFile);
|
---|
104 | this.meta_field_sep = metafieldSepChar;
|
---|
105 | this.meta_value_sep_re = readMetaValSepExpression;
|
---|
106 | this.meta_value_sep_char = writeMetaValSepChar;
|
---|
107 | }
|
---|
108 |
|
---|
109 | /** Remove import path prefix from given file. Returned is the path of file relative to import. */
|
---|
110 | public String fileToRelativeString(File f) {
|
---|
111 | String fullPath = f.getAbsolutePath();
|
---|
112 | //System.err.println("@@@ fullpath: " + fullPath);
|
---|
113 | //System.err.println("@@@ coll_importdir_path: " + this.coll_importdir_path);
|
---|
114 | int indexMatch = fullPath.indexOf(coll_importdir_path);
|
---|
115 | if(indexMatch == -1) {
|
---|
116 | return fullPath;
|
---|
117 | } else {
|
---|
118 | return fullPath.substring(indexMatch+import_path_length);
|
---|
119 | }
|
---|
120 | }
|
---|
121 |
|
---|
122 |
|
---|
123 | /** helper methods to export metadata for collection files to csv
|
---|
124 | * Returns a Navigable Sorted Map of file names in the collection (relative to import folder), ordered alphabetically,
|
---|
125 | * mapped to each file's metadata, sorted alphabetically by metadata field name, and list of metadata values sorted alphabetically
|
---|
126 | */
|
---|
127 | public TreeMap<File, TreeMap<String,TreeSet<String>>> getAllAssignedMetadataForAllFiles() {
|
---|
128 | TreeMap<File, TreeMap<String,TreeSet<String>>> files_with_meta = new TreeMap<File, TreeMap<String,TreeSet<String>>>();
|
---|
129 |
|
---|
130 | ArrayList<File> files = listFilesInCollection(this.collection_directory_path);
|
---|
131 | Iterator<File> i = files.iterator();
|
---|
132 |
|
---|
133 | while(i.hasNext()) {
|
---|
134 | File f = i.next();
|
---|
135 | ArrayList file_meta = MetadataXMLFileManager.getMetadataAssignedToFile(f);
|
---|
136 |
|
---|
137 | //files_with_meta.put(f, file_meta);
|
---|
138 | TreeMap<String,TreeSet<String>> fileToMetaMap = new TreeMap<String,TreeSet<String>>();
|
---|
139 |
|
---|
140 | // debugging display
|
---|
141 | ///System.err.println("Meta for file: " + f.getAbsolutePath());
|
---|
142 | Iterator it = file_meta.iterator();
|
---|
143 | while(it.hasNext()) {
|
---|
144 | MetadataValue meta = (MetadataValue)it.next();
|
---|
145 | String metaValue = meta.getValue();
|
---|
146 | MetadataElement metaEl = meta.getMetadataElement();
|
---|
147 | String metaFieldName = metaEl.getFullName();
|
---|
148 | ///System.err.println(" field: " + metaFieldName);
|
---|
149 | ///System.err.println(" value: " + metaValue);
|
---|
150 |
|
---|
151 | TreeSet<String> vals = fileToMetaMap.get(metaFieldName);
|
---|
152 | if(vals == null) {
|
---|
153 | vals = new TreeSet<String>();
|
---|
154 | vals.add(metaValue);
|
---|
155 | fileToMetaMap.put(metaFieldName, vals);
|
---|
156 | } else {
|
---|
157 | vals.add(metaValue);
|
---|
158 | }
|
---|
159 | }
|
---|
160 |
|
---|
161 | files_with_meta.put(f, fileToMetaMap);
|
---|
162 | }
|
---|
163 |
|
---|
164 | return files_with_meta;
|
---|
165 | }
|
---|
166 |
|
---|
167 | // Get all meta in any metadata.csv file
|
---|
168 | // and add to it all meta assigned for docs in this collection
|
---|
169 | public void amalgamateAllMeta() {
|
---|
170 | TreeMap<File, TreeMap<String,TreeSet<String>>> assignedMeta = getAllAssignedMetadataForAllFiles();
|
---|
171 | TreeMap<File, TreeMap<String,TreeSet<String>>> csvFileMeta = loadMetaFromCSVFile(this.metadataCSVFile);
|
---|
172 |
|
---|
173 | if(collMetaMap.size() == 0) {
|
---|
174 |
|
---|
175 | if(assignedMeta.keySet().size() > csvFileMeta.keySet().size()) {
|
---|
176 | collMetaMap = assignedMeta;
|
---|
177 | merge(collMetaMap, csvFileMeta);
|
---|
178 | } else {
|
---|
179 | collMetaMap = csvFileMeta;
|
---|
180 | merge(collMetaMap, assignedMeta);
|
---|
181 | }
|
---|
182 | } else {
|
---|
183 |
|
---|
184 | merge(collMetaMap, assignedMeta);
|
---|
185 | merge(collMetaMap, csvFileMeta);
|
---|
186 | }
|
---|
187 |
|
---|
188 | }
|
---|
189 |
|
---|
190 | public TreeSet<String> getAllCollHeadings(TreeMap<File, TreeMap<String,TreeSet<String>>> metaMap) {
|
---|
191 | TreeSet<String> collHeadings = new TreeSet<String>();
|
---|
192 |
|
---|
193 | if(metaMap == null || metaMap.size() == 0) {
|
---|
194 | return collHeadings;
|
---|
195 | }
|
---|
196 | // get all meta field names and add into collHeadings. As it's a TreeSet,
|
---|
197 | // duplicates will be automatically ignored and collheadings will be sorted
|
---|
198 | Iterator<File> iFiles = metaMap.keySet().iterator();
|
---|
199 | while(iFiles.hasNext()) {
|
---|
200 | File f = iFiles.next();
|
---|
201 | TreeMap<String, TreeSet<String>> metaFields = metaMap.get(f);
|
---|
202 | Iterator<String> iMetaFields = metaFields.keySet().iterator();
|
---|
203 | while(iMetaFields.hasNext()) {
|
---|
204 | String fieldName = iMetaFields.next();
|
---|
205 | collHeadings.add(fieldName);
|
---|
206 | }
|
---|
207 | }
|
---|
208 |
|
---|
209 | return collHeadings;
|
---|
210 | }
|
---|
211 |
|
---|
212 | /** merge metaMap param into baseMetaMap: only portions not already present in baseMetaMap are added in
|
---|
213 | * whether these are new file entries, new metadata field entries for extant files, or metadata values for extant fields of files.
|
---|
214 | * A simple map.putALL() will not do the trick as collMetaMap is a complicated data structure.
|
---|
215 | */
|
---|
216 | public void merge(TreeMap<File, TreeMap<String,TreeSet<String>>> baseMetaMap, TreeMap<File, TreeMap<String,TreeSet<String>>> metaMap) {
|
---|
217 |
|
---|
218 | if(metaMap == null || metaMap.size() == 0) {
|
---|
219 | // nothing to do
|
---|
220 | return;
|
---|
221 | }
|
---|
222 |
|
---|
223 | Iterator<File> iFiles = metaMap.keySet().iterator();
|
---|
224 | while(iFiles.hasNext()) {
|
---|
225 | File f = iFiles.next();
|
---|
226 |
|
---|
227 | // check if this file already has an entry in baseMetaMap
|
---|
228 | TreeMap<String, TreeSet<String>> origMetaFields = baseMetaMap.get(f);
|
---|
229 |
|
---|
230 | TreeMap<String, TreeSet<String>> metaFields = metaMap.get(f);
|
---|
231 | Iterator<String> iMetaFields = metaFields.keySet().iterator();
|
---|
232 |
|
---|
233 | // if file in metaMap didn't exist in baseMetaMap, easy: just copy its entry across in entirety
|
---|
234 | if(origMetaFields == null) {
|
---|
235 | metaMap.put(f, metaFields);
|
---|
236 | continue;
|
---|
237 | }
|
---|
238 |
|
---|
239 | // else, file already exists in baseMetaMap, need to check if we have to merge any meta on the file
|
---|
240 | while(iMetaFields.hasNext()) {
|
---|
241 | String fieldName = iMetaFields.next();
|
---|
242 | TreeSet<String> metaValues = metaFields.get(fieldName);
|
---|
243 |
|
---|
244 | // check if this metadata field exists for the same file in baseMetaMap
|
---|
245 | TreeSet<String> origMetaValues = origMetaFields.get(fieldName);
|
---|
246 | if(origMetaValues == null) { // this metadata field name did not exist for file in baseMetaMap,
|
---|
247 | // so copy all vals for this fieldName into baseMetaMap's entry for this file
|
---|
248 | origMetaFields.put(fieldName, metaValues);
|
---|
249 | continue; // continue on inner loop
|
---|
250 | }
|
---|
251 |
|
---|
252 | // else the meta fieldName existed for that file in baseMetaMap
|
---|
253 | // Check if any of the metadata values didn't already exist, else add them in
|
---|
254 | Iterator<String> iMetaValues = metaValues.iterator();
|
---|
255 | while(iMetaValues.hasNext()) {
|
---|
256 | String metaValue = iMetaValues.next();
|
---|
257 |
|
---|
258 | if(!origMetaValues.contains(metaValue)) {
|
---|
259 | origMetaValues.add(metaValue);
|
---|
260 | }
|
---|
261 | }
|
---|
262 |
|
---|
263 | }
|
---|
264 | }
|
---|
265 | }
|
---|
266 |
|
---|
267 |
|
---|
268 | /** If successfully wrote out collection's meta from to a CSV file,
|
---|
269 | * then will need to remove all meta from GLI (metadata.xml files).
|
---|
270 | * Just del or rename those files to .bak?
|
---|
271 | * This dangerous method goes through all the metadata.xml files that were in use so far
|
---|
272 | * and removes all the child elements from meta xml files' DirectoryMetadata root elements
|
---|
273 | */
|
---|
274 | public boolean moveMetaXMLToCSV(File csvFile, JFrame parent) {
|
---|
275 |
|
---|
276 | // Warn the user about the operation being destructive
|
---|
277 | int result = JOptionPane.showConfirmDialog(parent,
|
---|
278 | Dictionary.get("ExportMeta.MoveMetaXMLToCSV_Warning_Message"),
|
---|
279 | Dictionary.get("General.Warning"),
|
---|
280 | JOptionPane.OK_CANCEL_OPTION,
|
---|
281 | JOptionPane.WARNING_MESSAGE);
|
---|
282 | if(result == JOptionPane.CANCEL_OPTION || result == JOptionPane.CLOSED_OPTION) {
|
---|
283 | // NO_OPTION shouldn't happen
|
---|
284 | return false;
|
---|
285 | }
|
---|
286 |
|
---|
287 | boolean success = exportMetaXMLToCSV(csvFile);
|
---|
288 |
|
---|
289 | if(success) { // now it's backed up to a metadatacsv file, can clear all metadata from metaXML files
|
---|
290 |
|
---|
291 | System.err.println("About to clear all metadata in collection...");
|
---|
292 | MetadataXMLFileManager.clearAllMetadataInCollection();
|
---|
293 | } else {
|
---|
294 | JOptionPane.showMessageDialog(parent,
|
---|
295 | Dictionary.get("ExportMeta.MoveMetaXMLToCSV_Failed_Message"),
|
---|
296 | Dictionary.get("General.Error"),
|
---|
297 | JOptionPane.ERROR_MESSAGE);
|
---|
298 | //System.err.println("@@@ Failed to properly export metadata.xml files' contents for this collection to CSV. Will not remove metadata.xml files");
|
---|
299 | }
|
---|
300 |
|
---|
301 | return success;
|
---|
302 | }
|
---|
303 |
|
---|
304 | /** If given a new file to create, creates the specified meta csv file from GLI's meta for the current collection.
|
---|
305 | * If the file exists, this will append the GLI metadata without checking if the file already contains the same entries. */
|
---|
306 | public boolean exportMetaXMLToCSV(File csvFile) {
|
---|
307 | boolean appendSetting = false;
|
---|
308 | boolean success = false;
|
---|
309 |
|
---|
310 | if(csvFile.exists()) {
|
---|
311 | //appendSetting = true; // better to call the other version of this method in this case?
|
---|
312 | amalgamateAllMeta();
|
---|
313 | success = writeMetaToCSV(collMetaMap, csvFile, appendSetting);
|
---|
314 | } else { // no preexisting metadata.csv file, just write out GLI meta
|
---|
315 | TreeMap<File, TreeMap<String,TreeSet<String>>> assignedMeta = getAllAssignedMetadataForAllFiles();
|
---|
316 | success = writeMetaToCSV(assignedMeta, csvFile, appendSetting);
|
---|
317 | }
|
---|
318 |
|
---|
319 | return success;
|
---|
320 | }
|
---|
321 |
|
---|
322 | private boolean writeMetaToCSV(TreeMap<File, TreeMap<String,TreeSet<String>>> metaMap, File csvFile, boolean appendSetting) {
|
---|
323 | boolean success = true;
|
---|
324 |
|
---|
325 | // First would need to write the row of all headings
|
---|
326 | TreeSet<String> metaFieldColumnHeadings = getAllCollHeadings(metaMap);
|
---|
327 | // Careful, collHeadings are alphabetically ordered, but not all docs may have meta for each column heading/metadata field name
|
---|
328 | // Need metadataFieldNames in an indexed array
|
---|
329 | Vector<String> columnHeadings = new Vector<String>(metaFieldColumnHeadings.size());
|
---|
330 | // put the Filename column as first item
|
---|
331 | columnHeadings.add("Filename");
|
---|
332 | columnHeadings.addAll(metaFieldColumnHeadings); // now have an indexed, yet still ordered, list of all column headings(the meta fieldnames)
|
---|
333 |
|
---|
334 | CSVFormat customCSVFormat = CSVFormat.DEFAULT
|
---|
335 | .withDelimiter(meta_field_sep)
|
---|
336 | .withIgnoreSurroundingSpaces(false)
|
---|
337 | .withQuoteMode(QuoteMode.MINIMAL)
|
---|
338 | .withTrim();
|
---|
339 |
|
---|
340 | try (CSVPrinter printer = new CSVPrinter(new FileWriter(csvFile, appendSetting), customCSVFormat)) {
|
---|
341 | printer.printRecord(columnHeadings);
|
---|
342 | // https://javadoc.io/doc/org.apache.commons/commons-csv/latest/index.html
|
---|
343 | Iterator<File> iFiles = metaMap.keySet().iterator();
|
---|
344 | while(iFiles.hasNext()) {
|
---|
345 | File f = iFiles.next();
|
---|
346 | String relFilename = fileToRelativeString(f);
|
---|
347 | // write out the filename field of this record
|
---|
348 | printer.print(relFilename);
|
---|
349 |
|
---|
350 | TreeMap<String, TreeSet<String>> fileMetadata = metaMap.get(f);
|
---|
351 | // now get each metadata field's value in the order of the column headings, and write them out
|
---|
352 | //for(String metaFieldName : columnHeadings) {
|
---|
353 | for(int i = 1; i < columnHeadings.size(); i++) { // skip past Filename coll heading, already written out
|
---|
354 | String metaFieldName = columnHeadings.get(i);
|
---|
355 | TreeSet<String> metavalues = fileMetadata.get(metaFieldName);
|
---|
356 | StringBuffer allMetaValuesForField = new StringBuffer();
|
---|
357 | if(metavalues == null || metavalues.size() == 0) {
|
---|
358 | // this file does not have (metavalues) such a metaFieldName, the cell for this column is empty
|
---|
359 | //System.err.println("No meta values for fieldname: " + metaFieldName);
|
---|
360 | printer.print(allMetaValuesForField);
|
---|
361 | } else {
|
---|
362 | for(String metavalue : metavalues) {
|
---|
363 | //metavalue = metavalue.trim();
|
---|
364 | allMetaValuesForField.append(meta_value_sep_char);
|
---|
365 | allMetaValuesForField.append(metavalue);
|
---|
366 | }
|
---|
367 | // write out the current metadata field of this record
|
---|
368 | // remove the extra meta_value_separator_char added the first time
|
---|
369 | printer.print(allMetaValuesForField.substring(1));
|
---|
370 | }
|
---|
371 | }
|
---|
372 |
|
---|
373 | printer.println(); // done writing a record
|
---|
374 | }
|
---|
375 | } catch (IOException ex) {
|
---|
376 | success = false;
|
---|
377 | DebugStream.printStackTrace(ex);
|
---|
378 | System.err.println("Caught exception when writing meta to CSVFile " + csvFile.getAbsolutePath());
|
---|
379 | System.err.println("\t" + ex.getMessage());
|
---|
380 | }
|
---|
381 |
|
---|
382 | return success;
|
---|
383 | }
|
---|
384 |
|
---|
385 |
|
---|
386 | public TreeMap<File, TreeMap<String,TreeSet<String>>> loadMetaFromCSVFile(File csvFile) {
|
---|
387 | TreeMap<File, TreeMap<String,TreeSet<String>>> csvFileMeta = new TreeMap<File, TreeMap<String,TreeSet<String>>>();
|
---|
388 |
|
---|
389 | if(!csvFile.exists()) {
|
---|
390 | return csvFileMeta;
|
---|
391 | }
|
---|
392 |
|
---|
393 | Reader in = null;
|
---|
394 | //try(Reader in = new FileReader(csvFile);) { // try-with-resources may break on older Java that we use to build GS3 binaries
|
---|
395 | try {
|
---|
396 | in = new FileReader(csvFile);
|
---|
397 | boolean headingRow = true;
|
---|
398 |
|
---|
399 | // https://javadoc.io/doc/org.apache.commons/commons-csv/latest/index.html
|
---|
400 | CSVFormat lenientCSVFormat = CSVFormat.DEFAULT
|
---|
401 | .withDelimiter(meta_field_sep)
|
---|
402 | .withFirstRecordAsHeader()
|
---|
403 | .withCommentMarker('#')
|
---|
404 | .withIgnoreSurroundingSpaces()
|
---|
405 | .withTrim();
|
---|
406 |
|
---|
407 | // https://stackoverflow.com/questions/36269387/get-csv-file-header-using-apache-commons
|
---|
408 | // The first col heading which is the Filename
|
---|
409 | // the remaining CSV column headings are the metadata field names
|
---|
410 |
|
---|
411 | CSVParser parser = lenientCSVFormat.parse(in);
|
---|
412 |
|
---|
413 | //String[] metaFieldNames = lenientCSVFormat.getHeader(); // didn't work
|
---|
414 | // getHeaders() returns List<String>, convert to String[] array
|
---|
415 | String[] metaFieldNames = parser.getHeaderNames().toArray(new String[0]);
|
---|
416 |
|
---|
417 | for (CSVRecord record : parser) {
|
---|
418 |
|
---|
419 | // a new row, represents a new file's meta
|
---|
420 | TreeMap<String,TreeSet<String>> meta = new TreeMap<String,TreeSet<String>>();
|
---|
421 |
|
---|
422 | for(int i = 0; i < record.size(); i++) { //for (String field : record) {
|
---|
423 | String field = record.get(i);
|
---|
424 |
|
---|
425 | if(i == 0) { // col 0 = Filename
|
---|
426 | String filename = field;
|
---|
427 | // TODO: filenames are stored relative to import folder, convert to full path for internal use?
|
---|
428 | File fullPathFile = new File(coll_importdir_path + filename);
|
---|
429 | ///System.err.println("Found Filename meta: " + filename);
|
---|
430 | csvFileMeta.put(fullPathFile, meta);
|
---|
431 | } else {
|
---|
432 | // not Filename, but metadata field name, add into meta map for this file
|
---|
433 | TreeSet<String> metaValues = new TreeSet<String>();
|
---|
434 | String metadataFieldName = metaFieldNames[i]; // get column heading=meta field name for current cell
|
---|
435 | meta.put(metadataFieldName, metaValues);
|
---|
436 | ///System.err.println("Found value for meta field: " + metadataFieldName);
|
---|
437 | // Split the field to get all metavalues for this metadata field name
|
---|
438 | // and add to metaValues set
|
---|
439 | String unparsedMetaVal = field.trim();
|
---|
440 | String[] metadataValues = unparsedMetaVal.split(meta_value_sep_re);
|
---|
441 | for(String metaVal : metadataValues) {
|
---|
442 | metaVal = metaVal.trim(); // get rid of whitespaces around separator char
|
---|
443 | if(!metaVal.equals("")) {
|
---|
444 | ///System.err.println("Found value for meta field: " + metaVal);
|
---|
445 | metaValues.add(metaVal);
|
---|
446 | }
|
---|
447 | }
|
---|
448 | }
|
---|
449 | }
|
---|
450 | }
|
---|
451 | } catch(Exception e) {
|
---|
452 | DebugStream.printStackTrace(e);
|
---|
453 | DebugStream.println("@@@ Error reading from CSV file: " + csvFile.getAbsolutePath());
|
---|
454 | } finally {
|
---|
455 | SafeProcess.closeResource(in);
|
---|
456 | }
|
---|
457 |
|
---|
458 | //this.print(csvFileMeta);
|
---|
459 | return csvFileMeta;
|
---|
460 | }
|
---|
461 |
|
---|
462 | /** For debugging */
|
---|
463 | public void print(TreeMap<File, TreeMap<String,TreeSet<String>>> metaMap ) {
|
---|
464 | Iterator<File> iFiles = metaMap.keySet().iterator();
|
---|
465 | while(iFiles.hasNext()) {
|
---|
466 | File f = iFiles.next();
|
---|
467 | TreeMap<String, TreeSet<String>> metaFields = metaMap.get(f);
|
---|
468 | if(metaFields != null) {
|
---|
469 | System.err.println("Meta for file: " + fileToRelativeString(f)); //f.getAbsolutePath());
|
---|
470 | }
|
---|
471 | Iterator<String> iMetaFields = metaFields.keySet().iterator();
|
---|
472 | if(!iMetaFields.hasNext()) {
|
---|
473 | System.err.println("No meta for file!");
|
---|
474 | }
|
---|
475 | while(iMetaFields.hasNext()) {
|
---|
476 | String fieldName = iMetaFields.next();
|
---|
477 | System.err.println("\tMetafield: " + fieldName);
|
---|
478 |
|
---|
479 | TreeSet<String> metaValues = metaFields.get(fieldName);
|
---|
480 | Iterator<String> iMetaValues = metaValues.iterator();
|
---|
481 | while(iMetaValues.hasNext()) {
|
---|
482 | String metaValue = iMetaValues.next();
|
---|
483 | System.err.println("\t\tValue: " + metaValue);
|
---|
484 | }
|
---|
485 | }
|
---|
486 | }
|
---|
487 | }
|
---|
488 |
|
---|
489 | /** For debugging */
|
---|
490 | public void printOrderedCollectionMeta() {
|
---|
491 | //TreeMap<File, TreeMap<String,TreeSet<String>>> collMetaMap = getAllAssignedMetadataForAllFiles();
|
---|
492 |
|
---|
493 | amalgamateAllMeta();
|
---|
494 | this.print(collMetaMap);
|
---|
495 | }
|
---|
496 |
|
---|
497 | public ArrayList<File> listFilesInCollection(String collection_directory_path) {
|
---|
498 |
|
---|
499 | ///System.err.println("coll dir path: " + collection_directory_path);
|
---|
500 |
|
---|
501 | // only files in import folder have meta. Don't list files outside import folder
|
---|
502 | File collDir = new File(collection_directory_path, IMPORT_DIRNAME);
|
---|
503 |
|
---|
504 | ArrayList<File> files = new ArrayList<File>();
|
---|
505 |
|
---|
506 | //FileFilter collDocsFilter = new CollectionDocFileFilter();
|
---|
507 | getAllFiles(files, collDir, this);
|
---|
508 |
|
---|
509 | return files;
|
---|
510 | }
|
---|
511 |
|
---|
512 | public void getAllFiles(ArrayList<File> files, File path, FileFilter filter) {
|
---|
513 | File[] fileList = path.listFiles(filter);
|
---|
514 | for(int i = 0; i < fileList.length; i++) {
|
---|
515 | File f = fileList[i];
|
---|
516 | if(f.isFile()) {
|
---|
517 | files.add(f);
|
---|
518 | } else {
|
---|
519 | getAllFiles(files, f, filter);
|
---|
520 | }
|
---|
521 | }
|
---|
522 | }
|
---|
523 |
|
---|
524 | /** Filter to only accept Gathered GS documents
|
---|
525 | * to produce the list of files for which we need to export GLI metadata info to CSV.
|
---|
526 | */
|
---|
527 | //private class CollectionDocFileFilter implements FileFilter {
|
---|
528 | @Override
|
---|
529 | public boolean accept(File pathname) {
|
---|
530 | String tailname = pathname.getName();
|
---|
531 | if(pathname.isDirectory()) {
|
---|
532 | if(tailname.equals(".svn")) {
|
---|
533 | return false;
|
---|
534 | }
|
---|
535 | } else {
|
---|
536 | if(pathname.equals(metadataCSVFile)) { // skip any meta csv file user exported/put into import
|
---|
537 | return false;
|
---|
538 | } else if(tailname.equals("metadata.xml")) {
|
---|
539 | return false;
|
---|
540 | } else if(tailname.endsWith("~")) {
|
---|
541 | return false;
|
---|
542 | } else if(tailname.endsWith(".bak")) {
|
---|
543 | return false;
|
---|
544 | }
|
---|
545 | }
|
---|
546 | // accept all other file types
|
---|
547 | return true;
|
---|
548 | }
|
---|
549 | //}
|
---|
550 |
|
---|
551 | public static File chooseMetaCSVFile(String defaultSearchPath, JFrame parent) {
|
---|
552 | JFileChooser chooser = new JFileChooser(defaultSearchPath);
|
---|
553 | chooser.setFileSelectionMode(JFileChooser.FILES_ONLY);
|
---|
554 | chooser.setDialogTitle(Dictionary.get("ExportMeta.ChooseMetaCSVFile"));
|
---|
555 | FileNameExtensionFilter filter = new FileNameExtensionFilter("CSV spreadsheet file", "csv");
|
---|
556 | chooser.setFileFilter(filter);//.addChoosableFileFilter(filter);
|
---|
557 | int returnVal = chooser.showOpenDialog(parent);
|
---|
558 | if(returnVal == JFileChooser.APPROVE_OPTION) {
|
---|
559 | File selectedFile = chooser.getSelectedFile();
|
---|
560 | ///System.err.println("File selected: " + selectedFile.getAbsolutePath());
|
---|
561 | return selectedFile;
|
---|
562 | } else {
|
---|
563 | return null;
|
---|
564 | }
|
---|
565 | }
|
---|
566 | }
|
---|
567 |
|
---|
568 |
|
---|
569 |
|
---|
570 |
|
---|