1 | /**
|
---|
2 | *#########################################################################
|
---|
3 | *
|
---|
4 | * A component of the Gatherer application, part of the Greenstone digital
|
---|
5 | * library suite from the New Zealand Digital Library Project at the
|
---|
6 | * University of Waikato, New Zealand.
|
---|
7 | *
|
---|
8 | * <BR><BR>
|
---|
9 | *
|
---|
10 | * Author: Greenstone Digital Library, University of Waikato
|
---|
11 | *
|
---|
12 | * <BR><BR>
|
---|
13 | *
|
---|
14 | * Copyright (C) 2020 New Zealand Digital Library Project
|
---|
15 | *
|
---|
16 | * <BR><BR>
|
---|
17 | *
|
---|
18 | * This program is free software; you can redistribute it and/or modify
|
---|
19 | * it under the terms of the GNU General Public License as published by
|
---|
20 | * the Free Software Foundation; either version 2 of the License, or
|
---|
21 | * (at your option) any later version.
|
---|
22 | *
|
---|
23 | * <BR><BR>
|
---|
24 | *
|
---|
25 | * This program is distributed in the hope that it will be useful,
|
---|
26 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
27 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
28 | * GNU General Public License for more details.
|
---|
29 | *
|
---|
30 | * <BR><BR>
|
---|
31 | *
|
---|
32 | * You should have received a copy of the GNU General Public License
|
---|
33 | * along with this program; if not, write to the Free Software
|
---|
34 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
35 | *########################################################################
|
---|
36 | */
|
---|
37 | package org.greenstone.gatherer.metadata;
|
---|
38 |
|
---|
39 | import java.io.*;
|
---|
40 | import java.util.*;
|
---|
41 | import javax.swing.filechooser.FileNameExtensionFilter;
|
---|
42 | import javax.swing.JFileChooser;
|
---|
43 | import javax.swing.JFrame;
|
---|
44 | import javax.swing.JOptionPane;
|
---|
45 |
|
---|
46 | import org.apache.commons.csv.*;
|
---|
47 |
|
---|
48 | import org.greenstone.gatherer.DebugStream;
|
---|
49 | import org.greenstone.gatherer.Dictionary;
|
---|
50 | import org.greenstone.gatherer.metadata.MetadataElement;
|
---|
51 | import org.greenstone.gatherer.metadata.MetadataValue;
|
---|
52 | import org.greenstone.gatherer.metadata.MetadataXMLFileManager;
|
---|
53 | import org.greenstone.gatherer.util.SafeProcess;
|
---|
54 | import org.greenstone.gatherer.util.Utility;
|
---|
55 |
|
---|
56 |
|
---|
57 | /**
|
---|
58 | * Class to export GLI metadata of a collection to a metadata.csv file.
|
---|
59 | * This class can also merge GLI meta for the collection onto an existing metadata.csv file.
|
---|
60 | * Merging is a cumulative process.
|
---|
61 | * Duplicate entries and values are not preserved.
|
---|
62 | * Uses TreeMap and TreeSet to keep everything alphabetically ordered.
|
---|
63 | * TODO: What about ordering by unicode. Is that the natural ordering for Java Strings?
|
---|
64 | * If so, this would support keeping metadata values ordered regardless of script used.
|
---|
65 | */
|
---|
66 | public class MetadataToCSV implements FileFilter {
|
---|
67 | private char meta_field_sep = ','; // comma is default field separator for CSV, comma separated values
|
---|
68 | private String meta_value_sep_re = "\\|"; // must escape | to get regex
|
---|
69 | private char meta_value_sep_char = '|'; // when written out to file
|
---|
70 | private String collection_directory_path = "";
|
---|
71 | private String coll_importdir_path = "";
|
---|
72 | private final int import_path_length;
|
---|
73 |
|
---|
74 | /** The CSV metadata file to be read and rewritten. */
|
---|
75 | //private String metadataCSVFilename = "metadata.csv";
|
---|
76 | private File metadataCSVFile;
|
---|
77 |
|
---|
78 | /** TODO: Is this useful?
|
---|
79 | * Not yet implemented: if this flag is true, then if a file mentioned in metadata.csv does not exist,
|
---|
80 | * its entry is dropped and won't appear again when the metadata.csv is written out again.
|
---|
81 | */
|
---|
82 | //private boolean removeMetaForFilesThatDoNotExist = false;
|
---|
83 |
|
---|
84 | private final String IMPORT_DIRNAME = "import";
|
---|
85 |
|
---|
86 | /** A Map of all files/docs in this collection and their metadata,
|
---|
87 | * itself tuples of metadata field names and their (possibly multiple) metadata values. */
|
---|
88 | TreeMap<File, TreeMap<String,TreeSet<String>>> collMetaMap = new TreeMap<File, TreeMap<String,TreeSet<String>>>();
|
---|
89 |
|
---|
90 | public MetadataToCSV(String collDirPath) {
|
---|
91 | this.collection_directory_path = collDirPath;
|
---|
92 | this.coll_importdir_path = collDirPath + IMPORT_DIRNAME + File.separator; //new File(collDirPath, IMPORT_DIRNAME).getAbsolutePath();
|
---|
93 | import_path_length = this.coll_importdir_path.length();
|
---|
94 | this.metadataCSVFile = new File(coll_importdir_path, "metadata.csv");
|
---|
95 | }
|
---|
96 |
|
---|
/**
 * Creates an exporter for the collection rooted at collDirPath that reads and
 * rewrites the given CSV file instead of the default import/metadata.csv.
 *
 * @param collDirPath path of the collection directory
 * @param metadataCSV the CSV metadata file to use; when null, the default file
 *        chosen by the single-argument constructor is kept
 */
public MetadataToCSV(String collDirPath, File metadataCSV) {
    this(collDirPath);
    // The original assigned the field to itself (this.metadataCSVFile = metadataCSVFile),
    // because the parameter is called metadataCSV; the caller's file was never stored.
    if (metadataCSV != null) {
        this.metadataCSVFile = metadataCSV;
    }
}
|
---|
101 |
|
---|
/**
 * Fully configurable constructor: delegates the collection path and CSV file to the
 * two-argument constructor, then overrides the three separator settings.
 *
 * @param collDirPath path of the collection directory
 * @param metadataCSVFile the CSV metadata file, passed on to the two-argument constructor
 * @param metafieldSepChar separator between the fields (columns) of a CSV record
 * @param readMetaValSepExpression regular expression that splits a cell into metadata values on reading
 * @param writeMetaValSepChar character written between multiple metadata values of one cell
 */
public MetadataToCSV(String collDirPath, File metadataCSVFile, char metafieldSepChar, String readMetaValSepExpression, char writeMetaValSepChar) {
    this(collDirPath, metadataCSVFile);

    // separator used between values when writing, and its regex counterpart for reading
    this.meta_value_sep_char = writeMetaValSepChar;
    this.meta_value_sep_re = readMetaValSepExpression;
    // separator between CSV columns
    this.meta_field_sep = metafieldSepChar;
}
|
---|
108 |
|
---|
109 | /** Remove import path prefix from given file. Returned is the path of file relative to import. */
|
---|
110 | private String fileToRelativeString(File f) {
|
---|
111 | String fullPath = f.getAbsolutePath();
|
---|
112 | //System.err.println("@@@ fullpath: " + fullPath);
|
---|
113 | //System.err.println("@@@ coll_importdir_path: " + this.coll_importdir_path);
|
---|
114 | int indexMatch = fullPath.indexOf(coll_importdir_path);
|
---|
115 | if(indexMatch == -1) {
|
---|
116 | return fullPath;
|
---|
117 | } else {
|
---|
118 | fullPath = fullPath.substring(indexMatch+import_path_length);
|
---|
119 | // MetadataCSVPlugin wants URL style slashes (forward slashes) not Windows backslashes
|
---|
120 | // as file separator. But on Linux, backslashes have a different meaning in filepaths,
|
---|
121 | // so must only replace \ with / if we're on Windows.
|
---|
122 | if(Utility.isWindows()) {
|
---|
123 | fullPath = fullPath.replace("\\", "/");
|
---|
124 | }
|
---|
125 | return fullPath;
|
---|
126 | }
|
---|
127 | }
|
---|
128 |
|
---|
129 |
|
---|
130 | /** helper methods to export metadata for collection files to csv
|
---|
131 | * Returns a Navigable Sorted Map of file names in the collection (relative to import folder), ordered alphabetically,
|
---|
132 | * mapped to each file's metadata, sorted alphabetically by metadata field name, and list of metadata values sorted alphabetically
|
---|
133 | */
|
---|
134 | public TreeMap<File, TreeMap<String,TreeSet<String>>> getAllAssignedMetadataForAllFiles() {
|
---|
135 | TreeMap<File, TreeMap<String,TreeSet<String>>> files_with_meta = new TreeMap<File, TreeMap<String,TreeSet<String>>>();
|
---|
136 |
|
---|
137 | ArrayList<File> files = listFilesInCollection(this.collection_directory_path);
|
---|
138 | Iterator<File> i = files.iterator();
|
---|
139 |
|
---|
140 | while(i.hasNext()) {
|
---|
141 | File f = i.next();
|
---|
142 | ArrayList file_meta = MetadataXMLFileManager.getMetadataAssignedToFile(f);
|
---|
143 |
|
---|
144 | //files_with_meta.put(f, file_meta);
|
---|
145 | TreeMap<String,TreeSet<String>> fileToMetaMap = new TreeMap<String,TreeSet<String>>();
|
---|
146 |
|
---|
147 | // debugging display
|
---|
148 | ///System.err.println("Meta for file: " + f.getAbsolutePath());
|
---|
149 | Iterator it = file_meta.iterator();
|
---|
150 | while(it.hasNext()) {
|
---|
151 | MetadataValue meta = (MetadataValue)it.next();
|
---|
152 | String metaValue = meta.getValue();
|
---|
153 | MetadataElement metaEl = meta.getMetadataElement();
|
---|
154 | String metaFieldName = metaEl.getFullName();
|
---|
155 | ///System.err.println(" field: " + metaFieldName);
|
---|
156 | ///System.err.println(" value: " + metaValue);
|
---|
157 |
|
---|
158 | TreeSet<String> vals = fileToMetaMap.get(metaFieldName);
|
---|
159 | if(vals == null) {
|
---|
160 | vals = new TreeSet<String>();
|
---|
161 | vals.add(metaValue);
|
---|
162 | fileToMetaMap.put(metaFieldName, vals);
|
---|
163 | } else {
|
---|
164 | vals.add(metaValue);
|
---|
165 | }
|
---|
166 | }
|
---|
167 |
|
---|
168 | files_with_meta.put(f, fileToMetaMap);
|
---|
169 | }
|
---|
170 |
|
---|
171 | return files_with_meta;
|
---|
172 | }
|
---|
173 |
|
---|
174 | // Get all meta in any metadata.csv file
|
---|
175 | // and add to it all meta assigned for docs in this collection
|
---|
176 | private void amalgamateAllMeta() {
|
---|
177 | TreeMap<File, TreeMap<String,TreeSet<String>>> assignedMeta = getAllAssignedMetadataForAllFiles();
|
---|
178 | TreeMap<File, TreeMap<String,TreeSet<String>>> csvFileMeta = loadMetaFromCSVFile(this.metadataCSVFile);
|
---|
179 |
|
---|
180 | if(collMetaMap.size() == 0) {
|
---|
181 |
|
---|
182 | if(assignedMeta.keySet().size() > csvFileMeta.keySet().size()) {
|
---|
183 | collMetaMap = assignedMeta;
|
---|
184 | merge(collMetaMap, csvFileMeta);
|
---|
185 | } else {
|
---|
186 | collMetaMap = csvFileMeta;
|
---|
187 | merge(collMetaMap, assignedMeta);
|
---|
188 | }
|
---|
189 | } else {
|
---|
190 |
|
---|
191 | merge(collMetaMap, assignedMeta);
|
---|
192 | merge(collMetaMap, csvFileMeta);
|
---|
193 | }
|
---|
194 |
|
---|
195 | }
|
---|
196 |
|
---|
197 | private TreeSet<String> getAllCollHeadings(TreeMap<File, TreeMap<String,TreeSet<String>>> metaMap) {
|
---|
198 | TreeSet<String> collHeadings = new TreeSet<String>();
|
---|
199 |
|
---|
200 | if(metaMap == null || metaMap.size() == 0) {
|
---|
201 | return collHeadings;
|
---|
202 | }
|
---|
203 | // get all meta field names and add into collHeadings. As it's a TreeSet,
|
---|
204 | // duplicates will be automatically ignored and collheadings will be sorted
|
---|
205 | Iterator<File> iFiles = metaMap.keySet().iterator();
|
---|
206 | while(iFiles.hasNext()) {
|
---|
207 | File f = iFiles.next();
|
---|
208 | TreeMap<String, TreeSet<String>> metaFields = metaMap.get(f);
|
---|
209 | Iterator<String> iMetaFields = metaFields.keySet().iterator();
|
---|
210 | while(iMetaFields.hasNext()) {
|
---|
211 | String fieldName = iMetaFields.next();
|
---|
212 | collHeadings.add(fieldName);
|
---|
213 | }
|
---|
214 | }
|
---|
215 |
|
---|
216 | return collHeadings;
|
---|
217 | }
|
---|
218 |
|
---|
219 | /** merge metaMap param into baseMetaMap: only portions not already present in baseMetaMap are added in
|
---|
220 | * whether these are new file entries, new metadata field entries for extant files, or metadata values for extant fields of files.
|
---|
221 | * A simple map.putALL() will not do the trick as collMetaMap is a complicated data structure.
|
---|
222 | */
|
---|
223 | private void merge(TreeMap<File, TreeMap<String,TreeSet<String>>> baseMetaMap, TreeMap<File, TreeMap<String,TreeSet<String>>> metaMap) {
|
---|
224 |
|
---|
225 | if(metaMap == null || metaMap.size() == 0) {
|
---|
226 | // nothing to do
|
---|
227 | return;
|
---|
228 | }
|
---|
229 |
|
---|
230 | Iterator<File> iFiles = metaMap.keySet().iterator();
|
---|
231 | while(iFiles.hasNext()) {
|
---|
232 | File f = iFiles.next();
|
---|
233 |
|
---|
234 | // check if this file already has an entry in baseMetaMap
|
---|
235 | TreeMap<String, TreeSet<String>> origMetaFields = baseMetaMap.get(f);
|
---|
236 |
|
---|
237 | TreeMap<String, TreeSet<String>> metaFields = metaMap.get(f);
|
---|
238 | Iterator<String> iMetaFields = metaFields.keySet().iterator();
|
---|
239 |
|
---|
240 | // if file in metaMap didn't exist in baseMetaMap, easy: just copy its entry across in entirety
|
---|
241 | if(origMetaFields == null) {
|
---|
242 | metaMap.put(f, metaFields);
|
---|
243 | continue;
|
---|
244 | }
|
---|
245 |
|
---|
246 | // else, file already exists in baseMetaMap, need to check if we have to merge any meta on the file
|
---|
247 | while(iMetaFields.hasNext()) {
|
---|
248 | String fieldName = iMetaFields.next();
|
---|
249 | TreeSet<String> metaValues = metaFields.get(fieldName);
|
---|
250 |
|
---|
251 | // check if this metadata field exists for the same file in baseMetaMap
|
---|
252 | TreeSet<String> origMetaValues = origMetaFields.get(fieldName);
|
---|
253 | if(origMetaValues == null) { // this metadata field name did not exist for file in baseMetaMap,
|
---|
254 | // so copy all vals for this fieldName into baseMetaMap's entry for this file
|
---|
255 | origMetaFields.put(fieldName, metaValues);
|
---|
256 | continue; // continue on inner loop
|
---|
257 | }
|
---|
258 |
|
---|
259 | // else the meta fieldName existed for that file in baseMetaMap
|
---|
260 | // Check if any of the metadata values didn't already exist, else add them in
|
---|
261 | Iterator<String> iMetaValues = metaValues.iterator();
|
---|
262 | while(iMetaValues.hasNext()) {
|
---|
263 | String metaValue = iMetaValues.next();
|
---|
264 |
|
---|
265 | if(!origMetaValues.contains(metaValue)) {
|
---|
266 | origMetaValues.add(metaValue);
|
---|
267 | }
|
---|
268 | }
|
---|
269 |
|
---|
270 | }
|
---|
271 | }
|
---|
272 | }
|
---|
273 |
|
---|
274 |
|
---|
275 | /** If successfully wrote out collection's meta from to a CSV file,
|
---|
276 | * then will need to remove all meta from GLI (metadata.xml files).
|
---|
277 | * Just del or rename those files to .bak?
|
---|
278 | * This dangerous method goes through all the metadata.xml files that were in use so far
|
---|
279 | * and removes all the child elements from meta xml files' DirectoryMetadata root elements
|
---|
280 | */
|
---|
281 | public boolean convertMetaXMLToCSV(File csvFile, JFrame parent) {
|
---|
282 |
|
---|
283 | // Warn the user about the operation being destructive
|
---|
284 | int result = JOptionPane.showConfirmDialog(parent,
|
---|
285 | Dictionary.get("MetaToCSV.ConvertMetaXMLToCSV_Warning_Message"),
|
---|
286 | Dictionary.get("General.Warning"),
|
---|
287 | JOptionPane.OK_CANCEL_OPTION,
|
---|
288 | JOptionPane.WARNING_MESSAGE);
|
---|
289 | if(result == JOptionPane.CANCEL_OPTION || result == JOptionPane.CLOSED_OPTION) {
|
---|
290 | // NO_OPTION shouldn't happen
|
---|
291 | return false;
|
---|
292 | }
|
---|
293 |
|
---|
294 | boolean success = exportMetaXMLToCSV(csvFile);
|
---|
295 |
|
---|
296 | if(success) { // now it's backed up to a metadatacsv file, can clear all metadata from metaXML files
|
---|
297 |
|
---|
298 | System.err.println("About to clear all metadata in collection...");
|
---|
299 | MetadataXMLFileManager.clearAllMetadataInCollection();
|
---|
300 | } else {
|
---|
301 | JOptionPane.showMessageDialog(parent,
|
---|
302 | Dictionary.get("MetaToCSV.ConvertMetaXMLToCSV_Failed_Message"),
|
---|
303 | Dictionary.get("General.Error"),
|
---|
304 | JOptionPane.ERROR_MESSAGE);
|
---|
305 | //System.err.println("@@@ Failed to properly export metadata.xml files' contents for this collection to CSV. Will not remove metadata.xml files");
|
---|
306 | }
|
---|
307 |
|
---|
308 | return success;
|
---|
309 | }
|
---|
310 |
|
---|
311 | /** If given a new file to create, creates the specified meta csv file from GLI's meta for the current collection.
|
---|
312 | * If the file exists, this will append the GLI metadata without checking if the file already contains the same entries. */
|
---|
313 | public boolean exportMetaXMLToCSV(File csvFile) {
|
---|
314 | boolean appendSetting = false;
|
---|
315 | boolean success = false;
|
---|
316 |
|
---|
317 | if(csvFile.exists()) {
|
---|
318 | //appendSetting = true; // better to call the other version of this method in this case?
|
---|
319 | amalgamateAllMeta();
|
---|
320 | success = writeMetaToCSV(collMetaMap, csvFile, appendSetting);
|
---|
321 | } else { // no preexisting metadata.csv file, just write out GLI meta
|
---|
322 | TreeMap<File, TreeMap<String,TreeSet<String>>> assignedMeta = getAllAssignedMetadataForAllFiles();
|
---|
323 | success = writeMetaToCSV(assignedMeta, csvFile, appendSetting);
|
---|
324 | }
|
---|
325 |
|
---|
326 | return success;
|
---|
327 | }
|
---|
328 |
|
---|
329 | private boolean writeMetaToCSV(TreeMap<File, TreeMap<String,TreeSet<String>>> metaMap, File csvFile, boolean appendSetting) {
|
---|
330 | boolean success = true;
|
---|
331 |
|
---|
332 | // First would need to write the row of all headings
|
---|
333 | TreeSet<String> metaFieldColumnHeadings = getAllCollHeadings(metaMap);
|
---|
334 | // Careful, collHeadings are alphabetically ordered, but not all docs may have meta for each column heading/metadata field name
|
---|
335 | // Need metadataFieldNames in an indexed array
|
---|
336 | Vector<String> columnHeadings = new Vector<String>(metaFieldColumnHeadings.size());
|
---|
337 | // put the Filename column as first item
|
---|
338 | columnHeadings.add("Filename");
|
---|
339 | columnHeadings.addAll(metaFieldColumnHeadings); // now have an indexed, yet still ordered, list of all column headings(the meta fieldnames)
|
---|
340 |
|
---|
341 | CSVFormat customCSVFormat = CSVFormat.DEFAULT
|
---|
342 | .withDelimiter(meta_field_sep)
|
---|
343 | .withIgnoreSurroundingSpaces(false)
|
---|
344 | .withQuoteMode(QuoteMode.MINIMAL)
|
---|
345 | .withTrim();
|
---|
346 |
|
---|
347 | // try-with-resources breaks on 64 bit Linux nightly binary VM as that uses JDK 6.
|
---|
348 | //try (CSVPrinter printer = new CSVPrinter(new FileWriter(csvFile, appendSetting), customCSVFormat)) {
|
---|
349 |
|
---|
350 | CSVPrinter printer = null;
|
---|
351 | try {
|
---|
352 | printer = new CSVPrinter(new FileWriter(csvFile, appendSetting), customCSVFormat);
|
---|
353 |
|
---|
354 | printer.printRecord(columnHeadings);
|
---|
355 | // https://javadoc.io/doc/org.apache.commons/commons-csv/latest/index.html
|
---|
356 | Iterator<File> iFiles = metaMap.keySet().iterator();
|
---|
357 | while(iFiles.hasNext()) {
|
---|
358 | File f = iFiles.next();
|
---|
359 | String relFilename = fileToRelativeString(f);
|
---|
360 | // write out the filename field of this record
|
---|
361 | printer.print(relFilename);
|
---|
362 |
|
---|
363 | TreeMap<String, TreeSet<String>> fileMetadata = metaMap.get(f);
|
---|
364 | // now get each metadata field's value in the order of the column headings, and write them out
|
---|
365 | //for(String metaFieldName : columnHeadings) {
|
---|
366 | for(int i = 1; i < columnHeadings.size(); i++) { // skip past Filename coll heading, already written out
|
---|
367 | String metaFieldName = columnHeadings.get(i);
|
---|
368 | TreeSet<String> metavalues = fileMetadata.get(metaFieldName);
|
---|
369 | StringBuffer allMetaValuesForField = new StringBuffer();
|
---|
370 | if(metavalues == null || metavalues.size() == 0) {
|
---|
371 | // this file does not have (metavalues) such a metaFieldName, the cell for this column is empty
|
---|
372 | //System.err.println("No meta values for fieldname: " + metaFieldName);
|
---|
373 | printer.print(allMetaValuesForField);
|
---|
374 | } else {
|
---|
375 | for(String metavalue : metavalues) {
|
---|
376 | //metavalue = metavalue.trim();
|
---|
377 | allMetaValuesForField.append(meta_value_sep_char);
|
---|
378 | allMetaValuesForField.append(metavalue);
|
---|
379 | }
|
---|
380 | // write out the current metadata field of this record
|
---|
381 | // remove the extra meta_value_separator_char added the first time
|
---|
382 | printer.print(allMetaValuesForField.substring(1));
|
---|
383 | }
|
---|
384 | }
|
---|
385 |
|
---|
386 | printer.println(); // done writing a record
|
---|
387 | }
|
---|
388 |
|
---|
389 | //printer.close(true); // flush and close, only from Java 7/8 version of commons-csv
|
---|
390 | printer.close();
|
---|
391 |
|
---|
392 | } catch (IOException ex) {
|
---|
393 | success = false;
|
---|
394 | DebugStream.printStackTrace(ex);
|
---|
395 | System.err.println("Caught exception when writing meta to CSVFile " + csvFile.getAbsolutePath());
|
---|
396 | System.err.println("\t" + ex.getMessage());
|
---|
397 |
|
---|
398 | SafeProcess.closeResource(printer);
|
---|
399 | }
|
---|
400 |
|
---|
401 | return success;
|
---|
402 | }
|
---|
403 |
|
---|
404 |
|
---|
405 | private TreeMap<File, TreeMap<String,TreeSet<String>>> loadMetaFromCSVFile(File csvFile) {
|
---|
406 | TreeMap<File, TreeMap<String,TreeSet<String>>> csvFileMeta = new TreeMap<File, TreeMap<String,TreeSet<String>>>();
|
---|
407 |
|
---|
408 | if(!csvFile.exists()) {
|
---|
409 | return csvFileMeta;
|
---|
410 | }
|
---|
411 |
|
---|
412 | Reader in = null;
|
---|
413 | //try(Reader in = new FileReader(csvFile);) { // try-with-resources may break on older Java that we use to build GS3 binaries
|
---|
414 | try {
|
---|
415 | in = new FileReader(csvFile);
|
---|
416 | boolean headingRow = true;
|
---|
417 |
|
---|
418 | // https://javadoc.io/doc/org.apache.commons/commons-csv/latest/index.html
|
---|
419 | CSVFormat lenientCSVFormat = CSVFormat.DEFAULT
|
---|
420 | .withDelimiter(meta_field_sep)
|
---|
421 | .withFirstRecordAsHeader()
|
---|
422 | .withCommentMarker('#')
|
---|
423 | .withIgnoreSurroundingSpaces()
|
---|
424 | .withTrim();
|
---|
425 |
|
---|
426 | // https://stackoverflow.com/questions/36269387/get-csv-file-header-using-apache-commons
|
---|
427 | // The first col heading which is the Filename
|
---|
428 | // the remaining CSV column headings are the metadata field names
|
---|
429 |
|
---|
430 | CSVParser parser = lenientCSVFormat.parse(in);
|
---|
431 |
|
---|
432 | //String[] metaFieldNames = lenientCSVFormat.getHeader(); // didn't work
|
---|
433 | // getHeaders() returns List<String>, convert to String[] array
|
---|
434 |
|
---|
435 | //String[] metaFieldNames = parser.getHeaderNames().toArray(new String[0]); // not available in Java-6 release of Commons-CSV
|
---|
436 | String[] metaFieldNames = parser.getHeaderMap().keySet().toArray(new String[0]);
|
---|
437 |
|
---|
438 | for (CSVRecord record : parser) {
|
---|
439 |
|
---|
440 | // a new row, represents a new file's meta
|
---|
441 | TreeMap<String,TreeSet<String>> meta = new TreeMap<String,TreeSet<String>>();
|
---|
442 |
|
---|
443 | for(int i = 0; i < record.size(); i++) { //for (String field : record) {
|
---|
444 | String field = record.get(i);
|
---|
445 |
|
---|
446 | if(i == 0) { // col 0 = Filename
|
---|
447 | String filename = field;
|
---|
448 | // TODO: filenames are stored relative to import folder, convert to full path for internal use?
|
---|
449 | // Relative filepaths are stored with URL style slashes not OS specific slashes
|
---|
450 | // For Windows, reconvert to \
|
---|
451 | //File fullPathFile = new File(coll_importdir_path, filename); // would this work to
|
---|
452 | // create OS specific paths, even if filename has slashes the wrong way round for Windows?
|
---|
453 | if(Utility.isWindows()) {
|
---|
454 | filename = filename.replace("/", "\\");
|
---|
455 | }
|
---|
456 | File fullPathFile = new File(coll_importdir_path + filename);
|
---|
457 | ///System.err.println("Found Filename meta: " + filename);
|
---|
458 | csvFileMeta.put(fullPathFile, meta);
|
---|
459 | } else {
|
---|
460 | // not Filename, but metadata field name, add into meta map for this file
|
---|
461 | TreeSet<String> metaValues = new TreeSet<String>();
|
---|
462 | String metadataFieldName = metaFieldNames[i]; // get column heading=meta field name for current cell
|
---|
463 | meta.put(metadataFieldName, metaValues);
|
---|
464 | ///System.err.println("Found value for meta field: " + metadataFieldName);
|
---|
465 | // Split the field to get all metavalues for this metadata field name
|
---|
466 | // and add to metaValues set
|
---|
467 | String unparsedMetaVal = field.trim();
|
---|
468 | String[] metadataValues = unparsedMetaVal.split(meta_value_sep_re);
|
---|
469 | for(String metaVal : metadataValues) {
|
---|
470 | metaVal = metaVal.trim(); // get rid of whitespaces around separator char
|
---|
471 | if(!metaVal.equals("")) {
|
---|
472 | ///System.err.println("Found value for meta field: " + metaVal);
|
---|
473 | metaValues.add(metaVal);
|
---|
474 | }
|
---|
475 | }
|
---|
476 | }
|
---|
477 | }
|
---|
478 | }
|
---|
479 | } catch(Exception e) {
|
---|
480 | DebugStream.printStackTrace(e);
|
---|
481 | DebugStream.println("@@@ Error reading from CSV file: " + csvFile.getAbsolutePath());
|
---|
482 | } finally {
|
---|
483 | SafeProcess.closeResource(in);
|
---|
484 | }
|
---|
485 |
|
---|
486 | //this.print(csvFileMeta);
|
---|
487 | return csvFileMeta;
|
---|
488 | }
|
---|
489 |
|
---|
490 | /** For debugging */
|
---|
491 | private void print(TreeMap<File, TreeMap<String,TreeSet<String>>> metaMap ) {
|
---|
492 | Iterator<File> iFiles = metaMap.keySet().iterator();
|
---|
493 | while(iFiles.hasNext()) {
|
---|
494 | File f = iFiles.next();
|
---|
495 | TreeMap<String, TreeSet<String>> metaFields = metaMap.get(f);
|
---|
496 | if(metaFields != null) {
|
---|
497 | System.err.println("Meta for file: " + fileToRelativeString(f)); //f.getAbsolutePath());
|
---|
498 | }
|
---|
499 | Iterator<String> iMetaFields = metaFields.keySet().iterator();
|
---|
500 | if(!iMetaFields.hasNext()) {
|
---|
501 | System.err.println("No meta for file!");
|
---|
502 | }
|
---|
503 | while(iMetaFields.hasNext()) {
|
---|
504 | String fieldName = iMetaFields.next();
|
---|
505 | System.err.println("\tMetafield: " + fieldName);
|
---|
506 |
|
---|
507 | TreeSet<String> metaValues = metaFields.get(fieldName);
|
---|
508 | Iterator<String> iMetaValues = metaValues.iterator();
|
---|
509 | while(iMetaValues.hasNext()) {
|
---|
510 | String metaValue = iMetaValues.next();
|
---|
511 | System.err.println("\t\tValue: " + metaValue);
|
---|
512 | }
|
---|
513 | }
|
---|
514 | }
|
---|
515 | }
|
---|
516 |
|
---|
517 | /** For debugging */
|
---|
518 | private void printOrderedCollectionMeta() {
|
---|
519 | //TreeMap<File, TreeMap<String,TreeSet<String>>> collMetaMap = getAllAssignedMetadataForAllFiles();
|
---|
520 |
|
---|
521 | amalgamateAllMeta();
|
---|
522 | this.print(collMetaMap);
|
---|
523 | }
|
---|
524 |
|
---|
525 | public ArrayList<File> listFilesInCollection(String collection_directory_path) {
|
---|
526 |
|
---|
527 | ///System.err.println("coll dir path: " + collection_directory_path);
|
---|
528 |
|
---|
529 | // only files in import folder have meta. Don't list files outside import folder
|
---|
530 | File collDir = new File(collection_directory_path, IMPORT_DIRNAME);
|
---|
531 |
|
---|
532 | ArrayList<File> files = new ArrayList<File>();
|
---|
533 |
|
---|
534 | //FileFilter collDocsFilter = new CollectionDocFileFilter();
|
---|
535 | getAllFiles(files, collDir, this);
|
---|
536 |
|
---|
537 | return files;
|
---|
538 | }
|
---|
539 |
|
---|
540 | private void getAllFiles(ArrayList<File> files, File path, FileFilter filter) {
|
---|
541 | File[] fileList = path.listFiles(filter);
|
---|
542 | for(int i = 0; i < fileList.length; i++) {
|
---|
543 | File f = fileList[i];
|
---|
544 | if(f.isFile()) {
|
---|
545 | files.add(f);
|
---|
546 | } else {
|
---|
547 | getAllFiles(files, f, filter);
|
---|
548 | }
|
---|
549 | }
|
---|
550 | }
|
---|
551 |
|
---|
552 | /** Filter to only accept Gathered GS documents
|
---|
553 | * to produce the list of files for which we need to export GLI metadata info to CSV.
|
---|
554 | */
|
---|
555 | //private class CollectionDocFileFilter implements FileFilter {
|
---|
556 | @Override
|
---|
557 | public boolean accept(File pathname) {
|
---|
558 | String tailname = pathname.getName();
|
---|
559 | if(pathname.isDirectory()) {
|
---|
560 | if(tailname.equals(".svn")) {
|
---|
561 | return false;
|
---|
562 | }
|
---|
563 | } else {
|
---|
564 | if(pathname.equals(metadataCSVFile)) { // skip any meta csv file user exported/put into import
|
---|
565 | return false;
|
---|
566 | } else if(tailname.equals("metadata.xml")) {
|
---|
567 | return false;
|
---|
568 | } else if(tailname.endsWith("~")) {
|
---|
569 | return false;
|
---|
570 | } else if(tailname.endsWith(".bak")) {
|
---|
571 | return false;
|
---|
572 | }
|
---|
573 | }
|
---|
574 | // accept all other file types
|
---|
575 | return true;
|
---|
576 | }
|
---|
577 | //}
|
---|
578 |
|
---|
579 | public static File chooseMetaCSVFile(String defaultSearchPath, boolean convertNotExport, JFrame parent) {
|
---|
580 | JFileChooser chooser = new JFileChooser(defaultSearchPath);
|
---|
581 | chooser.setFileSelectionMode(JFileChooser.FILES_ONLY);
|
---|
582 | String actionName = Dictionary.get("MetaToCSV.ExportAction"); // Export or Convert
|
---|
583 | if(convertNotExport) {
|
---|
584 | actionName = Dictionary.get("MetaToCSV.ConvertAction");
|
---|
585 | }
|
---|
586 | chooser.setDialogTitle(Dictionary.get("MetaToCSV.ChooseMetaCSVFile", actionName));
|
---|
587 | chooser.setApproveButtonText(Dictionary.get("MetaToCSV.Choose"));//actionName);
|
---|
588 | FileNameExtensionFilter filter = new FileNameExtensionFilter(Dictionary.get("MetaToCSV.CSVFileExtensionType"), "csv");
|
---|
589 | chooser.setFileFilter(filter);//.addChoosableFileFilter(filter);
|
---|
590 | int returnVal = chooser.showOpenDialog(parent);
|
---|
591 | if(returnVal == JFileChooser.APPROVE_OPTION) {
|
---|
592 | File selectedFile = chooser.getSelectedFile();
|
---|
593 | ///System.err.println("File selected: " + selectedFile.getAbsolutePath());
|
---|
594 | return selectedFile;
|
---|
595 | } else {
|
---|
596 | return null;
|
---|
597 | }
|
---|
598 | }
|
---|
599 | }
|
---|
600 |
|
---|
601 |
|
---|
602 |
|
---|
603 |
|
---|