[34263] | 1 | /**
|
---|
| 2 | *#########################################################################
|
---|
| 3 | *
|
---|
| 4 | * A component of the Gatherer application, part of the Greenstone digital
|
---|
| 5 | * library suite from the New Zealand Digital Library Project at the
|
---|
| 6 | * University of Waikato, New Zealand.
|
---|
| 7 | *
|
---|
| 8 | * <BR><BR>
|
---|
| 9 | *
|
---|
| 10 | * Author: Greenstone Digital Library, University of Waikato
|
---|
| 11 | *
|
---|
| 12 | * <BR><BR>
|
---|
| 13 | *
|
---|
| 14 | * Copyright (C) 2020 New Zealand Digital Library Project
|
---|
| 15 | *
|
---|
| 16 | * <BR><BR>
|
---|
| 17 | *
|
---|
| 18 | * This program is free software; you can redistribute it and/or modify
|
---|
| 19 | * it under the terms of the GNU General Public License as published by
|
---|
| 20 | * the Free Software Foundation; either version 2 of the License, or
|
---|
| 21 | * (at your option) any later version.
|
---|
| 22 | *
|
---|
| 23 | * <BR><BR>
|
---|
| 24 | *
|
---|
| 25 | * This program is distributed in the hope that it will be useful,
|
---|
| 26 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 27 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 28 | * GNU General Public License for more details.
|
---|
| 29 | *
|
---|
| 30 | * <BR><BR>
|
---|
| 31 | *
|
---|
| 32 | * You should have received a copy of the GNU General Public License
|
---|
| 33 | * along with this program; if not, write to the Free Software
|
---|
| 34 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 35 | *########################################################################
|
---|
| 36 | */
|
---|
| 37 | package org.greenstone.gatherer.metadata;
|
---|
| 38 |
|
---|
| 39 | import java.io.*;
|
---|
| 40 | import java.util.*;
|
---|
| 41 | import javax.swing.JFileChooser;
|
---|
| 42 | import javax.swing.filechooser.FileNameExtensionFilter;
|
---|
| 43 | import javax.swing.JFrame;
|
---|
| 44 |
|
---|
| 45 | import org.apache.commons.csv.*;
|
---|
| 46 |
|
---|
| 47 | import org.greenstone.gatherer.util.SafeProcess;
|
---|
| 48 | //import org.greenstone.gatherer.Configuration;
|
---|
| 49 | import org.greenstone.gatherer.DebugStream;
|
---|
| 50 | import org.greenstone.gatherer.Dictionary;
|
---|
| 51 | import org.greenstone.gatherer.Gatherer;
|
---|
| 52 | //import org.greenstone.gatherer.gui.WarningDialog;
|
---|
| 53 | import org.greenstone.gatherer.collection.Collection;
|
---|
| 54 | import org.greenstone.gatherer.metadata.MetadataChangedListener;
|
---|
| 55 | import org.greenstone.gatherer.metadata.MetadataElement; //
|
---|
| 56 | import org.greenstone.gatherer.metadata.MetadataSet;
|
---|
| 57 | import org.greenstone.gatherer.metadata.MetadataSetManager;
|
---|
| 58 | import org.greenstone.gatherer.metadata.MetadataXMLFileManager;
|
---|
| 59 | import org.greenstone.gatherer.metadata.MetadataValue;//
|
---|
| 60 |
|
---|
| 61 |
|
---|
| 62 | /**
|
---|
| 63 | * Class to export GLI metadata of a collection to a metadata.csv file.
|
---|
| 64 | * This class can also merge GLI meta for the collection onto an existing metadata.csv file.
|
---|
| 65 | * Merging is a cumulative process.
|
---|
| 66 | * Duplicate entries and values are not preserved.
|
---|
| 67 | * Uses TreeMap and TreeSet to keep everything alphabetically ordered.
|
---|
| 68 | * TODO: What about ordering by unicode. Is that the natural ordering for Java Strings?
|
---|
| 69 | * If so, this would support keeping metadata values ordered regardless of script used.
|
---|
| 70 | */
|
---|
| 71 | public class MetadataToCSV implements FileFilter {
|
---|
| 72 | private char meta_field_sep = ','; // comma is default field separator for CSV, comma separated values
|
---|
| 73 | private String meta_value_sep_re = "\\|"; // must escape | to get regex
|
---|
| 74 | private char meta_value_sep_char = '|'; // when written out to file
|
---|
| 75 | private String collection_directory_path = "";
|
---|
| 76 | private String coll_importdir_path = "";
|
---|
| 77 | private final int import_path_length;
|
---|
| 78 |
|
---|
| 79 | /** The CSV metadata file to be read and rewritten. */
|
---|
| 80 | //private String metadataCSVFilename = "metadata.csv";
|
---|
| 81 | private File metadataCSVFile;
|
---|
| 82 |
|
---|
| 83 | /** Is this useful?
|
---|
| 84 | * Not yet implemented: if this flag is true, then if a file mentioned in metadata.csv does not exist,
|
---|
| 85 | * its entry is dropped and won't appear again when the metadata.csv is written out again.
|
---|
| 86 | */
|
---|
| 87 | //private boolean removeMetaForFilesThatDoNotExist = false;
|
---|
| 88 |
|
---|
| 89 | private final String IMPORT_DIRNAME = "import";
|
---|
| 90 |
|
---|
| 91 | /** A Map of all files/docs in this collection and their metadata,
|
---|
| 92 | * itself tuples of metadata field names and their (possibly multiple) metadata values. */
|
---|
| 93 | TreeMap<File, TreeMap<String,TreeSet<String>>> collMetaMap = new TreeMap<File, TreeMap<String,TreeSet<String>>>();
|
---|
| 94 |
|
---|
| 95 | public MetadataToCSV(String collDirPath) {
|
---|
| 96 | this.collection_directory_path = collDirPath;
|
---|
| 97 | this.coll_importdir_path = collDirPath + IMPORT_DIRNAME + File.separator; //new File(collDirPath, IMPORT_DIRNAME).getAbsolutePath();
|
---|
| 98 | import_path_length = this.coll_importdir_path.length();
|
---|
| 99 | this.metadataCSVFile = new File(coll_importdir_path, "metadata.csv");
|
---|
| 100 | }
|
---|
| 101 |
|
---|
/**
 * Creates an exporter for the collection at the given path that reads from and
 * writes to the given CSV file instead of the default import/metadata.csv.
 *
 * @param collDirPath path of the collection directory (see the one-argument constructor)
 * @param metadataCSV the CSV metadata file to use
 */
public MetadataToCSV(String collDirPath, File metadataCSV) {
    this(collDirPath);
    // Assign the parameter: the previous code assigned the field to itself,
    // so the file chosen by the caller was silently ignored.
    this.metadataCSVFile = metadataCSV;
}
|
---|
| 106 |
|
---|
/**
 * Creates an exporter with custom separators. Delegates to the two-argument
 * constructor and then overrides the default separators.
 *
 * @param collDirPath path of the collection directory
 * @param metadataCSVFile the CSV metadata file, passed on to the two-argument constructor
 * @param metafieldSepChar character separating the fields (columns) of a record
 * @param readMetaValSepExpression regular expression used to split a cell into metadata values when reading
 * @param writeMetaValSepChar character written between the metadata values of one cell
 */
public MetadataToCSV(String collDirPath, File metadataCSVFile, char metafieldSepChar, String readMetaValSepExpression, char writeMetaValSepChar) {
    this(collDirPath, metadataCSVFile);

    meta_field_sep = metafieldSepChar;
    meta_value_sep_re = readMetaValSepExpression;
    meta_value_sep_char = writeMetaValSepChar;
}
|
---|
| 113 |
|
---|
| 114 | /** Remove import path prefix from given file. Returned is the path of file relative to import. */
|
---|
| 115 | public String fileToRelativeString(File f) {
|
---|
| 116 | String fullPath = f.getAbsolutePath();
|
---|
| 117 | //System.err.println("@@@ fullpath: " + fullPath);
|
---|
| 118 | //System.err.println("@@@ coll_importdir_path: " + this.coll_importdir_path);
|
---|
| 119 | int indexMatch = fullPath.indexOf(coll_importdir_path);
|
---|
| 120 | if(indexMatch == -1) {
|
---|
| 121 | return fullPath;
|
---|
| 122 | } else {
|
---|
| 123 | return fullPath.substring(indexMatch+import_path_length);
|
---|
| 124 | }
|
---|
| 125 | }
|
---|
| 126 |
|
---|
| 127 |
|
---|
| 128 | /** helper methods to export metadata for collection files to csv
|
---|
| 129 | * Returns a Navigable Sorted Map of file names in the collection (relative to import folder), ordered alphabetically,
|
---|
| 130 | * mapped to each file's metadata, sorted alphabetically by metadata field name, and list of metadata values sorted alphabetically
|
---|
| 131 | */
|
---|
| 132 | public TreeMap<File, TreeMap<String,TreeSet<String>>> getAllAssignedMetadataForAllFiles() {
|
---|
| 133 | TreeMap<File, TreeMap<String,TreeSet<String>>> files_with_meta = new TreeMap<File, TreeMap<String,TreeSet<String>>>();
|
---|
| 134 |
|
---|
| 135 | ArrayList<File> files = listFilesInCollection(this.collection_directory_path);
|
---|
| 136 | Iterator<File> i = files.iterator();
|
---|
| 137 |
|
---|
| 138 | while(i.hasNext()) {
|
---|
| 139 | File f = i.next();
|
---|
| 140 | ArrayList file_meta = MetadataXMLFileManager.getMetadataAssignedToFile(f);
|
---|
| 141 |
|
---|
| 142 | //files_with_meta.put(f, file_meta);
|
---|
| 143 | TreeMap<String,TreeSet<String>> fileToMetaMap = new TreeMap<String,TreeSet<String>>();
|
---|
| 144 |
|
---|
| 145 | // debugging display
|
---|
| 146 | ///System.err.println("Meta for file: " + f.getAbsolutePath());
|
---|
| 147 | Iterator it = file_meta.iterator();
|
---|
| 148 | while(it.hasNext()) {
|
---|
| 149 | MetadataValue meta = (MetadataValue)it.next();
|
---|
| 150 | String metaValue = meta.getValue();
|
---|
| 151 | MetadataElement metaEl = meta.getMetadataElement();
|
---|
| 152 | String metaFieldName = metaEl.getFullName();
|
---|
| 153 | ///System.err.println(" field: " + metaFieldName);
|
---|
| 154 | ///System.err.println(" value: " + metaValue);
|
---|
| 155 |
|
---|
| 156 | TreeSet<String> vals = fileToMetaMap.get(metaFieldName);
|
---|
| 157 | if(vals == null) {
|
---|
| 158 | vals = new TreeSet<String>();
|
---|
| 159 | vals.add(metaValue);
|
---|
| 160 | fileToMetaMap.put(metaFieldName, vals);
|
---|
| 161 | } else {
|
---|
| 162 | vals.add(metaValue);
|
---|
| 163 | }
|
---|
| 164 | }
|
---|
| 165 |
|
---|
| 166 | files_with_meta.put(f, fileToMetaMap);
|
---|
| 167 | }
|
---|
| 168 |
|
---|
| 169 | return files_with_meta;
|
---|
| 170 | }
|
---|
| 171 |
|
---|
| 172 | // Get all meta in any metadata.csv file
|
---|
| 173 | // and add to it all meta assigned for docs in this collection
|
---|
| 174 | public void amalgamateAllMeta() {
|
---|
| 175 | TreeMap<File, TreeMap<String,TreeSet<String>>> assignedMeta = getAllAssignedMetadataForAllFiles();
|
---|
| 176 | TreeMap<File, TreeMap<String,TreeSet<String>>> csvFileMeta = loadMetaFromCSVFile(this.metadataCSVFile);
|
---|
| 177 |
|
---|
| 178 | if(collMetaMap.size() == 0) {
|
---|
| 179 |
|
---|
| 180 | if(assignedMeta.keySet().size() > csvFileMeta.keySet().size()) {
|
---|
| 181 | collMetaMap = assignedMeta;
|
---|
| 182 | merge(collMetaMap, csvFileMeta);
|
---|
| 183 | } else {
|
---|
| 184 | collMetaMap = csvFileMeta;
|
---|
| 185 | merge(collMetaMap, assignedMeta);
|
---|
| 186 | }
|
---|
| 187 | } else {
|
---|
| 188 |
|
---|
| 189 | merge(collMetaMap, assignedMeta);
|
---|
| 190 | merge(collMetaMap, csvFileMeta);
|
---|
| 191 | }
|
---|
| 192 |
|
---|
| 193 | }
|
---|
| 194 |
|
---|
| 195 | public TreeSet<String> getAllCollHeadings(TreeMap<File, TreeMap<String,TreeSet<String>>> metaMap) {
|
---|
| 196 | TreeSet<String> collHeadings = new TreeSet<String>();
|
---|
| 197 |
|
---|
| 198 | if(metaMap == null || metaMap.size() == 0) {
|
---|
| 199 | return collHeadings;
|
---|
| 200 | }
|
---|
| 201 | // get all meta field names and add into collHeadings. As it's a TreeSet,
|
---|
| 202 | // duplicates will be automatically ignored and collheadings will be sorted
|
---|
| 203 | Iterator<File> iFiles = metaMap.keySet().iterator();
|
---|
| 204 | while(iFiles.hasNext()) {
|
---|
| 205 | File f = iFiles.next();
|
---|
| 206 | TreeMap<String, TreeSet<String>> metaFields = metaMap.get(f);
|
---|
| 207 | Iterator<String> iMetaFields = metaFields.keySet().iterator();
|
---|
| 208 | while(iMetaFields.hasNext()) {
|
---|
| 209 | String fieldName = iMetaFields.next();
|
---|
| 210 | collHeadings.add(fieldName);
|
---|
| 211 | }
|
---|
| 212 | }
|
---|
| 213 |
|
---|
| 214 | return collHeadings;
|
---|
| 215 | }
|
---|
| 216 |
|
---|
| 217 | /** merge metaMap param into baseMetaMap: only portions not already present in baseMetaMap are added in
|
---|
| 218 | * whether these are new file entries, new metadata field entries for extant files, or metadata values for extant fields of files.
|
---|
| 219 | * A simple map.putALL() will not do the trick as collMetaMap is a complicated data structure.
|
---|
| 220 | */
|
---|
| 221 | public void merge(TreeMap<File, TreeMap<String,TreeSet<String>>> baseMetaMap, TreeMap<File, TreeMap<String,TreeSet<String>>> metaMap) {
|
---|
| 222 |
|
---|
| 223 | if(metaMap == null || metaMap.size() == 0) {
|
---|
| 224 | // nothing to do
|
---|
| 225 | return;
|
---|
| 226 | }
|
---|
| 227 |
|
---|
| 228 | Iterator<File> iFiles = metaMap.keySet().iterator();
|
---|
| 229 | while(iFiles.hasNext()) {
|
---|
| 230 | File f = iFiles.next();
|
---|
| 231 |
|
---|
| 232 | // check if this file already has an entry in baseMetaMap
|
---|
| 233 | TreeMap<String, TreeSet<String>> origMetaFields = baseMetaMap.get(f);
|
---|
| 234 |
|
---|
| 235 | TreeMap<String, TreeSet<String>> metaFields = metaMap.get(f);
|
---|
| 236 | Iterator<String> iMetaFields = metaFields.keySet().iterator();
|
---|
| 237 |
|
---|
| 238 | // if file in metaMap didn't exist in baseMetaMap, easy: just copy its entry across in entirety
|
---|
| 239 | if(origMetaFields == null) {
|
---|
| 240 | metaMap.put(f, metaFields);
|
---|
| 241 | continue;
|
---|
| 242 | }
|
---|
| 243 |
|
---|
| 244 | // else, file already exists in baseMetaMap, need to check if we have to merge any meta on the file
|
---|
| 245 | while(iMetaFields.hasNext()) {
|
---|
| 246 | String fieldName = iMetaFields.next();
|
---|
| 247 | TreeSet<String> metaValues = metaFields.get(fieldName);
|
---|
| 248 |
|
---|
| 249 | // check if this metadata field exists for the same file in baseMetaMap
|
---|
| 250 | TreeSet<String> origMetaValues = origMetaFields.get(fieldName);
|
---|
| 251 | if(origMetaValues == null) { // this metadata field name did not exist for file in baseMetaMap,
|
---|
| 252 | // so copy all vals for this fieldName into baseMetaMap's entry for this file
|
---|
| 253 | origMetaFields.put(fieldName, metaValues);
|
---|
| 254 | continue; // continue on inner loop
|
---|
| 255 | }
|
---|
| 256 |
|
---|
| 257 | // else the meta fieldName existed for that file in baseMetaMap
|
---|
| 258 | // Check if any of the metadata values didn't already exist, else add them in
|
---|
| 259 | Iterator<String> iMetaValues = metaValues.iterator();
|
---|
| 260 | while(iMetaValues.hasNext()) {
|
---|
| 261 | String metaValue = iMetaValues.next();
|
---|
| 262 |
|
---|
| 263 | if(!origMetaValues.contains(metaValue)) {
|
---|
| 264 | origMetaValues.add(metaValue);
|
---|
| 265 | }
|
---|
| 266 | }
|
---|
| 267 |
|
---|
| 268 | }
|
---|
| 269 | }
|
---|
| 270 | }
|
---|
| 271 |
|
---|
| 272 |
|
---|
| 273 | /** If successfully wrote out collection's meta from to a CSV file,
|
---|
| 274 | * then will need to remove all meta from GLI (metadata.xml files).
|
---|
| 275 | * Just del or rename those files to .bak?
|
---|
| 276 | */
|
---|
| 277 | public void moveGLIMetaToCSV(File csvFile) {
|
---|
| 278 | boolean success = exportGLIMetaToCSV(csvFile);
|
---|
| 279 | // TODO
|
---|
| 280 | if(success) {
|
---|
| 281 | } else {
|
---|
| 282 | System.err.println("Failed to export GLI metadata for this collection to CSV properly. Will not remove metadata.xml files");
|
---|
| 283 | }
|
---|
| 284 | }
|
---|
| 285 |
|
---|
| 286 | /** If given a new file to create, creates the specified meta csv file from GLI's meta for the current collection.
|
---|
| 287 | * If the file exists, this will append the GLI metadata without checking if the file already contains the same entries. */
|
---|
| 288 | public boolean exportGLIMetaToCSV(File csvFile) {
|
---|
| 289 | boolean appendSetting = false;
|
---|
| 290 | boolean success = false;
|
---|
| 291 |
|
---|
| 292 | // if(csvFile.exists()) {
|
---|
| 293 | // appendSetting = true; // TODO: better to call the other version of this method in this case?
|
---|
| 294 | // }
|
---|
| 295 | // TreeMap<File, TreeMap<String,TreeSet<String>>> assignedMeta = getAllAssignedMetadataForAllFiles();
|
---|
| 296 | // writeMetaToCSV(assignedMeta, csvFile, appendSetting);
|
---|
| 297 |
|
---|
| 298 | if(csvFile.exists()) {
|
---|
| 299 | //appendSetting = true; // better to call the other version of this method in this case?
|
---|
| 300 | amalgamateAllMeta();
|
---|
| 301 | success = writeMetaToCSV(collMetaMap, csvFile, appendSetting);
|
---|
| 302 | } else { // no preexisting metadata.csv file, just write out GLI meta
|
---|
| 303 | TreeMap<File, TreeMap<String,TreeSet<String>>> assignedMeta = getAllAssignedMetadataForAllFiles();
|
---|
| 304 | success = writeMetaToCSV(assignedMeta, csvFile, appendSetting);
|
---|
| 305 | }
|
---|
| 306 |
|
---|
| 307 | return success;
|
---|
| 308 | }
|
---|
| 309 |
|
---|
| 310 | private boolean writeMetaToCSV(TreeMap<File, TreeMap<String,TreeSet<String>>> metaMap, File csvFile, boolean appendSetting) {
|
---|
| 311 | boolean success = true;
|
---|
| 312 |
|
---|
| 313 | // First would need to write the row of all headings
|
---|
| 314 | TreeSet<String> metaFieldColumnHeadings = getAllCollHeadings(metaMap);
|
---|
| 315 | // Careful, collHeadings are alphabetically ordered, but not all docs may have meta for each column heading/metadata field name
|
---|
| 316 | // Need metadataFieldNames in an indexed array
|
---|
| 317 | Vector<String> columnHeadings = new Vector<String>(metaFieldColumnHeadings.size());
|
---|
| 318 | // put the Filename column as first item
|
---|
| 319 | columnHeadings.add("Filename");
|
---|
| 320 | columnHeadings.addAll(metaFieldColumnHeadings); // now have an indexed, yet still ordered, list of all column headings(the meta fieldnames)
|
---|
| 321 |
|
---|
| 322 | CSVFormat customCSVFormat = CSVFormat.DEFAULT
|
---|
| 323 | .withDelimiter(meta_field_sep)
|
---|
| 324 | .withIgnoreSurroundingSpaces(false)
|
---|
| 325 | .withQuoteMode(QuoteMode.MINIMAL)
|
---|
| 326 | .withTrim();
|
---|
| 327 |
|
---|
| 328 | try (CSVPrinter printer = new CSVPrinter(new FileWriter(csvFile, appendSetting), customCSVFormat)) {
|
---|
| 329 | printer.printRecord(columnHeadings);
|
---|
| 330 | // https://javadoc.io/doc/org.apache.commons/commons-csv/latest/index.html
|
---|
| 331 | Iterator<File> iFiles = metaMap.keySet().iterator();
|
---|
| 332 | while(iFiles.hasNext()) {
|
---|
| 333 | File f = iFiles.next();
|
---|
| 334 | String relFilename = fileToRelativeString(f);
|
---|
| 335 | // write out the filename field of this record
|
---|
| 336 | printer.print(relFilename);
|
---|
| 337 |
|
---|
| 338 | TreeMap<String, TreeSet<String>> fileMetadata = metaMap.get(f);
|
---|
| 339 | // now get each metadata field's value in the order of the column headings, and write them out
|
---|
| 340 | //for(String metaFieldName : columnHeadings) {
|
---|
| 341 | for(int i = 1; i < columnHeadings.size(); i++) { // skip past Filename coll heading, already written out
|
---|
| 342 | String metaFieldName = columnHeadings.get(i);
|
---|
| 343 | TreeSet<String> metavalues = fileMetadata.get(metaFieldName);
|
---|
| 344 | StringBuffer allMetaValuesForField = new StringBuffer();
|
---|
| 345 | if(metavalues == null || metavalues.size() == 0) {
|
---|
| 346 | // this file does not have (metavalues) such a metaFieldName, the cell for this column is empty
|
---|
| 347 | //System.err.println("No meta values for fieldname: " + metaFieldName);
|
---|
| 348 | printer.print(allMetaValuesForField);
|
---|
| 349 | } else {
|
---|
| 350 | for(String metavalue : metavalues) {
|
---|
| 351 | //metavalue = metavalue.trim();
|
---|
| 352 | allMetaValuesForField.append(meta_value_sep_char);
|
---|
| 353 | allMetaValuesForField.append(metavalue);
|
---|
| 354 | }
|
---|
| 355 | // write out the current metadata field of this record
|
---|
| 356 | // remove the extra meta_value_separator_char added the first time
|
---|
| 357 | printer.print(allMetaValuesForField.substring(1));
|
---|
| 358 | }
|
---|
| 359 | }
|
---|
| 360 |
|
---|
| 361 | printer.println(); // done writing a record
|
---|
| 362 | }
|
---|
| 363 | } catch (IOException ex) {
|
---|
| 364 | success = false;
|
---|
| 365 | DebugStream.printStackTrace(ex);
|
---|
| 366 | System.err.println("Caught exception when writing meta to CSVFile " + csvFile.getAbsolutePath());
|
---|
| 367 | System.err.println("\t" + ex.getMessage());
|
---|
| 368 | }
|
---|
| 369 |
|
---|
| 370 | return success;
|
---|
| 371 | }
|
---|
| 372 |
|
---|
| 373 |
|
---|
| 374 | public TreeMap<File, TreeMap<String,TreeSet<String>>> loadMetaFromCSVFile(File csvFile) {
|
---|
| 375 | TreeMap<File, TreeMap<String,TreeSet<String>>> csvFileMeta = new TreeMap<File, TreeMap<String,TreeSet<String>>>();
|
---|
| 376 |
|
---|
| 377 | if(!csvFile.exists()) {
|
---|
| 378 | return csvFileMeta;
|
---|
| 379 | }
|
---|
| 380 |
|
---|
| 381 | Reader in = null;
|
---|
| 382 | //try(Reader in = new FileReader(csvFile);) { // try-with-resources may break on older Java that we use to build GS3 binaries
|
---|
| 383 | try {
|
---|
| 384 | in = new FileReader(csvFile);
|
---|
| 385 | boolean headingRow = true;
|
---|
| 386 |
|
---|
| 387 | // https://javadoc.io/doc/org.apache.commons/commons-csv/latest/index.html
|
---|
| 388 | CSVFormat lenientCSVFormat = CSVFormat.DEFAULT
|
---|
| 389 | .withDelimiter(meta_field_sep)
|
---|
| 390 | .withFirstRecordAsHeader()
|
---|
| 391 | .withCommentMarker('#')
|
---|
| 392 | .withIgnoreSurroundingSpaces()
|
---|
| 393 | .withTrim();
|
---|
| 394 |
|
---|
| 395 | // https://stackoverflow.com/questions/36269387/get-csv-file-header-using-apache-commons
|
---|
| 396 | // The first col heading which is the Filename
|
---|
| 397 | // the remaining CSV column headings are the metadata field names
|
---|
| 398 |
|
---|
| 399 | CSVParser parser = lenientCSVFormat.parse(in);
|
---|
| 400 |
|
---|
| 401 | //String[] metaFieldNames = lenientCSVFormat.getHeader(); // didn't work
|
---|
| 402 | // getHeaders() returns List<String>, convert to String[] array
|
---|
| 403 | String[] metaFieldNames = parser.getHeaderNames().toArray(new String[0]);
|
---|
| 404 |
|
---|
| 405 | for (CSVRecord record : parser) {
|
---|
| 406 |
|
---|
| 407 | // a new row, represents a new file's meta
|
---|
| 408 | TreeMap<String,TreeSet<String>> meta = new TreeMap<String,TreeSet<String>>();
|
---|
| 409 |
|
---|
| 410 | for(int i = 0; i < record.size(); i++) { //for (String field : record) {
|
---|
| 411 | String field = record.get(i);
|
---|
| 412 |
|
---|
| 413 | if(i == 0) { // col 0 = Filename
|
---|
| 414 | String filename = field;
|
---|
| 415 | // TODO: filenames are stored relative to import folder, convert to full path for internal use?
|
---|
| 416 | File fullPathFile = new File(coll_importdir_path + filename);
|
---|
| 417 | ///System.err.println("Found Filename meta: " + filename);
|
---|
| 418 | csvFileMeta.put(fullPathFile, meta);
|
---|
| 419 | } else {
|
---|
| 420 | // not Filename, but metadata field name, add into meta map for this file
|
---|
| 421 | TreeSet<String> metaValues = new TreeSet<String>();
|
---|
| 422 | String metadataFieldName = metaFieldNames[i]; // get column heading=meta field name for current cell
|
---|
| 423 | meta.put(metadataFieldName, metaValues);
|
---|
| 424 | ///System.err.println("Found value for meta field: " + metadataFieldName);
|
---|
| 425 | // Split the field to get all metavalues for this metadata field name
|
---|
| 426 | // and add to metaValues set
|
---|
| 427 | String unparsedMetaVal = field.trim();
|
---|
| 428 | String[] metadataValues = unparsedMetaVal.split(meta_value_sep_re);
|
---|
| 429 | for(String metaVal : metadataValues) {
|
---|
| 430 | metaVal = metaVal.trim(); // get rid of whitespaces around separator char
|
---|
| 431 | if(!metaVal.equals("")) {
|
---|
| 432 | ///System.err.println("Found value for meta field: " + metaVal);
|
---|
| 433 | metaValues.add(metaVal);
|
---|
| 434 | }
|
---|
| 435 | }
|
---|
| 436 | }
|
---|
| 437 | }
|
---|
| 438 | }
|
---|
| 439 | } catch(Exception e) {
|
---|
| 440 | DebugStream.printStackTrace(e);
|
---|
| 441 | DebugStream.println("@@@ Error reading from CSV file: " + csvFile.getAbsolutePath());
|
---|
| 442 | } finally {
|
---|
| 443 | SafeProcess.closeResource(in);
|
---|
| 444 | }
|
---|
| 445 |
|
---|
| 446 | //this.print(csvFileMeta);
|
---|
| 447 | return csvFileMeta;
|
---|
| 448 | }
|
---|
| 449 |
|
---|
| 450 | /** For debugging */
|
---|
| 451 | public void print(TreeMap<File, TreeMap<String,TreeSet<String>>> metaMap ) {
|
---|
| 452 | Iterator<File> iFiles = metaMap.keySet().iterator();
|
---|
| 453 | while(iFiles.hasNext()) {
|
---|
| 454 | File f = iFiles.next();
|
---|
| 455 | TreeMap<String, TreeSet<String>> metaFields = metaMap.get(f);
|
---|
| 456 | if(metaFields != null) {
|
---|
| 457 | System.err.println("Meta for file: " + fileToRelativeString(f)); //f.getAbsolutePath());
|
---|
| 458 | }
|
---|
| 459 | Iterator<String> iMetaFields = metaFields.keySet().iterator();
|
---|
| 460 | if(!iMetaFields.hasNext()) {
|
---|
| 461 | System.err.println("No meta for file!");
|
---|
| 462 | }
|
---|
| 463 | while(iMetaFields.hasNext()) {
|
---|
| 464 | String fieldName = iMetaFields.next();
|
---|
| 465 | System.err.println("\tMetafield: " + fieldName);
|
---|
| 466 |
|
---|
| 467 | TreeSet<String> metaValues = metaFields.get(fieldName);
|
---|
| 468 | Iterator<String> iMetaValues = metaValues.iterator();
|
---|
| 469 | while(iMetaValues.hasNext()) {
|
---|
| 470 | String metaValue = iMetaValues.next();
|
---|
| 471 | System.err.println("\t\tValue: " + metaValue);
|
---|
| 472 | }
|
---|
| 473 | }
|
---|
| 474 | }
|
---|
| 475 | }
|
---|
| 476 |
|
---|
| 477 | /** For debugging */
|
---|
| 478 | public void printOrderedCollectionMeta() {
|
---|
| 479 | //TreeMap<File, TreeMap<String,TreeSet<String>>> collMetaMap = getAllAssignedMetadataForAllFiles();
|
---|
| 480 |
|
---|
| 481 | amalgamateAllMeta();
|
---|
| 482 | this.print(collMetaMap);
|
---|
| 483 | }
|
---|
| 484 |
|
---|
| 485 | public ArrayList<File> listFilesInCollection(String collection_directory_path) {
|
---|
| 486 |
|
---|
| 487 | ///System.err.println("coll dir path: " + collection_directory_path);
|
---|
| 488 |
|
---|
| 489 | // only files in import folder have meta. Don't list files outside import folder
|
---|
| 490 | File collDir = new File(collection_directory_path, IMPORT_DIRNAME);
|
---|
| 491 |
|
---|
| 492 | ArrayList<File> files = new ArrayList<File>();
|
---|
| 493 |
|
---|
| 494 | //FileFilter collDocsFilter = new CollectionDocFileFilter();
|
---|
| 495 | getAllFiles(files, collDir, this);
|
---|
| 496 |
|
---|
| 497 | return files;
|
---|
| 498 | }
|
---|
| 499 |
|
---|
| 500 | public void getAllFiles(ArrayList<File> files, File path, FileFilter filter) {
|
---|
| 501 | File[] fileList = path.listFiles(filter);
|
---|
| 502 | for(int i = 0; i < fileList.length; i++) {
|
---|
| 503 | File f = fileList[i];
|
---|
| 504 | if(f.isFile()) {
|
---|
| 505 | files.add(f);
|
---|
| 506 | } else {
|
---|
| 507 | getAllFiles(files, f, filter);
|
---|
| 508 | }
|
---|
| 509 | }
|
---|
| 510 | }
|
---|
| 511 |
|
---|
| 512 | /** Filter to only allow Gathered GS documents
|
---|
| 513 | * to produce the list of files for which we need to export GLI metadata info to CSV.
|
---|
| 514 | */
|
---|
| 515 | //private class CollectionDocFileFilter implements FileFilter {
|
---|
| 516 | @Override
|
---|
| 517 | public boolean accept(File pathname) {
|
---|
| 518 | String tailname = pathname.getName();
|
---|
| 519 | if(pathname.isDirectory()) {
|
---|
| 520 | if(tailname.equals(".svn")) {
|
---|
| 521 | return false;
|
---|
| 522 | }
|
---|
| 523 | } else {
|
---|
| 524 | if(pathname.equals(metadataCSVFile)) { // skip any meta csv file user exported/put into import
|
---|
| 525 | return false;
|
---|
| 526 | } else if(tailname.equals("metadata.xml")) {
|
---|
| 527 | return false;
|
---|
| 528 | } else if(tailname.endsWith("~")) {
|
---|
| 529 | return false;
|
---|
| 530 | } else if(tailname.endsWith(".bak")) {
|
---|
| 531 | return false;
|
---|
| 532 | }
|
---|
| 533 | }
|
---|
| 534 | // accept all other file types
|
---|
| 535 | return true;
|
---|
| 536 | }
|
---|
| 537 | //}
|
---|
| 538 |
|
---|
| 539 | public static File chooseMetaCSVFile(String defaultSearchPath, JFrame parent) {
|
---|
| 540 | JFileChooser chooser = new JFileChooser(defaultSearchPath);
|
---|
| 541 | chooser.setFileSelectionMode(JFileChooser.FILES_ONLY);
|
---|
| 542 | chooser.setDialogTitle(Dictionary.get("ExportMeta.ChooseMetaCSVFile"));
|
---|
| 543 | FileNameExtensionFilter filter = new FileNameExtensionFilter("CSV spreadsheet file", "csv");
|
---|
| 544 | chooser.setFileFilter(filter);//.addChoosableFileFilter(filter);
|
---|
| 545 | int returnVal = chooser.showOpenDialog(parent);
|
---|
| 546 | if(returnVal == JFileChooser.APPROVE_OPTION) {
|
---|
| 547 | File selectedFile = chooser.getSelectedFile();
|
---|
| 548 | ///System.err.println("File selected: " + selectedFile.getAbsolutePath());
|
---|
| 549 | return selectedFile;
|
---|
| 550 | } else {
|
---|
| 551 | return null;
|
---|
| 552 | }
|
---|
| 553 | }
|
---|
| 554 | }
|
---|
| 555 |
|
---|
| 556 |
|
---|
| 557 |
|
---|
| 558 |
|
---|