/**
 *############################################################################
 * A component of the Greenstone Librarian Interface, part of the Greenstone
 * digital library suite from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * Author: Michael Dewsnip, NZDL Project, University of Waikato, NZ
 *
 * Copyright (C) 2010 Greenstone Digital Library Project
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *############################################################################
 */
 
package org.greenstone.gatherer.metadata;

import java.io.File;
import java.net.*;
import java.nio.charset.*;
import java.util.*;
import org.greenstone.gatherer.collection.CollectionManager;
import org.greenstone.gatherer.DebugStream;

import java.util.regex.Matcher;
import java.util.regex.Pattern;



/** Static access class that contains many of the methods used to work with filename encodings. 
* Works closely with classes FileNode, CollectionTreeNode, MetadataXMLFile, MetadataXMLFileManager
* to maintain a map of URLEncodedFilenames to their filename encodings.
* The process of filename encoding further affects the CollectionManager which refreshes its CollectionTree, 
* FileManager (move, delete, rename actions), MetadataValueTableModel, EnrichPane. */

public class FilenameEncoding {
	/** When true, filenames in the trees are displayed in their URL-encoded form (debugging aid). */
	public static boolean DEBUGGING = false;

	/** Set to false by Gatherer if the locale is UTF-8, because Java's handling is 
	* such that non-UTF-8 filename encodings on a UTF-8 locale are destructively 
	* converted, so that the original byte codes in the filename are not preserved. */
    public static boolean MULTIPLE_FILENAME_ENCODINGS_SUPPORTED = false;

	/** Also set by Gatherer. 
	* If the OS supports multiple filename encodings, we will be working with URL strings
	* and the applicable separator is always the forward slash ("/"), not File.separator. 
	* If multiple filename encodings are not supported, we're dealing with File.separator. */
    public static String URL_FILE_SEPARATOR = File.separator;
    

	/** gs.filenameEncoding is a special sort of metadata that is not merely to be stored along 
	 * with a file, but is to be applied in real-time on the file's name in the CollectionTree 
	 * display. Since FileNodes are constantly destroyed and reconstructed by that Tree when 
	 * its nodes are expanded and contracted, storing the filename encoding of each file along
	 * with the file in a FileNode doesn't help because it doesn't last. Instead of rediscovering
	 * the encoding at every stage by querying the metadata.xml file, we store the encodings for
	 * fast access in a map of (URLEncodedFilePath, filename-encoding) pairs. 
	 * The current design of the map is to store only the active filename metadata assigned
	 * directly at that file/folder's level, and to store the empty string when none is found at
	 * that level. Therefore, if the map contains no entry for a file, the encoding for that
	 * file still needs to be retrieved. 
	 * Keys and values are used as Strings throughout this class (the map itself is a raw type). */
    public static Map map = new HashMap();
	
	/** Compiled pattern for hex entities of characters. These are of the form "&#x....;" with 1 to 4
	 * characters between the "&#x" prefix and the ";" suffix.
	 * NOTE(review): the character class [0-9a-zA-Z] also admits the non-hex letters g-z and G-Z,
	 * so strings such as "&#xzz;" match too - confirm whether that is intended. */
	public static final Pattern HEX_PATTERN = Pattern.compile("(&#x[0-9a-zA-Z]{1,4}+;)");

    /** The hex entity version of the ampersand character.
     * We use this in place of the ampersand character in filenames in metadata.xml files to
     * preserve the reference to the literal ampersand in the real file name on the file system.
     */
    public static final String HEX_ENTITY_AMPERSAND = FilenameEncoding.hexEntityForChar("&"); // evaluates to "&#x26;"
    

//*********************** BUSY REFRESHING / REQUIRING  REFRESH *********************

    /** Flag recording that filename encoding metadata was changed and a refresh is therefore pending.
	* According to the original notes it is raised from the enter keyPress event in gui.EnrichPane
	* and when the gs.FilenameEncoding field loses focus. 
	* Read and written only through the synchronized accessors below. */
    private static boolean refreshRequired = false;

    /** @return the current value of the refresh-pending flag (read under the class lock). */
    public static synchronized boolean isRefreshRequired() {
        final boolean pending = refreshRequired;
        return pending;
    }
	
    /**
     * Records whether a refresh is pending.
     * The requested state is only honoured when multiple filename encodings are supported;
     * otherwise the flag is always forced back to false.
     * @param state the requested value of the refresh-pending flag
     */
    public static synchronized void setRefreshRequired(boolean state) {
        refreshRequired = MULTIPLE_FILENAME_ENCODINGS_SUPPORTED && state;
    }

//************************** MAP RETRIEVAL METHODS ******************************	

	/**
	 * Returns the gs.filenameEncoding metadata that applies to a file inside the collection:
	 * the value assigned directly to the file if there is one, else whatever is inherited
	 * from its ancestor folders.
	 * @param file the file or folder whose encoding is wanted
	 * @param urlEncodedFilePath the key under which this file's encoding is cached in the map
	 * @param bruteForceLookup currently ignored by this method
	 * @return the applicable encoding, or "" if none was found at any level
	 */
	public static String findFilenameEncoding(
			File file, String urlEncodedFilePath, boolean bruteForceLookup) 
	{
		String directEncoding;

		if(map.containsKey(urlEncodedFilePath)) {
			// this level was looked up before: reuse the cached answer (possibly "")
			directEncoding = (String)map.get(urlEncodedFilePath);
		}
		else {
			// First visit: ask for the filename encoding metadata *directly* assigned to the
			// file (true: gets gs.filenameEncoding only). Inherited values are not stored
			// against this key, they are resolved separately further down.
			directEncoding = "";
			ArrayList assigned = MetadataXMLFileManager.getMetadataAssignedDirectlyToFile(file, true);
			if(!assigned.isEmpty()) {
				MetadataValue first = (MetadataValue)assigned.get(0);
				directEncoding = first.getValue();
			}

			// cache the outcome, including "", so that this lookup is not repeated
			map.put(urlEncodedFilePath, directEncoding);
		}

		if(!directEncoding.equals("")) {
			return directEncoding;
		}

		// nothing set at the file's own level: fall back on the enclosing folders
		return getInheritedFilenameEncoding(urlEncodedFilePath, file);
	}
	
	/**
	 * Walks up through the superfolders of the given file/folder, one level at a time,
	 * looking for an applicable filename encoding. Each level is first looked up in the
	 * file-to-encoding map and, if absent there, in the metadata assigned directly to that
	 * folder (the result, including "", is then cached in the map).
	 * Note that the file/folder at the level of urlFoldername (and dir) itself is assumed to
	 * have been inspected already by the caller.
	 * The walk stops at the first non-empty encoding, once the collection's import folder has
	 * been checked, when urlFoldername has no separators left, or when dir has no parent left.
	 * @param urlFoldername the URL-encoded path matching dir, with or without trailing separator
	 * @param dir the file/folder whose ancestors are to be inspected
	 * @return the first non-empty encoding found on the way up, or "" if there is none
	 */
    static public String getInheritedFilenameEncoding(String urlFoldername, File dir) 
	{
		String encoding = "";
		
		// don't want to search past import folder which is as 
		// far as we need to go to determine inherited encodings
		File importDir = new File(CollectionManager.getLoadedCollectionImportDirectoryPath());
		// if the top-level dir was already checked, we're done
		boolean done = dir.equals(importDir);

		// For directories, first remove trailing file separator in order to start checking
		// from higher level folders. An empty name has no last character to inspect.
		if(!urlFoldername.isEmpty()) {
			int lastIndex = urlFoldername.length()-1;
			if(urlFoldername.charAt(lastIndex) == URL_FILE_SEPARATOR.charAt(0)) { 
				urlFoldername = urlFoldername.substring(0, lastIndex);
			}
		}

		while(!done) {
			// get the folder that's one level up: this becomes null once we climb above the
			// file system root, which happens for files located outside the import folder
			dir = dir.getParentFile();		
			
			int index = urlFoldername.lastIndexOf(URL_FILE_SEPARATOR);		
			if(index == -1) { // no more slashes
				done = true;
			} else { 
				urlFoldername = urlFoldername.substring(0, index);
			}
			
			// now look in the map to see whether there's an encoding for this folder
			String folder = urlFoldername + URL_FILE_SEPARATOR;
			if(map.containsKey(folder)) {
				encoding = (String)map.get(folder); // may be ""				
			} else if(dir != null) { // no entry in map, so look in the metadata.xml at this folder level
				ArrayList list = MetadataXMLFileManager.getMetadataAssignedDirectlyToFile(
						dir, true); // true: gets gs.filenameEncoding only
				if(!list.isEmpty()) {
					MetadataValue metavalue = (MetadataValue)list.get(0);
					encoding = metavalue.getValue();			    
				}
				map.put(folder, encoding); // may be ""
			}
			
			// Stop on the first encoding found, when there is no folder left to climb to,
			// or when the import folder itself has now been checked.
			if(!encoding.equals("") || dir == null || dir.equals(importDir)) {
				done = true;
			}
		}

		return encoding;
    }

	/**
	 * Forgets every cached (file, encoding) pair.
	 * Called by GUIManager when a collection is closed, since the contents of the
	 * file-to-encoding map only apply to the collection that was open.
	 */
	public static void closeCollection() {
		// the cached encodings belong to the collection being closed
		map.clear();
	}

	/**
	 * Debugging aid: dumps the file-to-encoding map to System.err, one "+ key: value"
	 * line per entry, framed by two lines of asterisks.
	 * @param heading title printed (in upper case) above the entries
	 */
	public static void printFilenameMap(String heading) {
		System.err.println("\n********************************************");
		System.err.println(heading.toUpperCase());
		for(Object item : map.entrySet()) {
			Map.Entry pair = (Map.Entry)item;
			String path = (String)pair.getKey();
			String encoding = (String)pair.getValue();
			System.err.println("+ " + path + ": " + encoding);
		}
		System.err.println("********************************************\n");
	}
	
	/**
	 * UNUSED at present. Brute force counterpart of findFilenameEncoding().
	 * Doesn't consult the file-to-encoding map: it asks MetadataXMLFileManager for the
	 * gs.filenameEncoding metadata assigned to the file/folder and uses the last value returned.
	 * @param file the file or folder whose encoding is wanted
	 * @param urlEncodedFilename only used in the error message printed for a null value
	 * @param bruteForceLookup ignored by this method
	 * @return the last filename encoding value found, or "" if there is none or it is null
	 */
	public static String findFilenameEncodingBruteForce(File file, String urlEncodedFilename, 
					     boolean bruteForceLookup) 
	{
		System.err.println("\n***** BRUTE FORCE getFilenameEncoding() called\n");

		// true: gets gs.filenameEncoding only
		ArrayList values = MetadataXMLFileManager.getMetadataAssignedToFile(file, true);
		if(values.isEmpty()) {
			return ""; // no filename encoding set yet, perhaps
		}

		// take the filename encoding meta that comes last in the list,
		// even though it makes no sense to have multiple values for it
		MetadataValue lastValue = (MetadataValue)values.get(values.size()-1);
		String encoding = lastValue.getValue();
		if(encoding != null) {
			return encoding;
		}

		// unlikely, but don't hand a null back to the caller
		System.err.println("**** ERROR: encoding for " 
				+ urlEncodedFilename + " is NULL!");
		return "";
	}

//****************************** APPLYING ENCODINGS TO FILENAMES *****************************
	
    /**
     * Returns the string form of the given file's path used by this class.
     * When multiple filename encodings are supported this is the URL-encoded form produced by
     * fileToURLEncoding(); otherwise it is simply the file's absolute path.
     */
    public static String calcURLEncodedFilePath(File file) {
        return MULTIPLE_FILENAME_ENCODINGS_SUPPORTED
            ? fileToURLEncoding(file)
            : file.getAbsolutePath();
    }

    /**
     * Extracts the last path component (the file or folder name) from a URL-encoded file path.
     * A trailing separator, as found on directories, is dropped before the name is isolated.
     * @param urlfilepath path whose components are delimited by URL_FILE_SEPARATOR
     * @return whatever follows the last separator, or the whole (trimmed) input if there is none
     */
    public static String calcURLEncodedFileName(String urlfilepath) {
        // directory: remove the trailing slash
        String trimmed = urlfilepath.endsWith(URL_FILE_SEPARATOR)
            ? urlfilepath.substring(0, urlfilepath.length() - 1)
            : urlfilepath;

        // strip the directory prefix (if any), skipping over the separator itself
        int separatorAt = trimmed.lastIndexOf(URL_FILE_SEPARATOR);
        return (separatorAt == -1) ? trimmed : trimmed.substring(separatorAt + 1);
    }

	/**
	 * Given a string representing an alias of an official encoding (or an unofficial one
	 * starting with "Latin", e.g. "Latin-1"), attempts to work out the canonical encoding name.
	 * <p>
	 * Names starting with "latin" (in any letter case) are first tried as "ISO-8859" followed
	 * by the rest of the name, so "Latin-1" becomes "ISO-8859-1". If that rewritten name is not
	 * a known charset, or the name does not start with "latin", the name is looked up as given,
	 * so that official aliases such as "latin1" or "utf8" resolve too.
	 * The "latin" prefix test lowercases with Locale.ROOT, so it does not depend on the default
	 * locale (under a Turkish locale "LATIN-1" would otherwise not lowercase to "latin-1").
	 * @param encoding the encoding name or alias to canonicalise
	 * @return the canonical charset name; if the encoding is unrecognised (or null) a message
	 *         is printed to System.err and the parameter is returned as is
	 */
	public static String canonicalEncodingName(String encoding) {
		if(encoding != null) {
			// Latin-1 -> ISO-8859-1
			String alias = encoding.toLowerCase(Locale.ROOT);
			if(alias.startsWith("latin")) {
				String rewritten = lookupCanonicalCharsetName(
						"ISO-8859" + alias.substring("latin".length()));
				if(rewritten != null) {
					return rewritten;
				}
			}

			// canonical encoding for official aliases
			String official = lookupCanonicalCharsetName(encoding);
			if(official != null) {
				return official;
			}
		}

		System.err.println("(Could not recognise encoding (alias): " 
				+ encoding + ".)");
		return encoding; // no alias could be found, return the original parameter
	}

	/** Returns the canonical name of the charset known under the given name, or null if the name is illegal or unsupported. */
	private static String lookupCanonicalCharsetName(String charsetName) {
		try {
			return Charset.forName(charsetName).name();
		} catch (IllegalArgumentException e) {
			// covers both IllegalCharsetNameException and UnsupportedCharsetException
			return null;
		}
	}

//************************* GETTING THE URL ENCODING OF FILENAMES *********************************

	/** 
	 * Given a String containing hex entities, converts these back into the characters they stand for.
	 * e.g. A string like "02 T&#x113;n&#x101; Koutou\.mp3" will be returned as "02 Tena Koutou\.mp3"
	 * with macrons on the e and the a.
	 * Every substring matching HEX_PATTERN is replaced by the single char whose code is the hex number
	 * between the "&#x" prefix and the ";" suffix. Since HEX_PATTERN also matches sequences that are
	 * not valid hex numbers (such as "&#xzz;"), those are copied to the result unchanged rather than
	 * making the whole conversion fail with a NumberFormatException.
	 * @param str the string to decode
	 * @return the decoded string, or str itself if it contains nothing matching HEX_PATTERN
	 */
	public static String decodeStringContainingHexEntities(String str) {
		Matcher matcher = HEX_PATTERN.matcher(str);
		if(!matcher.find()) { // there were no hex codes to decode, return string as is
			return str;
		}

		StringBuilder result = new StringBuilder(str.length());
		int copiedUpTo = 0;
		do {
			// the literal text between the previous match and this one
			result.append(str, copiedUpTo, matcher.start());

			// lose the "&#x" prefix and the ";" suffix to get just the hex number portion of the match
			String hexPart = matcher.group();
			String hexNumberStr = hexPart.substring(3, hexPart.length()-1);
			try {
				// at most 4 hex digits, so the value always fits into a char
				result.append((char) Integer.parseInt(hexNumberStr, 16));
			} catch (NumberFormatException e) {
				// looked like a hex entity but isn't a hex number: keep it literally
				result.append(hexPart);
			}

			copiedUpTo = matcher.end();
		} while(matcher.find());

		// attach any suffix once we finished processing all the hex codes
		result.append(str, copiedUpTo, str.length());
		return result.toString();
	}	

	
	// Dr Bainbridge's methods
	/* On Linux machines that are set to using an ISO-8859 (Latin) type encoding, 
	* we can work with URL-encoded filenames in Java. Java works with whatever 
	* encoding the filesystem uses. Unlike systems working with UTF-8, where Java 
	* interprets filenames as UTF-8 (a destructive process since characters invalid
	* for UTF-8 are replaced with the invalid character, which means the original
	* character's byte codes can not be regained), working with an ISO-8859-1 
	* system means the original byte codes of the characters are preserved, 
	* regardless of whether the characters represent ISO-8859-1 or not. Such byte
	* codes are converted by the following method to the correct URL versions of 
	* the strings that the filenames represent (that is, the correct URL representations
	* of the filenames in their original encodings). This is useful for interactions with
	* Perl as Java and Perl can use URL-encoded filenames to talk about the same files
	* on the file system, instead of having to work out what encoding they are in. */
	
    /**
     * Produces the string form of a file's path that this class uses as a stand-in for the file.
     * When multiple filename encodings are not supported, this is the absolute path with every
     * ampersand replaced by HEX_ENTITY_AMPERSAND. Otherwise the file's URI is converted, in the
     * steps commented below, into a string in which values above 127 appear as %XX escapes,
     * ampersands as HEX_ENTITY_AMPERSAND and plus signs as literal "+".
     * If any step of the conversion throws, the stack trace is printed and the plain absolute
     * path is returned instead.
     * @param file the file whose path is to be converted; it is not required to exist
     * @return the converted path as described above
     */
    public static String fileToURLEncoding(File file) {
	// on a UTF-8 file system, DO NOT do the stuff further below,
	// just return input filename param, but with any & in the filename replaced with its hex entity
		if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
		    // protect ampersands in filenames by converting them to their hex entity
		    String filepath = file.getAbsolutePath();
		    filepath = filepath.replace("&", HEX_ENTITY_AMPERSAND);
		    return filepath;
		}
		
		String filename_url_encoded = "";
		
		// There is deliberately no test here for whether the file exists.
		// A File object whose actual file is in the process of being moved, and which
		// therefore temporarily does not 'exist' on the actual system, must still be
		// URL encodable. An earlier version returned "" for files that Java could not
		// see (other than "recycle"), which broke that case, so the test was removed.

		URI filename_uri = file.toURI();
		try {
		    // The trick:
		    //  1. toASCIIString() will %xx encode values > 127
		    //  2. Decode the result to "ISO-8859-1" 
		    //  3. URL encode the bytes to string
		    
		    // Step 2 forces the string to be 8-bit values.  It
		    // doesn't matter if the starting raw filename was *not*
		    // in the ISO-8859-1 encoding, the effect is to ensure
		    // we have an 8-bit byte string that (numerically)
		    // captures the right value.  These numerical values are
		    // then used to determine how to URL encode it
		    
			String filename_ascii = filename_uri.toASCIIString();
			
			// The URI.toASCIIString() call above only encodes values > 127.
			// But we also need to protect + and & signs in filenames. Do this by URL encoding.
			// But need to double URL encode, else it will get decoded too early, by the
			// URLDecoder.decode() call shortly hereafter (which turns %25 back into %).
			filename_ascii = filename_ascii.replace("+", "%252B"); // +'s ASCII code is 43 decimal, 2b in hex, 2B when uppercased
			filename_ascii = filename_ascii.replace("&", "%2526"); // &'s ASCII code is 38 in decimal, and 26 in hex			
			
			// Note on protecting & in the filename:
			// &'s ASCII code is 38 in decimal, and 26 in hex, so it ends up as &#x26; (HEX_ENTITY_AMPERSAND).
			// A simple replace("&", ...) with the hex entity at this point would be dangerous if there
			// are &#x...; entities in the filename already, since the & of those entities must not be
			// replaced as well. escapeAllCharWithHexEntity(filename_ascii, '&') would do that replacement
			// carefully, but is not used: the & is carried through as a URL escape instead and only
			// converted into its hex entity at the very end.
			
		    String filename_raw_bytes = URLDecoder.decode(filename_ascii,"ISO-8859-1");			
		    filename_url_encoded = iso_8859_1_filename_to_url_encoded(filename_raw_bytes);
			
			// For chars that were protected by being URL encoded, now convert them to the correct version we want them in.
			// For +: this char is special in regex, so it needs to be converted from URL encoding back to + so it will get properly escaped for regex.
			//        (Converting it to the hex entity "&#x2B;" instead would mean it doesn't get regex escaped when converted back to a + by the caller.)
			// For &: this char is special in XML, so since the call to iso_8859_1_filename_to_url_encoded() is over, we can finally convert & to hex entity now.
			filename_url_encoded = filename_url_encoded.replace("%2B", "+"); // + signs are special, as they will need to be escaped since the caller wants the filename representing a regex
			filename_url_encoded = filename_url_encoded.replace("%26", HEX_ENTITY_AMPERSAND); // convert URL encoding for ampersand into hex entity for ampersand
		}
		catch (Exception e) {
		    e.printStackTrace();
		    // Give up trying to convert
		    filename_url_encoded = file.getAbsolutePath(); 
		}
		return filename_url_encoded;
    }
    
    /**
     * Re-reads the 8-bit values held in the given string as UTF-8 and writes the resulting
     * characters out in the mixed escape notation used for filenames by this class:
     * characters above 255 as hex entities ("&#x113;"), characters from 128 to 255 as URL
     * escapes ("%E2"), and everything else unchanged.
     *
     * Example of how the input comes about (see fileToURLEncoding()):
     *   File system char:             lower-case beta char in Latin-7     = %E2
     *   Equivalent Latin 1 char:      lower-case a with circumflex        = %E2
     *   Mapped to UTF-8:              lower-case a with circumflex        = C3 A2
     * The input string then contains the two chars C3 A2, and the task is to "see" them as E2.
     *
     * For unicode codepoints see:
     * http://unicode.org/Public/MAPPINGS/ISO8859/8859-1.TXT for ISO8859-1 (Latin-1)
     * where 0xE2 maps to codepoint 0x00E2 and is defined as "Latin small letter a with circumflex"
     * http://unicode.org/Public/MAPPINGS/ISO8859/8859-7.TXT for ISO8859-7 (Greek)
     * where 0xE2 maps to codepoint 0x03B2 and is defined as "Greek small letter beta"
     *
     * @param raw_bytes_filename string whose chars each stand for one byte of UTF-8 data
     * @return the escaped form described above
     * @throws Exception whatever the conversion steps throw is passed on unchanged
     */
    public static String iso_8859_1_filename_to_url_encoded(String raw_bytes_filename) 
        throws Exception
    {
        // one byte per char of the input, then those bytes read back as UTF-8
        byte[] eightBitValues = raw_bytes_filename.getBytes("ISO-8859-1");
        String decoded = new String(eightBitValues, "UTF-8");

        StringBuilder escaped = new StringBuilder();
        for(char current : decoded.toCharArray()) {
            int code = current;
            if(code > 255) {
                escaped.append(String.format("&#x%02X;", code));
            }
            else if(code > 127) {
                escaped.append(String.format("%%%02X", code));
            }
            else {
                escaped.append(current);
            }
        }
        return escaped.toString();
    }

    /**
     * Unused for now. Writes out the bytes of the given name, as obtained from
     * String.getBytes() without an explicit charset, replacing every byte value
     * above 127 by its URL escape ("%XX") and keeping the other bytes as characters.
     * @param fileName the name to convert
     * @return the converted name
     * @throws Exception whatever the conversion throws is passed on unchanged
     */
    public static String raw_filename_to_url_encoded(String fileName)
        throws Exception
    {
        StringBuilder escaped = new StringBuilder();
        for(byte signedByte : fileName.getBytes()) {
            // mask with 0xFF to turn the signed byte (-128 to 127)
            // into its unsigned value (0 to 255)
            int unsignedValue = signedByte & 0xFF;

            if(unsignedValue > 127) {
                escaped.append(String.format("%%%02X", unsignedValue));
            } else {
                escaped.append((char)unsignedValue);
            }
        }
        return escaped.toString();
    }
   
 // FURTHER HELPER METHODS 
 
	/**
	 * String-taking counterpart of fileToURLEncoding(File).
	 * If filename is relative, then the current directory (gli?) will be prefixed to what is returned
	 * and should be removed manually by the caller. Alternatively, for relative paths, call the variant
	 * relativeFilenameToURLEncoding(String), which will remove any added filepath prefix.
	 * @param filename the (full) file path to convert
	 * @return on a UTF-8 file system, filename with each & replaced by its hex entity;
	 *         otherwise the result of fileToURLEncoding() for that path
	 */
	public static String fullFilepathToURLEncoding(String filename) {
		if(MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
			String encodedPath = fileToURLEncoding(new File(filename));

			// if the current directory (".") was passed in as filename,
			// then the encoded path looks like /full/path/./
			// In that case drop the final "./", which leaves /full/path/
			String currentDirSuffix = URL_FILE_SEPARATOR + "." + URL_FILE_SEPARATOR;
			if(encodedPath.endsWith(currentDirSuffix)) {
				encodedPath = encodedPath.substring(0, encodedPath.length() - 2);
			}
			return encodedPath;
		}

		// on a UTF-8 file system, DO NOT do the stuff above,
		// just return input filename param, but with any & in the filename replaced with its hex entity
		return filename.replace("&", HEX_ENTITY_AMPERSAND);
	}
 
	/**
	 * String-taking counterpart of fileToURLEncoding(File) for relative paths.
	 * The path is converted like a full path, after which the converted form of the
	 * current directory ("."), which got prefixed as an intermediary step, is removed again.
	 * @param filename the relative file path to convert
	 * @return on a UTF-8 file system, filename with each & replaced by its hex entity;
	 *         otherwise the specially URL encoded relative path
	 */
	public static String relativeFilenameToURLEncoding(String filename) {
		if(MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) {
			String currentDirectoryPrefix = fullFilepathToURLEncoding(".");
			return filenameToURLEncodingWithPrefixRemoved(filename, currentDirectoryPrefix);
		}

		// on a UTF-8 file system, DO NOT do the stuff above, just protect any & in the input param
		return filename.replace("&", HEX_ENTITY_AMPERSAND);
	}
	
	/**
	 * String-taking counterpart of fileToURLEncoding(File) that returns the specially URL encoded
	 * version of filename with the provided removeFilePathPrefix removed from its start.
	 * The prefix, and any separator left at the start after removing it, is only cut off when the
	 * encoded path really starts with that prefix. Otherwise the encoded path is returned whole,
	 * instead of having an arbitrary number of leading characters chopped off (or an exception
	 * thrown when the prefix is longer than the encoded path).
	 * @param filename the file path to convert
	 * @param removeFilePathPrefix the already URL encoded prefix to strip from the result
	 * @return on a UTF-8 file system, filename with each & replaced by its hex entity;
	 *         otherwise the encoded path minus the prefix, as described above
	 */
	public static String filenameToURLEncodingWithPrefixRemoved(String filename, String removeFilePathPrefix) {
		if(!MULTIPLE_FILENAME_ENCODINGS_SUPPORTED) { // on a UTF-8 file system, DO NOT do the stuff below, just protect any & in the input param
		    return filename.replace("&", HEX_ENTITY_AMPERSAND);
		}
		
		File file = new File (filename);		
		String filename_url_encoded = fileToURLEncoding(file); // returns a full filepath
		
		if (removeFilePathPrefix == null || !filename_url_encoded.startsWith(removeFilePathPrefix)) {
			// nothing that can safely be lopped off
			return filename_url_encoded;
		}
		
		// now lop off the given removeFilePathPrefix that fileToURLEncoding() would have added
		filename_url_encoded = filename_url_encoded.substring(removeFilePathPrefix.length());
		// remove any remaining slash prefix
		if (filename_url_encoded.startsWith(URL_FILE_SEPARATOR)) {
			filename_url_encoded = filename_url_encoded.substring(URL_FILE_SEPARATOR.length());
		}
		
		return filename_url_encoded;
	}
	
// UNUSED now, but useful functions and escapeAllCharWithHexEntity() took effort to write.
	
	/**
	 * UNUSED. Replaces every char of the given string that is not printable ASCII by its hex
	 * entity, which looks like "&#x[up-to-4-hexdigits-in-UPPERCASE];".
	 * Chars from space (0x20) up to and including tilde (126) are left as they are, and so are
	 * TAB, LF and CR, which are fine for XML element printing. Everything else is converted,
	 * one UTF-16 char at a time, so a character outside the Basic Multilingual Plane comes out
	 * as two entities (one per surrogate), each of which fits the 4 hex digit form that
	 * decodeStringContainingHexEntities() turns back into a char.
	 * @param str the string to convert
	 * @return the string with all other chars replaced by hex entities
	 */
	public static String stringToHex(String str) {
		StringBuilder hex_str = new StringBuilder(str.length());
		for(int i = 0; i < str.length(); i++) {
			char current = str.charAt(i);

			// ASCII table: https://cdn.sparkfun.com/assets/home_page_posts/2/1/2/1/ascii_table_black.png
			// space is 0x20 (32 decimal), tilde is 126
			boolean printable = (current >= 0x20 && current <= 126)
				|| current == 9 || current == 10 || current == 13; // TAB, LF, CR
			if(printable) {
				hex_str.append(current);
			} else {
				hex_str.append("&#x")
					.append(Integer.toHexString(current).toUpperCase(Locale.ROOT))
					.append(';');
			}
		}
		  
		return hex_str.toString();
	 }
	
	/**
	 * Builds the hex entity, "&#x" + uppercase hex number + ";", for the code point
	 * found at the start of the given string.
	 * @param char_as_string a String whose first character is the one to convert
	 * @return the hex entity, e.g. "&#x26;" for "&"
	 */
	public static String hexEntityForChar(String char_as_string) {
		int codePoint = char_as_string.codePointAt(0); // unicode codepoint / ASCII code
		StringBuilder entity = new StringBuilder("&#x");
		entity.append(Integer.toHexString(codePoint).toUpperCase());
		entity.append(";");
		return entity.toString();
	 } 
	
	/**
	 * Given a String containing 0 or more occurrences of CHARACTER, returns that String with
	 * the occurrences of CHARACTER replaced by its hex entity variant, "&#x....;".
	 * An occurrence at which a match of HEX_PATTERN starts is the beginning of an existing hex
	 * entity and is left alone, together with the rest of that entity. This matters when the
	 * CHARACTER to be replaced is &, as we don't want to mangle any hex entities already
	 * present in the String.
	 * @param str the string to process
	 * @param CHARACTER the character to replace
	 * @return str itself if it doesn't contain CHARACTER, else the escaped string
	 */
	public static String escapeAllCharWithHexEntity(String str, char CHARACTER) {
		if(str.indexOf(CHARACTER) == -1) { // nothing to replace, we're done
			return str;
		}

		final String replacement = hexEntityForChar(Character.toString(CHARACTER));
		final int length = str.length();
		// looks for a hex entity, which has the pattern "&#x....;"
		final Matcher entityMatcher = HEX_PATTERN.matcher(str);

		StringBuilder escaped = new StringBuilder(length + replacement.length());
		int position = 0;
		while(position < length) {
			int hit = str.indexOf(CHARACTER, position);
			if(hit == -1) {
				// no further occurrences: the remainder is copied across untouched
				escaped.append(str, position, length);
				break;
			}

			// everything before the occurrence stays as it is
			escaped.append(str, position, hit);

			if(entityMatcher.region(hit, length).lookingAt()) {
				// the occurrence starts an existing hex entity: keep the entity whole
				escaped.append(str, hit, entityMatcher.end());
				position = entityMatcher.end();
			} else {
				escaped.append(replacement);
				position = hit + 1;
			}
		}

		return escaped.toString();
	}
}