* - username * - password * - host (may - but need not - be prefixed with either of the protocols * "http://" and "https://" * - port * - errorMessage (displayed near the top of the popup dialog). Can be "". *

elements and find the //

and return that NodeList sections = docEl.getElementsByTagName(SECTION_ELEMENT); for(int i = 0; i < sections.getLength(); i++) { Element e = (Element)sections.item(i); if(e.hasAttribute(ID) && e.getAttribute(ID).equals(sectionNumber)) { Element substructure = getSubstructure(e, structure); return getStructureInfo(substructure.getOwnerDocument(), e, info); } } return null; // not found } /** Implements browsing document titles of a greenstone collection stored in * the fedora repository by letter. * @return the document pids whose titles start with the given letter. * @param collName - the name of the collection. * @param letter - the starting letter to browse by. */ public String[] browseTitlesByLetter(final String collName, final String letter) throws RemoteException, FedoraVersionNotSupportedException { String[] pids = null; // We want to do the following kind of search (assuming letter=f // and collName=demo): // pid~greenstone:demo* title~f* // We don't need to normalise the letter first (to search titles starting // with both uppercase and lowercase versions of the letter), because // Fedora always searches for both. // HOWEVER, searching for title~f* returns all documents containing f (or F) // ANYWHERE in their titles! // SOLUTION: search the collection for all titles containing f as given, // retrieving pid and title fields. Then from the list of results, select // only those titles that start with the given letter. // This may seem an unnecessarily cumbersome job (when it looked like it // should have worked with just title~f*), BUT, at least the resulting // documents will be reduced to a set of titles containing f; rather than // having to search *all* documents in the collection. 
final String title = letter+WILDCARD; FieldSearchResult objects = findObjectsWithTitlesContaining( collName, title); ObjectFields[] results = objects.getResultList(); TreeSet v = new TreeSet(); // TreeSet to return the results in //alphabetical order for(int i = 0; i < results.length; i++) { // from the result list, select those titles that don't // just *contain* the letter, but actually start with it: String resultTitle = results[i].getTitle(0); if(resultTitle.toLowerCase().startsWith(letter.toLowerCase())) { String pid = results[i].getPid(); // skip the collection object itself if(!pid.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) { v.add(pid); //LOG.debug(resultTitle); } } } pids = new String[v.size()]; v.toArray(pids); return pids; } /** Implements querying document DC titles of a greenstone collection stored in * the fedora repository for a term that may occur anywhere in their titles. * @return the document pids whose DC titles contain the parameter term. * @param titleContents - the word or phrase to search the collection's * document titles for. Only one word, and this method finds Greenstone * DOCUMENT titles CONTAINING that word (if any). * @param startsWith - if true, searches for titles that start with * titleContents. Else it searches for titles that contain titleContents. */ public String[] searchDocumentTitles(String collName, String titleContents, boolean startsWith) throws RemoteException, FedoraVersionNotSupportedException { String[] pids = null; // We want to do the following kind of search (when written in Fedora's // REST format - see http://localhost:8080/fedora/search): // pid~greenstone:-* title~<1st word of titleContents> // We don't need to normalise the word first (to search titles starting // with both uppercase and lowercase versions of it), because // Fedora always searches for the normalised word. // 2 difficulties: // - We can only search for single words with Fedora's Conditional Search. 
// Obtain pids and titles of documents containing the first word and then // we filter the titles to those containing the entire phrase of // titleContents. // - Searching for title~FirstWord returns all documents containing // this word ANYWHERE in their titles. If parameter startsWith is false, // then this is fine. But if parameter startsWith is true, then go // through all the resulting titles found (containing FirstWord), select // only pids of those titles that contain the entire phrase titleContents final String pid = GREENSTONE_+collName+HYPHEN+WILDCARD; int indexOfFirstSpace = titleContents.indexOf(' '); // check for space // if titleContents is a phrase (contains space), then it's not // a single word, in which case search for just the first word String title = titleContents; // assume it's a single word if(indexOfFirstSpace != -1) // if not single word but a phrase, store title = titleContents.substring(0, indexOfFirstSpace); // 1st word FieldSearchResult objects = findObjectsWithTitlesContaining( collName, title); if(objects == null) { final String[] empty = {}; return empty; } // Go through all the titles found and for those that match the criteria*, // store their pid. *Criteria: titles that start with OR contain the // word OR phrase of titleContents. 
ObjectFields[] results = objects.getResultList(); Vector v = new Vector(); // return pids in the order found for(int i = 0; i < results.length; i++) { // from the result list, select those titles that don't // just *contain* the first word, but the entire phrase of // words in titleContents: String resultTitle = results[i].getTitle(0); boolean accepted = false; // accept the resultTitle found String resultPID = results[i].getPid(); // skip the collection object itself, since it's not a document if(resultPID.equalsIgnoreCase(GREENSTONE_+collName+_COLLECTION)) { accepted = false; } // if titleContents is a single word and we are checking // whether resultTitle contains titleContents: else if(indexOfFirstSpace == -1) { // titleContents is a single word if(!startsWith) // titles that *contain* the word titleContents accepted = true; //accept all titles found // else startWith: accept titles starting with word titleContents else if (resultTitle.toLowerCase().startsWith( titleContents.toLowerCase())) accepted = true; } else { // otherwise, titleContents is a phrase of >1 word, need // to check that the result title contains the entire phrase if(startsWith && resultTitle.toLowerCase().startsWith( titleContents.toLowerCase())) accepted = true; else if(!startsWith && resultTitle.toLowerCase().contains( titleContents.toLowerCase())) accepted = true; } // if the resultTitle fit the criteria, store its pid if(accepted) { v.add(resultPID); //System.out.println(resultTitle); } } pids = new String[v.size()]; v.toArray(pids); return pids; } /** * @param collName - the collection of documents we'll be searching in. * @param titleWord - the word we'll be searching the document titles for. * (Fedora's search returns all objects whose title contains that word). * * Two kinds of search are provided by Fedora as stated in FedoraAccess.java * (see link): *

	 * "There are two search methods: a search on all fields or a search on
	 * specific fields. To search all fields the setTerms function of the 
	 * FieldSearchQuery must be used, with the parameter being the desired string.
	 * 
	 * To search by specific fields, you must create an array of Condition
	 * objects. Each condition consists of three parts: 
	 * the field to be searched (.setProperty()), 
	 * the operation to be used (.setOperator(ComparisonOperator. <operator>)), 
	 * and the search string (.setValue())"
	 *

* We want to use the second search method above when browsing and searching, * and search for: pid~greenstone:<collName>* title~<letter>* * or pid~greenstone:<collName>* title~<first word of search phrase> * See also fedora-2.2.1-src/src/java/fedora/client/search/Search.java. * * The fedora/tomcat/webapps/fedora/WEB-INF/web.xml is where the REST-based * web services are defined. (The web.xml defines the "Servlets for REST-based * interfaces to the Fedora Repository Server"). * Do a search on the word "search": * fedora.server.access.FieldSearchServlet is the class we need to look at * It accesses a different Condition.java class: fedora.server.search.Condition.java * The above is what is used by the REST-based interface in FieldSearchServlet.java * While fedora-2.2.1-src/build/wsdl/fedora/server/types/gen/Condition.java * is what's used in the fedora client application that makes use of * the SOAP-based interface. * * @see FedoraAccess.java * @see MaAPI * @see Fedora server type Condition.java * @see Fedora client test Search.java */ protected FieldSearchResult findObjectsWithTitlesContaining( String collName, final String titleWord) throws RemoteException, FedoraVersionNotSupportedException { // Searching for pids of the form "greenstone:gs2mgdemo-*"; final String pid = GREENSTONE_+collName+HYPHEN+WILDCARD; Condition[] conditions = new Condition[2]; conditions[0] = new Condition("pid", ComparisonOperator.has, pid); conditions[1] = new Condition("title", ComparisonOperator.has, titleWord); FieldSearchQuery query = new FieldSearchQuery(); query.setConditions(conditions); // We'd like pid and title returned for each object, because we'll make // use of title. We pass maxResults=null to get all objects that match // (i.e. all collections). 
FieldSearchResult objects = null; final String[] retrieveFields = {"pid", "title"}; try { objects = AutoFinder.findObjects( APIA, retrieveFields, maxresults, query); // collection = APIA.findObjects(new String[]{"pid", "title"}, // new NonNegativeInteger(Integer.toString(maxresults)), query); } catch(RemoteException ex) { if(fedoraVersion.compareTo(SUPPORTED_VERSION) < 0) { // fedoraVersion is too low, searching/browsing is not possible // (because class Condition has changed after 2.0, from 2.1.1 // onwards) throw new FedoraVersionNotSupportedException(fedoraVersion); } else { LOG.error( "Remote exception when calling web service operation " + "findObject() to execute search:\n" + ex.getMessage()); ex.printStackTrace(); throw ex; } } return objects; // return the FieldSearchResult objects found } /** @return the <docName> in the parameter docPID (which is of the form: * greenstone:<colname>-<docName>) * @param docPID - pid of a greenstone document in the fedora repository. */ public String getDocName(String docPID) { return docPID.substring(docPID.indexOf('-')+1); } /** @return the <name> in the parameter collPID * (greenstone:<name>-collection) * If collPID is a docPID, this method does the same: return the <name> * in the docPID (greenstone:<name>-docID). * @param collPID - pid of a greenstone collection in the fedora repository. */ public String getCollectionName(String collPID) { return collPID.substring(collPID.indexOf(':')+1, collPID.indexOf('-')); } /** Return the TOC substructure requested * @return an element containing a copy if element e with either only its child * elements or with all its descendants and/or its ancestors or only its parent * and/or its siblings (depending on what the parameter structure specifies). * @param e - the element to start copying from and whose structure is requested. 
* @param structure - a string containing any combination of the values: * ancestors, parent, siblings, children, descendants, * specifying the portion of the structure to retrieve. * @see Sun java thread on transforming a DOM XML to a String */ protected Element getSubstructure(Element original, String structure) { Document doc = builder.newDocument(); boolean descendants = (structure.indexOf("descendants") != -1) ? true : false; Node current = doc.importNode(original, descendants); // descendants=true: import/copy descendants. // Else, copy just current node original (later copy its direct children) Node parentOfCurrent = null; Node parentOfOriginal = original.getParentNode(); if(parentOfOriginal == original.getOwnerDocument()) { // don't want document node (original is docRoot) parentOfOriginal = null; } if(parentOfOriginal == null) { // no parentNode, so current is the root node. // can't get ancestors/parent/siblings, since all these need parentNode doc.appendChild(current); } else { // siblings, ancestors and parent requests all require parent node to exist // First check if we need to get ancestors, else for whether parent is required if(structure.indexOf("ancestors") != -1) { parentOfCurrent = doc.importNode(parentOfOriginal, false); Node child = null; Node parent = parentOfCurrent; // the copy Node n = parentOfOriginal.getParentNode(); // the doc to copy from while(n != null && n != original.getOwnerDocument()) { child = parent; parent = doc.importNode(n, false); // no descendants parent.appendChild(child); n = n.getParentNode(); } doc.appendChild(parent); // need to put the copied node into a document // else it won't have a parent doc (DOMSource can't work with it // without it having a document parent). 
} else if(structure.indexOf("parent") != -1) { parentOfCurrent = doc.importNode(parentOfOriginal, false); //parentOfCurrent.appendChild(current); doc.appendChild(parentOfCurrent); } // a request for siblings is independently tested for if(structure.indexOf("siblings") != -1) { // only import parent if we didn't already import // it for a request for ancestors or parent if(parentOfCurrent == null) { parentOfCurrent = doc.importNode(parentOfOriginal, false); doc.appendChild(parentOfCurrent); // this becomes the root } // now the siblings of current (children of parentOfCurrent) NodeList children = parentOfOriginal.getChildNodes(); for(int i = 0; i < children.getLength(); i++) { Node n = children.item(i); if(n.getNodeName().equals(SECTION_ELEMENT)) { if((Element)n != original) { // skip original which was already imported Node child = doc.importNode(n, false); // no descendants parentOfCurrent.appendChild(child); } else { // already imported Current element, insert at this position parentOfCurrent.appendChild(current); } } } } else if(parentOfCurrent != null) { // include current node for ancestors and parent requests // (sibling request adds the current node into a particular position) parentOfCurrent.appendChild(current); // need to put the copied node into a document // else it won't have a parent doc (DOMSource can't work with it // without it having a document parent). 
} else { // when only children or descendants were requested, current becomes root document doc.appendChild(current); } } // if we are not recursively copying all descendants, then copy just // the childnodes of current: if(structure.indexOf("children") != -1 && !descendants) { // then copy just the children // get e's children and copy them into the new document NodeList children = original.getChildNodes(); for(int i = 0; i < children.getLength(); i++) { // create copy Node n = doc.importNode(children.item(i), false); // attach it to parent current.appendChild(n); // Now we need to indicate whether this new node (child) is a leaf // or not. (This is necessary for getChildrenOfSection(), else // it's hard to know if the children are leaves or have further // subsections. if(n.getNodeName().equals(SECTION_ELEMENT)) { // we're dealing only with section children // Check if the matching original had children: Element originalsChild = (Element)children.item(i); NodeList grandchildren = originalsChild.getElementsByTagName(SECTION_ELEMENT); if(grandchildren.getLength() > 0) { // original's child has children, so indicate this // in the copied child: Element child = (Element)current; // child.setAttribute(TYPE, INTERNAL_NODE); } } } } return doc.getDocumentElement(); } /** Return the TOC substructure with the requested structural info. * @return an element containing a copy if element e with either only its child * elements or with all its descendants and/or its ancestors or only its parent * and/or its siblings (depending on what the parameter structure specifies). * Returns null if the element, e, passed in is null. * @param doc - the new document into whose root element the structural information * will be inserted as attributes. * @param e - the element to start copying from and whose structure is requested. * @param info - a string containing any combination of the values: numChildren, * numSiblings, siblingPosition. 
The requested info gets added as attributes to * the returned root element. * @see Sun java thread on transforming a DOM XML to a String */ protected Element getStructureInfo(Document doc, Element e, String info) { if(e == null) { return null; } Element root = doc.getDocumentElement(); if(!info.equals("")) { if(info.indexOf(AbstractBasicDocument.INFO_NUM_CHILDREN) != -1 || info.indexOf(AbstractBasicDocument.INFO_DOC_TYPE) != -1) { //int numChildren = e.getElementsByTagName(SECTION_ELEMENT).getLength(); int numChildren = 0; NodeList children = e.getChildNodes(); for(int i = 0; i < children.getLength(); i++) { Node n = children.item(i); if(n.getNodeName().equals(SECTION_ELEMENT)) { numChildren++; } } if(info.indexOf(AbstractBasicDocument.INFO_NUM_CHILDREN) != -1) { root.setAttribute(AbstractBasicDocument.INFO_NUM_CHILDREN, Integer.toString(numChildren)); } if(info.indexOf(AbstractBasicDocument.INFO_DOC_TYPE) != -1) { //String docType = (numChildren > 0) ? "hierarchy" : "simple"; String docType = "hierarchy"; root.setAttribute(AbstractBasicDocument.INFO_DOC_TYPE, docType); } } if(info.indexOf("ibling") != -1) { // siblingPosition or numSiblings int numSiblings = 0; int siblingPosition = 0; Node parent = e.getParentNode(); if(parent == null) { numSiblings = 0; siblingPosition = 1; } else { //numSiblings = parent.getChildNodes().getLength(); NodeList siblings = parent.getChildNodes(); for(int i = 0; i < siblings.getLength(); i++) { Node n = siblings.item(i); if(n.getNodeName().equals(SECTION_ELEMENT)) { if(e == (Element)n) { siblingPosition = numSiblings+1; } else { // count every sibling section element, except e itself numSiblings++; } } } } if(info.indexOf(AbstractBasicDocument.INFO_NUM_SIBS) != -1) { root.setAttribute(AbstractBasicDocument.INFO_NUM_SIBS, Integer.toString(numSiblings)); } if(info.indexOf(AbstractBasicDocument.INFO_SIB_POS) != -1) { root.setAttribute(AbstractBasicDocument.INFO_SIB_POS, Integer.toString(siblingPosition)); } } } return root; } /** * 
Return a datastream of a document, given the document's id * and the item id of the datastream which is to be retrieved. * @return the XML (in String form) of the item denoted by itemID * that's part of the fedora data object denoted by docPID. * itemID may be something like EX.2.1/EX2.3.3 or SECTION1.4.3 * Can't retrieve images denoted by itemID using this method, only items * that are of XML format. * @param docPID - pid of a greenstone document in the fedora repository. * @param itemID - the itemID of a datastream of the fedora object * identified by docPID. */ protected String getItem(String docPID, String itemID) throws RemoteException, UnsupportedEncodingException { // MIMETypedStream getDatastreamDissemination( // String pid, String dsID, asOfDateTime) MIMETypedStream datastream = APIA.getDatastreamDissemination(docPID, itemID, null); return new String(datastream.getStream(), UTF8); } /** Given something like str="SECTION1.2.1" and prefix="SECTION" this method * returns "1.2.1". * The exception is that for cases like EX.2.1, which ought to have been EX1.2.1, * this method would return "1.2.1". Similarly, DC.2.1 would return "1.2.1". * However, the string str is returned unchanged if the prefix does not occur * at the start of str. * @return the String parameter str without the prefix. * It can be used to return the number of an itemID of a greenstone document * stored in the fedora repository without the given prefix. * @param prefix - the prefix which ought to be removed from the itemID. * @param str - the value of the itemID. 
*/ protected String removePrefix(String str, String prefix) { // do nothing in those cases where the prefix is not in param str if(!str.startsWith(prefix)) return str; // otherwise: if(prefix.equals(EX+".") || prefix.equals(DC+".")) { return "1" + str.substring(prefix.length()); } else { return str.substring(prefix.length()); } } /** Given a number of the form x(.y.z), this method returns this number * as is, except when x = 1, in which case, it would return .y.z * That is, given number=3.2.1, this method would return 3.2.1 * But, given number=1.2.3, this method would return .2.3. * When number=1, it is NOT a special case: "" is returned as explained. * @param number - a proper (fedora-greenstone document) section number * @return the same number as it ought to be for the associated EX, DC datastreama. */ protected String convertToMetaNumber(String number) { if(number.startsWith("1.") || number.equals("1")) return number.substring(1); // remove the first char: the initial '1' else return number; } /** @return fedora's baseURL. It's of the form * "http://localhost:8080/fedora" */ public String getBaseURL() { return baseURL; } /** @return the portAddressURL (in use) of the Fedora APIA * web service (should be the endpoint location in the APIA's * WSDL file). 
* It's usually of the form baseURL+"/services/access"
     */
    public String getPortAddressURL() {
        return this.baseURL + this.portAddressSuffix;
    }

    /** @return the baseURL for gsdlAssocFiles: the baseURL followed by "/get/" */
    public String getAssocFileBaseURL() {
        return baseURL + "/get/";
    }

    /**
     * Manual test driver: connects to fedora using the properties file
     * "fedoraGS3.properties" and prints, to standard out, the collections and
     * their documents, the metadata and table of contents of a sample
     * collection/document, and the results of a sample browse and search.
     * Exceptions are reported on standard out together with their stack trace.
     * @param args - not used.
     */
    public static void main(String args[]) {
        // the collection that the sample pids, browse and search below refer to
        final String collName = "gs2mgdemo";
        try {
            FedoraConnection fedoraCon
                = new FedoraConnection(new File("fedoraGS3.properties"));

            // all collections, with the titles of all their documents
            String[] pids = fedoraCon.getCollections();
            String[] titles = fedoraCon.getCollectionTitles(pids);
            for (int i = 0; i < pids.length; i++) {
                System.out.println("extracted title:" + titles[i]);
                String[] docPIDs = fedoraCon.getCollectionDocs(pids[i]);
                String[] docTitles = fedoraCon.getDocTitles(docPIDs);
                for (int j = 0; j < docPIDs.length; j++) {
                    System.out.println("\tExtr doc title: " + docTitles[j]);
                }
            }

            // metadata and table of contents of a sample collection/document
            String PID = "greenstone:gs2mgdemo-collection";
            String docPID = "greenstone:gs2mgdemo-HASH529078c732a453b1d4a505";
            String dcXML = fedoraCon.getDC(PID);
            String exXML = fedoraCon.getEX(PID);
            String tocXML = fedoraCon.getTOC(docPID);
            System.out.println("Dublin Core Metadata for " + PID
                               + " is:\n" + dcXML);
            System.out.println("GS3 extracted metadata for " + PID
                               + " is:\n" + exXML);
            System.out.println("Table of Contents for " + docPID
                               + " is:\n" + tocXML);

            String[] sectionNames = fedoraCon.getSectionNames(docPID);
            System.out.println("\nSection names for " + docPID + " are:");
            for (int i = 0; i < sectionNames.length; i++) {
                System.out.println(sectionNames[i]);
            }

            String[] sectionNumbers = fedoraCon.getSectionNumbers(docPID);
            String[] sectionTitles = fedoraCon.getTitles(docPID, sectionNames);
            System.out.println("\nSection numbers for " + docPID + " are:");
            // NOTE(review): the loop is bounded by sectionNumbers but indexes
            // sectionNames/sectionTitles; this relies on all three arrays
            // having the same length - confirm.
            for (int i = 0; i < sectionNumbers.length; i++) {
                System.out.println(sectionNames[i] + " " + sectionTitles[i]);
            }

            // browsing and searching
            System.out.println("\n");
            System.out.println("browsing greenstone's " + collName
                               + " collection by (first) letter F:");
            pids = fedoraCon.browseTitlesByLetter(collName, "f");
            for (int i = 0; i < pids.length; i++) {
                System.out.println(pids[i]);
            }

            // The message now names the collection that is really searched
            // (it used to say gs2mgppdemo while gs2mgdemo was searched).
            System.out.println("\nsearching greenstone's " + collName
                               + " collection for Gender Equality:");
            pids = fedoraCon.searchDocumentTitles(collName, "Gender Equality", false);
            for (int i = 0; i < pids.length; i++) {
                System.out.println(pids[i]);
            }

            System.out.println("\nDone - exiting.");
            System.exit(0);
        } catch (RemoteException re) {
            System.out.println("Remote Exception when calling web service operation\n"
                               + re.getMessage());
            re.printStackTrace();
        } catch (Exception e) {
            System.out.println("Unable to instantiate FedoraConnection\n" + e);
            e.printStackTrace();
        }
    }
}