- Timestamp:
- 2012-05-23T16:47:11+12:00 (12 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2LuceneRetrieve.java
r20817 r25649 36 36 import org.apache.log4j.Logger; 37 37 38 /** Retrieve documents from a gs2 lucene collection. Note that this doesn't 39 actually use lucene, as the documents are stored in XML files */ 40 public class GS2LuceneRetrieve 41 extends AbstractGS2DocumentRetrieve 38 /** 39 * Retrieve documents from a gs2 lucene collection. Note that this doesn't 40 * actually use lucene, as the documents are stored in XML files 41 */ 42 public class GS2LuceneRetrieve extends AbstractGS2DocumentRetrieve 42 43 { 43 44 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneRetrieve.class.getName());45 46 47 protected static final String DOC_LEVEL="Doc";48 protected static final String SEC_LEVEL="Sec";49 protected static final String ID_ATT = "gs2:docOID";50 44 51 // Parameters used 52 private static final String LEVEL_PARAM = "level"; 53 54 // Elements used in the config file that are specific to this class 55 private static final String DEFAULT_LEVEL_ELEM = "defaultLevel"; 56 57 private String default_level = null; 58 private String text_dir = null; 45 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneRetrieve.class.getName()); 59 46 60 private boolean text_available = true; 61 62 public GS2LuceneRetrieve() { 63 } 64 65 public void cleanUp() { 66 super.cleanUp(); 67 } 68 69 /** configure this service */ 70 public boolean configure(Element info, Element extra_info) 71 { 72 if (!super.configure(info, extra_info)){ 73 return false; 47 protected static final String DOC_LEVEL = "Doc"; 48 protected static final String SEC_LEVEL = "Sec"; 49 protected static final String ID_ATT = "gs2:docOID"; 50 51 // Parameters used 52 private static final String LEVEL_PARAM = "level"; 53 54 // Elements used in the config file that are specific to this class 55 private static final String DEFAULT_LEVEL_ELEM = "defaultLevel"; 56 57 private String default_level = null; 58 private String text_dir = null; 59 60 private boolean text_available = true; 61 62 public GS2LuceneRetrieve() 63 { 74 64 } 75 76 // Do specific configuration 77 logger.info("Configuring GS2LuceneRetrieve..."); 78 79 text_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name) + File.separatorChar+"text"+File.separatorChar; 80 if (!(new File(text_dir).isDirectory())) { 81 logger.error("Text directory "+text_dir+" does not exist, will be unable to retrieve text for "+cluster_name); 82 text_available = false; 83 return true; // return true so that we still get the other services for the collection 65 66 public void cleanUp() 67 { 68 super.cleanUp(); 84 69 } 85 // Get the default level out of <defaultLevel> (buildConfig.xml) 86 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_LEVEL_ELEM); 87 if (def != null) { 88 this.default_level = def.getAttribute(GSXML.SHORTNAME_ATT); 70 71 /** configure this service */ 72 public boolean configure(Element info, Element extra_info) 73 { 74 if (!super.configure(info, extra_info)) 75 { 76 return false; 77 } 78 79 // Do specific configuration 80 logger.info("Configuring GS2LuceneRetrieve..."); 81 82 text_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name) + File.separatorChar + "text" + File.separatorChar; 83 if (!(new File(text_dir).isDirectory())) 84 { 85 logger.error("Text directory " + text_dir + " does not exist, will be unable to retrieve text for " + cluster_name); 86 text_available = false; 87 return true; // return true so that we still get the other services for the collection 88 } 89 // Get the default level out of <defaultLevel> (buildConfig.xml) 90 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_LEVEL_ELEM); 91 if (def != null) 92 { 93 this.default_level = def.getAttribute(GSXML.SHORTNAME_ATT); 94 } 95 if (this.default_level == null || this.default_level.equals("")) 96 { 97 logger.error("Default level not specified for " + this.cluster_name + ", assuming " + DOC_LEVEL); 98 this.default_level = DOC_LEVEL; 99 } 100 101 return true; 102 89 103 } 90 if (this.default_level == null || this.default_level.equals("")) { 91 logger.error("Default level not specified for "+this.cluster_name+", assuming "+DOC_LEVEL); 92 this.default_level = DOC_LEVEL; 93 } 94 95 return true; 96 97 } 98 99 /** returns the content of a node 100 * should return a nodeContent element: 101 * <nodeContent>text content or other elements</nodeContent> 102 */ 103 protected Element getNodeContent(String doc_id, String lang) throws GSException { 104 String [] args = new String[1]; 105 args[0] = doc_id; 106 String doc_content = getTextString("TextRetrievalError", lang, args); 107 try { 108 if (!text_available) { 109 throw new Exception("No text directory available"); 110 } 111 112 DBInfo info=this.coll_db.getInfo(OID.getTop(doc_id)); 113 if (info == null) { 114 throw new Exception("Couldn't get database entry for "+OID.getTop(doc_id)); 115 } 116 117 String archivedir=info.getInfo("archivedir"); 118 File doc_xml_file = new File(text_dir+archivedir+File.separatorChar+"doc.xml"); 119 if (!doc_xml_file.isFile()) { 120 throw new Exception("Doc XML file "+doc_xml_file.getPath()+" does not exist"); 121 } 122 Document doc_xml_doc = this.converter.getDOM(doc_xml_file, "utf-8"); 123 if (doc_xml_doc == null) { 124 throw new Exception("Couldn't parse file "+doc_xml_file.getPath()); 125 } 126 Element full_document = doc_xml_doc.getDocumentElement(); 127 if (full_document == null) { 128 throw new Exception("Couldn't parse file "+doc_xml_file.getPath()); 129 } 130 Element current_section = null; 131 if (default_level.equals(DOC_LEVEL)) { 132 current_section = full_document; 133 } else { 134 current_section = GSXML.getNamedElement(full_document, SEC_LEVEL, ID_ATT, doc_id); 135 } 136 if (current_section == null) { 137 throw new Exception("Couldn't find section "+ doc_id+" in file "+doc_xml_file.getPath()); 138 } 139 doc_content = GSXML.getNodeText(current_section); 140 if (doc_content == null) { 141 doc_content = ""; 142 } else { 143 doc_content = resolveTextMacros(doc_content, doc_id, lang); 144 } 145 } catch (Exception e) { 146 logger.error("Error trying to get document text for "+doc_id+" in collection "+this.cluster_name+": "+e); 104 105 /** 106 * returns the content of a node should return a nodeContent element: 107 * <nodeContent>text content or other elements</nodeContent> 108 */ 109 protected Element getNodeContent(String doc_id, String lang) throws GSException 110 { 111 String[] args = new String[1]; 112 args[0] = doc_id; 113 String doc_content = getTextString("TextRetrievalError", lang, args); 114 try 115 { 116 if (!text_available) 117 { 118 throw new Exception("No text directory available"); 119 } 120 121 DBInfo info = this.coll_db.getInfo(OID.getTop(doc_id)); 122 if (info == null) 123 { 124 throw new Exception("Couldn't get database entry for " + OID.getTop(doc_id)); 125 } 126 127 String archivedir = info.getInfo("archivedir"); 128 File doc_xml_file = new File(text_dir + archivedir + File.separatorChar + "doc.xml"); 129 if (!doc_xml_file.isFile()) 130 { 131 throw new Exception("Doc XML file " + doc_xml_file.getPath() + " does not exist"); 132 } 133 Document doc_xml_doc = this.converter.getDOM(doc_xml_file, "utf-8"); 134 if (doc_xml_doc == null) 135 { 136 throw new Exception("Couldn't parse file " + doc_xml_file.getPath()); 137 } 138 Element full_document = doc_xml_doc.getDocumentElement(); 139 if (full_document == null) 140 { 141 throw new Exception("Couldn't parse file " + doc_xml_file.getPath()); 142 } 143 Element current_section = null; 144 if (default_level.equals(DOC_LEVEL)) 145 { 146 current_section = full_document; 147 } 148 else 149 { 150 current_section = GSXML.getNamedElement(full_document, SEC_LEVEL, ID_ATT, doc_id); 151 } 152 if (current_section == null) 153 { 154 throw new Exception("Couldn't find section " + doc_id + " in file " + doc_xml_file.getPath()); 155 } 156 doc_content = GSXML.getNodeText(current_section); 157 if (doc_content == null) 158 { 159 doc_content = ""; 160 } 161 else 162 { 163 doc_content = resolveTextMacros(doc_content, doc_id, lang); 164 } 165 } 166 catch (Exception e) 167 { 168 logger.error("Error trying to get document text for " + doc_id + " in collection " + this.cluster_name + ": " + e); 169 } 170 171 Element content_node = this.doc.createElement(GSXML.NODE_CONTENT_ELEM); 172 Text t = this.doc.createTextNode(doc_content); 173 content_node.appendChild(t); 174 return content_node; 147 175 } 148 149 Element content_node = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);150 Text t = this.doc.createTextNode(doc_content);151 content_node.appendChild(t);152 return content_node;153 }154 176 }
Note:
See TracChangeset
for help on using the changeset viewer.