- Timestamp:
- 2007-01-11T14:55:04+13:00 (17 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS2LuceneRetrieve.java
r13270 r13576 20 20 21 21 // Greenstone classes 22 import org.greenstone.mgpp.*;23 22 import org.greenstone.gsdl3.core.GSException; 24 23 import org.greenstone.gsdl3.util.GSFile; 25 24 import org.greenstone.gsdl3.util.GSXML; 26 25 import org.greenstone.gsdl3.util.GDBMWrapper; 26 import org.greenstone.gsdl3.util.DBInfo; 27 import org.greenstone.gsdl3.util.GSHTML; 28 import org.greenstone.gsdl3.util.OID; 27 29 // XML classes 30 import org.w3c.dom.Document; 28 31 import org.w3c.dom.Element; 29 32 import org.w3c.dom.Text; … … 34 37 import org.apache.log4j.Logger; 35 38 39 /** Retrieve documents from a gs2 lucene collection. Note that this doesn't 40 actually use lucene, as the documents are stored in XML files */ 36 41 public class GS2LuceneRetrieve 37 42 extends AbstractGS2DocumentRetrieve 38 43 { 39 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneRetrieve.class.getName()); 44 45 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneRetrieve.class.getName()); 46 47 48 protected static final String DOC_LEVEL="Doc"; 49 protected static final String SEC_LEVEL="Sec"; 50 protected static final String ID_ATT = "gs2:id"; 40 51 41 52 // Parameters used 42 53 private static final String LEVEL_PARAM = "level"; 43 54 44 55 // Elements used in the config file that are specific to this class 45 56 private static final String DEFAULT_LEVEL_ELEM = "defaultLevel"; 46 47 private MGPPWrapper mgpp_src = null;48 57 49 58 private String default_level = null; 50 private String mgpp_textdir = null; 51 59 private String text_dir = null; 60 61 private boolean text_available = true; 62 52 63 public GS2LuceneRetrieve() { 53 this.mgpp_src = new MGPPWrapper();54 64 } 55 65 56 66 public void cleanUp() { 57 67 super.cleanUp(); 58 this.mgpp_src.unloadIndexData();59 68 } 60 69 61 70 /** configure this service */ 62 71 public boolean configure(Element info, Element extra_info) … … 65 74 return false; 66 75 } 67 76 68 77 // Do specific configuration 
69 78 logger.info("Configuring GS2LuceneRetrieve..."); 70 71 // TODO - is there anything we need to do here? 72 // set up XML parser?? 73 79 80 text_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name) + File.separatorChar+"text"+File.separatorChar; 81 if (!(new File(text_dir).isDirectory())) { 82 logger.error("Text directory "+text_dir+" does not exist, will be unable to retrieve text for "+cluster_name); 83 text_available = false; 84 return true; // return true so that we still get the other services for the collection 85 } 86 // Get the default level out of <defaultLevel> (buildConfig.xml) 87 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_LEVEL_ELEM); 88 if (def != null) { 89 this.default_level = def.getAttribute(GSXML.NAME_ATT); 90 } 91 if (this.default_level == null || this.default_level.equals("")) { 92 logger.error("Default level not specified for "+this.cluster_name+", assuming "+DOC_LEVEL); 93 this.default_level = DOC_LEVEL; 94 } 95 74 96 return true; 75 97 76 98 } 77 99 … … 80 102 * <nodeContent>text content or other elements</nodeContent> 81 103 */ 82 protected Element getNodeContent(String doc_id) throws GSException { 83 104 protected Element getNodeContent(String doc_id, String lang) throws GSException { 105 String doc_content = getTextString("TextRetrievalError", lang); 106 try { 107 if (!text_available) { 108 throw new Exception("No text directory available"); 109 } 110 111 long doc_num = this.gdbm_src.OID2Docnum(doc_id); 112 if (doc_num == -1) { 113 throw new Exception("OID "+doc_id +" couldn't be converted to lucene doc num"); 114 } 115 116 DBInfo info=this.gdbm_src.getInfo(OID.getTop(doc_id)); 117 if (info == null) { 118 throw new Exception("Couldn't get GDBM database entry for "+OID.getTop(doc_id)); 119 } 120 121 String archivedir=info.getInfo("archivedir"); 122 File doc_xml_file = new File(text_dir+archivedir+File.separatorChar+"doc.xml"); 123 if (!doc_xml_file.isFile()) { 124 throw new Exception("Doc XML file 
"+doc_xml_file.getPath()+" does not exist"); 125 } 126 Document doc_xml_doc = this.converter.getDOM(doc_xml_file); 127 if (doc_xml_doc == null) { 128 throw new Exception("Couldn't parse file "+doc_xml_file.getPath()); 129 } 130 Element full_document = doc_xml_doc.getDocumentElement(); 131 if (full_document == null) { 132 throw new Exception("Couldn't parse file "+doc_xml_file.getPath()); 133 } 134 Element current_section = null; 135 if (default_level.equals(DOC_LEVEL)) { 136 current_section = full_document; 137 } else { 138 current_section = GSXML.getNamedElement(full_document, SEC_LEVEL, ID_ATT, String.valueOf(doc_num)); 139 } 140 if (current_section == null) { 141 throw new Exception("Couldn't find section "+ doc_num+" in file "+doc_xml_file.getPath()); 142 } 143 doc_content = GSXML.getNodeText(current_section); 144 if (doc_content == null) { 145 doc_content = ""; 146 } else { 147 doc_content = resolveTextMacros(doc_content, doc_id, lang); 148 } 149 } catch (Exception e) { 150 logger.error("Error trying to get document text for "+doc_id+" in collection "+this.cluster_name+": "+e); 151 } 84 152 85 153 Element content_node = this.doc.createElement(GSXML.NODE_CONTENT_ELEM); 86 87 String doc_content = "";88 try {89 // TODO get the doc content from the index text dir90 } catch (Exception e) {91 logger.info("exception happended getting doc content for " + doc_id);92 doc_content = "dummy content for doc "+doc_id +"\n";93 94 }95 96 154 Text t = this.doc.createTextNode(doc_content); 97 155 content_node.appendChild(t); 98 156 return content_node; 99 157 } 100 101 102 158 }
Note: See TracChangeset for help on using the changeset viewer.