source: greenstone3/branches/customizingGreenstone3/src/java/org/greenstone/gsdl3/service/GS2LuceneRetrieve.java@ 15787

Last change on this file since 15787 was 15787, checked in by oranfry, 16 years ago

updating from trunk: brought in trunk changes from r15191 to r15785

  • Property svn:keywords set to Author Date Id Revision
File size: 5.5 KB
Line 
1/*
2 * GS2LuceneRetrieve.java
3 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSFile;
24import org.greenstone.gsdl3.util.GSXML;
25import org.greenstone.gsdl3.util.DBInfo;
26import org.greenstone.gsdl3.util.GSHTML;
27import org.greenstone.gsdl3.util.OID;
28// XML classes
29import org.w3c.dom.Document;
30import org.w3c.dom.Element;
31import org.w3c.dom.Text;
32
33// General Java classes
34import java.io.File;
35
36import org.apache.log4j.Logger;
37
38/** Retrieve documents from a gs2 lucene collection. Note that this doesn't
39 actually use lucene, as the documents are stored in XML files */
40public class GS2LuceneRetrieve
41 extends AbstractGS2DocumentRetrieve
42{
43
44 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneRetrieve.class.getName());
45
46
47 protected static final String DOC_LEVEL="Doc";
48 protected static final String SEC_LEVEL="Sec";
49 protected static final String ID_ATT = "gs2:id";
50
51 // Parameters used
52 private static final String LEVEL_PARAM = "level";
53
54 // Elements used in the config file that are specific to this class
55 private static final String DEFAULT_LEVEL_ELEM = "defaultLevel";
56
57 private String default_level = null;
58 private String text_dir = null;
59
60 private boolean text_available = true;
61
62 public GS2LuceneRetrieve() {
63 }
64
65 public void cleanUp() {
66 super.cleanUp();
67 }
68
69 /** configure this service */
70 public boolean configure(Element info, Element extra_info)
71 {
72 if (!super.configure(info, extra_info)){
73 return false;
74 }
75
76 // Do specific configuration
77 logger.info("Configuring GS2LuceneRetrieve...");
78
79 text_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name) + File.separatorChar+"text"+File.separatorChar;
80 if (!(new File(text_dir).isDirectory())) {
81 logger.error("Text directory "+text_dir+" does not exist, will be unable to retrieve text for "+cluster_name);
82 text_available = false;
83 return true; // return true so that we still get the other services for the collection
84 }
85 // Get the default level out of <defaultLevel> (buildConfig.xml)
86 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_LEVEL_ELEM);
87 if (def != null) {
88 this.default_level = def.getAttribute(GSXML.SHORTNAME_ATT);
89 }
90 if (this.default_level == null || this.default_level.equals("")) {
91 logger.error("Default level not specified for "+this.cluster_name+", assuming "+DOC_LEVEL);
92 this.default_level = DOC_LEVEL;
93 }
94
95 return true;
96
97 }
98
99 /** returns the content of a node
100 * should return a nodeContent element:
101 * <nodeContent>text content or other elements</nodeContent>
102 */
103 protected Element getNodeContent(String doc_id, String lang) throws GSException {
104 String doc_content = getTextString("TextRetrievalError", lang);
105 try {
106 if (!text_available) {
107 throw new Exception("No text directory available");
108 }
109
110 String doc_num = this.coll_db.OID2Docnum(doc_id);
111 if (doc_num == null || doc_num.equals("")) {
112 throw new Exception("OID "+doc_id +" couldn't be converted to lucene doc num");
113 }
114
115 DBInfo info=this.coll_db.getInfo(OID.getTop(doc_id));
116 if (info == null) {
117 throw new Exception("Couldn't get database entry for "+OID.getTop(doc_id));
118 }
119
120 String archivedir=info.getInfo("archivedir");
121 File doc_xml_file = new File(text_dir+archivedir+File.separatorChar+"doc.xml");
122 if (!doc_xml_file.isFile()) {
123 throw new Exception("Doc XML file "+doc_xml_file.getPath()+" does not exist");
124 }
125 Document doc_xml_doc = this.converter.getDOM(doc_xml_file);
126 if (doc_xml_doc == null) {
127 throw new Exception("Couldn't parse file "+doc_xml_file.getPath());
128 }
129 Element full_document = doc_xml_doc.getDocumentElement();
130 if (full_document == null) {
131 throw new Exception("Couldn't parse file "+doc_xml_file.getPath());
132 }
133 Element current_section = null;
134 if (default_level.equals(DOC_LEVEL)) {
135 current_section = full_document;
136 } else {
137 current_section = GSXML.getNamedElement(full_document, SEC_LEVEL, ID_ATT, doc_num);
138 }
139 if (current_section == null) {
140 throw new Exception("Couldn't find section "+ doc_num+" in file "+doc_xml_file.getPath());
141 }
142 doc_content = GSXML.getNodeText(current_section);
143 if (doc_content == null) {
144 doc_content = "";
145 } else {
146 doc_content = resolveTextMacros(doc_content, doc_id, lang);
147 }
148 } catch (Exception e) {
149 logger.error("Error trying to get document text for "+doc_id+" in collection "+this.cluster_name+": "+e);
150 }
151
152 Element content_node = this.doc.createElement(GSXML.NODE_CONTENT_ELEM);
153 Text t = this.doc.createTextNode(doc_content);
154 content_node.appendChild(t);
155 return content_node;
156 }
157}
Note: See TracBrowser for help on using the repository browser.