source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GS2LuceneRetrieve.java@ 29318

Last change on this file since 29318 was 28966, checked in by kjdon, 10 years ago

Lots of changes, mainly to do with removing this.doc from everywhere. Document is not thread safe. Now we tend to create a new Document every time we are starting a new page/message etc. In services, this.desc_doc is available as the document to create service info stuff, but it should only be used for this and not for other messages. newDOM is now static for XMLConverter. Method param changes for some GSXML methods.

  • Property svn:keywords set to Author Date Id Revision
File size: 5.4 KB
Line 
1/*
2 * GS2LuceneRetrieve.java
3 * Copyright (C) 2005 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.service;
20
21// Greenstone classes
22import org.greenstone.gsdl3.core.GSException;
23import org.greenstone.gsdl3.util.GSFile;
24import org.greenstone.gsdl3.util.GSXML;
25import org.greenstone.gsdl3.util.DBInfo;
26import org.greenstone.gsdl3.util.GSHTML;
27import org.greenstone.gsdl3.util.OID;
28// XML classes
29import org.w3c.dom.Document;
30import org.w3c.dom.Element;
31import org.w3c.dom.Text;
32
33// General Java classes
34import java.io.File;
35
36import org.apache.log4j.Logger;
37
38/**
39 * Retrieve documents from a gs2 lucene collection. Note that this doesn't
40 * actually use lucene, as the documents are stored in XML files
41 */
42public class GS2LuceneRetrieve extends AbstractGS2DocumentRetrieve
43{
44
45 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GS2LuceneRetrieve.class.getName());
46
47 protected static final String DOC_LEVEL = "Doc";
48 protected static final String SEC_LEVEL = "Sec";
49 protected static final String ID_ATT = "gs2:docOID";
50
51 // Parameters used
52 private static final String LEVEL_PARAM = "level";
53
54 // Elements used in the config file that are specific to this class
55 private static final String DEFAULT_LEVEL_ELEM = "defaultLevel";
56
57 private String default_level = null;
58 private String text_dir = null;
59
60 private boolean text_available = true;
61
62 public GS2LuceneRetrieve()
63 {
64 }
65
66 public void cleanUp()
67 {
68 super.cleanUp();
69 }
70
71 /** configure this service */
72 public boolean configure(Element info, Element extra_info)
73 {
74 if (!super.configure(info, extra_info))
75 {
76 return false;
77 }
78
79 // Do specific configuration
80 logger.info("Configuring GS2LuceneRetrieve...");
81
82 text_dir = GSFile.collectionIndexDir(this.site_home, this.cluster_name) + File.separatorChar + "text" + File.separatorChar;
83 if (!(new File(text_dir).isDirectory()))
84 {
85 logger.error("Text directory " + text_dir + " does not exist, will be unable to retrieve text for " + cluster_name);
86 text_available = false;
87 return true; // return true so that we still get the other services for the collection
88 }
89 // Get the default level out of <defaultLevel> (buildConfig.xml)
90 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_LEVEL_ELEM);
91 if (def != null)
92 {
93 this.default_level = def.getAttribute(GSXML.SHORTNAME_ATT);
94 }
95 if (this.default_level == null || this.default_level.equals(""))
96 {
97 logger.error("Default level not specified for " + this.cluster_name + ", assuming " + DOC_LEVEL);
98 this.default_level = DOC_LEVEL;
99 }
100
101 return true;
102
103 }
104
105 /**
106 * returns the content of a node should return a nodeContent element:
107 * <nodeContent>text content or other elements</nodeContent>
108 */
109 protected Element getNodeContent(Document doc, String doc_id, String lang) throws GSException
110 {
111 String[] args = new String[1];
112 args[0] = doc_id;
113 String doc_content = getTextString("TextRetrievalError", lang, args);
114 try
115 {
116 if (!text_available)
117 {
118 throw new Exception("No text directory available");
119 }
120
121 DBInfo info = this.coll_db.getInfo(OID.getTop(doc_id));
122 if (info == null)
123 {
124 throw new Exception("Couldn't get database entry for " + OID.getTop(doc_id));
125 }
126
127 String archivedir = info.getInfo("archivedir");
128 File doc_xml_file = new File(text_dir + archivedir + File.separatorChar + "doc.xml");
129 if (!doc_xml_file.isFile())
130 {
131 throw new Exception("Doc XML file " + doc_xml_file.getPath() + " does not exist");
132 }
133 Document doc_xml_doc = this.converter.getDOM(doc_xml_file, "utf-8");
134 if (doc_xml_doc == null)
135 {
136 throw new Exception("Couldn't parse file " + doc_xml_file.getPath());
137 }
138 Element full_document = doc_xml_doc.getDocumentElement();
139 if (full_document == null)
140 {
141 throw new Exception("Couldn't parse file " + doc_xml_file.getPath());
142 }
143 Element current_section = null;
144 if (default_level.equals(DOC_LEVEL))
145 {
146 current_section = full_document;
147 }
148 else
149 {
150 current_section = GSXML.getNamedElement(full_document, SEC_LEVEL, ID_ATT, doc_id);
151 }
152 if (current_section == null)
153 {
154 throw new Exception("Couldn't find section " + doc_id + " in file " + doc_xml_file.getPath());
155 }
156 doc_content = GSXML.getNodeText(current_section);
157 if (doc_content == null)
158 {
159 doc_content = "";
160 }
161 else
162 {
163 doc_content = resolveTextMacros(doc_content, doc_id, lang);
164 }
165 }
166 catch (Exception e)
167 {
168 logger.error("Error trying to get document text for " + doc_id + " in collection " + this.cluster_name + ": " + e);
169 }
170
171 Element content_node = doc.createElement(GSXML.NODE_CONTENT_ELEM);
172 Text t = doc.createTextNode(doc_content);
173 content_node.appendChild(t);
174 return content_node;
175 }
176}
Note: See TracBrowser for help on using the repository browser.