source: trunk/gsdl3/src/java/org/greenstone/gsdl3/service/GS3MGRetrieve.java@ 8439

Last change on this file since 8439 was 8439, checked in by kjdon, 20 years ago

when creating doc nodes, now we choose hierarchy or simple based on whether there is a section structure in the database. we used to always change doc ids that matched the whole structure to be the first section id, eg 20041102:1 to 20041102:1-1. this is now only done for hierarchical docs, not for simple docs.

  • Property svn:keywords set to Author Date Id Revision
File size: 7.6 KB
Line 
/*
 * GS3MGRetrieve.java
 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
19package org.greenstone.gsdl3.service;
20
21
22// Greenstone classes
23import org.greenstone.mg.*;
24import org.greenstone.gsdl3.util.*;
25
26// XML classes
27import org.w3c.dom.Element;
28
29// General Java classes
30import java.io.File;
31
32
33/** The content retrieval class for GS3 building
34 *
35 * @author <a href="mailto:[email protected]">Katherine Don</a>
36 * @version $Revision: 8439 $
37 */
38
39public class GS3MGRetrieve
40 extends ServiceRack {
41 // the services on offer
42 protected static final String DOCUMENT_CONTENT_RETRIEVE_SERVICE = "DocumentContentRetrieve";
43
44 // Parameters used
45 private static final String INDEX_PARAM = "index";
46
47 // Elements used in the config file that are specific to this class
48 private static final String DEFAULT_INDEX_ELEM = "defaultIndex";
49
50 private MGWrapper mg_src = null;
51 protected SQLQuery database = null;
52
53 protected Element config_info = null; // the xml from the config file
54
55 private String default_index = null;
56
57 /** constructor */
58 public GS3MGRetrieve()
59 {
60 this.database = new SQLQuery();
61 this.mg_src = new MGWrapper();
62 }
63
64
65 /** configure this service */
66 public boolean configure(Element info, Element extra_info)
67 {
68 this.config_info = info;
69 // Do specific configuration
70 System.out.println("Configuring GS3MGRetrieve...");
71 // System.out.println("info:\n" + converter_.getString(info));
72 // System.out.println("extra_info:\n" + converter_.getString(extra_info));
73
74 // Get the default index out of <defaultIndex> (buildConfig.xml)
75 Element def = (Element) GSXML.getChildByTagName(info, DEFAULT_INDEX_ELEM);
76 if (def != null) {
77 this.default_index = def.getAttribute(GSXML.NAME_ATT);
78 }
79 if (this.default_index == null || this.default_index.equals("")) {
80 System.err.println("Error: default index not specified!");
81 return false;
82 }
83
84 // System.out.println("Default index: " + this.default_index);
85 Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
86 dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
87 dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
88 this.short_service_info.appendChild(dcr_service);
89
90 // check that site_home is set
91 if (this.site_home == null || this.site_home.equals("")) {
92 System.err.println("GS3Retrieve Error: site_home is not set, so cannot work out the site name and cannot determine the database name");
93 return false;
94 }
95 String site_name = this.site_home.substring(this.site_home.lastIndexOf(File.separator)+1);
96 if (site_name.equals("")) {
97 System.err.println("GS3Retrieve Error: Cannot extract the site name from site home: "+this.site_home);
98 return false;
99 }
100 if (!database.setDatabase(site_name+"_"+this.cluster_name)) {
101 System.err.println("GS3Retrieve Error: Could not open SQL database!");
102 return false;
103 }
104
105 // look for document display format
106 String path = GSPath.appendLink(GSXML.DISPLAY_ELEM, GSXML.FORMAT_ELEM);
107 Element display_format = (Element)GSXML.getNodeByPath(extra_info, path);
108 if (display_format != null) {
109 this.format_info_map.put(DOCUMENT_CONTENT_RETRIEVE_SERVICE, this.doc.importNode(display_format, true));
110 // should we make a copy?
111 }
112
113 return true;
114 }
115
116 protected Element getServiceDescription(String service_id, String lang, String subset) {
117 if (service_id.equals(DOCUMENT_CONTENT_RETRIEVE_SERVICE)) {
118 Element dcr_service = this.doc.createElement(GSXML.SERVICE_ELEM);
119 dcr_service.setAttribute(GSXML.TYPE_ATT, GSXML.SERVICE_TYPE_RETRIEVE);
120 dcr_service.setAttribute(GSXML.NAME_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
121 return dcr_service;
122 }
123
124 return null;
125 }
126 /** Retrieve the content of a document */
127 protected Element processDocumentContentRetrieve(Element request)
128 {
129 // Create a new (empty) result message
130 Element result = this.doc.createElement(GSXML.RESPONSE_ELEM);
131 result.setAttribute(GSXML.FROM_ATT, DOCUMENT_CONTENT_RETRIEVE_SERVICE);
132 result.setAttribute(GSXML.TYPE_ATT, GSXML.REQUEST_TYPE_PROCESS);
133
134 // Get the parameters of the request - no parameters at this stage
135 //Element param_list = (Element) GSXML.getChildByTagName(request, GSXML.PARAM_ELEM+GSXML.LIST_MODIFIER);
136
137 // Get the request content
138
139 Element query_doc_list = (Element) GSXML.getChildByTagName(request, GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
140 if (query_doc_list == null) {
141 System.err.println("Error: DocumentContentRetrieve request specified no doc nodes.\n");
142 return result;
143 }
144
145 Element doc_list = this.doc.createElement(GSXML.DOC_NODE_ELEM+GSXML.LIST_MODIFIER);
146 result.appendChild(doc_list);
147
148 // The location of the MG index and text files
149 String basedir = GSFile.collectionBaseDir(this.site_home, this.cluster_name) + File.separatorChar; // Needed by MG
150 String textdir = GSFile.collectionTextPath("index");//this.cluster_name);
151 // index is only needed to start up MG, not used so just use the default index
152 String indexpath = GSFile.collectionIndexPath("index"/*this.cluster_name*/, this.default_index);
153 this.mg_src.setIndex(indexpath);
154
155 // Get the documents
156 String[] doc_ids = GSXML.getAttributeValuesFromList(query_doc_list,
157 GSXML.NODE_ID_ATT);
158 for (int i = 0; i < doc_ids.length; i++) {
159 //ystem.out.println("getting doc id "+doc_ids[i]);
160 String doc_id = doc_ids[i];
161 if (GS3OID.needsTranslating(doc_id)) {
162 //doc_id = this.gdbm_src.translateOID(doc_id);
163 doc_id = GS3OID.translateOID(doc_id);
164 //ystem.out.println("translated id = "+doc_id);
165 }
166 if (GS3OID.isDocTop(doc_id) && database.isHierarchicalDocument(doc_id)) {
167 // if we have a whole doc id, and the document is hierarchical,
168 // we want to change the id to be the top id of the section
169 // hierarchy
170 doc_id = GS3OID.createOID(doc_id, "1");
171 }
172 String doc_num = this.database.OID2MGNum(doc_id);
173 // doc nums have the index prefixed
174 doc_num = doc_num.substring(doc_num.indexOf(".")+1);
175 int doc_int = Integer.parseInt(doc_num);
176
177 String doc_content = "";
178 try {
179 doc_content = this.mg_src.getDocument(basedir, textdir, doc_int);
180 } catch (Exception e) {
181 System.out.println("exception happended with mg_src.getDocument()");
182 doc_content = "this is the content for section hash id "+ doc_id+", mg doc num "+doc_int+"\n";
183 }
184 //ystem.out.println("Doc content: " + doc_content + "|");
185 //ystem.out.println("Doc ID: " + doc_id);
186 // remove any ctrl-c or ctrl-b (I hope these are the right codes)
187 doc_content = doc_content.replaceAll("\u0002|\u0003", "");
188 // replace _httpimg_ with the correct address
189 //doc_content = resolveImages(doc_content, doc_id);
190 // Stick it in a text node
191 Element doc = this.doc.createElement(GSXML.DOC_NODE_ELEM);
192 doc.setAttribute(GSXML.NODE_ID_ATT, doc_id);
193 GSXML.addDocText(this.doc, doc, doc_content);
194 doc_list.appendChild(doc);
195 }
196
197 return result;
198 }
199
200}
Note: See TracBrowser for help on using the repository browser.