source: gs3-extensions/fedora/trunk/src/fedoragsearch-files/foxmlToLuceneGenerated.xslt.in@ 26434

Last change on this file since 26434 was 26434, checked in by ak19, 8 years ago

While getting GS3-with-Fedora collections to work on Puka, more issues were discovered. 1. Several additional files needed to become template files with placeholder values for the GS3 Tomcat server and port, the Fedora password and the index write-lock timeout value. Other files needed further modifications (particularly fedora.fcfg.in and gsearch's fgsconfig-basic.properties) to make sure the server and port values weren't fixed to the localhost and 8383 defaults. 2. The build.xml had to be updated to make changes to these new and modified files during the installation process. 3. There's now a new target in build.xml: uninstall-fedora. 4. The README file was updated with all this information, as well as corrections and clarifications.

File size: 15.8 KB
Line 
1<?xml version="1.0" encoding="UTF-8"?>
2
3
4<xsl:stylesheet version="1.0" exclude-result-prefixes="exts" xmlns:audit="info:fedora/fedora-system:def/audit#" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:exts="xalan://dk.defxws.fedoragsearch.server.GenericOperationsImpl" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dtu_meta="http://www.dtu.dk/dtu_meta/" xmlns:foxml="info:fedora/fedora-system:def/foxml#" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:meta="http://www.dtu.dk/dtu_meta/meta/" xmlns:ex="http://www.greenstone.org/namespace/fake/ex" xmlns:dls="http://www.greenstone.org/namespace/fake/dls">
<!-- The generated index document is serialised as indented UTF-8 XML. -->
<xsl:output encoding="UTF-8" indent="yes" method="xml"/>

<!--
  Stylesheet parameters with their built-in default values.
  Tokens written as @name@ are placeholders of this template (.in) file;
  they are expected to be replaced with real values at installation time.
-->

<!-- Repository name: stored in the REPOSITORYNAME index field and handed
     to the exts extension functions. -->
<xsl:param name="REPOSITORYNAME" select="'FgsRepos'"/>

<!-- Base URL of the Fedora web application. NOTE(review): not referenced
     by the templates visible in this stylesheet; the REPOSBASEURL index
     field is derived from FEDORASOAP instead. -->
<xsl:param name="REPOSBASEURL" select="'http://@tomcatserver@:@tomcatport@/fedora'"/>

<!-- Fedora services URL plus the credentials and trust store settings
     that are passed on to the exts extension functions. -->
<xsl:param name="FEDORASOAP" select="'http://@tomcatserver@:@tomcatport@/fedora/services'"/>
<xsl:param name="FEDORAUSER" select="'fedoraAdmin'"/>
<xsl:param name="FEDORAPASS" select="'@fedorapassw@'"/>
<xsl:param name="TRUSTSTOREPATH" select="'trustStorePath'"/>
<xsl:param name="TRUSTSTOREPASS" select="'trustStorePass'"/>

<!-- PID of the FOXML object being transformed, read once from the root
     element and reused by all templates. -->
<xsl:variable name="PID" select="/foxml:digitalObject/@PID"/>
<!--
  Entry point. Always emits an IndexDocument element carrying the object's
  PID; its IndexField children are produced (by the templates in mode
  "activeFedoraObject") only when every condition below holds.
-->
<xsl:template match="/">
  <!--The PID attribute is mandatory for indexing to work-->
  <IndexDocument boost="1.0" PID="{$PID}">
    <!--
      An object is indexed only if all of the following are true:
        1. its state property is 'Active';
        2. it has neither a METHODMAP nor a DS-COMPOSITE-MODEL datastream;
        3. its PID starts with the given prefix. The prefix is the empty
           string here, which every PID starts with, so this last test
           currently filters nothing.
    -->
    <xsl:if test="foxml:digitalObject/foxml:objectProperties/foxml:property[@NAME='info:fedora/fedora-system:def/model#state' and @VALUE='Active'] and not(foxml:digitalObject/foxml:datastream[@ID='METHODMAP'] or foxml:digitalObject/foxml:datastream[@ID='DS-COMPOSITE-MODEL']) and starts-with($PID,'')">
      <xsl:apply-templates mode="activeFedoraObject"/>
    </xsl:if>
  </IndexDocument>
</xsl:template>
<!--
  Produces the IndexField elements for one FOXML digital object.

  Fields are emitted in this order:
    1. fixed fields (PID, REPOSITORYNAME, REPOSBASEURL, TITLE_UNTOK,
       AUTHOR_UNTOK);
    2. one "fgs.*" field per FOXML object property;
    3. one field per occurrence of each listed audit, dc, foxml, meta and
       oai_dc element or attribute found anywhere in the document;
    4. one "ex.*" / "dls.*" field per metadata element in the EX* / DLS*
       datastreams;
    5. whatever the Tika extension function returns for each datastream
       in control group M, E or R;
    6. two aggregate full-text fields (foxml.all.text and ds.fulltext).

  Relies on the global $PID variable and on the stylesheet parameters
  REPOSITORYNAME, FEDORASOAP, FEDORAUSER, FEDORAPASS, TRUSTSTOREPATH and
  TRUSTSTOREPASS.
-->
<xsl:template match="/foxml:digitalObject" mode="activeFedoraObject">
  <!--The PID index field lets you search on the PID value-->
  <IndexField IFname="PID" index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0">
    <xsl:value-of select="$PID"/>
  </IndexField>
  <IndexField IFname="REPOSITORYNAME" index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0">
    <xsl:value-of select="$REPOSITORYNAME"/>
  </IndexField>
  <!-- The base URL is obtained by dropping the last 9 characters of
       $FEDORASOAP (the length of the '/services' suffix of its default
       value); the $REPOSBASEURL parameter is not consulted. -->
  <IndexField IFname="REPOSBASEURL" index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0">
    <xsl:value-of select="substring($FEDORASOAP, 1, string-length($FEDORASOAP)-9)"/>
  </IndexField>
  <!-- xsl:value-of outputs only the first selected node, so when several
       dc:title / dc:creator elements match, only the first one in document
       order ends up in the untokenized field. -->
  <IndexField IFname="TITLE_UNTOK" index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0">
    <xsl:value-of select="foxml:datastream/foxml:datastreamVersion[last()]/foxml:xmlContent/oai_dc:dc/dc:title"/>
  </IndexField>
  <IndexField IFname="AUTHOR_UNTOK" index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0">
    <xsl:value-of select="foxml:datastream/foxml:datastreamVersion[last()]/foxml:xmlContent/oai_dc:dc/dc:creator"/>
  </IndexField>

  <!--indexing foxml property fields-->
  <!-- The field name is 'fgs.' followed by the part of the property NAME
       after the '#'. -->
  <xsl:for-each select="foxml:objectProperties/foxml:property">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO">
      <xsl:attribute name="IFname">
        <xsl:value-of select="concat('fgs.', substring-after(@NAME,'#'))"/>
      </xsl:attribute>
      <xsl:value-of select="@VALUE"/>
    </IndexField>
  </xsl:for-each>

  <!--indexing foxml fields-->
  <!-- Element content is indexed TOKENIZED with term vectors; attribute
       values are indexed UN_TOKENIZED without term vectors. -->

  <!-- audit trail -->
  <xsl:for-each select="//audit:action">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="audit.action" displayName="audit.action">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//audit:componentID">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="audit.componentID" displayName="audit.componentID">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//audit:date">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="audit.date" displayName="audit.date">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//audit:justification">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="audit.justification" displayName="audit.justification">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//audit:process">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="audit.process" displayName="audit.process">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//audit:process/@type">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="audit.process_type" displayName="audit.process_type">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//audit:record/@ID">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="audit.record_ID" displayName="audit.record_ID">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//audit:responsibility">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="audit.responsibility" displayName="audit.responsibility">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>

  <!-- Dublin Core -->
  <xsl:for-each select="//dc:creator">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="dc.creator" displayName="dc.creator">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//dc:date">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="dc.date" displayName="dc.date">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//dc:description">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="dc.description" displayName="dc.description">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//dc:format">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="dc.format" displayName="dc.format">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//dc:identifier">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="dc.identifier" displayName="dc.identifier">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//dc:publisher">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="dc.publisher" displayName="dc.publisher">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//dc:relation">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="dc.relation" displayName="dc.relation">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//dc:rights">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="dc.rights" displayName="dc.rights">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//dc:subject">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="dc.subject" displayName="dc.subject">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//dc:title">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="dc.title" displayName="dc.title">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>

  <!-- FOXML structure -->
  <xsl:for-each select="//foxml:contentLocation">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="foxml.contentLocation" displayName="foxml.contentLocation">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:contentLocation/@REF">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.contentLocation_REF" displayName="foxml.contentLocation_REF">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:contentLocation/@TYPE">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.contentLocation_TYPE" displayName="foxml.contentLocation_TYPE">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:datastream/@CONTROL_GROUP">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.datastream_CONTROL_GROUP" displayName="foxml.datastream_CONTROL_GROUP">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:datastream/@ID">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.datastream_ID" displayName="foxml.datastream_ID">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:datastream/@STATE">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.datastream_STATE" displayName="foxml.datastream_STATE">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:datastream/@VERSIONABLE">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.datastream_VERSIONABLE" displayName="foxml.datastream_VERSIONABLE">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:datastreamVersion/@CREATED">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.datastreamVersion_CREATED" displayName="foxml.datastreamVersion_CREATED">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:datastreamVersion/@FORMAT_URI">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.datastreamVersion_FORMAT_URI" displayName="foxml.datastreamVersion_FORMAT_URI">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:datastreamVersion/@ID">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.datastreamVersion_ID" displayName="foxml.datastreamVersion_ID">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:datastreamVersion/@LABEL">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.datastreamVersion_LABEL" displayName="foxml.datastreamVersion_LABEL">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:datastreamVersion/@MIMETYPE">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.datastreamVersion_MIMETYPE" displayName="foxml.datastreamVersion_MIMETYPE">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:datastreamVersion/@SIZE">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.datastreamVersion_SIZE" displayName="foxml.datastreamVersion_SIZE">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:digitalObject/@PID">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.digitalObject_PID" displayName="foxml.digitalObject_PID">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:digitalObject/@VERSION">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.digitalObject_VERSION" displayName="foxml.digitalObject_VERSION">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:digitalObject/@xsi:schemaLocation">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.digitalObject_xsi:schemaLocation" displayName="foxml.digitalObject_xsi:schemaLocation">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:property">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="foxml.property" displayName="foxml.property">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:property/@NAME">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.property_NAME" displayName="foxml.property_NAME">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//foxml:property/@VALUE">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="foxml.property_VALUE" displayName="foxml.property_VALUE">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>

  <!-- dtu_meta metadata -->
  <xsl:for-each select="//meta:creator">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="meta.creator" displayName="meta.creator">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//meta:description">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="meta.description" displayName="meta.description">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//meta:publisher">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="meta.publisher" displayName="meta.publisher">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//meta:subject">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="meta.subject" displayName="meta.subject">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>
  <xsl:for-each select="//meta:title">
    <IndexField index="TOKENIZED" store="YES" termVector="YES" boost="1.0" IFname="meta.title" displayName="meta.title">
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>

  <!-- oai_dc wrapper -->
  <xsl:for-each select="//oai_dc:dc/@xsi:schemaLocation">
    <IndexField index="UN_TOKENIZED" store="YES" termVector="NO" boost="1.0" IFname="oai_dc.dc_xsi:schemaLocation" displayName="oai_dc.dc_xsi:schemaLocation">
      <xsl:value-of select="."/>
    </IndexField>
  </xsl:for-each>

  <!-- Metadata held in the EX* and DLS* datastreams: for the last
       datastreamVersion of each such datastream, every ex:metadata /
       dls:metadata element becomes a field named 'ex.' / 'dls.' followed
       by the element's name attribute. -->
  <xsl:for-each select="foxml:datastream[starts-with(@ID,'EX')]/foxml:datastreamVersion[last()]/foxml:xmlContent/ex:ex/ex:metadata">
    <IndexField index="TOKENIZED" store="YES" termVector="YES">
      <xsl:attribute name="IFname">
        <xsl:value-of select="concat('ex.', @name)"/>
      </xsl:attribute>
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>

  <xsl:for-each select="foxml:datastream[starts-with(@ID,'DLS')]/foxml:datastreamVersion[last()]/foxml:xmlContent/dls:dls/dls:metadata">
    <IndexField index="TOKENIZED" store="YES" termVector="YES">
      <xsl:attribute name="IFname">
        <xsl:value-of select="concat('dls.', @name)"/>
      </xsl:attribute>
      <xsl:value-of select="text()"/>
    </IndexField>
  </xsl:for-each>

  <!-- Text and metadata extraction using Apache Tika. -->
  <!-- The extension function's result is written with output escaping
       disabled, i.e. copied to the output verbatim. NOTE(review): it
       presumably returns ready-made IndexField markup named 'ds.<ID>' and
       'dsmd.<ID>.*'; confirm against GenericOperationsImpl. -->
  <xsl:for-each select="foxml:datastream[@CONTROL_GROUP='M' or @CONTROL_GROUP='E' or @CONTROL_GROUP='R']">
    <xsl:value-of disable-output-escaping="yes" select="exts:getDatastreamFromTika($PID, $REPOSITORYNAME, @ID, 'IndexField', concat('ds.', @ID), concat('dsmd.', @ID, '.'), '', $FEDORASOAP, $FEDORAUSER, $FEDORAPASS, $TRUSTSTOREPATH, $TRUSTSTOREPASS)"/>
  </xsl:for-each>

  <!-- Text of every datastream in control group M, E or R, each followed
       by a single space. It is collected once here and reused by both
       aggregate fields below, so that exts:getDatastreamText is called
       once per datastream rather than once per datastream per field. -->
  <xsl:variable name="datastreamText">
    <xsl:for-each select="//foxml:datastream[@CONTROL_GROUP='M' or @CONTROL_GROUP='E' or @CONTROL_GROUP='R']">
      <xsl:value-of select="exts:getDatastreamText($PID, $REPOSITORYNAME, @ID, $FEDORASOAP, $FEDORAUSER, $FEDORAPASS, $TRUSTSTOREPATH, $TRUSTSTOREPASS)"/>
      <xsl:text> </xsl:text>
    </xsl:for-each>
  </xsl:variable>

  <!--creating an index field with all text from the foxml record and its datastreams-->
  <IndexField IFname="foxml.all.text" index="TOKENIZED" store="YES" termVector="YES">
    <xsl:for-each select="//text()">
      <xsl:value-of select="."/>
      <xsl:text> </xsl:text>
    </xsl:for-each>
    <xsl:value-of select="$datastreamText"/>
  </IndexField>

  <!-- The datastream text on its own, without the text of the FOXML record. -->
  <IndexField IFname="ds.fulltext" index="TOKENIZED" store="YES" termVector="YES">
    <xsl:value-of select="$datastreamText"/>
  </IndexField>
</xsl:template>
318</xsl:stylesheet>
Note: See TracBrowser for help on using the repository browser.