source: main/trunk/greenstone2/collect/modelcol/etc/collectionConfig.xml@ 34172

Last change on this file since 34172 was 34172, checked in by ak19, 4 years ago

Some minor improvements to the UnknownConverterPlugin settings for tika's conversion (of docx files) to html. Also documenting the reasoning.

  • Property svn:keywords set to Author Date Id Revision
File size: 10.1 KB
Line 
1<?xml version="1.0" encoding="UTF-8"?>
2<CollectionConfig xmlns:gsf="http://www.greenstone.org/greenstone3/schema/ConfigFormat" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:gslib="http://www.greenstone.org/skinning">
3 <metadataList>
 <!-- Collection-level metadata entries: creator, maintainer and public.
      NOTE(review): the **name** values look like placeholder tokens that are substituted when a real collection is created from this model file; confirm against the collection-creation tooling. -->
4 <metadata name="creator">**creator**</metadata>
5 <metadata name="maintainer">**maintainer**</metadata>
6 <metadata name="public">**public**</metadata>
7 </metadataList>
8 <displayItemList>
 <!-- English (lang="en") display strings for the collection. Both name and shortDescription carry the **title** token; description carries the **about** token. -->
9 <displayItem assigned="true" lang="en" name="name">**title**</displayItem>
10 <displayItem assigned="true" lang="en" name="description">**about**</displayItem>
11 <!-- shortDescription is used as the tooltip for the collection icon on the home page. -->
12 <displayItem assigned="true" lang="en" name="shortDescription">**title**</displayItem>
13 </displayItemList>
14 <!-- Global format statement. The choose-title template defined here is called by name from the search and browse format statements in this file. -->
15 <format>
16 <gsf:template name="choose-title">
 <!-- Title shown for a document: chosen from dc.Title, exp.Title, ex.dc.Title and Title, with the literal text "Untitled" as the default.
      NOTE(review): presumably the first listed field that has a value wins; confirm against the gsf:choose-metadata documentation. -->
17 <gsf:choose-metadata>
18 <gsf:metadata name="dc.Title"/>
19 <gsf:metadata name="exp.Title"/>
20 <gsf:metadata name="ex.dc.Title"/>
21 <gsf:metadata name="Title"/>
22 <gsf:default>Untitled</gsf:default>
23 </gsf:choose-metadata>
24 </gsf:template>
25 <!-- Modify the collection description template to output the "this collection contains X documents and was last built Y days ago" message.
      The template reads the buildDate and numDocs collection metadata, formats the date with util:formatTimeStamp, and passes the two values joined by ';' (numdocs first) to the 'about.standarddescriptiondays' interface text.
      NOTE(review): the util: prefix and the $interface_name variable are not declared anywhere in this file; they presumably come from the stylesheet this template is merged into. Confirm before reusing this template elsewhere. -->
26 <xsl:template name="coll-description">
27 <p><gslib:collectionDescriptionTextAndServicesLinks/></p>
28 <xsl:variable name="raw_date"><gslib:collectionMeta name="buildDate"/></xsl:variable>
29 <xsl:variable name="formatted_date"><xsl:value-of select="util:formatTimeStamp($raw_date, 0, 3, /page/@lang)"/></xsl:variable>
30 <xsl:variable name="numdocs"><gslib:collectionMeta name="numDocs"/></xsl:variable>
31 <p><xsl:value-of select="util:getInterfaceText($interface_name, /page/@lang, 'about.standarddescriptiondays', concat($numdocs, ';', $formatted_date))"/></p>
32 </xsl:template>
33 </format>
34 <search type="lucene">
 <!-- Search configuration (type "lucene"):
      levels: document and section (default: document);
      indexes: text, titles (dc.Title,ex.dc.Title,Title) and filenames (Source) (default: text);
      sort options: rank and none;
      search form types: plain, simpleform and advancedform.
      Each nested displayItem supplies the English (lang="en") label for its parent element. -->
35 <level name="document">
36 <displayItem lang="en" name="name">document</displayItem>
37 </level>
38 <level name="section">
39 <displayItem lang="en" name="name">section</displayItem>
40 </level>
41 <defaultLevel name="document"/>
42 <index name="text">
43 <displayItem lang="en" name="name">text</displayItem>
44 </index>
45 <index name="dc.Title,ex.dc.Title,Title">
46 <displayItem lang="en" name="name">titles</displayItem>
47 </index>
48 <index name="Source">
49 <displayItem lang="en" name="name">filenames</displayItem>
50 </index>
51 <defaultIndex name="text"/>
52 <sort name="rank">
53 <displayItem lang="en" name="name">rank</displayItem>
54 </sort>
55 <sort name="none">
56 <displayItem lang="en" name="name">natural (build) order</displayItem>
57 </sort>
58 <searchType name="plain"/>
59 <searchType name="simpleform"/>
60 <searchType name="advancedform"/>
61 <format>
 <!-- Layout of one search result (documentNode): a cell with the document icon and a cell with the document title, each wrapped in a link of type "document". The title comes from the choose-title template defined in the global format statement. -->
62 <gsf:template match="documentNode">
63 <td valign="top">
64 <gsf:link type="document">
65 <gsf:icon type="document"/>
66 </gsf:link>
67 </td>
68 <td>
69 <gsf:link type="document">
70 <xsl:call-template name="choose-title"/>
71 </gsf:link>
72 </td>
73 </gsf:template>
74 </format>
75 </search>
76
77 <infodb type="jdbm"/> <!-- Sets the infodb type for this collection to jdbm. NOTE(review): presumably this selects the database backend for the built collection; confirm. -->
78
79 <import>
 <!-- Plugins used when importing documents into the collection.
      NOTE(review): the order of this list is probably significant (each file being offered to the plugins in turn); confirm before reordering. -->
80 <pluginList>
81 <plugin name="ZIPPlugin"/>
82 <plugin name="GreenstoneXMLPlugin"/>
83 <plugin name="TextPlugin"/>
84 <plugin name="HTMLPlugin"/>
85 <plugin name="EmailPlugin"/>
86 <plugin name="PDFv2Plugin"/>
87 <!-- Configuring an UnknownConverterPlugin for docx processing with Tika.
      The plugin is restricted to the docx extension and the docx MIME type, converts to html, and uses the icondocx source icon.
      The command runs the Tika app jar from $GSDLHOME/ext/tika; the jar file name contains the Tika version (1.24.1), so it must match the jar that is actually installed there.
      In the command, the escaped greater-than sign is the shell redirect that writes the generated HTML to the output file.
      NOTE(review): %%INPUT_FILE and %%OUTPUT look like placeholders that the plugin replaces with the source and converted file paths; confirm against the plugin documentation. -->
88 <plugin name="UnknownConverterPlugin">
89 <option name="-exec_cmd" value="java -jar $GSDLHOME/ext/tika/tika-app-1.24.1.jar --html --pretty-print --encoding=UTF-8 %%INPUT_FILE &gt; %%OUTPUT"/>
90 <option name="-convert_to" value="html"/>
91 <option name="-mime_type" value="application/vnd.openxmlformats-officedocument.wordprocessingml.document"/>
92 <option name="-srcicon" value="icondocx"/>
93 <option name="-process_extension" value="docx"/>
94 </plugin>
95 <plugin name="RTFPlugin"/>
96 <plugin name="WordPlugin"/>
97 <plugin name="PostScriptPlugin"/>
98 <plugin name="PowerPointPlugin"/>
99 <plugin name="ExcelPlugin"/>
100 <plugin name="ImagePlugin"/>
101 <plugin name="ISISPlugin"/>
102 <plugin name="NulPlugin"/>
103 <plugin name="OAIPlugin"/>
104 <plugin name="MetadataXMLPlugin"/>
105 <plugin name="ArchivesInfPlugin"/>
106 <plugin name="DirectoryPlugin"/>
107 </pluginList>
108 </import>
109 <browse>
 <!-- Browsing configuration: two List classifiers, one on titles (dc.Title,Title) with its own format statement and one on Source without one, followed by a format statement placed directly under browse.
      NOTE(review): presumably the browse-level format applies to any classifier that has no format of its own (here, the Source list); confirm. -->
110 <classifier name="List">
111 <option name="-metadata" value="dc.Title,Title"/>
112 <option name="-partition_type_within_level" value="approximate_size"/>
113 <option name="-numeric_partition_type_within_level" value="approximate_size"/>
114 <!-- Only use one dc.Title/Title value. Change to allvalues to use all of them. -->
115 <option name="-metadata_selection_mode_within_level" value="firstvalue"/>
116 <format>
 <!-- Row layout for a document in the titles list: document icon, then thumbicon/srcicon linked to the source, then the title. -->
117 <gsf:template match="documentNode">
118 <td valign="top">
119 <gsf:link type="document">
120 <gsf:icon type="document"/>
121 </gsf:link>
122 </td>
123 <td valign="top">
124 <gsf:link type="source">
125 <gsf:choose-metadata>
126 <gsf:metadata name="thumbicon"/>
127 <gsf:metadata name="srcicon"/>
128 </gsf:choose-metadata>
129 </gsf:link>
130 </td>
131 <td valign="top">
132 <gsf:link type="document">
133 <!-- Instead of using choose-title here, we want to display the title that the document was classified on. When Source metadata exists, it is appended on a new line, in italics and parentheses. -->
134 <gsf:metadata name="dc.Title,Title" pos="classifiedBy"/>
135 <gsf:switch>
136 <gsf:metadata name="Source"/>
137 <gsf:when test="exists"><br/><i>(<gsf:metadata name="Source"/>)</i></gsf:when>
138 </gsf:switch>
139 </gsf:link>
140 </td>
141 </gsf:template>
142 </format>
143 </classifier>
144 <classifier name="List">
145 <option name="-metadata" value="Source"/>
146 <option name="-partition_type_within_level" value="approximate_size"/>
147 <option name="-numeric_partition_type_within_level" value="approximate_size"/>
148 </classifier>
149 <format>
 <!-- Browse-level format: one template for document nodes and one each for classifier nodes whose classifierStyle is VList or HList. -->
150 <gsf:template match="documentNode">
151 <td valign="top">
152 <gsf:link type="document">
153 <gsf:icon type="document"/>
154 </gsf:link>
155 </td>
156 <td valign="top">
157 <gsf:link type="source">
158 <gsf:choose-metadata>
159 <gsf:metadata name="thumbicon"/>
160 <gsf:metadata name="srcicon"/>
161 </gsf:choose-metadata>
162 </gsf:link>
163 </td>
164 <td valign="top">
165 <gsf:link type="document">
166 <!-- choose-title is defined in the global format statement. When Source metadata exists, it is appended on a new line, in italics and parentheses. -->
167 <xsl:call-template name="choose-title"/>
168 <gsf:switch>
169 <gsf:metadata name="Source"/>
170 <gsf:when test="exists"><br/><i>(<gsf:metadata name="Source"/>)</i></gsf:when>
171 </gsf:switch>
172 </gsf:link>
173 </td>
174 </gsf:template>
 <!-- VList classifier nodes: a classifier icon link (style "static") in one cell and the node Title as a classifier link in the next. -->
175 <gsf:template match="classifierNode[@classifierStyle = 'VList']">
176 <td valign="top">
177 <gsf:link type="classifier" style="static">
178 <gsf:icon type="classifier"/>
179 </gsf:link>
180 </td>
181 <td valign="top">
182 <gsf:link type="classifier">
183 <gsf:metadata name="Title"/>
184 </gsf:link>
185 </td>
186 </gsf:template>
 <!-- HList classifier nodes: only the node Title, as a classifier link with style "static" and no table cells. -->
187 <gsf:template match="classifierNode[@classifierStyle = 'HList']">
188 <gsf:link type="classifier" style="static">
189 <gsf:metadata name="Title"/>
190 </gsf:link>
191 </gsf:template>
192 </format>
193 </browse>
194 <display>
195 <format>
 <!-- Document display options as set here: TOC=true, allowUserComments=false, allowDocumentEditing=true, allowMapGPSEditing=true.
      Every template below is commented out; each is preceded by a note saying what overriding it changes. -->
196 <gsf:option name="TOC" value="true"/>
197 <gsf:option name="allowUserComments" value="false"/>
198 <gsf:option name="allowDocumentEditing" value="true"/>
199 <gsf:option name="allowMapGPSEditing" value="true"/>
200 <!--
201 Overwriting this template allows you to change the heading of the document.
202 -->
203 <!--
204 <gsf:template name="documentHeading">
205 <span style="font-weight:bold; font-size: 120%;">
206 <xsl:call-template name="choose-title"/>
207 </span>
208 </gsf:template>
209 -->
210
211 <!--
212 Overwriting this template can be used to redefine the content of the whole document.
213 This is useful for simple documents, but not recommended for more complex documents
214 (e.g. hierarchical and paged documents) as it can prevent any sub-sections from showing.
215 -->
216 <!--
217 <gsf:template name="documentContent">
218 <div id="gs-document">
219 <xsl:call-template name="documentPre"/>
220 <xsl:call-template name="wrappedSectionImage"/>
221 <div id="gs-document-text">
222 <xsl:call-template name="documentNodeText"/>
223 </div>
224 </div>
225 </gsf:template>
226 -->
227
228 <!--
229 Overwriting this template can be used to change the content of section headings.
230 -->
231 <!--
232 <gsf:template name="sectionHeading">
233 <xsl:call-template name="choose-title"/>
234 </gsf:template>
235 -->
236
237 <!--
238 Overwriting this template can be used to change the content of the top-level section.
239 -->
240 <!--
241 <gsf:template name="topLevelSectionContent">
242 <xsl:call-template name="wrappedSectionImage"/>
243 <xsl:call-template name="wrappedSectionText"/>
244 </gsf:template>
245 -->
246
247 <!--
248 Overwriting this template can be used to change the content of sections.
249 -->
250 <!--
251 <gsf:template name="sectionContent">
252 <xsl:call-template name="wrappedSectionImage"/>
253 <xsl:call-template name="wrappedSectionText"/>
254 </gsf:template>
255 -->
256 </format>
257 </display>
258 <replaceListRef id="gs2-standard"/>
 <!-- NOTE(review): the two ids referenced here (gs2-standard and gs2-image) name replace lists that are not defined in this file; presumably they are defined in shared configuration. Confirm where before renaming or removing either reference. -->
259 <replaceListRef id="gs2-image" />
260 <serviceRackList>
 <!-- Extra service racks for this collection: RSSRetrieve and OAIPMH. The OAI set name and description carry the **title** and **about** tokens, and oai_dc is the only metadata format listed. -->
261 <!-- Comment out the following serviceRack if you want to disable RSS for this collection. -->
262 <serviceRack name="RSSRetrieve"/>
263 <!-- Comment out the following serviceRack if you want to disable OAI for this collection. -->
264 <serviceRack name="OAIPMH">
265 <setName>**title**</setName>
266 <setDescription>**about**</setDescription>
267 <!-- Uncomment the following and set the name attribute if
268 you want this collection to be part of a super set. -->
269 <!--<oaiSuperSet name="xxx"/>-->
270 <ListMetadataFormats>
271 <!-- Which metadata sets to support for this collection. You can select any set that is specified in OAIConfig.xml. To use a new set, add a set definition in OAIConfig.xml and then reference it here, like:
272 <metadataFormat metadataPrefix="prefix"/> -->
273
274 <metadataFormat metadataPrefix="oai_dc">
275 <!-- You can customize the mappings by adding elements in here. See resources/oai/OAIConfig.xml for the format. -->
276 </metadataFormat>
277 </ListMetadataFormats>
278 </serviceRack>
279 </serviceRackList>
280</CollectionConfig>
Note: See TracBrowser for help on using the repository browser.