source: main/trunk/model-sites-dev/heritage-nz/collect/reports-2019/etc/collectionConfig.xml@ 33030

Last change on this file since 33030 was 33030, checked in by davidb, 2 years ago

Added in UnknownPlugin to catch PDFs with no text in them, which are therefore not processed by PDFv2Plugin with convert_to 'text'

  • Property svn:mime-type set to text/plain
File size: 17.8 KB
Line 
1<?xml version="1.0" encoding="UTF-8"?>
2<CollectionConfig xmlns:gsf="http://www.greenstone.org/greenstone3/schema/ConfigFormat" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:gslib="http://www.greenstone.org/skinning">
3 <metadataList>
4 <metadata name="creator"></metadata>
5 <metadata name="maintainer"></metadata>
6 <metadata name="public">true</metadata>
7 </metadataList>
8 <displayItemList>
9<!--
10 (&lt;gsf:metaname name="numdocs"/&gt; to be precise, and growing)
11-->
12
13 <displayItem assigned="true" lang="en" name="name">PDF Reports</displayItem>
14 <displayItem assigned="true" lang="en" name="description">_about:aboutthiscollection_</displayItem>
15<!--
16 <displayItem assigned="true" lang="en" name="description">
17 &lt;h1&gt;(Unofficial) Archaeological Reports Digital Library&lt;/h1&gt;
18 &lt;b&gt;(Experimental demonstration using Greenstone3)&lt;/b&gt;
19
20 &lt;p&gt;
21 Heritage New Zealand holds an extensive collection
22 of unpublished archaeological reports resulting from
23 site surveys, excavations, research programmes and
24 archaeological authorities. There are over 7000
25 volumes dating from the early 1970s to the present
26 in the collection.
27 &lt;/p&gt;
28
29 &lt;figure class=""&gt;
30 &lt;img src="http://www.heritage.org.nz/-/media/heritage/images/protect/archaeology/digital-library-page.jpg?useCustomFunctions=1&amp;mw=920" width="220" alt="Archaeological digital library" /&gt;
31 &lt;/figure&gt;
32
33 &lt;p&gt;Should you prefer, you may contact us as follows:&lt;/p&gt;
34 &lt;p&gt;Archaeological Digital Library&lt;br&gt;
35 Heritage New Zealand&lt;br&gt;
36 National Office&lt;br&gt;
37 P O Box 2629&lt;br&gt;
38 Wellington 6140&lt;/p&gt;
39 &lt;p&gt;tel: (04) 472 4341&lt;/p&gt;
40
41
42 </displayItem>
43-->
44 <!-- shortDescription is used as a tooltip for collection icon on home page-->
45 <displayItem assigned="true" lang="en" name="shortDescription">Archaeological Reports Digital Library</displayItem>
46 </displayItemList>
47 <!-- Global format statement: defines the shared "choose-title" template, which outputs the first available of dc.Title, exp.Title, ex.dc.Title and Title, or "Untitled" when none is set. It is called from the search, browse and display formats below. -->
48 <format>
49 <gsf:template name="choose-title">
50 <gsf:choose-metadata>
51 <gsf:metadata name="dc.Title"/>
52 <gsf:metadata name="exp.Title"/>
53 <gsf:metadata name="ex.dc.Title"/>
54 <gsf:metadata name="Title"/>
55 <gsf:default>Untitled</gsf:default>
56 </gsf:choose-metadata>
57 </gsf:template>
58 </format>
59 <search type="solr">
60 <level name="document">
61 <displayItem lang="en" name="name">document</displayItem>
62 </level>
63<!--
64 <level name="section">
65 <displayItem lang="en" name="name">section</displayItem>
66 </level>
67-->
68 <defaultLevel name="document"/>
69 <index name="text">
70 <displayItem lang="en" name="name">text</displayItem>
71 </index>
72 <index name="dc.Title,ex.dc.Title,Title">
73 <displayItem lang="en" name="name">titles</displayItem>
74 </index>
75 <index name="Author">
76 <displayItem lang="en" name="name">authors</displayItem>
77 </index>
78 <index name="AuthorityNo">
79 <displayItem lang="en" name="name">authority number</displayItem>
80 </index>
81 <index name="SiteNo">
82 <displayItem lang="en" name="name">site number</displayItem>
83 </index>
84
85 <defaultIndex name="text"/>
86 <sort name="rank">
87 <displayItem lang="en" name="name">rank</displayItem>
88 </sort>
89 <sort name="Date">
90 <displayItem lang="en" name="name">year</displayItem>
91 </sort>
92 <sort name="none">
93 <displayItem lang="en" name="name">natural (build) order</displayItem>
94 </sort>
95
96
97 <facet name="TLA">
98 <displayItem lang="en" name="name">Territorial Authorities</displayItem>
99 </facet>
100 <facet name="AuthorityNo">
101 <displayItem lang="en" name="name">Authority Number</displayItem>
102 </facet>
103 <facet name="SiteNo">
104 <displayItem lang="en" name="name">Site Number</displayItem>
105 </facet>
106
107 <searchType name="plain"/>
108 <!--
109 <searchType name="simpleform"/>
110 -->
111 <searchType name="advancedform"/>
112
113 <format>
114 <gsf:template match="documentNode">
115 <!-- Search result row. CSVPlugin documents show the generic document icon; all other documents show their thumbicon (or srcicon if there is no thumbicon) linked to the source file. Every cell is top-aligned, matching the browse list rows, so titles line up with the top of tall thumbnails. -->
116 <gsf:switch>
117 <gsf:metadata name="Plugin"/>
118 <gsf:when test="equals" test-value="CSVPlugin">
119 <td valign="top">
120 <gsf:link type="document">
121 <gsf:icon type="document"/>
122 </gsf:link>
123 </td>
124 <td valign="top">
125 <gsf:link type="document">
126 <xsl:call-template name="choose-title"/>
127 </gsf:link>
128 </td>
129
130 </gsf:when>
131 <gsf:otherwise>
132 <td valign="top">
133<!--
134 <gsf:link type="document">
135 <gsf:icon type="document"/>
136 </gsf:link>
137-->
138
139 <gsf:link type="source">
140 <gsf:choose-metadata>
141 <gsf:metadata name="thumbicon"/>
142 <gsf:metadata name="srcicon"/>
143 </gsf:choose-metadata>
144 </gsf:link>
145 </td>
146 <td valign="top">
147 <gsf:link type="document">
148 <xsl:call-template name="choose-title"/>
149 </gsf:link>
150 </td>
151
152 </gsf:otherwise>
153 </gsf:switch>
154
155
156 </gsf:template>
157 </format>
158 </search>
159
160 <infodb type="gdbm"/>
161
162 <import>
163 <pluginList>
164 <plugin name="ZIPPlugin"/>
165 <plugin name="GreenstoneXMLPlugin"/>
166 <plugin name="MetadataCSVPlugin">
167 <option name="-process_exp" value="\/?PDF-BOUND.*\.csv$"/>
168 <option name="-metadata_value_separator" value="\s*\|\s*"/>
169 </plugin>
170 <plugin name="CSVPlugin">
171 <option name="-process_exp" value="\/?UNBOUND.*\.csv$"/>
172 <option name="-block_exp" value=".*\.xls$"/>
173 <option name="-metadata_value_separator" value="\s*\|\s*"/>
174 </plugin>
175 <plugin name="TextPlugin"/>
176 <plugin name="HTMLPlugin"/>
177 <plugin name="EmailPlugin"/>
178 <plugin name="PDFv2Plugin">
179<!--
180 <option name="-convert_to" value="paged_pretty_html"/>
181-->
182<!--
183 <option name="-convert_to" value="pagedimg_png"/>
184-->
185<!--
186 <option name="-convert_to" value="paged_text"/>
187-->
188 <option name="-convert_to" value="text"/>
189 <option name="-enable_cache"/>
190 </plugin>
191 <plugin name="UnknownPlugin">
192 <option name="-process_extension" value="pdf"/>
193 <option name="-file_format" value="PDF"/>
194 <option name="-mime_type" value="application/pdf"/>
195 <option name="-srcicon" value="pdficon"/>
196 </plugin>
197
198<!--
199The following PDFs do not get processed by xpdftools.
200'Bacquie1.pdf', for example, has no text in it;
201presumably the same is true for the other files.
202
203Digital-Library-PDFs/Reports A - B/Bacquie1.pdf
204Digital-Library-PDFs/Reports A - B/Bacquie2.pdf
205Digital-Library-PDFs/Reports A - B/Bacquie3.pdf
206Digital-Library-PDFs/Reports A - B/Bacquie4.pdf
207Digital-Library-PDFs/Reports A - B/Bacquie5.pdf
208Digital-Library-PDFs/Reports A - B/Best105.pdf
209Digital-Library-PDFs/Reports A - B/Best50.pdf
210Digital-Library-PDFs/Reports A - B/Bulmer6.pdf
211Digital-Library-PDFs/Reports C - D/Challis4.pdf
212Digital-Library-PDFs/Reports C - D/Cuniliffe36.pdf
213Digital-Library-PDFs/Reports C - D/Daisley3.pdf
214Digital-Library-PDFs/Reports E - F/Frank23.pdf
215Digital-Library-PDFs/Reports E - F/Fredericksen11.pdf
216Digital-Library-PDFs/Reports G - H/Geometria4.pdf
217Digital-Library-PDFs/Reports G - H/Geometria5.pdf
218Digital-Library-PDFs/Reports G - H/Geometria6.pdf
219Digital-Library-PDFs/Reports G - H/Gillies2.pdf
220Digital-Library-PDFs/Reports G - H/Gillies4.pdf
221Digital-Library-PDFs/Reports G - H/Gillies6.pdf
222Digital-Library-PDFs/Reports G - H/Grouden17.pdf
223Digital-Library-PDFs/Reports G - H/Hennessey107.pdf
224Digital-Library-PDFs/Reports G - H/Hennessey108.pdf
225Digital-Library-PDFs/Reports G - H/Hennessey109.pdf
226Digital-Library-PDFs/Reports G - H/Hughes243.pdf
227Digital-Library-PDFs/Reports G - H/Hughes257.pdf
228Digital-Library-PDFs/Reports G - H/Hughes258.pdf
229Digital-Library-PDFs/Reports G - H/Hughes259.pdf
230Digital-Library-PDFs/Reports I - J/Jeal9.pdf
231Digital-Library-PDFs/Reports I - J/Johnson98.pdf
232Digital-Library-PDFs/Reports K - L/Lawler40.pdf
233Digital-Library-PDFs/Reports K - L/Lawlor18.pdf
234Digital-Library-PDFs/Reports M - N/Moyle5.pdf
235Digital-Library-PDFs/Reports M - N/Nichol59.pdf
236Digital-Library-PDFs/Reports O - P/Phillips237.pdf
237Digital-Library-PDFs/Reports S - T/Sims2.pdf
238Digital-Library-PDFs/Reports S - T/Trendafilov13.pdf
239Digital-Library-PDFs/Reports W - Z/Young38.pdf
240
241-->
242 <plugin name="RTFPlugin"/>
243 <plugin name="WordPlugin"/>
244 <plugin name="PostScriptPlugin"/>
245 <plugin name="PowerPointPlugin"/>
246 <plugin name="ExcelPlugin"/>
247 <plugin name="ImagePlugin"/>
248 <plugin name="ISISPlugin"/>
249 <plugin name="NulPlugin"/>
250 <plugin name="OAIPlugin"/>
251 <plugin name="MetadataXMLPlugin"/>
252 <plugin name="ArchivesInfPlugin"/>
253 <plugin name="DirectoryPlugin"/>
254 </pluginList>
255 <importOption name="OIDtype" value="assigned"/>
256 <importOption name="OIDmetadata" value="hnz.Identifier"/>
257 </import>
258 <browse>
259<!--
260 <classifier name="List">
261 <option name="-metadata" value="dc.Title;Title"/>
262 <option name="-partition_type_within_level" value="approximate_size"/>
263 </classifier>
264
265
266
267 -mingroup <int> The smallest value that will cause a group in the
268 hierarchy to form.
269 Default: 1
270
271 -minnesting <int> The smallest value that will cause a list to be
272 converted into a nested list.
273 Default: 20
274
275 -mincompact <int> Minimum number of documents to be displayed per page.
276 Default: 10
277
278 -maxcompact <int> Maximum number of documents to be displayed per page.
279 Default: 30
280
281-->
282 <classifier name="AZList">
283 <option name="-metadata" value="dc.Title,Title"/>
284<!-- Titles are browsed with leading quotes, brackets and numbers stripped. Regex alternatives are tried left to right, so in the active pattern below the longer "N and M" alternative is listed before the bare digits one; in the opposite order the bare digits alternative always matches first and the longer one is never used. Earlier pattern, kept for reference:
285 <option name="-removeprefix" value="\&quot;|\'|\(|(\d+\s*)"/>
286-->
287 <option name="-removeprefix" value="\&quot;|\'|\(|(\d+\w*\s+and\s+\d+\w*)|(\d+\s*)"/>
288 <option name="-removesuffix" value="\):.*$"/>
289 </classifier>
290<!--
291 <classifier name="AZCompactList">
292 <option name="-metadata" value="dc.Title,Title"/>
293 <option name="-removeprefix" value="\&quot;|\'|\(|(\d+\s*)|(\d+\s+and\d+)"/>
294 <option name="-removesuffix" value="\s*:.*$"/>
295 <option name="-mingroup" value="2"/>
296 <option name="-maxcompact" value="100"/>
297 </classifier>
298-->
299 <classifier name="AZCompactList">
300 <option name="-metadata" value="Author"/>
301 </classifier>
302 <classifier name="DateList">
303 <option name="-metadata" value="Date"/>
304 </classifier>
305
306 <classifier name="AZCompactList">
307 <option name="-metadata" value="TLA"/>
308 <option name="-buttonname" value="Territorial Authorities"/>
309 <option name="-maxcompact" value="200"/>
310 </classifier>
311
312 <classifier name="AZCompactList">
313 <option name="-metadata" value="AuthorityNo"/>
314 <option name="-buttonname" value="Authority Number"/>
315 </classifier>
316
317 <classifier name="AZCompactList">
318 <option name="-metadata" value="SiteNo"/>
319 <option name="-buttonname" value="Site Number"/>
320 </classifier>
321
322 <format>
323 <gsf:template match="documentNode">
324 <!-- Browse list row. CSVPlugin documents show the generic document icon; all other documents show their thumbicon (or srcicon if there is no thumbicon) linked to the source file. Both variants then show the linked title, followed by the Date in italics and parentheses when Date metadata exists. -->
325 <gsf:switch>
326 <gsf:metadata name="Plugin"/>
327 <gsf:when test="equals" test-value="CSVPlugin">
328 <td valign="top">
329 <gsf:link type="document">
330 <gsf:icon type="document"/>
331 </gsf:link>
332 </td>
333 <td valign="top">
334 <gsf:link type="document">
335 <!-- Defined in the global format statement -->
336 <xsl:call-template name="choose-title"/>
337 <gsf:switch>
338 <gsf:metadata name="Date"/>
339 <gsf:when test="exists">&#160;<i>(<gsf:metadata name="Date"/>)</i></gsf:when>
340 </gsf:switch>
341 </gsf:link>
342 </td>
343
344 </gsf:when>
345 <gsf:otherwise>
346 <td valign="top">
347<!--
348 <gsf:link type="document">
349 <gsf:icon type="document"/>
350 </gsf:link>
351-->
352
353 <gsf:link type="source">
354 <gsf:choose-metadata>
355 <gsf:metadata name="thumbicon"/>
356 <gsf:metadata name="srcicon"/>
357 </gsf:choose-metadata>
358 </gsf:link>
359 </td>
360 <td valign="top">
361 <gsf:link type="document">
362 <!-- Defined in the global format statement -->
363 <xsl:call-template name="choose-title"/>
364 <gsf:switch>
365 <gsf:metadata name="Date"/>
366 <gsf:when test="exists">&#160;<i>(<gsf:metadata name="Date"/>)</i></gsf:when>
367 </gsf:switch>
368 </gsf:link>
369 </td>
370
371 </gsf:otherwise>
372 </gsf:switch>
373
374 </gsf:template>
375 <gsf:template match="classifierNode[@classifierStyle = 'VList']">
376 <td valign="top">
377 <gsf:link type="classifier">
378 <gsf:icon type="classifier"/>
379 </gsf:link>
380 </td>
381 <td valign="top">
382 <gsf:metadata name="Title"/>
383 </td>
384 </gsf:template>
385 <gsf:template match="classifierNode[@classifierStyle = 'HList']">
386 <gsf:link type="classifier">
387 <gsf:metadata name="Title"/>
388 </gsf:link>
389 </gsf:template>
390 </format>
391 </browse>
392 <display>
393 <format>
394 <gsf:option name="TOC" value="true"/>
395
396 <!--
397 Overriding this template changes the heading of the document. Here, when srcicon metadata exists it is shown as a link to the source file; the title chosen by the choose-title template then follows in bold at 120% size.
398 -->
399 <gsf:template name="documentHeading">
400 <gsf:switch>
401 <gsf:metadata name="srcicon"/>
402 <gsf:when test="exists">
403 <gsf:link type="source">
404 <gsf:metadata name="srcicon"/>
405 </gsf:link>
406 </gsf:when>
407 </gsf:switch>
408
409 <span style="font-weight:bold; font-size: 120%;">
410 <xsl:call-template name="choose-title"/>
411 </span>
412 </gsf:template>
413
414
415 <!--
416 Overwriting this template can be used to redefine the content of the whole document.
417 This is useful for simple documents, but not recommended for more complex documents
418 (e.g. hierarchical and paged documents) as it can prevent any sub-sections from showing.
419 -->
420 <!-- Named template that renders a two-column table of the document's metadata: hnz.Identifier, dc.Title, Author (every value, separated by "; "), Date, TLA, AuthorityNo and SiteNo. The inline style block gives odd and even rows alternating backgrounds; note that its selectors (tr, i, td) are plain element selectors and are not scoped to this table. -->
421 <gsf:template name="documentContentMetadataTable">
422 <style>
423 tr:nth-child(odd) {background: #dddddd}
424 tr:nth-child(even) {background: #f5f3e5}
425 i { font-style: italic; }
426 td { padding: 2px; }
427 </style>
428 <table>
429 <tbody>
430 <tr>
431 <td>
432 Identifier:
433 </td>
434 <td>
435 <i><gsf:metadata name="hnz.Identifier"/></i>
436 </td>
437 </tr>
438
439 <tr>
440 <td>
441 Title:
442 </td>
443 <td>
444 <i>
445 <!--
446 <gsf:link type="source">
447 <gsf:metadata name="srcicon"/>
448 </gsf:link>
449 -->
450 <gsf:metadata name="dc.Title" highlight="true"/>
451
452 </i>
453 </td>
454 </tr>
455
456 <tr>
457 <td>
458 Author(s):
459 </td>
460 <td>
461 <gsf:foreach-metadata name="Author" separator="; ">
462 <i><gsf:meta-value/></i>
463 </gsf:foreach-metadata>
464 </td>
465 </tr>
466
467 <tr>
468 <td>
469 Date:
470 </td>
471 <td>
472 <i><gsf:metadata name="Date"/></i>
473 </td>
474 </tr>
475
476 <tr>
477 <td>
478 Territorial Authorities:
479 </td>
480 <td>
481 <i><gsf:metadata name="TLA"/></i>
482 </td>
483 </tr>
484
485 <tr>
486 <td>
487 Authority No:
488 </td>
489 <td>
490 <i><gsf:metadata name="AuthorityNo"/></i>
491 </td>
492 </tr>
493
494 <tr>
495 <td>
496 Site No:
497 </td>
498 <td>
499 <i><gsf:metadata name="SiteNo"/></i>
500 </td>
501 </tr>
502 </tbody>
503 </table>
504
505 </gsf:template>
506
507 <gsf:template name="documentContent">
508 <!--
509 CSVPlugin and PDFv2Plugin documents are shown as a metadata table;
510 any other document shows its section image followed by its section text.
511 -->
512 <gsf:switch>
513 <gsf:metadata name="Plugin"/>
514
515 <gsf:when test="equals" test-value="CSVPlugin">
516 <xsl:call-template name="documentContentMetadataTable"/>
517 </gsf:when>
518
519 <gsf:when test="equals" test-value="PDFv2Plugin">
520 <xsl:call-template name="documentContentMetadataTable"/>
521 </gsf:when>
522
523 <gsf:otherwise>
524 <xsl:call-template name="wrappedSectionImage"/>
525 <xsl:call-template name="wrappedSectionText"/>
526 </gsf:otherwise>
527 </gsf:switch>
528 </gsf:template>
529<!--
530 <gsf:template name="wrappedDocument">
531 <xsl:call-template name="documentHeading"/><br/>
532 <xsl:call-template name="documentContent"/>
533 </gsf:template>
534-->
535 <!--
536 Overwriting this template can be used to change the content of section headings.
537 -->
538 <!--
539 <gsf:template name="sectionHeading">
540 <xsl:call-template name="choose-title"/>
541 </gsf:template>
542 -->
543
544 <!--
545 Overwriting this template can be used to change the content of the top-level section.
546 -->
547 <!--
548 <gsf:template name="topLevelSectionContent">
549
550 <xsl:call-template name="wrappedSectionImage"/>
551 <xsl:call-template name="wrappedSectionText"/>
552 </gsf:template>
553 -->
554
555 <!--
556 Overwriting this template can be used to change the content of sections.
557 -->
558 <!--
559 <gsf:template name="sectionContent">
560 <xsl:call-template name="wrappedSectionImage"/>
561 <xsl:call-template name="wrappedSectionText"/>
562 </gsf:template>
563 -->
564 </format>
565 </display>
566 <replaceListRef id="gs2-standard"/>
567 <replaceListRef id="gs2-image" />
568 <security default_access="public" scope="document" humanVerify="true" useRecaptcha="true"/>
569
570 <serviceRackList>
571 <!-- comment out the following serviceRack if you want to disable RSS for this collection -->
572 <serviceRack name="RSSRetrieve"/>
573 <!-- comment out the following serviceRack if you want to disable OAI for this collection -->
574 <serviceRack name="OAIPMH">
575 <setName>reports</setName>
576 <setDescription></setDescription>
577 <!-- uncomment the following and set the name attribute if
578 you want this collection to be part of a super set. -->
579 <!--<oaiSuperSet name="xxx"/>-->
580 <ListMetadataFormats>
581 <!--What metadata sets to support for this collection. You can select any set that is specified in OAIConfig.xml. To use a new set, you need to add a set definition in OAIConfig.xml, then reference it here, like:
582 <metadataFormat metadataPrefix="prefix"/> -->
583
584 <metadataFormat metadataPrefix="oai_dc">
585 <!-- you can customize the mappings by adding elements in here. See resources/oai/OAIConfig.xml for the format-->
586 </metadataFormat>
587 </ListMetadataFormats>
588 </serviceRack>
589 </serviceRackList>
590</CollectionConfig>
Note: See TracBrowser for help on using the repository browser.