source: other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Basic/etc/collect.cfg@ 29404

Last change on this file since 29404 was 29404, checked in by ak19, 9 years ago

Trying to rebuild the Word-PDF-Basic collection with unique dc.Title metadata for docs with identical final names, and with the 2nd browsing classifier sorted on dc.Title, in order to produce a consistent order for browse classifier children (files under browsing classifiers). This is necessary for Perl 5.17/5.18 and later, since they randomise the order of the children of unsorted classifiers, as well as the order of children with identical filenames.

File size: 2.1 KB
Line 
1creator
2maintainer
3public true
4
5buildtype mgpp
6infodbtype gdbm
7
8indexes text dc.Title,ex.dc.Title,ex.Title dc.Creator
9defaultindex text
10
11levels document
12
13indexoptions accentfold casefold stem
14
15defaultlevel document
16
17# Import options, needed for diffcol testing,
18# especially in conjunction with the -sort flag to ArchivesInfPlugin.
19OIDtype hash_on_full_filename
20sortmeta OID
21
22# Need to block cluster.ps for the model collection (see -block_exp on
23# PostScriptPlugin below), since the paragraph breaks in the text extracted from
24# this one PostScript file differ between operating systems when doing diffcol,
25# possibly due to different versions of ghostscript's ps2ascii. Otherwise, the text content is identical.
26plugin GreenstoneXMLPlugin
27plugin PDFPlugin
28plugin RTFPlugin
29plugin WordPlugin
30plugin PostScriptPlugin -block_exp cluster\.ps$ -convert_to text
31plugin ImagePlugin
32plugin EmbeddedMetadataPlugin
33plugin MetadataXMLPlugin
34plugin ArchivesInfPlugin -sort
35plugin DirectoryPlugin
36
37classify List -metadata dc.Title;ex.Title -partition_type_within_level approximate_size
38
39classify AZCompactList -metadata dc.Creator -sort dc.Title
40
41format VList "<td valign=\"top\">[link][icon][/link]</td>
42<td valign=\"top\">[ex.srclink]{Or}{[ex.thumbicon],[ex.srcicon]}[ex./srclink]</td>
43<td valign=\"top\">[highlight]
44{Or}{[dc.Title],[exp.Title],[ex.Title],Untitled}
45[/highlight]{If}{[ex.Source],<br><i>([ex.Source])</i>}</td>"
46
47format HList "[link][highlight][ex.Title][/highlight][/link]"
48
49format DocumentHeading "{Or}{[parent(Top):Title],[Title],untitled}<br>"
50
51format DocumentText "[Text]"
52
53format DocumentButtons "Detach|Highlight"
54
55format SearchTypes "plain,form"
56
57collectionmeta collectionname [l=en] "Word-PDF-Basic"
58collectionmeta .text [l=en] "_labeltext_"
59collectionmeta .dc.Title,ex.dc.Title,Title [l=en] "_labelTitle_"
60collectionmeta .document [l=en] "_textdocument_"
61collectionmeta .section [l=en] "_textsection_"
62collectionmeta .document:text [l=en] "_labeltext_"
63collectionmeta .document:dc.Title,Title,ex.dc.Title [l=en] "_labelTitle_"
64collectionmeta .document:Source [l=en] "_labelSource_"
65collectionmeta .document:dc.Title,Title [l=en] "_labelTitle_"
66collectionmeta .dc.Creator [l=en] "_labelCreator_"
Note: See TracBrowser for help on using the repository browser.