source: other-projects/nightly-tasks/diffcol/trunk/model-collect/PDFBox/etc/collect.cfg@ 27951

Last change on this file since 27951 was 27951, checked in by ak19, 11 years ago

Updating the PDFBox collection so that the extra metadata extracted (when using the PDFBox extension) is sorted in doc.xml, allowing diffcol to give consistent results on CentOS and Ubuntu.

File size: 1.9 KB
Line 
1creator
2maintainer
3public true
4
5buildtype mgpp
6infodbtype gdbm
7
8indexes text dc.Title,ex.dc.Title,ex.Title ex.Source
9defaultindex text
10
11levels document
12
13# Import options needed for diffcol testing,
14# especially in conjunction with the -sort flag to ArchivesInfPlugin.
15OIDtype hash_on_full_filename
16sortmeta OID
17
18plugin ZIPPlugin
19plugin GreenstoneXMLPlugin
20plugin TextPlugin
21plugin HTMLPlugin
22plugin EmailPlugin
23plugin PDFPlugin -pdfbox_conversion -convert_to html
24plugin RTFPlugin
25plugin WordPlugin
26plugin PostScriptPlugin
27plugin PowerPointPlugin
28plugin ExcelPlugin
29plugin ImagePlugin
30plugin ISISPlugin
31plugin NulPlugin
32plugin EmbeddedMetadataPlugin
33plugin MetadataXMLPlugin
34plugin ArchivesInfPlugin -sort
35plugin DirectoryPlugin
36
37classify List -metadata dc.Title;ex.Title -partition_type_within_level approximate_size
38classify List -metadata ex.Source -partition_type_within_level approximate_size
39
40format VList "<td valign=\"top\">[link][icon][/link]</td>
41<td valign=\"top\">[ex.srclink]{Or}{[ex.thumbicon],[ex.srcicon]}[ex./srclink]</td>
42<td valign=\"top\">[highlight]
43{Or}{[dc.Title],[exp.Title],[ex.Title],Untitled}
44[/highlight]{If}{[ex.Source],<br><i>([ex.Source])</i>}</td>"
45
46format HList "[link][highlight][ex.Title][/highlight][/link]"
47
48format DocumentHeading "{Or}{[parent(Top):Title],[Title],untitled}<br>"
49
50format DocumentText "[Text]"
51
52format DocumentButtons "Detach|Highlight"
53
54format SearchTypes "plain,form"
55
56collectionmeta collectionname [l=en] "PDFBox"
57collectionmeta .text [l=en] "_labeltext_"
58collectionmeta .dc.Title,ex.dc.Title,Title [l=en] "_labelTitle_"
59collectionmeta .Source [l=en] "_labelSource_"
60collectionmeta .document [l=en] "_textdocument_"
61collectionmeta .section [l=en] "_textsection_"
62collectionmeta .document:text [l=en] "_labeltext_"
63collectionmeta .document:dc.Title,Title,ex.dc.Title [l=en] "_labelTitle_"
64collectionmeta .document:Source [l=en] "_labelSource_"
65collectionmeta .document:dc.Title,Title [l=en] "_labelTitle_"
Note: See TracBrowser for help on using the repository browser.