source: other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/etc/collect.cfg@ 27761

Last change on this file since 27761 was 27761, checked in by ak19, 9 years ago

The previous versions of Word-PDF-Basic and Word-PDF-Formatting are now replaced with rebuilt model-cols where the cluster.ps postscript file is blocked from being processed, since ghostscript or its helper process ps2ascii breaks paragraphs of the extracted text differently on different operating systems.

File size: 2.6 KB
Line 
1creator
2maintainer
3public true
4
5buildtype mgpp
6infodbtype gdbm
7
8indexes text dc.Title,ex.dc.Title,ex.Title dc.Creator
9defaultindex text
10
11levels document
12
13indexoptions accentfold casefold stem
14
15defaultlevel document
16
17# import options, needed for diffcol testing
18# especially in conjunction with -sort flag to ArchivesInfPlugin
19OIDtype hash_on_full_filename
20sortmeta OID
21
22# Need to block cluster.ps for the model collection, since the paragraph
23# breaks in the extracted text for this one postscript file are different on
24# different OS when doing diffcol, possibly due to different versions of
25# ghostscript's ps2ascii. Otherwise, the text content is identical.
26plugin GreenstoneXMLPlugin
27plugin PDFPlugin
28plugin RTFPlugin
29plugin WordPlugin
30plugin PostScriptPlugin -block_exp cluster\.ps$ -convert_to text
31plugin EmbeddedMetadataPlugin
32plugin MetadataXMLPlugin
33plugin ArchivesInfPlugin -sort
34plugin DirectoryPlugin
35
36classify List -metadata dc.Title;ex.Title -partition_type_within_level approximate_size
37
38classify AZCompactList -metadata dc.Creator -firstvalueonly
39
40format VList "<td valign=\"top\">[link][icon][/link]</td>
41<td valign=\"top\">[ex.srclink][ex.srcicon][ex./srclink]</td>
42<td valign=\"top\">[highlight]
43{Or}{[dc.Title],[ex.Title],Untitled}
44[/highlight]{If}{[ex.Source],<br><i>([ex.Source])</i>}</td>"
45
46format HList "[link][highlight][ex.Title][/highlight][/link]"
47
48format DocumentHeading "{Or}{[parent(Top):Title],[Title],untitled}<br>"
49
50format DocumentText "[Text]"
51
52format DocumentButtons "Detach|Highlight"
53
54format SearchTypes "plain,form"
55
56format SearchVList "<td valign=\"top\">[link][icon][/link]</td>
57<td valign=\"top\">[ex.srclink][ex.srcicon][ex./srclink]</td>
58<td valign=\"top\">[highlight]
59{Or}{[dc.Title],[ex.Title],Untitled}
60[/highlight]{If}{[ex.Source],<br><i>([ex.Source])</i>}</td>"
61
62format CL2VList "<td valign=\"top\">[link][icon][/link]</td>
63<td valign=\"top\">[ex.srclink][ex.srcicon][ex./srclink]</td>
64<td valign=\"top\">[highlight]
65{Or}{[dc.Title],[ex.Title],Untitled}
66[/highlight]{If}{[ex.Source],<br>[sibling(All\'<br/>\'):dc.Creator]<i>([ex.Source])</i>}</td>
67{If}{[numleafdocs],<td><i>([numleafdocs])</i></td>}"
68
69collectionmeta collectionname [l=en] "Word-PDF-Formatting"
70collectionmeta .text [l=en] "_labeltext_"
71collectionmeta .dc.Title,ex.dc.Title,Title [l=en] "_labelTitle_"
72collectionmeta .document [l=en] "_textdocument_"
73collectionmeta .section [l=en] "_textsection_"
74collectionmeta .document:text [l=en] "_labeltext_"
75collectionmeta .document:dc.Title,Title,ex.dc.Title [l=en] "_labelTitle_"
76collectionmeta .document:Source [l=en] "_labelSource_"
77collectionmeta .document:dc.Title,Title [l=en] "_labelTitle_"
78collectionmeta .dc.Creator [l=en] "_labelCreator_"
Note: See TracBrowser for help on using the repository browser.