root/other-projects/nightly-tasks/diffcol/trunk/model-collect/Word-PDF-Formatting/archives/HASHeaa2/9d2e0811.dir/doc.xml @ 29405

Revision 29405, 140.3 KB (checked in by ak19, 5 years ago)

Trying to rebuild the Word-PDF-Formatting collection with unique dc.Title metadata for docs that have identical final names and with the 2nd browsing classifier sorted on dc.Title, in order to produce a consistent order for browse classifiers' children (a consistent presentation order of the files under the browsing classifiers). This is necessary for perl 5.18/5.17 and later, since they randomise the order of children of unsorted classifiers and for those children with identical filenames. Changes made particularly to collect.cfg and import/metadata.xml

Line 
1<?xml version="1.0" encoding="utf-8" standalone="no"?>
2<!DOCTYPE Archive SYSTEM "http://greenstone.org/dtd/Archive/1.0/Archive.dtd">
3<Archive>
4<Section>
5  <Description>
6    <Metadata name="gsdldoctype">indexed_doc</Metadata>
7    <Metadata name="Language">en</Metadata>
8    <Metadata name="Encoding">utf8</Metadata>
9    <Metadata name="GENERATOR">wvWare/wvWare version 1.2.4</Metadata>
10    <Metadata name="Title">Greenstone: A Comprehensive Open-Source</Metadata>
11    <Metadata name="URL">http://Scratch/ak19/gs2-svn-22Oct2014/collect/Word-PDF-Formatting/tmp/1414470427_2/word03.html</Metadata>
12    <Metadata name="UTF8URL">http://Scratch/ak19/gs2-svn-22Oct2014/collect/Word-PDF-Formatting/tmp/1414470427_2/word03.html</Metadata>
13    <Metadata name="gsdlsourcefilename">import/word03.doc</Metadata>
14    <Metadata name="gsdlconvertedfilename">tmp/1414470427_2/word03.html</Metadata>
15    <Metadata name="OrigSource">word03.html</Metadata>
16    <Metadata name="Source">word03.doc</Metadata>
17    <Metadata name="SourceFile">word03.doc</Metadata>
18    <Metadata name="Plugin">WordPlugin</Metadata>
19    <Metadata name="FileSize">414720</Metadata>
20    <Metadata name="FilenameRoot">word03</Metadata>
21    <Metadata name="FileFormat">Word</Metadata>
22    <Metadata name="srcicon">_icondoc_</Metadata>
23    <Metadata name="srclink_file">doc.doc</Metadata>
24    <Metadata name="srclinkFile">doc.doc</Metadata>
25    <Metadata name="dc.Creator">Ian H. Witten</Metadata>
26    <Metadata name="dc.Creator">Rodger J. McNab</Metadata>
27    <Metadata name="dc.Creator">Stefan J. Boddie</Metadata>
28    <Metadata name="dc.Creator">David Bainbridge</Metadata>
29    <Metadata name="dc.Title">Greenstone: A comprehensive open-source digital library software system - doc</Metadata>
30    <Metadata name="Identifier">HASHeaa29d2e081149673150f3</Metadata>
31    <Metadata name="lastmodified">1414470425</Metadata>
32    <Metadata name="lastmodifieddate">20141028</Metadata>
33    <Metadata name="oailastmodified">1414470428</Metadata>
34    <Metadata name="oailastmodifieddate">20141028</Metadata>
35    <Metadata name="assocfilepath">HASHeaa2/9d2e0811.dir</Metadata>
36    <Metadata name="gsdlassocfile">word030.png:image/png:</Metadata>
37    <Metadata name="gsdlassocfile">word031.png:image/png:</Metadata>
38    <Metadata name="gsdlassocfile">word032.png:image/png:</Metadata>
39    <Metadata name="gsdlassocfile">word033.png:image/png:</Metadata>
40    <Metadata name="gsdlassocfile">word034.png:image/png:</Metadata>
41    <Metadata name="gsdlassocfile">word035.png:image/png:</Metadata>
42    <Metadata name="gsdlassocfile">doc.doc:application/msword:</Metadata>
43  </Description>
44  <Content>
45
46&lt;!--Section Begins--&gt;&lt;br&gt;
47
48
49
50&lt;p&gt;&lt;div name=&quot;Heading 1&quot; align=&quot;center&quot; style=&quot;margin: 4.17mm 0.50mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
51
52&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
53&lt;b&gt;&lt;span style=&quot;text-transform:uppercase&quot;&gt;Greenstone:  A Comprehensive Open-Source Digital Library Software System&lt;/span&gt;&lt;/b&gt;
54&lt;/p&gt;&lt;/div&gt;
55
56
57
58&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;center&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
59
60&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
61&lt;b&gt;&lt;/b&gt;
62&lt;/p&gt;&lt;/div&gt;
63
64
65
66&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;center&quot; style=&quot;margin: 1.74mm 0.00mm 1.39mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
67
68&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
69&lt;i&gt;Ian H. Witten,&lt;/i&gt;&lt;i&gt;&lt;sup&gt;*&lt;/sup&gt;&lt;/i&gt;&lt;i&gt; Rodger J. McNab,&lt;/i&gt;&lt;i&gt;&lt;sup&gt;&amp;dagger;&lt;/sup&gt;&lt;/i&gt;&lt;i&gt; Stefan J. Boddie,&lt;/i&gt;&lt;i&gt;&lt;sup&gt;*&lt;/sup&gt;&lt;/i&gt;&lt;i&gt; David Bainbridge&lt;/i&gt;&lt;i&gt;&lt;sup&gt;*&lt;/sup&gt;&lt;/i&gt;&lt;i&gt; &lt;/i&gt;&lt;i&gt;&lt;sup&gt;&lt;/sup&gt;&lt;/i&gt;
70&lt;/p&gt;&lt;/div&gt;
71
72
73&lt;table width=&quot;98.51%&quot; border=&quot;1&quot; cols=&quot;2&quot; rows=&quot;1&quot;&gt;
74&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;46.57%&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;center&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
75
76&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
77&lt;sup&gt;*&lt;/sup&gt; Dept of Computer Science
78&lt;/p&gt;&lt;/div&gt;
79
80
81
82&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;center&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
83
84&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
85University of Waikato, New Zealand
86&lt;/p&gt;&lt;/div&gt;
87
88
89
90&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;center&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
91
92&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
93E-mail:  {ihw, sjboddie, davidb}@cs.waikato.ac.nz
94&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
95
96
97
98&lt;td bgcolor=&quot;White&quot; width=&quot;53.43%&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;center&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
99
100&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
101&lt;sup&gt;&amp;dagger; &lt;/sup&gt;Digilib Systems
102&lt;/p&gt;&lt;/div&gt;
103
104
105
106&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;center&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
107
108&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
109Hamilton, New Zealand
110&lt;/p&gt;&lt;/div&gt;
111
112
113
114&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;center&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
115
116&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
117E-mail:  rodger@digilibs.com
118&lt;/p&gt;&lt;/div&gt;
119
120
121
122&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;center&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
123
124&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
125&lt;sup&gt;&lt;/sup&gt;
126&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
127
128&lt;/tr&gt;
129
130&lt;/table&gt;
131
132&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
133
134&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
135&lt;b&gt;&lt;/b&gt;
136&lt;/p&gt;&lt;/div&gt;
137
138
139
140&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
141
142&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
143&lt;b&gt;&lt;/b&gt;
144&lt;/p&gt;&lt;/div&gt;
145
146&lt;!--Section Ends--&gt;
147
148&lt;!--Section Begins--&gt;&lt;br&gt;
149
150
151
152&lt;p&gt;&lt;div name=&quot;Heading 1&quot; align=&quot;left&quot; style=&quot;margin: 0.00mm 0.49mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
153
154&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
155&lt;b&gt;&lt;span style=&quot;text-transform:uppercase&quot;&gt;Abstract&lt;/span&gt;&lt;/b&gt;
156&lt;/p&gt;&lt;/div&gt;
157
158
159
160&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
161
162&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
163This paper describes the Greenstone digital library software, a comprehensive, open-source system for the construction and presentation of information collections. Collections built with Greenstone offer effective full-text searching and metadata-based browsing facilities that are attractive and easy to use. Moreover, they are easily maintainable and can be augmented and rebuilt entirely automatically. The system is extensible: software &amp;ldquo;plugins&amp;rdquo; accommodate different document and metadata types.
164&lt;/p&gt;&lt;/div&gt;
165
166
167
168&lt;p&gt;&lt;div name=&quot;Heading 1&quot; align=&quot;left&quot; style=&quot;margin: 4.17mm 0.50mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
169
170&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
171&lt;b&gt;&lt;span style=&quot;text-transform:uppercase&quot;&gt;Introduction&lt;/span&gt;&lt;/b&gt;
172&lt;/p&gt;&lt;/div&gt;
173
174
175
176&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
177
178&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
179Notwithstanding intense research activity in the digital library field during the second half of the 1990s, comprehensive software systems for creating digital libraries are not widely available. In fact, the usual solution when creating a digital library is also the most obvious&amp;mdash;just put it on the Web. But consider how much effort is involved in constructing a Web site for a digital library. To be effective it needs to be visually attractive and ergonomically easy to use, incorporate convenient and powerful searching capabilities, and offer rich and natural browsing facilities. Above all it must be easy to maintain and augment, which presents a significant challenge if any manual organization is involved.
180&lt;/p&gt;&lt;/div&gt;
181
182
183
184&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
185
186&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
187The alternative is to automate these activities through software tools. But the broad scope of digital library requirements makes this a daunting prospect. Ideally the software should incorporate facilities ranging from multilingual information retrieval to distributed computing protocols, from interoperability to search engine technology, from metadata standards to multiformat document parsing, from multimedia to multiple operating systems, from Web browsers to plug-and-play DVDs.
188&lt;/p&gt;&lt;/div&gt;
189
190
191
192&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
193
194&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
195The Greenstone Digital Library Software from the New Zealand Digital Library (NZDL) project tackles this issue by providing a new way of organizing information and making it available over the Internet. A &lt;i&gt;collection&lt;/i&gt; of information comprises several (typically several thousand, or several million) &lt;i&gt;documents&lt;/i&gt;, and a uniform interface is provided to all documents in a collection. A library may include many different collections, each organized differently&amp;mdash;though there is a strong family resemblance in how collections are presented.
196&lt;/p&gt;&lt;/div&gt;
197
198
199
200&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
201
202&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
203Making information available using this system is far more than &amp;ldquo;just putting it on the Web.&amp;rdquo; The collection becomes maintainable, searchable, and browsable. Each collection, prior to presentation, undergoes a &amp;ldquo;building&amp;rdquo; process that, once established, is completely automatic. This process creates all the structures that are used at run-time for accessing the collection. Searching is based on various indexes, while browsing is based on various metadata; support structures for both are created during the building operation. When new material appears it can be fully incorporated into the collection by rebuilding.
204&lt;/p&gt;&lt;/div&gt;
205
206
207
208&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
209
210&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
211To address the exceptionally broad demands of digital libraries, the system is public and extensible. It is issued under the GNU General Public License and, in the spirit of open-source software, users are invited to contribute modifications and enhancements. Only through an international cooperative effort will digital library software become sufficiently comprehensive to meet the world's needs. Currently the Greenstone software is used at sites in Canada, Germany, New Zealand, Romania, the UK, and the US, and collections range from newspaper articles to technical documents, from educational journals to oral history, from visual art to folksongs. The software has been used for collections in many different languages, and for CD-ROMs that have been published by the United Nations and other humanitarian agencies in Belgium, France, Japan, and the US for distribution in developing countries (Humanity Libraries, 1998; PAHO, 1999; UNESCO, 1999; UNU, 1998). Further details can be obtained from &lt;i&gt;www.nzdl.org&lt;/i&gt;.
212&lt;/p&gt;&lt;/div&gt;
213
214
215
216&lt;p&gt;&lt;div name=&quot;Caption&quot; align=&quot;center&quot; style=&quot;margin: 2.08mm 2.74mm 0.00mm 17.26mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
217
218&lt;p style=&quot;text-indent: -17.26mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
219&lt;img width=&quot;740&quot; height=&quot;658&quot; alt=&quot;0x01 graphic&quot; src=&quot;_httpdocimg_/word030.png&quot;&gt;&lt;br&gt;
220&lt;/p&gt;&lt;/div&gt;
221
222
223
224&lt;p&gt;&lt;div name=&quot;Caption&quot; align=&quot;center&quot; style=&quot;margin: 2.08mm 2.74mm 0.00mm 17.26mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
225
226&lt;p style=&quot;text-indent: -17.26mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
227&lt;b&gt;Figure &lt;/b&gt;&lt;b&gt;&lt;/b&gt;&lt;b&gt;&lt;/b&gt;&lt;b&gt;&lt;/b&gt;&lt;b&gt;1&lt;/b&gt;&lt;b&gt;&lt;/b&gt;&lt;b&gt;: Searching the HDL collection&lt;/b&gt;
228&lt;/p&gt;&lt;/div&gt;
229
230
231
232&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
233
234&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
235This paper sets the scene with a brief discussion of what a digital library is. We then give an overview of the facilities offered by Greenstone and show how end users find information in collections. Next we describe the files and directories involved in a collection, and then discuss the processes of updating existing collections and creating new ones, including extending the software to provide new facilities. We conclude with an overview of related work.
236&lt;/p&gt;&lt;/div&gt;
237
238
239
240&lt;p&gt;&lt;div name=&quot;Heading 1&quot; align=&quot;left&quot; style=&quot;margin: 4.17mm 0.50mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
241
242&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
243&lt;b&gt;&lt;span style=&quot;text-transform:uppercase&quot;&gt;What is a digital library?&lt;/span&gt;&lt;/b&gt;
244&lt;/p&gt;&lt;/div&gt;
245
246
247
248&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
249
250&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
251Ten definitions of the term &amp;ldquo;digital library&amp;rdquo; have been culled from the literature by Fox (1998), and their spirit is captured in the following brief characterization:
252&lt;/p&gt;&lt;/div&gt;
253
254
255
256&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 7.55mm 0.00mm 7.81mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
257
258&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
259&lt;i&gt;A collection of digital objects, including text, video, and audio, along with methods for access and retrieval, and for selection, organization and maintenance of the collection&lt;/i&gt;
260&lt;/p&gt;&lt;/div&gt;
261
262
263
264&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
265
266&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
267(Akscyn and Witten, 1998). Lesk (1998) views digital libraries as &amp;ldquo;organized collections of digital information,&amp;rdquo; and wisely recommends that they articulate the principles governing what is included and how the collection is organized.
268&lt;/p&gt;&lt;/div&gt;
269
270
271
272&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
273
274&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
275Digital libraries are generally distinguished from the World-Wide Web, the essential difference being in selection and organization. But they are not generally distinguished from a web &lt;i&gt;site&lt;/i&gt;: indeed, virtually all extant digital libraries manifest themselves as a web site. Hence the obvious question: to make a digital library, why not just put the information on the Web?
276&lt;/p&gt;&lt;/div&gt;
277
278
279
280&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
281
282&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
283But we make a distinction between a digital library and a web site that lies at the heart of our software design: one should easily be able to add new material to a library without having to integrate it manually or edit its content in any way. Once added, new material should immediately become a first-class component of the library. And what permits it to be integrated into existing searching and browsing structures without any manual intervention is &lt;i&gt;metadata&lt;/i&gt;. This provides sufficient focus to the concept of &amp;ldquo;digital library&amp;rdquo; to support the development of a construction kit.
284&lt;/p&gt;&lt;/div&gt;
285
286
287
288&lt;p&gt;&lt;div name=&quot;Heading 1&quot; align=&quot;left&quot; style=&quot;margin: 4.17mm 0.50mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
289
290&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
291&lt;b&gt;&lt;span style=&quot;text-transform:uppercase&quot;&gt;Overview of Greenstone&lt;/span&gt;&lt;/b&gt;
292&lt;/p&gt;&lt;/div&gt;
293
294
295
296&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
297
298&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
299Information collections built by Greenstone combine extensive full-text search facilities with browsing indexes based on different metadata types. There are several ways for users to find information, although they differ between collections depending on the metadata available and the collection design. Typically you can &lt;i&gt;search for particular words&lt;/i&gt; that appear in the text, or within a section of a document, or within a title or section heading. You can &lt;i&gt;browse documents by title&lt;/i&gt;: just click on the displayed book icon to read it. You can &lt;i&gt;browse documents by subject&lt;/i&gt;. Subjects are represented by bookshelves: just click on a shelf to see the books. Where appropriate, documents come complete with a table of contents (constructed automatically): you can click on a chapter or subsection to open it, expand the full table of contents, or expand the full document.
300&lt;/p&gt;&lt;/div&gt;
301
302
303
304&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
305
306&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
307An example of searching is shown in Figure 1 where documents in the Global Help Project's Humanity Development Library (HDL) are being searched for chapters matching the word &lt;i&gt;butterfly&lt;/i&gt;. In Figure 2 the same collection is being browsed by subject: by clicking on the bookshelf icons the user has discovered an item under Section 16, Animal Husbandry. Pursuing an interest in butterfly farming, the user selects a book by clicking on its book icon. In Figure 3 the front cover of the book is displayed as a graphic on the left, and the automatically constructed table of contents appears at the start of the document. The current focus, &lt;i&gt;Introduction and Summary&lt;/i&gt;, is shown in bold in the table of contents with its text starting further down the page.
308&lt;/p&gt;&lt;/div&gt;
309
310
311
312&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
313
314&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
315In accordance with Lesk's advice, a statement of purpose and coverage accompanies each collection, along with an explanation of how it is organized (Figure 1 shows the start of this). A distinction is made between &lt;i&gt;searching&lt;/i&gt; and &lt;i&gt;browsing&lt;/i&gt;. Searching is full-text, and&amp;mdash;depending on the collection's design&amp;mdash;the user can choose between indexes built from different parts of the documents, or from different metadata. Some collections have an index of full documents, an index of sections, an index of paragraphs, an index of titles, and an index of section headings, each of which can be searched for particular words or phrases. Browsing involves data structures created from metadata that the user can examine: lists of authors, lists of titles, lists of dates, hierarchical classification structures, and so on. Data structures for both browsing and searching are built according to instructions in a configuration file, which controls both building and serving the collection. Sample configuration files are discussed below.
316&lt;/p&gt;&lt;/div&gt;
317
318
319
320&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
321
322&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
323
324&lt;/p&gt;&lt;/div&gt;
325
326
327
328&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;center&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
329
330&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
331&lt;img width=&quot;740&quot; height=&quot;658&quot; alt=&quot;0x01 graphic&quot; src=&quot;_httpdocimg_/word031.png&quot;&gt;&lt;br&gt;&lt;b&gt;&lt;/b&gt;
332&lt;/p&gt;&lt;/div&gt;
333
334
335
336&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;center&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
337
338&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
339&lt;b&gt;Figure &lt;/b&gt;&lt;b&gt;&lt;/b&gt;&lt;b&gt;&lt;/b&gt;&lt;b&gt;&lt;/b&gt;&lt;b&gt;2&lt;/b&gt;&lt;b&gt;&lt;/b&gt;&lt;b&gt;: Browsing the HDL collection by subject&lt;/b&gt;&lt;b&gt;&lt;/b&gt;
340&lt;/p&gt;&lt;/div&gt;
341
342
343
344&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
345
346&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
347Rich browsing facilities can be provided by manually linking parts of documents together and building explicit indexes and tables of contents. However, manually-created linking becomes difficult to maintain, and often falls into disrepair when a collection expands. The Greenstone software takes a different tack: it facilitates &lt;i&gt;maintainability&lt;/i&gt; by creating all searching and browsing structures automatically from the documents themselves. No links are inserted by hand. This means that when new documents in the same format become available, they can be added automatically. Indeed, for some collections this is done by processes that wake up regularly, scout for new material, and rebuild the indexes&amp;mdash;all without manual intervention.
348&lt;/p&gt;&lt;/div&gt;
349
350
351
352&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
353
354&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
355Collections comprise many documents: thousands, tens of thousands, or even millions. Each document may be hierarchically organized into &lt;i&gt;sections&lt;/i&gt; (subsections, sub-subsections, and so on). Each section comprises one or more &lt;i&gt;paragraphs&lt;/i&gt;. Metadata such as author, title, date, keywords, and so on, may be associated with documents, or with individual sections of documents. This is the raw material for indexes. It must either be provided explicitly for each document and section (for example, in an accompanying spreadsheet) or be derivable automatically from the source documents. Metadata is converted to Dublin Core and stored with the document for internal use.
356&lt;/p&gt;&lt;/div&gt;
357
358
359
360&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
361
362&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
363In order to accommodate different kinds of source documents, the software is organized so that &amp;ldquo;plugins&amp;rdquo; can be written for new document types. Plugins exist for plain text documents, HTML documents, email documents, and bibliographic formats. Word documents are handled by saving them as HTML; PostScript ones by applying a preprocessor (Nevill-Manning &lt;i&gt;et al&lt;/i&gt;., 1998). Specially written plugins also exist for proprietary formats such as that used by the BBC archives department. A collection may have source documents in different forms: it is just a matter of specifying all the necessary plugins. In order to build browsing indexes from metadata, an analogous scheme of &amp;ldquo;classifiers&amp;rdquo; is used: classifiers create indexes of various kinds based on metadata. Source documents are brought into the Greenstone system through a process called &lt;i&gt;importing&lt;/i&gt;, which uses the plugins and classifiers specified in the collection configuration file.
364&lt;/p&gt;&lt;/div&gt;
365
366
367
368&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
369
370&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
371The international Unicode character set is used throughout, so documents&amp;mdash;and interfaces&amp;mdash;can be written in any language. Collections have so far been produced in English, French, Spanish, German, Maori, Chinese, and Arabic. The NZDL Web site provides numerous examples. Collections can contain text, pictures, and even audio and video clips; a text-only version of the interface is also provided to accommodate visually impaired users. Compression technology is used to ensure best use of storage (Witten &lt;i&gt;et al&lt;/i&gt;., 1999). Most non-textual material is either linked to textual documents or accompanied by textual descriptions (such as photo captions) to allow full-text searching and browsing. However, the architecture permits the implementation of plugins and classifiers even for non-textual data.
372&lt;/p&gt;&lt;/div&gt;
373
374
375
376&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
377
378&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
379The system includes an &amp;ldquo;administrative&amp;rdquo; function whereby specified users can examine the composition of all collections, protect documents so that they can only be accessed by registered users on presentation of a password, and so on. Logs of user activity are kept that record all queries made to every Greenstone collection (though this facility can be disabled).
380&lt;/p&gt;&lt;/div&gt;
381
382
383
384&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
385
386&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
387Although primarily designed for Internet access over the World-Wide Web, collections can be made available, in precisely the same form, on CD-ROM. In either case they are accessed through any Web browser. Greenstone CD-ROMs operate on a standalone PC under Windows 3.X, 95, 98, and NT, and the interaction is identical to accessing the collection on the Web&amp;mdash;except that response is faster and more predictable. The requirement to operate on early Windows systems is one that plagues the software design, but is crucial for many users&amp;mdash;particularly those in underdeveloped countries seeking access to humanitarian aid collections. If the PC is connected to a network (intranet or Internet), a custom-built Web server provided on each CD makes exactly the same information available to others through their standard Web browser. The use of compression ensures that the greatest possible volume of information can be packed on to a CD-ROM.
388&lt;/p&gt;&lt;/div&gt;
389
390
391
392&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
393
394&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
395The collection-serving software operates under Unix and Windows NT, and works with standard Web servers. A flexible process structure allows different collections to be served by different computers, yet be presented to the user in the same way, on the same Web page, as part of the same digital library, even as part of the same collection (McNab and Witten, 1998). Existing collections can be updated and new ones brought on-line at any time, without bringing the system down; the process responsible for the user interface will notice (through periodic polling) when new collections appear and add them to the list presented to the user.
396&lt;/p&gt;&lt;/div&gt;
397
398
399
400&lt;p&gt;&lt;div name=&quot;Heading 1&quot; align=&quot;center&quot; style=&quot;margin: 4.17mm 0.50mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
401
402&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
403&lt;b&gt;&lt;span style=&quot;text-transform:uppercase&quot;&gt;&lt;img width=&quot;760&quot; height=&quot;662&quot; alt=&quot;0x01 graphic&quot; src=&quot;_httpdocimg_/word032.png&quot;&gt;&lt;br&gt;&lt;/span&gt;&lt;/b&gt;&lt;b&gt;&lt;span style=&quot;text-transform:uppercase&quot;&gt;&lt;/span&gt;&lt;/b&gt;
404&lt;/p&gt;&lt;/div&gt;
405
406
407
408&lt;p&gt;&lt;div name=&quot;Caption&quot; align=&quot;center&quot; style=&quot;margin: 2.08mm 2.74mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
409
410&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
411&lt;b&gt;Figure 3: Reading a book in the HDL&lt;/b&gt;
412&lt;/p&gt;&lt;/div&gt;
413
414
415
416&lt;p&gt;&lt;div name=&quot;Heading 1&quot; align=&quot;left&quot; style=&quot;margin: 4.17mm 0.50mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
417
418&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
419&lt;b&gt;&lt;span style=&quot;text-transform:uppercase&quot;&gt;Finding information &lt;/span&gt;&lt;/b&gt;
420&lt;/p&gt;&lt;/div&gt;
421
422
423
424&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
425
426&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
427Greenstone digital library systems generally include several separate collections. A home page allows you to select a collection; in addition, each collection has its own &amp;ldquo;about&amp;rdquo; page that gives you information about how the collection is organized and the principles governing what is included.
428&lt;/p&gt;&lt;/div&gt;
429
430
431
432&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
433
434&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
435All icons in the screenshots of Figures 1-4 are clickable. Those icons at the top of the page return to the home page, provide help text, and allow you to set user interface and searching preferences. The navigation bar underneath gives access to the searching and browsing facilities, which differ from one collection to another.
436&lt;/p&gt;&lt;/div&gt;
437
438
439
440&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
441
442&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
443Each of the five buttons provides a different way to find information. You can &lt;i&gt;search for particular words&lt;/i&gt; that appear in the text from the &amp;ldquo;search&amp;rdquo; page (or from the &amp;ldquo;about&amp;rdquo; page of Figure 1). This collection contains indexes of chapters, section titles, and entire books. The default search interface is a simple one, suitable for casual users; advanced searching&amp;mdash;which allows full Boolean expressions, phrase searching, case and stemming control&amp;mdash;can be enabled from the &lt;i&gt;Preferences&lt;/i&gt; page.
444&lt;/p&gt;&lt;/div&gt;
445
446
447
448&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
449
450&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
451This collection has four browsable metadata indexes. You can &lt;i&gt;access publications by subject&lt;/i&gt; by clicking the &lt;i&gt;subjects&lt;/i&gt; button, which brings up a list of subjects, represented by bookshelves (Figure 2). You can &lt;i&gt;access publications by title&lt;/i&gt; by clicking &lt;i&gt;titles a-z&lt;/i&gt; (Figure 4), which brings up a list of books in alphabetic order. You can &lt;i&gt;access publications by organization&lt;/i&gt; (i.e. Dublin Core &amp;ldquo;publisher&amp;rdquo;), bringing up a list of organizations. You can &lt;i&gt;access publications by &amp;ldquo;how to&amp;rdquo; listing&lt;/i&gt;, yielding a list of hints defined by the collection's editors. We use the Dublin Core as a base and extend it in an &lt;i&gt;ad hoc&lt;/i&gt; manner to accommodate the individual requirements of collection designers.
452&lt;/p&gt;&lt;/div&gt;
453
454
455
456&lt;p&gt;&lt;div name=&quot;Heading 1&quot; align=&quot;left&quot; style=&quot;margin: 4.17mm 0.50mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
457
458&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
459&lt;b&gt;&lt;span style=&quot;text-transform:uppercase&quot;&gt;Files in a collection&lt;/span&gt;&lt;/b&gt;
460&lt;/p&gt;&lt;/div&gt;
461
462
463
464&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
465
466&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
467When a new collection is created or material is added to an existing one, the original source documents are first brought into the system through a process known as &amp;ldquo;importing.&amp;rdquo; This involves converting documents into a simple HTML-like format known as GML (for &amp;ldquo;Greenstone Markup Language&amp;rdquo;), which includes any metadata associated with the document. Documents are assumed to be in the Unicode UTF-8 code (of which the ASCII characters form a subset).
468&lt;/p&gt;&lt;/div&gt;
469
470
471
472&lt;p&gt;&lt;div name=&quot;Heading 2&quot; align=&quot;left&quot; style=&quot;margin: 6.94mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
473
474&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
475&lt;b&gt;Files and directories&lt;/b&gt;
476&lt;/p&gt;&lt;/div&gt;
477
478
479
480&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
481
482&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
483There is a separate directory for each collection, which contains five subdirectories: the original raw material (&lt;i&gt;import&lt;/i&gt;), the GML files created from this (&lt;i&gt;archives&lt;/i&gt;), the final collection as it is served to users (&lt;i&gt;index&lt;/i&gt;), a directory for use during the building process (&lt;i&gt;building&lt;/i&gt;), and one for any supporting files (&lt;i&gt;etc&lt;/i&gt;)&amp;mdash;including the configuration file that controls the collection creation procedure. Additional files might be required: for example, building a hierarchy of classifications requires a data file of sub-classifications.
484&lt;/p&gt;&lt;/div&gt;
485
486
487
488&lt;p&gt;&lt;div name=&quot;Heading 2&quot; align=&quot;left&quot; style=&quot;margin: 6.94mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
489
490&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
491&lt;b&gt;The imported documents&lt;/b&gt;
492&lt;/p&gt;&lt;/div&gt;
493
494
495
496&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
497
498&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
499In order to identify documents internally, a unique object identifier or OID is assigned to each original source document when it is imported (formed by hashing the content, to overcome file duplication effects caused by mirroring) and stored as metadata within that document. It is important that OIDs persist throughout the index-building process&amp;mdash;so that a user's search history is unaffected by rebuilding the collection. Because OIDs are derived by hashing the contents of the original source document, an unchanged document receives the same OID each time it is imported.
500&lt;/p&gt;&lt;/div&gt;
501
502
503
504&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
505
506&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
507Once imported, each document is stored in its own subdirectory of &lt;i&gt;archives&lt;/i&gt;, along with any associated files&amp;mdash;for example, images. To ensure compatibility with Windows 3.0, only eight characters are used in directory and file names, which causes annoying but essentially trivial complications.
508&lt;/p&gt;&lt;/div&gt;
509
510
511
512&lt;p&gt;&lt;div name=&quot;Heading 2&quot; align=&quot;left&quot; style=&quot;margin: 6.94mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
513
514&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
515&lt;b&gt;Inside the documents&lt;/b&gt;
516&lt;/p&gt;&lt;/div&gt;
517
518
519
520&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
521
522&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
523The GML format imposes a limited amount of structure on documents. Documents are divided into paragraphs. They can be split hierarchically into sections and subsections. OIDs are extended to identify these components by appending numbers, separated by periods, to a document's OID. When a book is read, its section hierarchy is visible as the table of contents (Figure 3). Chapters, sections, subsections, and pages are all implemented simply as &amp;ldquo;sections&amp;rdquo; within the document. In some collections documents do not have a hierarchical subsection structure, but are split into pages to permit browsing within a retrieved document.
524&lt;/p&gt;&lt;/div&gt;
525
526
527
528&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
529
530&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
531The document structure is used for searchable indexes. There are three levels of index: &lt;i&gt;documents&lt;/i&gt;, &lt;i&gt;sections&lt;/i&gt;, and &lt;i&gt;paragraphs&lt;/i&gt;, corresponding to the distinctions that GML makes&amp;mdash;the hierarchical structure is flattened for the purposes of creating these indexes. Indexes can be of text, or metadata, or any combination. Thus you can create a searchable index of section titles, and/or authors, and/or document descriptions, as well as the document text.
532&lt;/p&gt;&lt;/div&gt;
533
534
535
536&lt;p&gt;&lt;div name=&quot;Caption&quot; align=&quot;center&quot; style=&quot;margin: 2.08mm 2.74mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
537
538&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
539&lt;img width=&quot;698&quot; height=&quot;698&quot; alt=&quot;0x01 graphic&quot; src=&quot;_httpdocimg_/word033.png&quot;&gt;&lt;br&gt;
540&lt;/p&gt;&lt;/div&gt;
541
542
543
544&lt;p&gt;&lt;div name=&quot;Caption&quot; align=&quot;center&quot; style=&quot;margin: 2.08mm 2.74mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
545
546&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
547&lt;b&gt;Figure 4: Browsing titles in the HDL&lt;/b&gt;
548&lt;/p&gt;&lt;/div&gt;
549
550
551
552&lt;p&gt;&lt;div name=&quot;Heading 1&quot; align=&quot;left&quot; style=&quot;margin: 4.17mm 0.50mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
553
554&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
555&lt;b&gt;&lt;span style=&quot;text-transform:uppercase&quot;&gt;Updating existing collections&lt;/span&gt;&lt;/b&gt;
556&lt;/p&gt;&lt;/div&gt;
557
558
559
560&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
561
562&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
563Updating an existing collection with new files in the same format is easy. For example, the raw material for the HDL is supplied in the form of HTML files marked up with &amp;lt;&amp;lt;TOC&amp;gt;&amp;gt; tags to split books into sections and subsections, and &amp;lt;&amp;lt;I&amp;gt;&amp;gt; tags to indicate where an image is to be inserted. For each book in the library there is a directory that contains a single HTML file representing the book, and separate files containing the associated images. An accompanying spreadsheet file contains the classification hierarchy; this is converted to a simple file format (using Excel's &lt;i&gt;Save As&lt;/i&gt; command).
564&lt;/p&gt;&lt;/div&gt;
565
566
567
568&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
569
570&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
571Since the collection exists, its directory is already set up with subdirectories &lt;i&gt;import&lt;/i&gt;, &lt;i&gt;archives&lt;/i&gt;, &lt;i&gt;building&lt;/i&gt;, &lt;i&gt;index&lt;/i&gt;, and &lt;i&gt;etc&lt;/i&gt;, and the &lt;i&gt;etc&lt;/i&gt; directory will contain a suitable collection configuration file.
572&lt;/p&gt;&lt;/div&gt;
573
574
575
576&lt;p&gt;&lt;div name=&quot;Heading 2&quot; align=&quot;left&quot; style=&quot;margin: 6.94mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
577
578&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
579&lt;b&gt;The updating procedure&lt;/b&gt;
580&lt;/p&gt;&lt;/div&gt;
581
582
583
584&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
585
586&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
587To update a collection, the new raw material is placed in the &lt;i&gt;import&lt;/i&gt; directory, in whatever form it is available. Then the &lt;i&gt;import&lt;/i&gt; process is invoked, which converts the files into GML using the specified plugins. Old material for which GML files have previously been created is not re-imported. Then the &lt;i&gt;build&lt;/i&gt; process is invoked to build the requisite indexes for the collection. Finally, the contents of the &lt;i&gt;building&lt;/i&gt; directory are moved into the &lt;i&gt;index&lt;/i&gt; directory, and the new version of the collection automatically becomes live.
588&lt;/p&gt;&lt;/div&gt;
589
590
591
592&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
593
594&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
595This procedure may seem cumbersome. But all the steps are necessary for efficient operation with large collections. The &lt;i&gt;import&lt;/i&gt; process could be performed on the fly during the building operation&amp;mdash;but because building indexes is a multipass operation, the often lengthy importing would be repeated several times. The &lt;i&gt;build&lt;/i&gt; process can take considerable time&amp;mdash;a day or two, for very large collections. Consequently, the results are placed in the &lt;i&gt;building&lt;/i&gt; directory so that, if the collection already exists, it will continue to be served to users in its old form throughout the building operation.
596&lt;/p&gt;&lt;/div&gt;
597
598
599
600&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
601
602&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
603Active users of the collection will not be disturbed when the new version becomes live&amp;mdash;they will probably not even notice. The persistent OIDs ensure that interactions remain coherent&amp;mdash;users who are examining the results of a query or browse operation will still retrieve the expected documents&amp;mdash;and if a search is actually in progress when the change takes place the program detects the resulting file-structure inconsistency and automatically and transparently re-executes the query, this time on the new version of the collection.
604&lt;/p&gt;&lt;/div&gt;
605
606
607
608&lt;p&gt;&lt;div name=&quot;Heading 2&quot; align=&quot;left&quot; style=&quot;margin: 6.94mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
609
610&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
611&lt;b&gt;How it works&lt;/b&gt;
612&lt;/p&gt;&lt;/div&gt;
613
614
615
616&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
617
618&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
619The original material in the &lt;i&gt;import&lt;/i&gt; directory may be in any format, and plugins are required to process each format type. The plugins that a collection uses must be specified in the collection configuration file. The &lt;i&gt;import&lt;/i&gt; program reads the list of plugins and passes each document to each plugin in order until it finds one that can process it. When updating an existing collection, all plugins necessary to process new material should already have been specified in the configuration file.
620&lt;/p&gt;&lt;/div&gt;
621
622
623
624&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
625
626&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
627The building step creates the indexes for both searching and browsing. The MG software is generally used to do the searching (Witten &lt;i&gt;et al&lt;/i&gt;., 1999), and the &lt;i&gt;mgbuild&lt;/i&gt; module is automatically invoked to create each required index. For example, the Humanity Development Library has three indexes, one for entire books, one for chapters, and one for section titles. Subdirectories of the &lt;i&gt;index&lt;/i&gt; directory are created for each of these indexes.
628&lt;/p&gt;&lt;/div&gt;
629
630
631&lt;table width=&quot;92.93%25&quot; border=&quot;1&quot; cols=&quot;4&quot; rows=&quot;12&quot;&gt;
632&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
633
634&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
635
636&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
637
638
639
640&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
641
642&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
643creator
644&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
645
646
647
648&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
649
650&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
651davidb@cs.waikato.ac.nz
652&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
653
654
655
656&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
657
658&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
6591
660&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
661
662&lt;/tr&gt;
663
664
665
666&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
667
668&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
669
670&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
671
672
673
674&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
675
676&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
677maintainer
678&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
679
680
681
682&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
683
684&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
685davidb@cs.waikato.ac.nz
686&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
687
688
689
690&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
691
692&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
6932
694&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
695
696&lt;/tr&gt;
697
698
699
700&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
701
702&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
703
704&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
705
706
707
708&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
709
710&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
711public
712&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
713
714
715
716&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
717
718&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
719True
720&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
721
722
723
724&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
725
726&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
7273
728&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
729
730&lt;/tr&gt;
731
732
733
734&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
735
736&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
737
738&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
739
740
741
742&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
743
744&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
745
746&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
747
748
749
750&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
751
752&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
753
754&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
755
756
757
758&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
759
760&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
7614
762&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
763
764&lt;/tr&gt;
765
766
767
768&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
769
770&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
771
772&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
773
774
775
776&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
777
778&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
779indexes
780&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
781
782
783
784&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
785
786&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
787document:text
788&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
789
790
791
792&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
793
794&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
7955
796&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
797
798&lt;/tr&gt;
799
800
801
802&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
803
804&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
805
806&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
807
808
809
810&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
811
812&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
813defaultindex
814&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
815
816
817
818&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
819
820&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
821document:text
822&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
823
824
825
826&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
827
828&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
8296
830&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
831
832&lt;/tr&gt;
833
834
835
836&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
837
838&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
839
840&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
841
842
843
844&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
845
846&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
847plugins
848&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
849
850
851
852&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
853
854&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
855GMLPlug TEXTPlug ArcPlug RecPlug
856&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
857
858
859
860&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
861
862&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
8637
864&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
865
866&lt;/tr&gt;
867
868
869
870&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
871
872&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
873
874&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
875
876
877
878&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
879
880&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
881
882&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
883
884
885
886&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
887
888&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
889
890&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
891
892
893
894&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
895
896&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
8978
898&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
899
900&lt;/tr&gt;
901
902
903
904&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
905
906&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
907
908&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
909
910
911
912&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
913
914&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
915classify
916&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
917
918
919
920&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
921
922&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
923AZList metadata=Title
924&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
925
926
927
928&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
929
930&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
9319
932&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
933
934&lt;/tr&gt;
935
936
937
938&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
939
940&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
941
942&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
943
944
945
946&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
947
948&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
949
950&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
951
952
953
954&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
955
956&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
957
958&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
959
960
961
962&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
963
964&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
96510
966&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
967
968&lt;/tr&gt;
969
970
971
972&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
973
974&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
975
976&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
977
978
979
980&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
981
982&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
983collectionmeta
984&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
985
986
987
988&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
989
990&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
991collectionname    &amp;quot;generic text collection&amp;quot;
992&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
993
994
995
996&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
997
998&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
99911
1000&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1001
1002&lt;/tr&gt;
1003
1004
1005
1006&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1007
1008&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1009(a)
1010&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1011
1012
1013
1014&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1015
1016&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1017collectionmeta
1018&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1019
1020
1021
1022&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1023
1024&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1025.document:text    &amp;quot;documents&amp;quot;
1026&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1027
1028
1029
1030&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1031
1032&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
103312
1034&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1035
1036&lt;/tr&gt;
1037
1038&lt;/table&gt;
1039
1040&lt;p&gt;&lt;div name=&quot;label&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1041
1042&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1043
1044&lt;/p&gt;&lt;/div&gt;
1045
1046
1047&lt;table width=&quot;92.93%25&quot; border=&quot;1&quot; cols=&quot;4&quot; rows=&quot;17&quot;&gt;
1048&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1049
1050&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1051
1052&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1053
1054
1055
1056&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1057
1058&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1059creator
1060&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1061
1062
1063
1064&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1065
1066&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1067davidb@cs.waikato.ac.nz
1068&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1069
1070
1071
1072&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1073
1074&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
10751
1076&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1077
1078&lt;/tr&gt;
1079
1080
1081
1082&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1083
1084&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1085
1086&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1087
1088
1089
1090&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1091
1092&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1093maintainer
1094&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1095
1096
1097
1098&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1099
1100&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1101davidb@cs.waikato.ac.nz
1102&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1103
1104
1105
1106&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1107
1108&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
11092
1110&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1111
1112&lt;/tr&gt;
1113
1114
1115
1116&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1117
1118&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1119
1120&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1121
1122
1123
1124&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1125
1126&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1127public
1128&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1129
1130
1131
1132&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1133
1134&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1135True
1136&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1137
1138
1139
1140&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1141
1142&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
11433
1144&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1145
1146&lt;/tr&gt;
1147
1148
1149
1150&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1151
1152&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1153
1154&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1155
1156
1157
1158&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1159
1160&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1161
1162&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1163
1164
1165
1166&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1167
1168&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1169
1170&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1171
1172
1173
1174&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1175
1176&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
11774
1178&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1179
1180&lt;/tr&gt;
1181
1182
1183
1184&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1185
1186&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1187
1188&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1189
1190
1191
1192&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1193
1194&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1195indexes
1196&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1197
1198
1199
1200&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1201
1202&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1203document:text document:From
1204&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1205
1206
1207
1208&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1209
1210&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
12115
1212&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1213
1214&lt;/tr&gt;
1215
1216
1217
1218&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1219
1220&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1221
1222&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1223
1224
1225
1226&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1227
1228&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1229defaultindex
1230&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1231
1232
1233
1234&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1235
1236&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1237document:text
1238&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1239
1240
1241
1242&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1243
1244&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
12456
1246&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1247
1248&lt;/tr&gt;
1249
1250
1251
1252&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1253
1254&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1255
1256&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1257
1258
1259
1260&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1261
1262&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1263plugins
1264&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1265
1266
1267
1268&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1269
1270&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1271GMLPlug EMAILPlug ArcPlug RecPlug
1272&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1273
1274
1275
1276&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1277
1278&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
12797
1280&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1281
1282&lt;/tr&gt;
1283
1284
1285
1286&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1287
1288&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1289
1290&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1291
1292
1293
1294&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1295
1296&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1297
1298&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1299
1300
1301
1302&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1303
1304&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1305
1306&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1307
1308
1309
1310&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1311
1312&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
13138
1314&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1315
1316&lt;/tr&gt;
1317
1318
1319
1320&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1321
1322&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1323
1324&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1325
1326
1327
1328&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1329
1330&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1331classify
1332&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1333
1334
1335
1336&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1337
1338&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1339AZList metadata=Title
1340&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1341
1342
1343
1344&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1345
1346&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
13479
1348&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1349
1350&lt;/tr&gt;
1351
1352
1353
1354&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1355
1356&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1357
1358&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1359
1360
1361
1362&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1363
1364&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1365classify
1366&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1367
1368
1369
1370&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1371
1372&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1373DateList
1374&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1375
1376
1377
1378&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1379
1380&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
138110
1382&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1383
1384&lt;/tr&gt;
1385
1386
1387
1388&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1389
1390&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1391
1392&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1393
1394
1395
1396&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1397
1398&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1399
1400&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1401
1402
1403
1404&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1405
1406&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1407
1408&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1409
1410
1411
1412&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1413
1414&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
141511
1416&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1417
1418&lt;/tr&gt;
1419
1420
1421
1422&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1423
1424&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1425
1426&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1427
1428
1429
1430&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1431
1432&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1433collectionmeta
1434&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1435
1436
1437
1438&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1439
1440&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1441collectionname    &amp;quot;Email messages&amp;quot;
1442&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1443
1444
1445
1446&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1447
1448&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
144912
1450&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1451
1452&lt;/tr&gt;
1453
1454
1455
1456&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1457
1458&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1459
1460&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1461
1462
1463
1464&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1465
1466&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1467collectionmeta
1468&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1469
1470
1471
1472&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1473
1474&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1475.document:text    &amp;quot;documents&amp;quot;
1476&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1477
1478
1479
1480&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1481
1482&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
148313
1484&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1485
1486&lt;/tr&gt;
1487
1488
1489
1490&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1491
1492&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1493
1494&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1495
1496
1497
1498&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1499
1500&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1501collectionmeta
1502&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1503
1504
1505
1506&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1507
1508&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1509.document:From    &amp;quot;email senders&amp;quot;
1510&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1511
1512
1513
1514&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1515
1516&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
151714
1518&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1519
1520&lt;/tr&gt;
1521
1522
1523
1524&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1525
1526&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1527
1528&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1529
1530
1531
1532&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1533
1534&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1535
1536&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1537
1538
1539
1540&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1541
1542&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1543
1544&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1545
1546
1547
1548&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1549
1550&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
155115
1552&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1553
1554&lt;/tr&gt;
1555
1556
1557
1558&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1559
1560&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1561
1562&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1563
1564
1565
1566&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1567
1568&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1569format
1570&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1571
1572
1573
1574&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1575
1576&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1577QueryResults /
1578&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1579
1580
1581
1582&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1583
1584&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
158516
1586&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1587
1588&lt;/tr&gt;
1589
1590
1591
1592&lt;tr&gt;&lt;td bgcolor=&quot;White&quot; width=&quot;6.36%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1593
1594&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1595(b)
1596&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1597
1598
1599
1600&lt;td bgcolor=&quot;White&quot; width=&quot;19.69%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1601
1602&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1603
1604&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1605
1606
1607
1608&lt;td bgcolor=&quot;White&quot; width=&quot;64.87%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1609
1610&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1611&amp;lt;td&amp;gt;[link][icon][/link]&amp;lt;/td&amp;gt;&amp;lt;td&amp;gt;[Title]&amp;lt;/td&amp;gt;&amp;lt;td&amp;gt;[Author]&amp;lt;/td&amp;gt;
1612&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1613
1614
1615
1616&lt;td bgcolor=&quot;White&quot; width=&quot;9.08%25&quot; rowspan=&quot;1&quot; colspan=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1617
1618&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
161917
1620&lt;/p&gt;&lt;/div&gt;&lt;/td&gt;
1621
1622&lt;/tr&gt;
1623
1624&lt;/table&gt;
1625
1626&lt;p&gt;&lt;div name=&quot;label&quot; align=&quot;center&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1627
1628&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1629&lt;b&gt;Figure 5: Collection configuration files (a) generic, (b) for an email collection&lt;/b&gt;
1630&lt;/p&gt;&lt;/div&gt;
1631
1632
1633
1634&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1635
1636&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1637MG also compresses the text of the collection; and the image files are linked into the &lt;i&gt;index&lt;/i&gt; subdirectory. Now none of the material in the &lt;i&gt;import&lt;/i&gt; and &lt;i&gt;archives&lt;/i&gt; directories is needed to run the collection, so it can be removed from the file system (though it would be needed if the collection were rebuilt).
1638&lt;/p&gt;&lt;/div&gt;
1639
1640
1641
1642&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1643
1644&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1645Associated with each collection is a database stored in GDBM (Gnu database manager) format. This contains an entry for each document, giving its OID, its internal MG document number, and metadata such as title. Information for each of the browsing indexes, which appear as buttons on the Greenstone search/browse bar, is also extracted during the building process and stored in the database. A &amp;ldquo;classifier&amp;rdquo; program is required for each browsing index to extract the appropriate information from GML documents. Like plugins, classifiers are written on an &lt;i&gt;ad hoc&lt;/i&gt; basis for the particular information required, and where possible reused from one collection to another.
1646&lt;/p&gt;&lt;/div&gt;
1647
1648
1649
1650&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1651
1652&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1653The building program creates the indexes based on whatever appears in the &lt;i&gt;archives&lt;/i&gt; directory. The first plugin specified by all collections is one that processes GML files, and so if &lt;i&gt;archives&lt;/i&gt; contains imported files they will be processed correctly. If it contains material in the original format, that will be converted using the appropriate plugin. Thus the import process is optional.
1654&lt;/p&gt;&lt;/div&gt;
1655
1656
1657
1658&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1659
1660&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1661GML is designed to be fast and easy to parse, an important requirement when millions of documents are to be processed. Something as simple as requiring tags to be lower-case, for example, yields a substantial speed-up. In certain circumstances, however, it might be preferable to use a standardized format such as XML. This is straightforward to implement&amp;mdash;just write an XML plugin&amp;mdash;although we have not done so ourselves. Given the transitory nature of the imported data, to date, we have found GML a satisfactory and beneficial format.
1662&lt;/p&gt;&lt;/div&gt;
1663
1664
1665
1666&lt;p&gt;&lt;div name=&quot;Heading 1&quot; align=&quot;left&quot; style=&quot;margin: 4.17mm 0.50mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1667
1668&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1669&lt;b&gt;&lt;span style=&quot;text-transform:uppercase&quot;&gt;Creating new collections&lt;/span&gt;&lt;/b&gt;
1670&lt;/p&gt;&lt;/div&gt;
1671
1672
1673
1674&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1675
1676&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1677Building new collections from scratch is only slightly different from updating an existing collection. The key new requirement is creating a collection configuration file, and a software utility is provided to help. Two pieces of information are required for this: the name of the directory that the collection will use (into which the source data and other files will eventually be placed), and a contact e-mail address for use if any problems are encountered by the software once the collection is up and running. The utility creates files and directories within the newly-named directory to support a generic collection of plain text documents. With suitable data placed in the &lt;i&gt;import&lt;/i&gt; directory, building the collection at this point will yield a document-level searchable index of all the text and a browsable list of &amp;ldquo;titles&amp;rdquo; (defined in this case to be the document filenames).
1678&lt;/p&gt;&lt;/div&gt;
1679
1680
1681
1682&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1683
1684&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1685To enhance the functionality and presentation&amp;mdash;something anything but the most trivial collection will require&amp;mdash;the configuration file must be edited. For a collection sourced from documents in an already supported data format, presented in a similar fashion to an existing collection, the amount of editing is minimal. Importing new data formats and browsing metadata in ways not currently supported are more complex activities that require programming skills.
1686&lt;/p&gt;&lt;/div&gt;
1687
1688
1689
1690&lt;p&gt;&lt;div name=&quot;Caption&quot; align=&quot;center&quot; style=&quot;margin: 2.08mm 2.74mm 0.00mm 17.26mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1691
1692&lt;p style=&quot;text-indent: -17.26mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1693&lt;img width=&quot;698&quot; height=&quot;698&quot; alt=&quot;0x01 graphic&quot; src=&quot;_httpdocimg_/word034.png&quot;&gt;&lt;br&gt;
1694&lt;/p&gt;&lt;/div&gt;
1695
1696
1697
1698&lt;p&gt;&lt;div name=&quot;Caption&quot; align=&quot;center&quot; style=&quot;margin: 2.08mm 2.74mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1699
1700&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1701&lt;b&gt;Figure 6&lt;/b&gt;&lt;b&gt;: Searching bookmarked Web pages&lt;/b&gt;&lt;b&gt;&lt;i&gt;&lt;/i&gt;&lt;/b&gt;
1702&lt;/p&gt;&lt;/div&gt;
1703
1704
1705
1706&lt;p&gt;&lt;div name=&quot;Heading 2&quot; align=&quot;left&quot; style=&quot;margin: 6.94mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1707
1708&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1709&lt;b&gt;Modifying the configuration file&lt;/b&gt;
1710&lt;/p&gt;&lt;/div&gt;
1711
1712
1713
1714&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1715
1716&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1717Figure 5b shows simple alterations to the generic configuration file in Figure 5a that was generated by the new-collection utility. &lt;i&gt;TEXTPlug&lt;/i&gt; is replaced with &lt;i&gt;EMAILPlug&lt;/i&gt; (line 7) which reads email files and extracts metadata (&lt;i&gt;From&lt;/i&gt;, &lt;i&gt;To&lt;/i&gt;, &lt;i&gt;Date&lt;/i&gt;, &lt;i&gt;Subject&lt;/i&gt;) from them. A classifier for dates is added (line 10) to make the collection browsable chronologically. The default presentation of search results is overridden (line 17) to display both the title of the message (i.e. Dublin Core &lt;i&gt;Title&lt;/i&gt;) and its sender (i.e. Dublin Core &lt;i&gt;Author&lt;/i&gt;). Elements in square brackets, such as &lt;i&gt;[Title]&lt;/i&gt;, are replaced by the metadata associated with a particular document. The built-in term &lt;i&gt;[icon]&lt;/i&gt; produces a suitable image that represents the document (such as a book icon or page icon), and the &lt;i&gt;[link]&amp;hellip;[/link]&lt;/i&gt; construct forms a hyperlink to the complete document. Anything else in the format statement, which in this case is solely table-cell tags in HTML, is passed through to the page being displayed.
1718&lt;/p&gt;&lt;/div&gt;
1719
1720
1721
1722&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1723
1724&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1725As this example shows, creating a new collection that stays within the bounds of the library's established capabilities falls within the capability of many computer users&amp;mdash;for instance, computer-trained librarians. Extending Greenstone to handle new document formats and browse metadata in new ways is more challenging.
1726&lt;/p&gt;&lt;/div&gt;
1727
1728
1729
1730&lt;p&gt;&lt;div name=&quot;Heading 2&quot; align=&quot;left&quot; style=&quot;margin: 6.94mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1731
1732&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1733&lt;b&gt;Writing new plugins and classifiers&lt;/b&gt;
1734&lt;/p&gt;&lt;/div&gt;
1735
1736
1737
1738&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1739
1740&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1741Extensibility is obtained through plugins and classifiers.
1742&lt;/p&gt;&lt;/div&gt;
1743
1744
1745
1746&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1747
1748&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1749These are modules of code that can be slotted into the system to enhance its capabilities. Plugins parse documents, extracting the text and metadata to be indexed. Classifiers control how metadata is brought together to form browsable data structures. Both are specified in an object-oriented framework using inheritance to minimize the amount of code written.
1750&lt;/p&gt;&lt;/div&gt;
1751
1752
1753
1754&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1755
1756&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1757A plugin must specify three things: what file formats it can handle, how they should be parsed, and whether the plugin is recursive. File formats are normally determined using regular expression matching on the filename. For example, the HTML plugin accepts all files that end in &lt;i&gt;.htm&lt;/i&gt;, &lt;i&gt;.html&lt;/i&gt;, &lt;i&gt;.HTM&lt;/i&gt;, or &lt;i&gt;.HTML&lt;/i&gt;. (It is quite possible, however, to write plugins that &amp;ldquo;look inside&amp;rdquo; the file as well.) For other files, the plugin returns &lt;i&gt;undefined&lt;/i&gt; and the file is passed to the next plugin in the collection's configuration file (e.g. Figure 5 line 7). If it can, the plugin parses the file and returns the number of documents processed. This involves extracting text and metadata and adding it to the library's content through calls to &lt;i&gt;add text&lt;/i&gt; and &lt;i&gt;add metadata&lt;/i&gt;.
1758&lt;/p&gt;&lt;/div&gt;
1759
1760
1761
1762&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1763
1764&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1765Some plugins (&amp;ldquo;recursive&amp;rdquo; ones) add extra files into the stream of data processed during the building phase by artificially reactivating the list of plugins. This is how directory hierarchies are traversed.
1766&lt;/p&gt;&lt;/div&gt;
1767
1768
1769
1770&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1771
1772&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1773Plugins are small modules of code that are easy to write. We monitored the time it took to develop a new one that was different to any we had produced so far. We chose to make as an example a collection of HTML bookmark files, the motivation being to produce a convenient way of searching and browsing one's bookmarked Web pages. Figure 6 shows a user searching for bookmarked pages about &lt;i&gt;music&lt;/i&gt;. The new plugin took under an hour to write, and was 160 lines long (ignoring blank lines and comments)&amp;mdash;about the average length of existing plugins.
1774&lt;/p&gt;&lt;/div&gt;
1775
1776
1777
1778&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1779
1780&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1781Classifiers are more general than plugins because they work on GML-format data. For example, any plugin that generates date metadata in accordance with the Dublin Core can request the collection to be browsable chronologically by specifying the &lt;i&gt;DateList&lt;/i&gt; classifier in the collection's configuration file (Figure 7). Classifiers are more elaborate than most plugins, but new ones are seldom required. The average length of existing classifiers is 230 lines.
1782&lt;/p&gt;&lt;/div&gt;
1783
1784
1785
1786&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1787
1788&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1789Classifiers must specify three things: an initialization routine, how individual documents are classified, and the final browsable data structure. Initialization takes care of any options specified in the configuration file (such as &lt;i&gt;metadata=Title &lt;/i&gt;on line 9 of Figure 5b). Classifying individual documents is an iterative process: for each one, a call to &lt;i&gt;document-classify&lt;/i&gt; is made. On presentation of the document's OID, the necessary metadata is located and used to control where the document is added to the browsable data structure being constructed.
1790&lt;/p&gt;&lt;/div&gt;
1791
1792
1793
1794&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1795
1796&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1797Once all documents have been added, a request is made for the completed data structure. Some classifiers return the data structure directly; others transform the data structure before it is returned. For example, the &lt;i&gt;AZList&lt;/i&gt; classifier divides the alphabetically sorted list of metadata into separate pages of about the same size and returns the alphabetic ranges for each one (Figure 4).
1798&lt;/p&gt;&lt;/div&gt;
1799
1800
1801
1802&lt;p&gt;&lt;div name=&quot;Caption&quot; align=&quot;center&quot; style=&quot;margin: 2.08mm 2.74mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1803
1804&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1805&lt;img width=&quot;698&quot; height=&quot;698&quot; alt=&quot;0x01 graphic&quot; src=&quot;_httpdocimg_/word035.png&quot;&gt;&lt;br&gt;
1806&lt;/p&gt;&lt;/div&gt;
1807
1808
1809
1810&lt;p&gt;&lt;div name=&quot;Caption&quot; align=&quot;center&quot; style=&quot;margin: 2.08mm 2.74mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1811
1812&lt;p style=&quot;text-indent: 0.00mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1813&lt;b&gt;Figure 7&lt;/b&gt;&lt;b&gt;: Browsing a newspaper collection by date&lt;/b&gt;&lt;b&gt;&lt;/b&gt;
1814&lt;/p&gt;&lt;/div&gt;
1815
1816
1817
1818&lt;p&gt;&lt;div name=&quot;Heading 1&quot; align=&quot;left&quot; style=&quot;margin: 4.17mm 0.50mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1819
1820&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1821&lt;b&gt;&lt;span style=&quot;text-transform:uppercase&quot;&gt;Overview of related work&lt;/span&gt;&lt;/b&gt;
1822&lt;/p&gt;&lt;/div&gt;
1823
1824
1825
1826&lt;p&gt;&lt;div name=&quot;Plain Text&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.24mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1827
1828&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1829Two projects that provide substantial open source digital library software are Dienst (Lagoze and Fielding, 1998) and Harvest (Bowman &lt;i&gt;et al.&lt;/i&gt;, 1994). The origins of Dienst (&lt;i&gt;www.cs.cornell.edu/cdlrg&lt;/i&gt;) stretch back to 1992. The term has come to represent three entities: a conceptual architecture for distributed digital libraries; an open protocol for service communication; and a software system that implements the protocol. To date, five sample digital libraries have been built using this technology. They manifest themselves in two forms: technical reports and primary source documents.
1830&lt;/p&gt;&lt;/div&gt;
1831
1832
1833
1834&lt;p&gt;&lt;div name=&quot;Plain Text&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.24mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1835
1836&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1837Best known is NCSTRL, the Networked Computer Science Technical Reference Library project (&lt;i&gt;www.ncstrl.org&lt;/i&gt;). This collection facilitates searching by title, author and abstract, and browsing by year and author, across a distributed network of document repositories. Documents can (where supported) be delivered in various formats such as PostScript, a thumbnail overview of the pages, and a GIF image of a particular page.
1838&lt;/p&gt;&lt;/div&gt;
1839
1840
1841
1842&lt;p&gt;&lt;div name=&quot;Plain Text&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.24mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1843
1844&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1845The &lt;i&gt;Making of America&lt;/i&gt; resource is an example of a collection based around primary sources&amp;mdash;in this case American social history, 1830−1900. It has a different &amp;ldquo;look and feel&amp;rdquo; to NCSTRL, being strongly oriented toward browsing rather than searching. A user navigates their way through a hierarchical structure of hyperlinks to reach a book of interest. The book itself is a series of scanned images: delivery options include going directly to a page number, next and previous page buttons, and displaying a particular page at different resolutions. A text version of the page is also available upon which a searching option is also provided.
1846&lt;/p&gt;&lt;/div&gt;
1847
1848
1849
1850&lt;p&gt;&lt;div name=&quot;Plain Text&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1851
1852&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1853Started in 1994, Harvest is also a long-running research project. It provides an efficient means of gathering source data from the Internet and distributing indexing information over the Internet. This is accomplished through five components: &lt;i&gt;gatherer&lt;/i&gt;, &lt;i&gt;broker&lt;/i&gt;, &lt;i&gt;indexer&lt;/i&gt;, &lt;i&gt;replicator&lt;/i&gt; and &lt;i&gt;cache&lt;/i&gt;. The first three are central to creating, updating and searching a collection; the last two help to improve performance over the Internet through transparent mirroring and caching techniques.
1854&lt;/p&gt;&lt;/div&gt;
1855
1856
1857
1858&lt;p&gt;&lt;div name=&quot;Plain Text&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.24mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1859
1860&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1861The system is configurable and customizable. While searching is most commonly implemented using Glimpse (&lt;i&gt;glimpse.cs.arizona.edu&lt;/i&gt;), in principle any search engine that supports incremental updates and Boolean combinations of attribute-based queries can be used. It is possible to control what type of documents are gathered during creation and updating, and how the query interface looks and is laid out.
1862&lt;/p&gt;&lt;/div&gt;
1863
1864
1865
1866&lt;p&gt;&lt;div name=&quot;Plain Text&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.24mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1867
1868&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1869Sample collections cited by the developers include 21,000 computer science technical reports and 7,000 home pages. Other examples include a sizable collection of agriculture-related electronic journals and magazines called &amp;ldquo;tomato-juice&amp;rdquo; (accessed through &lt;i&gt;hegel.lib.ncsu.edu&lt;/i&gt;) and a full-text index of library-related electronic serials (&lt;i&gt;sunsite.berkeley.edu/IndexMorganagus&lt;/i&gt;). Harvest is also often used to index Web sites (for example &lt;i&gt;www.middlebury.edu&lt;/i&gt;).
1870&lt;/p&gt;&lt;/div&gt;
1871
1872
1873
1874&lt;p&gt;&lt;div name=&quot;Plain Text&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.24mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1875
1876&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1877Comparing Greenstone with Dienst and Harvest, there are both similarities and differences. All provide substantial digital library systems, hence common themes recur, but they are driven by projects with different aims. Harvest, for instance, was not conceived as a digital library project at all, but by virtue of its selective document gathering process it can be classed (and is used) as one. While it provides sophisticated search options, it lacks the complementary service of browsing. Furthermore it adds no structure or order to the documents collected, relying on whatever structures are present in the site that they were gathered from. A proven strength of the design is its flexibility through configuration and customization&amp;mdash;an element also present in Greenstone.
1878&lt;/p&gt;&lt;/div&gt;
1879
1880
1881
1882&lt;p&gt;&lt;div name=&quot;Plain Text&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.24mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1883
1884&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1885Dienst&amp;mdash;best exemplified through the NCSTRL work&amp;mdash;supports searching and browsing, like Greenstone. Both use open protocols. Differences include a high reliance in Dienst on user-supplied information when a document is added, and a smaller range of document types supported&amp;mdash;although Dienst does include a document model that should, over time, allow this to expand with relative ease.
1886&lt;/p&gt;&lt;/div&gt;
1887
1888
1889
1890&lt;p&gt;&lt;div name=&quot;Plain Text&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.24mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1891
1892&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1893There are also commercial systems that provide similar digital library services to those described. However, since corporate culture instills proprietary attitudes, there is little opportunity for advancement through a shared collaborative effort. Consequently they are not reviewed here.
1894&lt;/p&gt;&lt;/div&gt;
1895
1896
1897
1898&lt;p&gt;&lt;div name=&quot;Heading 1&quot; align=&quot;left&quot; style=&quot;margin: 4.17mm 0.50mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1899
1900&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1901&lt;b&gt;&lt;span style=&quot;text-transform:uppercase&quot;&gt;Conclusions&lt;/span&gt;&lt;/b&gt;
1902&lt;/p&gt;&lt;/div&gt;
1903
1904
1905
1906&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1907
1908&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1909Greenstone is a comprehensive software system for creating digital library collections. It builds data structures for searching and browsing from the material provided, rather than relying on any hand-crafting. The process is controlled by a configuration file, and once a collection exists new material can be added completely automatically. Browsing is based on Dublin Core metadata.
1910&lt;/p&gt;&lt;/div&gt;
1911
1912
1913
1914&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1915
1916&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1917New collections can be developed easily, particularly if they resemble existing ones. Extensibility is achieved through software &amp;ldquo;plugins&amp;rdquo; that can be written to accommodate documents, and metadata, in different formats. Standard plugins exist for many document types; new ones are easily written. Browsing is controlled by &amp;ldquo;classifiers&amp;rdquo; that process metadata into browsing structures (by date, alphabetical, hierarchical, etc).
1918&lt;/p&gt;&lt;/div&gt;
1919
1920
1921
1922&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1923
1924&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1925However, the most powerful support for extensibility is achieved not by technical means but by making the source code freely available under the GNU General Public License. Only through an international cooperative effort will digital library software become sufficiently comprehensive to meet the world's needs with the richness and flexibility that users deserve.
1926&lt;/p&gt;&lt;/div&gt;
1927
1928
1929
1930&lt;p&gt;&lt;div name=&quot;Heading 1&quot; align=&quot;left&quot; style=&quot;margin: 4.17mm 0.50mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1931
1932&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1933&lt;b&gt;&lt;span style=&quot;text-transform:uppercase&quot;&gt;Acknowledgments&lt;/span&gt;&lt;/b&gt;
1934&lt;/p&gt;&lt;/div&gt;
1935
1936
1937
1938&lt;p&gt;&lt;div name=&quot;paragraph&quot; align=&quot;left&quot; style=&quot;margin: 2.08mm 0.00mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1939
1940&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 3.819444mm; color: Black; background-color: White; &quot;&gt;
1941We gratefully acknowledge all those who have worked on the Greenstone software, and all members of the New Zealand Digital Library project for their enthusiasm and ideas.
1942&lt;/p&gt;&lt;/div&gt;
1943
1944
1945
1946&lt;p&gt;&lt;div name=&quot;Heading 1&quot; align=&quot;left&quot; style=&quot;margin: 0.00mm 0.49mm 0.00mm 0.00mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1947
1948&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1949&lt;b&gt;&lt;span style=&quot;text-transform:uppercase&quot;&gt;References&lt;/span&gt;&lt;/b&gt;
1950&lt;/p&gt;&lt;/div&gt;
1951
1952
1953
1954&lt;ol type=&quot;1&quot;&gt;
1955&lt;li value=&quot;1&quot;&gt;&lt;p&gt;&lt;div name=&quot;References&quot; align=&quot;left&quot; style=&quot;margin: 1.04mm 0.00mm 0.00mm 6.25mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1956
1957&lt;p style=&quot;text-indent: -6.25mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1958Akscyn, R.M. and Witten, I.H. (1998) &amp;ldquo;Report on First Summit on International Cooperation on Digital Libraries.&amp;rdquo; ks.com/idla-wp-oct98.
1959&lt;/p&gt;&lt;/div&gt;&lt;/li&gt;
1960
1961
1962
1963&lt;li value=&quot;2&quot;&gt;&lt;p&gt;&lt;div name=&quot;References&quot; align=&quot;left&quot; style=&quot;margin: 1.04mm 0.00mm 0.00mm 6.25mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1964
1965&lt;p style=&quot;text-indent: -6.25mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1966Bowman, C.M., Danzig, P.B., Manber, U., and Schwartz, M.F. &amp;ldquo;Scalable Internet resource discovery: Research problems and approaches&amp;rdquo; &lt;i&gt;Communications of the ACM,&lt;/i&gt; Vol. 37, No. 8, pp. 98−107, 1994.
1967&lt;/p&gt;&lt;/div&gt;&lt;/li&gt;
1968
1969
1970
1971&lt;li value=&quot;3&quot;&gt;&lt;p&gt;&lt;div name=&quot;References&quot; align=&quot;left&quot; style=&quot;margin: 1.04mm 0.00mm 0.00mm 6.25mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1972
1973&lt;p style=&quot;text-indent: -6.25mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1974Fox, E. (1998) &amp;ldquo;Digital library definitions.&amp;rdquo; ei.cs.vt.edu/~fox/dlib/def.html.
1975&lt;/p&gt;&lt;/div&gt;&lt;/li&gt;
1976
1977
1978
1979&lt;li value=&quot;4&quot;&gt;&lt;p&gt;&lt;div name=&quot;References&quot; align=&quot;left&quot; style=&quot;margin: 1.04mm 0.00mm 0.00mm 6.25mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1980
1981&lt;p style=&quot;text-indent: -6.25mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1982Humanity Libraries (1998) &lt;i&gt;Humanity Development Library&lt;/i&gt;. CD-ROM produced by the Global Help Project, Antwerp, Belgium.
1983&lt;/p&gt;&lt;/div&gt;&lt;/li&gt;
1984
1985
1986
1987&lt;li value=&quot;5&quot;&gt;&lt;p&gt;&lt;div name=&quot;References&quot; align=&quot;left&quot; style=&quot;margin: 1.04mm 0.00mm 0.00mm 6.25mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1988
1989&lt;p style=&quot;text-indent: -6.25mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1990Lagoze, C. and Fielding, D. &amp;ldquo;Defining Collections in Distributed Digital Libraries&amp;rdquo; &lt;i&gt;D-Lib Magazine&lt;/i&gt;, Nov. 1998.
1991&lt;/p&gt;&lt;/div&gt;&lt;/li&gt;
1992
1993
1994
1995&lt;li value=&quot;6&quot;&gt;&lt;p&gt;&lt;div name=&quot;References&quot; align=&quot;left&quot; style=&quot;margin: 1.04mm 0.00mm 0.00mm 6.25mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
1996
1997&lt;p style=&quot;text-indent: -6.25mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
1998PAHO (1999) &lt;i&gt;Virtual Disaster Library&lt;/i&gt;. CD-ROM produced by the Pan-American Health Organization, Washington DC, USA.
1999&lt;/p&gt;&lt;/div&gt;&lt;/li&gt;
2000
2001
2002
2003&lt;li value=&quot;7&quot;&gt;&lt;p&gt;&lt;div name=&quot;References&quot; align=&quot;left&quot; style=&quot;margin: 1.04mm 0.00mm 0.00mm 6.25mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2004
2005&lt;p style=&quot;text-indent: -6.25mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2006McNab, R.J., Witten, I.H. and Boddie, S.J. (1998) &amp;ldquo;A distributed digital library architecture incorporating different index styles.&amp;rdquo; &lt;i&gt;Proc IEEE Advances in Digital Libraries&lt;/i&gt;, Santa Barbara, CA, pp. 36-45.
2007&lt;/p&gt;&lt;/div&gt;&lt;/li&gt;
2008
2009
2010
2011&lt;li value=&quot;8&quot;&gt;&lt;p&gt;&lt;div name=&quot;References&quot; align=&quot;left&quot; style=&quot;margin: 1.04mm 0.00mm 0.00mm 6.25mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2012
2013&lt;p style=&quot;text-indent: -6.25mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2014Nevill-Manning, C.G., Reed, T., and Witten, I.H. (1998) &amp;ldquo;Extracting text from PostScript&amp;rdquo; &lt;i&gt;Software&amp;mdash;Practice and Experience&lt;/i&gt;, Vol. 28, No. 5, pp. 481-491; April.
2015&lt;/p&gt;&lt;/div&gt;&lt;/li&gt;
2016
2017
2018
2019&lt;li value=&quot;9&quot;&gt;&lt;p&gt;&lt;div name=&quot;References&quot; align=&quot;left&quot; style=&quot;margin: 1.04mm 0.00mm 0.00mm 6.25mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2020
2021&lt;p style=&quot;text-indent: -6.25mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2022UNESCO (1999) &lt;i&gt;SAHEL point DOC: Anthologie du développement au Sahel&lt;/i&gt;. CD-ROM produced by UNESCO, Paris, France.
2023&lt;/p&gt;&lt;/div&gt;&lt;/li&gt;
2024
2025
2026
2027&lt;li value=&quot;10&quot;&gt;&lt;p&gt;&lt;div name=&quot;References&quot; align=&quot;left&quot; style=&quot;margin: 1.04mm 0.00mm 0.00mm 6.25mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2028
2029&lt;p style=&quot;text-indent: -6.25mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2030UNU (1998) &lt;i&gt;Collection on critical global issues.&lt;/i&gt; CD-ROM produced by the United Nations University Press, Tokyo, Japan.
2031&lt;/p&gt;&lt;/div&gt;&lt;/li&gt;
2032
2033
2034
2035&lt;li value=&quot;11&quot;&gt;&lt;p&gt;&lt;div name=&quot;References&quot; align=&quot;left&quot; style=&quot;margin: 1.04mm 0.00mm 0.00mm 6.25mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2036
2037&lt;p style=&quot;text-indent: -6.25mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2038Witten, I.H., Moffat, A. and Bell, T. (1999) &lt;i&gt;Managing Gigabytes: compressing and indexing documents and images&lt;/i&gt;, Morgan Kaufmann, second edition.
2039&lt;/p&gt;&lt;/div&gt;&lt;/li&gt;
2040
2041
2042
2043&lt;/ol&gt;&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2044
2045&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2046
2047&lt;/p&gt;&lt;/div&gt;
2048
2049
2050
2051&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2052
2053&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2054
2055&lt;/p&gt;&lt;/div&gt;
2056
2057
2058
2059&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2060
2061&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2062
2063&lt;/p&gt;&lt;/div&gt;
2064
2065
2066
2067&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2068
2069&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2070
2071&lt;/p&gt;&lt;/div&gt;
2072
2073
2074
2075&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2076
2077&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2078
2079&lt;/p&gt;&lt;/div&gt;
2080
2081
2082
2083&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2084
2085&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2086
2087&lt;/p&gt;&lt;/div&gt;
2088
2089
2090
2091&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2092
2093&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2094
2095&lt;/p&gt;&lt;/div&gt;
2096
2097
2098
2099&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2100
2101&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2102
2103&lt;/p&gt;&lt;/div&gt;
2104
2105
2106
2107&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2108
2109&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2110
2111&lt;/p&gt;&lt;/div&gt;
2112
2113
2114
2115&lt;p&gt;&lt;div name=&quot;Header&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2116
2117&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2118&lt;span style=&quot;text-transform:lowercase&quot;&gt;&lt;/span&gt;
2119&lt;/p&gt;&lt;/div&gt;
2120
2121
2122
2123&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2124
2125&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2126
2127&lt;/p&gt;&lt;/div&gt;
2128
2129
2130
2131&lt;p&gt;&lt;div name=&quot;Footer&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2132
2133&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2134&lt;span style=&quot;text-transform:lowercase&quot;&gt;&lt;/span&gt;
2135&lt;/p&gt;&lt;/div&gt;
2136
2137
2138
2139&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2140
2141&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2142
2143&lt;/p&gt;&lt;/div&gt;
2144
2145
2146
2147&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2148
2149&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2150
2151&lt;/p&gt;&lt;/div&gt;
2152
2153
2154
2155&lt;p&gt;&lt;div name=&quot;Caption&quot; align=&quot;center&quot; style=&quot;margin: 2.08mm 2.74mm 0.00mm 17.26mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2156
2157&lt;p style=&quot;text-indent: -17.26mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2158
2159&lt;/p&gt;&lt;/div&gt;
2160
2161
2162
2163&lt;p&gt;&lt;div name=&quot;Caption&quot; align=&quot;center&quot; style=&quot;margin: 2.08mm 2.74mm 0.00mm 17.26mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2164
2165&lt;p style=&quot;text-indent: -17.26mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2166
2167&lt;/p&gt;&lt;/div&gt;
2168
2169
2170
2171&lt;p&gt;&lt;div name=&quot;Caption&quot; align=&quot;center&quot; style=&quot;margin: 2.08mm 2.74mm 0.00mm 17.26mm;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2172
2173&lt;p style=&quot;text-indent: -17.26mm; text-align: center; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2174
2175&lt;/p&gt;&lt;/div&gt;
2176
2177
2178
2179&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2180
2181&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2182
2183&lt;/p&gt;&lt;/div&gt;
2184
2185
2186
2187&lt;p&gt;&lt;div name=&quot;Normal&quot; align=&quot;left&quot; style=&quot;  padding: 0.00mm 0.00mm 0.00mm 0.00mm; &quot;&gt;
2188
2189&lt;p style=&quot;text-indent: 0.00mm; text-align: left; line-height: 4.166667mm; color: Black; background-color: White; &quot;&gt;
2190
2191&lt;/p&gt;&lt;/div&gt;
2192
2193&lt;!--Section Ends--&gt;
2194
2195
2196
2197&lt;!--
2198&lt;hr&gt;
2199&lt;address&gt;
2200&lt;a href=&quot;_httpextlink_&amp;amp;rl=0&amp;amp;href=http:%2f%2fwvware.sourceforge.net&quot;&gt;&lt;img
2201src=&quot;_httpdocimg_/wvSmall.gif&quot; height=31 width=47
2202align=left border=0 alt=&quot;wvWare&quot;&gt;&lt;/a&gt;
2203&lt;a href=&quot;_httpextlink_&amp;amp;rl=0&amp;amp;href=http:%2f%2fvalidator.w3.org%2fcheck%2freferer&quot;&gt;&lt;img
2204src=&quot;_httpdocimg_/vh40.gif&quot; height=31 width=88
2205align=right border=0 alt=&quot;Valid HTML 4.0!&quot;&gt;&lt;/a&gt;
2206Document created with &lt;a href=&quot;_httpextlink_&amp;amp;rl=0&amp;amp;href=http:%2f%2fwvware.sourceforge.net&quot;&gt;wvWare/wvWare version 1.2.4&lt;/a&gt;&lt;br&gt;
2207&lt;/address&gt;
2208--&gt;
2209
2210
2211</Content>
2212</Section>
2213</Archive>
Note: See TracBrowser for help on using the browser.