source: other-projects/nightly-tasks/diffcol/trunk/gs3-model-collect/Word-PDF-Enhanced/archives/HASHeaa2.dir/doc.xml@ 30029

Last change on this file since 30029 was 30029, checked in by ak19, 9 years ago

Adding the Enhanced Word tutorial collection that uses Windows Scripting. Pre-built on Windows 7 64 bit.

File size: 145.1 KB
Line 
1<?xml version="1.0" encoding="utf-8" standalone="no"?>
2<!DOCTYPE Archive SYSTEM "http://greenstone.org/dtd/Archive/1.0/Archive.dtd">
3<Archive>
4<Section>
5 <Description>
6 <Metadata name="gsdldoctype">indexed_doc</Metadata>
7 <Metadata name="Language">en</Metadata>
8 <Metadata name="Encoding">windows_1252</Metadata>
9 <Metadata name="Creator">dg5</Metadata>
10 <Metadata name="Title">1997-00 Listing of Working Papers</Metadata>
11 <Metadata name="URL">http://C:/Users/Anupama/GS307_13July2015/web/sites/localsite/collect/Word-PDF-Enhanced/tmp/1436775751/word01.html</Metadata>
12 <Metadata name="UTF8URL">http://C:/Users/Anupama/GS307_13July2015/web/sites/localsite/collect/Word-PDF-Enhanced/tmp/1436775751/word01.html</Metadata>
13 <Metadata name="gsdlsourcefilename">import\word01.doc</Metadata>
14 <Metadata name="gsdlconvertedfilename">tmp\1436775751\word01.html</Metadata>
15 <Metadata name="OrigSource">word01.html</Metadata>
16 <Metadata name="Source">word01.doc</Metadata>
17 <Metadata name="SourceFile">word01.doc</Metadata>
18 <Metadata name="Plugin">WordPlugin</Metadata>
19 <Metadata name="FileSize">110080</Metadata>
20 <Metadata name="FilenameRoot">word01</Metadata>
21 <Metadata name="FileFormat">Word</Metadata>
22 <Metadata name="srcicon">_icondoc_</Metadata>
23 <Metadata name="srclink_file">doc.doc</Metadata>
24 <Metadata name="srclinkFile">doc.doc</Metadata>
25 <Metadata name="Identifier">HASHeaa2992e081949673150f3</Metadata>
26 <Metadata name="lastmodified">1436763858</Metadata>
27 <Metadata name="lastmodifieddate">20150713</Metadata>
28 <Metadata name="oailastmodified">1436775752</Metadata>
29 <Metadata name="oailastmodifieddate">20150713</Metadata>
30 <Metadata name="assocfilepath">HASHeaa2.dir</Metadata>
31 <Metadata name="gsdlassocfile">doc.doc:application/msword:</Metadata>
32 </Description>
33 <Content>
34
35
36
37&lt;div class=WordSection1&gt;
38
39
40
41&lt;p class=MsoTitle&gt;&lt;span lang=EN-US&gt;1997-00 Listing of Working Papers &lt;/span&gt;&lt;/p&gt;
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;2000/1&lt;/span&gt;&lt;/p&gt;
58
59
60
61&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Using
62
63compression to identify acronyms in text&lt;/span&gt;&lt;/p&gt;
64
65
66
67&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Stuart &lt;span
68
69class=SpellE&gt;Yeates&lt;/span&gt;, David Bainbridge, Ian H. &lt;span class=SpellE&gt;Witten&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
70
71
72
73&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Text mining is
74
75about looking for patterns in natural language text, and may be defined as the
76
77process of &lt;span class=SpellE&gt;analyzing&lt;/span&gt; text to extract information from
78
79it for particular purposes.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;In previous
80
81work, we claimed that compression is a key technology for text mining, and
82
83backed this up with a study that showed how particular kinds of lexical
84
85tokens—names, dates, locations, &lt;i style='mso-bidi-font-style:normal'&gt;etc.&lt;/i&gt;—can
86
87be identified and located in running text, using compression models to provide
88
89the leverage necessary to distinguish different token types (Witten &lt;i
90
91style='mso-bidi-font-style:normal'&gt;et al.&lt;/i&gt;, 1999)&lt;/span&gt;&lt;/p&gt;
92
93
94
95
96
97
98
99&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;2000/2&lt;/span&gt;&lt;/p&gt;
100
101
102
103&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Text &lt;span
104
105class=SpellE&gt;categorization&lt;/span&gt; using compression models&lt;/span&gt;&lt;/p&gt;
106
107
108
109&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span class=SpellE&gt;&lt;span
110
111lang=EN-GB&gt;Eibe&lt;/span&gt;&lt;/span&gt;&lt;span lang=EN-GB&gt; Frank, Chang &lt;span class=SpellE&gt;Chui&lt;/span&gt;,
112
113Ian H. &lt;span class=SpellE&gt;Witten&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
114
115
116
117&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Text &lt;span
118
119class=SpellE&gt;categorization&lt;/span&gt;, or the assignment of natural language texts
120
121to predefined categories based on their content, is of growing importance as
122
123the volume of information available on the internet continues to overwhelm
124
125us.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The use of predefined categories implies
126
127a “supervised learning” approach to &lt;span class=SpellE&gt;categorization&lt;/span&gt;,
128
129where already-classified articles – which effectively define the categories –
130
131are used as “training data” to build a model that can be used for classifying
132
133new articles that comprise the “test data”.&lt;span style='mso-spacerun:yes'&gt; 
134
135&lt;/span&gt;This contrasts with “unsupervised” learning, where there is no training
136
137data and clusters of like documents are sought amongst the test articles.&lt;span
138
139style='mso-spacerun:yes'&gt;  &lt;/span&gt;With supervised learning, meaningful labels
140
141(such as &lt;span class=SpellE&gt;keyphrases&lt;/span&gt;) are attached to the training
142
143documents, and appropriate labels can be assigned automatically to test
144
145documents depending on which category they fall into.&lt;/span&gt;&lt;/p&gt;
146
147
148
149
150
151
152
153&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;2000/3&lt;/span&gt;&lt;/p&gt;
154
155
156
157&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Reserved for
158
159Sally Jo&lt;/span&gt;&lt;/p&gt;
160
161
162
163
164
165
166
167&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;2000/4&lt;/span&gt;&lt;/p&gt;
168
169
170
171&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Interactive
172
173machine learning—letting users build classifiers&lt;/span&gt;&lt;/p&gt;
174
175
176
177&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Malcolm Ware, &lt;span
178
179class=SpellE&gt;Eibe&lt;/span&gt; Frank, Geoffrey Holmes, Mark Hall, Ian H. &lt;span
180
181class=SpellE&gt;Witten&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
182
183
184
185&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;According to
186
187standard procedure, building a classifier is a fully automated process that
188
189follows data preparation by a domain expert.&lt;span style='mso-spacerun:yes'&gt; 
190
191&lt;/span&gt;In contrast, &lt;i&gt;interactive&lt;/i&gt; machine learning engages
192
193users in actually generating the classifier themselves.&lt;span
194
195style='mso-spacerun:yes'&gt;  &lt;/span&gt;This offers a natural way of integrating
196
197background knowledge into the &lt;span class=SpellE&gt;modeling&lt;/span&gt; stage—so long
198
199as interactive tools can be designed that support efficient and effective
200
201communication.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;This paper shows that
202
203appropriate techniques can empower users to create models that compete with
204
205classifiers built by state-of-the-art learning algorithms.&lt;span
206
207style='mso-spacerun:yes'&gt;  &lt;/span&gt;It demonstrates that users—even users who are
208
209not domain experts—can often construct good classifiers, without any help from
210
211a learning algorithm, using a simple two-dimensional visual interface.&lt;span
212
213style='mso-spacerun:yes'&gt;  &lt;/span&gt;Experiments demonstrate that, not
214
215surprisingly, success hinges on the domain: if a few attributes can support
216
217good predictions, users generate accurate classifiers, whereas domains with
218
219many high-order attribute interactions &lt;span class=SpellE&gt;favor&lt;/span&gt; standard
220
221machine learning techniques.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The future
222
223challenge is to achieve a symbiosis between human user and machine learning
224
225algorithm.&lt;/span&gt;&lt;/p&gt;
226
227
228
229
230
231
232
233&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;2000/5&lt;/span&gt;&lt;/p&gt;
234
235
236
237&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;KEA: Practical
238
239automatic &lt;span class=SpellE&gt;keyphrase&lt;/span&gt; extraction&lt;/span&gt;&lt;/p&gt;
240
241
242
243&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Ian H. &lt;span
244
245class=SpellE&gt;Witten&lt;/span&gt;, Gordon W. &lt;span class=SpellE&gt;Paynter&lt;/span&gt;, &lt;span
246
247class=SpellE&gt;Eibe&lt;/span&gt; Frank, Carl &lt;span class=SpellE&gt;Gutwin&lt;/span&gt;, Craig G.
248
249&lt;span class=SpellE&gt;Nevill&lt;/span&gt;-Manning&lt;/span&gt;&lt;/p&gt;
250
251
252
253&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span class=SpellE&gt;&lt;span
254
255lang=EN-GB&gt;Keyphrases&lt;/span&gt;&lt;/span&gt;&lt;span lang=EN-GB&gt; provide semantic metadata
256
257that &lt;span class=SpellE&gt;summarize&lt;/span&gt; and &lt;span class=SpellE&gt;characterize&lt;/span&gt;
258
259documents.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;This paper describes &lt;span
260
261class=SpellE&gt;Kea&lt;/span&gt;, an algorithm for automatically extracting &lt;span
262
263class=SpellE&gt;keyphrases&lt;/span&gt; from text.&lt;span style='mso-spacerun:yes'&gt; 
264
265&lt;/span&gt;&lt;span class=SpellE&gt;Kea&lt;/span&gt; identifies candidate &lt;span class=SpellE&gt;keyphrases&lt;/span&gt;
266
267using lexical methods, calculates feature values for each candidate, and uses a
268
269machine learning algorithm to predict which candidates are good &lt;span
270
271class=SpellE&gt;keyphrases&lt;/span&gt;.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The
272
273machine learning scheme first builds a prediction model using training
274
275documents with known &lt;span class=SpellE&gt;keyphrases&lt;/span&gt;, and then uses the
276
277model to find &lt;span class=SpellE&gt;keyphrases&lt;/span&gt; in new documents.&lt;span
278
279style='mso-spacerun:yes'&gt;  &lt;/span&gt;We use a large test corpus to evaluate &lt;span
280
281class=SpellE&gt;Kea's&lt;/span&gt; effectiveness in terms of how many author-assigned &lt;span
282
283class=SpellE&gt;keyphrases&lt;/span&gt; are correctly identified.&lt;span
284
285style='mso-spacerun:yes'&gt;  &lt;/span&gt;The system is simple, robust, and publicly
286
287available.&lt;/span&gt;&lt;/p&gt;
288
289
290
291
292
293
294
295&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;2000/6&lt;/span&gt;&lt;/p&gt;
296
297
298
299&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;i style='mso-bidi-font-style:
300
301normal'&gt;&lt;span lang=EN-GB style='font-family:Symbol;mso-ascii-font-family:&quot;Times New Roman&quot;;
302
303mso-hansi-font-family:&quot;Times New Roman&quot;;mso-char-type:symbol;mso-symbol-font-family:
304
305Symbol'&gt;&lt;span style='mso-char-type:symbol;mso-symbol-font-family:Symbol'&gt;m&lt;/span&gt;&lt;/span&gt;&lt;/i&gt;&lt;span
306
307lang=EN-GB&gt;-Charts and Z:&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;&lt;span
308
309class=SpellE&gt;hows&lt;/span&gt;, &lt;span class=SpellE&gt;whys&lt;/span&gt; and &lt;span
310
311class=SpellE&gt;wherefores&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
312
313
314
315&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Greg Reeve,
316
317Steve Reeves&lt;/span&gt;&lt;/p&gt;
318
319
320
321&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;In this paper we
322
323show, by a series of examples, how the &lt;/span&gt;&lt;i style='mso-bidi-font-style:
324
325normal'&gt;&lt;span lang=EN-GB style='font-family:Symbol;mso-ascii-font-family:&quot;Times New Roman&quot;;
326
327mso-hansi-font-family:&quot;Times New Roman&quot;;mso-char-type:symbol;mso-symbol-font-family:
328
329Symbol'&gt;&lt;span style='mso-char-type:symbol;mso-symbol-font-family:Symbol'&gt;m&lt;/span&gt;&lt;/span&gt;&lt;/i&gt;&lt;span
330
331lang=EN-GB&gt;-chart formalism can be translated into Z.&lt;span
332
333style='mso-spacerun:yes'&gt;  &lt;/span&gt;We give reasons for why this is an
334
335interesting and sensible thing to do and what it might be used for.&lt;/span&gt;&lt;/p&gt;
336
337
338
339
340
341
342
343
344
345
346
347&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;2000/7&lt;/span&gt;&lt;/p&gt;
348
349
350
351&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;One dimensional
352
353non-uniform rational B-splines for animation control&lt;/span&gt;&lt;/p&gt;
354
355
356
357&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span class=SpellE&gt;&lt;span
358
359lang=EN-GB&gt;Abdelaziz&lt;/span&gt;&lt;/span&gt;&lt;span lang=EN-GB&gt; &lt;span class=SpellE&gt;Mahoui&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
360
361
362
363&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Most 3D
364
365animation packages use graphical representations called motion graphs to
366
367represent the variation in time of the motion parameters.&lt;span
368
369style='mso-spacerun:yes'&gt;  &lt;/span&gt;Many use two-dimensional B-splines as
370
371animation curves because of their power to represent free-form curves.&lt;span
372
373style='mso-spacerun:yes'&gt;  &lt;/span&gt;In this project, we investigate the
374
375possibility of using One-dimensional Non-Uniform Rational B-&lt;span class=SpellE&gt;Spline&lt;/span&gt;
376
377(NURBS) curves for the interactive construction of animation control
378
379curves.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;One-dimensional NURBS curves
380
381present the potential of solving some problems encountered in motion graphs
382
383when two-dimensional B-splines are used.&lt;span style='mso-spacerun:yes'&gt; 
384
385&lt;/span&gt;The study focuses on the properties of One-dimensional NURBS
386
387mathematical model.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;It also investigates
388
389the algorithms and shape modification tools devised for two-dimensional curves
390
391and their port to the One-dimensional NURBS model.&lt;span
392
393style='mso-spacerun:yes'&gt;  &lt;/span&gt;It also looks at the issues related to the
394
395user interface used to interactively modify the shape of the curves.&lt;/span&gt;&lt;/p&gt;
396
397
398
399
400
401
402
403&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;2000/8&lt;/span&gt;&lt;/p&gt;
404
405
406
407&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Correlation-based
408
409feature selection of discrete and numeric class machine learning&lt;/span&gt;&lt;/p&gt;
410
411
412
413&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Mark A. Hall&lt;/span&gt;&lt;/p&gt;
414
415
416
417&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Algorithms for
418
419feature selection fall into two broad categories:
420
421&lt;i&gt;wrappers&lt;/i&gt; that use the learning algorithm itself to evaluate
422
423the usefulness of features and &lt;i&gt;filters&lt;/i&gt; that evaluate features
424
425according to heuristics based on general characteristics of the data.&lt;span
426
427style='mso-spacerun:yes'&gt;  &lt;/span&gt;For application to large databases, filters
428
429have proven to be more practical than wrappers because they are much
430
431faster.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;However, most existing filter
432
433algorithms only work with discrete classification problems.&lt;span
434
435style='mso-spacerun:yes'&gt;  &lt;/span&gt;This paper describes a fast,
436
437correlation-based filter algorithm that can be applied to continuous and
438
439discrete problems.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The algorithm often
440
441out-performs the well-known &lt;span class=SpellE&gt;ReliefF&lt;/span&gt; attribute
442
443estimator when used as a &lt;span class=SpellE&gt;preprocessing&lt;/span&gt; step for naïve
444
445&lt;span class=SpellE&gt;Bayes&lt;/span&gt;, instance-based learning, decision trees,
446
447locally weighted regression, and model trees.&lt;span style='mso-spacerun:yes'&gt; 
448
449&lt;/span&gt;It performs more feature selection than &lt;span class=SpellE&gt;ReliefF&lt;/span&gt;
450
451does—reducing the data dimensionality by fifty percent in most cases.&lt;span
452
453style='mso-spacerun:yes'&gt;  &lt;/span&gt;Also, decision and model trees built from the
454
455&lt;span class=SpellE&gt;preprocessed&lt;/span&gt; data are often significantly smaller.&lt;/span&gt;&lt;/p&gt;
456
457
458
459
460
461
462
463&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;2000/9&lt;/span&gt;&lt;/p&gt;
464
465
466
467&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;A development
468
469environment for predictive modelling in foods&lt;/span&gt;&lt;/p&gt;
470
471
472
473&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;G. Holmes, &lt;span
474
475class=SpellE&gt;M.A.&lt;/span&gt; Hall&lt;/span&gt;&lt;/p&gt;
476
477
478
479&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;WEKA (Waikato
480
481Environment for Knowledge Analysis) is a comprehensive suite of Java class
482
483libraries that implement many state-of-the-art machine learning/data mining
484
485algorithms.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Non-programmers interact
486
487with the software via a user interface component called the Knowledge Explorer.&lt;/span&gt;&lt;/p&gt;
488
489
490
491
492
493
494
495&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Applications
496
497constructed from the WEKA class libraries can be run on any computer with a web
498
499browsing capability, allowing users to apply machine learning techniques to
500
501their own data regardless of computer platform.&lt;span style='mso-spacerun:yes'&gt; 
502
503&lt;/span&gt;This paper describes the user interface component of the WEKA system in
504
505reference to previous applications in the predictive &lt;span class=SpellE&gt;modeling&lt;/span&gt;
506
507of foods.&lt;/span&gt;&lt;/p&gt;
508
509
510
511
512
513
514
515&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;2000/10&lt;/span&gt;&lt;/p&gt;
516
517
518
519&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Benchmarking
520
521attribute selection techniques for data mining&lt;/span&gt;&lt;/p&gt;
522
523
524
525&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Mark A. Hall,
526
527Geoffrey Holmes&lt;/span&gt;&lt;/p&gt;
528
529
530
531&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Data engineering
532
533is generally considered to be a central issue in the development of data mining
534
535applications.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The success of many
536
537learning schemes, in their attempts to construct models of data, hinges on the
538
539reliable identification of a small set of highly predictive attributes.&lt;span
540
541style='mso-spacerun:yes'&gt;  &lt;/span&gt;The inclusion of irrelevant, redundant and
542
543noisy attributes in the model building process phase can result in poor
544
545predictive performance and increased computation.&lt;/span&gt;&lt;/p&gt;
546
547
548
549
550
551
552
553&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Attribute
554
555selection generally involves a combination of search and attribute utility
556
557estimation plus evaluation with respect to specific learning schemes.&lt;span
558
559style='mso-spacerun:yes'&gt;  &lt;/span&gt;This leads to a large number of possible
560
561permutations and has led to a situation where very few benchmark studies have
562
563been conducted.&lt;/span&gt;&lt;/p&gt;
564
565
566
567
568
569
570
571&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;This paper
572
573presents a benchmark comparison of several attribute selection methods.&lt;span
574
575style='mso-spacerun:yes'&gt;  &lt;/span&gt;All the methods produce an attribute ranking,
576
577a useful device for isolating the individual merit of an attribute.&lt;span
578
579style='mso-spacerun:yes'&gt;  &lt;/span&gt;Attribute selection is achieved by
580
581cross-validating the rankings with respect to a learning scheme to find the
582
583best attributes.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Results are reported
584
585for a selection of standard data sets and two learning schemes C4.5 and naïve &lt;span
586
587class=SpellE&gt;Bayes&lt;/span&gt;.&lt;/span&gt;&lt;/p&gt;
588
589
590
591
592
593
594
595&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;2000/11&lt;/span&gt;&lt;/p&gt;
596
597
598
599&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Steve Reeves,
600
601Greg Reeve&lt;/span&gt;&lt;/p&gt;
602
603
604
605
606
607
608
609
610
611
612
613&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;2000/12&lt;/span&gt;&lt;/p&gt;
614
615
616
617&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span class=SpellE&gt;&lt;span
618
619lang=EN-GB&gt;Malika&lt;/span&gt;&lt;/span&gt;&lt;span lang=EN-GB&gt; &lt;span class=SpellE&gt;Mahoui&lt;/span&gt;,
620
621Sally Jo Cunningham&lt;/span&gt;&lt;/p&gt;
622
623
624
625&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Transaction logs
626
627are invaluable sources of fine-grained information about users' search &lt;span
628
629class=SpellE&gt;behavior&lt;/span&gt;.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;This paper
630
631compares the searching &lt;span class=SpellE&gt;behavior&lt;/span&gt; of users across two
632
633WWW-accessible digital libraries: the New Zealand Digital Library's Computer
634
635Science Technical Reports collection (CSTR), and the &lt;span class=SpellE&gt;Karlsruhe&lt;/span&gt;
636
637Computer Science Bibliographies (CSBIB) collection.&lt;span
638
639style='mso-spacerun:yes'&gt;  &lt;/span&gt;Since the two collections are designed to
640
641support the same type of users—researchers/students in computer science—a
642
643comparative log analysis is likely to uncover common searching preferences for
644
645that user group.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The two collections
646
647differ in their content, however; the CSTR indexes a full text collection,
648
649while the CSBIB is primarily a bibliographic database.&lt;span
650
651style='mso-spacerun:yes'&gt;  &lt;/span&gt;Differences in searching &lt;span class=SpellE&gt;behavior&lt;/span&gt;
652
653between the two systems may indicate the effect of differing search facilities
654
655and content type.&lt;/span&gt;&lt;/p&gt;
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;99/1&lt;/span&gt;&lt;/p&gt;
684
685
686
687&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Lexical
688
689attraction for text compression&lt;/span&gt;&lt;/p&gt;
690
691
692
693&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span class=SpellE&gt;&lt;span
694
695lang=EN-GB&gt;Joscha&lt;/span&gt;&lt;/span&gt;&lt;span lang=EN-GB&gt; Bach, Ian H. &lt;span
696
697class=SpellE&gt;Witten&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
698
699
700
701&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;New methods of
702
703acquiring structural information in text documents may support better
704
705compression by identifying an appropriate prediction context for each
706
707symbol.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The method of “lexical
708
709attraction” infers syntactic dependency structures from statistical analysis of
710
711large corpora.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We describe the
712
713generation of a lexical attraction model, discuss its application to text
714
715compression, and explore its potential to outperform fixed-context models such
716
717as word-level PPM.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Perhaps the most
718
719exciting aspect of this work is the prospect of using compression as a metric
720
721for structure discovery in text.&lt;/span&gt;&lt;/p&gt;
722
723
724
725
726
727
728
729
730
731
732
733&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;99/2&lt;/span&gt;&lt;/p&gt;
734
735
736
737&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Generating rule
738
739sets from model trees&lt;/span&gt;&lt;/p&gt;
740
741
742
743&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Geoffrey Holmes,
744
745Mark Hall, &lt;span class=SpellE&gt;Eibe&lt;/span&gt; Frank&lt;/span&gt;&lt;/p&gt;
746
747
748
749&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Knowledge discovered
750
751in a database must be represented in a form that is easy to understand.&lt;span
752
753style='mso-spacerun:yes'&gt;  &lt;/span&gt;Small, easy to interpret nuggets of knowledge
754
755from data are one requirement and the ability to induce them from a variety of
756
757data sources is a second.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The literature
758
759abounds with classification algorithms, and in recent years with algorithms
760
761for time sequence analysis, but relatively little has been published on
762
763extracting meaningful information from problems involving continuous classes
764
765(regression).&lt;/span&gt;&lt;/p&gt;
766
767
768
769
770
771
772
773&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Model
774
775trees—decision trees with linear models at the leaf nodes—have recently emerged
776
777as an accurate method for numeric prediction that produces understandable
778
779models.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;However, it is well known that
780
781decision lists—ordered sets of If-Then rules—have the potential to be more compact
782
783and therefore more understandable than their tree counterparts.&lt;/span&gt;&lt;/p&gt;
784
785
786
787
788
789
790
791&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;In this paper we
792
793present an algorithm for inducing simple, yet accurate rule sets from model
794
795trees.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The algorithm works by repeatedly
796
797building model trees and selecting the best rule at each iteration.&lt;span
798
799style='mso-spacerun:yes'&gt;  &lt;/span&gt;It produces rule sets that are, on the whole,
800
801as accurate but smaller than the model tree constructed from the entire &lt;span
802
803class=SpellE&gt;dataset&lt;/span&gt;.&lt;span style='mso-spacerun:yes'&gt; 
804
805&lt;/span&gt;Experimental results for various heuristics which attempt to find a
806
807compromise between rule accuracy and rule coverage are reported.&lt;span
808
809style='mso-spacerun:yes'&gt;  &lt;/span&gt;We also show empirically that our method
810
811produces more accurate and smaller rule sets than the commercial
812
813state-of-the-art rule learning system Cubist.&lt;/span&gt;&lt;/p&gt;
814
815
816
817
818
819
820
821
822
823
824
825&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;99/3&lt;/span&gt;&lt;/p&gt;
826
827
828
829&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;A diagnostic
830
831tool for tree based supervised classification learning algorithms&lt;/span&gt;&lt;/p&gt;
832
833
834
835&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Leonard &lt;span
836
837class=SpellE&gt;Trigg&lt;/span&gt;, Geoffrey Holmes&lt;/span&gt;&lt;/p&gt;
838
839
840
841&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;The process of
842
843developing applications of machine learning and data mining that employ
844
845supervised classification algorithms includes the important step of knowledge
846
847verification.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Interpretable output is
848
849presented to a user so that they can verify that the knowledge contained in the
850
851output makes sense for the given application.&lt;span style='mso-spacerun:yes'&gt; 
852
853&lt;/span&gt;As the development of an application is an iterative process it is quite
854
855likely that a user would wish to compare models constructed at various times or
856
857stages.&lt;/span&gt;&lt;/p&gt;
858
859
860
861
862
863
864
865&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;One crucial
866
867stage where comparison of models is important is when the accuracy of a model
868
869is being estimated, typically using some form of cross-validation.&lt;span
870
871style='mso-spacerun:yes'&gt;  &lt;/span&gt;This stage is used to establish an estimate
872
873of how well a model will perform on unseen data.&lt;span
874
875style='mso-spacerun:yes'&gt;  &lt;/span&gt;This is vital information to present to a
876
877user, but it is also important to show the degree of variation between models
878
879obtained from the entire &lt;span class=SpellE&gt;dataset&lt;/span&gt; and models obtained
880
881during cross-validation.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;In this way it
882
883can be verified that the cross-validation models are at least structurally
884
885aligned with the model garnered from the entire &lt;span class=SpellE&gt;dataset&lt;/span&gt;.&lt;/span&gt;&lt;/p&gt;
886
887
888
889
890
891
892
893&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;This paper
894
895presents a diagnostic tool for the comparison of tree-based supervised
896
897classification models.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The method is
898
899adapted from work on approximate tree matching and applied to decision
900
901trees.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The tool is described together
902
903with experimental results on standard &lt;span class=SpellE&gt;datasets&lt;/span&gt;.&lt;/span&gt;&lt;/p&gt;
904
905
906
907
908
909
910
911
912
913
914
915&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;99/4&lt;/span&gt;&lt;/p&gt;
916
917
918
919&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Feature
920
921selection for discrete and numeric class machine learning&lt;/span&gt;&lt;/p&gt;
922
923
924
925&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Mark A. Hall&lt;/span&gt;&lt;/p&gt;
926
927
928
929&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Algorithms for
930
931feature selection fall into two broad categories:
932
933&lt;i&gt;wrappers&lt;/i&gt; use the learning algorithm itself to evaluate the
934
935usefulness of features, while &lt;i&gt;filters&lt;/i&gt; evaluate features
936
937according to heuristics based on general characteristics of the data.&lt;span
938
939style='mso-spacerun:yes'&gt;  &lt;/span&gt;For application to large databases, filters
940
941have proven to be more practical than wrappers because they are much
942
943faster.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;However, most existing filter
944
945algorithms only work with discrete classification problems.&lt;/span&gt;&lt;/p&gt;
946
947
948
949
950
951
952
953&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;This paper
954
955describes a fast, correlation-based filter algorithm that can be applied to
956
957continuous and discrete problems.&lt;span style='mso-spacerun:yes'&gt; 
958
959&lt;/span&gt;Experiments using the new method as a &lt;span class=SpellE&gt;preprocessing&lt;/span&gt;
960
961step for naïve &lt;span class=SpellE&gt;Bayes&lt;/span&gt;, instance-based learning,
962
963decision trees, locally weighted regression, and model trees show it to be an
964
965effective feature selector- it reduces the data in dimensionality by more than
966
967sixty percent in most cases without negatively affecting accuracy.&lt;span
968
969style='mso-spacerun:yes'&gt;  &lt;/span&gt;Also, decision and model trees built from the
970
971pre-processed data are often significantly smaller.&lt;/span&gt;&lt;/p&gt;
972
973
974
975
976
977
978
979
980
981
982
983&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;99/5&lt;/span&gt;&lt;/p&gt;
984
985
986
987&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Browsing tree
988
989structures&lt;/span&gt;&lt;/p&gt;
990
991
992
993&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Mark &lt;span
994
995class=SpellE&gt;Apperley&lt;/span&gt;, Robert &lt;span class=SpellE&gt;Spence&lt;/span&gt;, Stephen &lt;span
996
997class=SpellE&gt;Hodge&lt;/span&gt;, Michael Chester&lt;/span&gt;&lt;/p&gt;
998
999
1000
1001&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Graphic
1002
1003representations of tree structures are notoriously difficult to create,
1004
1005display, and interpret, particularly when the volume of information they
1006
1007contain, and hence the number of nodes, is large.&lt;span
1008
1009style='mso-spacerun:yes'&gt;  &lt;/span&gt;The problem of interactively browsing
1010
1011information held in tree structures is examined, and the implementation of an
1012
1013innovative tree browser described.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;This
1014
1015browser is based on distortion-oriented display techniques and intuitive direct
1016
1017manipulation interaction.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The tree
1018
1019layout is automatically generated, but the location and extent of detail shown
1020
1021is controlled by the user.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;It is
1022
1023suggested that these techniques could be extended to the browsing of more
1024
1025general networks.&lt;/span&gt;&lt;/p&gt;
1026
1027
1028
1029
1030
1031
1032
1033&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;99/6&lt;/span&gt;&lt;/p&gt;
1034
1035
1036
1037&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Facilitating
1038
1039multiple copy/paste operations&lt;/span&gt;&lt;/p&gt;
1040
1041
1042
1043&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Mark &lt;span
1044
1045class=SpellE&gt;Apperley&lt;/span&gt;, Jay Baker, Dale Fletcher, Bill Rogers&lt;/span&gt;&lt;/p&gt;
1046
1047
1048
1049&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Copy and paste,
1050
1051or cut and paste, using a clipboard or paste buffer has long been the principal
1052
1053facility provided to users for transferring data between and within GUI
1054
1055applications.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We argue that this
1056
1057mechanism can be clumsy in circumstances where several pieces of information
1058
1059must be moved systematically.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;In two
1060
1061situations - extraction of data fields from unstructured data found in a
1062
1063directed search process, and reorganisation of computer program source text -
1064
1065we present alternative, more natural, user interface facilities to make the
1066
1067task less onerous, and to provide improved visual feedback during the
1068
1069operation.&lt;/span&gt;&lt;/p&gt;
1070
1071
1072
1073
1074
1075
1076
1077&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;For the data
1078
1079extraction task we introduce the Stretchable Selection Tool, a &lt;span
1080
1081class=SpellE&gt;semi&lt;/span&gt;-transparent overlay augmenting the mouse pointer to
1082
1083automate paste operations and provide information to prompt the user.&lt;span
1084
1085style='mso-spacerun:yes'&gt;  &lt;/span&gt;We describe a prototype implementation that
1086
1087functions in a collaborative software environment, allowing users to &lt;span
1088
1089class=SpellE&gt;cooperate&lt;/span&gt; on a multiple copy/paste operation.&lt;span
1090
1091style='mso-spacerun:yes'&gt;  &lt;/span&gt;For text reorganisation, we present an
1092
1093extension to &lt;span class=SpellE&gt;Emacs&lt;/span&gt;, providing similar functionality,
1094
1095but without the collaborative features.&lt;/span&gt;&lt;/p&gt;
1096
1097
1098
1099
1100
1101
1102
1103&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;99/7&lt;/span&gt;&lt;/p&gt;
1104
1105
1106
1107&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Automating
1108
1109iterative tasks with programming by demonstration: a user evaluation&lt;/span&gt;&lt;/p&gt;
1110
1111
1112
1113&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Gordon W. &lt;span
1114
1115class=SpellE&gt;Paynter&lt;/span&gt;, Ian H. &lt;span class=SpellE&gt;Witten&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
1116
1117
1118
1119&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Computer users
1120
1121often face iterative tasks that cannot be automated using the tools and
1122
1123aggregation techniques provided by their application program: they end up
1124
1125performing the iteration by hand, repeating user interface actions over and
1126
1127over again.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We have implemented an
1128
1129agent, called Familiar, that can be taught to perform iterative tasks using
1130
1131programming by demonstration (PBD).&lt;span style='mso-spacerun:yes'&gt; 
1132
1133&lt;/span&gt;Unlike other PBD systems, it is domain independent and works with
1134
1135unmodified, widely-used, applications in a popular operating system.&lt;span
1136
1137style='mso-spacerun:yes'&gt;  &lt;/span&gt;In a formal evaluation, we found that users
1138
1139quickly learned to use the agent to automate iterative tasks.&lt;span
1140
1141style='mso-spacerun:yes'&gt;  &lt;/span&gt;Generally, the participants preferred to use
1142
1143multiple selection where possible, but could and did use PBD in situations
1144
1145involving iteration over many commands, or when other techniques were
1146
1147unavailable.&lt;/span&gt;&lt;/p&gt;
1148
1149
1150
1151
1152
1153
1154
1155&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;99/8&lt;/span&gt;&lt;/p&gt;
1156
1157
1158
1159&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;A survey of
1160
1161software requirements specification practices in the New Zealand software
1162
1163industry&lt;/span&gt;&lt;/p&gt;
1164
1165
1166
1167&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Lindsay Groves,
1168
1169Ray &lt;span class=SpellE&gt;Nickson&lt;/span&gt;, Greg Reeve, Steve Reeves, Mark Utting&lt;/span&gt;&lt;/p&gt;
1170
1171
1172
1173&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;We report on the
1174
1175software development techniques used in the New Zealand software industry,
1176
1177paying particular attention to requirements gathering.&lt;span
1178
1179style='mso-spacerun:yes'&gt;  &lt;/span&gt;We surveyed a selection of software companies
1180
1181with a general questionnaire and then conducted in-depth interviews with four
1182
1183companies.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Our results show a wide
1184
1185variety in the kinds of companies undertaking software development, employing a
1186
1187wide range of software development techniques.&lt;span style='mso-spacerun:yes'&gt; 
1188
1189&lt;/span&gt;Although our data are not sufficiently detailed to draw statistically
1190
1191significant conclusions, it appears that larger software development groups
1192
1193typically have more well-defined software development processes, spend
1194
1195proportionally more time on requirements gathering, and follow more rigorous
1196
1197testing regimes.&lt;/span&gt;&lt;/p&gt;
1198
1199
1200
1201
1202
1203
1204
1205&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;99/9&lt;/span&gt;&lt;/p&gt;
1206
1207
1208
1209&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;The LRU* WWW proxy
1210
1211cache document replacement algorithm&lt;/span&gt;&lt;/p&gt;
1212
1213
1214
1215&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Chung-&lt;span
1216
1217class=SpellE&gt;yi&lt;/span&gt; Chang, Tony &lt;span class=SpellE&gt;McGregor&lt;/span&gt;, Geoffrey
1218
1219Holmes&lt;/span&gt;&lt;/p&gt;
1220
1221
1222
1223&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Obtaining good
1224
1225performance from WWW proxy caches is critically dependent on the document
1226
1227replacement policy used by the proxy.&lt;span style='mso-spacerun:yes'&gt; 
1228
1229&lt;/span&gt;This paper validates the work of other authors by reproducing their
1230
1231studies of proxy cache document replacement algorithms.&lt;span
1232
1233style='mso-spacerun:yes'&gt;  &lt;/span&gt;From this basis a cross-trace study is
1234
1235mounted.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;This demonstrates that the
1236
1237performance of most document replacement algorithms is dependent on the type of
1238
1239workload that they are presented with.&lt;span style='mso-spacerun:yes'&gt; 
1240
1241&lt;/span&gt;Finally we propose a new algorithm, LRU*, that consistently performs
1242
1243well across all our traces.&lt;/span&gt;&lt;/p&gt;
1244
1245
1246
1247
1248
1249
1250
1251&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;99/10&lt;/span&gt;&lt;/p&gt;
1252
1253
1254
1255&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Reduced-error
1256
1257pruning with significance tests&lt;/span&gt;&lt;/p&gt;
1258
1259
1260
1261&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span class=SpellE&gt;&lt;span
1262
1263lang=EN-GB&gt;Eibe&lt;/span&gt;&lt;/span&gt;&lt;span lang=EN-GB&gt; Frank, Ian H. &lt;span
1264
1265class=SpellE&gt;Witten&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
1266
1267
1268
1269&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;When building
1270
1271classification models, it is common practice to prune them to counter spurious
1272
1273effects of the training data: this often improves performance and reduces model
1274
1275size.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;&amp;quot;Reduced-error pruning&amp;quot;
1276
1277is a fast pruning procedure for decision trees that is known to produce small
1278
1279and accurate trees.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Apart from the data
1280
1281from which the tree is grown, it uses an independent &amp;quot;pruning&amp;quot; set,
1282
1283and pruning decisions are based on the model's error rate on this fresh
1284
1285data.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Recently it has been observed that
1286
1287reduced-error pruning &lt;span class=SpellE&gt;overfits&lt;/span&gt; the pruning data,
1288
1289producing unnecessarily large decision trees.&lt;span style='mso-spacerun:yes'&gt; 
1290
1291&lt;/span&gt;This paper investigates whether standard statistical significance tests
1292
1293can be used to counter this phenomenon.&lt;/span&gt;&lt;/p&gt;
1294
1295
1296
1297
1298
1299
1300
1301&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;The problem of &lt;span
1302
1303class=SpellE&gt;overfitting&lt;/span&gt; to the pruning set highlights the need for
1304
1305significance testing.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We investigate two
1306
1307classes of test, &amp;quot;parametric&amp;quot; and &amp;quot;non-parametric.&amp;quot;&lt;span
1308
1309style='mso-spacerun:yes'&gt;  &lt;/span&gt;The standard chi-squared statistic can be
1310
1311used both in a parametric test and as the basis for a non-parametric
1312
1313permutation test.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;In both cases it is
1314
1315necessary to select the significance level at which pruning is applied.&lt;span
1316
1317style='mso-spacerun:yes'&gt;  &lt;/span&gt;We show empirically that both versions of the
1318
1319chi-squared test perform equally well if their significance levels are adjusted
1320
1321appropriately.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Using a collection of
1322
1323standard &lt;span class=SpellE&gt;datasets&lt;/span&gt;, we show that significance testing
1324
1325improves on standard reduced error pruning if the significance level is
1326
1327tailored to the particular &lt;span class=SpellE&gt;dataset&lt;/span&gt; at hand using
1328
1329cross-validation, yielding consistently smaller trees that perform at least as
1330
1331well and sometimes better.&lt;/span&gt;&lt;/p&gt;
1332
1333
1334
1335
1336
1337
1338
1339&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;99/11&lt;/span&gt;&lt;/p&gt;
1340
1341
1342
1343&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span class=SpellE&gt;&lt;span
1344
1345lang=EN-GB&gt;Weka&lt;/span&gt;&lt;/span&gt;&lt;span lang=EN-GB&gt;: Practical machine learning
1346
1347tools and techniques with Java implementations&lt;/span&gt;&lt;/p&gt;
1348
1349
1350
1351&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Ian H. &lt;span
1352
1353class=SpellE&gt;Witten&lt;/span&gt;, &lt;span class=SpellE&gt;Eibe&lt;/span&gt; Frank, Len &lt;span
1354
1355class=SpellE&gt;Trigg&lt;/span&gt;, Mark Hall, Geoffrey Holmes, Sally Jo Cunningham&lt;/span&gt;&lt;/p&gt;
1356
1357
1358
1359&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;The Waikato
1360
1361Environment for Knowledge Analysis (Weka) is a comprehensive suite of Java
1362
1363class libraries that implement many state-of-the-art machine learning and data
1364
1365mining algorithms.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;&lt;span class=SpellE&gt;Weka&lt;/span&gt;
1366
1367is freely available on the &lt;span class=SpellE&gt;World-Wide&lt;/span&gt; Web and
1368
1369accompanies a new text on data mining [1] which documents and fully explains
1370
1371all the algorithms it contains.&lt;span style='mso-spacerun:yes'&gt; 
1372
1373&lt;/span&gt;Applications written using the &lt;span class=SpellE&gt;Weka&lt;/span&gt; class
1374
1375libraries can be run on any computer with a Web browsing capability; this
1376
1377allows users to apply machine learning techniques to their own data regardless
1378
1379of computer platform.&lt;/span&gt;&lt;/p&gt;
1380
1381
1382
1383
1384
1385
1386
1387&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;99/12&lt;/span&gt;&lt;/p&gt;
1388
1389
1390
1391&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Pace Regression&lt;/span&gt;&lt;/p&gt;
1392
1393
1394
1395&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Yong Wang, Ian
1396
1397H. &lt;span class=SpellE&gt;Witten&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
1398
1399
1400
1401&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;This paper
1402
1403articulates a new method of linear regression, “pace regression”, that
1404
1405addresses many drawbacks of standard regression reported in the
1406
1407literature—particularly the subset selection problem.&lt;span
1408
1409style='mso-spacerun:yes'&gt;  &lt;/span&gt;Pace regression improves on classical ordinary
1410
1411least squares (OLS) regression by evaluating the effect of each variable and
1412
1413using a clustering analysis to improve the statistical basis for estimating
1414
1415their contribution to the overall regression.&lt;span style='mso-spacerun:yes'&gt; 
1416
1417&lt;/span&gt;As well as outperforming OLS, it also outperforms—in a remarkably
1418
1419general sense—other linear &lt;span class=SpellE&gt;modeling&lt;/span&gt; techniques in the
1420
1421literature, including subset selection procedures, which seek a reduction in
1422
1423dimensionality that falls out as a natural &lt;span class=SpellE&gt;byproduct&lt;/span&gt;
1424
1425of pace regression.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The paper defines
1426
1427six procedures that share the fundamental idea of pace regression, all of which
1428
1429are theoretically justified in terms of asymptotic performance.&lt;span
1430
1431style='mso-spacerun:yes'&gt;  &lt;/span&gt;Experiments confirm the performance
1432
1433improvement over other techniques.&lt;/span&gt;&lt;/p&gt;
1434
1435
1436
1437
1438
1439
1440
1441&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;99/13&lt;/span&gt;&lt;/p&gt;
1442
1443
1444
1445&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;A
1446
1447compression-based algorithm for Chinese word segmentation&lt;/span&gt;&lt;/p&gt;
1448
1449
1450
1451&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;W.J. &lt;span
1452
1453class=SpellE&gt;Teahan&lt;/span&gt;, &lt;span class=SpellE&gt;Yingying&lt;/span&gt; Wen, &lt;span
1454
1455class=SpellE&gt;Rodger&lt;/span&gt; &lt;span class=SpellE&gt;McNab&lt;/span&gt;, Ian H. &lt;span
1456
1457class=SpellE&gt;Witten&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
1458
1459
1460
1461&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;The Chinese
1462
1463language is written without using spaces or other word delimiters.&lt;span
1464
1465style='mso-spacerun:yes'&gt;  &lt;/span&gt;Although a text may be thought of as a
1466
1467corresponding sequence of words, there is considerable ambiguity in the
1468
1469placement of boundaries.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Interpreting a
1470
1471text as a sequence of words is beneficial for some information retrieval and
1472
1473storage tasks: for example, full-text search, word-based compression, and &lt;span
1474
1475class=SpellE&gt;keyphrase&lt;/span&gt; extraction.&lt;/span&gt;&lt;/p&gt;
1476
1477
1478
1479
1480
1481
1482
1483&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;We describe a
1484
1485scheme that infers appropriate positions for word boundaries using an adaptive
1486
1487language model that is standard in text compression.&lt;span
1488
1489style='mso-spacerun:yes'&gt;  &lt;/span&gt;It is trained on a corpus of pre-segmented
1490
1491text, and when applied to new text, interpolates word boundaries so as to &lt;span
1492
1493class=SpellE&gt;maximize&lt;/span&gt; the compression obtained.&lt;span
1494
1495style='mso-spacerun:yes'&gt;  &lt;/span&gt;This simple and general method performs well
1496
1497with respect to &lt;span class=SpellE&gt;specialized&lt;/span&gt; schemes for Chinese
1498
1499language segmentation.&lt;/span&gt;&lt;/p&gt;
1500
1501
1502
1503
1504
1505
1506
1507&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;99/14&lt;/span&gt;&lt;/p&gt;
1508
1509
1510
1511&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Clustering with
1512
1513finite data from &lt;span class=SpellE&gt;semi&lt;/span&gt;-parametric mixture
1514
1515distributions&lt;/span&gt;&lt;/p&gt;
1516
1517
1518
1519&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Yong Wang, Ian
1520
1521H. &lt;span class=SpellE&gt;Witten&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
1522
1523
1524
1525&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Existing
1526
1527clustering methods for the &lt;span class=SpellE&gt;semi&lt;/span&gt;-parametric mixture
1528
1529distribution perform well as the volume of data increases.&lt;span
1530
1531style='mso-spacerun:yes'&gt;  &lt;/span&gt;However, they all suffer from a serious
1532
1533drawback in finite-data situations: small outlying groups of data points can be
1534
1535completely ignored in the clusters that are produced, no matter how far away
1536
1537they lie from the major clusters.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;This
1538
1539can result in unbounded loss if the loss function is sensitive to the distance
1540
1541between clusters.&lt;/span&gt;&lt;/p&gt;
1542
1543
1544
1545&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;This paper
1546
1547proposes a new distance-based clustering method that overcomes the problem by
1548
1549avoiding global constraints.&lt;span style='mso-spacerun:yes'&gt; 
1550
1551&lt;/span&gt;Experimental results illustrate its superiority to existing methods when
1552
1553small clusters are present in finite data sets; they also suggest that it is
1554
1555more accurate and stable than other methods even when there are no small
1556
1557clusters.&lt;/span&gt;&lt;/p&gt;
1558
1559
1560
1561
1562
1563
1564
1565&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;99/15&lt;/span&gt;&lt;/p&gt;
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;99/16&lt;/span&gt;&lt;/p&gt;
1578
1579
1580
1581&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;The &lt;span
1582
1583class=SpellE&gt;Niupepa&lt;/span&gt; Collection:&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Opening
1584
1585the blinds on a window to the past&lt;/span&gt;&lt;/p&gt;
1586
1587
1588
1589&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span class=SpellE&gt;&lt;span
1590
1591lang=EN-GB&gt;Te&lt;/span&gt;&lt;/span&gt;&lt;span lang=EN-GB&gt; &lt;span class=SpellE&gt;Taka&lt;/span&gt; &lt;span
1592
1593class=SpellE&gt;Keegan&lt;/span&gt;, Sally Jo Cunningham, Mark &lt;span class=SpellE&gt;Apperley&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
1594
1595
1596
1597&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;This paper
1598
1599describes the building of a digital library collection of historic
1600
1601newspapers.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The newspapers (&lt;span
1602
1603class=SpellE&gt;&lt;i style='mso-bidi-font-style:normal'&gt;Niupepa&lt;/i&gt;&lt;/span&gt; in &lt;span
1604
1605class=SpellE&gt;Maori&lt;/span&gt;), which were published in New Zealand during the
1606
1607period 1842 to 1933, form a unique historical record of the &lt;span class=SpellE&gt;Maori&lt;/span&gt;
1608
1609language, and of events from an historical perspective.&lt;span
1610
1611style='mso-spacerun:yes'&gt;  &lt;/span&gt;Images of these newspapers have been
1612
1613converted to digital form, electronic text extracted from these, and the
1614
1615collection is now being made available over the Internet as a part of the New
1616
1617Zealand Digital Library (NZDL) project at the University of Waikato.&lt;/span&gt;&lt;/p&gt;
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/1&lt;/span&gt;&lt;/p&gt;
1634
1635
1636
1637&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Boosting trees
1638
1639for cost-sensitive classifications&lt;/span&gt;&lt;/p&gt;
1640
1641
1642
1643&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Kai &lt;span
1644
1645class=SpellE&gt;Ming&lt;/span&gt; Ting, &lt;span class=SpellE&gt;Zijian&lt;/span&gt; &lt;span
1646
1647class=SpellE&gt;Zheng&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
1648
1649
1650
1651&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;This paper
1652
1653explores two boosting techniques for cost-sensitive tree classification in the
1654
1655situation where misclassification costs change very often.&lt;span
1656
1657style='mso-spacerun:yes'&gt;  &lt;/span&gt;Ideally, one would like to have only one
1658
1659induction, and use the induced model for different misclassification
1660
1661costs.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Thus, it demands robustness of
1662
1663the induced model against cost changes.&lt;span style='mso-spacerun:yes'&gt; 
1664
1665&lt;/span&gt;Combining multiple trees gives robust predictions against this
1666
1667change.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We demonstrate that ordinary
1668
1669boosting combined with the minimum expected cost criterion to select the
1670
1671prediction class is a good solution under this situation.&lt;span
1672
1673style='mso-spacerun:yes'&gt;  &lt;/span&gt;We also introduce a variant of the ordinary
1674
1675boosting procedure which &lt;span class=SpellE&gt;utilizes&lt;/span&gt; the cost
1676
1677information during training.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We show
1678
1679that the proposed technique performs better than the ordinary boosting in terms
1680
1681of misclassification cost.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;However, this
1682
1683technique requires to induce a set of new trees every time the cost
1684
1685changes.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Our empirical investigation
1686
1687also reveals some interesting &lt;span class=SpellE&gt;behavior&lt;/span&gt; of boosting
1688
1689decision trees for cost-sensitive classification.&lt;/span&gt;&lt;/p&gt;
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/2&lt;/span&gt;&lt;/p&gt;
1702
1703
1704
1705&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Generating
1706
1707accurate rule sets without global &lt;span class=SpellE&gt;optimization&lt;/span&gt; &lt;/span&gt;&lt;/p&gt;
1708
1709
1710
1711&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span class=SpellE&gt;&lt;span
1712
1713lang=EN-GB&gt;Eibe&lt;/span&gt;&lt;/span&gt;&lt;span lang=EN-GB&gt; Frank, Ian H. &lt;span
1714
1715class=SpellE&gt;Witten&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
1716
1717
1718
1719&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;The two dominant
1720
1721schemes for rule-learning, C4.5 and RIPPER, both operate in two stages.&lt;span
1722
1723style='mso-spacerun:yes'&gt;  &lt;/span&gt;First they induce an initial rule set and
1724
1725then they refine it using a rather complex &lt;span class=SpellE&gt;optimization&lt;/span&gt;
1726
1727stage that discards (C4.5) or adjusts (RIPPER) individual rules to make them
1728
1729work better together.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;In contrast, this
1730
1731paper shows how good rule sets can be learned one rule at a time, without any
1732
1733need for global &lt;span class=SpellE&gt;optimization&lt;/span&gt;.&lt;span
1734
1735style='mso-spacerun:yes'&gt;  &lt;/span&gt;We present an algorithm for inferring rules
1736
1737by repeatedly generating partial decision trees, thus combining the two major
1738
1739paradigms for rule generation-creating rules from decision trees and the
1740
1741separate-and-conquer rule-learning technique.&lt;span style='mso-spacerun:yes'&gt; 
1742
1743&lt;/span&gt;The algorithm is straightforward and elegant: despite this, experiments
1744
1745on standard &lt;span class=SpellE&gt;datasets&lt;/span&gt; show that it produces rule sets
1746
1747that are as accurate as and of similar size to those generated by C4.5, and
1748
1749more accurate than &lt;span class=SpellE&gt;RIPPER's&lt;/span&gt;.&lt;span
1750
1751style='mso-spacerun:yes'&gt;  &lt;/span&gt;Moreover, it operates efficiently, and
1752
1753because it avoids &lt;span class=SpellE&gt;postprocessing&lt;/span&gt;, does not suffer the
1754
1755extremely slow performance on pathological example sets for which the C4.5
1756
1757method has been &lt;span class=SpellE&gt;criticized&lt;/span&gt;.&lt;/span&gt;&lt;/p&gt;
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/3&lt;/span&gt;&lt;/p&gt;
1770
1771
1772
1773&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span class=SpellE&gt;&lt;span
1774
1775lang=EN-GB&gt;VQuery&lt;/span&gt;&lt;/span&gt;&lt;span lang=EN-GB&gt;: a graphical user interface
1776
1777for Boolean query specification and dynamic result preview&lt;/span&gt;&lt;/p&gt;
1778
1779
1780
1781&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Steve Jones&lt;/span&gt;&lt;/p&gt;
1782
1783
1784
1785&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Textual query
1786
1787languages based on Boolean logic are common amongst the search facilities of
1788
1789on-line information repositories.&lt;span style='mso-spacerun:yes'&gt; 
1790
1791&lt;/span&gt;However, there is evidence to suggest that the syntactic and semantic
1792
1793demands of such languages lead to user errors and adversely affect the time
1794
1795that it takes users to form queries.&lt;span style='mso-spacerun:yes'&gt; 
1796
1797&lt;/span&gt;Additionally, users are faced with user interfaces to these repositories
1798
1799which are unresponsive and uninformative, and consequently fail to support
1800
1801effective query refinement.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We suggest
1802
1803that graphical query languages, particularly Venn-like diagrams, provide a
1804
1805natural medium for Boolean query specification which overcomes the problems of
1806
1807textual query languages.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Also, dynamic
1808
1809result previews can be seamlessly integrated with graphical query specification
1810
1811to increase the effectiveness of query refinements.&lt;span
1812
1813style='mso-spacerun:yes'&gt;  &lt;/span&gt;We describe &lt;span class=SpellE&gt;VQuery&lt;/span&gt;,
1814
1815a query interface to the New Zealand Digital Library which exploits querying by
1816
1817Venn diagrams and integrated query result previews.&lt;/span&gt;&lt;/p&gt;
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/4&lt;/span&gt;&lt;/p&gt;
1830
1831
1832
1833&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Revising
1834
1835&amp;lt;I&amp;gt;Z&amp;lt;/I&amp;gt;: semantics and logic&lt;/span&gt;&lt;/p&gt;
1836
1837
1838
1839&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Martin C. &lt;span
1840
1841class=SpellE&gt;Henson&lt;/span&gt;, Steve Reeves&lt;/span&gt;&lt;/p&gt;
1842
1843
1844
1845&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;We introduce a
1846
1847simple specification logic &amp;lt;I&amp;gt;Z&amp;lt;/I&amp;gt;c comprising a logic and
1848
1849semantics (in &amp;lt;I&amp;gt;ZF&amp;lt;/I&amp;gt; set theory).&lt;span
1850
1851style='mso-spacerun:yes'&gt;  &lt;/span&gt;We then provide an interpretation for (a
1852
1853rational reconstruction of) the specification language &amp;lt;I&amp;gt;Z&amp;lt;/I&amp;gt;
1854
1855within &amp;lt;I&amp;gt;Z&amp;lt;/I&amp;gt;c.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;As a
1856
1857result we obtain a sound logic for &amp;lt;I&amp;gt;Z&amp;lt;/I&amp;gt;, including the schema
1858
1859calculus.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;A consequence of our
1860
1861formalisation is a critique of a number of concepts used in
1862
1863&amp;lt;I&amp;gt;Z&amp;lt;/I&amp;gt;.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We demonstrate
1864
1865that the complications and confusions which these concepts introduce can be avoided
1866
1867without compromising &lt;span class=SpellE&gt;expressibility&lt;/span&gt;.&lt;/span&gt;&lt;/p&gt;
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/5&lt;/span&gt;&lt;/p&gt;
1880
1881
1882
1883&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;A logic for the
1884
1885schema calculus&lt;/span&gt;&lt;/p&gt;
1886
1887
1888
1889&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Martin C. &lt;span
1890
1891class=SpellE&gt;Henson&lt;/span&gt;, Steve Reeves&lt;/span&gt;&lt;/p&gt;
1892
1893
1894
1895&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;In this paper we
1896
1897introduce and investigate a logic for the schema calculus of
1898
1899&amp;lt;I&amp;gt;Z&amp;lt;/I&amp;gt;.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The schema
1900
1901calculus is arguably the reason for &amp;lt;I&amp;gt;Z&amp;lt;/I&amp;gt;’s popularity but so
1902
1903far no true calculus (a sound system of rules for reasoning about schema
1904
1905expressions) has been given.&lt;span style='mso-spacerun:yes'&gt; 
1906
1907&lt;/span&gt;Presentations thus far have either failed to provide a calculus (e.g.
1908
1909the draft standard [3]) or have fallen back on informal descriptions at a
1910
1911syntactic level (most text books e.g. [7]).&lt;span style='mso-spacerun:yes'&gt; 
1912
1913&lt;/span&gt;Once the calculus is established we introduce a derived &lt;span
1914
1915class=SpellE&gt;equational&lt;/span&gt; logic which enables us to formalise properly the
1916
1917informal notations of schema expression equality to be found in the literature.&lt;/span&gt;&lt;/p&gt;
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/6&lt;/span&gt;&lt;/p&gt;
1930
1931
1932
1933&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;New foundations
1934
1935for &amp;lt;I&amp;gt;Z&amp;lt;/I&amp;gt;&lt;/span&gt;&lt;/p&gt;
1936
1937
1938
1939&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Martin C. &lt;span
1940
1941class=SpellE&gt;Henson&lt;/span&gt;, Steve Reeves&lt;/span&gt;&lt;/p&gt;
1942
1943
1944
1945&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;We provide a
1946
1947constructive and &lt;span class=SpellE&gt;intensional&lt;/span&gt; interpretation for the
1948
1949specification language &amp;lt;I&amp;gt;Z&amp;lt;/I&amp;gt; in a theory of operations and kinds
1950
1951&amp;lt;I&amp;gt;T&amp;lt;/I&amp;gt;.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The motivation is
1952
1953to facilitate the development of an integrated approach to program
1954
1955construction.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We illustrate the new
1956
1957foundations for &amp;lt;I&amp;gt;Z&amp;lt;/I&amp;gt; with examples.&lt;/span&gt;&lt;/p&gt;
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/7&lt;/span&gt;&lt;/p&gt;
1970
1971
1972
1973&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Predicting apple
1974
1975bruising relationships using machine learning&lt;/span&gt;&lt;/p&gt;
1976
1977
1978
1979&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;G. Holmes, S.J.
1980
1981Cunningham, B.T. &lt;span class=SpellE&gt;Dela&lt;/span&gt; Rue, &lt;span class=SpellE&gt;A.F.&lt;/span&gt;
1982
1983&lt;span class=SpellE&gt;Bollen&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
1984
1985
1986
1987&lt;p class=MsoBodyText&gt;&lt;span lang=EN-US&gt;Many models have been used to describe
1988
1989the influence of internal or external factors on apple bruising.&lt;span
1990
1991style='mso-spacerun:yes'&gt;  &lt;/span&gt;Few of these have addressed the application
1992
1993of derived relationships to the evaluation of commercial operations.&lt;span
1994
1995style='mso-spacerun:yes'&gt;  &lt;/span&gt;From an industry perspective, a model must
1996
1997enable fruit to be rejected on the basis of a commercially significant bruise
1998
1999and must also accurately quantify the effects of various combinations of input
2000
2001features (such as &lt;span class=SpellE&gt;cultivar&lt;/span&gt;, maturity, size, and so
2002
2003on) on bruise prediction.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Input features
2004
2005must in turn have characteristics which are measurable commercially; for
2006
2007example, the measure of force should be impact energy rather than energy
2008
2009absorbed.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Further, as the commercial
2010
2011criteria for acceptable damage levels change, the model should be versatile
2012
2013enough to regenerate new bruise thresholds from existing data.&lt;/span&gt;&lt;/p&gt;
2014
2015
2016
2017
2018
2019
2020
2021&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Machine learning
2022
2023is a burgeoning technology with a vast range of potential applications
2024
2025particularly in agriculture where large amounts of data can be readily
2026
2027collected [1].&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The main advantage of
2028
2029using a machine learning method in an application is that the models built for
2030
2031prediction can be viewed and understood by the owner of the data who is in a
2032
2033position to determine the usefulness of the model, an essential component in a
2034
2035commercial environment.&lt;/span&gt;&lt;/p&gt;
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/8&lt;/span&gt;&lt;/p&gt;
2048
2049
2050
2051&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;An evaluation of
2052
2053passage-level indexing strategies for a technical report archive&lt;/span&gt;&lt;/p&gt;
2054
2055
2056
2057&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Michael Williams&lt;/span&gt;&lt;/p&gt;
2058
2059
2060
2061&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Past research
2062
2063has shown that using evidence from document passages rather than complete
2064
2065documents is an effective way of improving the precision of full-text database
2066
2067searches.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;However, passage-level
2068
2069indexing has yet to be widely adopted for commercial or online databases.&lt;/span&gt;&lt;/p&gt;
2070
2071
2072
2073
2074
2075
2076
2077&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;This paper
2078
2079reports on experiments designed to test the efficacy of passage-level indexing
2080
2081with a particular collection of a full-text online database, the New Zealand
2082
2083Digital Library.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Discourse passages and
2084
2085word-window passages are used for the indexing process.&lt;span
2086
2087style='mso-spacerun:yes'&gt;  &lt;/span&gt;Both ranked and Boolean searching are used to
2088
2089test the resulting indexes.&lt;/span&gt;&lt;/p&gt;
2090
2091
2092
2093
2094
2095
2096
2097&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Overlapping
2098
2099window passages are shown to offer the best retrieval performance with both
2100
2101ranked and Boolean queries.&lt;span style='mso-spacerun:yes'&gt; 
2102
2103&lt;/span&gt;Modifications may be necessary to the term weighting methodology in
2104
2105order to ensure optimal ranked query performance.&lt;/span&gt;&lt;/p&gt;
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/9&lt;/span&gt;&lt;/p&gt;
2118
2119
2120
2121&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Managing
2122
2123multiple collections, multiple languages, and multiple media in a distributed
2124
2125digital library&lt;/span&gt;&lt;/p&gt;
2126
2127
2128
2129&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Ian H. &lt;span
2130
2131class=SpellE&gt;Witten&lt;/span&gt;, &lt;span class=SpellE&gt;Rodger&lt;/span&gt; &lt;span
2132
2133class=SpellE&gt;McNab&lt;/span&gt;, Steve Jones, Sally Jo Cunningham, David Bainbridge,
2134
2135Mark &lt;span class=SpellE&gt;Apperley&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
2136
2137
2138
2139&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Managing the &lt;span
2140
2141class=SpellE&gt;organizational&lt;/span&gt; and software complexity of a comprehensive
2142
2143digital library presents a significant challenge.&lt;span
2144
2145style='mso-spacerun:yes'&gt;  &lt;/span&gt;Different library collections each have their
2146
2147own distinctive features.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Different
2148
2149presentation languages have structural implications such as left-to-right
2150
2151writing order and text-only interfaces for the visually impaired.&lt;span
2152
2153style='mso-spacerun:yes'&gt;  &lt;/span&gt;Different media involve different file
2154
2155formats, and—more importantly—radically different search strategies are
2156
2157required for non-textual media.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;In a
2158
2159distributed library, new collections can appear asynchronously on servers in
2160
2161different parts of the world.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;And as
2162
2163searching interfaces mature from the command-line era exemplified by current
2164
2165Web search engines into the age of reactive visual interfaces, experimental new
2166
2167interfaces must be developed, supported, and tested.&lt;span
2168
2169style='mso-spacerun:yes'&gt;  &lt;/span&gt;This paper describes our experience, gained
2170
2171from operating a substantial digital library service over several years, in
2172
2173solving these problems by designing an appropriate software architecture.&lt;/span&gt;&lt;/p&gt;
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/10&lt;/span&gt;&lt;/p&gt;
2186
2187
2188
2189&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Experiences with
2190
2191a weighted decision tree learner&lt;/span&gt;&lt;/p&gt;
2192
2193
2194
2195&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;John G. &lt;span
2196
2197class=SpellE&gt;Cleary&lt;/span&gt;, Leonard E. &lt;span class=SpellE&gt;Trigg&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
2198
2199
2200
2201&lt;p class=MsoBodyText&gt;&lt;span lang=EN-US&gt;Machine learning algorithms for inferring
2202
2203decision trees typically choose a single “best” tree to describe the training
2204
2205data.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Recent research has shown that
2206
2207classification performance can be significantly improved by voting predictions
2208
2209of multiple, independently produced decision trees.&lt;span
2210
2211style='mso-spacerun:yes'&gt;  &lt;/span&gt;This paper describes an algorithm, OB1, that
2212
2213makes a weighted sum over many possible models.&lt;span style='mso-spacerun:yes'&gt; 
2214
2215&lt;/span&gt;We describe one instance of OB1, that includes &amp;lt;I&amp;gt;all&amp;lt;/I&amp;gt;
2216
2217possible decision trees as well as naïve &lt;span class=SpellE&gt;Bayesian&lt;/span&gt;
2218
2219models.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;OB1 is compared with a number of
2220
2221other decision tree and instance based learning &lt;span class=SpellE&gt;algorithms&lt;/span&gt;
2222
2223on some of the data sets from the UCI repository.&lt;span
2224
2225style='mso-spacerun:yes'&gt;  &lt;/span&gt;Both an information gain and an accuracy
2226
2227measure are used for the comparison.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;On
2228
2229the information gain measure OB1 performs significantly better than all the
2230
2231other algorithms.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;On the accuracy
2232
2233measure it is significantly better than all the algorithms except naïve &lt;span
2234
2235class=SpellE&gt;Bayes&lt;/span&gt; which performs comparably to OB1.&lt;/span&gt;&lt;/p&gt;
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/11&lt;/span&gt;&lt;/p&gt;
2248
2249
2250
2251&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;An entropy gain
2252
2253measure of numeric prediction performance&lt;/span&gt;&lt;/p&gt;
2254
2255
2256
2257&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Leonard &lt;span
2258
2259class=SpellE&gt;Trigg&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
2260
2261
2262
2263&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Categorical
2264
2265classifier performance is typically evaluated with respect to error rate,
2266
2267expressed as a percentage of test instances that were not correctly
2268
2269classified.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;When a classifier produces
2270
2271multiple classifications for a test instance, the prediction is counted as
2272
2273incorrect (even if the correct class was one of the predictions).&lt;span
2274
2275style='mso-spacerun:yes'&gt;  &lt;/span&gt;Although commonly used in the literature,
2276
2277error rate is a coarse measure of classifier performance, as it is based only
2278
2279on a single prediction offered for a test instance.&lt;span
2280
2281style='mso-spacerun:yes'&gt;  &lt;/span&gt;Since many classifiers can produce a class
2282
2283distribution as a prediction, we should use this to provide a better measure of
2284
2285how much information the classifier is extracting from the domain.&lt;/span&gt;&lt;/p&gt;
2286
2287
2288
2289
2290
2291
2292
2293&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Numeric
2294
2295classifiers are a relatively new development in machine learning, and as such
2296
2297there is no single performance measure that has become standard.&lt;span
2298
2299style='mso-spacerun:yes'&gt;  &lt;/span&gt;Typically these machine learning schemes
2300
2301predict a single real number for each test instance, and the error between the
2302
2303predicted and actual value is used to calculate a myriad of performance
2304
2305measures such as correlation coefficient, root mean squared error, mean
2306
2307absolute error, relative absolute error, and root relative squared error.&lt;span
2308
2309style='mso-spacerun:yes'&gt;  &lt;/span&gt;With so many performance measures it is
2310
2311difficult to establish an overall performance evaluation.&lt;/span&gt;&lt;/p&gt;
2312
2313
2314
2315
2316
2317
2318
2319&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;The next section
2320
2321describes a performance measure for machine learning schemes that attempts to
2322
2323overcome the problems with current measures.&lt;span style='mso-spacerun:yes'&gt; 
2324
2325&lt;/span&gt;In addition, the same evaluation measure is used for categorical and
2326
2327numeric classifiers.&lt;/span&gt;&lt;/p&gt;
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/12&lt;/span&gt;&lt;/p&gt;
2344
2345
2346
2347&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Proceedings of
2348
2349CBISE ’98 CAiSE*98 Workshop on Component Based Information Systems Engineering&lt;/span&gt;&lt;/p&gt;
2350
2351
2352
2353&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Edited by John &lt;span
2354
2355class=SpellE&gt;Grundy&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
2356
2357
2358
2359&lt;p class=MsoBodyText&gt;&lt;span lang=EN-US&gt;Component-based information systems
2360
2361development is an area of research and practice of increasing importance.&lt;span
2362
2363style='mso-spacerun:yes'&gt;  &lt;/span&gt;Information Systems developers have &lt;span
2364
2365class=SpellE&gt;realised&lt;/span&gt; that traditional approaches to IS engineering
2366
2367produce monolithic, difficult to maintain, difficult to reuse systems.&lt;span
2368
2369style='mso-spacerun:yes'&gt;  &lt;/span&gt;In contrast, the use of software components,
2370
2371which embody data, functionality and well-specified and understood interfaces,
2372
2373makes interoperable, distributed and highly reusable IS components
2374
2375feasible.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Component-based approaches to
2376
2377IS engineering can be used at strategic and &lt;span class=SpellE&gt;organisational&lt;/span&gt;
2378
2379levels, to model business processes and whole IS architectures, in development
2380
2381methods which &lt;span class=SpellE&gt;utilise&lt;/span&gt; component-based models during
2382
2383analysis and design, and in system implementation.&lt;span
2384
2385style='mso-spacerun:yes'&gt;  &lt;/span&gt;Reusable components can allow end users to
2386
2387compose and configure their own Information Systems, possibly from a range of
2388
2389suppliers, and to more tightly couple their &lt;span class=SpellE&gt;organisational&lt;/span&gt;
2390
2391&lt;span class=SpellE&gt;workflows&lt;/span&gt; with their IS support.&lt;/span&gt;&lt;/p&gt;
2392
2393
2394
2395
2396
2397
2398
2399&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;This workshop
2400
2401proceedings contains a range of papers addressing one or more of the above
2402
2403issues relating to the use of component models for IS development.&lt;span
2404
2405style='mso-spacerun:yes'&gt;  &lt;/span&gt;All of these papers were refereed by at least
2406
2407two members of an international workshop committee comprising industry and
2408
2409academic researchers and users of component technologies.&lt;span
2410
2411style='mso-spacerun:yes'&gt;  &lt;/span&gt;Strategic uses of components are addressed in
2412
2413the first three papers, while the following three address uses of components for
2414
2415systems design and workflow management.&lt;span style='mso-spacerun:yes'&gt; 
2416
2417&lt;/span&gt;Systems development using components, and the provision of environments
2418
2419for component management are addressed in the following group of five
2420
2421papers.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The last three papers in this
2422
2423proceedings address component management and analysis techniques.&lt;/span&gt;&lt;/p&gt;
2424
2425
2426
2427
2428
2429
2430
2431&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;All of these
2432
2433papers provide new insights into the many&lt;span style='mso-spacerun:yes'&gt; 
2434
2435&lt;/span&gt;varied uses of component technology for IS engineering.&lt;span
2436
2437style='mso-spacerun:yes'&gt;  &lt;/span&gt;I hope you find them as interesting and
2438
2439useful as I have when collating this proceedings and organising the workshop.&lt;/span&gt;&lt;/p&gt;
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/13&lt;/span&gt;&lt;/p&gt;
2452
2453
2454
2455&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;An analysis of
2456
2457usage of a digital library&lt;/span&gt;&lt;/p&gt;
2458
2459
2460
2461&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Steve Jones,
2462
2463Sally Jo Cunningham, &lt;span class=SpellE&gt;Rodger&lt;/span&gt; &lt;span class=SpellE&gt;McNab&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
2464
2465
2466
2467&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;As experimental
2468
2469digital library &lt;span class=SpellE&gt;testbeds&lt;/span&gt; gain wider acceptance and
2470
2471develop significant user bases, it becomes important to investigate the ways in
2472
2473which users interact with the systems in practice.&lt;span
2474
2475style='mso-spacerun:yes'&gt;  &lt;/span&gt;Transaction logs are one source of usage
2476
2477information, and the information on user behaviour can be culled from them both
2478
2479automatically (through calculation of summary statistics) and manually (by
2480
2481examining query strings for semantic clues on search motivations and searching
2482
2483strategy).&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We conduct a transaction log
2484
2485analysis on user activity in the Computer Science Technical Reports Collection
2486
2487of the New Zealand Digital Library, and report insights gained and identify
2488
2489resulting search interface design issues.&lt;/span&gt;&lt;/p&gt;
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/14&lt;/span&gt;&lt;/p&gt;
2502
2503
2504
2505&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Measuring ATM
2506
2507traffic: final report for New Zealand Telecom&lt;/span&gt;&lt;/p&gt;
2508
2509
2510
2511&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;John &lt;span
2512
2513class=SpellE&gt;Cleary&lt;/span&gt;, Ian Graham, Murray Pearson, Tony &lt;span
2514
2515class=SpellE&gt;McGregor&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
2516
2517
2518
2519&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;The report
2520
2521describes the development of a low-cost ATM monitoring system, hosted by a
2522
2523standard PC.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The monitor can be used
2524
2525remotely returning information on ATM traffic flows to a central site.&lt;span
2526
2527style='mso-spacerun:yes'&gt;  &lt;/span&gt;The monitor is interfaced to a GPS timing
2528
2529receiver, which provides an absolute time accuracy of better than 1 &lt;span
2530
2531class=SpellE&gt;usec&lt;/span&gt;.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;By monitoring
2532
2533the same traffic flow at different points in a network it is possible to
2534
2535measure cell delay and delay variation in real time, and with existing
2536
2537traffic.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The monitoring system
2538
2539characterises cells by a CRC calculated over the cell payload, thus special
2540
2541measurement cells are not required.&lt;span style='mso-spacerun:yes'&gt; 
2542
2543&lt;/span&gt;Delays in both local area and wide-area networks have been measured
2544
2545using this system.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;It is possible to
2546
2547measure delay in a network that is not end-to-end ATM, as long as some cells
2548
2549remain identical at the entry and exit points.&lt;span style='mso-spacerun:yes'&gt; 
2550
2551&lt;/span&gt;Examples are given of traffic and delay measurements in both wide and
2552
2553local area network systems, including delays measured over the Internet from
2554
2555Canada to New Zealand.&lt;/span&gt;&lt;/p&gt;
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/15&lt;/span&gt;&lt;/p&gt;
2572
2573
2574
2575&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Despite its
2576
2577simplicity, the naïve &lt;span class=SpellE&gt;Bayes&lt;/span&gt; learning scheme performs
2578
2579well on most classification tasks, and is often significantly more accurate
2580
2581than more sophisticated methods.&lt;span style='mso-spacerun:yes'&gt; 
2582
2583&lt;/span&gt;Although the probability estimates that it produces can be inaccurate,
2584
2585it often assigns maximum probability to the correct class.&lt;span
2586
2587style='mso-spacerun:yes'&gt;  &lt;/span&gt;This suggests that its good performance might
2588
2589be restricted to situations where the output is categorical.&lt;span
2590
2591style='mso-spacerun:yes'&gt;  &lt;/span&gt;It is therefore interesting to see how it
2592
2593performs in domains where the predicted value is numeric, because in this case,
2594
2595predictions are more sensitive to inaccurate probability estimates.&lt;/span&gt;&lt;/p&gt;
2596
2597
2598
2599
2600
2601
2602
2603&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;This paper shows
2604
2605how to apply the naïve &lt;span class=SpellE&gt;Bayes&lt;/span&gt; methodology to numeric
2606
2607prediction (i.e. regression) tasks, and compares it to linear regression,
2608
2609instance-based learning, and a method that produces “model trees”—decision
2610
2611trees with linear regression functions at the leaves.&lt;span
2612
2613style='mso-spacerun:yes'&gt;  &lt;/span&gt;Although we exhibit an artificial &lt;span
2614
2615class=SpellE&gt;dataset&lt;/span&gt; for which naïve &lt;span class=SpellE&gt;Bayes&lt;/span&gt; is
2616
2617the method of choice, on real-world &lt;span class=SpellE&gt;datasets&lt;/span&gt; it is
2618
2619almost uniformly worse than model trees.&lt;span style='mso-spacerun:yes'&gt; 
2620
2621&lt;/span&gt;The comparison with linear regression depends on the error measure: for
2622
2623one measure naïve &lt;span class=SpellE&gt;Bayes&lt;/span&gt; performs similarly, for
2624
2625another it is worse.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Compared to
2626
2627instance-based learning, it performs similarly with respect to both
2628
2629measures.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;These results indicate that
2630
2631the simplistic statistical assumption that naïve &lt;span class=SpellE&gt;Bayes&lt;/span&gt;
2632
2633makes is indeed more restrictive for regression than for classification.&lt;/span&gt;&lt;/p&gt;
2634
2635
2636
2637
2638
2639
2640
2641&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/16&lt;/span&gt;&lt;/p&gt;
2642
2643
2644
2645&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Link as you
2646
2647type: using key phrases for automated dynamic link generation&lt;/span&gt;&lt;/p&gt;
2648
2649
2650
2651&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Steve Jones&lt;/span&gt;&lt;/p&gt;
2652
2653
2654
2655&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;When documents
2656
2657are collected together from diverse sources they are unlikely to contain useful
2658
2659hypertext links to support browsing amongst them.&lt;span
2660
2661style='mso-spacerun:yes'&gt;  &lt;/span&gt;For large collections of thousands of
2662
2663documents it is prohibitively resource intensive to manually insert links into
2664
2665each document.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Users of such collections
2666
2667may wish to relate documents within them to text that they are themselves
2668
2669generating.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;This process, often
2670
2671involving keyword searching, distracts from the authoring process and results
2672
2673in material related to query terms but not necessarily to the author’s
2674
2675document.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Query terms that are effective
2676
2677in one collection might not be so in another.&lt;span style='mso-spacerun:yes'&gt; 
2678
2679&lt;/span&gt;We have developed &lt;span class=SpellE&gt;Phrasier&lt;/span&gt;, a system that
2680
2681integrates authoring (of text and hyperlinks), browsing, querying and reading
2682
2683in support of information retrieval activities.&lt;span style='mso-spacerun:yes'&gt; 
2684
2685&lt;/span&gt;&lt;span class=SpellE&gt;Phrasier&lt;/span&gt; exploits key phrases which are
2686
2687automatically extracted from documents in a collection, and uses them as link
2688
2689anchors and to identify candidate destinations for hyperlinks.&lt;span
2690
2691style='mso-spacerun:yes'&gt;  &lt;/span&gt;This system suggests links into existing
2692
2693collections for purposes of authoring and retrieval of related information,
2694
2695creates links between documents in a collection and provides supportive
2696
2697document and link overviews.&lt;/span&gt;&lt;/p&gt;
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/17&lt;/span&gt;&lt;/p&gt;
2710
2711
2712
2713&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Melody based
2714
2715tune retrieval over the World Wide Web&lt;/span&gt;&lt;/p&gt;
2716
2717
2718
2719&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;David
2720
2721Bainbridge, &lt;span class=SpellE&gt;Rodger&lt;/span&gt; J. &lt;span class=SpellE&gt;McNab&lt;/span&gt;,
2722
2723Lloyd A. Smith&lt;/span&gt;&lt;/p&gt;
2724
2725
2726
2727&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;In this paper we
2728
2729describe the steps taken to develop a Web-based version of an existing
2730
2731stand-alone, single-user digital library application for &lt;span class=SpellE&gt;melodical&lt;/span&gt;
2732
2733searching of a collection of music.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;For
2734
2735the three key components: input, searching, and output, we assess the
2736
2737suitability of various Web-based strategies that deal with the now distributed
2738
2739software architecture and explain the decisions we made.&lt;span
2740
2741style='mso-spacerun:yes'&gt;  &lt;/span&gt;The resulting melody indexing service, known
2742
2743as MELDEX, has been in operation for one year, and the feedback we have
2744
2745received has been &lt;span class=SpellE&gt;favorable&lt;/span&gt;.&lt;/span&gt;&lt;/p&gt;
2746
2747
2748
2749
2750
2751
2752
2753&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;98/18&lt;/span&gt;&lt;/p&gt;
2754
2755
2756
2757&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Making oral
2758
2759history accessible over the World Wide Web&lt;/span&gt;&lt;/p&gt;
2760
2761
2762
2763&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;David
2764
2765Bainbridge, Sally Jo Cunningham&lt;/span&gt;&lt;/p&gt;
2766
2767
2768
2769&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;We describe a
2770
2771multimedia, WWW-based oral history collection constructed from off-the-shelf or
2772
2773publicly available software.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The source
2774
2775materials for the collection include audio tapes of interviews and summary
2776
2777transcripts of each interview, as well as photographs illustrating episodes
2778
2779mentioned in the tapes.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Sections of the
2780
2781transcripts are manually matched to associated segments of the tapes, and the
2782
2783tapes are &lt;span class=SpellE&gt;digitized&lt;/span&gt;.&lt;span style='mso-spacerun:yes'&gt; 
2784
2785&lt;/span&gt;Users search a full-text retrieval system based on the text transcripts
2786
2787to retrieve relevant transcript sections and their associated audio recordings
2788
2789and photographs.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;It is also possible to
2790
2791search for photos by matching text queries against text descriptions of the
2792
2793photos in the collection, where the located photos link back to their
2794
2795respective interview transcript and audio recordings.&lt;/span&gt;&lt;/p&gt;
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;b style='mso-bidi-font-weight:
2816
2817normal'&gt;&lt;span lang=EN-GB&gt;1997&lt;o:p&gt;&lt;/o:p&gt;&lt;/span&gt;&lt;/b&gt;&lt;/p&gt;
2818
2819
2820
2821
2822
2823
2824
2825&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/1&lt;/span&gt;&lt;/p&gt;
2826
2827
2828
2829&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;A dynamic and
2830
2831flexible representation of social relationships in CSCW&lt;/span&gt;&lt;/p&gt;
2832
2833
2834
2835&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Steve Jones,
2836
2837Steve Marsh&lt;/span&gt;&lt;/p&gt;
2838
2839
2840
2841&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;CSCW system
2842
2843designers lack effective support in addressing the social issues and
2844
2845interpersonal relationships which are linked with the use of CSCW systems.&lt;span
2846
2847style='mso-spacerun:yes'&gt;  &lt;/span&gt;We present a formal description of trust to
2848
2849support CSCW system designers in considering the social aspects of group work,
2850
2851embedding those considerations in systems and analysing computer supported
2852
2853group processes.&lt;/span&gt;&lt;/p&gt;
2854
2855
2856
2857
2858
2859
2860
2861&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;We argue that
2862
2863trust is a critical aspect in group work, and describe what we consider to be
2864
2865the building blocks of trust.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We then
2866
2867present a formal notation for the building blocks, their use in reasoning about
2868
2869social interactions and how they are amended over time.&lt;/span&gt;&lt;/p&gt;
2870
2871
2872
2873
2874
2875
2876
2877&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;We then consider
2878
2879how the formalism may be used in practice, and present some insights from
2880
2881initial analysis of the behaviour of the formalism.&lt;span
2882
2883style='mso-spacerun:yes'&gt;  &lt;/span&gt;This is followed by a description of possible
2884
2885amendments and extensions to the formalism.&lt;span style='mso-spacerun:yes'&gt; 
2886
2887&lt;/span&gt;We conclude that it is possible to formalise a notion of trust and to
2888
2889model the formalisation by a computational mechanism.&lt;/span&gt;&lt;/p&gt;
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/2&lt;/span&gt;&lt;/p&gt;
2902
2903
2904
2905&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Design issues
2906
2907for World Wide Web navigation visualisation tools&lt;/span&gt;&lt;/p&gt;
2908
2909
2910
2911&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Andy &lt;span
2912
2913class=SpellE&gt;Cockburn&lt;/span&gt;, Steve Jones&lt;/span&gt;&lt;/p&gt;
2914
2915
2916
2917&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;The World Wide
2918
2919Web (WWW) is a successful hypermedia information space used by millions of
2920
2921people, yet it suffers from many deficiencies and problems in support for
2922
2923navigation around its vast information space.&lt;span style='mso-spacerun:yes'&gt; 
2924
2925&lt;/span&gt;In this paper we identify the origins of these navigation problems,
2926
2927namely WWW browser design, WWW page design, and WWW page description
2928
2929languages.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Regardless of their origins,
2930
2931these problems are eventually represented to the user at the browser’s user
2932
2933interface.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;To help overcome these
2934
2935problems, many tools are being developed which allow users to visualise WWW
2936
2937subspaces.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We identify five key issues
2938
2939in the design and functionality of these visualisation systems: characteristics
2940
2941of the visual representation, the scope of the subspace representation, the
2942
2943mechanisms for generating the visualisation, the degree of browser
2944
2945independence, and the navigation support facilities.&lt;span
2946
2947style='mso-spacerun:yes'&gt;  &lt;/span&gt;We provide a critical review of the diverse
2948
2949range of WWW visualisation tools with respect to these issues.&lt;/span&gt;&lt;/p&gt;
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/3&lt;/span&gt;&lt;/p&gt;
2962
2963
2964
2965&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Stacked &lt;span
2966
2967class=SpellE&gt;generalization&lt;/span&gt;:&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;when
2968
2969does it work?&lt;/span&gt;&lt;/p&gt;
2970
2971
2972
2973&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Kai &lt;span
2974
2975class=SpellE&gt;Ming&lt;/span&gt; Ting, Ian H. &lt;span class=SpellE&gt;Witten&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
2976
2977
2978
2979&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Stacked &lt;span
2980
2981class=SpellE&gt;generalization&lt;/span&gt; is a general method of using a high-level
2982
2983model to combine lower-level models to achieve greater predictive
2984
2985accuracy.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;In this paper we address two
2986
2987crucial issues which have been considered to be a 'black art' in classification
2988
2989tasks ever since the introduction of stacked &lt;span class=SpellE&gt;generalization&lt;/span&gt;
2990
2991in 1992 by &lt;span class=SpellE&gt;Wolpert&lt;/span&gt;: the type of &lt;span class=SpellE&gt;generalizer&lt;/span&gt;
2992
2993that is suitable to derive the higher-level model, and the kind of attributes
2994
2995that should be used as its input. &lt;/span&gt;&lt;/p&gt;
2996
2997
2998
2999
3000
3001
3002
3003&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;We demonstrate
3004
3005the effectiveness of stacked &lt;span class=SpellE&gt;generalization&lt;/span&gt; for
3006
3007combining three different types of learning algorithms, and also for combining
3008
3009models of the same type derived from a single learning algorithm in a
3010
3011multiple-data-batches scenario.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We also
3012
3013compare the performance of stacked &lt;span class=SpellE&gt;generalization&lt;/span&gt;
3014
3015with published results of arcing and bagging.&lt;/span&gt;&lt;/p&gt;
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/4&lt;/span&gt;&lt;/p&gt;
3028
3029
3030
3031&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Browsing in
3032
3033digital libraries:&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;a phrase-based
3034
3035approach&lt;/span&gt;&lt;/p&gt;
3036
3037
3038
3039&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Craig &lt;span
3040
3041class=SpellE&gt;Nevill&lt;/span&gt;-Manning, Ian H. &lt;span class=SpellE&gt;Witten&lt;/span&gt;,
3042
3043Gordon W. &lt;span class=SpellE&gt;Paynter&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
3044
3045
3046
3047&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;A key question
3048
3049for digital libraries is this: how should one go about becoming familiar with a
3050
3051digital collection, as opposed to a physical one?&lt;span
3052
3053style='mso-spacerun:yes'&gt;  &lt;/span&gt;Digital collections generally present an
3054
3055appearance which is extremely opaque—a screen, typically a Web page, with no
3056
3057indication of what, or how much, lies beyond: whether a carefully-selected
3058
3059collection or a morass of worthless ephemera; whether half a dozen documents or
3060
3061many millions.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;At least physical
3062
3063collections occupy physical space, present a physical appearance, and exhibit
3064
3065tangible physical &lt;span class=SpellE&gt;organization&lt;/span&gt;.&lt;span
3066
3067style='mso-spacerun:yes'&gt;  &lt;/span&gt;When standing on the threshold of a large
3068
3069library one gains a sense of presence and permanence that reflects the care
3070
3071taken in building and maintaining the collection inside.&lt;span
3072
3073style='mso-spacerun:yes'&gt;  &lt;/span&gt;No-one could confuse it with a
3074
3075dung-heap!&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Yet in the digital world the
3076
3077difference is not so palpable.&lt;/span&gt;&lt;/p&gt;
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/5&lt;/span&gt;&lt;/p&gt;
3094
3095
3096
3097&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;A graphical
3098
3099notation for the design of information visualisations&lt;/span&gt;&lt;/p&gt;
3100
3101
3102
3103&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Matthew C. &lt;span
3104
3105class=SpellE&gt;Humphrey&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
3106
3107
3108
3109&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Visualisations
3110
3111are coherent, graphical expressions of complex information that enhance people’s
3112
3113ability to communicate and reason about that information.&lt;span
3114
3115style='mso-spacerun:yes'&gt;  &lt;/span&gt;Yet despite the importance of visualisations
3116
3117in helping people to understand and solve a wide variety of problems, there is
3118
3119a dearth of formal tools and methods for discussing, describing and designing
3120
3121them.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Although simple visualisations,
3122
3123such as bar charts and &lt;span class=SpellE&gt;scatterplots&lt;/span&gt;, are easily
3124
3125produced by modern interactive software, novel visualisations of multivariate, &lt;span
3126
3127class=SpellE&gt;multirelational&lt;/span&gt; data must be expressed in a programming
3128
3129language.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The Relational Visualisation
3130
3131Notation is a new, graphical language for designing such highly expressive
3132
3133visualisations that does not use programming constructs.&lt;span
3134
3135style='mso-spacerun:yes'&gt;  &lt;/span&gt;Instead, the notation is based on relational
3136
3137algebra, which is widely used in database query languages, and it is supported
3138
3139by a suite of direct manipulation tools.&lt;span style='mso-spacerun:yes'&gt; 
3140
3141&lt;/span&gt;This article presents the notation and examines the designs of some
3142
3143interesting visualisations.&lt;/span&gt;&lt;/p&gt;
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/6&lt;/span&gt;&lt;/p&gt;
3160
3161
3162
3163&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Applications of
3164
3165machine learning in information retrieval&lt;/span&gt;&lt;/p&gt;
3166
3167
3168
3169&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Sally Jo
3170
3171Cunningham, James &lt;span class=SpellE&gt;Littin&lt;/span&gt;, Ian H. &lt;span class=SpellE&gt;Witten&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
3172
3173
3174
3175&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Information
3176
3177retrieval systems provide access to collections of thousands, or millions, of
3178
3179documents, from which, by providing an appropriate description, users can
3180
3181recover any one.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Typically, users &lt;span
3182
3183class=SpellE&gt;iteratively&lt;/span&gt; refine the descriptions they provide to satisfy
3184
3185their needs, and retrieval systems can &lt;span class=SpellE&gt;utilize&lt;/span&gt; user
3186
3187feedback on selected documents to indicate the accuracy of the description at
3188
3189any stage.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The style of description
3190
3191required from the user, and the way it is employed to search the document
3192
3193database, are consequences of the indexing method used for the collection.&lt;span
3194
3195style='mso-spacerun:yes'&gt;  &lt;/span&gt;The index may take different forms, from
3196
3197storing keywords with links to individual documents, to clustering documents
3198
3199under related topics.&lt;/span&gt;&lt;/p&gt;
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/7&lt;/span&gt;&lt;/p&gt;
3216
3217
3218
3219&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Computer
3220
3221concepts without computers:&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;a first
3222
3223course in computer science&lt;/span&gt;&lt;/p&gt;
3224
3225
3226
3227&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Geoffrey Holmes,
3228
3229Tony C. Smith, William J. Rogers&lt;/span&gt;&lt;/p&gt;
3230
3231
3232
3233&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;While some
3234
3235institutions seek to make CS1 curricula more enjoyable by incorporating
3236
3237specialised educational software [1] or by setting more enjoyable programming
3238
3239assignments [2], we have joined the growing number of Computer Science
3240
3241departments that seek to improve the quality of the CS1 experience by focusing
3242
3243student attention away from the computer monitor [3,4].&lt;span
3244
3245style='mso-spacerun:yes'&gt;  &lt;/span&gt;Sophisticated computing concepts usually
3246
3247reserved for senior level courses are presented in a &lt;i&gt;popular
3248
3249science&lt;/i&gt; manner, and given equal time alongside the essential
3250
3251introductory programming material.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;By
3252
3253exposing students to a broad range of specific computational problems we
3254
3255endeavour to make the introductory course more interesting and enjoyable, and
3256
3257instil in students a sense of vision for areas they might specialise in as
3258
3259computing majors.&lt;/span&gt;&lt;/p&gt;
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/8&lt;/span&gt;&lt;/p&gt;
3276
3277
3278
3279&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;A sight-singing
3280
3281tutor&lt;/span&gt;&lt;/p&gt;
3282
3283
3284
3285&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Lloyd A. Smith, &lt;span
3286
3287class=SpellE&gt;Rodger&lt;/span&gt; J. &lt;span class=SpellE&gt;McNab&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
3288
3289
3290
3291&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;This paper
3292
3293describes a computer program designed to aid its users in learning to
3294
3295sight-sing.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Sight-singing-the ability to
3296
3297sing music from a score without prior study-is an important skill for musicians
3298
3299and holds a central place in most university music curricula.&lt;span
3300
3301style='mso-spacerun:yes'&gt;  &lt;/span&gt;Its importance to vocalists is obvious; it is
3302
3303also an important skill for instrumentalists and conductors because it develops
3304
3305the aural imagination necessary to judge how the music should sound, when
3306
3307played (&lt;span class=SpellE&gt;Benward&lt;/span&gt; and Carr 1991).&lt;span
3308
3309style='mso-spacerun:yes'&gt;  &lt;/span&gt;Furthermore, it is an important skill for
3310
3311amateur musicians, who can save a great deal of rehearsal time through an
3312
3313ability to sing music at sight.&lt;/span&gt;&lt;/p&gt;
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/9&lt;/span&gt;&lt;/p&gt;
3326
3327
3328
3329&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Stacking bagged
3330
3331and &lt;span class=SpellE&gt;dagged&lt;/span&gt; models&lt;/span&gt;&lt;/p&gt;
3332
3333
3334
3335&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Kai &lt;span
3336
3337class=SpellE&gt;Ming&lt;/span&gt; Ting, I.H. &lt;span class=SpellE&gt;Witten&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
3338
3339
3340
3341&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;In this paper,
3342
3343we investigate the method of &lt;i style='mso-bidi-font-style:normal'&gt;stacked &lt;span
3344
3345class=SpellE&gt;generalization&lt;/span&gt;&lt;/i&gt; in combining models derived from
3346
3347different subsets of a training &lt;span class=SpellE&gt;dataset&lt;/span&gt; by a single
3348
3349learning algorithm, as well as different algorithms.&lt;span
3350
3351style='mso-spacerun:yes'&gt;  &lt;/span&gt;The simplest way to combine predictions from
3352
3353competing models is majority vote, and the effect of the sampling regime used
3354
3355to generate training subsets has already been studied in this context-when
3356
3357bootstrap samples are used the method is called &lt;i style='mso-bidi-font-style:
3358
3359normal'&gt;bagging&lt;/i&gt;, and for disjoint samples we call it &lt;span class=SpellE&gt;&lt;i
3360
3361style='mso-bidi-font-style:normal'&gt;dagging&lt;/i&gt;&lt;/span&gt;.&lt;span
3362
3363style='mso-spacerun:yes'&gt;  &lt;/span&gt;This paper extends these studies to stacked &lt;span
3364
3365class=SpellE&gt;generalization&lt;/span&gt;, where a learning algorithm is employed to combine
3366
3367the models.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;This yields new methods
3368
3369dubbed &lt;i style='mso-bidi-font-style:normal'&gt;bag-stacking&lt;/i&gt; and &lt;span
3370
3371class=SpellE&gt;&lt;i style='mso-bidi-font-style:normal'&gt;dag&lt;/i&gt;&lt;/span&gt;&lt;i
3372
3373style='mso-bidi-font-style:normal'&gt;-stacking&lt;/i&gt;.&lt;/span&gt;&lt;/p&gt;
3374
3375
3376
3377
3378
3379
3380
3381&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;We demonstrate
3382
3383that bag-stacking and &lt;span class=SpellE&gt;dag&lt;/span&gt;-stacking can be effective
3384
3385for classification tasks even when the training samples cover just a small
3386
3387fraction of the full &lt;span class=SpellE&gt;dataset&lt;/span&gt;.&lt;span
3388
3389style='mso-spacerun:yes'&gt;  &lt;/span&gt;In contrast to earlier bagging results, we
3390
3391show that bagging and bag-stacking work for stable as well as unstable learning
3392
3393algorithms, as do &lt;span class=SpellE&gt;dagging&lt;/span&gt; and &lt;span class=SpellE&gt;dag&lt;/span&gt;-stacking.&lt;span
3394
3395style='mso-spacerun:yes'&gt;  &lt;/span&gt;We find that bag-stacking (&lt;span
3396
3397class=SpellE&gt;dag&lt;/span&gt;-stacking) almost always has higher predictive accuracy
3398
3399than bagging (&lt;span class=SpellE&gt;dagging&lt;/span&gt;), and we also show that
3400
3401bag-stacking models derived using two different algorithms is more effective
3402
3403than bagging.&lt;/span&gt;&lt;/p&gt;
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/10&lt;/span&gt;&lt;/p&gt;
3416
3417
3418
3419&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Extracting text
3420
3421from Postscript&lt;/span&gt;&lt;/p&gt;
3422
3423
3424
3425&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Craig &lt;span
3426
3427class=SpellE&gt;Nevill&lt;/span&gt;-Manning, Todd Reed, Ian H. &lt;span class=SpellE&gt;Witten&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
3428
3429
3430
3431&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;We show how to
3432
3433extract plain text from PostScript files. A textual scan is inadequate because
3434
3435PostScript interpreters can generate characters on the page that do not appear
3436
3437in the source file. Furthermore, word and line breaks are implicit in the
3438
3439graphical rendition, and must be inferred from the positioning of word
3440
3441fragments. We present a robust technique for extracting text and &lt;span
3442
3443class=SpellE&gt;recognizing&lt;/span&gt; words and paragraphs. The method uses a
3444
3445standard PostScript interpreter but redefines several PostScript operators, and
3446
3447simple heuristics are employed to locate word and line breaks. The scheme has
3448
3449been used to create a full-text index, and plain-text versions, of 40,000
3450
3451technical reports (34 &lt;span class=SpellE&gt;Gbyte&lt;/span&gt; of PostScript). Other
3452
3453text-extraction systems are reviewed: none offer the same combination of
3454
3455robustness and simplicity.&lt;/span&gt;&lt;/p&gt;
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/11&lt;/span&gt;&lt;/p&gt;
3468
3469
3470
3471&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Gathering and
3472
3473indexing rich fragments of the World Wide Web&lt;/span&gt;&lt;/p&gt;
3474
3475
3476
3477&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Geoffrey Holmes,
3478
3479William J Rogers&lt;/span&gt;&lt;/p&gt;
3480
3481
3482
3483&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;While the World
3484
3485Wide Web (WWW) is an attractive option as a resource for teaching and research
3486
3487it does have some undesirable features. The cost of allowing students unlimited
3488
3489access can be high—both in money and time; students may become addicted to
3490
3491'surfing' the web—exploring purely for entertainment—and jeopardise their
3492
3493studies. Students are likely to discover undesirable material because large
3494
3495scale search engines index sites regardless of their merit. Finally, the
3496
3497explosive growth of WWW usage means that servers and networks are often
3498
3499overloaded, to the extent that a student may gain a very negative view of the
3500
3501technology.&lt;/span&gt;&lt;/p&gt;
3502
3503
3504
3505
3506
3507
3508
3509&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;We have developed
3510
3511a piece of software which attempts to address these issues by capturing rich
3512
3513fragments of the WWW onto local storage media. It is possible to put a
3514
3515collection onto CD ROM, providing portability and inexpensive storage. This
3516
3517enables the presentation of the WWW to distance learning students, who do not
3518
3519have internet access. The software interfaces to standard, commonly available
3520
3521web browsers, acting as a proxy server to the files stored on the local media,
3522
3523and provides a search engine giving full text searching capability within the
3524
3525collection.&lt;/span&gt;&lt;/p&gt;
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/12&lt;/span&gt;&lt;/p&gt;
3542
3543
3544
3545&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Using model
3546
3547trees for classification&lt;/span&gt;&lt;/p&gt;
3548
3549
3550
3551&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span class=SpellE&gt;&lt;span
3552
3553lang=EN-GB&gt;Eibe&lt;/span&gt;&lt;/span&gt;&lt;span lang=EN-GB&gt; Frank, Yong Wang, Stuart &lt;span
3554
3555class=SpellE&gt;Inglis&lt;/span&gt;, Geoffrey Holmes, Ian H. &lt;span class=SpellE&gt;Witten&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
3556
3557
3558
3559&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Model trees,
3560
3561which are a type of decision tree with linear regression functions at the
3562
3563leaves, form the basis of a recent successful technique for predicting
3564
3565continuous numeric values.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;They can be
3566
3567applied to classification problems by employing a standard method of
3568
3569transforming a classification problem into a problem of function
3570
3571approximation.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Surprisingly, using this
3572
3573simple transformation the model tree &lt;span class=SpellE&gt;inducer&lt;/span&gt; M5',
3574
3575based on &lt;span class=SpellE&gt;Quinlan's&lt;/span&gt; M5, generates more accurate
3576
3577classifiers than the state-of-the-art decision tree learner C5.0, particularly
3578
3579when most of the attributes are numeric.&lt;/span&gt;&lt;/p&gt;
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/13&lt;/span&gt;&lt;/p&gt;
3592
3593
3594
3595&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Discovering inter-attribute
3596
3597relationships&lt;/span&gt;&lt;/p&gt;
3598
3599
3600
3601&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Geoffrey Holmes&lt;/span&gt;&lt;/p&gt;
3602
3603
3604
3605&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;It is important
3606
3607to discover relationships between attributes being used to predict a class
3608
3609attribute in supervised learning situations for two reasons.&lt;span
3610
3611style='mso-spacerun:yes'&gt;  &lt;/span&gt;First, any such relationship will be
3612
3613potentially interesting to the provider of a &lt;span class=SpellE&gt;dataset&lt;/span&gt;
3614
3615in its own right.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Second, it would
3616
3617simplify a learning algorithm's search space, and the related irrelevant
3618
3619feature and subset selection problem, if the relationships were removed from &lt;span
3620
3621class=SpellE&gt;datasets&lt;/span&gt; ahead of learning.&lt;span style='mso-spacerun:yes'&gt; 
3622
3623&lt;/span&gt;An algorithm to discover such relationships is presented in this
3624
3625paper.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The algorithm is described and a
3626
3627surprising number of inter-attribute relationships are discovered in &lt;span
3628
3629class=SpellE&gt;datasets&lt;/span&gt; from the University of California at Irvine (UCI)
3630
3631repository.&lt;/span&gt;&lt;/p&gt;
3632
3633
3634
3635
3636
3637
3638
3639
3640
3641
3642
3643&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/14&lt;/span&gt;&lt;/p&gt;
3644
3645
3646
3647&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Learning from &lt;span
3648
3649class=SpellE&gt;batched&lt;/span&gt; data:&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;model
3650
3651combination &lt;span class=SpellE&gt;vs&lt;/span&gt; data combination&lt;/span&gt;&lt;/p&gt;
3652
3653
3654
3655&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Kai &lt;span
3656
3657class=SpellE&gt;Ming&lt;/span&gt; Ting, Boon &lt;span class=SpellE&gt;Toh&lt;/span&gt; Low, Ian H. &lt;span
3658
3659class=SpellE&gt;Witten&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
3660
3661
3662
3663&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;When presented
3664
3665with multiple batches of data, one can either combine them into a single batch
3666
3667before applying a machine learning procedure or learn from each batch
3668
3669independently and combine the resulting models.&lt;span style='mso-spacerun:yes'&gt; 
3670
3671&lt;/span&gt;The former procedure, data combination, is straightforward; this paper
3672
3673investigates the latter, model combination.&lt;span style='mso-spacerun:yes'&gt; 
3674
3675&lt;/span&gt;Given an appropriate combination method, one might expect model
3676
3677combination to prove superior when the data in each batch was obtained under
3678
3679somewhat different conditions or when different learning algorithms were used
3680
3681on the batches.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Empirical results show
3682
3683that model combination often outperforms data combination even when the batches
3684
3685are drawn randomly from a single source of data and the same learning method is
3686
3687used on each.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Moreover, this is not just
3688
3689an &lt;span class=SpellE&gt;artifact&lt;/span&gt; of one particular method of combining
3690
3691models: it occurs with several different combination methods.&lt;/span&gt;&lt;/p&gt;
3692
3693
3694
3695
3696
3697
3698
3699&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;We relate this
3700
3701phenomenon to the learning curve of the classifiers being used.&lt;span
3702
3703style='mso-spacerun:yes'&gt;  &lt;/span&gt;Early in the learning process when the
3704
3705learning curve is steep there is much to gain from data combination, but later
3706
3707when it becomes shallow there is less to gain and model combination achieves a
3708
3709greater reduction in variance and hence a lower error rate.&lt;/span&gt;&lt;/p&gt;
3710
3711
3712
3713
3714
3715
3716
3717&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;The practical
3718
3719implication of these results is that one should consider using model
3720
3721combination rather than data combination, especially when multiple batches of
3722
3723data for the same task are readily available.&lt;span style='mso-spacerun:yes'&gt; 
3724
3725&lt;/span&gt;It is often superior even when the batches are drawn randomly from a
3726
3727single sample, and we expect its advantage to increase if genuine statistical
3728
3729differences between the batches exist.&lt;/span&gt;&lt;/p&gt;
3730
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/15&lt;/span&gt;&lt;/p&gt;
3742
3743
3744
3745&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Information
3746
3747seeking retrieval, reading and storing behaviour of library users&lt;/span&gt;&lt;/p&gt;
3748
3749
3750
3751&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Turner K.&lt;/span&gt;&lt;/p&gt;
3752
3753
3754
3755&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;In the interest
3756
3757of digital libraries, it is advisable that designers be aware of the potential
3758
3759behaviour of the users of such a system.&lt;span style='mso-spacerun:yes'&gt; 
3760
3761&lt;/span&gt;There are two distinct parts under investigation, the interaction
3762
3763between traditional libraries involving the seeking and retrieval of relevant
3764
3765material, and the reading and storage behaviours ensuing. Through this
3766
3767analysis, the findings could be incorporated into digital library facilities.
3768
3769There have been copious amounts of research on information seeking leading to
3770
3771the development of behavioural models to describe the process. Often research
3772
3773on the information seeking practices of individuals is based on the task and
3774
3775field of study. The information seeking model, presented by Ellis et al.
3776
3777(1993), characterises the format of this study where it is used to compare
3778
3779various research on the information seeking practices of groups of people (from
3780
3781academics to professionals). It is found that, although researchers do make use
3782
3783of library facilities, they tend to rely heavily on their own collections and
3784
3785primarily use the library as a source for previously identified information,
3786
3787browsing and &lt;span class=SpellE&gt;interloan&lt;/span&gt;. It was found that there are
3788
3789significant differences in user behaviour between the groups analysed. When
3790
3791looking at the reading and storage of material it was hard to draw conclusions,
3792
3793due to the lack of substantial research and information on the topic. However,
3794
3795through the use of reading strategies, a general idea on how readers behave can
3796
3797be developed. Designers of digital libraries can benefit from the guidelines
3798
3799presented here to better understand their audience.&lt;/span&gt;&lt;/p&gt;
3800
3801
3802
3803
3804
3805
3806
3807
3808
3809
3810
3811&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/16&lt;/span&gt;&lt;/p&gt;
3812
3813
3814
3815&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Proceedings of
3816
3817the INTERACT97 Combined Workshop on CSCW in HCI-&lt;span class=SpellE&gt;Worldwide&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
3818
3819
3820
3821&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Matthias &lt;span
3822
3823class=SpellE&gt;Rauterberg&lt;/span&gt;, Lars &lt;span class=SpellE&gt;Oestreicher&lt;/span&gt;,
3824
3825John &lt;span class=SpellE&gt;Grundy&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
3826
3827
3828
3829&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;This is the
3830
3831proceedings for the INTERACT97 combined workshop on “CSCW in HCI-&lt;span
3832
3833class=SpellE&gt;worldwide&lt;/span&gt;”.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The
3834
3835position papers in this proceedings are those selected from topics relating to
3836
3837HCI community development &lt;span class=SpellE&gt;worldwide&lt;/span&gt; and to CSCW
3838
3839issues.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Originally these were to be two
3840
3841separate INTERACT workshops, but were combined to ensure sufficient
3842
3843participation for a combined workshop to run.&lt;/span&gt;&lt;/p&gt;
3844
3845
3846
3847
3848
3849
3850
3851&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;The combined
3852
3853workshop has been split into two separate sessions to run in the morning of
3854
3855July 15&lt;sup&gt;th&lt;/sup&gt;, Sydney, Australia.&lt;span style='mso-spacerun:yes'&gt; 
3856
3857&lt;/span&gt;One to discuss the issues relating to the position papers focusing on
3858
3859general CSCW systems, the other to the development of HCI communities in a &lt;span
3860
3861class=SpellE&gt;worldwide&lt;/span&gt; context.&lt;span style='mso-spacerun:yes'&gt; 
3862
3863&lt;/span&gt;The CSCW session uses as a case study a proposed &lt;span class=SpellE&gt;groupware&lt;/span&gt;
3864
3865tool for facilitating the development of an HCI database with a &lt;span
3866
3867class=SpellE&gt;worldwide&lt;/span&gt; geographical distribution.&lt;span
3868
3869style='mso-spacerun:yes'&gt;  &lt;/span&gt;The HCI community session focuses on
3870
3871developing the content for such a database, in order for it to foster the
3872
3873continued development of HCI communities.&lt;span style='mso-spacerun:yes'&gt; 
3874
3875&lt;/span&gt;The afternoon session of the combined workshop involves a joint
3876
3877discussion of the case study &lt;span class=SpellE&gt;groupware&lt;/span&gt; tool, in terms
3878
3879of its content and likely &lt;span class=SpellE&gt;groupware&lt;/span&gt; facilities.&lt;/span&gt;&lt;/p&gt;
3880
3881
3882
3883
3884
3885
3886
3887&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;The position
3888
3889papers have been grouped into those focusing on HCI communities and hence
3890
3891content issues for a &lt;span class=SpellE&gt;groupware&lt;/span&gt; database, and those focusing
3892
3893on CSCW and &lt;span class=SpellE&gt;groupware&lt;/span&gt; issues, and hence likely &lt;span
3894
3895class=SpellE&gt;groupware&lt;/span&gt; support in the proposed HCI
3896
3897database/collaboration tools.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We hope
3898
3899that you find the position papers in this proceedings offer a wide range of
3900
3901interesting reports of HCI community development &lt;span class=SpellE&gt;worldwide&lt;/span&gt;,
3902
3903leading CSCW system research, and that a &lt;span class=SpellE&gt;groupware&lt;/span&gt;
3904
3905tool supporting aspects of a &lt;span class=SpellE&gt;worldwide&lt;/span&gt; HCI database
3906
3907can draw upon the varied work reported.&lt;/span&gt;&lt;/p&gt;
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/17&lt;/span&gt;&lt;/p&gt;
3924
3925
3926
3927&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Internationalising
3928
3929a spreadsheet for Pacific Basin languages&lt;/span&gt;&lt;/p&gt;
3930
3931
3932
3933&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Robert &lt;span
3934
3935class=SpellE&gt;Barbour&lt;/span&gt;, Alvin &lt;span class=SpellE&gt;Yeo&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
3936
3937
3938
3939&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;As people trade
3940
3941and engage in commerce, an economically dominant culture tends to migrate
3942
3943language into other recently contacted cultures.&lt;span
3944
3945style='mso-spacerun:yes'&gt;  &lt;/span&gt;Information technology (IT) can accelerate &lt;span
3946
3947class=SpellE&gt;enculturation&lt;/span&gt; and promote the expansion of western hegemony
3948
3949in IT.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Equally, IT can present a
3950
3951culturally appropriate interface to the user that promotes the preservation of
3952
3953culture and language with very little additional effort.&lt;span
3954
3955style='mso-spacerun:yes'&gt;  &lt;/span&gt;In this paper a spreadsheet is
3956
3957internationalised to accept languages from the Latin-1 character set such as
3958
3959English, &lt;span class=SpellE&gt;Maori&lt;/span&gt; and &lt;span class=SpellE&gt;Bahasa&lt;/span&gt; &lt;span
3960
3961class=SpellE&gt;Melayu&lt;/span&gt; (Malaysia’s national language).&lt;span
3962
3963style='mso-spacerun:yes'&gt;  &lt;/span&gt;A technique that allows a non-programmer to
3964
3965add a new language to the spreadsheet is described.&lt;span
3966
3967style='mso-spacerun:yes'&gt;  &lt;/span&gt;The technique could also be used to
3968
3969internationalise other software at the point of design by following the steps
3970
3971we outline.&lt;/span&gt;&lt;/p&gt;
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/18&lt;/span&gt;&lt;/p&gt;
3988
3989
3990
3991&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Localising a
3992
3993spreadsheet:&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;an &lt;span class=SpellE&gt;Iban&lt;/span&gt;
3994
3995example&lt;/span&gt;&lt;/p&gt;
3996
3997
3998
3999&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Alvin &lt;span
4000
4001class=SpellE&gt;Yeo&lt;/span&gt;, Robert &lt;span class=SpellE&gt;Barbour&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
4002
4003
4004
4005&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Presently, there
4006
4007is little localisation of software to smaller cultures if it is not
4008
4009economically viable.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We believe software
4010
4011should also be localised to the languages of small cultures in order to sustain
4012
4013and preserve these small cultures.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;As an
4014
4015example, we localised a spreadsheet from English to &lt;span class=SpellE&gt;Iban&lt;/span&gt;.&lt;span
4016
4017style='mso-spacerun:yes'&gt;  &lt;/span&gt;The process in which we carried out the
4018
4019localisation can be used as a framework for the localisation of software to
4020
4021languages of small ethnic minorities.&lt;span style='mso-spacerun:yes'&gt; 
4022
4023&lt;/span&gt;Some problems faced during the localisation process are also discussed.&lt;/span&gt;&lt;/p&gt;
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/19&lt;/span&gt;&lt;/p&gt;
4040
4041
4042
4043&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Strategies of
4044
4045internationalisation and localisation: a postmodernist’s perspective&lt;/span&gt;&lt;/p&gt;
4046
4047
4048
4049&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Alvin &lt;span
4050
4051class=SpellE&gt;Yeo&lt;/span&gt;, Robert &lt;span class=SpellE&gt;Barbour&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
4052
4053
4054
4055&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Many software
4056
4057companies today are developing software not only for local consumption but for
4058
4059the rest of the world.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We introduce the
4060
4061concepts of internationalisation and localisation and discuss some techniques
4062
4063using these processes.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;An examination of
4064
4065&lt;span class=SpellE&gt;postmodern&lt;/span&gt; critique with respect to the software
4066
4067industry is also reported.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;In addition,
4068
4069we also feature our proposed internationalisation technique that was inspired
4070
4071by taking into account the researches of &lt;span class=SpellE&gt;postmodern&lt;/span&gt;
4072
4073philosophers and mathematicians.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;As illustrated
4074
4075in our prototype, the technique empowers non-programmers to localise their own
4076
4077software.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Further development of the
4078
4079technique and its implications on user interfaces and the future of software
4080
4081internationalisation and localisation are discussed.&lt;/span&gt;&lt;/p&gt;
4082
4083
4084
4085
4086
4087
4088
4089
4090
4091
4092
4093&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/20&lt;/span&gt;&lt;/p&gt;
4094
4095
4096
4097&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Language use in
4098
4099software&lt;/span&gt;&lt;/p&gt;
4100
4101
4102
4103&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Alvin &lt;span
4104
4105class=SpellE&gt;Yeo&lt;/span&gt;, Robert &lt;span class=SpellE&gt;Barbour&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
4106
4107
4108
4109&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Many of the
4110
4111popular software we use today are in English.&lt;span style='mso-spacerun:yes'&gt; 
4112
4113&lt;/span&gt;Very few software applications are available in minority languages.&lt;span
4114
4115style='mso-spacerun:yes'&gt;  &lt;/span&gt;Besides economic goals, we justify why
4116
4117software should be made available to smaller cultures.&lt;span
4118
4119style='mso-spacerun:yes'&gt;  &lt;/span&gt;Furthermore, there is evidence that people
4120
4121learn and progress faster in software in their mother tongue (&lt;span
4122
4123class=SpellE&gt;Griffiths&lt;/span&gt; et al., 1994) (&lt;span class=SpellE&gt;Krock&lt;/span&gt;,
4124
41251996).&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We hypothesise that experienced
4126
4127users of English spreadsheet can easily migrate to a spreadsheet in their
4128
4129native tongue i.e. &lt;span class=SpellE&gt;Bahasa&lt;/span&gt; &lt;span class=SpellE&gt;Melayu&lt;/span&gt;
4130
4131(Malaysia’s national language).&lt;span style='mso-spacerun:yes'&gt; 
4132
4133&lt;/span&gt;Observations made in the study suggest that the native speakers of &lt;span
4134
4135class=SpellE&gt;Bahasa&lt;/span&gt; &lt;span class=SpellE&gt;Melayu&lt;/span&gt; had difficulties
4136
4137with the &lt;span class=SpellE&gt;Bahasa&lt;/span&gt; &lt;span class=SpellE&gt;Melayu&lt;/span&gt;
4138
4139interface.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The subjects’ main difficulty
4140
4141was their unfamiliarity with computing terminology in &lt;span class=SpellE&gt;Bahasa&lt;/span&gt;
4142
4143&lt;span class=SpellE&gt;Melayu&lt;/span&gt;.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We
4144
4145present possible strategies to increase the use of &lt;span class=SpellE&gt;Bahasa&lt;/span&gt;
4146
4147&lt;span class=SpellE&gt;Melayu&lt;/span&gt; in IT.&lt;span style='mso-spacerun:yes'&gt; 
4148
4149&lt;/span&gt;These strategies may also be used to promote the use of other minority
4150
4151languages in IT.&lt;/span&gt;&lt;/p&gt;
4152
4153
4154
4155
4156
4157
4158
4159
4160
4161
4162
4163
4164
4165
4166
4167&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/21&lt;/span&gt;&lt;/p&gt;
4168
4169
4170
4171&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Usability
4172
4173testing:&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;a Malaysian study&lt;/span&gt;&lt;/p&gt;
4174
4175
4176
4177&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Alvin &lt;span
4178
4179class=SpellE&gt;Yeo&lt;/span&gt;, Robert &lt;span class=SpellE&gt;Barbour&lt;/span&gt;, Mark &lt;span
4180
4181class=SpellE&gt;Apperley&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
4182
4183
4184
4185&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;An exploratory
4186
4187study of software assessment techniques is conducted in Malaysia.&lt;span
4188
4189style='mso-spacerun:yes'&gt;  &lt;/span&gt;Subjects in the study comprised staff members
4190
4191of a Malaysian university with a high Information Technology (IT) presence.&lt;span
4192
4193style='mso-spacerun:yes'&gt;  &lt;/span&gt;The subjects assessed a spreadsheet tool with
4194
4195a &lt;span class=SpellE&gt;Bahasa&lt;/span&gt; &lt;span class=SpellE&gt;Melayu&lt;/span&gt; (Malaysia’s
4196
4197national language) interface.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Software
4198
4199evaluation techniques used include the think aloud method, interviews and the
4200
4201System Usability Scale.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The responses in
4202
4203the various techniques used are reported and initial results indicate
4204
4205idiosyncratic behaviour of Malaysian subjects.&lt;span style='mso-spacerun:yes'&gt; 
4206
4207&lt;/span&gt;The implications of the findings are also discussed.&lt;/span&gt;&lt;/p&gt;
4208
4209
4210
4211
4212
4213
4214
4215
4216
4217
4218
4219
4220
4221
4222
4223&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/22&lt;/span&gt;&lt;/p&gt;
4224
4225
4226
4227&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Inducing
4228
4229cost-sensitive trees via instance-weighting&lt;/span&gt;&lt;/p&gt;
4230
4231
4232
4233&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Kai &lt;span
4234
4235class=SpellE&gt;Ming&lt;/span&gt; Ting&lt;/span&gt;&lt;/p&gt;
4236
4237
4238
4239&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;We introduce an
4240
4241instance-weighting method to induce cost-sensitive trees in this paper.&lt;span
4242
4243style='mso-spacerun:yes'&gt;  &lt;/span&gt;It is a &lt;span class=SpellE&gt;generalization&lt;/span&gt;
4244
4245of the standard tree induction process where only the initial instance weights
4246
4247determine the type of tree (i.e., minimum error trees or minimum cost trees) to
4248
4249be induced.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We demonstrate that it can
4250
4251be easily adapted to an existing tree learning algorithm.&lt;/span&gt;&lt;/p&gt;
4252
4253
4254
4255
4256
4257
4258
4259&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Previous
4260
4261research gave insufficient evidence to support the fact that the greedy
4262
4263divide-and-conquer algorithm can effectively induce a truly cost-sensitive tree
4264
4265directly from the training data.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We
4266
4267provide this empirical evidence in this paper.&lt;span style='mso-spacerun:yes'&gt; 
4268
4269&lt;/span&gt;The algorithm employing the instance-weighting method is found to be
4270
4271comparable to or better than both C4.5 and C5 in terms of total
4272
4273misclassification costs, tree size and the number of high cost errors.&lt;span
4274
4275style='mso-spacerun:yes'&gt;  &lt;/span&gt;The instance-weighting method is also simpler
4276
4277and more effective in implementation than a method based on altered priors.&lt;/span&gt;&lt;/p&gt;
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/23&lt;/span&gt;&lt;/p&gt;
4290
4291
4292
4293&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Fast convergence
4294
4295with a greedy tag-phrase dictionary&lt;/span&gt;&lt;/p&gt;
4296
4297
4298
4299&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Ross &lt;span
4300
4301class=SpellE&gt;Peeters&lt;/span&gt;, Tony C. Smith&lt;/span&gt;&lt;/p&gt;
4302
4303
4304
4305&lt;p class=MsoBodyText&gt;&lt;span lang=EN-US&gt;The best general-purpose compression
4306
4307schemes make their gains by estimating a probability distribution over all
4308
4309possible next symbols given the context established by some number of previous
4310
4311symbols.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Such context models typically
4312
4313obtain good compression results for plain text by taking advantage of
4314
4315regularities in character sequences.&lt;span style='mso-spacerun:yes'&gt; 
4316
4317&lt;/span&gt;Frequent words and syllables can be incorporated into the model quickly
4318
4319and thereafter used for reasonably accurate prediction.&lt;span
4320
4321style='mso-spacerun:yes'&gt;  &lt;/span&gt;However, the precise context in which
4322
4323frequent patterns emerge is often extremely varied, and each new word or phrase
4324
4325immediately introduces new contexts which can adversely affect the compression
4326
4327rate.&lt;/span&gt;&lt;/p&gt;
4328
4329
4330
4331
4332
4333
4334
4335&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;A great deal of
4336
4337the structural regularity in a natural language is given rather more by
4338
4339properties of its grammar than by the orthographic transcription of its
4340
4341phonology.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;This implies that access to a
4342
4343grammatical abstraction might lead to good compression.&lt;span
4344
4345style='mso-spacerun:yes'&gt;  &lt;/span&gt;While grammatical models have been used
4346
4347successfully for compressing computer programs [4], grammar-based compression
4348
4349of plain text has received little attention, primarily because of the
4350
4351difficulties associated with constructing a suitable natural language
4352
4353grammar.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;But even without a precise
4354
4355formulation of the syntax of a language, there is a linguistic abstraction
4356
4357which is easily accessed and which demonstrates a high degree of regularity
4358
4359which can be exploited for compression purposes-namely, lexical categories.&lt;/span&gt;&lt;/p&gt;
4360
4361
4362
4363
4364
4365
4366
4367
4368
4369
4370
4371&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/24&lt;/span&gt;&lt;/p&gt;
4372
4373
4374
4375&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Tag based models
4376
4377of English text&lt;/span&gt;&lt;/p&gt;
4378
4379
4380
4381&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;W. J. &lt;span
4382
4383class=SpellE&gt;Teahan&lt;/span&gt;, John G. &lt;span class=SpellE&gt;Cleary&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
4384
4385
4386
4387&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;The problem of
4388
4389compressing English text is important both because of the ubiquity of English
4390
4391as a target for compression and because of the light that compression can shed
4392
4393on the structure of English.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;English
4394
4395text is examined in conjunction with additional information about the parts of
4396
4397speech of each word in the text (these are referred to as “tags”).&lt;span
4398
4399style='mso-spacerun:yes'&gt;  &lt;/span&gt;It is shown that the tags plus the text can
4400
4401be compressed more than the text alone.&lt;span style='mso-spacerun:yes'&gt; 
4402
4403&lt;/span&gt;Essentially the tags can be compressed for nothing or even a small net
4404
4405saving in size.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;A comparison is made of
4406
4407a number of different ways of integrating compression of tags and text using an
4408
4409escape mechanism similar to PPM.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;These
4410
4411are also compared with standard word based and character based compression
4412
4413programs.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The result is that the tag
4414
4415character and word based schemes always outperform the character based
4416
4417schemes.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Overall, the tag based schemes
4418
4419outperform the word based schemes.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We
4420
4421conclude by conjecturing that tags chosen for compression rather than
4422
4423linguistic purposes would perform even better.&lt;/span&gt;&lt;/p&gt;
4424
4425
4426
4427
4428
4429
4430
4431
4432
4433
4434
4435&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/25&lt;/span&gt;&lt;/p&gt;
4436
4437
4438
4439&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Musical image
4440
4441compression&lt;/span&gt;&lt;/p&gt;
4442
4443
4444
4445&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;David
4446
4447Bainbridge, Stuart &lt;span class=SpellE&gt;Inglis&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
4448
4449
4450
4451&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Optical music
4452
4453recognition aims to convert the vast repositories of sheet music in the world
4454
4455into an on-line digital format [Bai97].&lt;span style='mso-spacerun:yes'&gt; 
4456
4457&lt;/span&gt;In the near future it will be possible to assimilate music into digital
4458
4459libraries and users will be able to perform searches based on a sung melody in
4460
4461addition to typical text-based searching [MSW+96].&lt;span
4462
4463style='mso-spacerun:yes'&gt;  &lt;/span&gt;An important requirement for such a system is
4464
4465the ability to reproduce the original score as accurately as possible.&lt;span
4466
4467style='mso-spacerun:yes'&gt;  &lt;/span&gt;Due to the huge amount of sheet music
4468
4469available, the efficient storage of musical images is an important topic of
4470
4471study.&lt;/span&gt;&lt;/p&gt;
4472
4473
4474
4475
4476
4477
4478
4479&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;This paper
4480
4481investigates whether the “knowledge” extracted from the optical music
4482
4483recognition (OMR) process can be exploited to gain higher compression than the
4484
4485JBIG international standard for &lt;span class=SpellE&gt;bi&lt;/span&gt;-level image
4486
4487compression.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We present a hybrid
4488
4489approach where the primitive shapes of music extracted by the optical music
4490
4491recognition process-note heads, note stems, staff lines and so forth-are fed
4492
4493into a graphical symbol based compression scheme originally designed for images
4494
4495containing mainly printed text.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Using
4496
4497this hybrid approach the average compression rate for a single page is improved
4498
4499by 3.5% over JBIG.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;When multiple pages with
4500
4501similar typography are processed in sequence, the file size is decreased by
4502
45034-8%.&lt;/span&gt;&lt;/p&gt;
4504
4505
4506
4507
4508
4509
4510
4511&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Section 2
4512
4513presents the relevant background to both optical music recognition and textual
4514
4515image compression.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Section 3 describes
4516
4517the experiments performed on 66 test images, outlining the combinations of
4518
4519parameters that were examined to give the best results.&lt;span
4520
4521style='mso-spacerun:yes'&gt;  &lt;/span&gt;The initial results and refinements are
4522
4523presented in Section 4, and we conclude in the last section by &lt;span
4524
4525class=SpellE&gt;summarizing&lt;/span&gt; the findings of this work.&lt;/span&gt;&lt;/p&gt;
4526
4527
4528
4529
4530
4531
4532
4533
4534
4535
4536
4537
4538
4539
4540
4541&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/26&lt;/span&gt;&lt;/p&gt;
4542
4543
4544
4545&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Correcting English
4546
4547text using PPM models&lt;/span&gt;&lt;/p&gt;
4548
4549
4550
4551&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;W. J. &lt;span
4552
4553class=SpellE&gt;Teahan&lt;/span&gt;, S. &lt;span class=SpellE&gt;Inglis&lt;/span&gt;, J. G. &lt;span
4554
4555class=SpellE&gt;Cleary&lt;/span&gt;, G. Holmes&lt;/span&gt;&lt;/p&gt;
4556
4557
4558
4559&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;An essential
4560
4561component of many applications in natural language processing is a language &lt;span
4562
4563class=SpellE&gt;modeler&lt;/span&gt; able to correct errors in the text being
4564
4565processed.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;For optical character recognition
4566
4567(OCR), poor scanning quality or extraneous pixels in the image may cause one or
4568
4569more characters to be mis-&lt;span class=SpellE&gt;recognized&lt;/span&gt;; while for
4570
4571spelling correction, two characters may be transposed, or a character may be
4572
4573inadvertently inserted or missed out. &lt;/span&gt;&lt;/p&gt;
4574
4575
4576
4577
4578
4579
4580
4581&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;This paper
4582
4583describes a method for correcting English text using a PPM model.&lt;span
4584
4585style='mso-spacerun:yes'&gt;  &lt;/span&gt;A method that segments words in English text
4586
4587is introduced and is shown to be a significant improvement over previously used
4588
4589methods.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;A similar technique is also
4590
4591applied as a post-processing stage after pages have been &lt;span class=SpellE&gt;recognized&lt;/span&gt;
4592
4593by a state-of-the-art commercial OCR system.&lt;span style='mso-spacerun:yes'&gt; 
4594
4595&lt;/span&gt;We show that the accuracy of the OCR system can be increased from 95.9%
4596
4597to 96.6%, a decrease of about 10 errors per page.&lt;/span&gt;&lt;/p&gt;
4598
4599
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/27&lt;/span&gt;&lt;/p&gt;
4618
4619
4620
4621&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Constraints on
4622
4623parallelism beyond 10 instructions per cycle&lt;/span&gt;&lt;/p&gt;
4624
4625
4626
4627&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;John G. &lt;span
4628
4629class=SpellE&gt;Cleary&lt;/span&gt;, Richard H. &lt;span class=SpellE&gt;Littin&lt;/span&gt;, J. A.
4630
4631David &lt;span class=SpellE&gt;McWha&lt;/span&gt;, Murray W. Pearson&lt;/span&gt;&lt;/p&gt;
4632
4633
4634
4635&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;The problem of
4636
4637extracting Instruction Level Parallelism at levels of 10 instructions per clock
4638
4639and higher is considered.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Two different
4640
4641architectures which use speculation on memory accesses to achieve this level of
4642
4643performance are reviewed.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;It is pointed
4644
4645out that while this form of speculation gives high potential parallelism it is
4646
4647necessary to retain execution state so that incorrect speculation can be detected
4648
4649and subsequently squashed.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Simulation
4650
4651results show that the space to store such state is a critical resource in
4652
4653obtaining good speedup.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;To make good use
4654
4655of the space it is essential that state be stored efficiently and that it be
4656
4657retired as soon as possible.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;A number of
4658
4659techniques for extracting the best usage from the available state storage are
4660
4661introduced.&lt;/span&gt;&lt;/p&gt;
4662
4663
4664
4665
4666
4667
4668
4669
4670
4671
4672
4673
4674
4675
4676
4677&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/28&lt;/span&gt;&lt;/p&gt;
4678
4679
4680
4681&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Effects of
4682
4683re-ordered memory operations on parallelism&lt;/span&gt;&lt;/p&gt;
4684
4685
4686
4687&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;Richard H. &lt;span
4688
4689class=SpellE&gt;Littin&lt;/span&gt;, John G. &lt;span class=SpellE&gt;Cleary&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
4690
4691
4692
4693&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;The performance
4694
4695effect of permitting different memory operations to be re-ordered is
4696
4697examined.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The available parallelism is
4698
4699computed using a machine code simulator.&lt;span style='mso-spacerun:yes'&gt; 
4700
4701&lt;/span&gt;A range of possible restrictions on the re-ordering of memory operations
4702
4703is considered: from the purely sequential case where no re-ordering is
4704
4705permitted; to the completely permissive one where memory operations may occur
4706
4707in any order so that the parallelism is restricted only by data
4708
4709dependencies.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;A general conclusion is
4710
4711drawn that to reliably obtain parallelism beyond 10 instructions per clock will
4712
4713require an ability to re-order all memory instructions.&lt;span
4714
4715style='mso-spacerun:yes'&gt;  &lt;/span&gt;A brief description of a feasible
4716
4717architecture capable of this is given.&lt;/span&gt;&lt;/p&gt;
4718
4719
4720
4721
4722
4723
4724
4725
4726
4727
4728
4729
4730
4731
4732
4733&lt;p class=MsoNormal style='margin-right:-.4pt'&gt;&lt;span lang=EN-GB&gt;97/29&lt;/span&gt;&lt;/p&gt;
4734
4735
4736
4737&lt;p class=MsoNormal&gt;&lt;span lang=EN-GB&gt;OZCHI’96 Industry Session:&lt;span
4738
4739style='mso-spacerun:yes'&gt;  &lt;/span&gt;Sixth Australian Conference on Human-Computer
4740
4741Interaction&lt;/span&gt;&lt;/p&gt;
4742
4743
4744
4745&lt;p class=MsoNormal&gt;&lt;span lang=EN-GB&gt;Edited by Chris Phillips, Janis &lt;span
4746
4747class=SpellE&gt;McKauge&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
4748
4749
4750
4751&lt;p class=MsoNormal&gt;&lt;span lang=EN-GB&gt;The idea for a specific industry session at
4752
4753OZCHI was first mooted at the 1995 conference in &lt;span class=SpellE&gt;Wollongong&lt;/span&gt;,
4754
4755during questions following a session of short papers which happened
4756
4757(serendipitously) to be presented by people from industry.&lt;span
4758
4759style='mso-spacerun:yes'&gt;  &lt;/span&gt;An animated discussion took place, most of
4760
4761which was about how OZCHI could be made more relevant to people in industry, be
4762
4763it working as usability consultants, or working within organisations either as
4764
4765usability professionals or as ‘champions of the cause’.&lt;span
4766
4767style='mso-spacerun:yes'&gt;  &lt;/span&gt;The discussion raised more questions than
4768
4769answers, about the format of such a session, about the challenges of
4770
4771attracting industry participation, and about the best way of publishing the
4772
4773results.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;Although no real solutions were
4774
4775arrived at, it was enough to place an industry session on the agenda for
4776
4777OZCHI’96.&lt;/span&gt;&lt;/p&gt;
4778
4779
4780
4781
4782
4783
4784
4785
4786
4787
4788
4789&lt;p class=MsoNormal&gt;&lt;span lang=EN-GB&gt;97/30&lt;/span&gt;&lt;/p&gt;
4790
4791
4792
4793&lt;p class=MsoNormal&gt;&lt;span lang=EN-GB&gt;Adaptive models of English text&lt;/span&gt;&lt;/p&gt;
4794
4795
4796
4797&lt;p class=MsoNormal&gt;&lt;span lang=EN-GB&gt;W. J. &lt;span class=SpellE&gt;Teahan&lt;/span&gt;,
4798
4799John G. &lt;span class=SpellE&gt;Cleary&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
4800
4801
4802
4803&lt;p class=MsoNormal&gt;&lt;span lang=EN-GB&gt;High quality models of English text with
4804
4805performance approaching that of humans are important for many applications
4806
4807including spelling correction, speech recognition, OCR, and encryption.&lt;span
4808
4809style='mso-spacerun:yes'&gt;  &lt;/span&gt;A number of different statistical models of
4810
4811English are compared with each other and with previous estimates from human
4812
4813subjects.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;It is concluded that the best
4814
4815current models are word based with part of speech tags.&lt;span
4816
4817style='mso-spacerun:yes'&gt;  &lt;/span&gt;Given sufficient training text, they are able
4818
4819to attain performance comparable to humans.&lt;/span&gt;&lt;/p&gt;
4820
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832
4833
4834
4835&lt;p class=MsoNormal&gt;&lt;span lang=EN-GB&gt;97/31&lt;/span&gt;&lt;/p&gt;
4836
4837
4838
4839&lt;p class=MsoNormal&gt;&lt;span lang=EN-GB&gt;A graphical user interface for Boolean
4840
4841query specification&lt;/span&gt;&lt;/p&gt;
4842
4843
4844
4845&lt;p class=MsoNormal&gt;&lt;span lang=EN-GB&gt;Steve Jones, &lt;span class=SpellE&gt;Shona&lt;/span&gt;
4846
4847&lt;span class=SpellE&gt;McInnes&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
4848
4849
4850
4851&lt;p class=MsoNormal&gt;&lt;span lang=EN-GB&gt;On-line information repositories commonly
4852
4853provide keyword search facilities via textual query languages based on Boolean
4854
4855logic.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;However, there is evidence to
4856
4857suggest that the syntactical demands of such languages can lead to user errors
4858
4859and adversely affect the time that it takes users to form queries.&lt;span
4860
4861style='mso-spacerun:yes'&gt;  &lt;/span&gt;Users also face difficulties because of the
4862
4863conflict in semantics between AND &lt;span class=SpellE&gt;and&lt;/span&gt; OR when used in
4864
4865Boolean logic and English language.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We
4866
4867suggest that graphical query languages, in particular Venn-like diagrams, can
4868
4869alleviate the problems that users experience when forming Boolean expressions
4870
4871with textual languages.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We describe &lt;span
4872
4873class=SpellE&gt;Vquery&lt;/span&gt;, a Venn-diagram based user interface to the New
4874
4875Zealand Digital Library (NZDL).&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;The
4876
4877design of &lt;span class=SpellE&gt;Vquery&lt;/span&gt; has been partly motivated by
4878
4879analysis of NZDL usage.&lt;span style='mso-spacerun:yes'&gt;  &lt;/span&gt;We found that
4880
4881few queries contain more than three terms, use of the intersection operator
4882
4883dominates and that query refinement is common.&lt;span style='mso-spacerun:yes'&gt; 
4884
4885&lt;/span&gt;A study of the utility of Venn diagrams for query specification
4886
4887indicates that with little or no training users can interpret and form
4888
4889Venn-like diagrams which accurately correspond to Boolean expressions.&lt;span
4890
4891style='mso-spacerun:yes'&gt;  &lt;/span&gt;The utility of &lt;span class=SpellE&gt;Vquery&lt;/span&gt;
4892
4893is considered and directions for future work are proposed.&lt;/span&gt;&lt;/p&gt;
4894
4895
4896
4897
4898
4899
4900
4901
4902
4903
4904
4905&lt;/div&gt;
4906
4907
4908
4909
4910
4911
4912
4913
4914
4915</Content>
4916</Section>
4917</Archive>
Note: See TracBrowser for help on using the repository browser.