%!PS-Adobe-2.0 %%Creator: dvipsk 5.58f Copyright 1986, 1994 Radical Eye Software %%Title: interface99.dvi %%Pages: 6 %%PageOrder: Ascend %%BoundingBox: 0 0 596 842 %%DocumentPaperSizes: a4 %%EndComments %DVIPSCommandLine: dvips -o interface99.ps interface99.dvi %DVIPSParameters: dpi=300, compressed, comments removed %DVIPSSource: TeX output 1999.06.21:1208 %%BeginProcSet: texc.pro /TeXDict 250 dict def TeXDict begin /N{def}def /B{bind def}N /S{exch}N /X{S N}B /TR{translate}N /isls false N /vsize 11 72 mul N /hsize 8.5 72 mul N /landplus90{false}def /@rigin{isls{[0 landplus90{1 -1}{-1 1} ifelse 0 0 0]concat}if 72 Resolution div 72 VResolution div neg scale isls{landplus90{VResolution 72 div vsize mul 0 exch}{Resolution -72 div hsize mul 0}ifelse TR}if Resolution VResolution vsize -72 div 1 add mul TR[matrix currentmatrix{dup dup round sub abs 0.00001 lt{round}if} forall round exch round exch]setmatrix}N /@landscape{/isls true N}B /@manualfeed{statusdict /manualfeed true put}B /@copies{/#copies X}B /FMat[1 0 0 -1 0 0]N /FBB[0 0 0 0]N /nn 0 N /IE 0 N /ctr 0 N /df-tail{ /nn 8 dict N nn begin /FontType 3 N /FontMatrix fntrx N /FontBBox FBB N string /base X array /BitMaps X /BuildChar{CharBuilder}N /Encoding IE N end dup{/foo setfont}2 array copy cvx N load 0 nn put /ctr 0 N[}B /df{ /sf 1 N /fntrx FMat N df-tail}B /dfs{div /sf X /fntrx[sf 0 0 sf neg 0 0] N df-tail}B /E{pop nn dup definefont setfont}B /ch-width{ch-data dup length 5 sub get}B /ch-height{ch-data dup length 4 sub get}B /ch-xoff{ 128 ch-data dup length 3 sub get sub}B /ch-yoff{ch-data dup length 2 sub get 127 sub}B /ch-dx{ch-data dup length 1 sub get}B /ch-image{ch-data dup type /stringtype ne{ctr get /ctr ctr 1 add N}if}B /id 0 N /rw 0 N /rc 0 N /gp 0 N /cp 0 N /G 0 N /sf 0 N /CharBuilder{save 3 1 roll S dup /base get 2 index get S /BitMaps get S get /ch-data X pop /ctr 0 N ch-dx 0 ch-xoff ch-yoff ch-height sub ch-xoff ch-width add ch-yoff setcachedevice ch-width ch-height true[1 0 0 -1 -.1 ch-xoff sub ch-yoff .1 sub]/id ch-image N /rw ch-width 7 add 8 idiv string N /rc 0 N /gp 0 N /cp 0 N{rc 0 ne{rc 1 sub /rc X rw}{G}ifelse}imagemask restore}B /G{{id gp get /gp gp 1 add N dup 18 mod S 18 idiv pl S get exec}loop}B /adv{cp add /cp X}B /chg{rw cp id gp 4 index getinterval putinterval dup gp add /gp X adv}B /nd{/cp 0 N rw exit}B /lsh{rw cp 2 copy get dup 0 eq{pop 1}{ dup 255 eq{pop 254}{dup dup add 255 and S 1 and or}ifelse}ifelse put 1 adv}B /rsh{rw cp 2 copy get dup 0 eq{pop 128}{dup 255 eq{pop 127}{dup 2 idiv S 128 and or}ifelse}ifelse put 1 adv}B /clr{rw cp 2 index string putinterval adv}B /set{rw cp fillstr 0 4 index getinterval putinterval adv}B /fillstr 18 string 0 1 17{2 copy 255 put pop}for N /pl[{adv 1 chg} {adv 1 chg nd}{1 add chg}{1 add chg nd}{adv lsh}{adv lsh nd}{adv rsh}{ adv rsh nd}{1 add adv}{/rc X nd}{1 add set}{1 add clr}{adv 2 chg}{adv 2 chg nd}{pop nd}]dup{bind pop}forall N /D{/cc X dup type /stringtype ne{] }if nn /base get cc ctr put nn /BitMaps get S ctr S sf 1 ne{dup dup length 1 sub dup 2 index S get sf div put}if put /ctr ctr 1 add N}B /I{ cc 1 add D}B /bop{userdict /bop-hook known{bop-hook}if /SI save N @rigin 0 0 moveto /V matrix currentmatrix dup 1 get dup mul exch 0 get dup mul add .99 lt{/QV}{/RV}ifelse load def pop pop}N /eop{SI restore userdict /eop-hook known{eop-hook}if showpage}N /@start{userdict /start-hook known{start-hook}if pop /VResolution X /Resolution X 1000 div /DVImag X /IE 256 array N 0 1 255{IE S 1 string dup 0 3 index put cvn put}for 65781.76 div /vsize X 65781.76 div /hsize X}N /p{show}N /RMat[1 0 0 -1 0 0]N /BDot 260 string N /rulex 0 N /ruley 0 N /v{/ruley X /rulex X V}B /V {}B /RV statusdict begin /product where{pop product dup length 7 ge{0 7 getinterval dup(Display)eq exch 0 4 getinterval(NeXT)eq or}{pop false} ifelse}{false}ifelse end{{gsave TR -.1 .1 TR 1 1 scale rulex ruley false RMat{BDot}imagemask grestore}}{{gsave TR -.1 .1 TR rulex ruley scale 1 1 false RMat{BDot}imagemask grestore}}ifelse B /QV{gsave newpath transform round exch round exch itransform moveto rulex 0 rlineto 0 ruley neg rlineto rulex neg 0 rlineto fill grestore}B /a{moveto}B /delta 0 N /tail {dup /delta X 0 rmoveto}B /M{S p delta add tail}B /b{S p tail}B /c{-4 M} B /d{-3 M}B /e{-2 M}B /f{-1 M}B /g{0 M}B /h{1 M}B /i{2 M}B /j{3 M}B /k{ 4 M}B /w{0 rmoveto}B /l{p -4 w}B /m{p -3 w}B /n{p -2 w}B /o{p -1 w}B /q{ p 1 w}B /r{p 2 w}B /s{p 3 w}B /t{p 4 w}B /x{0 S rmoveto}B /y{3 2 roll p a}B /bos{/SS save N}B /eos{SS restore}B end %%EndProcSet TeXDict begin 39158280 55380996 1000 300 300 (interface99.dvi) @start /Fa 14 120 df<120EA2121E12381230126012C012800708769C17>19 D<13201370A313B8A3EA011CA2EA031EEA020EA2487EEA07FFEA040738080380A2001813 C01301123838FC07F815157F9419>97 D 99 D101 D<38FF8FF8381C01C0A9EA1FFFEA1C01A938FF 8FF815157F9419>104 DI108 D<00FEEB0FE0001E140000171317A3381380 27A23811C047A33810E087A2EB7107A3133AA2131CA2123839FE083FE01B157F941F>I< 38FC03F8381E00E014401217EA138013C01211EA10E01370A21338131CA2130E130714C0 130313011300123800FE134015157F9419>II114 DI<387FFFF03860703000401310A200801308A300001300ADEA07FF15 157F9419>I<39FF07F87E393C01E03C0038EBC018391C02E010A3390E047020A3390708 3840A33903901C80A33901E00F00A33800C006A31F157F9423>119 D E /Fb 20 119 df<127812FCA4127806067D850D>46 D68 DII<3807F820381FFEE0EA3C07EA7801EA700012F01460A26C130012FE EAFFE0EA7FFE6C7E1480000F13C06C13E0EA007FEB03F01301130012C0A214E07E38F001 C0EAFC0338EFFF00EA83FC141C7D9B1B>83 D97 D99 D101 D<3803F0F0380E1F38EA 3C0F3838073000781380A400381300EA3C0FEA1E1CEA33F00030C7FCA3EA3FFF14C06C13 E014F0387801F838F00078A300701370007813F0381E03C03807FF00151B7F9118>103 D<121E123FA4121EC7FCA6127FA2121FAEEAFFC0A20A1E7F9D0E>105 D108 D<39FF0FC07E903831E18F3A1F40F207 80D980FC13C0A2EB00F8AB3AFFE7FF3FF8A225127F9128>I<38FF0FC0EB31E0381F40F0 EB80F8A21300AB38FFE7FFA218127F911B>II< 38FF3F80EBE1E0381F80F0EB0078147C143C143EA6143C147C1478EB80F0EBC1E0EB3F00 90C7FCA6EAFFE0A2171A7F911B>I114 DI<1203A45AA25AA2EA3F FC12FFEA1F00A9130CA4EA0F08EA0798EA03F00E1A7F9913>I<38FF07F8A2EA1F00AC13 01120F380786FFEA01F818127F911B>I<38FFC1FCA2381F00601380000F13C0A23807C1 80A23803E300A213F7EA01F613FE6C5AA21378A2133016127F9119>I E /Fc 1 49 df<1218A31230A31260A312C0A2050B7E8B09>48 D E /Fd 2 49 df<14C01303EB0700131C1378EA01E0EA0780000EC7FC123812F0A2123812 0E6C7EEA01E0EA0078131C1307EB03C013001400A5387FFF80B512C0121B7D931A>20 D<1204120EA2121CA31238A212301270A21260A212C0A2070F7F8F0A>48 D E /Fe 1 111 df 110 D E /Ff 11 107 df0 D20 D<12C012F0123C120FEA03C0EA00F0133C130FEB03C0EB00F0143C140FEC0380EC0F0014 3C14F0EB03C0010FC7FC133C13F0EA03C0000FC8FC123C127012C0C9FCA7007FB5FCB612 8019227D9920>I24 D<153081A381A281811680ED00C0B712F8A2C912 C0ED0380160015065DA25DA35D25167E942A>33 D49 DI<011F131C90383F807C01EF133CEA0187 390007C0381530010313601580ECC10014C614C814F014C0A21307130B13331343EA0183 EA02031204121800307F127039F001E180ECF70000F813FC38E000F81E1C7E9B1E>88 D<133C13E0EA01C013801203AD13005A121C12F0121C12077E1380AD120113C0EA00E013 3C0E297D9E15>102 D<12F0121C12077E1380AD120113C0EA00E0133C13E0EA01C01380 1203AD13005A121C12F00E297D9E15>I<12C0B3B3A502297B9E0C>106 D E /Fg 4 62 df2 D<120C121C12EC120CAFEAFFC00A137D9211>49 D<121FEA60C01360EAF07013301260EA0070A2136013C012011380EA02005AEA08101210 EA2020EA7FE012FF0C137E9211>I<387FFFE0B512F0C8FCA6B512F06C13E0140A7E8B19> 61 D E /Fh 5 91 df<1430146014C0EB0180EB03005B130E130C5B1338133013705B5B 12015B1203A290C7FC5A1206120EA2120C121CA312181238A45AA75AB3A31270A77EA412 18121CA3120C120EA2120612077E7FA212017F12007F13701330133813187F130E7F7FEB 0180EB00C014601430146377811F>18 D<12C012607E7E7E120E7E7E6C7E7F12007F1370 133013381318131CA2130C130E13061307A27F1480A3130114C0A4EB00E0A71470B3A314 E0A7EB01C0A414801303A314005BA21306130E130C131CA213181338133013705B5B1201 5B48C7FC5A120E120C5A5A5A5A14637F811F>I80 D88 D90 D E /Fi 5 111 df71 D<1206120712061200A41238124CA2128C12981218A212301232A21264A21238 08147F930C>105 D<1330133813301300A4EA01C0EA0260EA0430136012081200A213C0 A4EA0180A4EA630012E312C612780D1A81930E>I<121E12065AA45A1338135C139CEA31 18EA36001238EA3F80EA61C0EA60C8A3EAC0D013600E147F9312>I110 D E /Fj 18 121 df<137813CCEA0186EA03061206120E120CEA1C071218EA3806A2EA30 0E1270A2EA7FFEEAE01CA31318133812C013701360A213C0EAC1801261EA6200123C101D 7E9C13>18 D<3801FFF85A120F381E1E00EA180EEA38061270A2EAE00EA3130C131C1318 5BEA60606C5A001FC7FC15127E9118>27 D<126012F0A2126004047C830C>58 D<126012F0A212701210A41220A212401280040C7C830C>I<130113031306A3130CA313 18A31330A31360A213C0A3EA0180A3EA0300A31206A25AA35AA35AA35AA35AA210297E9E 15>61 D<140CA2141CA2143C145CA2149E148EEB010E1302A21304A213081310A2497EEB 3FFFEB40071380A2EA0100A212025AA2001C148039FF803FF01C1D7F9C1F>65 D<48B512F038003C00013813301520A35BA214081500495AA21430EBFFF03801C020A448 485A91C7FCA348C8FCA45AEAFFF01C1C7E9B1B>70 D<903801F80890380E061890383801 3890386000F048481370485A48C71230481420120E5A123C15005AA35AA2EC7FF0EC0780 1500A31270140E123012386C131E6C136438070184D800FEC7FC1D1E7E9C21>I 78 D<48B5FC39003C03C090383800E015F01570A24913F0A315E0EBE001EC03C0EC0700 141E3801FFF001C0C7FCA3485AA448C8FCA45AEAFFE01C1C7E9B1B>80 D100 D102 D105 D<1307130FA213061300A6 1378139CEA010C1202131C12041200A21338A41370A413E0A4EA01C01261EAF180EAF300 12E6127C1024809B11>II110 D<001CEBC080392701C1C0124714 C03987038040A2120EA2391C070080A3EC0100EA1806A2381C0E02EB0F04380E13083803 E1F01A127E911E>119 D<380787803808C8403810F0C03820F1E0EBE3C03840E1803800 E000A2485AA43863808012F3EB810012E5EA84C6EA787813127E9118>I E /Fk 47 122 df<14FE90380301801306EB0C03EB1C0191C7FC13181338A43803FFFE38 00700EA35CA213E0A25CA3EA01C01472A438038034141891C7FC90C8FCA25A12C612E65A 12781925819C17>12 D<13031306130813181330136013C0A2EA0180EA0300A21206A25A A2121C1218A212381230A21270A21260A412E0A51260A51220123012107EA2102A7B9E11 >40 D<1310A21308130C13041306A51307A51306A4130EA2130CA2131C1318A213381330 A21360A213C0A2EA0180EA0300A212065A5A121012605A102A809E11>I<121812381278 12381208A21210A212201240A21280050C7D830D>44 DI<1230 127812F0126005047C830D>I<133EEBE180380181C0EA03005A1206000E13E0120C001C 13C0A2EA18011238A338700380A43860070012E01306130EA2130C5BEA60385BEA30C000 1FC7FC131D7C9B15>48 D<131FEB60C013803801006012021340000413E0A3EB81C0EA03 0138000380EB070013FC131C1306A21307A41270EAE00E12805BEA40185BEA20E0EA1F80 131D7D9B15>51 D53 D<133E13E138018180380300C01206120E120C121CA213011238A3130300 1813801307EA080B380C3300EA03C7EA0007130E130C131C1318EAE0305BEA80C0EAC180 003EC7FC121D7C9B15>57 D<1206120FA212061200AA1230127812F0126008127C910D> I<1418A21438A21478A214B8EB0138A2EB023C141C1304130C13081310A21320A2EB7FFC EBC01C1380EA0100141E0002130EA25A120C001C131EB4EBFFC01A1D7E9C1F>65 D<48B5FC39003C038090383801C0EC00E0A35B1401A2EC03C001E01380EC0F00141EEBFF FC3801C00E801580A2EA0380A43907000F00140E141E5C000E13F0B512C01B1C7E9B1D> I<903803F02090381E0C6090383002E09038E003C03801C001EA038048C7FC000E148012 1E121C123C15005AA35AA41404A35C12705C6C5B00185B6C485AD80706C7FCEA01F81B1E 7A9C1E>I<48B5FC39003C03C090383800E0A21570A24913781538A215785BA4484813F0 A315E03803800115C0140315803907000700140E5C5C000E13E0B512801D1C7E9B1F>I< 48B512F038003C00013813301520A35BA214081500495AA21430EBFFF03801C020A44848 5A91C7FCA348C8FCA45AEAFFF01C1C7E9B1B>70 D73 D I<3801FFC038003C001338A45BA45BA4485AA438038002A31404EA0700140C1418143800 0E13F0B5FC171C7E9B1A>76 DII< 3801FFFE39003C038090383801C0EC00E0A3EB7001A315C0EBE0031580EC0700141C3801 FFF001C0C7FCA3485AA448C8FCA45AEAFFE01B1C7E9B1C>80 D<3801FFFE39003C078090 383801C015E01400A2EB7001A3EC03C001E01380EC0700141CEBFFE03801C03080141CA2 EA0380A43807003C1520A348144039FFE01E80C7EA0F001B1D7E9B1E>82 DI97 D<123F1207A2120EA45AA4EA39E0EA3A18EA3C0C12381270 130EA3EAE01CA31318133813301360EA60C0EA3180EA1E000F1D7C9C13>I<13F8EA0304 120EEA1C0EEA181CEA30001270A25AA51304EA60081310EA3060EA0F800F127C9113>I< EB07E01300A2EB01C0A4EB0380A43801E700EA0717EA0C0F1218EA380E12301270A2485A A41339A3EA6079EA319AEA1E0C131D7C9C15>I<13F8EA0704120CEA1802EA38041230EA 7008EA7FF0EAE000A5EA60041308EA30101360EA0F800F127C9113>III< EA0FC01201A2485AA448C7FCA4EA0E3E13C3380F0380120E121E121CA338380700A3130E 00701340A2131C1480EAE00C38600700121D7E9C15>II107 DI<391C1E078039266318C0394683 A0E0384703C0008E1380A2120EA2391C0701C0A3EC0380D8380E1388A2EC070815103970 1C032039300C01C01D127C9122>II<13F8EA030CEA0E06 487E1218123000701380A238E00700A3130EA25BEA60185BEA30E0EA0F8011127C9115> I<380387803804C860EBD03013E0EA09C014381201A238038070A31460380700E014C0EB 0180EB8300EA0E86137890C7FCA25AA45AB4FC151A809115>IIII<12035AA3120EA4EAFFE0EA1C00 A35AA45AA4EAE080A2EAE100A2126612380B1A7C990E>I<381C0180EA2E03124EA2388E 0700A2121CA2EA380EA438301C80A3EA383C38184D00EA0F8611127C9116>II<38038780380CC840380870E012103820E0C014001200A2485AA4EA03811263EA E38212C5EA8584EA787813127E9113>120 D<381C0180EA2E03124EA2388E0700A2121C A2EA380EA4EA301CA3EA383CEA1878EA0FB8EA003813301370EAE0605BEA81800043C7FC 123C111A7C9114>I E /Fl 85 125 df2 D11 D<137E3801C180EA0301380703C0120EEB 018090C7FCA5B512C0EA0E01B0387F87F8151D809C17>II<90383F07E03901C09C18380380F0D807 01133C000E13E00100131892C7FCA5B612FC390E00E01CB03A7FC7FCFF80211D809C23> I<120EA2121E1238127012E012800707779C15>19 D34 D<13E0EA0310EA0608A2120EA45B A25B6C5AEC3FE09038800F80EC06000003130412073809C00800115BEA30E03820F020EA 607038E03840EB3C80131C90380F00207F0070EB8040383009C0391830E180390FC03F00 1B1F7E9D20>38 D<126012F012F812681208A31210A2122012401280050C7C9C0C>I<13 80EA0100120212065AA25AA25AA35AA412E0AC1260A47EA37EA27EA27E12027EEA008009 2A7C9E10>I<7E12407E12307EA27EA27EA37EA41380AC1300A41206A35AA25AA25A1220 5A5A092A7E9E10>I<1306ADB612E0A2D80006C7FCAD1B1C7E9720>43 D<126012F0A212701210A41220A212401280040C7C830C>II<12 6012F0A2126004047C830C>I<130113031306A3130CA31318A31330A31360A213C0A3EA 0180A3EA0300A31206A25AA35AA35AA35AA35AA210297E9E15>II<5A1207123F12C71207B3A5EAFFF80D1C7C9B15>III<130CA2131C133CA2135C13DC139C EA011C120312021204120C1208121012301220124012C0B512C038001C00A73801FFC012 1C7F9B15>II<13F0EA03 0CEA0404EA0C0EEA181E1230130CEA7000A21260EAE3E0EAE430EAE818EAF00C130EEAE0 061307A51260A2EA7006EA300E130CEA1818EA0C30EA03E0101D7E9B15>I<1240387FFF 801400A2EA4002485AA25B485AA25B1360134013C0A212015BA21203A41207A66CC7FC11 1D7E9B15>III<126012F0A212601200AA126012F0A2126004127C910C>I<126012F0A2 12601200AA126012F0A212701210A41220A212401280041A7C910C>I<007FB512C0B612 E0C9FCA8B612E06C14C01B0C7E8F20>61 D<1306A3130FA3EB1780A2EB37C01323A2EB43 E01341A2EB80F0A338010078A2EBFFF83802003CA3487FA2000C131F80001E5BB4EBFFF0 1C1D7F9C1F>65 DI<90381F8080EBE0613801801938 070007000E13035A14015A00781300A2127000F01400A8007014801278A212386CEB0100 A26C13026C5B380180083800E030EB1FC0191E7E9C1E>IIII<90381F8080EBE0613801 801938070007000E13035A14015A00781300A2127000F01400A6ECFFF0EC0F8000701307 1278A212387EA27E6C130B380180113800E06090381F80001C1E7E9C21>I<39FFF0FFF0 390F000F00AC90B5FCEB000FAD39FFF0FFF01C1C7F9B1F>II<3807FF8038007C00133CB3127012F8A21338EA7078EA4070EA30E0EA0F 80111D7F9B15>I<39FFF01FE0390F000780EC060014045C5C5C5C5C49C7FC1302130613 0FEB17801327EB43C0EB81E013016D7E1478A280143E141E80158015C039FFF03FF01C1C 7F9B20>IIIIII82 D<3807E080EA1C19EA30051303EA600112E01300A36C13007E127CEA7FC0EA3FF8EA1FFE EA07FFC61380130FEB07C0130313011280A300C01380A238E00300EAD002EACC0CEA83F8 121E7E9C17>I<007FB512C038700F010060130000401440A200C014201280A300001400 B1497E3803FFFC1B1C7F9B1E>I<39FFF01FF0390F000380EC0100B3A26C130213800003 5BEA01C03800E018EB7060EB0F801C1D7F9B1F>I<39FFE00FF0391F0003C0EC01806C14 00A238078002A213C000035BA2EBE00C00011308A26C6C5AA213F8EB7820A26D5AA36D5A A2131F6DC7FCA21306A31C1D7F9B1F>I<3AFFE1FFC0FF3A1F003E003C001E013C13186C 6D1310A32607801F1320A33A03C0278040A33A01E043C080A33A00F081E100A39038F900 F3017913F2A2017E137E013E137CA2013C133C011C1338A20118131801081310281D7F9B 2B>I<39FFF003FC390F8001E00007EB00C06D13800003EB01006D5A000113026C6C5A13 F8EB7808EB7C18EB3C10EB3E20131F6D5A14C06D5AABEB7FF81E1C809B1F>89 D<387FFFF0EA7C01007013E0386003C0A238400780130F1400131E12005B137C13785BA2 485A1203EBC010EA0780A2EA0F00481330001E13205A14604813E0EAF803B5FC141C7E9B 19>I<12FEA212C0B3B312FEA207297C9E0C>II<12FEA21206B3B312FEA20729809E0C>I<12 0C12121221EA4080EA80400A057B9B15>I97 D<12FC121CAA137CEA1D87381E0180381C00C014E014601470A6146014E014C0381E0180 38190700EA10FC141D7F9C17>IIII<13F8EA018CEA071E1206EA0E0C1300A6EAFFE0EA0E00B0EA7F E00F1D809C0D>II<12FC121CAA137C1387EA1D03001E1380121CAD38 FF9FF0141D7F9C17>I<1218123CA21218C7FCA712FC121CB0EAFF80091D7F9C0C>I<13C0 EA01E0A2EA00C01300A7EA07E01200B3A21260EAF0C012F1EA6180EA3E000B25839C0D> I<12FC121CAAEB0FE0EB0780EB06005B13105B5B13E0121DEA1E70EA1C781338133C131C 7F130F148038FF9FE0131D7F9C16>I<12FC121CB3A9EAFF80091D7F9C0C>I<39FC7E07E0 391C838838391D019018001EEBE01C001C13C0AD3AFF8FF8FF8021127F9124>IIII<3803E080EA0E19EA1805EA3807EA7003A212E0 A61270A2EA38071218EA0E1BEA03E3EA0003A7EB1FF0141A7F9116>II I<1204A4120CA2121C123CEAFFE0EA1C00A91310A5120CEA0E20EA03C00C1A7F9910>I< 38FC1F80EA1C03AD1307120CEA0E1B3803E3F014127F9117>I<38FF07E0383C0380381C 0100A2EA0E02A2EA0F06EA0704A2EA0388A213C8EA01D0A2EA00E0A3134013127F9116> I<39FF3FC7E0393C0703C0001CEB01801500130B000E1382A21311000713C4A213203803 A0E8A2EBC06800011370A2EB8030000013201B127F911E>I<38FF0FE0381E0700EA1C06 EA0E046C5AEA039013B0EA01E012007F12011338EA021C1204EA0C0E487E003C138038FE 1FF014127F9116>I<38FF07E0383C0380381C0100A2EA0E02A2EA0F06EA0704A2EA0388 A213C8EA01D0A2EA00E0A31340A25BA212F000F1C7FC12F312661238131A7F9116>IIII E /Fm 7 117 df<1470A214F8A3497EA2497EA3EB067FA2010C7F143FA2496C7EA20130 7F140F01707FEB6007A201C07F90B5FC4880EB8001A2D803007F14004880000680A23AFF E007FFF8A225227EA12A>65 D 97 DII114 DI<487EA41203A21207A2120F123FB5FCA2EA0F80ABEB8180A5EB8300EA07C3EA 03FEEA00F811207F9F16>I E /Fn 37 123 df<127012F812FCA212741204A41208A212 10A212201240060F7C840E>44 D<127012F8A3127005057C840E>46 D<127012F8A312701200AB127012F8A3127005157C940E>58 D64 D<903807E0109038381830EBE0063901C0017039038000F048C7FC000E1470121E001C14 30123CA2007C14101278A200F81400A812781510127C123CA2001C1420121E000E14407E 6C6C13803901C001003800E002EB381CEB07E01C247DA223>67 DII<39FFFC3FFF390FC003F039078001E0AE90B5FC EB8001AF390FC003F039FFFC3FFF20227EA125>72 DI<39FF8007FF3907C000F81570D805E01320EA04F0A21378137C13 3C7F131F7FEB0780A2EB03C0EB01E0A2EB00F014F81478143C143E141E140FA2EC07A0EC 03E0A21401A21400000E1460121FD8FFE0132020227EA125>78 D<3803F020380C0C60EA 1802383001E0EA70000060136012E0A21420A36C1300A21278127FEA3FF0EA1FFE6C7E00 03138038003FC0EB07E01301EB00F0A214707EA46C1360A26C13C07E38C8018038C60700 EA81FC14247DA21B>83 D<39FFFC07FF390FC000F86C4813701520B3A5000314407FA200 0114806C7E9038600100EB3006EB1C08EB03F020237EA125>85 D<3BFFF03FFC03FE3B1F 8007E000F86C486C48137017206E7ED807801540A24A7E2603C0021480A39039E0047801 00011600A2EC083CD800F01402A2EC101E01785CA2EC200F013C5CA20260138890391E40 0790A216D090391F8003F0010F5CA2EC00016D5CA20106130001025C2F237FA132>87 D89 D<387FFFFE387E003E0078133C007013781260004013F012C0EB01E038 8003C0A2EB07801200EB0F005B131E5BA25BA25B1201EBE001EA03C0A2EA07801403EA0F 00001E1302A2481306140E48131E00F8137EB512FE18227DA11E>I97 D99 D<14E0130F13011300ABEA01F8EA0704EA0C02EA1C01EA38001278127012F0A712701278 1238EA1801EA0C0238070CF03801F0FE17237EA21B>II<133E13E33801C780EA0387130748C7FCA9EAFFF80007C7FCB2 7FEA7FF0112380A20F>I<14703803F198380E1E18EA1C0E38380700A200781380A40038 1300A2EA1C0EEA1E1CEA33F00020C7FCA212301238EA3FFE381FFFC06C13E0383000F048 1330481318A400601330A2003813E0380E03803803FE0015217F9518>I<120E12FE121E 120EABEB1F80EB60C0EB80E0380F0070A2120EAF38FFE7FF18237FA21B>I<121C121E12 3E121E121CC7FCA8120E127E121E120EB1EAFFC00A227FA10E>I<120E12FE121E120EAB EB03FCEB01F014C01480EB02005B5B5B133813F8EA0F1CEA0E1E130E7F1480EB03C01301 14E0EB00F014F838FFE3FE17237FA21A>107 D<120E12FE121E120EB3ADEAFFE00B237F A20E>I<390E1FC07F3AFE60E183803A1E807201C03A0F003C00E0A2000E1338AF3AFFE3 FF8FFE27157F942A>I<380E1F8038FE60C0381E80E0380F0070A2120EAF38FFE7FF1815 7F941B>III114 DI< 1202A41206A3120E121E123EEAFFFCEA0E00AB1304A6EA07081203EA01F00E1F7F9E13> I<000E137038FE07F0EA1E00000E1370AD14F0A238060170380382783800FC7F18157F94 1B>I<38FF80FE381E00781430000E1320A26C1340A2EB80C000031380A23801C100A2EA 00E2A31374A21338A3131017157F941A>I<39FF8FF87F393E01E03C001CEBC01814E000 0E1410EB0260147000071420EB04301438D803841340EB8818141CD801C81380EBD00C14 0E3900F00F00497EA2EB6006EB400220157F9423>I<38FF80FE381E00781430000E1320 A26C1340A2EB80C000031380A23801C100A2EA00E2A31374A21338A31310A25BA35B12F0 5B12F10043C7FC123C171F7F941A>121 D<383FFFC038380380EA300700201300EA600E EA401C133C1338C65A5B12015B38038040EA07005A000E13C04813805AEA7801EA7007B5 FC12157F9416>I E /Fo 36 122 df<49B4FC011F13C090387F81E0EBFC013901F807F0 1203EA07F0A4EC01C091C8FCA3EC3FF8B6FCA33807F003B3A33A7FFF3FFF80A3212A7FA9 25>12 D45 D<130E131E137EEA07FE12FFA212F81200B3ABB512 FEA317277BA622>49 DII<140F A25C5C5C5C5BA2EB03BFEB073F130E131C133C1338137013E0EA01C0EA03801207130012 0E5A5A5A12F0B612F8A3C7EA7F00A890381FFFF8A31D277EA622>I<00181303381F801F EBFFFE5C5C5C14C091C7FC001CC8FCA7EB7FC0381DFFF8381F80FC381E003F1208C7EA1F 8015C0A215E0A21218127C12FEA315C05A0078EB3F80A26CEB7F00381F01FE6CB45A0003 13F0C613801B277DA622>II<91387FE003903907FFFC07011FEBFF0F90397FF00F9F9039FF 0001FFD801FC7F4848147F4848143F4848141F485A160F485A1607127FA290C9FC5AA97E 7F1607123FA26C7E160E6C7E6C6C141C6C6C143C6C6C14786CB4EB01F090397FF007C001 1FB512800107EBFE009038007FF028297CA831>67 D69 D73 D 82 D<9038FF80600003EBF0E0000F13F8381F80FD383F001F003E1307481303A200FC13 01A214007EA26C140013C0EA7FFCEBFFE06C13F86C13FE80000714806C14C0C6FC010F13 E0EB007FEC1FF0140F140700E01303A46C14E0A26C13076C14C0B4EB0F80EBE03F39E3FF FE0000E15B38C01FF01C297CA825>I<007FB71280A39039807F807FD87C00140F007815 07A20070150300F016C0A2481501A5C791C7FCB3A490B612C0A32A287EA72F>I<3803FF 80000F13F0381F01FC383F80FE147F801580EA1F00C7FCA4EB3FFF3801FC3FEA0FE0EA1F 80EA3F00127E5AA4145F007E13DF393F839FFC381FFE0F3803FC031E1B7E9A21>97 DIIIII<9038FF80F00003EBE3F839 0FC1FE1C391F007C7C48137E003EEB3E10007EEB3F00A6003E133E003F137E6C137C380F C1F8380BFFE00018138090C8FC1238A2123C383FFFF814FF6C14C06C14E06C14F0121F38 3C0007007CEB01F8481300A4007CEB01F0A2003FEB07E0390FC01F806CB5120038007FF0 1E287E9A22>III108 D<26FFC07FEB1FC0903AC1FFC07FF0903AC307E0C1F8D80FC490 38F101FC9039C803F20001D801FE7F01D05BA201E05BB03CFFFE3FFF8FFFE0A3331B7D9A 38>I<38FFC07E9038C1FF809038C30FC0D80FC413E0EBC80701D813F013D0A213E0B039 FFFE3FFFA3201B7D9A25>II<38FFE1FE9038EFFF809038FE0FE0390FF803 F09038F001F801E013FC140015FEA2157FA8157E15FEA215FC140101F013F89038F807F0 9038FC0FE09038EFFF809038E1FC0001E0C7FCA9EAFFFEA320277E9A25>I<38FFC1F0EB C7FCEBC63E380FCC7F13D813D0A2EBF03EEBE000B0B5FCA3181B7F9A1B>114 D<3803FE30380FFFF0EA3E03EA7800127000F01370A27E00FE1300EAFFE06CB4FC14C06C 13E06C13F0000713F8C6FCEB07FC130000E0137C143C7E14387E6C137038FF01E038E7FF C000C11300161B7E9A1B>I<13E0A41201A31203A21207120F381FFFE0B5FCA2380FE000 AD1470A73807F0E0000313C03801FF8038007F0014267FA51A>I<39FFE07FF0A3000F13 07B2140FA2000713173903F067FF3801FFC738007F87201B7D9A25>I<39FFFC03FFA339 0FF000F0000714E07F0003EB01C0A2EBFC0300011480EBFE070000140013FFEB7F0EA214 9EEB3F9C14FC6D5AA26D5AA36D5AA26D5AA2201B7F9A23>I<3BFFFC7FFC1FFCA33B0FE0 0FE001C02607F007EB0380A201F8EBF00700031600EC0FF801FC5C0001150EEC1FFC2600 FE1C5B15FE9039FF387E3C017F1438EC787F6D486C5A16F0ECE01F011F5CA26D486C5AA2 EC800701075CA22E1B7F9A31>I<39FFFC1FFEA33907F003803803F8079038FC0F003801 FE1E00005BEB7F3814F86D5A6D5A130F806D7E130F497EEB3CFEEB38FFEB787F9038F03F 803901E01FC0D803C013E0EB800F39FFF03FFFA3201B7F9A23>I<39FFFC03FFA3390FF0 00F0000714E07F0003EB01C0A2EBFC0300011480EBFE070000140013FFEB7F0EA2149EEB 3F9C14FC6D5AA26D5AA36D5AA26D5AA25CA21307003890C7FCEA7C0FEAFE0E131E131C5B EA74F0EA3FE0EA0F8020277F9A23>I E end %%EndProlog %%BeginSetup %%Feature: *Resolution 300dpi TeXDict begin %%PaperSize: a4 %%BeginPaperSize: a4 a4 %%EndPaperSize %%EndSetup %%Page: 1 1 1 0 bop -68 233 a Fo(Clustering)23 b(with)g(\014nite)h(data)e(from)g (semi-parametric)i(mixture)g(distributi)q(ons)270 353 y Fn(Y)l(ong)17 b(W)l(ang)889 b(Ian)17 b(H.)e(Witten)65 469 y(Computer)h(Science)e(Departmen)o(t)505 b(Computer)15 b(Science)g(Departmen)o(t)7 527 y(Univ)o(ersit)o(y)f(of)j(W)l(aik)m (ato,)f(New)g(Zealand)390 b(Univ)o(ersit)o(y)14 b(of)i(W)l(aik)m(ato,)h (New)f(Zealand)20 586 y(Email:)j(y)o(ongw)o(ang@cs.w)o(aik)m(ato.ac.nz) 483 b(Email:)20 b(ih)o(w@cs.w)o(aik)m(ato.ac.nz)-75 748 y Fm(Abstract)-75 828 y Fl(Existing)27 b(clustering)g(metho)q(ds)g(for) g(the)g(semi-parametric)-75 878 y(mixture)12 b(distribution)h(p)q (erform)g(w)o(ell)f(as)i(the)g(v)o(olume)d(of)i(data)-75 928 y(increases.)22 b(Ho)o(w)o(ev)o(er,)15 b(they)g(all)f(su\013er)i (from)d(a)h(serious)i(dra)o(w-)-75 978 y(bac)o(k)h(in)f(\014nite-data)g (situations:)23 b(small)15 b(outlying)g(groups)i(of)-75 1028 y(data)j(p)q(oin)o(ts)f(can)h(b)q(e)h(completely)d(ignored)i(in)f (the)i(clusters)-75 1077 y(that)12 b(are)g(pro)q(duced,)h(no)f(matter)f (ho)o(w)g(far)h(a)o(w)o(a)o(y)f(they)h(lie)f(from)-75 1127 y(the)18 b(ma)r(jor)e(clusters.)32 b(This)17 b(can)h(result)h(in)e (un)o(b)q(ounded)h(loss)-75 1177 y(if)e(the)h(loss)f(function)g(is)g (sensitiv)o(e)i(to)e(the)h(distance)g(b)q(et)o(w)o(een)-75 1227 y(clusters.)-25 1278 y(This)e(pap)q(er)h(prop)q(oses)h(a)e(new)g (distance-based)i(clustering)-75 1328 y(metho)q(d)d(that)i(o)o(v)o (ercomes)f(the)h(problem)e(b)o(y)h(a)o(v)o(oiding)e(global)-75 1378 y(constrain)o(ts.)19 b(Exp)q(erimen)o(tal)13 b(results)i (illustrate)f(its)g(sup)q(erior-)-75 1428 y(it)o(y)h(to)g(existing)g (metho)q(ds)g(when)g(small)e(clusters)k(are)f(presen)o(t)-75 1478 y(in)f(\014nite)h(data)f(sets;)i(they)f(also)f(suggest)h(that)g (it)f(is)g(more)f(ac-)-75 1528 y(curate)j(and)f(stable)g(than)g(other)h (metho)q(ds)e(ev)o(en)i(when)f(there)-75 1577 y(are)e(no)g(small)e (clusters.)-75 1724 y Fo(1)67 b(In)n(tro)r(duction)-75 1819 y Fl(A)13 b(common)d(practical)j(problem)f(is)h(to)g(\014t)g(an)g (underlying)f(sta-)-75 1868 y(tistical)k(distribution)g(to)h(a)f (sample.)25 b(In)17 b(some)f(applications,)-75 1918 y(this)h(in)o(v)o (olv)o(es)f(estimating)f(the)i(parameters)g(of)g(a)f(single)h(dis-)-75 1968 y(tribution)12 b(function|e.g.)f(the)i(mean)f(and)g(v)n(ariance)g (of)g(a)h(nor-)-75 2018 y(mal)i(distribution.)27 b(In)17 b(others,)i(an)e(appropriate)g(mixture)f(of)-75 2068 y(elemen)o(tary)k(distributions)g(m)o(ust)f(b)q(e)i(found|e.g.)e(a)h (set)h(of)-75 2117 y(normal)13 b(distributions,)i(eac)o(h)h(with)f(its) g(o)o(wn)g(mean)f(and)i(v)n(ari-)-75 2167 y(ance.)29 b(Among)16 b(man)o(y)f(kinds)i(of)g(mixture)f(distribution,)h(one)-75 2217 y(in)f(particular)h(is)f(attracting)h(increasing)g(researc)o(h)i (atten)o(tion)-75 2267 y(b)q(ecause)i(it)f(has)g(man)o(y)d(practical)j (applications:)28 b(the)21 b(semi-)-75 2317 y(parametric)13 b(mixture)g(distribution.)-25 2368 y(A)i Fk(semi-p)n(ar)n(ametric)h (mixtur)n(e)f(distribution)k Fl(is)c(one)h(whose)-75 2418 y(cum)o(ulativ)o(e)c(distribution)h(function)h(\(CDF\))g(has)g (the)g(form)189 2534 y Fj(F)216 2540 y Fi(G)244 2534 y Fl(\()p Fj(x)p Fl(\))e(=)355 2478 y Fh(Z)378 2572 y Fg(\002)413 2534 y Fj(F)6 b Fl(\()p Fj(x)p Fl(;)h Fj(\022)q Fl(\))13 b Fj(dG)p Fl(\()p Fj(\022)q Fl(\))p Fj(;)211 b Fl(\(1\))-75 2650 y(where)22 b Fj(\022)i Ff(2)f Fl(\002,)f(the)f (parameter)f(space,)j(and)d Fj(x)j Ff(2)f(X)6 b Fl(,)22 b(the)-75 2700 y(sample)16 b(space.)29 b(This)17 b(giv)o(es)g(the)h (CDF)f(of)g(the)h(mixture)e(dis-)1013 748 y(tribution)i Fj(F)1220 754 y Fi(G)1248 748 y Fl(\()p Fj(x)p Fl(\))h(in)f(terms)h(of) g(t)o(w)o(o)g(more)f(elemen)o(tary)g(dis-)1013 798 y(tributions:)h(the) c Fk(c)n(omp)n(onent)i(distribution)d Fj(F)6 b Fl(\()p Fj(x)p Fl(;)h Fj(\022)q Fl(\),)14 b(whic)o(h)g(is)1013 848 y(giv)o(en,)20 b(and)g(the)h Fk(mixing)f(distribution)g Fj(G)p Fl(\()p Fj(\022)q Fl(\),)h(whic)o(h)f(is)g(un-)1013 898 y(kno)o(wn.)h(The)16 b(former)e(has)i(a)f(single)g(unkno)o(wn)g (parameter)g Fj(\022)q Fl(,)1013 948 y(while)d(the)h(latter)g(giv)o(es) g(a)f(CDF)h(for)f Fj(\022)q Fl(.)19 b(F)m(or)12 b(example,)f Fj(F)6 b Fl(\()p Fj(x)p Fl(;)h Fj(\022)q Fl(\))1013 997 y(migh)o(t)12 b(b)q(e)j(the)g(normal)d(distribution)i(with)g(mean)f Fj(\022)j Fl(and)e(unit)1013 1047 y(v)n(ariance,)k(where)h Fj(\022)h Fl(is)d(a)h(random)e(v)n(ariable)h(distributed)i(ac-)1013 1097 y(cording)13 b(to)h Fj(G)p Fl(\()p Fj(\022)q Fl(\).)1062 1148 y(The)f(problem)f(that)g(w)o(e)h(will)e(address)j(is)e(the)i (estimation)d(of)1013 1198 y Fj(G)p Fl(\()p Fj(\022)q Fl(\))f(from)e(sampled)g(data)i(that)g(are)g(indep)q(enden)o(t)h(and)f (iden)o(ti-)1013 1248 y(cally)j(distributed)i(according)f(to)h(the)f (unkno)o(wn)g(distribution)1013 1297 y Fj(F)1040 1303 y Fi(G)1067 1297 y Fl(\()p Fj(x)p Fl(\).)k(Once)c Fj(G)p Fl(\()p Fj(\022)q Fl(\))f(has)g(b)q(een)h(obtained,)e(it)g(is)h(a)g (straigh)o(tfor-)1013 1347 y(w)o(ard)g(matter)g(to)h(obtain)f(the)i (mixture)e(distribution.)1062 1398 y(The)k(CDF)e Fj(G)p Fl(\()p Fj(\022)q Fl(\))i(can)f(b)q(e)g(either)h(con)o(tin)o(uous)f(or) g(discrete.)1013 1448 y(In)21 b(the)h(latter)g(case,)i Fj(G)p Fl(\()p Fj(\022)q Fl(\))e(is)f(comp)q(osed)h(of)e(a)i(n)o(um)o (b)q(er)f(of)1013 1498 y(mass)16 b(p)q(oin)o(ts,)i(sa)o(y)m(,)f Fj(\022)1362 1504 y Fg(1)1381 1498 y Fj(;)7 b(:)g(:)g(:)e(;)i(\022)1493 1504 y Fi(k)1530 1498 y Fl(with)17 b(masses)g Fj(w)1798 1504 y Fg(1)1817 1498 y Fj(;)7 b(:)g(:)g(:)e(;)i(w)1940 1504 y Fi(k)1976 1498 y Fl(re-)1013 1547 y(sp)q(ectiv)o(ely)m(,)22 b(satisfying)1412 1516 y Fh(P)1456 1527 y Fi(k)1456 1560 y(i)p Fg(=1)1519 1547 y Fj(w)1549 1553 y Fi(i)1585 1547 y Fl(=)h(1.)38 b(Then)21 b(\(1\))g(can)f(b)q(e)1013 1597 y(re-written)14 b(as)1305 1721 y Fj(F)1332 1727 y Fi(G)1359 1721 y Fl(\()p Fj(x)p Fl(\))e(=)1491 1669 y Fi(k)1471 1682 y Fh(X)1474 1770 y Fi(i)p Fg(=1)1538 1721 y Fj(w)1568 1727 y Fi(i)1581 1721 y Fj(F)6 b Fl(\()p Fj(x)p Fl(;)h Fj(\022)1692 1727 y Fi(i)1705 1721 y Fl(\))p Fj(;)239 b Fl(\(2\))1013 1851 y(eac)o(h)10 b(mass)e(p)q(oin)o(t)h(pro)o(viding)g (a)g(comp)q(onen)o(t,)g(or)h(cluster,)h(in)e(the)1013 1901 y(mixture)15 b(with)i(the)g(corresp)q(onding)h(w)o(eigh)o(t.)26 b(If)16 b(the)i(n)o(um)o(b)q(er)1013 1951 y(of)d(comp)q(onen)o(ts)h Fj(k)h Fl(is)f(\014nite)g(and)g(kno)o(wn)f Fk(a)i(priori)p Fl(,)e(the)i(mix-)1013 2000 y(ture)g(distribution)f(is)h(called)f Fk(\014nite)p Fl(;)i(otherwise)g(it)e(is)h(treated)1013 2050 y(as)f(coun)o(tably)f(in\014nite.)24 b(The)17 b(quali\014er)e (\\coun)o(tably")g(is)h(nec-)1013 2100 y(essary)j(to)f(distinguish)g (this)g(case)h(from)e(the)i(situation)e(with)1013 2150 y(con)o(tin)o(uous)c Fj(G)p Fl(\()p Fj(\022)q Fl(\),)h(whic)o(h)g(is)g (also)f(in\014nite.)1062 2201 y(W)m(e)22 b(will)f(fo)q(cus)i(on)f(the)h (estimation)e(of)h(arbitrary)g(mix-)1013 2251 y(ing)d(distributions,)j (i.e.,)e Fj(G)p Fl(\()p Fj(\022)q Fl(\))h(is)g(an)o(y)f(general)h (probabilit)o(y)1013 2300 y(distribution|\014nite,)13 b(coun)o(tably)g(in\014nite)i(or)f(con)o(tin)o(uous.)19 b(A)1013 2350 y(few)d(metho)q(ds)g(for)g(tac)o(kling)g(this)g(problem)f (can)i(b)q(e)g(found)f(in)1013 2400 y(the)g(literature.)26 b(Ho)o(w)o(ev)o(er,)16 b(as)h(w)o(e)f(shall)f(see,)j(they)f(all)e (su\013er)1013 2450 y(from)e(a)h(serious)i(dra)o(wbac)o(k)f(in)f (\014nite-data)h(situations:)k(small)1013 2500 y(outlying)9 b(groups)i(of)f(data)g(p)q(oin)o(ts)h(can)f(b)q(e)i(completely)d (ignored)1013 2549 y(in)k(the)h(clusters)i(that)e(are)g(pro)q(duced.) 1062 2600 y(This)c(phenomenon)g(seems)g(to)g(ha)o(v)o(e)g(b)q(een)h(o)o (v)o(erlo)q(ok)o(ed,)f(pre-)1013 2650 y(sumably)k(for)i(three)i (reasons:)23 b(small)14 b(amoun)o(ts)h(of)h(data)g(ma)o(y)1013 2700 y(b)q(e)f(assumed)g(to)f(represen)o(t)k(a)c(small)f(loss;)i(a)f (few)h(data)g(p)q(oin)o(ts)965 2825 y(1)p eop %%Page: 2 2 2 1 bop -75 117 a Fl(can)15 b(easily)g(b)q(e)h(dismissed)e(as)h (outliers;)g(and)g(in)g(the)g(limit)e(the)-75 166 y(problem)e(ev)n(ap)q (orates)h(b)q(ecause)i(most)d(estimators)g(p)q(ossess)j(the)-75 216 y(prop)q(ert)o(y)19 b(of)f Fk(str)n(ong)h(c)n(onsistency)t Fl(|whic)o(h)f(means)g(that,)h(al-)-75 266 y(most)h(surely)m(,)i(they)f (con)o(v)o(erge)h(w)o(eakly)e(to)h(an)o(y)g(giv)o(en)f Fj(G)p Fl(\()p Fj(\022)q Fl(\))-75 316 y(as)e(the)g(sample)e(size)i (approac)o(hes)g(in\014nit)o(y)m(.)28 b(Ho)o(w)o(ev)o(er,)18 b(often)-75 366 y(these)h(reasons)f(are)g(inappropriate:)24 b(the)18 b(loss)f(function)g(ma)o(y)-75 415 y(b)q(e)g(sensitiv)o(e)f (to)g(the)g(distance)h(b)q(et)o(w)o(een)g(clusters;)h(the)f(small)-75 465 y(n)o(um)o(b)q(er)f(of)g(outlying)f(data)h(p)q(oin)o(ts)h(ma)o(y)d (actually)i(represen)o(t)-75 515 y(small)9 b(clusters;)14 b(and)d(an)o(y)g(practical)h(clustering)g(situation)f(will)-75 565 y(necessarily)k(in)o(v)o(olv)o(e)e(\014nite)h(data.)-25 616 y(This)d(pap)q(er)i(prop)q(oses)g(a)e(new)h(metho)q(d,)f(based)h (on)f(the)i(idea)-75 666 y(of)e(lo)q(cal)f(\014tting,)h(that)g (successfully)h(solv)o(es)f(the)h(problem.)k(The)-75 716 y(exp)q(erimen)o(tal)k(results)i(presen)o(ted)h(b)q(elo)o(w)e (illustrate)f(its)h(su-)-75 765 y(p)q(eriorit)o(y)g(to)f(existing)h (metho)q(ds)f(when)h(small)e(clusters)j(are)-75 815 y(presen)o(t)17 b(in)f(\014nite)g(data)f(sets.)25 b(Moreo)o(v)o(er,)16 b(they)g(also)g(suggest)-75 865 y(that)k(it)g(is)g(more)f(accurate)j (and)e(stable)g(than)h(other)f(meth-)-75 915 y(o)q(ds)i(ev)o(en)g(when) g(there)h(are)f(no)f(small)e(clusters.)43 b(Existing)-75 965 y(clustering)19 b(metho)q(ds)f(for)g(semi-parametric)e(mixture)i (distri-)-75 1015 y(butions)13 b(are)h(brie\015y)g(review)o(ed)g(in)f (the)h(next)g(section.)19 b(Section)-75 1064 y(3)12 b(iden)o(ti\014es)h (a)f(common)d(problem)i(from)g(whic)o(h)h(these)i(curren)o(t)-75 1114 y(metho)q(ds)g(su\013er.)23 b(Then)15 b(w)o(e)g(presen)o(t)i(the)e (new)h(solution,)e(and)-75 1164 y(in)g(Section)h(5)g(w)o(e)g(describ)q (e)i(exp)q(erimen)o(ts)e(that)f(illustrate)h(the)-75 1214 y(problem)d(that)h(has)h(b)q(een)g(iden)o(ti\014ed)g(and)f(sho)o (w)g(ho)o(w)g(the)h(new)-75 1264 y(metho)q(d)f(o)o(v)o(ercomes)h(it.) -75 1408 y Fo(2)67 b(Clustering)23 b(metho)r(ds)-75 1502 y Fl(The)j(general)h(problem)d(of)h(inferring)h(mixture)f(mo)q(dels)g (is)-75 1552 y(treated)18 b(extensiv)o(ely)g(and)e(in)h(considerable)g (depth)h(in)e(b)q(o)q(oks)-75 1601 y(b)o(y)27 b(Titterington)g(et)h (al.)e(\(1985\),)j(McLac)o(hlan)e(and)g(Bas-)-75 1651 y(ford)20 b(\(1988\))f(and)h(Lindsa)o(y)g(\(1995\).)36 b(F)m(or)19 b(semi-parametric)-75 1701 y(mixture)d(distributions)g (there)i(are)f(three)h(basic)f(approac)o(hes:)-75 1751 y Fk(minimum)g(distanc)n(e)p Fl(,)g Fk(maximum)h(likeliho)n(o)n(d)p Fl(,)f(and)f Fk(Bayesian)p Fl(.)-75 1801 y(W)m(e)c(brie\015y)h(in)o (tro)q(duce)g(the)g(\014rst)g(approac)o(h,)f(whic)o(h)g(is)g(the)h(one) -75 1850 y(adopted)g(in)f(the)i(pap)q(er,)f(review)g(the)h(other)f(t)o (w)o(o)f(to)h(sho)o(w)g(wh)o(y)-75 1900 y(they)19 b(are)f(not)h (suitable)f(for)f(arbitrary)i(mixtures,)f(and)g(then)-75 1950 y(return)d(to)e(the)h(c)o(hosen)h(approac)o(h)e(and)h(review)g (the)g(minim)n(um)-75 2000 y(distance)19 b(estimators)f(for)g (arbitrary)g(semi-parametric)f(mix-)-75 2050 y(ture)f(distributions)e (that)h(ha)o(v)o(e)g(b)q(een)h(describ)q(ed)g(in)e(the)i(liter-)-75 2099 y(ature.)-25 2151 y(The)h(idea)f(of)g(the)i(minim)n(um)12 b(distance)17 b(metho)q(d)f(is)h(to)f(de-)-75 2200 y(\014ne)e(some)f (measure)h(of)f(the)h(go)q(o)q(dness)h(of)e(the)h(clustering)h(and)-75 2250 y(optimize)d(this)h(b)o(y)g(suitable)h(c)o(hoice)f(of)g(a)g (mixing)e(distribution)-75 2300 y Fj(G)-42 2306 y Fi(n)-20 2300 y Fl(\()p Fj(\022)q Fl(\))20 b(for)g(a)f(sample)f(of)g(size)j Fj(n)p Fl(.)34 b(W)m(e)19 b(generally)g(w)o(an)o(t)g(the)-75 2350 y(estimator)e(to)i(b)q(e)g(strongly)f(consisten)o(t)i(as)e Fj(n)h Ff(!)g(1)p Fl(,)g(in)f(the)-75 2400 y(sense)g(de\014ned)g(ab)q (o)o(v)o(e,)e(for)g(arbitrary)g(mixing)e(distributions.)-75 2450 y(W)m(e)g(also)f(generally)h(w)o(an)o(t)g(to)g(tak)o(e)g(adv)n(an) o(tage)g(of)f(the)i(sp)q(ecial)-75 2499 y(structure)j(of)d (semi-parametric)f(mixtures)h(to)g(come)g(up)h(with)-75 2549 y(an)e(e\016cien)o(t)g(algorithmic)d(solution.)-25 2600 y(The)25 b(maxim)o(um)20 b(lik)o(eliho)q(o)q(d)k(approac)o(h)h (maxim)o(izes)e(the)-75 2650 y(lik)o(eliho)q(o)q(d)10 b(\(or)h(equiv)n(alen)o(tly)f(the)i(log-lik)o(eliho)q(o)q(d\))d(of)i (the)h(data)-75 2700 y(b)o(y)i(suitable)h(c)o(hoice)g(of)f Fj(G)344 2706 y Fi(n)366 2700 y Fl(\()p Fj(\022)q Fl(\).)22 b(It)14 b(can)h(in)g(fact)f(b)q(e)i(view)o(ed)e(as)1013 117 y(a)20 b(minim)n(um)c(distance)22 b(metho)q(d)e(that)h(uses)g(the)h (Kullbac)o(k{)1013 166 y(Leibler)e(distance)h(\(Titterington)f(et)h (al.,)f(1985\).)36 b(This)20 b(ap-)1013 216 y(proac)o(h)d(has)g(b)q (een)h(widely)f(used)h(for)e(estimating)g(\014nite)h(mix-)1013 266 y(tures,)e(particularly)f(when)h(the)g(n)o(um)o(b)q(er)f(of)h (clusters)h(is)e(fairly)1013 316 y(small,)e(and)i(it)h(is)g(generally)f (accepted)j(that)e(it)f(is)h(more)e(accu-)1013 366 y(rate)g(than)g (other)g(metho)q(ds.)k(Ho)o(w)o(ev)o(er,)c(it)g(has)f(not)h(b)q(een)h (used)1013 415 y(to)c(estimate)g(arbitrary)h(semi-parametric)d (mixtures,)i(presum-)1013 465 y(ably)16 b(b)q(ecause)j(of)d(its)h(high) g(computational)d(cost.)29 b(Its)17 b(sp)q(eed)1013 515 y(drops)j(dramatically)e(as)j(the)g(n)o(um)o(b)q(er)f(of)f(parameters)i (that)1013 565 y(m)o(ust)12 b(b)q(e)i(determined)f(increases,)h(whic)o (h)f(mak)o(es)f(it)h(computa-)1013 615 y(tionally)c(infeasible)h(for)h (arbitrary)g(mixtures,)f(since)i(eac)o(h)f(data)1013 664 y(p)q(oin)o(t)h(migh)o(t)f(represen)o(t)k(a)e(comp)q(onen)o(t)g(of) f(the)i(\014nal)e(distribu-)1013 714 y(tion)h(with)g(its)h(o)o(wn)g (parameters.)1062 767 y(Ba)o(y)o(esian)28 b(metho)q(ds)f(assume)h (prior)f(kno)o(wledge,)k(often)1013 817 y(giv)o(en)13 b(b)o(y)h(some)f(kind)h(of)f(heuristic,)h(to)g(determine)g(a)g (suitable)1013 867 y Fk(a)21 b(priori)e Fl(probabilit)o(y)g(densit)o(y) h(function.)37 b(They)21 b(are)g(often)1013 917 y(used)16 b(to)g(determine)g(the)g(n)o(um)o(b)q(er)f(of)h(comp)q(onen)o(ts)f(in)h (the)g(\014-)1013 966 y(nal)c(distribution|particularly)f(when)j (outliers)f(are)h(presen)o(t.)1013 1016 y(Lik)o(e)19 b(the)h(maxim)n(um)15 b(lik)o(eliho)q(o)q(d)i(approac)o(h)j(they)g(are) f(com-)1013 1066 y(putationally)12 b(exp)q(ensiv)o(e,)k(for)e(they)h (use)g(the)h(same)d(computa-)1013 1116 y(tional)f(tec)o(hniques.)1062 1169 y(W)m(e)f(no)o(w)h(review)g(existing)f(minim)n(um)c(distance)13 b(estimators)1013 1219 y(for)f(arbitrary)h(semi-parametric)d(mixture)i (distributions.)17 b(W)m(e)1013 1268 y(b)q(egin)g(with)g(some)f (notation.)28 b(Let)18 b Fj(x)1628 1274 y Fg(1)1646 1268 y Fj(;)7 b(:)g(:)g(:)e(;)i(x)1763 1274 y Fi(n)1802 1268 y Fl(b)q(e)18 b(a)f(sample)1013 1318 y(c)o(hosen)g(according)g(to)f (the)h(mixture)f(distribution,)g(and)h(sup-)1013 1368 y(p)q(ose)k(\(without)f(loss)h(of)e(generalit)o(y\))i(that)f(the)h (sequence)i(is)1013 1418 y(ordered)18 b(so)g(that)g Fj(x)1339 1424 y Fg(1)1375 1418 y Ff(\024)g Fj(x)1449 1424 y Fg(2)1486 1418 y Ff(\024)g Fj(:)7 b(:)g(:)16 b Ff(\024)i Fj(x)1676 1424 y Fi(n)1699 1418 y Fl(.)29 b(Let)18 b Fj(G)1851 1424 y Fi(n)1873 1418 y Fl(\()p Fj(\022)q Fl(\))h(b)q(e)f(a)1013 1468 y(discrete)d(estimator)e(of)g(the)i(underlying)e(mixing)e (distribution)1013 1517 y(with)e(a)h(set)i(of)d(supp)q(ort)i(p)q(oin)o (ts)f(at)g Ff(f)p Fj(\022)1597 1523 y Fi(nj)1636 1517 y Fl(;)d Fj(j)13 b Fl(=)f(1)p Fj(;)7 b(:)g(:)g(:)t(;)g(k)1864 1523 y Fi(n)1886 1517 y Ff(g)p Fl(.)17 b(Eac)o(h)1013 1567 y Fj(\022)1032 1573 y Fi(nj)1090 1567 y Fl(pro)o(vides)j(a)g(comp) q(onen)o(t)f(of)h(the)g(\014nal)g(clustering)g(with)1013 1624 y(w)o(eigh)o(t)c Fj(w)1177 1630 y Fi(nj)1230 1624 y Ff(\025)h Fl(0,)f(where)1451 1593 y Fh(P)1495 1603 y Fi(k)1513 1607 y Fe(n)1495 1637 y Fi(j)r Fg(=1)1561 1624 y Fj(w)1591 1630 y Fi(nj)1645 1624 y Fl(=)g(1.)26 b(Giv)o(en)16 b(the)h(sup-)1013 1674 y(p)q(ort)c(p)q(oin)o(ts,)g (obtaining)f Fj(G)1456 1680 y Fi(n)1479 1674 y Fl(\()p Fj(\022)q Fl(\))i(is)f(equiv)n(alen)o(t)g(to)h(computing)1013 1724 y(the)k(w)o(eigh)o(t)f(v)o(ector)i Fj(w)1382 1730 y Fi(n)1422 1724 y Fl(=)f(\()p Fj(w)1518 1730 y Fi(n)p Fg(1)1557 1724 y Fj(;)7 b(w)1606 1730 y Fi(n)p Fg(2)1645 1724 y Fj(;)g(:)g(:)g(:)t(;)g(w)1767 1730 y Fi(nk)1806 1734 y Fe(n)1827 1724 y Fl(\))1843 1709 y Fd(0)1855 1724 y Fl(.)29 b(Denote)1013 1774 y(b)o(y)15 b Fj(F)1099 1780 y Fi(G)1125 1784 y Fe(n)1147 1774 y Fl(\()p Fj(x)p Fl(\))h(the)h (estimated)e(mixture)g(CDF)h(with)g(resp)q(ect)i(to)1013 1824 y Fj(G)1046 1830 y Fi(n)1068 1824 y Fl(\()p Fj(\022)q Fl(\).)1062 1876 y(Tw)o(o)e(minim)n(um)c(distance)17 b(estimators)e(w)o(ere)i(prop)q(osed)g(in)1013 1926 y(the)d(late)g (1960s.)j(Choi)c(and)h(Bulgren)g(\(1968\))f(used)1326 2029 y(1)p 1324 2047 25 2 v 1324 2085 a Fj(n)1381 2005 y Fi(n)1361 2017 y Fh(X)1364 2106 y Fi(i)p Fg(=1)1421 2057 y Fl([)p Fj(F)1460 2063 y Fi(G)1486 2067 y Fe(n)1508 2057 y Fl(\()p Fj(x)1548 2063 y Fi(i)1561 2057 y Fl(\))d Ff(\000)f Fj(i=n)p Fl(])1700 2040 y Fg(2)1972 2057 y Fl(\(3\))1013 2191 y(as)14 b(the)h(distance)g(measure.)20 b(Minimizing)11 b(this)k(quan)o(tit)o(y)e(with)1013 2240 y(resp)q(ect)k(to)e Fj(G)1241 2246 y Fi(n)1279 2240 y Fl(yields)g(a)f(strongly)h(consisten)o(t)i(estimator.)k(A)1013 2290 y(sligh)o(t)11 b(impro)o(v)o(em)o(en)o(t)f(is)i(obtained)f(b)o(y)h (using)g(the)g(Cram)o(\023)-20 b(er-v)o(on)1013 2340 y(Mises)14 b(statistic)1143 2432 y(1)p 1141 2451 V 1141 2489 a Fj(n)1198 2409 y Fi(n)1178 2421 y Fh(X)1181 2509 y Fi(i)p Fg(=1)1238 2460 y Fl([)p Fj(F)1277 2466 y Fi(G)1303 2470 y Fe(n)1324 2460 y Fl(\()p Fj(x)1364 2466 y Fi(i)1378 2460 y Fl(\))9 b Ff(\000)h Fl(\()p Fj(i)g Ff(\000)f Fl(1)p Fj(=)p Fl(2\))p Fj(=n)p Fl(])1663 2443 y Fg(2)1690 2460 y Fl(+)g(1)p Fj(=)p Fl(\(12)p Fj(n)1856 2443 y Fg(2)1874 2460 y Fl(\))p Fj(;)70 b Fl(\(4\))1013 2600 y(whic)o(h)11 b(essen)o(tially)h(replaces)h Fj(i=n)f Fl(in)g(\(3\))f(with)h(\()p Fj(i)5 b Ff(\000)1835 2584 y Fg(1)p 1835 2591 17 2 v 1835 2615 a(2)1857 2600 y Fl(\))p Fj(=n)11 b Fl(with-)1013 2650 y(out)20 b(a\013ecting)g(the)h(asymptotic)e(result.)38 b(As)21 b(migh)o(t)d(b)q(e)j(ex-)1013 2700 y(p)q(ected,)c(this)g (reduces)h(the)e(bias)g(for)g(small-sam)o(ple)d(cases,)18 b(as)p eop %%Page: 3 3 3 2 bop -75 117 a Fl(w)o(as)12 b(demonstrated)h(empirically)c(b)o(y)k (Macdonald)e(\(1971\))h(in)g(a)-75 166 y(note)i(on)g(Choi)f(and)h (Bulgren's)g(pap)q(er.)-25 230 y(A)o(t)d(ab)q(out)g(the)h(same)e(time,) g(Deely)h(and)g(Kruse)h(\(1968\))f(used)-75 280 y(the)16 b(sup-norm)f(asso)q(ciated)i(with)e(the)h(Kolmogoro)o(v-Sm)o(irno)o(v) -75 330 y(test.)j(The)14 b(minimi)o(zation)d(is)j(o)o(v)o(er)7 449 y(sup)-12 484 y Fg(1)p Fd(\024)p Fi(i)p Fd(\024)p Fi(n)89 449 y Ff(fj)p Fj(F)149 455 y Fi(G)175 459 y Fe(n)196 449 y Fl(\()p Fj(x)236 455 y Fi(i)250 449 y Fl(\))9 b Ff(\000)h Fl(\()p Fj(i)f Ff(\000)h Fl(1\))p Fj(=n)p Ff(j)p Fj(;)d Ff(j)p Fj(F)551 455 y Fi(G)577 459 y Fe(n)597 449 y Fl(\()p Fj(x)637 455 y Fi(i)651 449 y Fl(\))i Ff(\000)g Fj(i=n)p Ff(jg)p Fj(;)62 b Fl(\(5\))-75 596 y(and)15 b(this)g(leads)g(to)f(a)h(linear)f(programming)e(problem.)19 b(Deely)-75 646 y(and)9 b(Kruse)i(also)e(established)i(the)f(strong)g (consistency)h(of)e(their)-75 696 y(estimator)18 b Fj(G)148 702 y Fi(n)170 696 y Fl(.)33 b(T)m(en)19 b(y)o(ears)h(later,)g(this)f (approac)o(h)f(w)o(as)h(ex-)-75 746 y(tended)i(b)o(y)f(Blum)f(and)h (Susarla)g(\(1977\))g(b)o(y)g(using)g(an)o(y)g(se-)-75 796 y(quence)11 b Ff(f)p Fj(f)99 802 y Fi(n)122 796 y Ff(g)e Fl(of)h(functions)f(whic)o(h)h(satis\014es)h(sup)c Ff(j)p Fj(f)735 802 y Fi(n)759 796 y Ff(\000)q Fj(f)812 802 y Fi(G)841 796 y Ff(j)k(!)g Fl(0)-75 845 y(a.s.)k(as)h Fj(n)e Ff(!)h(1)p Fl(.)23 b(Eac)o(h)16 b Fj(f)352 851 y Fi(n)390 845 y Fl(can,)g(for)g(example,)e(b)q(e)j(obtained)-75 895 y(b)o(y)e(a)g(k)o(ernel-based)i(densit)o(y)e(estimator.)22 b(Blum)14 b(and)h(Susarla)-75 945 y(appro)o(ximated)10 b(the)i(function)f Fj(f)435 951 y Fi(n)469 945 y Fl(b)o(y)h(the)g(o)o (v)o(erall)e(mixture)h(p)q(df)-75 995 y Fj(f)-55 1001 y Fi(G)-29 1005 y Fe(n)-6 995 y Fl(,)k(and)g(established)h(the)h (strong)e(consistency)i(of)e(the)h(esti-)-75 1045 y(mator)c Fj(G)80 1051 y Fi(n)116 1045 y Fl(under)j(w)o(eak)f(conditions.)-25 1108 y(F)m(or)d(reason)h(of)f(simplicit)o(y)e(and)j(generalit)o(y)m(,)f (w)o(e)g(will)g(denote)-75 1158 y(the)h(appro)o(ximation)d(b)q(et)o(w)o (een)k(t)o(w)o(o)e(mathematical)e(en)o(tities)j(of)-75 1208 y(the)f(same)e(t)o(yp)q(e)h(b)o(y)236 1197 y Ff(\030)236 1210 y Fl(=)268 1208 y(,)g(whic)o(h)g(implies)e(the)i(minimi)o(zation)d (with)-75 1258 y(resp)q(ect)19 b(to)e(an)g(estimator)f(of)h(a)f (distance)i(measure)f(b)q(et)o(w)o(een)-75 1308 y(the)f(en)o(tities)f (on)g(either)h(side.)21 b(The)16 b(t)o(yp)q(es)g(of)e(en)o(tit)o(y)h (in)o(v)o(olv)o(ed)-75 1358 y(in)f(this)g(pap)q(er)h(include)g(v)o (ector,)g(function)f(and)g(measure,)g(and)-75 1407 y(w)o(e)g(use)h(the) f(same)f(sym)o(b)q(ol)375 1396 y Ff(\030)375 1409 y Fl(=)422 1407 y(for)g(eac)o(h.)-25 1471 y(In)c(the)i(w)o(ork)e(review)o(ed)h(ab) q(o)o(v)o(e,)g(t)o(w)o(o)f(kinds)h(of)f(estimator)f(are)-75 1521 y(used:)26 b(CDF-based)18 b(\(Choi)f(and)h(Bulgren,)g(Macdonald,)g (and)-75 1571 y(Deely)i(and)f(Kruse\))i(and)f(p)q(df-based)g(\(Blum)f (and)g(Susarla\).)-75 1621 y(CDF-based)14 b(estimators)g(in)o(v)o(olv)o (e)f(appro)o(ximating)e(an)j(empir-)-75 1670 y(ical)h(distribution)h (with)g(an)g(estimated)g(one)g Fj(F)686 1676 y Fi(G)712 1680 y Fe(n)734 1670 y Fl(.)25 b(W)m(e)16 b(write)-75 1720 y(this)e(as)336 1812 y Fj(F)363 1818 y Fi(G)389 1822 y Fe(n)422 1801 y Ff(\030)422 1814 y Fl(=)466 1812 y Fj(F)493 1818 y Fi(n)515 1812 y Fj(;)357 b Fl(\(6\))-75 1929 y(where)16 b Fj(F)73 1935 y Fi(n)109 1929 y Fl(is)f(the)g (Kolmogoro)o(v)d(empirical)g(CDF|or)i(indeed)-75 1979 y(an)o(y)i(empirical)e(CDF)j(that)f(con)o(v)o(erges)i(to)e(it.)25 b(Pdf-based)17 b(es-)-75 2029 y(timators)12 b(in)o(v)o(olv)o(e)h(the)h (appro)o(ximation)d(b)q(et)o(w)o(een)k(probabilit)o(y)-75 2079 y(densit)o(y)f(functions:)342 2171 y Fj(f)362 2177 y Fi(G)388 2181 y Fe(n)422 2159 y Ff(\030)422 2173 y Fl(=)466 2171 y Fj(f)486 2177 y Fi(n)509 2171 y Fj(;)363 b Fl(\(7\))-75 2287 y(where)19 b Fj(f)69 2293 y Fi(G)95 2297 y Fe(n)136 2287 y Fl(is)f(the)g(estimated)g(mixture)e(p)q(df)i (and)g Fj(f)793 2293 y Fi(n)834 2287 y Fl(is)g(the)-75 2337 y(empirical)12 b(p)q(df)i(describ)q(ed)h(ab)q(o)o(v)o(e.)-25 2401 y(The)22 b(en)o(tities)g(in)o(v)o(olv)o(ed)e(in)h(\(6\))h(and)f (\(7\))h(are)g(functions.)-75 2451 y(When)g(the)g(appro)o(ximation)d (is)i(computed,)i(ho)o(w)o(ev)o(er,)g(it)f(is)-75 2501 y(computed)14 b(b)q(et)o(w)o(een)i(v)o(ectors)f(that)f(represen)o(t)j (the)e(functions.)-75 2551 y(These)e(v)o(ectors)g(con)o(tain)f(the)g (function)g(v)n(alues)f(at)h(a)g(particular)-75 2600 y(set)i(of)e(p)q(oin)o(ts,)g(whic)o(h)h(w)o(e)g(call)f(\\\014tting)g(p) q(oin)o(ts.")18 b(In)12 b(the)i(w)o(ork)-75 2650 y(review)o(ed)j(ab)q (o)o(v)o(e,)g(the)g(\014tting)f(p)q(oin)o(ts)h(are)g(c)o(hosen)g(to)g (b)q(e)g(the)-75 2700 y(data)d(p)q(oin)o(ts)f(themselv)o(es.)1013 117 y Fo(3)66 b(The)29 b(problem)h(of)e(minorit)n(y)j(clus-)1113 191 y(ters)1013 295 y Fl(Although)12 b(they)i(p)q(erform)e(w)o(ell)h (asymptotically)m(,)c(all)j(the)i(min-)1013 345 y(im)o(um)c(distance)k (metho)q(ds)f(describ)q(ed)i(ab)q(o)o(v)o(e)f(su\013er)h(from)c(the) 1013 395 y(\014nite-sample)f(problem)g(discussed)j(earlier:)k(they)12 b(can)f(neglect)1013 445 y(small)h(groups)k(of)e(outlying)g(data)h(p)q (oin)o(ts)f(no)h(matter)g(ho)o(w)f(far)1013 494 y(they)f(lie)g(from)e (the)j(dominan)o(t)d(data)i(p)q(oin)o(ts.)18 b(The)13 b(underlying)1013 544 y(reason)i(is)f(that)g(the)h(ob)r(jectiv)o(e)g (function)g(to)f(b)q(e)h(minim)o(ized)d(is)1013 594 y(de\014ned)k (globally)e(rather)i(than)g(lo)q(cally)m(.)21 b(A)16 b(global)e(approac)o(h)1013 644 y(means)h(that)h(the)h(v)n(alue)e(of)h (the)g(estimated)g(probabilit)o(y)f(den-)1013 694 y(sit)o(y)g(function) g(at)g(a)g(particular)g(place)h(will)e(b)q(e)i(in\015uenced)g(b)o(y) 1013 743 y(all)e(data)g(p)q(oin)o(ts,)h(no)g(matter)g(ho)o(w)g(far)g(a) o(w)o(a)o(y)f(they)i(are.)22 b(This)1013 793 y(can)12 b(cause)h(small)c(groups)j(of)g(data)f(p)q(oin)o(ts)h(to)g(b)q(e)h (ignored)e(ev)o(en)1013 843 y(if)17 b(they)j(are)f(a)f(long)g(w)o(a)o (y)g(from)f(the)i(dominan)o(t)d(part)j(of)f(the)1013 893 y(data)e(sample.)26 b(F)m(rom)16 b(a)g(probabilistic)g(p)q(oin)o(t) h(of)f(view,)i(ho)o(w-)1013 943 y(ev)o(er,)11 b(there)h(is)e(no)g (reason)h(to)f(subsume)h(distan)o(t)f(groups)h(within)1013 992 y(the)h(ma)r(jor)e(clusters)j(just)g(b)q(ecause)g(they)f(are)h (relativ)o(ely)e(small.)1062 1049 y(The)21 b(ultimate)d(e\013ect)k(of)e (suppressing)h(distan)o(t)f(minorit)o(y)1013 1099 y(clusters)14 b(dep)q(ends)h(on)e(ho)o(w)g(the)h(clustering)f(is)g(applied.)18 b(If)12 b(the)1013 1149 y(application's)j(loss)i(function)f(dep)q(ends) j(on)d(the)i(distance)f(b)q(e-)1013 1199 y(t)o(w)o(een)e(clusters,)i (the)f(result)g(ma)o(y)d(pro)o(v)o(e)i(disastrous)h(b)q(ecause)1013 1248 y(there)g(is)f(no)g(limit)e(to)i(ho)o(w)g(far)g(a)o(w)o(a)o(y)f (these)j(outlying)d(groups)1013 1298 y(ma)o(y)8 b(b)q(e.)17 b(One)11 b(migh)o(t)c(argue)k(that)f(small)d(groups)k(of)e(p)q(oin)o (ts)h(can)1013 1348 y(easily)15 b(b)q(e)i(explained)g(a)o(w)o(a)o(y)e (as)h(outliers,)h(b)q(ecause)h(the)f(e\013ect)1013 1398 y(will)10 b(b)q(ecome)j(less)g(imp)q(ortan)o(t)d(as)j(the)g(n)o(um)o(b) q(er)e(of)h(data)g(p)q(oin)o(ts)1013 1448 y(increases|and)18 b(it)f(will)f(disapp)q(ear)i(in)f(the)h(limit)d(of)i(in\014nite)1013 1498 y(data.)f(Ho)o(w)o(ev)o(er,)11 b(in)g(a)f(\014nite-data)g (situation|and)g(all)f(practi-)1013 1547 y(cal)g(applications)g (necessarily)i(in)o(v)o(olv)o(e)e(\014nite)h(data|the)f(\\out-)1013 1597 y(liers")i(ma)o(y)e(equally)i(w)o(ell)f(represen)o(t)k(small)9 b(minorit)o(y)g(clusters.)1013 1647 y(F)m(urthermore,)j(outlying)f (data)i(p)q(oin)o(ts)f(are)h(not)g(really)f(treated)1013 1697 y(as)k(outliers)h(b)o(y)f(these)i(metho)q(ds|whether)f(or)g(not)f (they)i(are)1013 1747 y(discarded)e(is)g(merely)f(an)g(artifact)h(of)f (the)h(global)e(\014tting)h(cal-)1013 1796 y(culation.)32 b(When)19 b(clustering,)h(the)f(\014nal)f(mixture)g(distribu-)1013 1846 y(tion)12 b(should)i(tak)o(e)f(all)g(data)g(p)q(oin)o(ts)g(in)o (to)g(accoun)o(t|including)1013 1896 y(outlying)j(clusters)k(if)e(an)o (y)f(exist.)32 b(If)18 b(practical)g(applications)1013 1946 y(demand)11 b(that)h(small)d(outlying)i(clusters)j(are)e (suppressed,)j(this)1013 1996 y(should)e(b)q(e)i(done)f(in)f(a)h (separate)h(stage.)1062 2052 y(In)10 b(distance-based)g(clustering,)h (eac)o(h)e(data)g(p)q(oin)o(t)g(has)g(a)g(far-)1013 2102 y(reac)o(hing)k(e\013ect)h(b)q(ecause)h(of)d(t)o(w)o(o)h(global)e (constrain)o(ts.)18 b(One)c(is)1013 2152 y(the)c(use)i(of)d(the)i(cum)o (ulativ)o(e)e(distribution)h(function;)g(the)h(other)1013 2202 y(is)18 b(the)i(normalization)15 b(constrain)o(t)1604 2171 y Fh(P)1647 2181 y Fi(k)1665 2185 y Fe(n)1647 2214 y Fi(j)r Fg(=1)1714 2202 y Fj(w)1744 2208 y Fi(nj)1801 2202 y Fl(=)20 b(1.)33 b(These)1013 2252 y(constrain)o(ts)10 b(ma)o(y)d(sacri\014ce)k(a)e(small)f(n)o(um)o(b)q(er)g(of)h(data)h(p)q (oin)o(ts|)1013 2301 y(at)j(an)o(y)g(distance|for)h(a)f(b)q(etter)j(o)o (v)o(erall)c(\014t)i(to)g(the)g(data)f(as)h(a)1013 2351 y(whole.)k(Choi)13 b(and)h(Bulgren)g(\(1968\),)f(the)i(Cramer-v)o(on)d (Mises)1013 2401 y(statistic)e(\(Macdonald,)g(1971\),)g(and)g(Deely)h (and)f(Kruse)i(\(1968\))1013 2451 y(all)18 b(enforce)j(b)q(oth)g(the)f (CDF)g(and)g(the)h(normalization)c(con-)1013 2501 y(strain)o(ts.)28 b(Blum)16 b(and)h(Susarla)g(\(1977\))g(drop)g(the)h(CDF,)f(but)1013 2551 y(still)d(enforce)j(the)f(normalization)d(constrain)o(t.)23 b(The)16 b(result)g(is)1013 2600 y(that)h(these)i(clustering)f(metho)q (ds)f(are)h(only)f(appropriate)g(for)1013 2650 y(\014nite)d(mixtures)g (without)g(small)e(clusters,)k(where)f(the)g(risk)g(of)1013 2700 y(suppressing)g(clusters)g(is)f(lo)o(w.)p eop %%Page: 4 4 4 3 bop -25 117 a Fl(This)11 b(pap)q(er)h(addresses)h(the)f(general)g (problem)e(of)h(arbitrary)-75 166 y(mixtures.)17 b(Of)12 b(course,)h(the)g(minorit)o(y)d(cluster)k(problem)d(exists)-75 216 y(for)g(all)g(t)o(yp)q(es)h(of)f(mixture|including)f(\014nite)h (mixtures.)17 b(Ev)o(en)-75 266 y(here,)e(the)g(maxim)n(um)10 b(lik)o(eliho)q(o)q(d)i(and)j(Ba)o(y)o(esian)f(approac)o(hes)-75 316 y(do)i(not)g(solv)o(e)g(the)h(problem,)e(b)q(ecause)j(they)e(b)q (oth)h(in)o(tro)q(duce)-75 366 y(a)d(global)e(normalization)f (constrain)o(t.)-75 505 y Fo(4)67 b(Solving)53 b(the)f(minorit)n(y)i (cluster)26 580 y(problem)-75 671 y Fl(No)o(w)13 b(that)h(the)g(source) h(of)e(the)i(problem)d(has)i(b)q(een)h(iden)o(ti\014ed,)-75 721 y(the)21 b(solution)f(is)g(clear,)i(at)e(least)h(in)f(principle:)31 b(drop)20 b(b)q(oth)-75 771 y(the)12 b(appro)o(ximation)d(of)h(CDFs,)i (as)g(Blum)e(and)h(Susarla)g(\(1977\))-75 821 y(do,)21 b(and)f(the)h(normalization)c(constrain)o(t|no)i(matter)h(ho)o(w)-75 870 y(seductiv)o(e)15 b(it)f(ma)o(y)e(seem.)-25 921 y(Let)17 b Fj(G)85 906 y Fd(0)85 931 y Fi(n)123 921 y Fl(b)q(e)g(a)f(discrete)i (function)e(with)g(masses)g Ff(f)p Fj(w)826 927 y Fi(nj)864 921 y Ff(g)g Fl(at)-75 970 y Ff(f)p Fj(\022)-35 976 y Fi(nj)3 970 y Ff(g)p Fl(;)j(note)f(that)g(w)o(e)g(do)g(not)g(require)g (the)h Fj(w)699 976 y Fi(nj)754 970 y Fl(to)f(sum)f(to)-75 1020 y(one.)g(Since)10 b(the)g(new)g(metho)q(d)e(op)q(erates)j(in)e (terms)g(of)g(measures)-75 1070 y(rather)k(than)f(distribution)g (functions,)g(the)g(notion)g(of)f(appro)o(x-)-75 1120 y(imation)17 b(is)j(altered)g(to)g(use)g(in)o(terv)n(als)g(rather)g (than)g(p)q(oin)o(ts.)-75 1170 y(Using)14 b(the)g(form)o(ulation)d (describ)q(ed)16 b(in)d(Section)h(2,)g(w)o(e)g(ha)o(v)o(e)336 1262 y Fj(P)363 1268 y Fi(G)389 1260 y Fc(0)389 1276 y Fe(n)422 1251 y Ff(\030)422 1264 y Fl(=)466 1262 y Fj(P)493 1268 y Fi(n)515 1262 y Fj(;)357 b Fl(\(8\))-75 1354 y(where)17 b Fj(P)74 1360 y Fi(G)100 1352 y Fc(0)100 1368 y Fe(n)137 1354 y Fl(is)e(the)h(estimated)f(measure)g(and)h Fj(P)717 1360 y Fi(n)754 1354 y Fl(is)g(the)g(em-)-75 1404 y(pirical)g(measure.)27 b(The)17 b(in)o(terv)n(als)f(o)o(v)o(er)h (whic)o(h)g(the)g(appro)o(xi-)-75 1454 y(mation)e(tak)o(es)j(place)f (are)g(called)g(\\\014tting)g(in)o(terv)n(als.")27 b(Since)-75 1503 y(\(8\))14 b(is)f(not)g(sub)r(ject)i(to)f(the)g(normalization)c (constrain)o(t,)k Fj(G)874 1488 y Fd(0)874 1514 y Fi(n)910 1503 y Fl(is)-75 1553 y(not)f(a)h(CDF)f(and)g Fj(P)242 1559 y Fi(G)268 1551 y Fc(0)268 1568 y Fe(n)304 1553 y Fl(is)g(not)h(a)f(probabilit)o(y)f(measure.)18 b(Ho)o(w-)-75 1603 y(ev)o(er,)f Fj(G)60 1588 y Fd(0)60 1613 y Fi(n)97 1603 y Fl(can)f(b)q(e)h(easily)e(con)o(v)o(erted)i(in)o(to)e(a)g(CDF)g (estimator)-75 1653 y(b)o(y)f(normalizing)d(it)i(after)h(equation)g (\(8\))g(has)g(b)q(een)h(solv)o(ed.)-25 1703 y(T)m(o)g(de\014ne)i(the)f (estimation)f(pro)q(cedure)i(fully)m(,)e(w)o(e)h(need)h(to)-75 1753 y(determine)h(\(a\))f(the)i(set)f(of)f(supp)q(ort)i(p)q(oin)o(ts,) f(\(b\))g(the)g(set)h(of)-75 1803 y(\014tting)c(in)o(terv)n(als,)g (\(c\))h(the)g(empirical)e(measure,)h(and)g(\(d\))h(the)-75 1852 y(distance)g(measure.)21 b(Here)16 b(w)o(e)g(discuss)g(these)g(in) f(an)f(in)o(tuitiv)o(e)-75 1902 y(manner;)19 b(W)m(ang)e(and)g(Witten)h (\(1999\))g(sho)o(w)g(ho)o(w)f(to)h(deter-)-75 1952 y(mine)9 b(them)g(in)h(a)g(w)o(a)o(y)f(that)h(guaran)o(tees)i(a)d(strongly)h (consisten)o(t)-75 2002 y(estimator.)-25 2052 y Fb(Supp)q(ort)21 b(p)q(oin)o(ts.)37 b Fl(The)22 b(supp)q(ort)f(p)q(oin)o(ts)g(are)g (usually)-75 2102 y(suggested)c(b)o(y)e(the)h(data)g(p)q(oin)o(ts)f(in) g(the)h(sample.)22 b(F)m(or)15 b(exam-)-75 2152 y(ple,)f(if)g(the)h (comp)q(onen)o(t)e(distribution)h Fj(F)6 b Fl(\()p Fj(x)p Fl(;)h Fj(\022)q Fl(\))14 b(is)h(the)g(normal)-75 2201 y(distribution)i(with)g(mean)f Fj(\022)j Fl(and)f(unit)f(v)n(ariance,)h (eac)o(h)g(data)-75 2251 y(p)q(oin)o(t)c(can)g(b)q(e)g(tak)o(en)g(as)h (a)e(supp)q(ort)i(p)q(oin)o(t.)j(In)c(fact,)g(the)g(sup-)-75 2301 y(p)q(ort)19 b(p)q(oin)o(ts)g(are)g(more)f(accurately)h(describ)q (ed)i(as)e Fk(p)n(otential)-75 2351 y Fl(supp)q(ort)11 b(p)q(oin)o(ts,)g(b)q(ecause)h(their)f(asso)q(ciated)h(w)o(eigh)o(ts)e (ma)o(y)f(b)q(e-)-75 2401 y(come)h(zero)h(after)f(solving)g(\(8\)|and,) f(in)h(practice,)i(man)o(y)c(often)-75 2451 y(do.)-25 2501 y Fb(Fitting)j(in)o(terv)m(als.)16 b Fl(The)d(\014tting)f(in)o (terv)n(als)g(are)h(also)f(sug-)-75 2551 y(gested)19 b(b)o(y)g(the)f(data)g(p)q(oin)o(ts.)32 b(In)18 b(the)h(normal)d (distribution)-75 2600 y(example,)j(eac)o(h)g(data)g(p)q(oin)o(t)g Fj(x)442 2606 y Fi(i)474 2600 y Fl(can)h(pro)o(vide)e(one)i(in)o(terv)n (al,)-75 2650 y(suc)o(h)g(as)e([)p Fj(x)115 2656 y Fi(i)141 2650 y Ff(\000)13 b Fl(3)p Fj(\033)o(;)7 b(x)273 2656 y Fi(i)285 2650 y Fl(],)19 b(or)g(t)o(w)o(o,)g(suc)o(h)h(as)e([)p Fj(x)670 2656 y Fi(i)696 2650 y Ff(\000)13 b Fl(3)p Fj(\033)o(;)7 b(x)828 2656 y Fi(i)840 2650 y Fl(])19 b(and)-75 2700 y([)p Fj(x)-39 2706 y Fi(i)-26 2700 y Fj(;)7 b(x)17 2706 y Fi(i)39 2700 y Fl(+)j(3)p Fj(\033)q Fl(],)j(or)h(more.)j(There)f(is)e (no)f(problem)g(if)g(the)i(\014tting)1013 117 y(in)o(terv)n(als)i(o)o (v)o(erlap.)31 b(Their)19 b(length)f(should)g(not)h(b)q(e)g(so)f(large) 1013 166 y(that)f(p)q(oin)o(ts)g(can)g(exert)i(an)e(in\015uence)h(on)f (the)h(clustering)g(at)1013 216 y(an)13 b(unduly)g(remote)h(place,)f (nor)h(so)f(small)f(that)i(the)g(empirical)1013 266 y(measure)g(is)g (inaccurate.)20 b(The)15 b(exp)q(erimen)o(ts)f(rep)q(orted)i(b)q(elo)o (w)1013 316 y(use)i(in)o(terv)n(als)e(of)h(a)g(few)g(standard)h (deviations)e(around)h(eac)o(h)1013 366 y(data)c(p)q(oin)o(t,)g(and,)g (as)h(w)o(e)g(will)f(see,)h(this)g(w)o(orks)g(w)o(ell.)1062 417 y Fb(Empirical)f(measure.)k Fl(The)d(empirical)d(measure)i(can)g(b) q(e)1013 467 y(the)h(probabilit)o(y)f(measure)h(determined)g(b)o(y)g (the)h(Kolmogoro)o(v)1013 516 y(empirical)8 b(CDF,)i(or)g(an)o(y)g (measure)h(that)f(con)o(v)o(erges)i(to)e(it.)17 b(The)1013 566 y(\014tting)d(in)o(terv)n(als)g(discussed)j(ab)q(o)o(v)o(e)d(can)h (b)q(e)h(op)q(en,)f(closed,)g(or)1013 616 y(semi-op)q(en.)h(This)c (will)e(a\013ect)j(the)f(empirical)e(measure)h(if)g(data)1013 666 y(p)q(oin)o(ts)j(are)h(used)g(as)g(in)o(terv)n(al)f(b)q(oundaries,) g(although)g(it)g(do)q(es)1013 716 y(not)f(c)o(hange)h(the)g(v)n(alues) f(of)g(the)h(estimated)f(measure)g(b)q(ecause)1013 765 y(the)k(corresp)q(onding)g(distribution)f(is)h(con)o(tin)o(uous.)26 b(In)16 b(small-)1013 815 y(sample)c(situations,)h(bias)g(can)h(b)q(e)h (reduced)g(b)o(y)f(careful)g(atten-)1013 865 y(tion)f(to)i(this)f (detail|as)g(Macdonald)g(\(1971\))g(discusses)i(with)1013 915 y(resp)q(ect)g(to)d(Choi)h(and)f(Bulgren's)i(\(1968\))e(metho)q(d.) 1062 966 y Fb(Distance)22 b(measure.)39 b Fl(The)22 b(c)o(hoice)f(of)g (distance)g(mea-)1013 1016 y(sure)h(determines)f(what)g(kind)g(of)f (mathematical)e(program-)1013 1066 y(ming)10 b(problem)i(m)o(ust)f(b)q (e)j(solv)o(ed.)j(F)m(or)12 b(example,)g(a)g(quadratic)1013 1115 y(distance)k(will)e(giv)o(e)h(rise)h(to)g(a)f(least)h(squares)h (problem)d(under)1013 1165 y(linear)g(constrain)o(ts,)i(whereas)g(the)g (sup-norm)e(giv)o(es)h(rise)h(to)f(a)1013 1215 y(linear)j(programming)e (problem)i(that)h(can)g(b)q(e)h(solv)o(ed)f(using)1013 1265 y(the)c(simplex)f(metho)q(d.)20 b(These)d(t)o(w)o(o)d(measures)i (ha)o(v)o(e)e(e\016cien)o(t)1013 1315 y(solutions)f(that)h(are)g (globally)e(optimal.)1062 1366 y(It)18 b(is)f(w)o(orth)g(p)q(oin)o (ting)f(out)h(that)g(abandoning)f(the)i(global)1013 1416 y(constrain)o(ts)f(asso)q(ciated)h(with)f(b)q(oth)g(CDFs)h(and)e (normaliza-)1013 1465 y(tion)g(can)h(brings)g(with)g(it)f(a)h (computational)e(adv)n(an)o(tage.)26 b(In)1013 1515 y(v)o(ector)17 b(form,)f(w)o(e)h(write)h Fj(P)1455 1521 y Fi(G)1481 1513 y Fc(0)1481 1530 y Fe(n)1520 1515 y Fl(=)f Fj(A)1600 1521 y Fi(G)1626 1513 y Fc(0)1626 1530 y Fe(n)1648 1515 y Fj(w)1678 1521 y Fi(n)1700 1515 y Fl(,)h(where)g Fj(w)1883 1521 y Fi(n)1922 1515 y Fl(is)f(the)1013 1565 y(\(unnormalized\))g(w)o (eigh)o(t)h(v)o(ector)i(and)e(eac)o(h)h(elemen)o(t)f(of)g(the)1013 1615 y(matrix)11 b Fj(A)1178 1621 y Fi(G)1204 1613 y Fc(0)1204 1629 y Fe(n)1239 1615 y Fl(is)i(the)h(probabilit)o(y)d(v)n (alue)i(of)f(a)h(comp)q(onen)o(t)f(dis-)1013 1665 y(tribution)18 b(o)o(v)o(er)i(an)f(\014tting)g(in)o(terv)n(al.)34 b(Then,)21 b(pro)o(vided)e(the)1013 1714 y(supp)q(ort)e(p)q(oin)o(ts)f(corresp)q (onding)h(to)f Fj(w)1649 1699 y Fd(0)1648 1725 y Fi(n)1686 1714 y Fl(and)g Fj(w)1800 1699 y Fd(00)1799 1725 y Fi(n)1838 1714 y Fl(lie)g(outside)1013 1764 y(eac)o(h)k(others')h(sphere)g(of)f (in\015uence)h(as)f(determined)g(b)o(y)g(the)1013 1814 y(comp)q(onen)o(t)c(distributions)h Fj(F)6 b Fl(\()p Fj(x)p Fl(;)h Fj(\022)q Fl(\),)17 b(the)h(estimation)e(pro)q(ce-)1013 1864 y(dure)e(b)q(ecomes)1171 1919 y Fh(\022)1223 1947 y Fj(A)1254 1932 y Fd(0)1254 1959 y Fi(G)1280 1950 y Fc(0)1280 1967 y Fe(n)1373 1947 y Fl(0)1252 2002 y(0)71 b Fj(A)1375 1987 y Fd(00)1375 2013 y Fi(G)1401 2005 y Fc(0)1401 2021 y Fe(n)1444 1919 y Fh(\023)6 b(\022)1533 1952 y Fj(w)1564 1937 y Fd(0)1563 1962 y Fi(n)1533 2002 y Fj(w)1564 1987 y Fd(00)1563 2012 y Fi(n)1606 1919 y Fh(\023)1648 1966 y Ff(\030)1648 1979 y Fl(=)1691 1919 y Fh(\022)1745 1952 y Fj(P)1778 1937 y Fd(0)1772 1962 y Fi(n)1743 2002 y Fj(P)1776 1987 y Fd(00)1770 2012 y Fi(n)1817 1919 y Fh(\023)1855 1977 y Fj(;)105 b Fl(\(9\))1013 2101 y(sub)r(ject)13 b(to)f Fj(w)1235 2086 y Fd(0)1234 2111 y Fi(n)1268 2101 y Ff(\025)g Fl(0)g(and)g Fj(w)1455 2086 y Fd(00)1454 2111 y Fi(n)1488 2101 y Ff(\025)g Fl(0.)17 b(This)12 b(is)h(the)g(same)e(as)h(com-)1013 2151 y(bining)k(the)h (solutions)g(of)f(t)o(w)o(o)h(sub-equations,)h Fj(A)1835 2136 y Fd(0)1835 2161 y Fi(n)1857 2151 y Fj(w)1888 2136 y Fd(0)1887 2161 y Fi(n)1927 2140 y Ff(\030)1927 2153 y Fl(=)1976 2151 y Fj(P)2009 2136 y Fd(0)2003 2161 y Fi(n)1013 2201 y Fl(sub)r(ject)e(to)f Fj(w)1241 2185 y Fd(0)1240 2211 y Fi(n)1275 2201 y Ff(\025)f Fl(0,)h(and)f Fj(A)1481 2185 y Fd(00)1481 2211 y Fi(n)1504 2201 y Fj(w)1535 2185 y Fd(00)1534 2211 y Fi(n)1570 2189 y Ff(\030)1570 2203 y Fl(=)1616 2201 y Fj(P)1649 2185 y Fd(00)1643 2211 y Fi(n)1684 2201 y Fl(sub)r(ject)j(to)d Fj(w)1912 2185 y Fd(00)1911 2211 y Fi(n)1947 2201 y Ff(\025)g Fl(0.)1013 2250 y(If)c(the)h(relev)n(an)o(t)f(supp)q(ort)h(p)q(oin)o(ts)g(con)o (tin)o(ue)f(to)h(lie)f(outside)g(eac)o(h)1013 2300 y(others')i(sphere)h (of)e(in\015uence,)i(the)f(sub-equations)g(can)g(b)q(e)g(fur-)1013 2350 y(ther)g(partitioned.)17 b(This)12 b(implies)e(that)i(when)g(data) g(p)q(oin)o(ts)g(are)1013 2400 y(su\016cien)o(tly)k(far)f(apart,)h(the) h(mixing)d(distribution)h Fj(G)h Fl(can)g(b)q(e)1013 2450 y(estimated)g(b)o(y)h(grouping)f(data)h(p)q(oin)o(ts)g(in)f (di\013eren)o(t)i(regions.)1013 2499 y(Moreo)o(v)o(er,)13 b(the)g(solution)f(in)h(eac)o(h)g(region)g(can)g(b)q(e)h(normalized) 1013 2549 y(separately)e(b)q(efore)i(they)e(are)h(com)o(bined,)e(whic)o (h)h(yields)g(a)g(b)q(et-)1013 2599 y(ter)i(estimation)e(of)i(the)g (mixing)d(distribution.)1062 2650 y(If)17 b(the)h(normalization)d (constrain)o(t)1648 2619 y Fh(P)1692 2629 y Fi(k)1710 2633 y Fe(n)1692 2663 y Fi(j)r Fg(=1)1758 2650 y Fj(w)1788 2656 y Fi(nj)1843 2650 y Fl(=)j(1)f(is)g(re-)1013 2700 y(tained)d(when)i(estimating)d(the)i(mixing)e(distribution,)h(the)h (es-)p eop %%Page: 5 5 5 4 bop -75 117 a Fl(timation)11 b(pro)q(cedure)16 b(b)q(ecomes)336 199 y Fj(P)363 205 y Fi(G)389 209 y Fe(n)422 188 y Ff(\030)422 201 y Fl(=)466 199 y Fj(P)493 205 y Fi(n)515 199 y Fj(:)337 b Fl(\(10\))-75 282 y(where)18 b(the)f(estimator)f Fj(G)343 288 y Fi(n)382 282 y Fl(is)h(a)f(discrete)j(CDF)d(on)h(\002.)27 b(This)-75 331 y(constrain)o(t)17 b(is)g(necessary)h(for)f(the)g (left-hand)f(side)h(of)f(\(10\))h(to)-75 381 y(b)q(e)f(a)e(probabilit)o (y)g(measure.)21 b(Although)14 b(he)i(did)f(not)f(dev)o(elop)-75 431 y(an)22 b(op)q(erational)g(estimation)f(sc)o(heme,)j(Barb)q(e)g (\(1998\))d(sug-)-75 481 y(gested)15 b(exploiting)e(the)h(fact)g(that)g (the)g(empirical)e(probabilit)o(y)-75 531 y(measure)19 b(is)f(appro)o(ximated)f(b)o(y)i(the)g(estimated)f(probabilit)o(y)-75 580 y(measure|but)c(he)h(retained)g(the)g(normalization)d(constrain)o (t.)-75 630 y(As)i(noted)f(ab)q(o)o(v)o(e,)g(relaxing)f(the)i (constrain)o(t)f(has)h(the)f(e\013ect)i(of)-75 680 y(lo)q(osening)f (the)i(throttling)e(e\013ect)j(of)e(large)f(clusters)j(on)e(small)-75 730 y(groups)21 b(of)f(outliers,)h(and)g(our)f(exp)q(erimen)o(tal)g (results)h(sho)o(w)-75 780 y(that)d(the)g(resulting)f(estimator)g (su\013ers)i(from)c(the)k(dra)o(wbac)o(k)-75 829 y(noted)14 b(earlier.)-25 879 y(Both)e(estimators,)g Fj(G)321 885 y Fi(n)355 879 y Fl(obtained)g(from)f(\(10\))h(and)g Fj(G)819 864 y Fd(0)819 890 y Fi(n)853 879 y Fl(from)-75 929 y(\(8\),)21 b(ha)o(v)o(e)f(b)q(een)h(sho)o(wn)f(to)f(b)q(e)i (strongly)f(consisten)o(t)h(under)-75 979 y(w)o(eak)16 b(conditions)g(similar)d(to)j(those)h(used)g(b)o(y)f(others)h(\(W)m (ang)-75 1029 y(&)i(Witten,)g(1999\).)31 b(Of)18 b(course,)i(the)g(w)o (eak)e(con)o(v)o(ergence)i(of)-75 1079 y Fj(G)-42 1063 y Fd(0)-42 1089 y Fi(n)-5 1079 y Fl(is)15 b(in)g(the)g(sense)i(of)d (general)i(functions,)f(not)f(CDFs.)22 b(The)-75 1128 y(strong)13 b(consistency)g(of)f Fj(G)346 1113 y Fd(0)346 1139 y Fi(n)380 1128 y Fl(immediately)d(implies)h(the)j(strong)-75 1178 y(consistency)h(of)d(the)i(CDF)f(estimator)g(obtained)g(b)o(y)g (normaliz-)-75 1228 y(ing)h Fj(G)27 1213 y Fd(0)27 1238 y Fi(n)49 1228 y Fl(.)-75 1364 y Fo(5)67 b(Exp)r(erimen)n(tal)25 b(v)l(alidation)-75 1455 y Fl(W)m(e)14 b(ha)o(v)o(e)g(conducted)i(exp)q (erimen)o(ts)e(to)g(illustrate)g(the)h(failure)-75 1504 y(of)9 b(existing)h(metho)q(ds)f(to)h(detect)i(small)7 b(outlying)i(clusters,)j(and)-75 1554 y(the)k(impro)o(v)o(emen)o(t)d (ac)o(hiev)o(ed)j(b)o(y)g(the)g(new)g(sc)o(heme.)24 b(The)16 b(re-)-75 1604 y(sults)d(also)f(suggest)i(that)f(the)g(new)h(metho)q(d) e(is)g(more)g(accurate)-75 1654 y(and)i(stable)g(than)g(the)g(others.) -25 1704 y(When)f(comparing)e(clustering)j(metho)q(ds,)e(it)h(is)g(not) g(alw)o(a)o(ys)-75 1754 y(easy)18 b(to)g(ev)n(aluate)g(the)h(clusters)g (obtained.)30 b(T)m(o)18 b(\014nesse)h(this)-75 1803 y(problem)11 b(w)o(e)i(consider)h(simple)d(arti\014cial)h(situations)g (in)h(whic)o(h)-75 1853 y(the)21 b(prop)q(er)h(outcome)e(is)g(clear.)39 b(Some)19 b(practical)i(applica-)-75 1903 y(tions)f(of)f(clusters)i(do) e(pro)o(vide)h(ob)r(jectiv)o(e)g(ev)n(aluation)e(func-)-75 1953 y(tions;)12 b(ho)o(w)o(ev)o(er,)g(these)h(are)g(b)q(ey)o(ond)f (the)g(scop)q(e)h(of)e(this)h(pap)q(er.)-25 2003 y(The)j(metho)q(ds)g (used)h(are)f(Choi)f(and)h(Bulgren)h(\(1968\))e(\(de-)-75 2052 y(noted)c Fa(choi)p Fl(\),)h(Macdonald's)e(application)g(of)g(the) i(Cram)o(\023)-20 b(er-v)o(on)-75 2102 y(Mises)12 b(statistic)g(\()p Fa(cram)314 2099 y(\023)314 2102 y(er)p Fl(\),)g(the)g(new)g(metho)q(d) e(with)i(the)g(nor-)-75 2152 y(malization)c(constrain)o(t)k(\()p Fa(test)p Fl(\),)f(and)g(the)h(new)f(metho)q(d)g(with-)-75 2202 y(out)17 b(that)f(constrain)o(t)h(\()p Fa(new)p Fl(\).)27 b(In)17 b(eac)o(h)g(case,)h(equations)f(in-)-75 2252 y(v)o(olving)9 b(non-negativit)o(y)g(and/or)h(linear)g(equalit)o (y)g(constrain)o(ts)-75 2301 y(are)i(solv)o(ed)f(as)g(quadratic)g (programming)d(problems)i(using)h(the)-75 2351 y(elegan)o(t)i(and)g (e\016cien)o(t)h(pro)q(cedures)h Fa(nnls)f Fl(and)f Fa(lsei)h Fl(pro)o(vided)-75 2401 y(b)o(y)i(La)o(wson)g(and)h(Hanson)f(\(1974\).) 26 b(All)15 b(four)i(metho)q(ds)f(ha)o(v)o(e)-75 2451 y(the)e(same)f(computational)f(time)g(complexit)o(y)m(.)-25 2501 y(W)m(e)e(set)i(the)g(sample)d(size)j Fj(n)f Fl(to)g(100)f (throughout)h(the)g(exp)q(er-)-75 2551 y(imen)o(ts.)23 b(The)17 b(data)e(p)q(oin)o(ts)h(are)h(arti\014cially)d(generated)j (from)-75 2600 y(a)12 b(mixture)e(of)i(t)o(w)o(o)f(clusters:)19 b Fj(n)424 2606 y Fg(1)454 2600 y Fl(p)q(oin)o(ts)12 b(from)e Fj(N)5 b Fl(\(0)p Fj(;)i Fl(1\))k(and)h Fj(n)919 2606 y Fg(2)-75 2650 y Fl(p)q(oin)o(ts)k(from)f Fj(N)5 b Fl(\(100)p Fj(;)i Fl(1\).)25 b(The)17 b(v)n(alues)f(of)g Fj(n)652 2656 y Fg(1)687 2650 y Fl(and)h Fj(n)796 2656 y Fg(2)831 2650 y Fl(are)g(in)-75 2700 y(the)d(ratios)g(99)d(:)g(1,)i (97)e(:)g(3,)j(93)d(:)g(7,)i(80)e(:)g(20)i(and)h(50)d(:)g(50.)1062 117 y(Ev)o(ery)h(data)f(p)q(oin)o(t)g(is)g(tak)o(en)g(as)h(a)f(p)q (oten)o(tial)f(supp)q(ort)i(p)q(oin)o(t)1013 166 y(in)i(all)g(four)h (metho)q(ds:)20 b(th)o(us)15 b(the)h(n)o(um)o(b)q(er)e(of)h(p)q(oten)o (tial)f(com-)1013 216 y(p)q(onen)o(ts)20 b(in)e(the)i(clustering)g(is)f (100.)34 b(F)m(or)19 b Fa(test)f Fl(and)h Fa(new)p Fl(,)1013 266 y(\014tting)e(in)o(terv)n(als)g(need)h(to)g(b)q(e)g(determined.)29 b(In)17 b(the)h(exp)q(eri-)1013 316 y(men)o(ts,)12 b(eac)o(h)i(data)f (p)q(oin)o(t)g Fj(x)1464 322 y Fi(i)1490 316 y Fl(pro)o(vides)h(the)g (t)o(w)o(o)f(\014tting)g(in)o(ter-)1013 366 y(v)n(als)g([)p Fj(x)1131 372 y Fi(i)1154 366 y Ff(\000)d Fl(3)p Fj(;)d(x)1260 372 y Fi(i)1273 366 y Fl(])14 b(and)g([)p Fj(x)1416 372 y Fi(i)1429 366 y Fj(;)7 b(x)1472 372 y Fi(i)1495 366 y Fl(+)j(3].)19 b(An)o(y)14 b(data)h(p)q(oin)o(t)f(lo)q(cated)1013 415 y(on)e(the)g(b)q(oundary)h(of)e(an)h(in)o(terv)n(al)g(is)g(coun)o (ted)h(as)f(half)f(a)h(p)q(oin)o(t)1013 465 y(when)f(determining)g(the) h(empirical)d(measure)j(o)o(v)o(er)f(that)h(in)o(ter-)1013 515 y(v)n(al.)1062 576 y(These)18 b(c)o(hoices)f(are)g(admittedly)d (crude,)k(and)e(further)h(im-)1013 625 y(pro)o(v)o(emen)o(ts)12 b(in)g(the)h(accuracy)h(and)f(sp)q(eed)h(of)e Fa(test)g Fl(and)h Fa(new)1013 675 y Fl(are)18 b(p)q(ossible)h(that)f(tak)o(e)g (adv)n(an)o(tage)g(of)f(the)i(\015exibilit)o(y)e(pro-)1013 725 y(vided)24 b(b)o(y)g(\(10\))g(and)g(\(8\).)49 b(F)m(or)24 b(example,)h(accuracy)g(will)1013 775 y(lik)o(ely)13 b(increase)j(with)f(more|and)e(more)h(carefully)g(c)o(hosen|)1013 825 y(supp)q(ort)d(p)q(oin)o(ts)g(and)g(\014tting)f(in)o(terv)n(als.)17 b(The)11 b(fact)g(that)g(it)f(p)q(er-)1013 874 y(forms)i(w)o(ell)h(ev)o (en)i(with)e(crudely)i(c)o(hosen)g(supp)q(ort)f(p)q(oin)o(ts)g(and)1013 924 y(\014tting)c(in)o(terv)n(als)h(testi\014es)h(to)f(the)h (robustness)h(of)e(the)g(metho)q(d.)1062 985 y(Our)h(primary)d(in)o (terest)k(in)d(this)h(exp)q(erimen)o(t)g(is)g(the)h(w)o(eigh)o(ts)1013 1035 y(of)23 b(the)i(clusters)h(that)f(are)f(found.)49 b(T)m(o)24 b(cast)h(the)g(results)1013 1084 y(in)18 b(terms)h(of)f(the) i(underlying)e(mo)q(dels,)h(w)o(e)g(use)h(the)g(cluster)1013 1134 y(w)o(eigh)o(ts)13 b(to)g(estimate)f(v)n(alues)h(for)g Fj(n)1586 1140 y Fg(1)1618 1134 y Fl(and)g Fj(n)1723 1140 y Fg(2)1741 1134 y Fl(.)18 b(Of)13 b(course,)h(the)1013 1184 y(results)20 b(often)f(do)f(not)h(con)o(tain)g(exactly)g(t)o(w)o (o)g(clusters|but)1013 1234 y(b)q(ecause)g(the)g(underlying)f(cluster)i (cen)o(tres,)g(0)e(and)g(100,)h(are)1013 1284 y(w)o(ell)c(separated)i (compared)e(to)h(their)g(standard)h(deviation)e(of)1013 1334 y(1,)k(it)g(is)g(highly)f(unlik)o(ely)g(that)h(an)o(y)g(data)f(p)q (oin)o(ts)h(from)f(one)1013 1383 y(cluster)g(will)d(fall)g(an)o(ywhere) j(near)f(the)g(other.)28 b(Th)o(us)17 b(w)o(e)g(use)1013 1433 y(a)c(threshold)i(of)f(50)f(to)h(divide)g(the)g(clusters)i(in)o (to)e(t)o(w)o(o)f(groups:)1013 1483 y(those)d(near)f(0)g(and)g(those)i (near)e(100.)16 b(The)10 b(\014nal)e(cluster)j(w)o(eigh)o(ts)1013 1533 y(are)17 b(normalized,)e(and)h(the)i(w)o(eigh)o(ts)e(for)g(the)i (\014rst)f(group)g(are)1013 1583 y(summed)12 b(to)i(obtain)g(an)g (estimate)i(^)-24 b Fj(n)1607 1589 y Fg(1)1640 1583 y Fl(of)14 b Fj(n)1713 1589 y Fg(1)1732 1583 y Fl(,)f(while)h(those)h (for)1013 1632 y(the)g(second)g(group)f(are)h(summed)e(to)h(giv)o(e)g (an)g(estimate)i(^)-23 b Fj(n)1959 1638 y Fg(2)1992 1632 y Fl(of)1013 1682 y Fj(n)1038 1688 y Fg(2)1056 1682 y Fl(.)1062 1743 y(T)m(able)17 b(1)h(sho)o(ws)g(results)h(for)e(eac)o(h)i (of)e(the)h(four)g(metho)q(ds.)1013 1793 y(Eac)o(h)i(cell)g(represen)o (ts)i(one)e(h)o(undred)h(separate)g(exp)q(erimen-)1013 1842 y(tal)15 b(runs.)25 b(Three)18 b(\014gures)f(are)f(recorded.)26 b(A)o(t)17 b(the)f(top)g(is)g(the)1013 1892 y(n)o(um)o(b)q(er)c(of)g (times)g(the)i(metho)q(d)e(failed)g(to)h(detect)i(the)f(smaller)1013 1942 y(cluster,)f(that)f(is,)g(the)i(n)o(um)o(b)q(er)d(of)h(times)i(^) -24 b Fj(n)1697 1948 y Fg(2)1728 1942 y Fl(=)11 b(0.)18 b(In)12 b(the)h(mid-)1013 1992 y(dle)g(are)g(the)h(a)o(v)o(erage)f(v)n (alues)g(for)i(^)-23 b Fj(n)1577 1998 y Fg(1)1609 1992 y Fl(and)15 b(^)-23 b Fj(n)1714 1998 y Fg(2)1733 1992 y Fl(.)17 b(A)o(t)d(the)f(b)q(ottom)1013 2042 y(is)e(the)h(standard)f (deviation)g(of)i(^)-23 b Fj(n)1540 2048 y Fg(1)1569 2042 y Fl(and)14 b(^)-23 b Fj(n)1673 2048 y Fg(2)1702 2042 y Fl(\(whic)o(h)12 b(are)g(equal\).)1013 2091 y(These)i(three)h (\014gures)f(can)f(b)q(e)h(though)o(t)f(of)g(as)g(measures)h(of)e(re-) 1013 2141 y(liabilit)o(y)l(,)f(accuracy)k(and)e(stabilit)o(y)g(resp)q (ectiv)o(ely)m(.)1062 2202 y(The)f(top)f(\014gures)i(in)e(T)m(able)f(1) h(sho)o(w)h(clearly)f(that)g(only)g Fa(new)1013 2252 y Fl(is)16 b(alw)o(a)o(ys)g(reliable)g(in)g(the)h(sense)i(that)e(it)f (nev)o(er)i(fails)d(to)i(de-)1013 2301 y(tect)f(the)f(smaller)f (cluster.)22 b(The)16 b(other)g(metho)q(ds)e(fail)g(mostly)1013 2351 y(when)k Fj(n)1150 2357 y Fg(2)1187 2351 y Fl(=)h(1;)g(their)g (failure)e(rate)i(gradually)d(decreases)21 b(as)1013 2401 y Fj(n)1038 2407 y Fg(2)1069 2401 y Fl(gro)o(ws.)c(The)d(cen)o (ter)g(\014gures)g(sho)o(w)e(that,)h(under)h(all)d(condi-)1013 2451 y(tions,)g Fa(new)i Fl(giv)o(es)f(a)g(more)f(accurate)j(estimate)e (of)f(the)i(correct)1013 2501 y(v)n(alues)19 b(of)g Fj(n)1220 2507 y Fg(1)1258 2501 y Fl(and)h Fj(n)1370 2507 y Fg(2)1408 2501 y Fl(than)g(the)g(other)g(metho)q(ds.)36 b(As)20 b(ex-)1013 2551 y(p)q(ected,)g Fa(cram)1268 2548 y(\023)1268 2551 y(er)e Fl(sho)o(ws)h(a)e(noticeable)i(impro)o(v)o(em)o(en)o(t)d(o) o(v)o(er)1013 2600 y Fa(choi)p Fl(,)d(but)h(it)g(is)g(v)o(ery)g(minor.) i(The)f Fa(test)e Fl(metho)q(d)g(has)h(lo)o(w)o(er)1013 2650 y(failure)d(rates)i(and)f(pro)q(duces)i(estimates)e(that)g(are)h (more)e(accu-)1013 2700 y(rate)17 b(and)g(far)g(more)f(stable)i (\(indicated)f(b)o(y)g(the)h(b)q(ottom)d(\014g-)p eop %%Page: 6 6 6 5 bop 263 118 1424 2 v 262 168 2 50 v 658 168 V 687 153 a Fj(n)712 159 y Fg(1)742 153 y Fl(=)12 b(99)p 855 168 V 56 w Fj(n)909 159 y Fg(1)939 153 y Fl(=)g(97)p 1052 168 V 57 w Fj(n)1107 159 y Fg(1)1137 153 y Fl(=)g(93)p 1250 168 V 67 w Fj(n)1315 159 y Fg(1)1345 153 y Fl(=)f(80)p 1468 168 V 78 w Fj(n)1533 159 y Fg(1)1563 153 y Fl(=)h(50)p 1686 168 V 262 218 V 658 218 V 697 203 a Fj(n)722 209 y Fg(2)752 203 y Fl(=)g(1)p 855 218 V 78 w Fj(n)920 209 y Fg(2)950 203 y Fl(=)g(3)p 1052 218 V 77 w Fj(n)1117 209 y Fg(2)1147 203 y Fl(=)g(7)p 1250 218 V 78 w Fj(n)1315 209 y Fg(2)1345 203 y Fl(=)f(20)p 1468 218 V 78 w Fj(n)1533 209 y Fg(2)1563 203 y Fl(=)h(50)p 1686 218 V 263 219 1424 2 v 262 269 2 50 v 288 254 a Fa(choi)115 b Fl(F)m(ailures)p 658 269 V 102 w(86)p 855 269 V 156 w(42)p 1052 269 V 166 w(4)p 1250 269 V 186 w(0)p 1468 269 V 198 w(0)p 1686 269 V 262 319 V 494 304 a(^)-23 b Fj(n)517 310 y Fg(1)535 304 y Fj(=)r Fl(^)g Fj(n)581 310 y Fg(2)p 658 319 V 683 304 a Fl(99.9/0.1)p 855 319 V 48 w(99.2/0.8)p 1052 319 V 47 w(95.8/4.2)p 1250 319 V 48 w(82.0/18.0)p 1468 319 V 47 w(50.6/49.4)p 1686 319 V 262 369 V 492 354 a(SD\()r(^)g Fj(n)588 360 y Fg(1)606 354 y Fl(\))p 658 369 V 98 w(0.36)p 855 369 V 123 w(0.98)p 1052 369 V 122 w(1.71)p 1250 369 V 133 w(1.77)p 1468 369 V 143 w(1.30)p 1686 369 V 263 371 1424 2 v 262 420 2 50 v 288 405 a Fa(cram)394 402 y(\023)394 405 y(er)50 b Fl(F)m(ailures)p 658 420 V 102 w(80)p 855 420 V 156 w(31)p 1052 420 V 166 w(1)p 1250 420 V 186 w(0)p 1468 420 V 198 w(0)p 1686 420 V 262 470 V 494 455 a(^)-23 b Fj(n)517 461 y Fg(1)535 455 y Fj(=)r Fl(^)g Fj(n)581 461 y Fg(2)p 658 470 V 683 455 a Fl(99.8/0.2)p 855 470 V 48 w(98.6/1.4)p 1052 470 V 47 w(95.1/4.9)p 1250 470 V 48 w(81.6/18.4)p 1468 470 V 47 w(49.7/50.3)p 1686 470 V 262 520 V 492 505 a(SD\()r(^)g Fj(n)588 511 y Fg(1)606 505 y Fl(\))p 658 520 V 98 w(0.50)p 855 520 V 123 w(1.13)p 1052 520 V 122 w(1.89)p 1250 520 V 133 w(1.80)p 1468 520 V 143 w(1.31)p 1686 520 V 263 522 1424 2 v 262 571 2 50 v 288 557 a Fa(test)112 b Fl(F)m(ailures)p 658 571 V 102 w(52)p 855 571 V 166 w(5)p 1052 571 V 177 w(0)p 1250 571 V 186 w(0)p 1468 571 V 198 w(0)p 1686 571 V 262 621 V 494 606 a(^)-23 b Fj(n)517 612 y Fg(1)535 606 y Fj(=)r Fl(^)g Fj(n)581 612 y Fg(2)p 658 621 V 683 606 a Fl(99.8/0.2)p 855 621 V 48 w(98.2/1.8)p 1052 621 V 47 w(94.1/5.9)p 1250 621 V 48 w(80.8/19.2)p 1468 621 V 47 w(50.1/49.9)p 1686 621 V 262 671 V 492 656 a(SD\()r(^)g Fj(n)588 662 y Fg(1)606 656 y Fl(\))p 658 671 V 98 w(0.32)p 855 671 V 123 w(0.83)p 1052 671 V 122 w(0.87)p 1250 671 V 133 w(0.78)p 1468 671 V 143 w(0.55)p 1686 671 V 263 673 1424 2 v 262 723 2 50 v 288 708 a Fa(new)121 b Fl(F)m(ailures)p 658 723 V 113 w(0)p 855 723 V 176 w(0)p 1052 723 V 177 w(0)p 1250 723 V 186 w(0)p 1468 723 V 198 w(0)p 1686 723 V 262 772 V 494 757 a(^)-23 b Fj(n)517 763 y Fg(1)535 757 y Fj(=)r Fl(^)g Fj(n)581 763 y Fg(2)p 658 772 V 683 757 a Fl(99.0/1.0)p 855 772 V 48 w(96.9/3.1)p 1052 772 V 47 w(92.8/7.2)p 1250 772 V 48 w(79.9/20.1)p 1468 772 V 47 w(50.1/49.9)p 1686 772 V 262 822 V 492 807 a(SD\()r(^)g Fj(n)588 813 y Fg(1)606 807 y Fl(\))p 658 822 V 98 w(0.01)p 855 822 V 123 w(0.16)p 1052 822 V 122 w(0.19)p 1250 822 V 133 w(0.34)p 1468 822 V 143 w(0.41)p 1686 822 V 263 824 1424 2 v 453 940 a(T)m(able)13 b(1:)18 b(Exp)q(erimen)o(tal)13 b(results)i(for)f(detecting)g(small)e(clusters)-75 1072 y(ures\))g(than)g(those)f(for)g Fa(choi)g Fl(and)g Fa(cram)571 1069 y(\023)571 1072 y(er)p Fl(|presumably)f(b)q(e-)-75 1122 y(cause)j(it)f(is)g(less)g(constrained.)19 b(Of)11 b(the)i(four)f(metho)q(ds,)f Fa(new)i Fl(is)-75 1172 y(clearly)h(and)g(consisten)o(tly)h(the)g(winner)g(in)e(terms)h(of)g (all)f(three)-75 1222 y(measures:)18 b(reliabilit)o(y)m(,)11 b(accuracy)k(and)f(stabilit)o(y)m(.)-25 1276 y(The)19 b(results)h(of)f(the)g Fa(new)h Fl(metho)q(d)e(can)h(b)q(e)h(further)g (im-)-75 1326 y(pro)o(v)o(ed.)d(If)9 b(the)h(decomp)q(osed)g(form)e (\(9\))h(is)h(used)g(instead)g(of)f(\(8\),)-75 1376 y(and)h(the)g (solutions)g(of)f(the)h(sub-equations)g(are)h(normalized)d(b)q(e-)-75 1426 y(fore)14 b(com)o(bining)e(them|whic)o(h)g(is)i(feasible)g(b)q (ecause)h(the)g(t)o(w)o(o)-75 1476 y(underlying)g(clusters)i(are)f(so)f (distan)o(t)g(from)f(eac)o(h)i(other|the)-75 1525 y(correct)21 b(v)n(alues)e(are)h(obtained)f(for)i(^)-23 b Fj(n)543 1531 y Fg(1)580 1525 y Fl(and)21 b(^)-23 b Fj(n)691 1531 y Fg(2)729 1525 y Fl(in)19 b(virtually)-75 1575 y(ev)o(ery)c(trial.)-75 1737 y Fo(6)67 b(Conclusions)-75 1836 y Fl(W)m(e)16 b(ha)o(v)o(e)g (iden)o(ti\014ed)h(a)f(shortcoming)f(of)h(existing)g(clustering)-75 1886 y(metho)q(ds)11 b(for)h(arbitrary)f(semi-parametric)f(mixture)h (distribu-)-75 1936 y(tions:)32 b(they)21 b(fail)e(to)i(detect)h(v)o (ery)f(small)e(clusters)j(reliably)m(.)-75 1986 y(This)c(is)g(a)g (signi\014can)o(t)g(w)o(eakness)h(when)g(the)f(minorit)o(y)e(clus-)-75 2036 y(ters)c(are)g(far)e(from)f(the)j(dominan)o(t)d(ones)i(and)g(the)h (loss)f(function)-75 2085 y(tak)o(es)j(accoun)o(t)h(of)e(the)h (distance)h(of)e(misclustered)h(p)q(oin)o(ts.)-25 2140 y(W)m(e)g(ha)o(v)o(e)g(describ)q(ed)i(a)d(new)i(clustering)g(metho)q(d) e(for)h(arbi-)-75 2190 y(trary)g(semi-parametric)e(mixture)h (distributions,)g(and)g(sho)o(wn)-75 2240 y(exp)q(erimen)o(tally)g (that)h(it)g(o)o(v)o(ercomes)g(the)g(problem.)k(F)m(urther-)-75 2289 y(more,)11 b(the)i(exp)q(erimen)o(ts)g(suggest)g(that)f(the)h(new) g(estimator)f(is)-75 2339 y(more)h(accurate)i(and)f(more)f(stable)h (than)g(existing)f(ones.)-75 2501 y Fo(References)-75 2600 y Fl(Barb)q(e,)20 b(P)m(.)e(\(1998\).)32 b(Statistical)18 b(analysis)g(of)g(mixtures)g(and)8 2650 y(the)f(empirical)d(probabilit) o(y)h(measure.)26 b Fk(A)n(cta)16 b(Applic)n(an-)8 2700 y(dae)f(Mathematic)n(ae)p Fl(,)f Fk(50\(3\))p Fl(,)g(253{340.)1013 1072 y(Blum,)9 b(J.)h(R.)f(&)h(Susarla,)h(V.)e(\(1977\).)j(Estimation)c (of)i(a)g(mixing)1096 1122 y(distribution)j(function.)18 b Fk(A)o(nn.)d(Pr)n(ob)n(ab)p Fl(,)e Fk(5)p Fl(,)h(200{209.)1013 1203 y(Choi,)g(K.)h(&)h(Bulgren,)g(W.)e(B.)h(\(1968\).)22 b(An)15 b(estimation)f(pro-)1096 1253 y(cedure)j(for)f(mixtures)g(of)f (distributions.)25 b Fk(J.)17 b(R.)g(Statist.)1096 1303 y(So)n(c.)e(B)p Fl(,)e Fk(30)p Fl(,)h(444{460.)1013 1383 y(Deely)m(,)g(J.)i(J.)f(&)h(Kruse,)g(R.)f(L.)g(\(1968\).)22 b(Construction)16 b(of)e(se-)1096 1433 y(quences)20 b(estimating)d(the) i(mixing)d(distribution.)31 b Fk(A)o(nn.)1096 1483 y(Math.)15 b(Statist.)p Fl(,)d Fk(39)p Fl(,)i(286{288.)1013 1564 y(La)o(wson,)k(C.)g(L.)g(&)h(Hanson,)h(R.)d(J.)h(\(1974\).)32 b Fk(Solving)19 b(L)n(e)n(ast)1096 1613 y(Squar)n(es)c(Pr)n(oblems)p Fl(.)i(Pren)o(tice-Hall,)d(Inc.)1013 1694 y(Lindsa)o(y)m(,)8 b(B.)i(G.)e(\(1995\).)i Fk(Mixtur)n(e)h(mo)n(dels:)16 b(the)n(ory,)11 b(ge)n(ometry,)1096 1744 y(and)j(applic)n(ations)p Fl(,)f(V)m(olume)e(5)i(of)f Fk(NSF-CBMS)j(R)n(e)n(gional)1096 1794 y(Confer)n(enc)n(e)g(Series)g(in)h(Pr)n(ob)n(ability)f(and)h (Statistics)p Fl(.)k(In-)1096 1843 y(stitute)14 b(for)g(Mathematical)e (Statistics:)18 b(Ha)o(yw)o(ard,)13 b(CA.)1013 1924 y(Macdonald,)j(P)m (.)f(D.)h(M.)g(\(1971\).)25 b(Commen)o(t)14 b(on)i(a)g(pap)q(er)h(b)o (y)1096 1974 y(Choi)f(and)g(Bulgren.)28 b Fk(J.)17 b(R.)g(Statist.)g (So)n(c.)h(B)p Fl(,)e Fk(33)p Fl(,)i(326{)1096 2024 y(329.)1013 2104 y(McLac)o(hlan,)e(G.)g(&)h(Basford,)f(K.)h(\(1988\).)25 b Fk(Mixtur)n(e)17 b(Mo)n(dels:)1096 2154 y(Infer)n(enc)n(e)j(and)i (Applic)n(ations)f(to)f(Clustering)p Fl(.)37 b(Marcel)1096 2204 y(Dekk)o(er,)13 b(New)i(Y)m(ork.)1013 2285 y(Titterington,)i(D.)f (M.,)h(Smith,)f(A.)h(F.)g(M.)f(&)i(Mak)o(o)o(v,)e(U.)h(E.)1096 2335 y(\(1985\).)26 b Fk(Statistic)n(al)16 b(A)o(nalysis)i(of)f(Finite) h(Mixtur)n(e)f(Dis-)1096 2384 y(tributions)p Fl(.)g(John)d(Wiley)f(&)h (Sons.)1013 2465 y(W)m(ang,)9 b(Y.)h(&)h(Witten,)g(I.)f(H.)h(\(1999\).) h(The)f(estimation)e(of)h(mix-)1096 2515 y(ing)16 b(distributions)h(b)o (y)f(appro)o(ximating)e(empirical)h(mea-)1096 2565 y(sures.)47 b(T)m(ec)o(hnical)22 b(Rep)q(ort)i(\(in)f(preparation\),)i(Dept.)1096 2615 y(of)17 b(Computer)h(Science,)i(Univ)o(ersit)o(y)e(of)g(W)m(aik)n (ato,)e(New)1096 2664 y(Zealand.)p eop %%Trailer end userdict /end-hook known{end-hook}if %%EOF