[32163] | 1 | <?xml version="1.0" encoding="utf-8" standalone="no"?>
|
---|
| 2 | <!DOCTYPE Archive SYSTEM "http://greenstone.org/dtd/Archive/1.0/Archive.dtd">
|
---|
| 3 | <Archive>
|
---|
| 4 | <Section>
|
---|
| 5 | <Description>
|
---|
| 6 | <Metadata name="gsdldoctype">indexed_doc</Metadata>
|
---|
| 7 | <Metadata name="Language">en</Metadata>
|
---|
| 8 | <Metadata name="Encoding">utf8</Metadata>
|
---|
| 9 | <Metadata name="URL">http://Scratch/ak19/gs2-diffcol-26Mar2018/collect/Enhanced-PDF/tmp/1522045416_1/pdf03.html</Metadata>
|
---|
| 10 | <Metadata name="UTF8URL">http://Scratch/ak19/gs2-diffcol-26Mar2018/collect/Enhanced-PDF/tmp/1522045416_1/pdf03.html</Metadata>
|
---|
| 11 | <Metadata name="Title">Applications for Bibliometric Research in the Emerging Digital Libraries Sally Jo Cunningham...</Metadata>
|
---|
| 12 | <Metadata name="gsdlsourcefilename">import/pdf03.pdf</Metadata>
|
---|
| 13 | <Metadata name="gsdlconvertedfilename">tmp/1522045416_1/pdf03.html</Metadata>
|
---|
| 14 | <Metadata name="OrigSource">pdf03.html</Metadata>
|
---|
| 15 | <Metadata name="Source">pdf03.pdf</Metadata>
|
---|
| 16 | <Metadata name="SourceFile">pdf03.pdf</Metadata>
|
---|
| 17 | <Metadata name="Plugin">PDFPlugin</Metadata>
|
---|
| 18 | <Metadata name="FileSize">35935</Metadata>
|
---|
| 19 | <Metadata name="FilenameRoot">pdf03</Metadata>
|
---|
| 20 | <Metadata name="FileFormat">PDF</Metadata>
|
---|
| 21 | <Metadata name="srcicon">_iconpdf_</Metadata>
|
---|
| 22 | <Metadata name="srclink_file">doc.pdf</Metadata>
|
---|
| 23 | <Metadata name="srclinkFile">doc.pdf</Metadata>
|
---|
| 24 | <Metadata name="NumPages">17</Metadata>
|
---|
| 25 | <Metadata name="gsdlthistype">Paged</Metadata>
|
---|
| 26 | <Metadata name="ex.ExifTool.ExifToolVersion">8.57</Metadata>
|
---|
| 27 | <Metadata name="ex.File.Directory">/Scratch/ak19/gs2-diffcol-26Mar2018/collect/Enhanced-PDF/import</Metadata>
|
---|
| 28 | <Metadata name="ex.File.FileModifyDate">2018:03:26 19:22:43+13:00</Metadata>
|
---|
| 29 | <Metadata name="ex.File.FileName">pdf03.pdf</Metadata>
|
---|
| 30 | <Metadata name="ex.File.FilePermissions">664</Metadata>
|
---|
| 31 | <Metadata name="ex.File.FileSize">35935</Metadata>
|
---|
| 32 | <Metadata name="ex.File.FileType">PDF</Metadata>
|
---|
| 33 | <Metadata name="ex.File.MIMEType">application/pdf</Metadata>
|
---|
| 34 | <Metadata name="ex.PDF.Author">Bronwyn</Metadata>
|
---|
| 35 | <Metadata name="ex.PDF.CreateDate">1999:09:27 16:05:06</Metadata>
|
---|
| 36 | <Metadata name="ex.PDF.Creator">Microsoft Word</Metadata>
|
---|
| 37 | <Metadata name="ex.PDF.Linearized">false</Metadata>
|
---|
| 38 | <Metadata name="ex.PDF.PDFVersion">1.1</Metadata>
|
---|
| 39 | <Metadata name="ex.PDF.PageCount">17</Metadata>
|
---|
| 40 | <Metadata name="ex.PDF.Producer">Acrobat PDFWriter 2.0 for Macintosh</Metadata>
|
---|
| 41 | <Metadata name="ex.PDF.Title">biblio_for_dl_scientometrics.do</Metadata>
|
---|
| 42 | <Metadata name="Identifier">HASH019c5dca7f5bb781460a6b9c</Metadata>
|
---|
| 43 | <Metadata name="lastmodified">1522045363</Metadata>
|
---|
| 44 | <Metadata name="lastmodifieddate">20180326</Metadata>
|
---|
| 45 | <Metadata name="oailastmodified">1522045416</Metadata>
|
---|
| 46 | <Metadata name="oailastmodifieddate">20180326</Metadata>
|
---|
| 47 | <Metadata name="assocfilepath">HASH019c.dir</Metadata>
|
---|
| 48 | <Metadata name="gsdlassocfile">doc.pdf:application/pdf:</Metadata>
|
---|
| 49 | </Description>
|
---|
| 50 | <Content>
|
---|
| 51 |
|
---|
| 52 |
|
---|
| 53 |
|
---|
| 54 |
|
---|
| 55 |
|
---|
| 56 |
|
---|
| 57 |
|
---|
| 58 |
|
---|
| 59 |
|
---|
| 60 |
|
---|
| 61 |
|
---|
| 62 |
|
---|
| 63 |
|
---|
| 64 |
|
---|
| 65 |
|
---|
| 66 |
|
---|
| 67 |
|
---|
| 68 |
|
---|
| 69 |
|
---|
| 70 |
|
---|
| 71 |
|
---|
| 72 |
|
---|
| 73 |
|
---|
| 74 |
|
---|
| 75 |
|
---|
| 76 |
|
---|
| 77 |
|
---|
| 78 |
|
---|
| 79 |
|
---|
| 80 |
|
---|
| 81 |
|
---|
| 82 |
|
---|
| 83 |
|
---|
| 84 |
|
---|
| 85 |
|
---|
| 86 | </Content>
|
---|
| 87 | <Section>
|
---|
| 88 | <Description>
|
---|
| 89 | <Metadata name="Title">1</Metadata>
|
---|
| 90 | </Description>
|
---|
| 91 | <Content><br />
|
---|
| 92 | <b>Applications for Bibliometric Research</b><br>
|
---|
| 93 | <b>in the Emerging Digital Libraries</b><br>
|
---|
| 94 | Sally Jo Cunningham<br>
|
---|
| 95 | Department of Computer Science<br>
|
---|
| 96 | University of Waikato<br>
|
---|
| 97 | Hamilton, New Zealand<br>
|
---|
| 98 | email: [email protected]<br>
|
---|
| 99 | <b>Abstract:</b> Large numbers of research documents have recently become available on<br>
|
---|
| 100 | the Internet through “digital libraries”, and these collections are seeing high levels of<br>
|
---|
| 101 | use by their related research communities. A secondary use for these document<br>
|
---|
| 102 | repositories and indexes is as a platform for bibliometric research. We examine the<br>
|
---|
| 103 | extent to which the new digital libraries support conventional bibliometric analysis, and<br>
|
---|
| 104 | discuss shortcomings in their current forms. Interestingly, these electronic text<br>
|
---|
| 105 | archives also provide opportunities for new types of studies: generally the full text of<br>
|
---|
| 106 | documents are available for analysis, giving a finer grain of insight than abstract-only<br>
|
---|
| 107 | online databases; these repositories often contain technical reports or pre-prints, the<br>
|
---|
| 108 | “grey literature” that has been previously unavailable for analysis; and document<br>
|
---|
| 109 | “usage” can be measured directly by recording user accesses, rather than studied<br>
|
---|
| 110 | indirectly through document references.<br>
|
---|
| 111 | <b>1. Introduction</b><br>
|
---|
| 112 | In recent years a number of &quot;digital libraries&quot; have become available through the<br>
|
---|
| 113 | Internet. While the technology promises in the future to support large, heterogenous<br>
|
---|
| 114 | collections, at present the most widely used of the academically-focussed digital<br>
|
---|
| 115 | libraries are generally repositories of one or two types of document (typically technical<br>
|
---|
| 116 | reports, journal articles, pre-prints, or conference proceedings), grouped by discipline.<br>
|
---|
| 117 | <hr>
|
---|
| 118 | </Content>
|
---|
| 119 | </Section>
|
---|
| 120 | <Section>
|
---|
| 121 | <Description>
|
---|
| 122 | <Metadata name="Title">2</Metadata>
|
---|
| 123 | </Description>
|
---|
| 124 | <Content><br />
|
---|
| 125 | A distinguishing characteristic of these digital libraries is that the full text of documents<br>
|
---|
| 126 | are often available for retrieval, as well as bibliographic records. The sciences are<br>
|
---|
| 127 | represented much more heavily in the present crop of digital libraries than the social<br>
|
---|
| 128 | sciences, arts, or humanities. They are maintained by professional societies,<br>
|
---|
| 129 | universities, research laboratories, and even private individuals. Access is generally<br>
|
---|
| 130 | free, both to search and to download documents.<br>
|
---|
| 131 | The emergence of these subject-specific digital libraries is particularly important<br>
|
---|
| 132 | given the pattern of access to materials presently employed by research scientists.<br>
|
---|
| 133 | Informal exchanges of preprints, reprints, and photocopies of papers passed on by<br>
|
---|
| 134 | colleagues currently are major venues for the transmission of scientific information<br>
|
---|
| 135 | between researchers in the sciences. In one study, the dependence on these sources<br>
|
---|
| 136 | ranges from 12% (for chemistry) to 39% (for mathematics) of all papers cited in<br>
|
---|
| 137 | researchers' own publications [11]. A qualitative study of how computer<br>
|
---|
| 138 | scientists locate and retrieve documents (computing is one of the domains considered<br>
|
---|
| 139 | later in this paper) indicates that for that field, technical reports and research documents<br>
|
---|
| 140 | found in various locations on the Internet are a preferred source of information [6].<br>
|
---|
| 141 | Many of the digital library systems discussed in this paper are repositories for just this<br>
|
---|
| 142 | type of literature. The documents tend to be of high quality: primarily technical<br>
|
---|
| 143 | reports or working papers from research institutions (both academic and commercial),<br>
|
---|
| 144 | as well as advance copies of work accepted for publication in conventional paper<br>
|
---|
| 145 | journals. Moreover, these digital libraries are also coming to include refereed work<br>
|
---|
| 146 | published digitally (in electronic journals). Anecdotal evidence suggests that in their<br>
|
---|
| 147 | fields, these digital libraries are coming to be the resource of choice for locating cutting<br>
|
---|
| 148 | edge work.<br>
|
---|
| 149 | For specialized subjects such as high energy physics, this dependence on<br>
|
---|
| 150 | informal or extra-library dissemination can be much higher. Ginsparg ([9], [10])<br>
|
---|
| 151 | reports that fields in physics have traditionally relied heavily on preprint exchanges, and<br>
|
---|
| 152 | the digital repositories of physics preprints begun in 1991 (the PHYSICS E-PRINT<br>
|
---|
| 153 | ARCHIVES) have to a large extent supplanted conventional publishing and physical<br>
|
---|
| 154 | <hr>
|
---|
| 155 | </Content>
|
---|
| 156 | </Section>
|
---|
| 157 | <Section>
|
---|
| 158 | <Description>
|
---|
| 159 | <Metadata name="Title">3</Metadata>
|
---|
| 160 | </Description>
|
---|
| 161 | <Content><br />
|
---|
| 162 | paper mailing of technical reports. By providing ready access to information sources<br>
|
---|
| 163 | that are already preferentially utilized by scientists, the digital libraries show potential to<br>
|
---|
| 164 | increase access to information that until recently was expensive or difficult to acquire in<br>
|
---|
| 165 | paper form. Indeed, in some fields (most notably physics) this process has already<br>
|
---|
| 166 | begun, as researchers in less developed countries report access to ongoing research<br>
|
---|
| 167 | through the Internet repositories that their local libraries could not afford to acquire<br>
|
---|
| 168 | through conventional journal subscriptions ([9], [10]).<br>
|
---|
| 169 | The primary use for new bibliographic resources is, of course, for the contents<br>
|
---|
| 170 | of the documents involved. A secondary use for emerging resources is as a basis for<br>
|
---|
| 171 | bibliometric analysis of the subject field. With the conventionally published scientific<br>
|
---|
| 172 | literature, the sheer difficulty of accumulating statistics discouraged bibliometric<br>
|
---|
| 173 | research until the advent of large bibliographic databases in the 1960's. Computerized<br>
|
---|
| 174 | bibliographic databases sparked a significant increase in the number of large-scale<br>
|
---|
| 175 | bibliographic studies, as significant portions of the collection and analysis of data could<br>
|
---|
| 176 | be automated ([12], [13]). The availability of CD-ROM versions of bibliographic<br>
|
---|
| 177 | databases has been of particular importance, since they provide a cheaper alternative to<br>
|
---|
| 178 | the online commercial databases [3].<br>
|
---|
| 179 | These computerized bibliographic resources have drawbacks, however. The<br>
|
---|
| 180 | greatest is that the full text of documents are rarely available, and even abstracts are not<br>
|
---|
| 181 | always present. This obviously limits the types of bibliometric research that can be<br>
|
---|
| 182 | conducted <i>solely</i> through these databases. In addition, these databases are generally<br>
|
---|
| 183 | limited to formally published documents (those appearing in selected books, journals,<br>
|
---|
| 184 | and conference proceedings). The &quot;grey literature&quot; of technical reports, pre-prints, and<br>
|
---|
| 185 | other works not formally published are largely ignored, and it is this absence of easy<br>
|
---|
| 186 | access to these documents that has hampered the analysis of these important forms of<br>
|
---|
| 187 | scientific communication.<br>
|
---|
| 188 | The digital libraries currently in existence complement the online and CD-ROM<br>
|
---|
| 189 | bibliographic databases. They are best suited for examinations of the &quot;physical&quot;<br>
|
---|
| 190 | characteristics of documents (for example, document length), analysis based on<br>
|
---|
| 191 | <hr>
|
---|
| 192 | </Content>
|
---|
| 193 | </Section>
|
---|
| 194 | <Section>
|
---|
| 195 | <Description>
|
---|
| 196 | <Metadata name="Title">4</Metadata>
|
---|
| 197 | </Description>
|
---|
| 198 | <Content><br />
|
---|
| 199 | bibliographic information that can be automatically extracted from the document text or<br>
|
---|
| 200 | the sometimes unevenly formatted bibliographic records (such as obsolescence<br>
|
---|
| 201 | studies), and usage studies (geographic or institutional origin of users, date/time of<br>
|
---|
| 202 | access, individual patterns of document retrieval, etc.). Because references are present<br>
|
---|
| 203 | in the document file but not identified by field, co-citation and bibliographic coupling<br>
|
---|
| 204 | research is not well-supported, and conducting these studies requires considerable<br>
|
---|
| 205 | effort on the part of the researcher.<br>
|
---|
| 206 | The variety of bibliographic repositories in the available digital libraries in itself<br>
|
---|
| 207 | has great potential in conducting bibliometric research. Sigogneau et al [15] present a<br>
|
---|
| 208 | case study illustrating the ways in which the strengths of different databases can be<br>
|
---|
| 209 | played off each other; they conduct a fine-grained analysis of the emergence of research<br>
|
---|
| 210 | fronts in molecular and cellular biology, and demonstrate that the observations gleaned<br>
|
---|
| 211 | from two complementary bibliographic databases provide greater insight into their<br>
|
---|
| 212 | problem. Similarly, it appears that the types of bibliographic data that can be gleaned<br>
|
---|
| 213 | from the relatively unstructured digital libraries can be profitably combined with data<br>
|
---|
| 214 | from online databases, CD-ROMS, and other more conventional bibliographic<br>
|
---|
| 215 | resources.<br>
|
---|
| 216 | This paper is organized as follows: Section 2 discusses the types of indexing<br>
|
---|
| 217 | and searching available with current digital libraries; Section 3 gives examples of<br>
|
---|
| 218 | conventional bibliometric techniques applied to Internet-accessible archives; Section 4<br>
|
---|
| 219 | discusses opportunities to directly measure usage of documents and to detect<br>
|
---|
| 220 | information-seeking patterns in researchers; and Section 5 presents our conclusions.<br>
|
---|
| 221 | <b>2. Indexing and searching in current digital libraries</b><br>
|
---|
| 222 | At present, the types of indexing fields for most academically-oriented digital<br>
|
---|
| 223 | library systems are limited. Many schemes index on user-supplied document<br>
|
---|
| 224 | descriptions, abstracts, or similar document surrogates (for example, the PHYSICS E-<br>
|
---|
| 225 | PRINT ARCHIVE [10], a collection of physics pre-prints and technical reports). As will<br>
|
---|
| 226 | <hr>
|
---|
| 227 | </Content>
|
---|
| 228 | </Section>
|
---|
| 229 | <Section>
|
---|
| 230 | <Description>
|
---|
| 231 | <Metadata name="Title">5</Metadata>
|
---|
| 232 | </Description>
|
---|
| 233 | <Content><br />
|
---|
| 234 | be discussed below, the quality of this user-provided data can be highly variable, and<br>
|
---|
| 235 | may unfavorably impact the usefulness of the index for searching. Alternatively, a<br>
|
---|
| 236 | designated site librarian may maintain a catalog (eg, the WATERS [14] system, now<br>
|
---|
| 237 | subsumed by NCSTRL (http://www.ncstrl.org/), both primarily collections of<br>
|
---|
| 238 | computer science technical reports); in this case the quality of the bibliographic<br>
|
---|
| 239 | information may be expected to be higher, but fewer sites will be likely to support<br>
|
---|
| 240 | such a librarian and therefore fewer documents are likely to be included in the digital<br>
|
---|
| 241 | library. In a “harvesting” system such as the computer science technical report<br>
|
---|
| 242 | collections supported by HARVEST [2] or the NEW ZEALAND DIGITAL LIBRARY<br>
|
---|
| 243 | computer science technical report collection ([16], [17]), documents are indexed from<br>
|
---|
| 244 | passive repositories (that may not even be aware that their documents are being<br>
|
---|
| 245 | included in the digital library). Harvesting systems therefore cannot rely on the<br>
|
---|
| 246 | presence of bibliographic data of any sort.<br>
|
---|
| 247 | Because of the relative paucity of high-quality bibliographic data available to<br>
|
---|
| 248 | many of the current academically- or research-focussed digital library collections, their<br>
|
---|
| 249 | search interfaces tend to be more primitive than those ordinarily found in online<br>
|
---|
| 250 | bibliographic databases or library catalogs. Systems such as NCSTRL can support<br>
|
---|
| 251 | author, title, and subject searching, but this more sophisticated search functionality<br>
|
---|
| 252 | comes at the expense of requiring participating repositories to use specific software. As<br>
|
---|
| 253 | a consequence, these latter systems may provide access to a smaller number of sites than<br>
|
---|
| 254 | harvesting systems. Harvesters may access a broader range of providers, but at the<br>
|
---|
| 255 | penalty of being limited to unfielded, keyword searches over the raw text of the<br>
|
---|
| 256 | document or document surrogate.<br>
|
---|
| 257 | Specifically, the indexing in existing digital libraries has a variety of shortcomings for<br>
|
---|
| 258 | bibliometric applications:<br>
|
---|
| 259 | •<br>
|
---|
| 260 | <i>lack of fielded indexing:</i> As noted above, some large and widely used digital<br>
|
---|
| 261 | libraries (such as the computer science technical report collection of the NEW<br>
|
---|
| 262 | ZEALAND DIGITAL LIBRARY) may lack formal cataloging entirely, and rely on<br>
|
---|
| 263 | <hr>
|
---|
| 264 | </Content>
|
---|
| 265 | </Section>
|
---|
| 266 | <Section>
|
---|
| 267 | <Description>
|
---|
| 268 | <Metadata name="Title">6</Metadata>
|
---|
| 269 | </Description>
|
---|
| 270 | <Content><br />
|
---|
| 271 | keyword searching over the raw document text. Obviously this makes field-<br>
|
---|
| 272 | dependent analysis more difficult (for example, locating documents produced by<br>
|
---|
| 273 | specific authors), and in the worst case may require a manual examination of all<br>
|
---|
| 274 | files in the collection in order to reliably identify a desired document subset.<br>
|
---|
| 275 | However, keyword search techniques that approximate fielded searching results<br>
|
---|
| 276 | may suffice: for example in the NEW ZEALAND DIGITAL LIBRARY computer<br>
|
---|
| 277 | science technical report collection, limiting the keyword search for “Johnson”<br>
|
---|
| 278 | to a search of first pages only is likely to retrieve documents written by Johnson<br>
|
---|
| 279 | (since for the majority of computer science technical reports, the first page<br>
|
---|
| 280 | contains little more than author, title, date, and institution details).<br>
|
---|
| 281 | A more principled approach to extracting bibliographic information is embodied<br>
|
---|
| 282 | in the CiteSeer tool [1]. This software parses raw, unfielded academic<br>
|
---|
| 283 | documents and attempts to identify such indexing information as author, title,<br>
|
---|
| 284 | reference list, etc. Obviously such a tool cannot attain 100% accuracy over a<br>
|
---|
| 285 | heterogenous document collection, but in practice it appears useful in that it can<br>
|
---|
| 286 | make a good first pass in processing a set of documents, providing an initial set<br>
|
---|
| 287 | of parsed documents for analysis. The remaining (presumably much smaller) set<br>
|
---|
| 288 | of unparsable documents can then be dealt with manually.<br>
|
---|
| 289 | •<br>
|
---|
| 290 | <i>lack of consistency in field formatting:</i> Current digital libraries usually acquire<br>
|
---|
| 291 | bibliographic information from either the authors of submitted articles or<br>
|
---|
| 292 | automatic extraction routines (retrieving bibliographic details from catalog files<br>
|
---|
| 293 | that may or may not be in a given document site, and that may or may not be in<br>
|
---|
| 294 | an easily parsable form). Neither of these methods produce records with<br>
|
---|
| 295 | standard formatting, which causes problems with automated bibliometric<br>
|
---|
| 296 | analysis. Consider the following examples selected from entries in the hep-th<br>
|
---|
| 297 | (high energy physics) collection of the PHYSICS E-PRINT ARCHIVES:<br>
|
---|
| 298 | <hr>
|
---|
| 299 | </Content>
|
---|
| 300 | </Section>
|
---|
| 301 | <Section>
|
---|
| 302 | <Description>
|
---|
| 303 | <Metadata name="Title">7</Metadata>
|
---|
| 304 | </Description>
|
---|
| 305 | <Content><br />
|
---|
| 306 | (i)<br>
|
---|
| 307 | Authors: A. Yu. Alekseev, V. Schomerus<br>
|
---|
| 308 | (ii)<br>
|
---|
| 309 | Authors: Adel Bilal and Ian. I. Kogan<br>
|
---|
| 310 | (iii)<br>
|
---|
| 311 | Authors: Paul S. Aspinwall and David R. Morrison (with an appendix <br>
|
---|
| 312 | by Mark Gross)<br>
|
---|
| 313 | (iv)<br>
|
---|
| 314 | Authors: A. H. Chamseddine and Herbi Dreiner (ETH-Zurich)<br>
|
---|
| 315 | In this case, typical for existing digital libraries, there is no standardized format<br>
|
---|
| 316 | for authors' names (here, appearing with full names, initials plus last name, and<br>
|
---|
| 317 | a mixture of the two); no standard convention for separating author names<br>
|
---|
| 318 | (here, either a comma or &quot;and&quot; are used); and parenthetical information can<br>
|
---|
| 319 | include a variety of information such as the name of an associate author or the<br>
|
---|
| 320 | institutional affiliations of an author. Manual processing or specially crafted<br>
|
---|
| 321 | software would be required to reformat these fields for analysis.<br>
|
---|
| 322 | •<br>
|
---|
| 323 | <i>duplicate entries: </i> Digital libraries that draw documents from a variety of sources<br>
|
---|
| 324 | may inadvertently contain duplicate items. Unfortunately, the irregular<br>
|
---|
| 325 | formatting of the bibliographic information makes it difficult to automatically<br>
|
---|
| 326 | detect these duplicates.<br>
|
---|
| 327 | •<br>
|
---|
| 328 | <i>implicit field tagging:</i> In some repositories, items are not explicitly tagged with<br>
|
---|
| 329 | certain types of information — most commonly the document's date of<br>
|
---|
| 330 | publication or production. Instead, the date is implicit in the document's title<br>
|
---|
| 331 | (eg, its numeration in a technical report series) or in the location of the document<br>
|
---|
| 332 | in the file structure of the repository (eg, separate directories exist for each<br>
|
---|
| 333 | year). A second common piece of implicit data is the authors’ institutional<br>
|
---|
| 334 | affiliations. This may be contained in the document itself (typically on a cover<br>
|
---|
| 335 | page), or may be implicit in the document’s location (for example, a<br>
|
---|
| 336 | corporation’s technical reports are stored in its ftp repository). Again, in these<br>
|
---|
| 337 | <hr>
|
---|
| 338 | </Content>
|
---|
| 339 | </Section>
|
---|
| 340 | <Section>
|
---|
| 341 | <Description>
|
---|
| 342 | <Metadata name="Title">8</Metadata>
|
---|
| 343 | </Description>
|
---|
| 344 | <Content><br />
|
---|
| 345 | cases special processing is required to append this field information to a<br>
|
---|
| 346 | document record for bibliometric analysis. <br>
|
---|
| 347 | •<br>
|
---|
| 348 | <i>extraction of document text:</i> Few of the documents stored in the research-<br>
|
---|
| 349 | oriented digital libraries discussed in this paper are straight ascii text; instead,<br>
|
---|
| 350 | documents may appear in a variety of file formats, such as LaTeX, PostScript,<br>
|
---|
| 351 | PDF, etc. If the contents of the documents are to be automatically processed<br>
|
---|
| 352 | (for example, to count the words in a document, or to extract reference<br>
|
---|
| 353 | publication dates for an obsolescence study), then the text must be extracted.<br>
|
---|
| 354 | Utilities are available to convert most common document formats to ascii.<br>
|
---|
| 355 | It is likely that many of these problems will be addressed as the Internet-based<br>
|
---|
| 356 | document indexing systems mature. Even minor changes can greatly increase the<br>
|
---|
| 357 | useability of a bibliographic database for bibliometric research. For example, the<br>
|
---|
| 358 | addition of an explicit date tag to many online databases in 1975 sparked new<br>
|
---|
| 359 | applications in time series research [3].<br>
|
---|
| 360 | <b>3. Opportunities for applications of bibliometric techniques</b><br>
|
---|
| 361 | One type of bibliometric research concentrates on quantifying fundamental,<br>
|
---|
| 362 | structural details about a subject literature: how many items are published, how many<br>
|
---|
| 363 | authors are publishing, over what time period documents are likely to be used, etc.<br>
|
---|
| 364 | More complex studies analyze the relationships between documents, such as how<br>
|
---|
| 365 | documents cluster into subjects. The following examples give a flavour of the<br>
|
---|
| 366 | bibliometric research that is possible using the emerging digital libraries:<br>
|
---|
| 367 | <i>examining the “physical” characteristics of archived documents</i><br>
|
---|
| 368 | One relatively straightforward type of bibliometric study characterizes the<br>
|
---|
| 369 | formats of different literatures. For example, Figure 1 presents the range of the size<br>
|
---|
| 370 | <hr>
|
---|
| 371 | </Content>
|
---|
| 372 | </Section>
|
---|
| 373 | <Section>
|
---|
| 374 | <Description>
|
---|
| 375 | <Metadata name="Title">9</Metadata>
|
---|
| 376 | </Description>
|
---|
| 377 | <Content><br />
|
---|
| 378 | of computer science technical reports as measured by their length in pages. Of the<br>
|
---|
| 379 | 45,720 documents in the CSTR collection as of April 1998, nearly 1600 did not contain<br>
|
---|
| 380 | page divisions in their files (and hence are excluded from analysis). Note that the<br>
|
---|
| 381 | number of pages in the shorter documents (&lt;50 pages) falls into an approximately<br>
|
---|
| 382 | normal distribution (slightly skewed to the left), while presumably the longer<br>
|
---|
| 383 | documents represent Masters’ and Doctoral theses. A surprising number of documents<br>
|
---|
| 384 | are very short (between one and 5 pages); these may represent the type of condensed<br>
|
---|
| 385 | results frequently found in the “technical notes”, “short papers”, and “poster sessions”<br>
|
---|
| 386 | of computing conferences and journals. The average number of pages per document,<br>
|
---|
| 387 | 27.5, appears to be slightly longer than the common upper bound for a computing<br>
|
---|
| 388 | journal article, although this observation must be confirmed by a similar study of the<br>
|
---|
| 389 | lengths of formally published computing articles.<br>
|
---|
| 390 | This type of analysis is of particular interest for technical reports, since they<br>
|
---|
| 391 | have not been studied in the same detail as formally published papers. A comparison of<br>
|
---|
| 392 | the physical characteristics of the formal and informal literature could provide<br>
|
---|
| 393 | supporting evidence for common beliefs about the relationship between the two types<br>
|
---|
| 394 | of documents. For example, do publishing constraints force journal and proceedings<br>
|
---|
| 395 | articles to be shorter than technical reports, and therefore presumably omit technical<br>
|
---|
| 396 | details of findings? Do technical reports contain more/less extensive reference sections?<br>
|
---|
| 397 | If reference sections of technical reports are longer than those of published articles, then<br>
|
---|
| 398 | citation links are being omitted in published works; if technical reports contain fewer<br>
|
---|
| 399 | references, then this may confirm earlier indications that computer scientists tend to<br>
|
---|
| 400 | “research first” and do literature surveys later [6].<br>
|
---|
| 401 | Figure 1. Range of sizes of CS technical reports, measured by number of pages<br>
|
---|
| 402 | <i>obsolescence studies.</i><br>
|
---|
| 403 | A document is considered obsolete when it is no longer referenced by the<br>
|
---|
| 404 | current literature. Typically, documents receive their greatest number and frequency of<br>
|
---|
| 405 | <hr>
|
---|
| 406 | </Content>
|
---|
| 407 | </Section>
|
---|
| 408 | <Section>
|
---|
| 409 | <Description>
|
---|
| 410 | <Metadata name="Title">10</Metadata>
|
---|
| 411 | </Description>
|
---|
| 412 | <Content><br />
|
---|
| 413 | citations immediately after publication, and the frequency of citation falls rapidly as time<br>
|
---|
| 414 | passes. One technique for estimating the obsolescence rate of a body of literature— the<br>
|
---|
| 415 | <i>synchronous</i> method — is to find the median date in the references of the documents.<br>
|
---|
| 416 | This median date is subtracted from the year of publication for the documents, yielding<br>
|
---|
| 417 | the <i>median citation age</i>. As would be expected, this median varies between the<br>
|
---|
| 418 | disciplines. Typically the social sciences and arts have a higher median citation age<br>
|
---|
| 419 | than the “hard” sciences and engineering, indicating that documents obsolesce more<br>
|
---|
| 420 | quickly for the latter fields.<br>
|
---|
| 421 | As noted in Section 2, references are not generally explicitly tagged in existing<br>
|
---|
| 422 | digital repositories. However, reference dates can usually be extracted from the<br>
|
---|
| 423 | document text by first locating the reference section (usually delimited by a &quot;references&quot;<br>
|
---|
| 424 | or &quot;bibliography&quot; section heading), and then extracting all numbers in the appropriate<br>
|
---|
| 425 | ranges for dates for the field under study.<br>
|
---|
| 426 | To illustrate this process, 188 technical reports were sampled from Internet-<br>
|
---|
| 427 | accessible repositories1 and used as source documents for a synchronous obsolescence<br>
|
---|
| 428 | study. Conveniently, the repositories chosen organize technical reports into sub-<br>
|
---|
| 429 | directories by their date of publication. The reference dates for each technical report<br>
|
---|
| 430 | were automatically extracted by software that scanned the document’s file for numbers<br>
|
---|
| 431 | of the form 19XX, since previous studies indicate that few if any computing reports<br>
|
---|
| 432 | reference documents published in previous centuries [5]. Table 1 presents the median<br>
|
---|
| 433 | citation age calculated for these documents, broken down by repository and the year of<br>
|
---|
| 434 | publication for the source documents from which the reference dates were extracted:<br>
|
---|
| 435 | Table 1. Median citation ages for technical report repositories<br>
|
---|
| 436 | The median citation age ranges between 2 and 4 years, which is consistent with<br>
|
---|
| 437 | previous examinations of computing and information systems literature ([5], [4]).<br>
|
---|
| 438 | When graphed, the distribution of reference dates show the exponential curve typically<br>
|
---|
| 439 | found in obsolescence studies, including the final droop due to an “immediacy effect”<br>
|
---|
| 440 | <hr>
|
---|
| 441 | </Content>
|
---|
| 442 | </Section>
|
---|
| 443 | <Section>
|
---|
| 444 | <Description>
|
---|
| 445 | <Metadata name="Title">11</Metadata>
|
---|
| 446 | </Description>
|
---|
| 447 | <Content><br />
|
---|
| 448 | as fewer very new documents are available for citation [7]. These types of results<br>
|
---|
| 449 | provide confirmation that references used in computer science technical reports (the pre-<br>
|
---|
| 450 | eminent “grey literature” of the computing field) conforms to the same patterns as<br>
|
---|
| 451 | references found in the formally published literature.<br>
|
---|
| 452 | <i>co-citation and bibliographic coupling studies</i><br>
|
---|
| 453 | The rate at which documents cite each other (co-citation) or cite the same<br>
|
---|
| 454 | documents (bibliographic coupling) can be used to produce &quot;maps&quot; of a subject<br>
|
---|
| 455 | literature. These techniques rely on analysis of the references of documents, and these<br>
|
---|
| 456 | references must be in a common format. While digital libraries contain full text of<br>
|
---|
| 457 | documents, their references are not standardized, and indeed are not even tagged as<br>
|
---|
| 458 | such. To perform these studies the references must be manually extracted and<br>
|
---|
| 459 | processed—a tedious process that is only worthwhile for documents (such as technical<br>
|
---|
| 460 | reports) that are not included in existing citation databases such as the Science Citation<br>
|
---|
| 461 | Index and Social Science Citation Index.<br>
|
---|
| 462 | <i>detecting cycles or regularities in the rate of production of research</i><br>
|
---|
| 463 | Analysis of trends in the production of technical reports can give indications<br>
|
---|
| 464 | about working conditions that affect research; for example, is more research produced<br>
|
---|
| 465 | over the summer, when the teaching load is lighter? or is research steadily produced<br>
|
---|
| 466 | throughout the year?<br>
|
---|
| 467 | Figure 2. Distribution of the number of documents submitted to hep-th, 1992-1994<br>
|
---|
| 468 | Figures 2 and 3 present statistics on document accumulation in the hep-th (high<br>
|
---|
| 469 | energy physics) e-print server, a part of the PHYSICS E-PRINT ARCHIVE. This system<br>
|
---|
| 470 | is one of the oldest formal pre-print archives, and has become the primary means for<br>
|
---|
| 471 | information dissemination in its field. Examination of these figures reveals several<br>
|
---|
| 472 | trends. Clearly the absolute number of documents deposited in the repository has<br>
|
---|
| 473 | <hr>
|
---|
| 474 | </Content>
|
---|
| 475 | </Section>
|
---|
| 476 | <Section>
|
---|
| 477 | <Description>
|
---|
| 478 | <Metadata name="Title">12</Metadata>
|
---|
| 479 | </Description>
|
---|
| 480 | <Content><br />
|
---|
| 481 | tended to increase over the time period. For all three years, research production has its<br>
|
---|
| 482 | lowest point in January and February, increases through May and June, then decreases<br>
|
---|
| 483 | until August and September. At that point the rate of production steps up, reaching a<br>
|
---|
| 484 | yearly peak in November and December. This pattern is less clear for 1992, which<br>
|
---|
| 485 | might be expected as the archive was established in mid-1991.<br>
|
---|
| 486 | Figure 3. Distribution of the percentage of documents submitted to hep-th, 1992-1994<br>
|
---|
| 487 | <b>4. Analysis of usage data</b><br>
|
---|
| 488 | The emerging Internet-based digital libraries will permit research on scientific<br>
|
---|
| 489 | information collection and use at a much finer grain than is possible with current paper<br>
|
---|
| 490 | libraries or online bibliographic databases. Current bibliometric or scientometric<br>
|
---|
| 491 | research of this type must measure information use indirectly — for example, through<br>
|
---|
| 492 | examination of the list of references appended to published articles. However, it is well<br>
|
---|
| 493 | known that authors do not necessarily include in the reference list all documents that<br>
|
---|
| 494 | could have been cited, and conversely that not all references listed may have been<br>
|
---|
| 495 | actually “used” in performing the research; citation behavior can be affected by a<br>
|
---|
| 496 | number of motivating factors (Garfield lists <i>15</i> possible reasons in [8]).<br>
|
---|
| 497 | Digital library transaction logs provide a powerful tool for direct analysis of<br>
|
---|
| 498 | document “usage”: since digital libraries contain the actual document (rather than only a<br>
|
---|
| 499 | document surrogate), the relative amount of “use” that a digital library’s clients make of<br>
|
---|
| 500 | a given document can be estimated from the number of times the document file is<br>
|
---|
| 501 | downloaded (and, presumably, the document is read). Note that file downloading is a<br>
|
---|
| 502 | much stronger statement on the part of the user than, for example, having a<br>
|
---|
| 503 | bibliographic record appear in the query result set for a conventional bibliographic<br>
|
---|
| 504 | system; the user downloads only <i>after</i> the document has been found potentially relevant<br>
|
---|
| 505 | through examination of its document surrogate. Additionally, downloading is<br>
|
---|
| 506 | frequently time-consuming and sometimes costly (depending on local pricing for<br>
|
---|
| 507 | <hr>
|
---|
| 508 | </Content>
|
---|
| 509 | </Section>
|
---|
| 510 | <Section>
|
---|
| 511 | <Description>
|
---|
| 512 | <Metadata name="Title">13</Metadata>
|
---|
| 513 | </Description>
|
---|
| 514 | <Content><br />
|
---|
| 515 | Internet access). Downloaded documents are therefore highly likely at least to be<br>
|
---|
| 516 | scanned, if not read closely. The transaction logs for a digital library can provide a<br>
|
---|
| 517 | global picture of the use of documents in the collection, since all user interactions with<br>
|
---|
| 518 | the library can be automatically logged for analysis. By contrast, it is of course<br>
|
---|
| 519 | impossible to track usage of print bibliographies, and very difficult to monitor usage of<br>
|
---|
| 520 | bibliographic data available on CD-ROM across more than one or two sites.<br>
|
---|
| 521 | Furthermore, analysis of search requests by geographic location, institution,<br>
|
---|
| 522 | and sometimes even individual user is also possible. As an example, Table 2 presents<br>
|
---|
| 523 | a portion of the summary of usage statistics (broken down by domain code) for queries<br>
|
---|
| 524 | to the computer science technical collection of the NEW ZEALAND DIGITAL LIBRARY.<br>
|
---|
| 525 | Examination of the data indicates that the heaviest use of the collection comes from<br>
|
---|
| 526 | North America, Europe (particularly Germany and Finland), as well as the local New<br>
|
---|
| 527 | Zealand community and nearby Australia. As expected for such a collection, a large<br>
|
---|
| 528 | proportion of users are from educational (.edu) institutions; surprisingly, however, a<br>
|
---|
| 529 | similar number of queries come from commercial (.com) organizations, indicating<br>
|
---|
| 530 | perhaps that the documents are seeing use in commercial research and development<br>
|
---|
| 531 | units.<br>
|
---|
| 532 | Table 2. Accesses to the NEW ZEALAND DIGITAL LIBRARY CS collection by Domain<br>Code<br>
|
---|
| 533 | Of course, usage levels can also be further broken down by IP number<br>
|
---|
| 534 | (indicating institutions), and systems requiring users to register may also be able to<br>
|
---|
| 535 | analyze usage on an individual basis. Since the query strings themselves are also<br>
|
---|
| 536 | recorded in the transaction logs, this domain/institution/individual activity could also be<br>
|
---|
| 537 | linked to specific subjects through the query terms. Summaries of this type could be<br>
|
---|
| 538 | invaluable for studies of geographic diffusion and distribution of research topics.<br>
|
---|
| 539 | Transaction log analysis can also indicate time-related patterns in the<br>
|
---|
| 540 | information seeking behavior of digital library users. As a sample of this type of<br>
|
---|
| 541 | analysis, Paul Ginsparg notes a seven day periodicity in the number of search requests<br>
|
---|
| 542 | <hr>
|
---|
| 543 | </Content>
|
---|
| 544 | </Section>
|
---|
| 545 | <Section>
|
---|
| 546 | <Description>
|
---|
| 547 | <Metadata name="Title">14</Metadata>
|
---|
| 548 | </Description>
|
---|
| 549 | <Content><br />
|
---|
| 550 | made to the PHYSICS E-PRINT archives (Figure 4, reproduced from [9]). From this he<br>
|
---|
| 551 | adduces that many physicists do not yet have weekend access to the Internet (an<br>
|
---|
| 552 | alternative, slightly more cynical hypothesis is that even high energy theoretical<br>
|
---|
| 553 | physicists take the weekend off).<br>
|
---|
| 554 | Figure 4. Summary of search requests to the physics pre-print archives<br>
|
---|
| 555 | <b>5. Conclusion</b><br>
|
---|
| 556 | This study suggests opportunities for conducting bibliometric research on the<br>
|
---|
| 557 | evolving digital libraries. These repositories are suitable platforms for conventional<br>
|
---|
| 558 | bibliometric techniques (such as obsolescence studies, quantification of physical<br>
|
---|
| 559 | characteristics of documents comprising a subject literature, time analysis, etc.). The<br>
|
---|
| 560 | ability to directly monitor access to documents in digital libraries also enables<br>
|
---|
| 561 | researchers to explicitly quantify document usage, as well as to implicitly measure<br>
|
---|
| 562 | usage through citations. Additional facilities could aid in the performance of<br>
|
---|
| 563 | bibliographic experiments, such as: improved tagging of document fields; provision of<br>
|
---|
| 564 | utilities to strip out titles, authors, etc. from common document formats; and the ability<br>
|
---|
| 565 | to easily eliminate duplicate entries from downloaded library subsets. Unfortunately,<br>
|
---|
| 566 | the most useful of these additional facilities — those associated with a higher degree of<br>
|
---|
| 567 | cataloging — run counter to the underlying philosophy of many digital libraries: to<br>
|
---|
| 568 | avoid, if possible, manual processing and formal cataloging of documents. While<br>
|
---|
| 569 | adherence to this principle can limit the accuracy of fielded searching (or indeed,<br>
|
---|
| 570 | preclude it altogether), it can also avoid the cataloging bottleneck and permit digital<br>
|
---|
| 571 | libraries to provide access to larger numbers of documents.<br>
|
---|
| 572 | The digital libraries complement the information currently available through<br>
|
---|
| 573 | paper, online, and CD-ROM bibliographic resources. While these latter databases<br>
|
---|
| 574 | generally have the advantage of standardized formatting of bibliographic fields, the<br>
|
---|
| 575 | digital libraries are freely accessible, often contain &quot;grey literature&quot; that is otherwise<br>
|
---|
| 576 | <hr>
|
---|
| 577 | </Content>
|
---|
| 578 | </Section>
|
---|
| 579 | <Section>
|
---|
| 580 | <Description>
|
---|
| 581 | <Metadata name="Title">15</Metadata>
|
---|
| 582 | </Description>
|
---|
| 583 | <Content><br />
|
---|
| 584 | unavailable for analysis, and generally make the full text of documents available. The<br>
|
---|
| 585 | insights gained from analysis of digital libraries will add to the store of &quot;information<br>
|
---|
| 586 | about information&quot; that we have gained from older types of bibliographic repositories.<br>
|
---|
| 587 | <b>References</b><br>
|
---|
| 588 | [1] Bollacker, K.D., S. Lawrence, and C.L.Giles, CiteSeer: An Autonomous Web<br>
|
---|
| 589 | Agent for Automatic Retrieval and Identification of Interesting Publications,<br>
|
---|
| 590 | <i>Proceedings of the Second International Conference on Autonomous Agents</i><br>
|
---|
| 591 | (Minneapolis/St. Paul, May 9-13), 1998.<br>
|
---|
| 592 | [2] Bowman, C.M., P.B. Danzig, U. Manber, and M.F. Schwartz, Scalable Internet<br>
|
---|
| 593 | resource discovery: Research problems and approaches, <i>Communications of</i><br>
|
---|
| 594 | <i>the ACM 37(8)</i> (1994) 98-107.<br>
|
---|
| 595 | [3] Burton, Hilary D. , Use of a virtual information system for bibliometric analysis,<br>
|
---|
| 596 | <i>Information Processing &amp; Management 24(1)</i> (1988) 39-44.<br>
|
---|
| 597 | [4] Cunningham, S.J., An empirical investigation of the obsolescence rate for<br>
|
---|
| 598 | information systems literature, <i>Library and Information Science</i><br>
|
---|
| 599 | <i>Research</i>., 1996, http://library.fgcu.edu/iclc/lisrissu.htm<br>
|
---|
| 600 | [5] Cunningham, S.J., and D. Bocock, Obsolescence of computing literature.<br>
|
---|
| 601 | <i>Scientometrics</i> <i>34(2) </i> (1995), pp. 255-262.<br>
|
---|
| 602 | [6] Cunningham, S.J. and Lynn Silipigni Connaway, Information searching<br>
|
---|
| 603 | preferences and practices of computer science researchers, <i>Proceedings of</i><br>
|
---|
| 604 | <i>OZCHI '96</i> (1996) 294-299.<br>
|
---|
| 605 | [7] de Solla Price, D.J., Citation measures of hard science, soft science, technology,<br>
|
---|
| 606 | and nonscience. In: C.E. Nelson and D.K. Pollock (eds), <i>Communication</i><br>
|
---|
| 607 | <i>among scientists and engineers</i> (Heath Lexington, 1970).<br>
|
---|
| 608 | [8] Garfield, E., <i>Citation Indexing: Its theory and application in Science, Technology</i><br>
|
---|
| 609 | <i>and Humanities (</i>Wiley, 1979).<br>
|
---|
| 610 | <hr>
|
---|
| 611 | </Content>
|
---|
| 612 | </Section>
|
---|
| 613 | <Section>
|
---|
| 614 | <Description>
|
---|
| 615 | <Metadata name="Title">16</Metadata>
|
---|
| 616 | </Description>
|
---|
| 617 | <Content><br />
|
---|
| 618 | [9] Ginsparg, P. After dinner remarks: 14 Oct ’94 APS meeting at LANL, 1994<br>
|
---|
| 619 | (&lt;URL: http://xxx.lanl.gov/blurb&gt; ).<br>
|
---|
| 620 | [10] Ginsparg, P., First steps towards electronic research communication, <i>Computers</i><br>
|
---|
| 621 | <i>in Physics 8(4)</i> (1994) 390-401. <br>
|
---|
| 622 | [11] Hallmark, J., Scientists' access and retrieval of references cited in their recent<br>
|
---|
| 623 | journal articles, <i> College and Research Libraries 55(3)</i> (1994) 199-210.<br>
|
---|
| 624 | [12] Hawkins, D.T. , Unconventional uses of on-line information retrieval systems:<br>
|
---|
| 625 | on-line bibliometric studies, <i>Journal of the American Society for Information</i><br>
|
---|
| 626 | <i>Science 28</i> (1977) 13-18.<br>
|
---|
| 627 | [13] McGhee, P.E. , P.R. Skinner, K. Roberto, N.J. Ridenour, and S.M. Larson,<br>
|
---|
| 628 | Using online databases to study current research trends: an online bibliometric<br>
|
---|
| 629 | study, <i>Library and Information Science Research 9</i> (1987) 285-291.<br>
|
---|
| 630 | [14] Maly, K., E.A. Fox, J.C. French, and A.L. Selman, Wide area technical report<br>
|
---|
| 631 | server (<i>Technical Report , </i> Dept. of Computer Science, Old Dominion<br>
|
---|
| 632 | University, <br>
|
---|
| 633 | 1994. <br>
|
---|
| 634 | Also <br>
|
---|
| 635 | available <br>
|
---|
| 636 | at <br>
|
---|
| 637 | <br>
|
---|
| 638 | <br>
|
---|
| 639 | &lt;URL:<br>
|
---|
| 640 | http://www.cs.odu.edu/WATERS/WATERS-paper.ps&gt; ).<br>
|
---|
| 641 | [15] Sigogneau, M.J. , S. Bain, J.P. Courtial, and H. Feillet, Scientific innovation in<br>
|
---|
| 642 | bibliographical databases: a comparative study of the Science Citation Index<br>
|
---|
| 643 | and the Pascal database, <i>Scientometrics 22(1)</i> (1991) 65-82.<br>
|
---|
| 644 | [16] Witten, I.H., S.J. Cunningham, M. Vallabh, and T.C. Bell, A New Zealand<br>
|
---|
| 645 | digital library for computer science research, <i>Proceedings of Digital Libraries</i><br>
|
---|
| 646 | <i>'95</i> (1995) 25-30.<br>
|
---|
| 647 | [17] Witten, I.H., C. Nevill-Manning, and S.J. Cunningham, A public library based<br>
|
---|
| 648 | on full-text retrieval, <i>Communications of the ACM</i> 41(4), 1998, p. 71<br>
|
---|
| 649 | <hr>
|
---|
| 650 | </Content>
|
---|
| 651 | </Section>
|
---|
| 652 | <Section>
|
---|
| 653 | <Description>
|
---|
| 654 | <Metadata name="Title">17</Metadata>
|
---|
| 655 | </Description>
|
---|
| 656 | <Content><br />
|
---|
| 657 | <br>
|
---|
| 658 | 1Documents were randomly sampled from the DEC<br>
|
---|
| 659 | (ftp://crl.dec.com/pub/DEC/CRL/tech-reports/), Sony<br>
|
---|
| 660 | (ftp://ftp.csl.sony.co.jp/CSL/CSL-Papers), and Ohio (ftp://archive.cis.ohio-<br>
|
---|
| 661 | state.edu/pub/tech-report/) technical report repositories<br>
|
---|
| 662 | <hr>
|
---|
| 663 |
|
---|
| 664 |
|
---|
| 665 | </Content>
|
---|
| 666 | </Section>
|
---|
| 667 | </Section>
|
---|
| 668 | </Archive>
|
---|