source: trunk/gsdl/packages/isis-gdl/Unimarc.cpp@ 6127

Last change on this file since 6127 was 6127, checked in by mdewsnip, 17 years ago

IsisGdl package for reading CDS/ISIS databases. Provided by Jean-Claude Dauphin at UNESCO, and modified slightly to compile and run under Linux.

  • Property svn:keywords set to Author Date Id Revision
File size: 18.1 KB
Line 
1/**********************************************************************
2 *
3 * Unimarc.cpp
4 * Copyright (C) 2003 UNESCO
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "stdafx.h"
27
28///////////////////////////////////////////////////////////////////////////////////
// MarcRecord - This class can instantiate objects that represent a MARC
// bibliographic record and includes methods for manipulating the
// record.
32//
33// example:
34//
35// #include "Unimarc.h"
36//
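// // Create a MarcRecord object from a raw MARC record of len bytes,
// // copying the data into the object (copyData = 1)
// MarcRecord mr(buf, len, 1);
// // "%245a" is replaced by subfield 'a' of the field with tag 245
// mr.PrettyFormat("The title is %245a", out, sizeof(out));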
40#include <vector>
41#include <stdio.h>
42#include <stdlib.h>
43#include <string.h>
44#include "Unimarc.h"
45
46//-------------------------------------------------------------------------------
47// ostream& operator<<(ostream& s, const MarcLeader& leader)
48//
49// Overloaded instance of the output operator for MarcLeader objects
50//---------------------------------------------------------------------------------
51std::ostream& operator<<(std::ostream& s, const MarcLeader& leader)
52{
53 s.write(leader.lreclen_, 5); // Logical record length
54 s.write(&leader.status_, 1); // Record status
55 s.write(&leader.type_, 1); // Type of record
56 s.write(&leader.bibLevel_, 1); // Bibliographic level
57 s.write(&leader.ctrlType_, 1); // Type of control
58 s.write(&leader.undPos_, 1); // Undefined character position
59 s.write(&leader.indCount_, 1); // Indicator count
60 s.write(&leader.scd_, 1); // Subfield code count
61 s.write(leader.base_, 5); // Base address of data
62 s.write(&leader.encLevel_, 1); // Encoding level
63 s.write(&leader.dcf_, 1); // Descriptive cataloging form
64 s.write(&leader.lrr_, 1); // Linked record requirement
65 // Directory entry map
66 s.write(&leader.llof_, 1); // Length of the length-of-field portion
67 s.write(&leader.lscp_, 1); // Length of starting-character-position portion
68 s.write(&leader.lidp_, 1); // Length of implementation-defined portion
69 s.write(&leader.uep_, 1); // Undefined entry map character position
70 return s;
71}
72
73//----------------------------------------------------------------------------------
74// istream& operator>>(istream& s, const MarcLeader& leader)
75//
76// Overloaded instance of the input operator for MarcLeader objects
77//----------------------------------------------------------------------------------
78std::istream& operator>>(std::istream& s, MarcLeader& leader)
79{
80 s.read(leader.lreclen_, 5); // Logical record length
81 s.read(&leader.status_, 1); // Record status
82 s.read(&leader.type_, 1); // Type of record
83 s.read(&leader.bibLevel_, 1); // Bibliographic level
84 s.read(&leader.ctrlType_, 1); // Type of control
85 s.read(&leader.undPos_, 1); // Undefined character position
86 s.read(&leader.indCount_, 1); // Indicator count
87 s.read(&leader.scd_, 1); // Subfield code count
88 s.read(leader.base_, 5); // Base address of data
89 s.read(&leader.encLevel_, 1); // Encoding level
90 s.read(&leader.dcf_, 1); // Descriptive cataloging form
91 s.read(&leader.lrr_, 1); // Linked record requirement
92 // Directory entry map
93 s.read(&leader.llof_, 1); // Length of the length-of-field portion
94 s.read(&leader.lscp_, 1); // Length of starting-character-position portion
95 s.read(&leader.lidp_, 1); // Length of implementation-defined portion
96 s.read(&leader.uep_, 1); // Undefined entry map character position
97 return s;
98}
99
100/************************************************************************
101Local prototypes
102 ***********************************************************************/
103static void Printx(FILE *fp, char *s, int x);
104
105//----------------------------------------------------------------------
106// MarcRecord::MarcRecord(char *data, int length, int copyData)
107//
108// Constructor that creates a new MarcRecord object.
109// data is raw marc record data for a single record. If copyData is 1,
110// data is encapsulated within this object. If 0, does not copy data and
111// responsibility falls to caller to maintain data pointer.
112//----------------------------------------------------------------------
113MarcRecord::MarcRecord(char* data, int length, int copyData)
114{
115 encapsulated_ = copyData;
116
117 /* encapsulate or set pointer to data */
118 if (encapsulated_)
119 {
120 data_ = new char[length + 1];
121 memcpy(data_, data, length);
122 data_[length] = '\0';
123 }
124 else
125 data = data;
126
127 // Lay leader overlay onto data
128 leader_ = *(MarcLeader *) (data_);
129
130 // Parse the record. sets dirCount_, directory_
131 if (!ParseMarcRecord())
132 {
133 // marcRecord_destroy(m);
134 // return NULL;
135 }
136}
137
138//----------------------------------------------------------------------
139// void MarcRecord::~MarcRecord()
140//
141// Destructor, destroys a MarcRecord object and all used resources.
142//----------------------------------------------------------------------
143MarcRecord::~MarcRecord()
144{
145 if ((encapsulated_) && (data_))
146 delete[] data_;
147
148 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
149 directory_[i].field_.erase(directory_[i].field_.begin(),directory_[i].field_.end());
150 directory_.erase(directory_.begin(),directory_.end());
151}
152
153#define MARC_STATE_IDLE 0
154#define MARC_STATE_TAG 1
155#define MARC_STATE_WIDTH 2
156//----------------------------------------------------------------------
157// int MarcRecord::PrettyFormat(char* format, char* buf, int maxlen)
158//
159// Formats the contents of the marc record according to a format string.
160// Useful for generating a buffer to pretty-print the record.
161// format contains a format-specifier string. The format-specifier string
162// is kindof like printf's, except the only control character is '%'.
163// Following the '%' should be the integer MARC field code as defined in
164// the (huge) list of available MARC fields from the library of congress
165// (http://lcweb.loc.gov/marc/). buf is a previously allocated
166// buffer into which the pretty version will be copied. maxlen is the max
167// length to write into buf. After call, buf contains pretty formatted text.
168//----------------------------------------------------------------------
169int MarcRecord::PrettyFormat(char* format, char* buf, int maxlen)
170{
171 char b[1024];
172 int bufpos = 0, fmtpos = 0;
173
174 b[0] = '\0';
175 buf[0] = '\0';
176
177 for (; format[fmtpos] != '\0'; )
178 {
179 switch (format[fmtpos])
180 {
181 case '%':
182 {
183 char tag[4];
184 strncpy(tag, (char *)(format + fmtpos + 1), 3);
185 tag[3] = '\0';
186 GetSubField(tag, format[fmtpos + 4], b, sizeof(b) - 1);
187 if (*b)
188 {
189 int l = strlen(b);
190 strncat(buf, b, maxlen - bufpos - 1);
191 bufpos += l;
192 if (bufpos >= maxlen)
193 {
194 buf[maxlen - 1] = '\0';
195 return maxlen;
196 }
197 }
198 fmtpos += 5;
199 break;
200 }
201 default:
202 buf[bufpos] = format[fmtpos];
203 bufpos++;
204 fmtpos++;
205 buf[bufpos] = '\0';
206 break;
207 }
208 }
209 return bufpos;
210}
211
212//----------------------------------------------------------------------
213// int MarcRecord::GetField(char* tag, char* buf, int maxlen)
214//
215// append all subfields in a field together.
216//----------------------------------------------------------------------
217int MarcRecord::GetField(char* tag, char* buf, int maxlen)
218{
219 int remain = maxlen - 1;
220
221 buf[0] = '\0';
222
223 if ((tag == NULL) || (*tag == '\0'))
224 return 0;
225
226 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
227 {
228 if (!strncmp(tag, directory_[i].tag_, strlen(tag)))
229 {
230 for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
231 {
232 int flen = directory_[i].field_[j].len_;
233 if (flen > remain)
234 {
235 strncat(buf, directory_[i].field_[j].data_ + 1, remain);
236 buf[maxlen] = '\0';
237 return 1;
238 }
239 else
240 {
241 strncat(buf, directory_[i].field_[j].data_ + 1, flen);
242 remain -= flen;
243 }
244 }
245 return 1;
246 }
247 }
248 return 0;
249}
250
251//----------------------------------------------------------------------
252// int MarcRecord::GetSubFields(char* tag, char subtag, char* buf, int maxlen)
253//
254// append all fields with a certain subfield together.
255//----------------------------------------------------------------------
256int MarcRecord::GetSubFields(char* tag, char subtag, char* buf, int maxlen)
257{
258 int remain = maxlen - 1;
259
260 buf[0] = '\0';
261
262 if ((tag == NULL) || (*tag == '\0'))
263 return 0;
264
265 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
266 {
267 if (!strncmp(tag, directory_[i].tag_, strlen(tag)))
268 {
269 for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
270 {
271 if (*(directory_[i].field_[j].data_) == subtag)
272 {
273 int flen = directory_[i].field_[j].len_;
274 if (flen > remain)
275 {
276 strncpy(buf, directory_[i].field_[j].data_ + 1, remain);
277 buf[maxlen] = '\0';
278 return 1;
279 }
280 else
281 {
282 strcpy(buf, directory_[i].field_[j].data_ + 1);
283 remain -= flen;
284 }
285 }
286 }
287 }
288 }
289 return 0;
290}
291
292int MarcRecord::HasField(char* tag)
293{
294
295 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
296 if (!strncmp(tag, directory_[i].tag_, strlen(tag)))
297 return 1;
298
299 return 0;
300}
301
302int MarcRecord::HasSubField(char* tag, char subtag)
303{
304
305 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
306 if (!strncmp(tag, directory_[i].tag_, strlen(tag)))
307 for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
308 if (*directory_[i].field_[j].data_ == subtag)
309 return 1;
310 return 0;
311}
312
313int MarcRecord::GetSubField(char *tag, char subtag, char *buf, int maxlen)
314{
315
316 buf[0] = '\0';
317
318 if ((tag == NULL) || (*tag == '\0'))
319 return 0;
320
321 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
322 {
323 if (!strncmp(tag, directory_[i].tag_, strlen(tag)))
324 {
325 for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
326 {
327 if (*(directory_[i].field_[j].data_) == subtag)
328 {
329 int l = directory_[i].field_[j].len_ - 1 > maxlen ? maxlen : directory_[i].field_[j].len_ - 1;
330 strncpy(buf, directory_[i].field_[j].data_ + 1, l);
331 buf[l] = '\0';
332 return 1;
333 }
334 }
335 }
336 }
337 return 0;
338}
339
340
//----------------------------------------------------------------------
// int GetNum(char *s, int n)
//
// Converts the first n characters of s (which need not be NUL-terminated)
// to an integer with atoi().
//
// params:
//
//  s  pointer to the digits
//  n  number of characters to use; at most 9 are used, larger values are
//     clamped so the local buffer cannot overflow
//
// returns:
//
//  the converted value, or 0 if s is NULL or n <= 0
//----------------------------------------------------------------------
int GetNum(char *s, int n)
{
    char nbuf[10]; /* no more than 9 digits */
    const int maxDigits = (int) sizeof(nbuf) - 1;

    if (s == NULL || n <= 0)
        return 0;
    if (n > maxDigits)
        n = maxDigits;

    strncpy(&nbuf[0], s, n);
    nbuf[n] = '\0';
    return (atoi(nbuf));
}
349
350void MarcRecord::Dump()
351{
352 int dc = 0, fc = 0;
353
354 printf("tags: ");
355 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
356 {
357 Printx(stdout, directory_[i].tag_, 3);
358 dc++;
359 for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
360 {
361 if (directory_[i].field_[j].data_)
362 {
363 putchar((int)directory_[i].field_[j].data_);
364 }
365 fc++;
366 /*
367 printx(stdout, (char*)f->data + 1, f->len);
368 printf("\n"); */
369 }
370 printf(",");
371 }
372 putchar('\n');
373}
374
375
376
377
378
379void MarcRecord::PrintDetailed(FILE *fp)
380{
381 int count, i;
382
383 fprintf(fp, "RecordLength:\t\t%i\n", GetNum(leader_.lreclen_, 5));
384 fprintf(fp, "RecordStatus:\t\t%c\n", leader_.status_);
385 fprintf(fp, "RecordType:\t\t%c\n", leader_.type_);
386 fprintf(fp, "BibLevel:\t\t%c\n", leader_.bibLevel_);
387 fprintf(fp, "ControlType:\t\t%c\n", leader_.ctrlType_);
388 fprintf(fp, "UndefCharPos:\t\t%c\n", leader_.undPos_);
389 fprintf(fp, "IndCount:\t\t%i\n", GetNum(&leader_.indCount_, 1));
390 fprintf(fp, "SubCodeCount:\t\t%i\n", GetNum(&leader_.scd_, 1));
391 fprintf(fp, "DataBaseAddr:\t\t%i\n", GetNum(leader_.base_, 5));
392 fprintf(fp, "EncodingLevel:\t\t%c\n", leader_.encLevel_);
393 fprintf(fp, "DescCatForm:\t\t%c\n", leader_.dcf_);
394 fprintf(fp, "LinkedRecReq:\t\t%c\n", leader_.lrr_);
395 fprintf(fp, "LengthOf LengthOfField:\t%i\n", GetNum(&leader_.llof_, 1));
396 fprintf(fp, "LengthOf StartCharPos:\t%i\n", GetNum(&leader_.lscp_, 1));
397 fprintf(fp, "LengthOf ImpDefined:\t%i\n", GetNum(&leader_.lidp_, 1));
398 fprintf(fp, "UndefinedEntry:\t\t%i\n", GetNum(&leader_.uep_, 1));
399
400 count = (GetNum(leader_.base_, 5) - MARC_LEADER_LEN) / MARC_DIRENT_LEN;
401 for (i = 0; i < count; i++)
402 {
403 DirectoryEntry dir = *(DirectoryEntry*)(data_+MARC_LEADER_LEN+(i*MARC_DIRENT_LEN));
404 fprintf(fp, "\n");
405 fprintf(fp, "Tag:\t\t");
406 Printx(fp, dir.tag_, 3);
407 fprintf(fp, "\nLength:\t\t%i\n", GetNum(dir.len_, 4));
408 fprintf(fp, "StartPos:\t%i\n", GetNum(dir.scp_, 5));
409 fprintf(fp, "Data:\t\t");
410 Printx(fp, (char *)(data_ + GetNum(leader_.base_, 5) + GetNum(dir.scp_, 5)), GetNum(dir.len_, 4));
411 fprintf(fp, "\n");
412 }
413}
414
415/*
416desc: parses the marc record and builds appropriate data structures in m
417
418params:
419
420 m newly created marcRecord
421
422returns:
423
424 1 on success
425 0 on parse or memory failure
426*/
427int MarcRecord::ParseMarcRecord()
428{
429 dirCount_ = (GetNum(leader_.base_, 5) - MARC_LEADER_LEN) / MARC_DIRENT_LEN;
430
431 // For each MarcDirectory entry, create list of subfields
432 MarcDirectoryEntry dir_entry;
433 for (int i = 0; i < dirCount_; i++)
434 {
435 char* data;
436 // Extract the directory component
437 DirectoryEntry dir_entry = *(DirectoryEntry*)(data_+MARC_LEADER_LEN+(i*MARC_DIRENT_LEN));
438
439 MarcDirectoryEntry marc_dir_entry;
440
441 // Get a pointer to the data for this MarcDirectory
442 data = (char *)(data_ + GetNum(leader_.base_, 5) + GetNum(dir_entry.scp_, 5));
443
444 // set the tag for this MarcDirectory
445 marc_dir_entry.tag_ = dir_entry.tag_;
446
447 /* add the list of subfields for this MarcDirectory */
448 if (!AddSubFields(marc_dir_entry, data))
449 return 0;
450 directory_.push_back(marc_dir_entry);
451
452 }
453
454 return 1;
455}
456
//----------------------------------------------------------------------
// void Printx(FILE* fp, char* s, int x)
//
// Writes the first x characters of s to fp, one character at a time.
// s does not have to be NUL-terminated. Nothing is written when x <= 0.
//----------------------------------------------------------------------
void Printx(FILE* fp, char* s, int x)
{
    const char* cursor = s;
    int left = x;

    while (left > 0)
    {
        fputc(*cursor, fp);
        ++cursor;
        --left;
    }
}
463
464//----------------------------------------------------------------------
465// int MarcRecord::AddSubFields(MarcDirectory& d, char* b)
466//
467// Parses and adds subfields to MarcDirectory
468//
469// params:
470//
471// d MarcDirectory to which subfields are added
472// b buffer of data for d
473//
474// returns:
475//
476// 1 on success
477// 0 on parse or memory error
478//----------------------------------------------------------------------
479int MarcRecord::AddSubFields(MarcDirectoryEntry& d, char* b)
480{
481 char *p, *start;
482 int cont = 1, length = 0;
483
484 /* create new field */
485 MarcField field;
486
487
488 start = b;
489 for (p = b; ((*p != '\0') && (cont == 1)); p = (char *)p + 1)
490 {
491 switch (*p)
492 {
493 case SUBFDELIM:
494 /* end of current subfield, but more exist */
495
496 /* set length and pointer to data for this field */
497 field.len_ = length;
498 field.data_ = start;
499 d.field_.push_back(field);
500 /* by definition, there should be more subfields, but we'll
501 check for NULL just in case
502 */
503 if (*p + 1)
504 {
505 }
506 else
507 {
508 /* uh oh */
509 return 0;
510 }
511 length = 0;
512 start = (char *)p + 1;
513 break;
514 case FIELDTERM:
515 /* end of subfield list */
516
517 /* set length and pointer to data for this field */
518 field.len_ = length;
519 field.data_ = start;
520 d.field_.push_back(field);
521
522 /* break out of loop */
523 cont = 0;
524 break;
525 case RECTERM:
526 /* end of record */
527 cont = 0;
528 break;
529 default:
530 /* regular character */
531 length++;
532 break;
533 }
534 }
535 return 1;
536}
537
538void MarcRecord::Format(char *buf, int maxlen, int format)
539{
540 char b[1024];
541 char date[24];
542 char title[128];
543
544 b[0] = '\0';
545 buf[0] = '\0';
546
547 /* FIXME: get rid of this debugging crap sometime */
548 Dump();
549
550 switch (format)
551 {
552 case MARC_FMT_BRIEF:
553 {
554 GetSubField("245", 'a', title, sizeof(title) - 1);
555 GetSubField("260", 'c', date, sizeof(date) - 1);
556 if ((strlen(date) + strlen(title) + 2) <=(size_t)maxlen)
557 {
558 sprintf(buf, "%s %s", date, title);
559 }
560 else
561 {
562 /* FIXME: we're overflowing the buffer! */
563 }
564 break;
565 }
566 case MARC_FMT_FULL:
567 {
568 int dc = 0, fc = 0;
569 int remain = maxlen;
570
571 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
572 {
573 dc++;
574 for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
575 {
576 fc++;
577 if ((directory_[i].field_[j].len_ + 2) > remain)
578 {
579 strncat(buf, (char *)directory_[i].field_[j].data_ + 1, remain - 2);
580 * (buf + remain - 2) = '\n';
581 * (buf + remain - 1) = '\0';
582 /* we're out of room in the buffer. */
583 return;
584 }
585 if (directory_[i].field_[j].len_ > 0)
586 {
587 strncat(buf, (char*)directory_[i].field_[j].data_ + 1, directory_[i].field_[j].len_);
588 strncat(buf, "\n\0", 2);
589 remain -=(directory_[i].field_[j].len_ + 2);
590 }
591 }
592 }
593 /*
594 marcRecord_getSubField(m, "245", 'a', title,
595 sizeof(title) - 1);
596 marcRecord_getSubField(m, "245", 'a', title,
597 sizeof(title) - 1);
598 marcRecord_getSubField(m, "260", 'c', date,
599 sizeof(date) - 1);
600 sprintf(buf, "%s %s", date, title);
601 */
602 break;
603 }
604 default:
605 break;
606 }
607}
608
609int MarcRecord::RecordLength()
610{
611 register int l = 0;
612
613
614 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
615 for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
616 l += directory_[i].field_[i].len_;
617
618 return l;
619}
620
Note: See TracBrowser for help on using the repository browser.