source: gsdl/trunk/packages/isis-gdl/Unimarc.cpp@ 15554

Last change on this file since 15554 was 15554, checked in by davidb, 16 years ago

Changed typecast of memory location from (int) to (unsigned long) to be compliant with 64-bit architecture

  • Property svn:keywords set to Author Date Id Revision
File size: 18.1 KB
Line 
1/**********************************************************************
2 *
3 * Unimarc.cpp
4 * Copyright (C) 2003 UNESCO
5 *
6 * A component of the Greenstone digital library software
7 * from the New Zealand Digital Library Project at the
8 * University of Waikato, New Zealand.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 *********************************************************************/
25
26#include "stdafx.h"
27
28///////////////////////////////////////////////////////////////////////////////////
// MarcRecord - This class can instantiate objects that represent a MARC
//              bibliographic record and includes methods for manipulating the
//              record.
32//
33// example:
34//
35// #include "Unimarc.h"
36//
//     // Create a MarcRecord object that keeps its own copy of a raw MARC record
//     MarcRecord mr(buf, len, 1);
//     mr.PrettyFormat("The title is %245a (tag 245, subfield a)", out, sizeof(out));
40#include <vector>
41#include <stdio.h>
42#include <stdlib.h>
43#include <string.h>
44#include "Unimarc.h"
45
46//-------------------------------------------------------------------------------
47// ostream& operator<<(ostream& s, const MarcLeader& leader)
48//
49// Overloaded instance of the output operator for MarcLeader objects
50//---------------------------------------------------------------------------------
51std::ostream& operator<<(std::ostream& s, const MarcLeader& leader)
52{
53 s.write(leader.lreclen_, 5); // Logical record length
54 s.write(&leader.status_, 1); // Record status
55 s.write(&leader.type_, 1); // Type of record
56 s.write(&leader.bibLevel_, 1); // Bibliographic level
57 s.write(&leader.ctrlType_, 1); // Type of control
58 s.write(&leader.undPos_, 1); // Undefined character position
59 s.write(&leader.indCount_, 1); // Indicator count
60 s.write(&leader.scd_, 1); // Subfield code count
61 s.write(leader.base_, 5); // Base address of data
62 s.write(&leader.encLevel_, 1); // Encoding level
63 s.write(&leader.dcf_, 1); // Descriptive cataloging form
64 s.write(&leader.lrr_, 1); // Linked record requirement
65 // Directory entry map
66 s.write(&leader.llof_, 1); // Length of the length-of-field portion
67 s.write(&leader.lscp_, 1); // Length of starting-character-position portion
68 s.write(&leader.lidp_, 1); // Length of implementation-defined portion
69 s.write(&leader.uep_, 1); // Undefined entry map character position
70 return s;
71}
72
73//----------------------------------------------------------------------------------
74// istream& operator>>(istream& s, const MarcLeader& leader)
75//
76// Overloaded instance of the input operator for MarcLeader objects
77//----------------------------------------------------------------------------------
78std::istream& operator>>(std::istream& s, MarcLeader& leader)
79{
80 s.read(leader.lreclen_, 5); // Logical record length
81 s.read(&leader.status_, 1); // Record status
82 s.read(&leader.type_, 1); // Type of record
83 s.read(&leader.bibLevel_, 1); // Bibliographic level
84 s.read(&leader.ctrlType_, 1); // Type of control
85 s.read(&leader.undPos_, 1); // Undefined character position
86 s.read(&leader.indCount_, 1); // Indicator count
87 s.read(&leader.scd_, 1); // Subfield code count
88 s.read(leader.base_, 5); // Base address of data
89 s.read(&leader.encLevel_, 1); // Encoding level
90 s.read(&leader.dcf_, 1); // Descriptive cataloging form
91 s.read(&leader.lrr_, 1); // Linked record requirement
92 // Directory entry map
93 s.read(&leader.llof_, 1); // Length of the length-of-field portion
94 s.read(&leader.lscp_, 1); // Length of starting-character-position portion
95 s.read(&leader.lidp_, 1); // Length of implementation-defined portion
96 s.read(&leader.uep_, 1); // Undefined entry map character position
97 return s;
98}
99
/************************************************************************
Local prototypes
 ***********************************************************************/
// Writes the x characters starting at s to fp, one character at a time.
// The definition appears further down in this file.
static void Printx(FILE *fp, char *s, int x);
104
105//----------------------------------------------------------------------
106// MarcRecord::MarcRecord(char *data, int length, int copyData)
107//
108// Constructor that creates a new MarcRecord object.
109// data is raw marc record data for a single record. If copyData is 1,
110// data is encapsulated within this object. If 0, does not copy data and
111// responsibility falls to caller to maintain data pointer.
112//----------------------------------------------------------------------
113MarcRecord::MarcRecord(char* data, int length, int copyData)
114{
115 encapsulated_ = copyData;
116
117 /* encapsulate or set pointer to data */
118 if (encapsulated_)
119 {
120 data_ = new char[length + 1];
121 memcpy(data_, data, length);
122 data_[length] = '\0';
123 }
124 else
125 data = data;
126
127 // Lay leader overlay onto data
128 leader_ = *(MarcLeader *) (data_);
129
130 // Parse the record. sets dirCount_, directory_
131 if (!ParseMarcRecord())
132 {
133 // marcRecord_destroy(m);
134 // return NULL;
135 }
136}
137
138//----------------------------------------------------------------------
139// void MarcRecord::~MarcRecord()
140//
141// Destructor, destroys a MarcRecord object and all used resources.
142//----------------------------------------------------------------------
143MarcRecord::~MarcRecord()
144{
145 if ((encapsulated_) && (data_))
146 delete[] data_;
147
148 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
149 directory_[i].field_.erase(directory_[i].field_.begin(),directory_[i].field_.end());
150 directory_.erase(directory_.begin(),directory_.end());
151}
152
// Presumably states for a format-string scanner.
// NOTE(review): none of these three macros is referenced in the visible
// part of this file - confirm whether they are still needed.
#define MARC_STATE_IDLE 0
#define MARC_STATE_TAG 1
#define MARC_STATE_WIDTH 2
156//----------------------------------------------------------------------
157// int MarcRecord::PrettyFormat(char* format, char* buf, int maxlen)
158//
159// Formats the contents of the marc record according to a format string.
160// Useful for generating a buffer to pretty-print the record.
161// format contains a format-specifier string. The format-specifier string
162// is kindof like printf's, except the only control character is '%'.
163// Following the '%' should be the integer MARC field code as defined in
164// the (huge) list of available MARC fields from the library of congress
165// (http://lcweb.loc.gov/marc/). buf is a previously allocated
166// buffer into which the pretty version will be copied. maxlen is the max
167// length to write into buf. After call, buf contains pretty formatted text.
168//----------------------------------------------------------------------
169int MarcRecord::PrettyFormat(char* format, char* buf, int maxlen)
170{
171 char b[1024];
172 int bufpos = 0, fmtpos = 0;
173
174 b[0] = '\0';
175 buf[0] = '\0';
176
177 for (; format[fmtpos] != '\0'; )
178 {
179 switch (format[fmtpos])
180 {
181 case '%':
182 {
183 char tag[4];
184 strncpy(tag, (char *)(format + fmtpos + 1), 3);
185 tag[3] = '\0';
186 GetSubField(tag, format[fmtpos + 4], b, sizeof(b) - 1);
187 if (*b)
188 {
189 int l = strlen(b);
190 strncat(buf, b, maxlen - bufpos - 1);
191 bufpos += l;
192 if (bufpos >= maxlen)
193 {
194 buf[maxlen - 1] = '\0';
195 return maxlen;
196 }
197 }
198 fmtpos += 5;
199 break;
200 }
201 default:
202 buf[bufpos] = format[fmtpos];
203 bufpos++;
204 fmtpos++;
205 buf[bufpos] = '\0';
206 break;
207 }
208 }
209 return bufpos;
210}
211
212//----------------------------------------------------------------------
213// int MarcRecord::GetField(char* tag, char* buf, int maxlen)
214//
215// append all subfields in a field together.
216//----------------------------------------------------------------------
217int MarcRecord::GetField(char* tag, char* buf, int maxlen)
218{
219 int remain = maxlen - 1;
220
221 buf[0] = '\0';
222
223 if ((tag == NULL) || (*tag == '\0'))
224 return 0;
225
226 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
227 {
228 if (!strncmp(tag, directory_[i].tag_, strlen(tag)))
229 {
230 for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
231 {
232 int flen = directory_[i].field_[j].len_;
233 if (flen > remain)
234 {
235 strncat(buf, directory_[i].field_[j].data_ + 1, remain);
236 buf[maxlen] = '\0';
237 return 1;
238 }
239 else
240 {
241 strncat(buf, directory_[i].field_[j].data_ + 1, flen);
242 remain -= flen;
243 }
244 }
245 return 1;
246 }
247 }
248 return 0;
249}
250
251//----------------------------------------------------------------------
252// int MarcRecord::GetSubFields(char* tag, char subtag, char* buf, int maxlen)
253//
254// append all fields with a certain subfield together.
255//----------------------------------------------------------------------
256int MarcRecord::GetSubFields(char* tag, char subtag, char* buf, int maxlen)
257{
258 int remain = maxlen - 1;
259
260 buf[0] = '\0';
261
262 if ((tag == NULL) || (*tag == '\0'))
263 return 0;
264
265 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
266 {
267 if (!strncmp(tag, directory_[i].tag_, strlen(tag)))
268 {
269 for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
270 {
271 if (*(directory_[i].field_[j].data_) == subtag)
272 {
273 int flen = directory_[i].field_[j].len_;
274 if (flen > remain)
275 {
276 strncpy(buf, directory_[i].field_[j].data_ + 1, remain);
277 buf[maxlen] = '\0';
278 return 1;
279 }
280 else
281 {
282 strcpy(buf, directory_[i].field_[j].data_ + 1);
283 remain -= flen;
284 }
285 }
286 }
287 }
288 }
289 return 0;
290}
291
292int MarcRecord::HasField(char* tag)
293{
294
295 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
296 if (!strncmp(tag, directory_[i].tag_, strlen(tag)))
297 return 1;
298
299 return 0;
300}
301
302int MarcRecord::HasSubField(char* tag, char subtag)
303{
304
305 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
306 if (!strncmp(tag, directory_[i].tag_, strlen(tag)))
307 for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
308 if (*directory_[i].field_[j].data_ == subtag)
309 return 1;
310 return 0;
311}
312
313int MarcRecord::GetSubField(char *tag, char subtag, char *buf, int maxlen)
314{
315
316 buf[0] = '\0';
317
318 if ((tag == NULL) || (*tag == '\0'))
319 return 0;
320
321 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
322 {
323 if (!strncmp(tag, directory_[i].tag_, strlen(tag)))
324 {
325 for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
326 {
327 if (*(directory_[i].field_[j].data_) == subtag)
328 {
329 int l = directory_[i].field_[j].len_ - 1 > maxlen ? maxlen : directory_[i].field_[j].len_ - 1;
330 strncpy(buf, directory_[i].field_[j].data_ + 1, l);
331 buf[l] = '\0';
332 return 1;
333 }
334 }
335 }
336 }
337 return 0;
338}
339
340
// Converts the first n characters of s to an int with atoi().
//
// s does not need to be NUL-terminated. At most 9 characters are used;
// a larger n is clamped so the local buffer cannot overflow. Returns 0
// when s is NULL, n is not positive, or the characters do not start
// with a number.
int GetNum(char *s, int n)
{
    char nbuf[10]; /* no more than 9 digits plus the terminator */
    const int maxDigits = (int) sizeof(nbuf) - 1;

    if ((s == NULL) || (n <= 0))
        return 0;

    if (n > maxDigits)
        n = maxDigits;

    strncpy(&nbuf[0], s, n);
    nbuf[n] = '\0';
    return (atoi(nbuf));
}
349
350void MarcRecord::Dump()
351{
352 int dc = 0, fc = 0;
353
354 printf("tags: ");
355 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
356 {
357 Printx(stdout, directory_[i].tag_, 3);
358 dc++;
359 for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
360 {
361 if (directory_[i].field_[j].data_)
362 {
363 putchar((unsigned long)directory_[i].field_[j].data_);
364 }
365 fc++;
366 /*
367 printx(stdout, (char*)f->data + 1, f->len);
368 printf("\n"); */
369 }
370 printf(",");
371 }
372 putchar('\n');
373}
374
375
376
377
378
379void MarcRecord::PrintDetailed(FILE *fp)
380{
381 int count, i;
382
383 fprintf(fp, "RecordLength:\t\t%i\n", GetNum(leader_.lreclen_, 5));
384 fprintf(fp, "RecordStatus:\t\t%c\n", leader_.status_);
385 fprintf(fp, "RecordType:\t\t%c\n", leader_.type_);
386 fprintf(fp, "BibLevel:\t\t%c\n", leader_.bibLevel_);
387 fprintf(fp, "ControlType:\t\t%c\n", leader_.ctrlType_);
388 fprintf(fp, "UndefCharPos:\t\t%c\n", leader_.undPos_);
389 fprintf(fp, "IndCount:\t\t%i\n", GetNum(&leader_.indCount_, 1));
390 fprintf(fp, "SubCodeCount:\t\t%i\n", GetNum(&leader_.scd_, 1));
391 fprintf(fp, "DataBaseAddr:\t\t%i\n", GetNum(leader_.base_, 5));
392 fprintf(fp, "EncodingLevel:\t\t%c\n", leader_.encLevel_);
393 fprintf(fp, "DescCatForm:\t\t%c\n", leader_.dcf_);
394 fprintf(fp, "LinkedRecReq:\t\t%c\n", leader_.lrr_);
395 fprintf(fp, "LengthOf LengthOfField:\t%i\n", GetNum(&leader_.llof_, 1));
396 fprintf(fp, "LengthOf StartCharPos:\t%i\n", GetNum(&leader_.lscp_, 1));
397 fprintf(fp, "LengthOf ImpDefined:\t%i\n", GetNum(&leader_.lidp_, 1));
398 fprintf(fp, "UndefinedEntry:\t\t%i\n", GetNum(&leader_.uep_, 1));
399
400 count = (GetNum(leader_.base_, 5) - MARC_LEADER_LEN) / MARC_DIRENT_LEN;
401 for (i = 0; i < count; i++)
402 {
403 DirectoryEntry dir = *(DirectoryEntry*)(data_+MARC_LEADER_LEN+(i*MARC_DIRENT_LEN));
404 fprintf(fp, "\n");
405 fprintf(fp, "Tag:\t\t");
406 Printx(fp, dir.tag_, 3);
407 fprintf(fp, "\nLength:\t\t%i\n", GetNum(dir.len_, 4));
408 fprintf(fp, "StartPos:\t%i\n", GetNum(dir.scp_, 5));
409 fprintf(fp, "Data:\t\t");
410 Printx(fp, (char *)(data_ + GetNum(leader_.base_, 5) + GetNum(dir.scp_, 5)), GetNum(dir.len_, 4));
411 fprintf(fp, "\n");
412 }
413}
414
415/*
416desc: parses the marc record and builds appropriate data structures in m
417
418params:
419
420 m newly created marcRecord
421
422returns:
423
424 1 on success
425 0 on parse or memory failure
426*/
427int MarcRecord::ParseMarcRecord()
428{
429 dirCount_ = (GetNum(leader_.base_, 5) - MARC_LEADER_LEN) / MARC_DIRENT_LEN;
430
431 // For each MarcDirectory entry, create list of subfields
432 MarcDirectoryEntry dir_entry;
433 for (int i = 0; i < dirCount_; i++)
434 {
435 char* data;
436 // Extract the directory component
437 DirectoryEntry dir_entry = *(DirectoryEntry*)(data_+MARC_LEADER_LEN+(i*MARC_DIRENT_LEN));
438
439 MarcDirectoryEntry marc_dir_entry;
440
441 // Get a pointer to the data for this MarcDirectory
442 data = (char *)(data_ + GetNum(leader_.base_, 5) + GetNum(dir_entry.scp_, 5));
443
444 // set the tag for this MarcDirectory
445 marc_dir_entry.tag_ = dir_entry.tag_;
446
447 /* add the list of subfields for this MarcDirectory */
448 if (!AddSubFields(marc_dir_entry, data))
449 return 0;
450 directory_.push_back(marc_dir_entry);
451
452 }
453
454 return 1;
455}
456
// Writes the x characters starting at s to fp, one character at a time.
// Nothing is written when x is zero or negative; s need not be
// NUL-terminated.
void Printx(FILE* fp, char* s, int x)
{
    const char* cursor = s;
    int left = x;

    while (left > 0)
    {
        fprintf(fp, "%c", *cursor);
        ++cursor;
        --left;
    }
}
463
464//----------------------------------------------------------------------
465// int MarcRecord::AddSubFields(MarcDirectory& d, char* b)
466//
467// Parses and adds subfields to MarcDirectory
468//
469// params:
470//
471// d MarcDirectory to which subfields are added
472// b buffer of data for d
473//
474// returns:
475//
476// 1 on success
477// 0 on parse or memory error
478//----------------------------------------------------------------------
479int MarcRecord::AddSubFields(MarcDirectoryEntry& d, char* b)
480{
481 char *p, *start;
482 int cont = 1, length = 0;
483
484 /* create new field */
485 MarcField field;
486
487
488 start = b;
489 for (p = b; ((*p != '\0') && (cont == 1)); p = (char *)p + 1)
490 {
491 switch (*p)
492 {
493 case SUBFDELIM:
494 /* end of current subfield, but more exist */
495
496 /* set length and pointer to data for this field */
497 field.len_ = length;
498 field.data_ = start;
499 d.field_.push_back(field);
500 /* by definition, there should be more subfields, but we'll
501 check for NULL just in case
502 */
503 if (*p + 1)
504 {
505 }
506 else
507 {
508 /* uh oh */
509 return 0;
510 }
511 length = 0;
512 start = (char *)p + 1;
513 break;
514 case FIELDTERM:
515 /* end of subfield list */
516
517 /* set length and pointer to data for this field */
518 field.len_ = length;
519 field.data_ = start;
520 d.field_.push_back(field);
521
522 /* break out of loop */
523 cont = 0;
524 break;
525 case RECTERM:
526 /* end of record */
527 cont = 0;
528 break;
529 default:
530 /* regular character */
531 length++;
532 break;
533 }
534 }
535 return 1;
536}
537
538void MarcRecord::Format(char *buf, int maxlen, int format)
539{
540 char b[1024];
541 char date[24];
542 char title[128];
543
544 b[0] = '\0';
545 buf[0] = '\0';
546
547 /* FIXME: get rid of this debugging crap sometime */
548 Dump();
549
550 switch (format)
551 {
552 case MARC_FMT_BRIEF:
553 {
554 GetSubField("245", 'a', title, sizeof(title) - 1);
555 GetSubField("260", 'c', date, sizeof(date) - 1);
556 if ((strlen(date) + strlen(title) + 2) <=(size_t)maxlen)
557 {
558 sprintf(buf, "%s %s", date, title);
559 }
560 else
561 {
562 /* FIXME: we're overflowing the buffer! */
563 }
564 break;
565 }
566 case MARC_FMT_FULL:
567 {
568 int dc = 0, fc = 0;
569 int remain = maxlen;
570
571 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
572 {
573 dc++;
574 for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
575 {
576 fc++;
577 if ((directory_[i].field_[j].len_ + 2) > remain)
578 {
579 strncat(buf, (char *)directory_[i].field_[j].data_ + 1, remain - 2);
580 * (buf + remain - 2) = '\n';
581 * (buf + remain - 1) = '\0';
582 /* we're out of room in the buffer. */
583 return;
584 }
585 if (directory_[i].field_[j].len_ > 0)
586 {
587 strncat(buf, (char*)directory_[i].field_[j].data_ + 1, directory_[i].field_[j].len_);
588 strncat(buf, "\n\0", 2);
589 remain -=(directory_[i].field_[j].len_ + 2);
590 }
591 }
592 }
593 /*
594 marcRecord_getSubField(m, "245", 'a', title,
595 sizeof(title) - 1);
596 marcRecord_getSubField(m, "245", 'a', title,
597 sizeof(title) - 1);
598 marcRecord_getSubField(m, "260", 'c', date,
599 sizeof(date) - 1);
600 sprintf(buf, "%s %s", date, title);
601 */
602 break;
603 }
604 default:
605 break;
606 }
607}
608
609int MarcRecord::RecordLength()
610{
611 register int l = 0;
612
613
614 for (std::vector<MarcDirectoryEntry>::size_type i=0; i!=directory_.size(); ++i)
615 for (std::vector<MarcField>::size_type j=0; j!=directory_[i].field_.size(); ++j)
616 l += directory_[i].field_[i].len_;
617
618 return l;
619}
620
Note: See TracBrowser for help on using the repository browser.