/**********************************************************************
 *
 * Unimarc.cpp
 * Copyright (C) 2003 UNESCO
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "stdafx.h"

///////////////////////////////////////////////////////////////////////////////////
// MarcRecord - This class can instantiate objects that represent a MARC
//              bibliographic record and includes methods for manipulating the
//              record.
// // example: // // #include "Unimarc.h" // // // Create a MarcRecord object with a raw MARC record // MarcRecord mr(buf,1) // mr.PrettyFormat("The title is %245, which is the field with tag 245"); #include #include #include #include #include "Unimarc.h" //------------------------------------------------------------------------------- // ostream& operator<<(ostream& s, const MarcLeader& leader) // // Overloaded instance of the output operator for MarcLeader objects //--------------------------------------------------------------------------------- std::ostream& operator<<(std::ostream& s, const MarcLeader& leader) { s.write(leader.lreclen_, 5); // Logical record length s.write(&leader.status_, 1); // Record status s.write(&leader.type_, 1); // Type of record s.write(&leader.bibLevel_, 1); // Bibliographic level s.write(&leader.ctrlType_, 1); // Type of control s.write(&leader.undPos_, 1); // Undefined character position s.write(&leader.indCount_, 1); // Indicator count s.write(&leader.scd_, 1); // Subfield code count s.write(leader.base_, 5); // Base address of data s.write(&leader.encLevel_, 1); // Encoding level s.write(&leader.dcf_, 1); // Descriptive cataloging form s.write(&leader.lrr_, 1); // Linked record requirement // Directory entry map s.write(&leader.llof_, 1); // Length of the length-of-field portion s.write(&leader.lscp_, 1); // Length of starting-character-position portion s.write(&leader.lidp_, 1); // Length of implementation-defined portion s.write(&leader.uep_, 1); // Undefined entry map character position return s; } //---------------------------------------------------------------------------------- // istream& operator>>(istream& s, const MarcLeader& leader) // // Overloaded instance of the input operator for MarcLeader objects //---------------------------------------------------------------------------------- std::istream& operator>>(std::istream& s, MarcLeader& leader) { s.read(leader.lreclen_, 5); // Logical record length 
s.read(&leader.status_, 1); // Record status s.read(&leader.type_, 1); // Type of record s.read(&leader.bibLevel_, 1); // Bibliographic level s.read(&leader.ctrlType_, 1); // Type of control s.read(&leader.undPos_, 1); // Undefined character position s.read(&leader.indCount_, 1); // Indicator count s.read(&leader.scd_, 1); // Subfield code count s.read(leader.base_, 5); // Base address of data s.read(&leader.encLevel_, 1); // Encoding level s.read(&leader.dcf_, 1); // Descriptive cataloging form s.read(&leader.lrr_, 1); // Linked record requirement // Directory entry map s.read(&leader.llof_, 1); // Length of the length-of-field portion s.read(&leader.lscp_, 1); // Length of starting-character-position portion s.read(&leader.lidp_, 1); // Length of implementation-defined portion s.read(&leader.uep_, 1); // Undefined entry map character position return s; } /************************************************************************ Local prototypes ***********************************************************************/ static void Printx(FILE *fp, char *s, int x); //---------------------------------------------------------------------- // MarcRecord::MarcRecord(char *data, int length, int copyData) // // Constructor that creates a new MarcRecord object. // data is raw marc record data for a single record. If copyData is 1, // data is encapsulated within this object. If 0, does not copy data and // responsibility falls to caller to maintain data pointer. //---------------------------------------------------------------------- MarcRecord::MarcRecord(char* data, int length, int copyData) { encapsulated_ = copyData; /* encapsulate or set pointer to data */ if (encapsulated_) { data_ = new char[length + 1]; memcpy(data_, data, length); data_[length] = '\0'; } else data = data; // Lay leader overlay onto data leader_ = *(MarcLeader *) (data_); // Parse the record. 
sets dirCount_, directory_ if (!ParseMarcRecord()) { // marcRecord_destroy(m); // return NULL; } } //---------------------------------------------------------------------- // void MarcRecord::~MarcRecord() // // Destructor, destroys a MarcRecord object and all used resources. //---------------------------------------------------------------------- MarcRecord::~MarcRecord() { if ((encapsulated_) && (data_)) delete[] data_; for (std::vector::size_type i=0; i!=directory_.size(); ++i) directory_[i].field_.erase(directory_[i].field_.begin(),directory_[i].field_.end()); directory_.erase(directory_.begin(),directory_.end()); } #define MARC_STATE_IDLE 0 #define MARC_STATE_TAG 1 #define MARC_STATE_WIDTH 2 //---------------------------------------------------------------------- // int MarcRecord::PrettyFormat(char* format, char* buf, int maxlen) // // Formats the contents of the marc record according to a format string. // Useful for generating a buffer to pretty-print the record. // format contains a format-specifier string. The format-specifier string // is kindof like printf's, except the only control character is '%'. // Following the '%' should be the integer MARC field code as defined in // the (huge) list of available MARC fields from the library of congress // (http://lcweb.loc.gov/marc/). buf is a previously allocated // buffer into which the pretty version will be copied. maxlen is the max // length to write into buf. After call, buf contains pretty formatted text. 
//---------------------------------------------------------------------- int MarcRecord::PrettyFormat(char* format, char* buf, int maxlen) { char b[1024]; int bufpos = 0, fmtpos = 0; b[0] = '\0'; buf[0] = '\0'; for (; format[fmtpos] != '\0'; ) { switch (format[fmtpos]) { case '%': { char tag[4]; strncpy(tag, (char *)(format + fmtpos + 1), 3); tag[3] = '\0'; GetSubField(tag, format[fmtpos + 4], b, sizeof(b) - 1); if (*b) { int l = strlen(b); strncat(buf, b, maxlen - bufpos - 1); bufpos += l; if (bufpos >= maxlen) { buf[maxlen - 1] = '\0'; return maxlen; } } fmtpos += 5; break; } default: buf[bufpos] = format[fmtpos]; bufpos++; fmtpos++; buf[bufpos] = '\0'; break; } } return bufpos; } //---------------------------------------------------------------------- // int MarcRecord::GetField(char* tag, char* buf, int maxlen) // // append all subfields in a field together. //---------------------------------------------------------------------- int MarcRecord::GetField(char* tag, char* buf, int maxlen) { int remain = maxlen - 1; buf[0] = '\0'; if ((tag == NULL) || (*tag == '\0')) return 0; for (std::vector::size_type i=0; i!=directory_.size(); ++i) { if (!strncmp(tag, directory_[i].tag_, strlen(tag))) { for (std::vector::size_type j=0; j!=directory_[i].field_.size(); ++j) { int flen = directory_[i].field_[j].len_; if (flen > remain) { strncat(buf, directory_[i].field_[j].data_ + 1, remain); buf[maxlen] = '\0'; return 1; } else { strncat(buf, directory_[i].field_[j].data_ + 1, flen); remain -= flen; } } return 1; } } return 0; } //---------------------------------------------------------------------- // int MarcRecord::GetSubFields(char* tag, char subtag, char* buf, int maxlen) // // append all fields with a certain subfield together. 
//---------------------------------------------------------------------- int MarcRecord::GetSubFields(char* tag, char subtag, char* buf, int maxlen) { int remain = maxlen - 1; buf[0] = '\0'; if ((tag == NULL) || (*tag == '\0')) return 0; for (std::vector::size_type i=0; i!=directory_.size(); ++i) { if (!strncmp(tag, directory_[i].tag_, strlen(tag))) { for (std::vector::size_type j=0; j!=directory_[i].field_.size(); ++j) { if (*(directory_[i].field_[j].data_) == subtag) { int flen = directory_[i].field_[j].len_; if (flen > remain) { strncpy(buf, directory_[i].field_[j].data_ + 1, remain); buf[maxlen] = '\0'; return 1; } else { strcpy(buf, directory_[i].field_[j].data_ + 1); remain -= flen; } } } } } return 0; } int MarcRecord::HasField(char* tag) { for (std::vector::size_type i=0; i!=directory_.size(); ++i) if (!strncmp(tag, directory_[i].tag_, strlen(tag))) return 1; return 0; } int MarcRecord::HasSubField(char* tag, char subtag) { for (std::vector::size_type i=0; i!=directory_.size(); ++i) if (!strncmp(tag, directory_[i].tag_, strlen(tag))) for (std::vector::size_type j=0; j!=directory_[i].field_.size(); ++j) if (*directory_[i].field_[j].data_ == subtag) return 1; return 0; } int MarcRecord::GetSubField(char *tag, char subtag, char *buf, int maxlen) { buf[0] = '\0'; if ((tag == NULL) || (*tag == '\0')) return 0; for (std::vector::size_type i=0; i!=directory_.size(); ++i) { if (!strncmp(tag, directory_[i].tag_, strlen(tag))) { for (std::vector::size_type j=0; j!=directory_[i].field_.size(); ++j) { if (*(directory_[i].field_[j].data_) == subtag) { int l = directory_[i].field_[j].len_ - 1 > maxlen ? 
maxlen : directory_[i].field_[j].len_ - 1; strncpy(buf, directory_[i].field_[j].data_ + 1, l); buf[l] = '\0'; return 1; } } } } return 0; } int GetNum(char *s, int n) { char nbuf[10]; /* no more than 9 digits */ strncpy(&nbuf[0], s, n); nbuf[n] = '\0'; return (atoi(nbuf)); } void MarcRecord::Dump() { int dc = 0, fc = 0; printf("tags: "); for (std::vector::size_type i=0; i!=directory_.size(); ++i) { Printx(stdout, directory_[i].tag_, 3); dc++; for (std::vector::size_type j=0; j!=directory_[i].field_.size(); ++j) { if (directory_[i].field_[j].data_) { putchar((unsigned long)directory_[i].field_[j].data_); } fc++; /* printx(stdout, (char*)f->data + 1, f->len); printf("\n"); */ } printf(","); } putchar('\n'); } void MarcRecord::PrintDetailed(FILE *fp) { int count, i; fprintf(fp, "RecordLength:\t\t%i\n", GetNum(leader_.lreclen_, 5)); fprintf(fp, "RecordStatus:\t\t%c\n", leader_.status_); fprintf(fp, "RecordType:\t\t%c\n", leader_.type_); fprintf(fp, "BibLevel:\t\t%c\n", leader_.bibLevel_); fprintf(fp, "ControlType:\t\t%c\n", leader_.ctrlType_); fprintf(fp, "UndefCharPos:\t\t%c\n", leader_.undPos_); fprintf(fp, "IndCount:\t\t%i\n", GetNum(&leader_.indCount_, 1)); fprintf(fp, "SubCodeCount:\t\t%i\n", GetNum(&leader_.scd_, 1)); fprintf(fp, "DataBaseAddr:\t\t%i\n", GetNum(leader_.base_, 5)); fprintf(fp, "EncodingLevel:\t\t%c\n", leader_.encLevel_); fprintf(fp, "DescCatForm:\t\t%c\n", leader_.dcf_); fprintf(fp, "LinkedRecReq:\t\t%c\n", leader_.lrr_); fprintf(fp, "LengthOf LengthOfField:\t%i\n", GetNum(&leader_.llof_, 1)); fprintf(fp, "LengthOf StartCharPos:\t%i\n", GetNum(&leader_.lscp_, 1)); fprintf(fp, "LengthOf ImpDefined:\t%i\n", GetNum(&leader_.lidp_, 1)); fprintf(fp, "UndefinedEntry:\t\t%i\n", GetNum(&leader_.uep_, 1)); count = (GetNum(leader_.base_, 5) - MARC_LEADER_LEN) / MARC_DIRENT_LEN; for (i = 0; i < count; i++) { DirectoryEntry dir = *(DirectoryEntry*)(data_+MARC_LEADER_LEN+(i*MARC_DIRENT_LEN)); fprintf(fp, "\n"); fprintf(fp, "Tag:\t\t"); Printx(fp, dir.tag_, 
3); fprintf(fp, "\nLength:\t\t%i\n", GetNum(dir.len_, 4)); fprintf(fp, "StartPos:\t%i\n", GetNum(dir.scp_, 5)); fprintf(fp, "Data:\t\t"); Printx(fp, (char *)(data_ + GetNum(leader_.base_, 5) + GetNum(dir.scp_, 5)), GetNum(dir.len_, 4)); fprintf(fp, "\n"); } } /* desc: parses the marc record and builds appropriate data structures in m params: m newly created marcRecord returns: 1 on success 0 on parse or memory failure */ int MarcRecord::ParseMarcRecord() { dirCount_ = (GetNum(leader_.base_, 5) - MARC_LEADER_LEN) / MARC_DIRENT_LEN; // For each MarcDirectory entry, create list of subfields MarcDirectoryEntry dir_entry; for (int i = 0; i < dirCount_; i++) { char* data; // Extract the directory component DirectoryEntry dir_entry = *(DirectoryEntry*)(data_+MARC_LEADER_LEN+(i*MARC_DIRENT_LEN)); MarcDirectoryEntry marc_dir_entry; // Get a pointer to the data for this MarcDirectory data = (char *)(data_ + GetNum(leader_.base_, 5) + GetNum(dir_entry.scp_, 5)); // set the tag for this MarcDirectory marc_dir_entry.tag_ = dir_entry.tag_; /* add the list of subfields for this MarcDirectory */ if (!AddSubFields(marc_dir_entry, data)) return 0; directory_.push_back(marc_dir_entry); } return 1; } void Printx(FILE* fp, char* s, int x) { int i; for (i = 0; i < x; i++) fprintf(fp, "%c", s[i]); } //---------------------------------------------------------------------- // int MarcRecord::AddSubFields(MarcDirectory& d, char* b) // // Parses and adds subfields to MarcDirectory // // params: // // d MarcDirectory to which subfields are added // b buffer of data for d // // returns: // // 1 on success // 0 on parse or memory error //---------------------------------------------------------------------- int MarcRecord::AddSubFields(MarcDirectoryEntry& d, char* b) { char *p, *start; int cont = 1, length = 0; /* create new field */ MarcField field; start = b; for (p = b; ((*p != '\0') && (cont == 1)); p = (char *)p + 1) { switch (*p) { case SUBFDELIM: /* end of current subfield, but more 
exist */ /* set length and pointer to data for this field */ field.len_ = length; field.data_ = start; d.field_.push_back(field); /* by definition, there should be more subfields, but we'll check for NULL just in case */ if (*p + 1) { } else { /* uh oh */ return 0; } length = 0; start = (char *)p + 1; break; case FIELDTERM: /* end of subfield list */ /* set length and pointer to data for this field */ field.len_ = length; field.data_ = start; d.field_.push_back(field); /* break out of loop */ cont = 0; break; case RECTERM: /* end of record */ cont = 0; break; default: /* regular character */ length++; break; } } return 1; } void MarcRecord::Format(char *buf, int maxlen, int format) { char b[1024]; char date[24]; char title[128]; b[0] = '\0'; buf[0] = '\0'; /* FIXME: get rid of this debugging crap sometime */ Dump(); switch (format) { case MARC_FMT_BRIEF: { GetSubField("245", 'a', title, sizeof(title) - 1); GetSubField("260", 'c', date, sizeof(date) - 1); if ((strlen(date) + strlen(title) + 2) <=(size_t)maxlen) { sprintf(buf, "%s %s", date, title); } else { /* FIXME: we're overflowing the buffer! */ } break; } case MARC_FMT_FULL: { int dc = 0, fc = 0; int remain = maxlen; for (std::vector::size_type i=0; i!=directory_.size(); ++i) { dc++; for (std::vector::size_type j=0; j!=directory_[i].field_.size(); ++j) { fc++; if ((directory_[i].field_[j].len_ + 2) > remain) { strncat(buf, (char *)directory_[i].field_[j].data_ + 1, remain - 2); * (buf + remain - 2) = '\n'; * (buf + remain - 1) = '\0'; /* we're out of room in the buffer. 
*/ return; } if (directory_[i].field_[j].len_ > 0) { strncat(buf, (char*)directory_[i].field_[j].data_ + 1, directory_[i].field_[j].len_); strncat(buf, "\n\0", 2); remain -=(directory_[i].field_[j].len_ + 2); } } } /* marcRecord_getSubField(m, "245", 'a', title, sizeof(title) - 1); marcRecord_getSubField(m, "245", 'a', title, sizeof(title) - 1); marcRecord_getSubField(m, "260", 'c', date, sizeof(date) - 1); sprintf(buf, "%s %s", date, title); */ break; } default: break; } } int MarcRecord::RecordLength() { register int l = 0; for (std::vector::size_type i=0; i!=directory_.size(); ++i) for (std::vector::size_type j=0; j!=directory_[i].field_.size(); ++j) l += directory_[i].field_[i].len_; return l; }