source: main/trunk/greenstone2/runtime-src/packages/d2m/D2Mconv.c@ 24306

Last change on this file since 24306 was 10365, checked in by kjdon, 19 years ago

changed my mind, now adding these all individually instead of in a tar file

  • Property svn:keywords set to Author Date Id Revision
File size: 6.2 KB
Line 
1/* ------------------------------------------------------------------- */
2/* D2Mconv : Converts metadata present in a file to MARC */
3/* Writes MARC record in "line format" */
4/* */
5/* Syntax : Arguments: */
6/* */
7/* char *dfile : input file */
8/* DC metadata must be present in HTML 2.0 */
9/* or HTML 4.0 syntax */
10/* char *buffer: output record in line format */
11/* Must be allocated in calling program! */
12/* char *tfile : tracefile */
13/* No trace if NULL */
14/* char *url : URL for use in case no URL in DC.Identifier */
15/* */
16/* char *format: MARC format (see d2m.h) */
17/* */
18/* Author : Ole Husby, BIBSYS */
19/* Updated : 1998-09-30 */
20/* ------------------------------------------------------------------- */
21
22#include <stdlib.h>
23#include <string.h>
24#include <stdio.h>
25#include <malloc.h>
26#include <ctype.h>
27#include <unistd.h>
28#include <time.h>
29#include "d2m.h"
30
31
32
33
34/* ------------------------------------------------------------------- */
35/* Checks a new line */
36/* ------------------------------------------------------------------- */
37/*
38 This is where starting and ending of a meta tag is parsed.
39 The whole metatag (resulting from one ore more lines) is put
40 into the buffer.
41
42 Note: This function is called recursively!
43*/
44
45int mline(char *l, char *buf, int *meta,
46 struct marcrec *mrec, FILE *tf, int format)
47{
48 char *p, *q, rc;
49 int diff;
50
51 struct metatag *mtag;
52
53 if (!l || !*l) return 1;
54
55 q = l;
56
57
58/* Looks for the end of an already started metatag buffer */
59/* The end is normally '">' */
60
61 if (*meta)
62 {
63 p = strstr(l, "\">");
64
65 if (p)
66 {
67 if ( (char *) p != (char *) l)
68 {
69 diff = (int)(p - l);
70 if (*buf)
71 strcat(buf, " ");
72 strncat(buf, l, diff);
73 }
74
75/* Parsing the buffer for metadata */
76
77 mtag = dc_parse(buf);
78
79 if (mtag)
80 {
81 if (tf)
82 {
83 fprintf(tf, "Name : %s\n", mtag->name );
84 fprintf(tf, "Type : %s\n", mtag->type );
85 fprintf(tf, "Scheme: %s\n", mtag->scheme );
86 fprintf(tf, "Value : %s\n", mtag->value );
87 fprintf(tf, "\n");
88 }
89
90/* Convert to MARC */
91
92 MARCmake(mtag, mrec, format);
93 }
94 free(mtag);
95
96 *buf = 0;
97 *meta = FALSE;
98 q = p + 2;
99 }
100 else
101 {
102 strcat(buf, l);
103 q = NULL;
104 }
105 }
106
107
108 if (!q) return 1;
109
110 p = (char *) cstr(q, "<meta ");
111
112 if (p)
113 {
114 *meta = TRUE;
115 mline(p + 6, buf, meta, mrec, tf, format);
116 }
117
118 return 1;
119}
120
121
122
123int D2Mconv(char *dfile, char *buffer, char *tfile, char *url, int format)
124{
125 struct marcrec *mrec;
126 struct metatag *mtag;
127 char line[50000], today[32];
128 char *l, *p;
129 time_t d;
130 struct tm *time_struct;
131
132 FILE *df, *tf;
133 int *meta;
134
135 putenv("TZ=NFT-1DFT");
136 d = time(NULL);
137 time_struct = localtime(&d);
138 strftime(today, 16, "%y%m%d", time_struct);
139
140 mrec = malloc(sizeof(*mrec));
141
142 mrec->marcline = malloc(100000);
143 mrec->partitle = malloc(10000);
144 mrec->subtitle = malloc(10000);
145 mrec->year = malloc(1000);
146 mrec->url = malloc(1000);
147 mrec->fmat = malloc(1000);
148 mrec->s008 = malloc(41);
149
150 mrec->ncreators = 0;
151 mrec->ntitles = 0;
152 *mrec->marcline = 0;
153 *mrec->subtitle = 0;
154 *mrec->partitle = 0;
155 *mrec->year = 0;
156 *mrec->url = 0;
157 *mrec->fmat = 0;
158 strcpy(mrec->s008, " ");
159
160 meta = malloc(sizeof(int));
161
162 tf = (FILE *) NULL;
163
164
165/* Open inputfile */
166
167 df = fopen(dfile, "r");
168 if (!df)
169 return 0; /* Error: Unable to read data */
170
171
172
173/* Open tracefile */
174
175 if (*tfile)
176 {
177 tf = fopen(tfile, "w");
178 if (!tf)
179 {
180 fclose(df);
181 return 0; /* Error: Unable to create trace */
182 }
183 }
184
185 *meta = FALSE;
186
187
188/* Reads the file */
189
190
191 while (l = fgets(line, 9999, df))
192 {
193
194/* Remove trailing LF and / or CR */
195
196 while ( (l[strlen(l) - 1] == '\n') || (l[strlen(l) - 1] == '\r') )
197 l[strlen(l) -1] = '\0';
198
199
200/* printf("... main %s\n",l); */
201
202/* Stops reading file if "</head" or "<body" is encountered */
203
204 p = cstr(l, (char *) "</head");
205 if (p) break;
206
207 p = cstr(l, (char *) "<body");
208 if (p) break;
209
210 mline(l, (char *) buffer, (int *) meta, mrec, tf, format);
211 }
212
213
214
215/* Check if remaining data in buffer */
216
217 mtag = dc_parse((char *) buffer);
218
219 if (mtag)
220 {
221 if (tf)
222 {
223 fprintf(tf, "Name : %s\n", mtag->name );
224 fprintf(tf, "Type : %s\n", mtag->type );
225 fprintf(tf, "Scheme: %s\n", mtag->scheme );
226 fprintf(tf, "Value : %s\n", mtag->value );
227 fprintf(tf, "\n");
228 }
229 MARCmake(mtag, mrec, format);
230 }
231 free(mtag);
232
233
234/* Close files */
235
236 fclose(df);
237
238 if (tf)
239 fclose(tf);
240
241/* Put URL into mrec (if not present) */
242
243 if (!*mrec->url)
244 strcpy(mrec->url, url);
245
246/* Put date into 008 */
247
248 put008(mrec->s008, today, F008_DATE_ENTERED);
249 put008(mrec->s008, "s", F008_TYPE_OF_DATE);
250
251 switch (format)
252 {
253 case ISMARC :
254 {
255 put008(mrec->s008, "k", F008_FORM_OF_PUBLICATION);
256 break;
257 }
258 }
259
260/* Tidy up the MARC buffer */
261
262 MARCtidy(mrec, buffer, format);
263
264 return 1;
265}
Note: See TracBrowser for help on using the repository browser.