1 | /* ------------------------------------------------------------------- */
|
---|
2 | /* dc_parse : parse the meta tag, build a metatag struct */
|
---|
3 | /* Author : Ole Husby */
|
---|
4 | /* Updated : 1998-09-30 */
|
---|
5 | /* */
|
---|
6 | /* Syntax : #include "d2m.h" */
|
---|
7 | /* dc_parse(char * string) */
|
---|
8 | /* */
|
---|
9 | /* Returns pointer to struct metatag */
|
---|
10 | /* */
|
---|
11 | /* Memory for the struct is allocated in the program! */
|
---|
12 | /* */
|
---|
13 | /* String includes a meta tag, after the initial */
|
---|
14 | /* "<meta " and the ending ">" are stripped away. */
|
---|
15 | /* */
|
---|
16 | /* The program accepts the tag if name starts with "dc." */
|
---|
17 | /* (case insensitive), else NULL is returned. */
|
---|
18 | /* */
|
---|
19 | /* The parsing looks for the HTML attributes "name", */
|
---|
20 | /* "scheme", and "content" only. Other attributes are */
|
---|
21 | /* ignored. */
|
---|
22 | /* */
|
---|
23 | /* The DC "type" will be recognized if suffixed */
|
---|
24 | /* "name" by "dot notation", or contained within */
|
---|
25 | /* parentheses within "content" (old notation). */
|
---|
26 | /* (type = subelement) */
|
---|
27 | /* */
|
---|
28 | /* The DC "scheme" will be recognized if */
|
---|
29 | /* present in "scheme" attribute (HTML 4.0), or if */
|
---|
30 | /* contained within parentheses in "content" (HTML 2.0) */
|
---|
31 | /* */
|
---|
32 | /* Examples: */
|
---|
33 | /* */
|
---|
34 | /* meta name="DC.Creator.Personalname" */
|
---|
35 | /* content="Smith, John" */
|
---|
36 | /* */
|
---|
37 | /* meta name="dc.Creator" */
|
---|
38 | /* content="(type=Personalname) Smith, John" */
|
---|
39 | /* */
|
---|
40 | /* meta name="DC.Subject" scheme="LCSH" */
|
---|
41 | /* content="Cataloguing" */
|
---|
42 | /* */
|
---|
43 | /* meta name="DC.Subject" */
|
---|
44 | /* content="(scheme=LCSH) Cataloguing" */
|
---|
45 | /* */
|
---|
46 | /* The values of the DC attributes are NOT validated! */
|
---|
47 | /* */
|
---|
48 | /* ------------------------------------------------------------------- */
|
---|
49 |
|
---|
50 | #include <stdlib.h>
|
---|
51 | #include <stdio.h>
|
---|
52 | #include <string.h>
|
---|
53 | #include <malloc.h>
|
---|
54 | #include "d2m.h"
|
---|
55 |
|
---|
56 |
|
---|
57 |
|
---|
58 |
|
---|
59 | struct metatag *dc_parse(char *buf)
|
---|
60 | {
|
---|
61 | int more;
|
---|
62 | char *p, *q;
|
---|
63 | struct metatag *mtag;
|
---|
64 | int strict = TRUE; /* Indicates that DC. prefix is mandatory */
|
---|
65 | char attrname[16];
|
---|
66 |
|
---|
67 | /* Allocates and initializes metatag struct */
|
---|
68 |
|
---|
69 | mtag = malloc(sizeof(*mtag));
|
---|
70 |
|
---|
71 | mtag->name = malloc(256);
|
---|
72 | mtag->type = malloc(256);
|
---|
73 | mtag->scheme = malloc(256);
|
---|
74 | mtag->value = malloc(10000);
|
---|
75 |
|
---|
76 | *mtag->name = 0;
|
---|
77 | *mtag->type = 0;
|
---|
78 | *mtag->scheme = 0;
|
---|
79 | *mtag->value = 0;
|
---|
80 |
|
---|
81 |
|
---|
82 |
|
---|
83 |
|
---|
84 | /* Parse attribute name/value pairs: */
|
---|
85 |
|
---|
86 | if (!buf || !*buf) return NULL;
|
---|
87 |
|
---|
88 | p = strtok(buf, "\"");
|
---|
89 |
|
---|
90 |
|
---|
91 |
|
---|
92 | while (p)
|
---|
93 | {
|
---|
94 | /* Look for attribute name */
|
---|
95 |
|
---|
96 | *attrname = 0;
|
---|
97 |
|
---|
98 | while (*p && ( p[strlen(p) - 1] == ' ' || p[strlen(p) - 1] == '='))
|
---|
99 | p[strlen(p) - 1] = '\0';
|
---|
100 |
|
---|
101 | if (*p && strlen(p) < 10)
|
---|
102 | strcpy(attrname, p);
|
---|
103 |
|
---|
104 | /* Look for attribute value */
|
---|
105 |
|
---|
106 | p = strtok(NULL, "\"");
|
---|
107 |
|
---|
108 | while(*p && p[0] == ' ')
|
---|
109 | p++;
|
---|
110 |
|
---|
111 | if (!p || !*p)
|
---|
112 | break;
|
---|
113 |
|
---|
114 | if (strncasecmp(attrname, "name", 4) == 0)
|
---|
115 | strcpy(mtag->name, p);
|
---|
116 | else if (strncasecmp(attrname, "scheme", 6) == 0)
|
---|
117 | strcpy(mtag->scheme, p);
|
---|
118 | else if (strncasecmp(attrname, "content", 7) == 0)
|
---|
119 | strcpy(mtag->value, p);
|
---|
120 | p = strtok(NULL, "\"");
|
---|
121 |
|
---|
122 | if (p)
|
---|
123 | {
|
---|
124 | while (*p && p[0] == ' ')
|
---|
125 | p++;
|
---|
126 | if (!*p) p = NULL;
|
---|
127 | }
|
---|
128 | }
|
---|
129 |
|
---|
130 |
|
---|
131 |
|
---|
132 | /* Check the name attribute value */
|
---|
133 |
|
---|
134 | p = mtag->name;
|
---|
135 |
|
---|
136 | if (!p || !*p)
|
---|
137 | return NULL;
|
---|
138 |
|
---|
139 | /* Check for leading "dc." and suffixed type */
|
---|
140 |
|
---|
141 | if (strncasecmp(p, "dc.", 3) == 0)
|
---|
142 | {
|
---|
143 | p += 3;
|
---|
144 | q = strstr(p, ".");
|
---|
145 | if (q)
|
---|
146 | {
|
---|
147 | strcpy(mtag->type, q + 1);
|
---|
148 | *q = 0;
|
---|
149 | }
|
---|
150 | mtag->name = p;
|
---|
151 | }
|
---|
152 | else
|
---|
153 | {
|
---|
154 | if (strict)
|
---|
155 | return NULL;
|
---|
156 | }
|
---|
157 |
|
---|
158 |
|
---|
159 | /* Check the content attribute value */
|
---|
160 | /* Extracting type and scheme */
|
---|
161 |
|
---|
162 |
|
---|
163 | p = mtag->value;
|
---|
164 |
|
---|
165 | more = 1;
|
---|
166 |
|
---|
167 | while (more)
|
---|
168 | {
|
---|
169 | if (strncasecmp(p, "(type=", 6) == 0)
|
---|
170 | {
|
---|
171 | p += 6;
|
---|
172 | q = strtok(p, ")");
|
---|
173 | if (!q)
|
---|
174 | return NULL;
|
---|
175 | else
|
---|
176 | {
|
---|
177 | if (!*mtag->type)
|
---|
178 | strcpy(mtag->type, q);
|
---|
179 | q = strtok(NULL, "\0");
|
---|
180 | p = q;
|
---|
181 | while (p && *p && p[0] == ' ') p++;
|
---|
182 |
|
---|
183 | }
|
---|
184 | }
|
---|
185 |
|
---|
186 | else if (strncasecmp(p, "(scheme=", 8) == 0)
|
---|
187 | {
|
---|
188 | p += 8;
|
---|
189 | q = strtok(p, ")");
|
---|
190 | if (!q)
|
---|
191 | return NULL;
|
---|
192 | else
|
---|
193 | {
|
---|
194 | if (!*mtag->scheme)
|
---|
195 | strcpy(mtag->scheme, q);
|
---|
196 | q = strtok(NULL, "\0");
|
---|
197 | p = q;
|
---|
198 | while (p && *p && p[0] == ' ') p++;
|
---|
199 | }
|
---|
200 | }
|
---|
201 |
|
---|
202 | else
|
---|
203 | {
|
---|
204 | mtag->value = p;
|
---|
205 | more = 0;
|
---|
206 | }
|
---|
207 |
|
---|
208 | }
|
---|
209 |
|
---|
210 | /* Finished */
|
---|
211 |
|
---|
212 | if (!*mtag->value)
|
---|
213 | mtag = (struct metatag *) NULL;
|
---|
214 |
|
---|
215 | return mtag;
|
---|
216 | }
|
---|