/* ------------------------------------------------------------------- */ /* dc_parse : parse the meta tag, build a metatag struct */ /* Author : Ole Husby */ /* Updated : 1998-09-30 */ /* */ /* Syntax : #include "d2m.h" */ /* dc_parse(char * string) */ /* */ /* Returns pointer to struct metatag */ /* */ /* Memory for the struct is allocated in the program! */ /* */ /* String includes a meta tag, after the initial */ /* "" are stripped away. */ /* */ /* The program accepts the tag if name starts with "dc." */ /* (case insensitive), else NULL is returned. */ /* */ /* The parsing looks for the HTML attributes "name", */ /* "scheme", and "content" only. Other attributes are */ /* ignored. */ /* */ /* The DC "type" will be recognized if suffixed */ /* "name" by "dot notation", or contained within */ /* parentheses within "content" (old notation). */ /* (type = subelement) */ /* */ /* The DC "scheme" will be recognized if */ /* present in "scheme" attribute (HTML 4.0), or if */ /* contained within aprentheses in "content" (HTML 2.0) */ /* */ /* Examples: */ /* */ /* meta name="DC.Creator.Personalname" */ /* content="Smith, John" */ /* */ /* meta name="dc.Creator" */ /* content="(type=Personalname) Smith, John"> */ /* */ /* meta name="DC.Subject" scheme="LCSH" */ /* content="Cataloguing" */ /* */ /* meta name="DC.Subject" */ /* content="(scheme=LCSH) Cataloguing" */ /* */ /* The values of the DC attributes are NOT validated! */ /* */ /* ------------------------------------------------------------------- */ #include #include #include #include #include "d2m.h" struct metatag *dc_parse(char *buf) { int more; char *p, *q; struct metatag *mtag; int strict = TRUE; /* Indicates that DC. prefix is mandatory */ char attrname[16]; /* Allocates and initializes metatag struct */ mtag = malloc(sizeof(*mtag)); mtag->name = malloc(256); mtag->type = malloc(256); mtag->scheme = malloc(256); mtag->value = malloc(10000); *mtag->name = 0; *mtag->type = 0; *mtag->scheme = 0; *mtag->value = 0; /* Parse attribute name/value pairs: */ if (!buf || !*buf) return NULL; p = strtok(buf, "\""); while (p) { /* Look for attribute name */ *attrname = 0; while (*p && ( p[strlen(p) - 1] == ' ' || p[strlen(p) - 1] == '=')) p[strlen(p) - 1] = '\0'; if (*p && strlen(p) < 10) strcpy(attrname, p); /* Look for attribute value */ p = strtok(NULL, "\""); while(*p && p[0] == ' ') p++; if (!p || !*p) break; if (strncasecmp(attrname, "name", 4) == 0) strcpy(mtag->name, p); else if (strncasecmp(attrname, "scheme", 6) == 0) strcpy(mtag->scheme, p); else if (strncasecmp(attrname, "content", 7) == 0) strcpy(mtag->value, p); p = strtok(NULL, "\""); if (p) { while (*p && p[0] == ' ') p++; if (!*p) p = NULL; } } /* Check the name attribute value */ p = mtag->name; if (!p || !*p) return NULL; /* Check for leading "dc." and suffixed type */ if (strncasecmp(p, "dc.", 3) == 0) { p += 3; q = strstr(p, "."); if (q) { strcpy(mtag->type, q + 1); *q = 0; } mtag->name = p; } else { if (strict) return NULL; } /* Check the content attribute value */ /* Extracting type and scheme */ p = mtag->value; more = 1; while (more) { if (strncasecmp(p, "(type=", 6) == 0) { p += 6; q = strtok(p, ")"); if (!q) return NULL; else { if (!*mtag->type) strcpy(mtag->type, q); q = strtok(NULL, "\0"); p = q; while (p && *p && p[0] == ' ') p++; } } else if (strncasecmp(p, "(scheme=", 8) == 0) { p += 8; q = strtok(p, ")"); if (!q) return NULL; else { if (!*mtag->scheme) strcpy(mtag->scheme, q); q = strtok(NULL, "\0"); p = q; while (p && *p && p[0] == ' ') p++; } } else { mtag->value = p; more = 0; } } /* Finished */ if (!*mtag->value) mtag = (struct metatag *) NULL; return mtag; }