/* ------------------------------------------------------------------- */
/* dc_parse : parse the meta tag, build a metatag struct */
/* Author : Ole Husby */
/* Updated : 1998-09-30 */
/* */
/* Syntax : #include "d2m.h" */
/* dc_parse(char * string) */
/* */
/* Returns pointer to struct metatag */
/* */
/* Memory for the struct is allocated in the program! */
/* */
/* String includes a meta tag, after the initial "<" and the */
/* final ">" are stripped away. */
/* */
/* The program accepts the tag if name starts with "dc." */
/* (case insensitive), else NULL is returned. */
/* */
/* The parsing looks for the HTML attributes "name", */
/* "scheme", and "content" only. Other attributes are */
/* ignored. */
/* */
/* The DC "type" will be recognized if suffixed */
/* "name" by "dot notation", or contained within */
/* parentheses within "content" (old notation). */
/* (type = subelement) */
/* */
/* The DC "scheme" will be recognized if */
/* present in "scheme" attribute (HTML 4.0), or if */
/* contained within parentheses in "content" (HTML 2.0) */
/* */
/* Examples: */
/* */
/* meta name="DC.Creator.Personalname" */
/* content="Smith, John" */
/* */
/* meta name="dc.Creator" */
/* content="(type=Personalname) Smith, John"> */
/* */
/* meta name="DC.Subject" scheme="LCSH" */
/* content="Cataloguing" */
/* */
/* meta name="DC.Subject" */
/* content="(scheme=LCSH) Cataloguing" */
/* */
/* The values of the DC attributes are NOT validated! */
/* */
/* ------------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "d2m.h"
/* Capacities of the string buffers attached to a struct metatag. */
enum {
    DC_PARSE_FIELD_SIZE = 256,      /* name, type and scheme */
    DC_PARSE_VALUE_SIZE = 10000     /* content value */
};

/* Whitespace that may separate attributes inside a tag. */
static int dc_is_space(int c)
{
    return c == ' ' || c == '\t' || c == '\n' || c == '\r';
}

/* Copy the len bytes at src into dst, truncating to dstsize - 1 bytes */
/* and always NUL-terminating. src and dst must not overlap. */
static void dc_copy(char *dst, size_t dstsize, const char *src, size_t len)
{
    if (len >= dstsize)
        len = dstsize - 1;
    memcpy(dst, src, len);
    dst[len] = '\0';
}

/* Release a tag and the four string buffers it owns. */
static void dc_discard(struct metatag *mtag)
{
    if (!mtag)
        return;
    free(mtag->name);
    free(mtag->type);
    free(mtag->scheme);
    free(mtag->value);
    free(mtag);
}

/* Allocate a tag whose four strings are empty; NULL when out of memory. */
static struct metatag *dc_new_tag(void)
{
    struct metatag *mtag = calloc(1, sizeof *mtag);

    if (!mtag)
        return NULL;

    /* Assign all four before testing so dc_discard() never sees garbage. */
    mtag->name   = malloc(DC_PARSE_FIELD_SIZE);
    mtag->type   = malloc(DC_PARSE_FIELD_SIZE);
    mtag->scheme = malloc(DC_PARSE_FIELD_SIZE);
    mtag->value  = malloc(DC_PARSE_VALUE_SIZE);
    if (!mtag->name || !mtag->type || !mtag->scheme || !mtag->value) {
        dc_discard(mtag);
        return NULL;
    }

    *mtag->name   = '\0';
    *mtag->type   = '\0';
    *mtag->scheme = '\0';
    *mtag->value  = '\0';
    return mtag;
}

/* Walk the attr="value" pairs in buf and store the values of the */
/* "name", "scheme" and "content" attributes in mtag. buf is only read. */
static void dc_scan_attributes(const char *buf, struct metatag *mtag)
{
    const char *s = buf;

    for (;;) {
        const char *open, *val, *val_end, *attr, *attr_end;
        size_t attr_len, val_len;

        open = strchr(s, '"');
        if (!open)
            break;

        /* The value runs to the closing quote; an unterminated value */
        /* is accepted and runs to the end of the string. */
        val = open + 1;
        val_end = strchr(val, '"');
        if (!val_end)
            val_end = val + strlen(val);

        /* The attribute name is the last word before the "=", so text */
        /* such as a leading "meta" or an unquoted attribute is skipped. */
        attr_end = open;
        while (attr_end > s && (dc_is_space(attr_end[-1]) || attr_end[-1] == '='))
            attr_end--;
        attr = attr_end;
        while (attr > s && !dc_is_space(attr[-1]))
            attr--;
        attr_len = (size_t)(attr_end - attr);

        /* Leading blanks of the value are not significant. */
        while (val < val_end && dc_is_space(*val))
            val++;
        val_len = (size_t)(val_end - val);

        /* Prefix match, in this order, as the parser has always done. */
        if (attr_len >= 4 && strncasecmp(attr, "name", 4) == 0)
            dc_copy(mtag->name, DC_PARSE_FIELD_SIZE, val, val_len);
        else if (attr_len >= 6 && strncasecmp(attr, "scheme", 6) == 0)
            dc_copy(mtag->scheme, DC_PARSE_FIELD_SIZE, val, val_len);
        else if (attr_len >= 7 && strncasecmp(attr, "content", 7) == 0)
            dc_copy(mtag->value, DC_PARSE_VALUE_SIZE, val, val_len);

        if (*val_end == '\0')
            break;
        s = val_end + 1;
    }
}

/* dc_parse : parse the text of a meta tag into a newly allocated */
/* struct metatag. */
/* */
/* buf holds the attributes of the tag as attr="value" pairs. Only the */
/* "name", "scheme" and "content" attributes are used. buf is not */
/* modified. */
/* */
/* The tag is accepted only if its name starts with "dc." (case */
/* insensitive). The prefix is removed from the stored name, and a */
/* second dot-separated component, if any, becomes the type. Leading */
/* "(type=...)" and "(scheme=...)" qualifiers are removed from the */
/* content and fill type/scheme when these are not already set. */
/* */
/* Returns the tag, or NULL when buf is NULL or empty, the name is */
/* missing or not a "dc." name, a qualifier lacks its ")", no content */
/* remains, or memory is exhausted. Nothing is leaked on a NULL return. */
/* */
/* The caller owns the result: name, type, scheme and value each point */
/* at the start of their own malloc'ed buffer and can be passed to */
/* free(), followed by the struct itself. Values longer than the */
/* buffers are truncated. */
struct metatag *dc_parse(char *buf)
{
    const int strict = TRUE;    /* Indicates that DC. prefix is mandatory */
    struct metatag *mtag;
    char *p;

    if (!buf || !*buf)
        return NULL;

    mtag = dc_new_tag();
    if (!mtag)
        return NULL;

    dc_scan_attributes(buf, mtag);

    /* Check the name attribute value */
    if (!*mtag->name)
        goto reject;

    /* Check for leading "dc." and suffixed type */
    if (strncasecmp(mtag->name, "dc.", 3) == 0) {
        char *rest = mtag->name + 3;
        char *dot = strchr(rest, '.');

        if (dot) {
            dc_copy(mtag->type, DC_PARSE_FIELD_SIZE, dot + 1, strlen(dot + 1));
            *dot = '\0';
        }
        /* Shift in place so mtag->name keeps pointing at its allocation. */
        memmove(mtag->name, rest, strlen(rest) + 1);
    } else if (strict) {
        goto reject;
    }

    /* Extract type and scheme qualifiers from the front of the content */
    p = mtag->value;
    for (;;) {
        char *target;
        char *close;

        if (strncasecmp(p, "(type=", 6) == 0) {
            target = mtag->type;
            p += 6;
        } else if (strncasecmp(p, "(scheme=", 8) == 0) {
            target = mtag->scheme;
            p += 8;
        } else {
            break;
        }

        close = strchr(p, ')');
        if (!close)
            goto reject;

        /* A value taken from the name suffix or the scheme attribute wins. */
        if (!*target)
            dc_copy(target, DC_PARSE_FIELD_SIZE, p, (size_t)(close - p));

        p = close + 1;
        while (dc_is_space(*p))
            p++;
    }
    /* Shift in place so mtag->value keeps pointing at its allocation. */
    memmove(mtag->value, p, strlen(p) + 1);

    /* Finished */
    if (!*mtag->value)
        goto reject;
    return mtag;

reject:
    dc_discard(mtag);
    return NULL;
}