* Copyright 2004-2008, François Revol, <revol@free.fr>.
* Distributed under the terms of the MIT License.
*/
#include <ctype.h>
#include <malloc.h>
#include <string.h>
#include "string_utils.h"
#ifdef _KERNEL_MODE
#define printf dprintf
#undef TESTME
#endif
/*
 * Encode a string for use in a URL.
 *
 * ASCII letters, digits and the characters -_.!~*'() are copied unchanged,
 * a space becomes '+', and every other byte is written as "%XX" using
 * uppercase hexadecimal digits.
 *
 * Returns a newly malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL if str is NULL, if the encoded size would overflow
 * size_t, or if the allocation fails.
 */
char *urlify_string(const char *str)
{
	static const char allowed[] = "abcdefghijklmnopqrstuvwxyz"
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"0123456789"
		"-_.!~*'()";
	static const char hex[] = "0123456789ABCDEF";
	const char *p;
	char *dst, *d;
	size_t len;

	if (!str)
		return NULL;

	len = strlen(str);
	/*
	 * Worst case every input byte expands to three output bytes, and the
	 * terminator needs one more; refuse lengths for which that overflows.
	 */
	if (len > ((size_t)-1 - 1) / 3)
		return NULL;

	dst = malloc(len * 3 + 1);
	if (!dst)
		return NULL;

	for (p = str, d = dst; *p; p++) {
		/* Work on the byte value so bytes >= 0x80 are never negative. */
		unsigned char c = (unsigned char)*p;

		if (strchr(allowed, c)) {
			*d++ = *p;
		} else if (c == ' ') {
			*d++ = '+';
		} else {
			*d++ = '%';
			*d++ = hex[(c >> 4) & 0x0F];
			*d++ = hex[c & 0x0F];
		}
	}
	*d = '\0';
	return dst;
}
/*
 * Named character entities understood by unentitify_string().
 *
 * Each row is { name, replacement }: the name is the text between '&' and
 * ';', the replacement is the text it decodes to. Non-ASCII replacements
 * are spelled as explicit UTF-8 byte escapes so they do not depend on the
 * encoding this source file is saved or read in. The table is terminated
 * by a { NULL, NULL } row.
 */
static const char *entities_tab[][2] = {
	{ "lt", "<" },
	{ "gt", ">" },
	{ "amp", "&" },
	{ "nbsp", " " },
	{ "quot", "\"" },
	{ "raquo", "\xC2\xBB" },	/* U+00BB right-pointing double angle quote */
	{ "ccedil", "\xC3\xA7" },	/* U+00E7 c with cedilla */
	{ "agrave", "\xC3\xA0" },	/* U+00E0 a with grave */
	{ "egrave", "\xC3\xA8" },	/* U+00E8 e with grave */
	{ "eacute", "\xC3\xA9" },	/* U+00E9 e with acute */
	{ "acirc", "\xC3\xA2" },	/* U+00E2 a with circumflex */
	{ "ecirc", "\xC3\xAA" },	/* U+00EA e with circumflex */
	{ "icirc", "\xC3\xAE" },	/* U+00EE i with circumflex */
	{ "ocirc", "\xC3\xB4" },	/* U+00F4 o with circumflex */
	{ "ucirc", "\xC3\xBB" },	/* U+00FB u with circumflex */
	{ "copy", "\xC2\xA9" },		/* U+00A9 copyright sign */
	{ "trade", "\xE2\x84\xA2" },	/* U+2122 trade mark sign */
	{ NULL, NULL },
};
/* Return the value of decimal digit c, or -1 if c is not '0'..'9'. */
static int dec_digit_value(char c)
{
	return (c >= '0' && c <= '9') ? c - '0' : -1;
}

/* Return the value of hexadecimal digit c (either case), or -1. */
static int hex_digit_value(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/*
 * Try to parse a numeric character reference at p, where p points at '&'.
 * Only the fixed forms "&#DD;", "&#DDD;" (decimal) and "&#xHH;"
 * (hexadecimal, lowercase 'x') are recognized.
 *
 * On success stores the decoded number in *value and returns how many
 * bytes follow the '&' up to and including the ';'. Returns 0 when p does
 * not start one of these forms. Never reads past the string terminator.
 */
static size_t parse_numeric_ref(const char *p, unsigned int *value)
{
	int a, b, c;

	if (p[1] != '#' || !p[2] || !p[3])
		return 0;

	if (p[4] == ';') {
		a = dec_digit_value(p[2]);
		b = dec_digit_value(p[3]);
		if (a < 0 || b < 0)
			return 0;
		*value = (unsigned int)(a * 10 + b);
		return 4;
	}

	if (!p[4] || p[5] != ';')
		return 0;

	a = dec_digit_value(p[2]);
	b = dec_digit_value(p[3]);
	c = dec_digit_value(p[4]);
	if (a >= 0 && b >= 0 && c >= 0) {
		*value = (unsigned int)(a * 100 + b * 10 + c);
		return 5;
	}

	if (p[2] == 'x') {
		a = hex_digit_value(p[3]);
		b = hex_digit_value(p[4]);
		if (a >= 0 && b >= 0) {
			*value = (unsigned int)((a << 4) | b);
			return 5;
		}
	}
	return 0;
}

/*
 * Decode character references in str.
 *
 * Handles the numeric forms "&#DD;", "&#DDD;" and "&#xHH;" and the named
 * entities listed in entities_tab. A numeric reference is written as one
 * raw byte with that value; references whose value is 0 (which would cut
 * the result short) or above 255 (which does not fit in one byte) are left
 * in the output as literal text, as is any '&' that does not start a
 * recognized reference.
 *
 * Returns a newly malloc()ed, NUL-terminated string that the caller must
 * free(), or NULL if str is NULL or the allocation fails.
 */
char *unentitify_string(const char *str)
{
	/* Longest entity name (between '&' and ';') that is looked up. */
	enum { MAX_ENTITY_NAME = 18 };
	char *dst, *d;
	const char *p;
	int i;

	if (!str)
		return NULL;

	/*
	 * Decoding never grows the text as long as every replacement in
	 * entities_tab is no longer than its "&name;" spelling.
	 */
	dst = malloc(strlen(str) + 2);
	if (!dst)
		return NULL;

	for (p = str, d = dst; *p; p++) {
		const char *name;
		size_t name_len, skip;
		unsigned int value;

		if (*p != '&') {
			*d++ = *p;
			continue;
		}

		skip = parse_numeric_ref(p, &value);
		if (skip != 0) {
			if (value >= 1 && value <= 255) {
				*(unsigned char *)d = (unsigned char)value;
				d++;
				p += skip;
			} else {
				/* Not representable: keep the reference as plain text. */
				*d++ = '&';
			}
			continue;
		}

		/* Look for the ';' that closes a named entity. */
		name = p + 1;
		name_len = 0;
		while (name_len <= MAX_ENTITY_NAME && name[name_len]
			&& name[name_len] != ';')
			name_len++;
		if (name_len > MAX_ENTITY_NAME || name[name_len] != ';') {
			*d++ = '&';
			continue;
		}

		for (i = 0; entities_tab[i][0]; i++) {
			if (strlen(entities_tab[i][0]) == name_len
				&& !memcmp(name, entities_tab[i][0], name_len)) {
				strcpy(d, entities_tab[i][1]);
				d += strlen(d);
				/* Leave p on the ';'; the loop increment steps past it. */
				p += name_len + 1;
				break;
			}
		}
		/* Unknown name: emit the '&' and let the rest copy through. */
		if (!entities_tab[i][0])
			*d++ = '&';
	}
	*d = '\0';
	return dst;
}
#ifdef TESTME
/*
 * Test driver (compiled only when TESTME is defined): decodes the
 * character references in the first command-line argument and prints the
 * result between single quotes.
 *
 * Returns 1 when no argument is given or decoding fails, 0 otherwise.
 */
int main(int argc, char **argv)
{
	char *p;

	if (argc < 2)
		return 1;

	p = unentitify_string(argv[1]);
	/* NULL means the allocation failed; passing NULL to "%s" is undefined. */
	if (!p)
		return 1;

	printf("'%s'\n", p);
	free(p);
	/*
	 * NOTE(review): presumably touches the allocator once more so that
	 * heap damage done by unentitify_string() surfaces here - confirm.
	 */
	free(malloc(10));
	return 0;
}
#endif