Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 3583 → Rev 3584

/programs/network/netsurf/libdom/src/core/string.c
0,0 → 1,1029
/*
* This file is part of libdom.
* Licensed under the MIT License,
* http://www.opensource.org/licenses/mit-license.php
* Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
* Copyright 2009 Bo Yang <struggleyb.nku@gmail.com>
*/
 
#include <assert.h>
#include <ctype.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
 
/*
 * Fixed-width integer aliases declared locally.
 *
 * NOTE(review): <inttypes.h> is already included above and normally supplies
 * these names; presumably these are a workaround for the target toolchain --
 * confirm before removing. The aliases assume an 8-bit char, 16-bit short
 * and 32-bit int.
 */
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
 
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
 
 
#include <parserutils/charset/utf8.h>
 
#include "core/string.h"
#include "core/document.h"
#include "utils/utils.h"
 
/**
 * Type of a DOM string
 *
 * Selects which member of the data union in dom_string_internal is in use.
 */
enum dom_string_type {
DOM_STRING_CDATA = 0, /**< Byte buffer with explicit length (data.cdata) */
DOM_STRING_INTERNED = 1 /**< Interned lwc_string (data.intern) */
};
 
/**
 * A DOM string
 *
 * Strings are reference counted so destruction is performed correctly.
 *
 * The base member is first; the functions in this file cast between
 * dom_string * and dom_string_internal *.
 */
typedef struct dom_string_internal {
dom_string base;
 
/* Exactly one member is valid at a time; which one is given by type */
union {
struct {
uint8_t *ptr; /**< Pointer to string data */
size_t len; /**< Byte length of string */
} cdata;
lwc_string *intern; /**< Interned string */
} data;
 
enum dom_string_type type; /**< String type */
} dom_string_internal;
 
/**
 * Empty string, for comparisons against NULL
 *
 * dom_string_isequal() and dom_string_caseless_isequal() substitute this
 * object for a NULL argument, so NULL compares equal to a zero-length string.
 */
static const dom_string_internal empty_string = {
{ 0 }, /* base */
{ { (uint8_t *) "", 0 } }, /* data.cdata: "" with byte length 0 */
DOM_STRING_CDATA /* type */
};
 
/**
 * Release all storage owned by a DOM string
 *
 * \param str  The string to destroy, or NULL (in which case nothing happens)
 *
 * The string's reference count must already be zero (this is asserted).
 * An interned string drops its reference on the underlying lwc_string;
 * a character-data string frees its private buffer. The string object
 * itself is then freed.
 */
void dom_string_destroy(dom_string *str)
{
	dom_string_internal *self;

	if (str == NULL)
		return;

	self = (dom_string_internal *) str;

	assert(self->base.refcnt == 0);

	if (self->type == DOM_STRING_INTERNED) {
		if (self->data.intern != NULL) {
			lwc_string_unref(self->data.intern);
		}
	} else if (self->type == DOM_STRING_CDATA) {
		free(self->data.cdata.ptr);
	}

	free(str);
}
 
/**
 * Build a character-data DOM string from a run of bytes
 *
 * \param ptr  Pointer to the bytes to copy (NULL is treated as empty)
 * \param len  Number of bytes at ::ptr (0 is treated as empty)
 * \param str  Pointer to location to receive the new string
 * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
 *
 * The new string starts with a reference count of 1, so the caller need
 * not reference it again.
 *
 * The input bytes are copied into a private, NUL-terminated buffer; the
 * caller keeps ownership of ::ptr.
 */
dom_exception dom_string_create(const uint8_t *ptr, size_t len,
		dom_string **str)
{
	dom_string_internal *created;
	uint8_t *buf;

	/* Normalise "no data" to a zero-length copy of "" */
	if (ptr == NULL || len == 0) {
		ptr = (const uint8_t *) "";
		len = 0;
	}

	created = malloc(sizeof(*created));
	if (created == NULL)
		return DOM_NO_MEM_ERR;

	/* One extra byte for the terminating NUL */
	buf = malloc(len + 1);
	if (buf == NULL) {
		free(created);
		return DOM_NO_MEM_ERR;
	}

	memcpy(buf, ptr, len);
	buf[len] = '\0';

	created->data.cdata.ptr = buf;
	created->data.cdata.len = len;
	created->type = DOM_STRING_CDATA;
	created->base.refcnt = 1;

	*str = (dom_string *) created;

	return DOM_NO_ERR;
}
 
/**
 * Build an interned DOM string from a run of bytes
 *
 * \param ptr  Pointer to the bytes to intern (NULL is treated as empty)
 * \param len  Number of bytes at ::ptr (0 is treated as empty)
 * \param str  Pointer to location to receive the new string
 * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR if allocation or
 *         interning fails
 *
 * The new string starts with a reference count of 1, so the caller need
 * not reference it again.
 *
 * The caller keeps ownership of ::ptr; the data is held by the interned
 * lwc_string obtained from lwc_intern_string().
 */
dom_exception dom_string_create_interned(const uint8_t *ptr, size_t len,
		dom_string **str)
{
	dom_string_internal *created;

	/* Normalise "no data" to a zero-length "" */
	if (ptr == NULL || len == 0) {
		ptr = (const uint8_t *) "";
		len = 0;
	}

	created = malloc(sizeof(*created));
	if (created == NULL)
		return DOM_NO_MEM_ERR;

	if (lwc_intern_string((const char *) ptr, len,
			&created->data.intern) == lwc_error_ok) {
		created->type = DOM_STRING_INTERNED;
		created->base.refcnt = 1;

		*str = (dom_string *) created;

		return DOM_NO_ERR;
	}

	/* Any interning failure is reported as memory exhaustion */
	free(created);

	return DOM_NO_MEM_ERR;
}
 
/**
 * Convert a dom_string to its interned form and hand back the lwc_string
 *
 * \param str     The dom_string to be interned
 * \param lwcstr  Pointer to location to receive a new reference to the
 *                underlying lwc_string
 * \return DOM_NO_ERR on success, appropriate dom_exception on failure.
 *
 * A string that is already interned is left as it is. Otherwise its
 * character data is interned, its private buffer is freed and the string
 * switches to the interned representation. If interning fails the string
 * is left unmodified.
 */
dom_exception dom_string_intern(dom_string *str,
		struct lwc_string_s **lwcstr)
{
	dom_string_internal *self = (dom_string_internal *) str;

	if (self->type != DOM_STRING_INTERNED) {
		lwc_string *interned;
		lwc_error status;

		status = lwc_intern_string(
				(const char *) self->data.cdata.ptr,
				self->data.cdata.len, &interned);
		if (status != lwc_error_ok) {
			return _dom_exception_from_lwc_error(status);
		}

		/* The private copy is no longer needed; the union now
		 * carries the interned string instead. */
		free(self->data.cdata.ptr);

		self->type = DOM_STRING_INTERNED;
		self->data.intern = interned;
	}

	*lwcstr = lwc_string_ref(self->data.intern);

	return DOM_NO_ERR;
}
 
/**
 * Test two DOM strings for byte-exact equality
 *
 * \param s1  The first string to compare (NULL is treated as empty)
 * \param s2  The second string to compare (NULL is treated as empty)
 * \return true if strings match, false otherwise
 *
 * When both strings are interned the comparison is delegated to
 * lwc_string_isequal(); otherwise byte lengths and then contents are
 * compared.
 */
bool dom_string_isequal(const dom_string *s1, const dom_string *s2)
{
	const dom_string_internal *lhs = (s1 != NULL) ?
			(const dom_string_internal *) s1 : &empty_string;
	const dom_string_internal *rhs = (s2 != NULL) ?
			(const dom_string_internal *) s2 : &empty_string;
	size_t nbytes;

	if (lhs->type == DOM_STRING_INTERNED &&
			rhs->type == DOM_STRING_INTERNED) {
		bool same;

		(void) lwc_string_isequal(lhs->data.intern, rhs->data.intern,
				&same);

		return same;
	}

	nbytes = dom_string_byte_length((dom_string *) lhs);

	if (dom_string_byte_length((dom_string *) rhs) != nbytes)
		return false;

	return memcmp(dom_string_data((dom_string *) lhs),
			dom_string_data((dom_string *) rhs), nbytes) == 0;
}
 
/**
 * Map an ASCII upper case letter to lower case, independent of locale
 *
 * \param c  Byte to convert
 * \return The lower case letter for 'A'..'Z'; any other byte unchanged
 */
static inline uint8_t dolower(const uint8_t c)
{
	return (c >= 'A' && c <= 'Z') ? (uint8_t) (c + ('a' - 'A')) : c;
}
 
/**
 * Test two DOM strings for equality, ignoring ASCII case
 *
 * \param s1  The first string to compare (NULL is treated as empty)
 * \param s2  The second string to compare (NULL is treated as empty)
 * \return true if strings match, false otherwise
 *
 * When both strings are interned the comparison is delegated to
 * lwc_string_caseless_isequal(), and a failure there yields false.
 * Otherwise the strings must have the same byte length and match byte for
 * byte after folding 'A'..'Z' to lower case.
 */
bool dom_string_caseless_isequal(const dom_string *s1, const dom_string *s2)
{
	const dom_string_internal *lhs = (s1 != NULL) ?
			(const dom_string_internal *) s1 : &empty_string;
	const dom_string_internal *rhs = (s2 != NULL) ?
			(const dom_string_internal *) s2 : &empty_string;
	const uint8_t *a;
	const uint8_t *b;
	size_t nbytes, i;

	if (lhs->type == DOM_STRING_INTERNED &&
			rhs->type == DOM_STRING_INTERNED) {
		bool same;

		if (lwc_string_caseless_isequal(lhs->data.intern,
				rhs->data.intern, &same) != lwc_error_ok)
			return false;

		return same;
	}

	nbytes = dom_string_byte_length((dom_string *) lhs);

	if (dom_string_byte_length((dom_string *) rhs) != nbytes)
		return false;

	a = (const uint8_t *) dom_string_data((dom_string *) lhs);
	b = (const uint8_t *) dom_string_data((dom_string *) rhs);

	for (i = 0; i < nbytes; i++) {
		if (dolower(a[i]) != dolower(b[i]))
			return false;
	}

	return true;
}
 
 
/**
 * Test a DOM string and an lwc_string for byte-exact equality
 *
 * \param s1  The DOM string to compare
 * \param s2  The lwc_string to compare
 * \return true if strings match, false otherwise
 *
 * Returns false if either are NULL.
 *
 * An interned DOM string is compared through lwc_string_isequal();
 * otherwise byte lengths and then contents are compared.
 */
bool dom_string_lwc_isequal(const dom_string *s1, lwc_string *s2)
{
	const dom_string_internal *lhs;
	size_t nbytes;

	if (s1 == NULL || s2 == NULL)
		return false;

	lhs = (const dom_string_internal *) s1;

	if (lhs->type == DOM_STRING_INTERNED) {
		bool same;

		(void) lwc_string_isequal(lhs->data.intern, s2, &same);

		return same;
	}

	/* Character-data string: compare the raw bytes */
	nbytes = dom_string_byte_length(s1);

	if (lwc_string_length(s2) != nbytes)
		return false;

	return memcmp(dom_string_data(s1), lwc_string_data(s2), nbytes) == 0;
}
 
 
/**
 * Test a DOM string and an lwc_string for equality, ignoring ASCII case
 *
 * \param s1  The DOM string to compare
 * \param s2  The lwc_string to compare
 * \return true if strings match, false otherwise
 *
 * Returns false if either are NULL.
 *
 * An interned DOM string is compared through
 * lwc_string_caseless_isequal(), and a failure there yields false.
 * Otherwise the strings must have the same byte length and match byte for
 * byte after folding 'A'..'Z' to lower case.
 */
bool dom_string_caseless_lwc_isequal(const dom_string *s1, lwc_string *s2)
{
	const dom_string_internal *lhs;
	const uint8_t *a;
	const uint8_t *b;
	size_t nbytes, i;

	if (s1 == NULL || s2 == NULL)
		return false;

	lhs = (const dom_string_internal *) s1;

	if (lhs->type == DOM_STRING_INTERNED) {
		bool same;

		if (lwc_string_caseless_isequal(lhs->data.intern, s2,
				&same) != lwc_error_ok)
			return false;

		return same;
	}

	nbytes = dom_string_byte_length(s1);

	if (lwc_string_length(s2) != nbytes)
		return false;

	a = (const uint8_t *) dom_string_data(s1);
	b = (const uint8_t *) lwc_string_data(s2);

	for (i = 0; i < nbytes; i++) {
		if (dolower(a[i]) != dolower(b[i]))
			return false;
	}

	return true;
}
 
 
/**
 * Find the first occurrence of a character in a dom string
 *
 * \param str  The string to search in
 * \param chr  UCS4 value to look for
 * \return Zero-based character index of the first match, or
 *         (uint32_t) -1 if there is no match or the string data cannot
 *         be decoded
 */
uint32_t dom_string_index(dom_string *str, uint32_t chr)
{
	const uint8_t *cursor = (const uint8_t *) dom_string_data(str);
	size_t remaining = dom_string_byte_length(str);
	uint32_t pos;

	for (pos = 0; remaining > 0; pos++) {
		uint32_t ucs4;
		size_t used;

		if (parserutils_charset_utf8_to_ucs4(cursor, remaining,
				&ucs4, &used) != PARSERUTILS_OK) {
			return (uint32_t) -1;
		}

		if (ucs4 == chr) {
			return pos;
		}

		/* Step over the character just decoded */
		cursor += used;
		remaining -= used;
	}

	return (uint32_t) -1;
}
 
/**
 * Get the index of the last occurrence of a character in a dom string
 *
 * \param str  The string to search in
 * \param chr  UCS4 value to look for
 * \return Zero-based character index of found character (the same
 *         numbering as dom_string_index()), or (uint32_t) -1 if none found
 *         or the string data cannot be decoded
 *
 * The string is walked backwards one character at a time.
 */
uint32_t dom_string_rindex(dom_string *str, uint32_t chr)
{
	const uint8_t *s;
	size_t clen, slen;
	uint32_t c, coff, index;
	parserutils_error err;

	s = (const uint8_t *) dom_string_data(str);
	slen = dom_string_byte_length(str);

	/* One past the index of the last character */
	index = dom_string_length(str);

	while (slen > 0 && index > 0) {
		/* Find where the character ending at byte offset slen starts */
		err = parserutils_charset_utf8_prev(s, slen, &coff);
		if (err == PARSERUTILS_OK) {
			/* Exactly slen - coff bytes belong to this character */
			err = parserutils_charset_utf8_to_ucs4(s + coff,
					slen - coff, &c, &clen);
		}

		if (err != PARSERUTILS_OK) {
			return (uint32_t) -1;
		}

		/* index now names the character just decoded */
		index--;

		if (c == chr) {
			return index;
		}

		/* Continue with everything before this character */
		slen = coff;
	}

	return (uint32_t) -1;
}
 
/**
 * Count the characters in a dom string
 *
 * \param str  The string to measure the length of
 * \return The length of the string, in characters, or 0 if
 *         parserutils_charset_utf8_length() reports an error
 */
uint32_t dom_string_length(dom_string *str)
{
	const uint8_t *data = (const uint8_t *) dom_string_data(str);
	size_t nbytes = dom_string_byte_length(str);
	size_t nchars;

	if (parserutils_charset_utf8_length(data, nbytes,
			&nchars) != PARSERUTILS_OK) {
		return 0;
	}

	return (uint32_t) nchars;
}
 
/**
 * Get the UCS4 character at position index
 *
 * \param str    The string to read from
 * \param index  The zero-based position of the character
 * \param ch     Pointer to location to receive the UCS4 character
 * \return DOM_NO_ERR on success,
 *         DOM_DOMSTRING_SIZE_ERR if ::index is beyond the end of the string
 *         or the string data cannot be decoded up to that position.
 *
 * ::ch is only written on success.
 */
dom_exception dom_string_at(dom_string *str, uint32_t index,
		uint32_t *ch)
{
	const uint8_t *s;
	size_t clen, slen;
	uint32_t c;
	parserutils_error err;

	s = (const uint8_t *) dom_string_data(str);
	slen = dom_string_byte_length(str);

	/* Step over the ::index characters preceding the requested one */
	while (index > 0 && slen > 0) {
		err = parserutils_charset_utf8_char_byte_length(s, &clen);
		if (err != PARSERUTILS_OK || clen > slen) {
			/* Undecodable or truncated sequence: report a proper
			 * dom_exception and never step past the buffer end.
			 * \todo Find a better exception */
			return DOM_DOMSTRING_SIZE_ERR;
		}

		s += clen;
		slen -= clen;
		index--;
	}

	/* Ran out of data before reaching the requested character */
	if (slen == 0) {
		return DOM_DOMSTRING_SIZE_ERR;
	}

	err = parserutils_charset_utf8_to_ucs4(s, slen, &c, &clen);
	if (err != PARSERUTILS_OK) {
		/** \todo Find a better exception */
		return DOM_DOMSTRING_SIZE_ERR;
	}

	*ch = c;

	return DOM_NO_ERR;
}
 
/**
 * Join two dom strings into a new character-data string
 *
 * \param s1      The first string (must not be NULL)
 * \param s2      The second string (must not be NULL)
 * \param result  Pointer to location to receive result
 * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
 *
 * The returned string starts with a reference count of 1. The client
 * should dereference it once it has finished with it.
 */
dom_exception dom_string_concat(dom_string *s1, dom_string *s2,
		dom_string **result)
{
	dom_string_internal *joined;
	const uint8_t *head, *tail;
	size_t head_len, tail_len, total;
	uint8_t *buf;

	assert(s1 != NULL);
	assert(s2 != NULL);

	head = (const uint8_t *) dom_string_data(s1);
	tail = (const uint8_t *) dom_string_data(s2);
	head_len = dom_string_byte_length(s1);
	tail_len = dom_string_byte_length(s2);
	total = head_len + tail_len;

	joined = malloc(sizeof(*joined));
	if (joined == NULL) {
		return DOM_NO_MEM_ERR;
	}

	/* One extra byte for the terminating NUL */
	buf = malloc(total + 1);
	if (buf == NULL) {
		free(joined);

		return DOM_NO_MEM_ERR;
	}

	memcpy(buf, head, head_len);
	memcpy(buf + head_len, tail, tail_len);
	buf[total] = '\0';

	joined->data.cdata.ptr = buf;
	joined->data.cdata.len = total;
	joined->type = DOM_STRING_CDATA;
	joined->base.refcnt = 1;

	*result = (dom_string *) joined;

	return DOM_NO_ERR;
}
 
/**
 * Copy a range of characters out of a dom string
 *
 * \param str     The string to extract from
 * \param i1      The character index of the start of the substring
 * \param i2      The character index of the end of the substring
 * \param result  Pointer to location to receive result
 * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
 *         (also returned when stepping through the UTF-8 data fails)
 *
 * The result is created with dom_string_create(), so it is a new
 * character-data string that is already referenced. The client should
 * dereference it once it has finished with it.
 */
dom_exception dom_string_substr(dom_string *str,
		uint32_t i1, uint32_t i2, dom_string **result)
{
	const uint8_t *data = (const uint8_t *) dom_string_data(str);
	size_t nbytes = dom_string_byte_length(str);
	uint32_t start = 0;
	uint32_t end;
	uint32_t left;

	/* Advance i1 characters to find the byte offset of the start */
	for (left = i1; left > 0; left--) {
		if (parserutils_charset_utf8_next(data, nbytes, start,
				&start) != PARSERUTILS_OK) {
			return DOM_NO_MEM_ERR;
		}
	}

	/* Advance a further (i2 - i1) characters to find the end */
	end = start;
	for (left = i2 - i1; left > 0; left--) {
		if (parserutils_charset_utf8_next(data, nbytes, end,
				&end) != PARSERUTILS_OK) {
			return DOM_NO_MEM_ERR;
		}
	}

	/* Build the result from the byte range [start, end) */
	return dom_string_create(data + start, end - start, result);
}
 
/**
 * Build a new string with one dom string spliced into another
 *
 * \param target  Pointer to string to insert into
 * \param source  Pointer to string to insert
 * \param offset  Character offset of location to insert at
 * \param result  Pointer to location to receive result
 * \return DOM_NO_ERR on success,
 *         DOM_NO_MEM_ERR on memory exhaustion (also returned when stepping
 *         through the UTF-8 data of ::target fails),
 *         DOM_INDEX_SIZE_ERR if ::offset > len(::target).
 *
 * Neither input is modified. The returned string is a new character-data
 * string with a reference count of 1. The client should dereference it
 * once it has finished with it.
 */
dom_exception dom_string_insert(dom_string *target,
		dom_string *source, uint32_t offset,
		dom_string **result)
{
	dom_string_internal *spliced;
	const uint8_t *t, *s;
	uint32_t tlen, slen, nchars;
	uint32_t split = 0;
	uint8_t *buf;

	t = (const uint8_t *) dom_string_data(target);
	tlen = dom_string_byte_length(target);
	s = (const uint8_t *) dom_string_data(source);
	slen = dom_string_byte_length(source);

	nchars = dom_string_length(target);

	if (offset > nchars)
		return DOM_INDEX_SIZE_ERR;

	/* Translate the character offset into a byte offset */
	if (offset == nchars) {
		/* Appending: the split point is the end of the target */
		split = tlen;
	} else {
		uint32_t left;

		for (left = offset; left > 0; left--) {
			if (parserutils_charset_utf8_next(t, tlen, split,
					&split) != PARSERUTILS_OK) {
				return DOM_NO_MEM_ERR;
			}
		}
	}

	spliced = malloc(sizeof(*spliced));
	if (spliced == NULL) {
		return DOM_NO_MEM_ERR;
	}

	/* Room for both strings plus the terminating NUL */
	buf = malloc(tlen + slen + 1);
	if (buf == NULL) {
		free(spliced);
		return DOM_NO_MEM_ERR;
	}

	/* Target bytes before the split point, if any */
	if (split != 0) {
		memcpy(buf, t, split);
	}

	/* The inserted data */
	memcpy(buf + split, s, slen);

	/* Target bytes after the split point, if any */
	if (split != tlen) {
		memcpy(buf + split + slen, t + split, tlen - split);
	}

	buf[tlen + slen] = '\0';

	spliced->data.cdata.ptr = buf;
	spliced->data.cdata.len = tlen + slen;
	spliced->type = DOM_STRING_CDATA;
	spliced->base.refcnt = 1;

	*result = (dom_string *) spliced;

	return DOM_NO_ERR;
}
 
/**
 * Build a new string with a range of one dom string replaced by another
 *
 * \param target  Pointer to string of which to replace a section
 * \param source  Pointer to replacement string
 * \param i1      Character index of start of region to replace
 * \param i2      Character index of end of region to replace
 * \param result  Pointer to location to receive result
 * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
 *         (also returned when stepping through the UTF-8 data of ::target
 *         fails).
 *
 * Neither input is modified. The returned string is a new character-data
 * string with a reference count of 1. The client should dereference it
 * once it has finished with it.
 */
dom_exception dom_string_replace(dom_string *target,
		dom_string *source, uint32_t i1, uint32_t i2,
		dom_string **result)
{
	dom_string_internal *replaced;
	const uint8_t *t, *s;
	uint32_t tlen, slen;
	uint32_t start = 0;
	uint32_t end;
	uint32_t newlen;
	uint32_t left;
	uint8_t *buf;

	t = (const uint8_t *) dom_string_data(target);
	tlen = dom_string_byte_length(target);
	s = (const uint8_t *) dom_string_data(source);
	slen = dom_string_byte_length(source);

	/* Advance i1 characters to find the byte offset of the start */
	for (left = i1; left > 0; left--) {
		if (parserutils_charset_utf8_next(t, tlen, start,
				&start) != PARSERUTILS_OK) {
			return DOM_NO_MEM_ERR;
		}
	}

	/* Advance a further (i2 - i1) characters to find the end */
	end = start;
	for (left = i2 - i1; left > 0; left--) {
		if (parserutils_charset_utf8_next(t, tlen, end,
				&end) != PARSERUTILS_OK) {
			return DOM_NO_MEM_ERR;
		}
	}

	replaced = malloc(sizeof(*replaced));
	if (replaced == NULL) {
		return DOM_NO_MEM_ERR;
	}

	/* Target minus the replaced byte range, plus the replacement */
	newlen = tlen + slen - (end - start);

	/* One extra byte for the terminating NUL */
	buf = malloc(newlen + 1);
	if (buf == NULL) {
		free(replaced);
		return DOM_NO_MEM_ERR;
	}

	/* Target bytes before the replaced region, if any */
	if (start != 0) {
		memcpy(buf, t, start);
	}

	/* The replacement data, if any */
	if (slen != 0) {
		memcpy(buf + start, s, slen);
	}

	/* Target bytes after the replaced region, if any */
	if (tlen != end) {
		memcpy(buf + start + slen, t + end, tlen - end);
	}

	buf[newlen] = '\0';

	replaced->data.cdata.ptr = buf;
	replaced->data.cdata.len = newlen;
	replaced->type = DOM_STRING_CDATA;
	replaced->base.refcnt = 1;

	*result = (dom_string *) replaced;

	return DOM_NO_ERR;
}
 
/**
 * Compute a 32-bit hash over the bytes of a dom string
 *
 * \param str  The string to calculate a hash of
 * \return The hash value associated with the string
 *
 * Starting from 0x811c9dc5, each byte is folded in by multiplying the
 * running value by 0x01000193 and then XORing in the byte.
 */
uint32_t dom_string_hash(dom_string *str)
{
	const uint8_t *bytes = (const uint8_t *) dom_string_data(str);
	const size_t nbytes = dom_string_byte_length(str);
	uint32_t hash = 0x811c9dc5;
	size_t i;

	for (i = 0; i < nbytes; i++) {
		hash = (hash * 0x01000193) ^ bytes[i];
	}

	return hash;
}
 
/**
 * Translate a lwc_error into the corresponding dom_exception
 *
 * \param err  The input lwc_error
 * \return DOM_NO_MEM_ERR for lwc_error_oom, DOM_INDEX_SIZE_ERR for
 *         lwc_error_range, and DOM_NO_ERR for lwc_error_ok and for any
 *         other value
 */
dom_exception _dom_exception_from_lwc_error(lwc_error err)
{
	if (err == lwc_error_oom)
		return DOM_NO_MEM_ERR;

	if (err == lwc_error_range)
		return DOM_INDEX_SIZE_ERR;

	/* lwc_error_ok, and anything not handled above */
	return DOM_NO_ERR;
}
 
/**
 * Access the raw bytes of a dom_string.
 *
 * \param str  The dom_string object
 * \return Pointer to the string's character data: the private buffer of a
 *         character-data string, or the data of the underlying lwc_string
 *         for an interned one
 *
 * @note: This accessor is provided for convenient read access to the raw
 *        C string; the returned data must not be modified.
 */
const char *dom_string_data(const dom_string *str)
{
	const dom_string_internal *self = (const dom_string_internal *) str;

	if (self->type != DOM_STRING_CDATA)
		return lwc_string_data(self->data.intern);

	return (const char *) self->data.cdata.ptr;
}
 
/**
 * Get the length, in bytes, of a dom_string
 *
 * \param str  The dom_string object
 * \return The stored byte length of a character-data string, or the
 *         length of the underlying lwc_string for an interned one
 */
size_t dom_string_byte_length(const dom_string *str)
{
	const dom_string_internal *self = (const dom_string_internal *) str;

	if (self->type != DOM_STRING_CDATA)
		return lwc_string_length(self->data.intern);

	return self->data.cdata.len;
}
 
/** Convert the given string to uppercase
 *
 * \param source      The string to convert
 * \param ascii_only  Whether to only convert [a-z] to [A-Z]
 * \param upper       Result pointer for uppercase string. Caller owns ref
 *
 * \return DOM_NO_ERR on success,
 *         DOM_NOT_SUPPORTED_ERR if ::ascii_only is false,
 *         DOM_NO_MEM_ERR on memory exhaustion or if the length of a
 *         character in ::source cannot be determined.
 *
 * The result has the same kind as ::source: interned if ::source is
 * interned, character data otherwise. ::source itself is not modified.
 *
 * \note Right now, will return DOM_NOT_SUPPORTED_ERR if ascii_only is false.
 */
dom_exception
dom_string_toupper(dom_string *source, bool ascii_only, dom_string **upper)
{
	const uint8_t *orig_s = (const uint8_t *) dom_string_data(source);
	const size_t nbytes = dom_string_byte_length(source);
	uint8_t *copy_s;
	size_t index = 0, clen;
	parserutils_error err;
	dom_exception exc;

	if (ascii_only == false)
		return DOM_NOT_SUPPORTED_ERR;

	/* Always request at least one byte: malloc(0) is allowed to return
	 * NULL, which would otherwise be misreported as memory exhaustion
	 * when ::source is empty. */
	copy_s = malloc(nbytes > 0 ? nbytes : 1);
	if (copy_s == NULL)
		return DOM_NO_MEM_ERR;

	memcpy(copy_s, orig_s, nbytes);

	/* Walk character by character so that only single-byte (ASCII)
	 * characters are ever altered. */
	while (index < nbytes) {
		err = parserutils_charset_utf8_char_byte_length(orig_s + index,
				&clen);
		if (err != PARSERUTILS_OK) {
			free(copy_s);
			/** \todo Find a better exception */
			return DOM_NO_MEM_ERR;
		}

		if (clen == 1) {
			if (orig_s[index] >= 'a' &&
					orig_s[index] <= 'z')
				copy_s[index] -= 'a' - 'A';
		}

		index += clen;
	}

	if (((dom_string_internal *) source)->type == DOM_STRING_CDATA) {
		exc = dom_string_create(copy_s, nbytes, upper);
	} else {
		exc = dom_string_create_interned(copy_s, nbytes, upper);
	}

	/* Both constructors take their own copy of the data */
	free(copy_s);

	return exc;
}
 
/** Convert the given string to lowercase
 *
 * \param source      The string to convert
 * \param ascii_only  Whether to only convert [A-Z] to [a-z]
 * \param lower       Result pointer for lowercase string. Caller owns ref
 *
 * \return DOM_NO_ERR on success,
 *         DOM_NOT_SUPPORTED_ERR if ::ascii_only is false,
 *         DOM_NO_MEM_ERR on memory exhaustion or if the length of a
 *         character in ::source cannot be determined.
 *
 * The result has the same kind as ::source: interned if ::source is
 * interned, character data otherwise. ::source itself is not modified.
 *
 * \note Right now, will return DOM_NOT_SUPPORTED_ERR if ascii_only is false.
 */
dom_exception
dom_string_tolower(dom_string *source, bool ascii_only, dom_string **lower)
{
	const uint8_t *orig_s = (const uint8_t *) dom_string_data(source);
	const size_t nbytes = dom_string_byte_length(source);
	uint8_t *copy_s;
	size_t index = 0, clen;
	parserutils_error err;
	dom_exception exc;

	if (ascii_only == false)
		return DOM_NOT_SUPPORTED_ERR;

	/* Always request at least one byte: malloc(0) is allowed to return
	 * NULL, which would otherwise be misreported as memory exhaustion
	 * when ::source is empty. */
	copy_s = malloc(nbytes > 0 ? nbytes : 1);
	if (copy_s == NULL)
		return DOM_NO_MEM_ERR;

	memcpy(copy_s, orig_s, nbytes);

	/* Walk character by character so that only single-byte (ASCII)
	 * characters are ever altered. */
	while (index < nbytes) {
		err = parserutils_charset_utf8_char_byte_length(orig_s + index,
				&clen);
		if (err != PARSERUTILS_OK) {
			free(copy_s);
			/** \todo Find a better exception */
			return DOM_NO_MEM_ERR;
		}

		if (clen == 1) {
			if (orig_s[index] >= 'A' &&
					orig_s[index] <= 'Z')
				copy_s[index] += 'a' - 'A';
		}

		index += clen;
	}

	if (((dom_string_internal *) source)->type == DOM_STRING_CDATA) {
		exc = dom_string_create(copy_s, nbytes, lower);
	} else {
		exc = dom_string_create_interned(copy_s, nbytes, lower);
	}

	/* Both constructors take their own copy of the data */
	free(copy_s);

	return exc;
}