WebSVN – Kolibri OS – Path Comparison – / – /programs/network/netsurf/libdom/src/core/string.c Rev 3583 and /programs/network/netsurf/libdom/src/core/string.c Rev 3584

Regard whitespace Rev 3583 → Rev 3584

 /programs/network/netsurf/libdom/src/core/string.c
 ,0 → 1,1029
+/*
+ * This file is part of libdom.
+ * Licensed under the MIT License,
+ *                http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ * Copyright 2009 Bo Yang <struggleyb.nku@gmail.com>
+ */
+#include <assert.h>
+#include <ctype.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+typedef signed char int8_t;
+typedef signed short int16_t;
+typedef signed int int32_t;
+typedef unsigned char uint8_t;
+typedef unsigned short uint16_t;
+typedef unsigned int uint32_t;
+#include <parserutils/charset/utf8.h>
+#include "core/string.h"
+#include "core/document.h"
+#include "utils/utils.h"
+/**
+ * Type of a DOM string
+ *
+ * Records which member of the data union inside dom_string_internal
+ * currently holds the string's contents.
+ */
+enum dom_string_type {
+        DOM_STRING_CDATA = 0, /**< Contents live in data.cdata (buffer + byte length) */
+        DOM_STRING_INTERNED = 1 /**< Contents live in data.intern (an lwc_string) */
+};
+/**
+ * A DOM string
+ *
+ * Strings are reference counted so destruction is performed correctly.
+ *
+ * Pointers to this structure are cast to and from dom_string pointers
+ * throughout this file; the type member says which arm of the data
+ * union is valid.
+ */
+typedef struct dom_string_internal {
+        dom_string base; /**< Base object; its refcnt member is read and set in this file */
+        union {
+                struct {
+                        uint8_t *ptr;   /**< Pointer to string data */
+                        size_t len;     /**< Byte length of string */
+                } cdata;
+                lwc_string *intern;     /**< Interned string */
+        } data;
+        enum dom_string_type type;      /**< String type */
+} dom_string_internal;
+/**
+ * Empty string, for comparisons against NULL
+ *
+ * dom_string_isequal() and dom_string_caseless_isequal() substitute this
+ * object for a NULL argument, so NULL compares equal to a zero-length
+ * string.
+ */
+static const dom_string_internal empty_string = {
+        { 0 }, /* base */
+        { { (uint8_t *) "", 0 } }, /* data.cdata: empty buffer, zero byte length */
+        DOM_STRING_CDATA /* type */
+};
+/**
+ * Release all storage owned by a DOM string
+ *
+ * \param str  The string to destroy; NULL is accepted and ignored
+ *
+ * The string's reference count is asserted to be zero on entry.
+ */
+void dom_string_destroy(dom_string *str)
+{
+        dom_string_internal *victim = (dom_string_internal *) str;
+        if (victim == NULL)
+                return;
+        assert(victim->base.refcnt == 0);
+        if (victim->type == DOM_STRING_INTERNED) {
+                /* Drop the reference held on the interned payload, if any */
+                if (victim->data.intern != NULL) {
+                        lwc_string_unref(victim->data.intern);
+                }
+        } else if (victim->type == DOM_STRING_CDATA) {
+                /* Plain strings own their character buffer */
+                free(victim->data.cdata.ptr);
+        }
+        free(victim);
+}
+/**
+ * Build a new, non-interned DOM string from raw bytes
+ *
+ * \param ptr    Bytes to copy, may be NULL
+ * \param len    Number of bytes at \a ptr
+ * \param str    Location that receives the new string
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
+ *
+ * The new string starts with a reference count of one, so the caller
+ * already owns a reference to it.
+ *
+ * The input bytes are copied into a freshly allocated buffer which is
+ * always NUL terminated. A NULL pointer or a zero length both produce
+ * the empty string.
+ */
+dom_exception dom_string_create(const uint8_t *ptr, size_t len,
+                dom_string **str)
+{
+        dom_string_internal *fresh;
+        uint8_t *buf;
+        /* Normalise "no data" to a genuine empty string */
+        if (ptr == NULL || len == 0) {
+                ptr = (const uint8_t *) "";
+                len = 0;
+        }
+        fresh = malloc(sizeof(*fresh));
+        if (fresh == NULL)
+                return DOM_NO_MEM_ERR;
+        /* One extra byte for the terminator */
+        buf = malloc(len + 1);
+        if (buf == NULL) {
+                free(fresh);
+                return DOM_NO_MEM_ERR;
+        }
+        memcpy(buf, ptr, len);
+        buf[len] = '\0';
+        fresh->base.refcnt = 1;
+        fresh->type = DOM_STRING_CDATA;
+        fresh->data.cdata.ptr = buf;
+        fresh->data.cdata.len = len;
+        *str = (dom_string *) fresh;
+        return DOM_NO_ERR;
+}
+/**
+ * Build a new interned DOM string from raw bytes
+ *
+ * \param ptr    Bytes to intern, may be NULL
+ * \param len    Number of bytes at \a ptr
+ * \param str    Location that receives the new string
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR if allocation or
+ *         interning fails
+ *
+ * The new string starts with a reference count of one, so the caller
+ * already owns a reference to it.
+ *
+ * The bytes are handed to lwc_intern_string(); a NULL pointer or a zero
+ * length both produce the interned empty string.
+ */
+dom_exception dom_string_create_interned(const uint8_t *ptr, size_t len,
+                dom_string **str)
+{
+        dom_string_internal *fresh;
+        lwc_error lerr;
+        /* Normalise "no data" to a genuine empty string */
+        if (ptr == NULL || len == 0) {
+                ptr = (const uint8_t *) "";
+                len = 0;
+        }
+        fresh = malloc(sizeof(*fresh));
+        if (fresh == NULL)
+                return DOM_NO_MEM_ERR;
+        lerr = lwc_intern_string((const char *) ptr, len,
+                        &fresh->data.intern);
+        if (lerr != lwc_error_ok) {
+                /* Every interning failure is reported as out-of-memory */
+                free(fresh);
+                return DOM_NO_MEM_ERR;
+        }
+        fresh->type = DOM_STRING_INTERNED;
+        fresh->base.refcnt = 1;
+        *str = (dom_string *) fresh;
+        return DOM_NO_ERR;
+}
+/**
+ * Convert a dom_string to interned form and hand back its lwc_string
+ *
+ * \param str     The dom_string to be interned
+ * \param lwcstr  Receives a new reference to the underlying lwc_string
+ * \return DOM_NO_ERR on success, appropriate dom_exception on failure.
+ *
+ * A string that is not yet interned has its character buffer replaced
+ * by the interned equivalent; on failure the string is left untouched.
+ */
+dom_exception dom_string_intern(dom_string *str,
+                struct lwc_string_s **lwcstr)
+{
+        dom_string_internal *self = (dom_string_internal *) str;
+        /* Nothing to convert when the string is interned already */
+        if (self->type != DOM_STRING_INTERNED) {
+                lwc_string *interned;
+                lwc_error lerr = lwc_intern_string(
+                                (const char *) self->data.cdata.ptr,
+                                self->data.cdata.len, &interned);
+                if (lerr != lwc_error_ok) {
+                        return _dom_exception_from_lwc_error(lerr);
+                }
+                /* Swap the private buffer for the interned payload */
+                free(self->data.cdata.ptr);
+                self->data.intern = interned;
+                self->type = DOM_STRING_INTERNED;
+        }
+        *lwcstr = lwc_string_ref(self->data.intern);
+        return DOM_NO_ERR;
+}
+/**
+ * Test two DOM strings for exact (case sensitive) equality
+ *
+ * \param s1  The first string to compare, NULL is treated as empty
+ * \param s2  The second string to compare, NULL is treated as empty
+ * \return true if strings match, false otherwise
+ */
+bool dom_string_isequal(const dom_string *s1, const dom_string *s2)
+{
+        const dom_string_internal *lhs = (s1 != NULL) ?
+                        (const dom_string_internal *) s1 : &empty_string;
+        const dom_string_internal *rhs = (s2 != NULL) ?
+                        (const dom_string_internal *) s2 : &empty_string;
+        size_t nbytes;
+        /* Two interned strings can be compared by the interning library */
+        if (lhs->type == DOM_STRING_INTERNED &&
+                        rhs->type == DOM_STRING_INTERNED) {
+                bool same;
+                (void) lwc_string_isequal(lhs->data.intern, rhs->data.intern,
+                        &same);
+                return same;
+        }
+        /* Otherwise compare lengths first, then the raw bytes */
+        nbytes = dom_string_byte_length((dom_string *) lhs);
+        if (dom_string_byte_length((dom_string *) rhs) != nbytes)
+                return false;
+        return memcmp(dom_string_data((dom_string *) lhs),
+                        dom_string_data((dom_string *) rhs), nbytes) == 0;
+}
+/**
+ * Fold an ASCII upper case letter to lower case, ignoring the locale
+ *
+ * \param c  Byte to convert
+ * \return The lower case letter for 'A'..'Z', otherwise \a c unchanged
+ */
+static inline uint8_t dolower(const uint8_t c)
+{
+        return (c >= 'A' && c <= 'Z') ? (uint8_t) (c + ('a' - 'A')) : c;
+}
+/**
+ * Test two DOM strings for equality, ignoring ASCII case
+ *
+ * \param s1  The first string to compare, NULL is treated as empty
+ * \param s2  The second string to compare, NULL is treated as empty
+ * \return true if strings match, false otherwise
+ */
+bool dom_string_caseless_isequal(const dom_string *s1, const dom_string *s2)
+{
+        const dom_string_internal *lhs = (s1 != NULL) ?
+                        (const dom_string_internal *) s1 : &empty_string;
+        const dom_string_internal *rhs = (s2 != NULL) ?
+                        (const dom_string_internal *) s2 : &empty_string;
+        const uint8_t *a;
+        const uint8_t *b;
+        size_t nbytes, pos;
+        /* Two interned strings can be compared by the interning library */
+        if (lhs->type == DOM_STRING_INTERNED &&
+                        rhs->type == DOM_STRING_INTERNED) {
+                bool same;
+                if (lwc_string_caseless_isequal(lhs->data.intern,
+                                rhs->data.intern, &same) != lwc_error_ok)
+                        return false;
+                return same;
+        }
+        nbytes = dom_string_byte_length((dom_string *) lhs);
+        if (dom_string_byte_length((dom_string *) rhs) != nbytes)
+                return false;
+        a = (const uint8_t *) dom_string_data((dom_string *) lhs);
+        b = (const uint8_t *) dom_string_data((dom_string *) rhs);
+        /* Byte-wise comparison after folding A-Z to a-z */
+        for (pos = 0; pos < nbytes; pos++) {
+                if (dolower(a[pos]) != dolower(b[pos]))
+                        return false;
+        }
+        return true;
+}
+/**
+ * Test a DOM string and an lwc_string for exact equality
+ *
+ * \param s1  The DOM string to compare
+ * \param s2  The lwc_string to compare
+ * \return true if strings match, false otherwise
+ *
+ * A NULL argument on either side yields false.
+ */
+bool dom_string_lwc_isequal(const dom_string *s1, lwc_string *s2)
+{
+        dom_string_internal *lhs = (dom_string_internal *) s1;
+        size_t nbytes;
+        if (s1 == NULL || s2 == NULL)
+                return false;
+        /* An interned DOM string is compared by the interning library */
+        if (lhs->type == DOM_STRING_INTERNED) {
+                bool same;
+                (void) lwc_string_isequal(lhs->data.intern, s2, &same);
+                return same;
+        }
+        /* Plain DOM string: compare lengths first, then the raw bytes */
+        nbytes = dom_string_byte_length(s1);
+        if (lwc_string_length(s2) != nbytes)
+                return false;
+        return memcmp(dom_string_data(s1), lwc_string_data(s2), nbytes) == 0;
+}
+/**
+ * Test a DOM string and an lwc_string for equality, ignoring ASCII case
+ *
+ * \param s1  The DOM string to compare
+ * \param s2  The lwc_string to compare
+ * \return true if strings match, false otherwise
+ *
+ * A NULL argument on either side yields false.
+ */
+bool dom_string_caseless_lwc_isequal(const dom_string *s1, lwc_string *s2)
+{
+        dom_string_internal *lhs = (dom_string_internal *) s1;
+        const uint8_t *a;
+        const uint8_t *b;
+        size_t nbytes, pos;
+        if (s1 == NULL || s2 == NULL)
+                return false;
+        /* An interned DOM string is compared by the interning library */
+        if (lhs->type == DOM_STRING_INTERNED) {
+                bool same;
+                if (lwc_string_caseless_isequal(lhs->data.intern, s2,
+                                &same) != lwc_error_ok)
+                        return false;
+                return same;
+        }
+        nbytes = dom_string_byte_length(s1);
+        if (lwc_string_length(s2) != nbytes)
+                return false;
+        a = (const uint8_t *) dom_string_data(s1);
+        b = (const uint8_t *) lwc_string_data(s2);
+        /* Byte-wise comparison after folding A-Z to a-z */
+        for (pos = 0; pos < nbytes; pos++) {
+                if (dolower(a[pos]) != dolower(b[pos]))
+                        return false;
+        }
+        return true;
+}
+/**
+ * Find the first occurrence of a character in a dom string
+ *
+ * \param str  The string to search in
+ * \param chr  UCS4 value to look for
+ * \return Character index of found character, or -1 if none found
+ *
+ * The search also gives up with -1 when a UTF-8 sequence cannot be
+ * decoded.
+ */
+uint32_t dom_string_index(dom_string *str, uint32_t chr)
+{
+        const uint8_t *cursor = (const uint8_t *) dom_string_data(str);
+        size_t remaining = dom_string_byte_length(str);
+        uint32_t pos;
+        for (pos = 0; remaining > 0; pos++) {
+                uint32_t ucs4;
+                size_t used;
+                if (parserutils_charset_utf8_to_ucs4(cursor, remaining,
+                                &ucs4, &used) != PARSERUTILS_OK) {
+                        return (uint32_t) -1;
+                }
+                if (ucs4 == chr) {
+                        return pos;
+                }
+                /* Step over the character just decoded */
+                cursor += used;
+                remaining -= used;
+        }
+        return (uint32_t) -1;
+}
+/**
+ * Get the index of the last occurrence of a character in a dom string
+ *
+ * \param str  The string to search in
+ * \param chr  UCS4 value to look for
+ * \return Character index of found character, or -1 if none found
+ *
+ * The string is walked backwards one UTF-8 sequence at a time. The
+ * returned index is zero-based, the same convention dom_string_index()
+ * uses. A sequence that cannot be decoded ends the search with -1.
+ */
+uint32_t dom_string_rindex(dom_string *str, uint32_t chr)
+{
+        const uint8_t *s;
+        size_t clen = 0, slen;
+        uint32_t c, coff, index;
+        parserutils_error err;
+        s = (const uint8_t *) dom_string_data(str);
+        slen = dom_string_byte_length(str);
+        /* One past the last character; stepped back before each compare */
+        index = dom_string_length(str);
+        while (slen > 0) {
+                /* Find where the character ending at byte slen begins */
+                err = parserutils_charset_utf8_prev(s, slen,
+                                (uint32_t *) &coff);
+                if (err == PARSERUTILS_OK) {
+                        /* Exactly slen - coff bytes are available from
+                         * s + coff; the previous character's width has
+                         * no bearing on this one */
+                        err = parserutils_charset_utf8_to_ucs4(s + coff,
+                                        slen - coff, &c, &clen);
+                }
+                if (err != PARSERUTILS_OK) {
+                        return (uint32_t) -1;
+                }
+                index--;
+                if (c == chr) {
+                        return index;
+                }
+                /* Carry on with everything before this character */
+                slen = coff;
+        }
+        return (uint32_t) -1;
+}
+/**
+ * Count the characters in a dom string
+ *
+ * \param str  The string to measure the length of
+ * \return The length of the string, in characters; 0 is also returned
+ *         when the UTF-8 length computation reports an error
+ */
+uint32_t dom_string_length(dom_string *str)
+{
+        const uint8_t *bytes = (const uint8_t *) dom_string_data(str);
+        size_t nbytes = dom_string_byte_length(str);
+        size_t nchars;
+        if (parserutils_charset_utf8_length(bytes, nbytes, &nchars) !=
+                        PARSERUTILS_OK) {
+                return 0;
+        }
+        return nchars;
+}
+/**
+ * Get the UCS4 character at position index
+ *
+ * \param str    The string to read from
+ * \param index  The position of the character
+ * \param ch     Receives the UCS4 character
+ * \return DOM_NO_ERR on success, DOM_DOMSTRING_SIZE_ERR if the string
+ *         has no character at \a index.
+ *
+ * When a UTF-8 sequence cannot be decoded the value (uint32_t) -1 is
+ * returned in place of a dom_exception.
+ */
+dom_exception dom_string_at(dom_string *str, uint32_t index,
+                uint32_t *ch)
+{
+        const uint8_t *cursor = (const uint8_t *) dom_string_data(str);
+        size_t remaining = dom_string_byte_length(str);
+        size_t width;
+        const uint32_t wanted = index + 1; /* characters to have visited */
+        uint32_t visited = 0;
+        uint32_t ucs4;
+        while (remaining > 0) {
+                if (parserutils_charset_utf8_char_byte_length(cursor,
+                                &width) != PARSERUTILS_OK) {
+                        return (uint32_t) -1;
+                }
+                /* Stop with the cursor still on the requested character */
+                if (++visited == wanted)
+                        break;
+                cursor += width;
+                remaining -= width;
+        }
+        if (visited != wanted) {
+                return DOM_DOMSTRING_SIZE_ERR;
+        }
+        if (parserutils_charset_utf8_to_ucs4(cursor, remaining, &ucs4,
+                        &width) != PARSERUTILS_OK) {
+                return (uint32_t) -1;
+        }
+        *ch = ucs4;
+        return DOM_NO_ERR;
+}
+/**
+ * Join two dom strings into a new one
+ *
+ * \param s1      The first string, must not be NULL
+ * \param s2      The second string, must not be NULL
+ * \param result  Pointer to location to receive result
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
+ *
+ * The new string is a non-interned string with a reference count of
+ * one. The client should dereference it once it has finished with it.
+ */
+dom_exception dom_string_concat(dom_string *s1, dom_string *s2,
+                dom_string **result)
+{
+        dom_string_internal *joined;
+        const uint8_t *head, *tail;
+        size_t headlen, taillen, total;
+        uint8_t *buf;
+        assert(s1 != NULL);
+        assert(s2 != NULL);
+        head = (const uint8_t *) dom_string_data(s1);
+        tail = (const uint8_t *) dom_string_data(s2);
+        headlen = dom_string_byte_length(s1);
+        taillen = dom_string_byte_length(s2);
+        total = headlen + taillen;
+        joined = malloc(sizeof(*joined));
+        if (joined == NULL) {
+                return DOM_NO_MEM_ERR;
+        }
+        /* One extra byte for the terminator */
+        buf = malloc(total + 1);
+        if (buf == NULL) {
+                free(joined);
+                return DOM_NO_MEM_ERR;
+        }
+        memcpy(buf, head, headlen);
+        memcpy(buf + headlen, tail, taillen);
+        buf[total] = '\0';
+        joined->data.cdata.ptr = buf;
+        joined->data.cdata.len = total;
+        joined->type = DOM_STRING_CDATA;
+        joined->base.refcnt = 1;
+        *result = (dom_string *) joined;
+        return DOM_NO_ERR;
+}
+/**
+ * Copy a range of characters out of a dom string
+ *
+ * \param str     The string to extract from
+ * \param i1      The character index of the start of the substring
+ * \param i2      The character index of the end of the substring
+ * \param result  Pointer to location to receive result
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
+ *
+ * DOM_NO_MEM_ERR is also what comes back when stepping through the
+ * UTF-8 data fails.
+ *
+ * The new string is made with dom_string_create(), so it carries a
+ * reference for the caller. The client should dereference it once it
+ * has finished with it.
+ */
+dom_exception dom_string_substr(dom_string *str,
+                uint32_t i1, uint32_t i2, dom_string **result)
+{
+        const uint8_t *bytes = (const uint8_t *) dom_string_data(str);
+        size_t nbytes = dom_string_byte_length(str);
+        uint32_t first = 0;             /* byte offset of the start */
+        uint32_t last;                  /* byte offset of the end */
+        uint32_t span = i2 - i1;        /* characters between start and end */
+        /* Walk forward i1 characters to reach the start */
+        for (; i1 > 0; i1--) {
+                if (parserutils_charset_utf8_next(bytes, nbytes, first,
+                                &first) != PARSERUTILS_OK) {
+                        return DOM_NO_MEM_ERR;
+                }
+        }
+        /* Walk on from the start to reach the end */
+        last = first;
+        for (; span > 0; span--) {
+                if (parserutils_charset_utf8_next(bytes, nbytes, last,
+                                &last) != PARSERUTILS_OK) {
+                        return DOM_NO_MEM_ERR;
+                }
+        }
+        /* The substring is the byte range [first, last) */
+        return dom_string_create(bytes + first, last - first, result);
+}
+/**
+ * Build a new string with one dom string inserted into another
+ *
+ * \param target  Pointer to string to insert into
+ * \param source  Pointer to string to insert
+ * \param offset  Character offset of location to insert at
+ * \param result  Pointer to location to receive result
+ * \return DOM_NO_ERR          on success,
+ *         DOM_NO_MEM_ERR      on memory exhaustion,
+ *         DOM_INDEX_SIZE_ERR  if ::offset > len(::target).
+ *
+ * DOM_NO_MEM_ERR is also what comes back when stepping through the
+ * target's UTF-8 data fails.
+ *
+ * The new string is a non-interned string with a reference count of
+ * one. The client should dereference it once it has finished with it.
+ */
+dom_exception dom_string_insert(dom_string *target,
+                dom_string *source, uint32_t offset,
+                dom_string **result)
+{
+        dom_string_internal *merged;
+        const uint8_t *tdata, *sdata;
+        uint32_t tbytes, sbytes, tchars;
+        uint32_t split = 0;     /* byte offset of the insertion point */
+        uint32_t rest;
+        uint8_t *buf;
+        tdata = (const uint8_t *) dom_string_data(target);
+        tbytes = dom_string_byte_length(target);
+        sdata = (const uint8_t *) dom_string_data(source);
+        sbytes = dom_string_byte_length(source);
+        tchars = dom_string_length(target);
+        if (offset > tchars)
+                return DOM_INDEX_SIZE_ERR;
+        if (offset == tchars) {
+                /* Appending: the insertion point is the end of the target */
+                split = tbytes;
+        } else {
+                /* Convert the character offset into a byte offset */
+                for (; offset > 0; offset--) {
+                        if (parserutils_charset_utf8_next(tdata, tbytes,
+                                        split, &split) != PARSERUTILS_OK) {
+                                return DOM_NO_MEM_ERR;
+                        }
+                }
+        }
+        merged = malloc(sizeof(*merged));
+        if (merged == NULL) {
+                return DOM_NO_MEM_ERR;
+        }
+        /* Room for both strings plus the terminator */
+        buf = malloc(tbytes + sbytes + 1);
+        if (buf == NULL) {
+                free(merged);
+                return DOM_NO_MEM_ERR;
+        }
+        /* Target bytes before the insertion point */
+        if (split > 0) {
+                memcpy(buf, tdata, split);
+        }
+        /* The inserted string */
+        memcpy(buf + split, sdata, sbytes);
+        /* Target bytes after the insertion point */
+        rest = tbytes - split;
+        if (rest > 0) {
+                memcpy(buf + split + sbytes, tdata + split, rest);
+        }
+        buf[tbytes + sbytes] = '\0';
+        merged->data.cdata.ptr = buf;
+        merged->data.cdata.len = tbytes + sbytes;
+        merged->type = DOM_STRING_CDATA;
+        merged->base.refcnt = 1;
+        *result = (dom_string *) merged;
+        return DOM_NO_ERR;
+}
+/**
+ * Build a new string with a range of a dom string replaced
+ *
+ * \param target  Pointer to string of which to replace a section
+ * \param source  Pointer to replacement string
+ * \param i1      Character index of start of region to replace
+ * \param i2      Character index of end of region to replace
+ * \param result  Pointer to location to receive result
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion.
+ *
+ * DOM_NO_MEM_ERR is also what comes back when stepping through the
+ * target's UTF-8 data fails.
+ *
+ * The new string is a non-interned string with a reference count of
+ * one. The client should dereference it once it has finished with it.
+ */
+dom_exception dom_string_replace(dom_string *target,
+                dom_string *source, uint32_t i1, uint32_t i2,
+                dom_string **result)
+{
+        dom_string_internal *patched;
+        const uint8_t *tdata, *sdata;
+        uint32_t tbytes, sbytes;
+        uint32_t first = 0;             /* byte offset where the region starts */
+        uint32_t last;                  /* byte offset where the region ends */
+        uint32_t span = i2 - i1;        /* characters inside the region */
+        uint32_t total;
+        uint8_t *buf;
+        tdata = (const uint8_t *) dom_string_data(target);
+        tbytes = dom_string_byte_length(target);
+        sdata = (const uint8_t *) dom_string_data(source);
+        sbytes = dom_string_byte_length(source);
+        /* Walk forward i1 characters to reach the start of the region */
+        for (; i1 > 0; i1--) {
+                if (parserutils_charset_utf8_next(tdata, tbytes, first,
+                                &first) != PARSERUTILS_OK) {
+                        return DOM_NO_MEM_ERR;
+                }
+        }
+        /* Walk on from the start to reach the end of the region */
+        last = first;
+        for (; span > 0; span--) {
+                if (parserutils_charset_utf8_next(tdata, tbytes, last,
+                                &last) != PARSERUTILS_OK) {
+                        return DOM_NO_MEM_ERR;
+                }
+        }
+        patched = malloc(sizeof(*patched));
+        if (patched == NULL) {
+                return DOM_NO_MEM_ERR;
+        }
+        /* Result size: target minus the region, plus the replacement */
+        total = tbytes + sbytes - (last - first);
+        buf = malloc(total + 1);
+        if (buf == NULL) {
+                free(patched);
+                return DOM_NO_MEM_ERR;
+        }
+        /* Target bytes before the region */
+        if (first > 0) {
+                memcpy(buf, tdata, first);
+        }
+        /* The replacement */
+        if (sbytes > 0) {
+                memcpy(buf + first, sdata, sbytes);
+        }
+        /* Target bytes after the region */
+        if (tbytes - last > 0) {
+                memcpy(buf + first + sbytes, tdata + last, tbytes - last);
+        }
+        buf[total] = '\0';
+        patched->data.cdata.ptr = buf;
+        patched->data.cdata.len = total;
+        patched->type = DOM_STRING_CDATA;
+        patched->base.refcnt = 1;
+        *result = (dom_string *) patched;
+        return DOM_NO_ERR;
+}
+/**
+ * Compute a 32-bit hash over the bytes of a dom string
+ *
+ * \param str  The string to calculate a hash of
+ * \return The hash value associated with the string
+ *
+ * Starting from 0x811c9dc5, each byte multiplies the hash by 0x01000193
+ * and is then XORed in.
+ */
+uint32_t dom_string_hash(dom_string *str)
+{
+        const uint8_t *bytes = (const uint8_t *) dom_string_data(str);
+        const size_t nbytes = dom_string_byte_length(str);
+        uint32_t hash = 0x811c9dc5;
+        size_t i;
+        for (i = 0; i < nbytes; i++) {
+                hash = (hash * 0x01000193) ^ bytes[i];
+        }
+        return hash;
+}
+/**
+ * Translate an lwc_error into the matching dom_exception
+ *
+ * \param err  The input lwc_error
+ * \return DOM_NO_MEM_ERR for lwc_error_oom, DOM_INDEX_SIZE_ERR for
+ *         lwc_error_range, DOM_NO_ERR for lwc_error_ok and for any
+ *         other value
+ */
+dom_exception _dom_exception_from_lwc_error(lwc_error err)
+{
+        if (err == lwc_error_oom)
+                return DOM_NO_MEM_ERR;
+        if (err == lwc_error_range)
+                return DOM_INDEX_SIZE_ERR;
+        /* lwc_error_ok and unrecognised values both map to success */
+        return DOM_NO_ERR;
+}
+/**
+ * Get read-only access to the bytes of a dom_string.
+ *
+ * \param str   The dom_string object
+ * \return      Pointer to the string's character data
+ *
+ * @note: This is a convenience accessor only; the returned data must
+ * not be modified.
+ */
+const char *dom_string_data(const dom_string *str)
+{
+        dom_string_internal *self = (dom_string_internal *) str;
+        /* Anything that is not a plain string is read via the lwc_string */
+        if (self->type != DOM_STRING_CDATA)
+                return lwc_string_data(self->data.intern);
+        return (const char *) self->data.cdata.ptr;
+}
+/** Get the length, in bytes, of this dom_string
+ *
+ * \param str   The dom_string object
+ * \return      Number of bytes of character data in the string
+ */
+size_t dom_string_byte_length(const dom_string *str)
+{
+        dom_string_internal *self = (dom_string_internal *) str;
+        /* Anything that is not a plain string is measured via the lwc_string */
+        if (self->type != DOM_STRING_CDATA)
+                return lwc_string_length(self->data.intern);
+        return self->data.cdata.len;
+}
+/** Convert the given string to uppercase
+ *
+ * \param source      The string to convert
+ * \param ascii_only  Whether to only convert [a-z] to [A-Z]
+ * \param upper       Result pointer for uppercase string.  Caller owns ref
+ *
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion or
+ *         when the source's UTF-8 data cannot be stepped through.
+ *
+ * The result is interned when the source is interned and a plain string
+ * otherwise. Only single-byte characters are altered, so multi-byte
+ * UTF-8 sequences pass through untouched.
+ *
+ * \note Right now, will return DOM_NOT_SUPPORTED_ERR if ascii_only is false.
+ */
+dom_exception
+dom_string_toupper(dom_string *source, bool ascii_only, dom_string **upper)
+{
+        const uint8_t *orig_s = (const uint8_t *) dom_string_data(source);
+        const size_t nbytes = dom_string_byte_length(source);
+        uint8_t *copy_s;
+        size_t index = 0, clen;
+        parserutils_error err;
+        dom_exception exc;
+        if (ascii_only == false)
+                return DOM_NOT_SUPPORTED_ERR;
+        /* malloc(0) is allowed to return NULL, which would make an empty
+         * source look like memory exhaustion; always ask for a byte */
+        copy_s = malloc(nbytes > 0 ? nbytes : 1);
+        if (copy_s == NULL)
+                return DOM_NO_MEM_ERR;
+        memcpy(copy_s, orig_s, nbytes);
+        while (index < nbytes) {
+                err = parserutils_charset_utf8_char_byte_length(orig_s + index,
+                                                                &clen);
+                if (err != PARSERUTILS_OK) {
+                        free(copy_s);
+                        /** \todo Find a better exception */
+                        return DOM_NO_MEM_ERR;
+                }
+                /* Only single-byte characters can be ASCII letters */
+                if (clen == 1) {
+                        if (orig_s[index] >= 'a' &&
+                            orig_s[index] <= 'z')
+                                copy_s[index] -= 'a' - 'A';
+                }
+                index += clen;
+        }
+        /* Keep the result the same kind of string as the source */
+        if (((dom_string_internal*)source)->type == DOM_STRING_CDATA) {
+                exc = dom_string_create(copy_s, nbytes, upper);
+        } else {
+                exc = dom_string_create_interned(copy_s, nbytes, upper);
+        }
+        /* Both constructors copy the data, so the scratch buffer can go */
+        free(copy_s);
+        return exc;
+}
+/** Convert the given string to lowercase
+ *
+ * \param source      The string to convert
+ * \param ascii_only  Whether to only convert [A-Z] to [a-z]
+ * \param lower       Result pointer for lowercase string.  Caller owns ref
+ *
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion or
+ *         when the source's UTF-8 data cannot be stepped through.
+ *
+ * The result is interned when the source is interned and a plain string
+ * otherwise. Only single-byte characters are altered, so multi-byte
+ * UTF-8 sequences pass through untouched.
+ *
+ * \note Right now, will return DOM_NOT_SUPPORTED_ERR if ascii_only is false.
+ */
+dom_exception
+dom_string_tolower(dom_string *source, bool ascii_only, dom_string **lower)
+{
+        const uint8_t *orig_s = (const uint8_t *) dom_string_data(source);
+        const size_t nbytes = dom_string_byte_length(source);
+        uint8_t *copy_s;
+        size_t index = 0, clen;
+        parserutils_error err;
+        dom_exception exc;
+        if (ascii_only == false)
+                return DOM_NOT_SUPPORTED_ERR;
+        /* malloc(0) is allowed to return NULL, which would make an empty
+         * source look like memory exhaustion; always ask for a byte */
+        copy_s = malloc(nbytes > 0 ? nbytes : 1);
+        if (copy_s == NULL)
+                return DOM_NO_MEM_ERR;
+        memcpy(copy_s, orig_s, nbytes);
+        while (index < nbytes) {
+                err = parserutils_charset_utf8_char_byte_length(orig_s + index,
+                                                                &clen);
+                if (err != PARSERUTILS_OK) {
+                        free(copy_s);
+                        /** \todo Find a better exception */
+                        return DOM_NO_MEM_ERR;
+                }
+                /* Only single-byte characters can be ASCII letters */
+                if (clen == 1) {
+                        if (orig_s[index] >= 'A' &&
+                            orig_s[index] <= 'Z')
+                                copy_s[index] += 'a' - 'A';
+                }
+                index += clen;
+        }
+        /* Keep the result the same kind of string as the source */
+        if (((dom_string_internal*)source)->type == DOM_STRING_CDATA) {
+                exc = dom_string_create(copy_s, nbytes, lower);
+        } else {
+                exc = dom_string_create_interned(copy_s, nbytes, lower);
+        }
+        /* Both constructors copy the data, so the scratch buffer can go */
+        free(copy_s);
+        return exc;
+}

Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 3583 → Rev 3584