WebSVN – Kolibri OS – Blame – /contrib/network/netsurf/netsurf/content/urldb.c

Rev	Author	Line No.	Line
3584	sourcerer	1	/*
		2	* Copyright 2006 John M Bell
		3	* Copyright 2009 John Tytgat
		4	*
		5	* This file is part of NetSurf, http://www.netsurf-browser.org/
		6	*
		7	* NetSurf is free software; you can redistribute it and/or modify
		8	* it under the terms of the GNU General Public License as published by
		9	* the Free Software Foundation; version 2 of the License.
		10	*
		11	* NetSurf is distributed in the hope that it will be useful,
		12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
		13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		14	* GNU General Public License for more details.
		15	*
		16	* You should have received a copy of the GNU General Public License
		17	* along with this program. If not, see <http://www.gnu.org/licenses/>.
		18	*/
		19
		20	/** \file
		21	* Unified URL information database (implementation)
		22	*
		23	* URLs are stored in a tree-based structure as follows:
		24	*
		25	* The host component is extracted from each URL and, if a FQDN, split on
		26	* every '.'. The tree is constructed by inserting each FQDN segment in
		27	* reverse order. Duplicate nodes are merged.
		28	*
		29	* If the host part of an URL is an IP address, then this is added to the
		30	* tree verbatim (as if it were a TLD).
		31	*
		32	* This provides something looking like:
		33	*
		34	*                       root (a sentinel)
		35	*                               |
		36	*     -----------------------------------------------------
		37	*     |       |       |         |         |       |       |
		38	*    com     edu     gov    127.0.0.1    net     org      uk      TLDs
		39	*     |       |       |                   |       |       |
		40	*  google    ...     ...                 ...     ...      co      2LDs
		41	*     |                                                   |
		42	*    www                                                 bbc      Hosts/Subdomains
		43	*                                                         |
		44	*                                                        www      ...
		45	*
		46	* Each of the nodes in this tree is a struct host_part. This stores the
		47	* FQDN segment (or IP address) with which the node is concerned. Each node
		48	* may contain further information about paths on a host (struct path_data)
		49	* or SSL certificate processing on a host-wide basis
		50	* (host_part::permit_invalid_certs).
		51	*
		52	* Path data is concerned with storing various metadata about the path in
		53	* question. This includes global history data, HTTP authentication details
		54	* and any associated HTTP cookies. This is stored as a tree of path segments
		55	* hanging off the relevant host_part node.
		56	*
		57	* Therefore, to find the last visited time of the URL
		58	* http://www.example.com/path/to/resource.html, the FQDN tree would be
		59	* traversed in the order root -> "com" -> "example" -> "www". The "www"
		60	* node would have attached to it a tree of struct path_data:
		61	*
		62	*   (sentinel)
		63	*       |
		64	*      path
		65	*       |
		66	*       to
		67	*       |
		68	* resource.html
		69	*
		70	* This represents the absolute path "/path/to/resource.html". The leaf node
		71	* "resource.html" contains the last visited time of the resource.
		72	*
		73	* The mechanism described above is, however, not particularly conducive to
		74	* fast searching of the database for a given URL (or URLs beginning with a
		75	* given prefix). Therefore, an ancillary data structure is used to enable
		76	* fast searching. This structure simply reflects the contents of the
		77	* database, with entries being added/removed at the same time as for the
		78	* core database. In order to ensure that degenerate cases are kept to a
		79	* minimum, we use an AAtree. This is an approximation of a Red-Black tree
		80	* with similar performance characteristics, but with a significantly
		81	* simpler implementation. Entries in this tree comprise pointers to the
		82	* leaf nodes of the host tree described above.
		83	*
		84	* REALLY IMPORTANT NOTE: urldb expects all URLs to be normalised. Use of
		85	* non-normalised URLs with urldb will result in undefined behaviour and
		86	* potential crashes.
		87	*/
		88
		89	#include <assert.h>
		90	#include <ctype.h>
		91	#include <stdbool.h>
		92	#include <stdio.h>
		93	#include <stdlib.h>
		94	#include <string.h>
		95	#include <strings.h>
		96	#include <time.h>
		97
		98	#include <curl/curl.h>
		99
		100	#include "image/bitmap.h"
		101	#include "content/content.h"
		102	#include "content/urldb.h"
		103	#include "desktop/cookies.h"
		104	#include "desktop/options.h"
		105	#include "utils/log.h"
		106	#include "utils/corestrings.h"
		107	#include "utils/filename.h"
		108	#include "utils/url.h"
		109	#include "utils/utils.h"
		110
		111	struct cookie_internal_data {
		112	char name; /< Cookie name /
		113	char value; /< Cookie value /
		114	bool value_was_quoted; /*< Value was quoted in Set-Cookie: /
		115	char comment; /< Cookie comment /
		116	bool domain_from_set; /*< Domain came from Set-Cookie: header /
		117	char domain; /< Domain /
		118	bool path_from_set; /*< Path came from Set-Cookie: header /
		119	char path; /< Path /
		120	time_t expires; /*< Expiry timestamp, or -1 for session /
		121	time_t last_used; /*< Last used time /
		122	bool secure; /*< Only send for HTTPS requests /
		123	bool http_only; /*< Only expose to HTTP(S) requests /
		124	cookie_version version; /*< Specification compliance /
		125	bool no_destroy; /**< Never destroy this cookie,
		126	* unless it's expired */
		127
		128	struct cookie_internal_data prev; /< Previous in list /
		129	struct cookie_internal_data next; /< Next in list /
		130	};
		131
		132	/* A protection space is defined as a tuple canonical_root_url and realm.
		133	* This structure lives as linked list element in a leaf host_part struct
		134	* so we need additional scheme and port to have a canonical_root_url. */
		135	struct prot_space_data {
		136	lwc_string scheme; /*< URL scheme of canonical hostname of this
		137	* protection space. */
		138	unsigned int port; /**< Port number of canonical hostname of this
		139	* protection space. When 0, it means the
		140	* default port for given scheme, i.e. 80
		141	* (http), 443 (https). */
		142	char realm; /< Protection realm /
		143
		144	char auth; /*< Authentication details for this
		145	* protection space in form
		146	* username:password */
		147	struct prot_space_data next; /< Next sibling /
		148	};
		149
		150	struct cache_internal_data {
		151	char filename[12]; /*< Cached filename, or first byte 0 for none /
		152	};
		153
		154	struct url_internal_data {
		155	char title; /< Resource title /
		156	unsigned int visits; /*< Visit count /
		157	time_t last_visit; /*< Last visit time /
		158	content_type type; /*< Type of resource /
		159	};
		160
		161	struct path_data {
		162	nsurl url; /< Full URL /
		163	lwc_string scheme; /< URL scheme for data /
		164	unsigned int port; /**< Port number for data. When 0, it means
		165	* the default port for given scheme, i.e.
		166	* 80 (http), 443 (https). */
		167	char segment; /< Path segment for this node /
		168	unsigned int frag_cnt; /*< Number of entries in path_data::fragment /
		169	char fragment; /< Array of fragments */
		170	bool persistent; /*< This entry should persist /
		171
		172	struct bitmap thumb; /< Thumbnail image of resource /
		173	struct url_internal_data urld; /*< URL data for resource /
		174	struct cache_internal_data cache; /*< Cache data for resource /
		175	const struct prot_space_data prot_space; /*< Protection space
		176	* to which this resource belongs too. Can be
		177	* NULL when it does not belong to a protection
		178	* space or when it is not known. No
		179	* ownership (is with struct host_part::prot_space). */
		180	struct cookie_internal_data cookies; /< Cookies associated with resource /
		181	struct cookie_internal_data cookies_end; /< Last cookie in list /
		182
		183	struct path_data next; /< Next sibling /
		184	struct path_data prev; /< Previous sibling /
		185	struct path_data parent; /< Parent path segment /
		186	struct path_data children; /< Child path segments /
		187	struct path_data last; /< Last child /
		188	};
		189
		190	struct host_part {
		191	/**< Known paths on this host. This _must_ be first so that
		192	* struct host_part h = (struct host_part )mypath; works */
		193	struct path_data paths;
		194	bool permit_invalid_certs; /**< Allow access to SSL protected
		195	* resources on this host without
		196	* verifying certificate authenticity
		197	*/
		198
		199	char part; /< Part of host string /
		200
		201	struct prot_space_data prot_space; /*< Linked list of all known
		202	* proctection spaces known for his host and
		203	* all its schems and ports. */
		204
		205	struct host_part next; /< Next sibling /
		206	struct host_part prev; /< Previous sibling /
		207	struct host_part parent; /< Parent host part /
		208	struct host_part children; /< Child host parts /
		209	};
		210
		211	struct search_node {
		212	const struct host_part data; /< Host tree entry /
		213
		214	unsigned int level; /*< Node level /
		215
		216	struct search_node left; /< Left subtree /
		217	struct search_node right; /< Right subtree /
		218	};
		219
		220	/* Destruction */
		221	static void urldb_destroy_host_tree(struct host_part *root);
		222	static void urldb_destroy_path_tree(struct path_data *root);
		223	static void urldb_destroy_path_node_content(struct path_data *node);
		224	static void urldb_destroy_cookie(struct cookie_internal_data *c);
		225	static void urldb_destroy_prot_space(struct prot_space_data *space);
		226	static void urldb_destroy_search_tree(struct search_node *root);
		227
		228	/* Saving */
		229	static void urldb_save_search_tree(struct search_node root, FILE fp);
		230	static void urldb_count_urls(const struct path_data *root, time_t expiry,
		231	unsigned int *count);
		232	static void urldb_write_paths(const struct path_data *parent,
		233	const char host, FILE fp, char *path, int path_alloc,
		234	int *path_used, time_t expiry);
		235
		236	/* Iteration */
		237	static bool urldb_iterate_partial_host(struct search_node *root,
		238	const char prefix, bool (callback)(nsurl *url,
		239	const struct url_data *data));
		240	static bool urldb_iterate_partial_path(const struct path_data *parent,
		241	const char prefix, bool (callback)(nsurl *url,
		242	const struct url_data *data));
		243	static bool urldb_iterate_entries_host(struct search_node *parent,
		244	bool (url_callback)(nsurl url,
		245	const struct url_data *data),
		246	bool (cookie_callback)(const struct cookie_data data));
		247	static bool urldb_iterate_entries_path(const struct path_data *parent,
		248	bool (url_callback)(nsurl url,
		249	const struct url_data *data),
		250	bool (cookie_callback)(const struct cookie_data data));
		251
		252	/* Insertion */
		253	static struct host_part urldb_add_host_node(const char part,
		254	struct host_part *parent);
		255	static struct path_data urldb_add_path_node(lwc_string scheme,
		256	unsigned int port, const char segment, lwc_string fragment,
		257	struct path_data *parent);
		258	static int urldb_add_path_fragment_cmp(const void a, const void b);
		259	static struct path_data urldb_add_path_fragment(struct path_data segment,
		260	lwc_string *fragment);
		261
		262	/* Lookup */
		263	static struct path_data urldb_find_url(nsurl url);
		264	static struct path_data urldb_match_path(const struct path_data parent,
		265	const char path, lwc_string scheme, unsigned short port);
		266	static struct search_node *urldb_get_search_tree_direct(const char host);
		267	static struct search_node urldb_get_search_tree(const char host);
		268
		269	/* Dump */
		270	static void urldb_dump_hosts(struct host_part *parent);
		271	static void urldb_dump_paths(struct path_data *parent);
		272	static void urldb_dump_search(struct search_node *parent, int depth);
		273
		274	/* Search tree */
		275	static struct search_node urldb_search_insert(struct search_node root,
		276	const struct host_part *data);
		277	static struct search_node *urldb_search_insert_internal(
		278	struct search_node root, struct search_node n);
		279	/* for urldb_search_remove, see r5531 which removed it */
		280	static const struct host_part urldb_search_find(struct search_node root,
		281	const char *host);
		282	static struct search_node urldb_search_skew(struct search_node root);
		283	static struct search_node urldb_search_split(struct search_node root);
		284	static int urldb_search_match_host(const struct host_part *a,
		285	const struct host_part *b);
		286	static int urldb_search_match_string(const struct host_part *a,
		287	const char *b);
		288	static int urldb_search_match_prefix(const struct host_part *a,
		289	const char *b);
		290
		291	/* Cookies */
		292	static struct cookie_internal_data urldb_parse_cookie(nsurl url,
		293	const char **cookie);
		294	static bool urldb_parse_avpair(struct cookie_internal_data c, char n,
		295	char *v, bool was_quoted);
		296	static bool urldb_insert_cookie(struct cookie_internal_data *c,
		297	lwc_string scheme, nsurl url);
		298	static void urldb_free_cookie(struct cookie_internal_data *c);
		299	static bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
		300	int used, int alloc, char **buf);
		301	static void urldb_delete_cookie_hosts(const char domain, const char path,
		302	const char name, struct host_part parent);
		303	static void urldb_delete_cookie_paths(const char domain, const char path,
		304	const char name, struct path_data parent);
		305	static void urldb_save_cookie_hosts(FILE fp, struct host_part parent);
		306	static void urldb_save_cookie_paths(FILE fp, struct path_data parent);
		307
		308	/** Root database handle */
		309	static struct host_part db_root;
		310
		311	/** Search trees - one per letter + 1 for IPs + 1 for Everything Else */
		312	#define NUM_SEARCH_TREES 28
		313	#define ST_IP 0
		314	#define ST_EE 1
		315	#define ST_DN 2
		316	static struct search_node empty = { 0, 0, &empty, &empty };
		317	static struct search_node *search_trees[NUM_SEARCH_TREES] = {
		318	&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
		319	&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
		320	&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
		321	&empty, &empty, &empty, &empty
		322	};
		323
		324	#define MIN_COOKIE_FILE_VERSION 100
		325	#define COOKIE_FILE_VERSION 102
		326	static int loaded_cookie_file_version;
		327	#define MIN_URL_FILE_VERSION 106
		328	#define URL_FILE_VERSION 106
		329
		330	/**
		331	* Import an URL database from file, replacing any existing database
		332	*
		333	* \param filename Name of file containing data
		334	*/
		335	void urldb_load(const char *filename)
		336	{
		337	#define MAXIMUM_URL_LENGTH 4096
		338	char s[MAXIMUM_URL_LENGTH];
		339	char host[256];
		340	struct host_part *h;
		341	int urls;
		342	int i;
		343	int version;
		344	int length;
		345	FILE *fp;
		346
		347	assert(filename);
		348
		349	LOG(("Loading URL file"));
		350
		351	fp = fopen(filename, "r");
		352	if (!fp) {
		353	LOG(("Failed to open file '%s' for reading", filename));
		354	return;
		355	}
		356
		357	if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
		358	fclose(fp);
		359	return;
		360	}
		361
		362	version = atoi(s);
		363	if (version < MIN_URL_FILE_VERSION) {
		364	LOG(("Unsupported URL file version."));
		365	fclose(fp);
		366	return;
		367	}
		368	if (version > URL_FILE_VERSION) {
		369	LOG(("Unknown URL file version."));
		370	fclose(fp);
		371	return;
		372	}
		373
		374	while (fgets(host, sizeof host, fp)) {
		375	/* get the hostname */
		376	length = strlen(host) - 1;
		377	host[length] = '\0';
		378
		379	/* skip data that has ended up with a host of '' */
		380	if (length == 0) {
		381	if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
		382	break;
		383	urls = atoi(s);
		384	/* Eight fields/url */
		385	for (i = 0; i < (8 * urls); i++) {
		386	if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
		387	break;
		388	}
		389	continue;
		390	}
		391
		392	/* read number of URLs */
		393	if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
		394	break;
		395	urls = atoi(s);
		396
		397	/* no URLs => try next host */
		398	if (urls == 0) {
		399	LOG(("No URLs for '%s'", host));
		400	continue;
		401	}
		402
		403	h = urldb_add_host(host);
		404	if (!h) {
		405	LOG(("Failed adding host: '%s'", host));
		406	die("Memory exhausted whilst loading URL file");
		407	}
		408
		409	/* load the non-corrupt data */
		410	for (i = 0; i < urls; i++) {
		411	struct path_data *p = NULL;
		412	char scheme[64], ports[10];
		413	char url[64 + 3 + 256 + 6 + 4096 + 1];
		414	unsigned int port;
		415	bool is_file = false;
		416	nsurl *nsurl;
		417	lwc_string scheme_lwc, fragment_lwc;
		418	char *path_query;
		419	size_t len;
		420
		421	if (!fgets(scheme, sizeof scheme, fp))
		422	break;
		423	length = strlen(scheme) - 1;
		424	scheme[length] = '\0';
		425
		426	if (!fgets(ports, sizeof ports, fp))
		427	break;
		428	length = strlen(ports) - 1;
		429	ports[length] = '\0';
		430	port = atoi(ports);
		431
		432	if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
		433	break;
		434	length = strlen(s) - 1;
		435	s[length] = '\0';
		436
		437	if (!strcasecmp(host, "localhost") &&
		438	!strcasecmp(scheme, "file"))
		439	is_file = true;
		440
		441	snprintf(url, sizeof url, "%s://%s%s%s%s",
		442	scheme,
		443	/* file URLs have no host */
		444	(is_file ? "" : host),
		445	(port ? ":" : ""),
		446	(port ? ports : ""),
		447	s);
		448
		449	/* TODO: store URLs in pre-parsed state, and make
		450	* a nsurl_load to generate the nsurl more
		451	* swiftly.
		452	* Need a nsurl_save too.
		453	*/
		454	if (nsurl_create(url, &nsurl) != NSERROR_OK) {
		455	LOG(("Failed inserting '%s'", url));
		456	die("Memory exhausted whilst loading "
		457	"URL file");
		458	}
		459
		460	/* Copy and merge path/query strings */
		461	if (nsurl_get(nsurl, NSURL_PATH \| NSURL_QUERY,
		462	&path_query, &len) != NSERROR_OK) {
		463	LOG(("Failed inserting '%s'", url));
		464	die("Memory exhausted whilst loading "
		465	"URL file");
		466	}
		467
		468	scheme_lwc = nsurl_get_component(nsurl, NSURL_SCHEME);
		469	fragment_lwc = nsurl_get_component(nsurl,
		470	NSURL_FRAGMENT);
		471	p = urldb_add_path(scheme_lwc, port, h, path_query,
		472	fragment_lwc, nsurl);
		473	if (!p) {
		474	LOG(("Failed inserting '%s'", url));
		475	die("Memory exhausted whilst loading "
		476	"URL file");
		477	}
		478	nsurl_unref(nsurl);
		479	lwc_string_unref(scheme_lwc);
		480	if (fragment_lwc != NULL)
		481	lwc_string_unref(fragment_lwc);
		482
		483	if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
		484	break;
		485	if (p)
		486	p->urld.visits = (unsigned int)atoi(s);
		487
		488	if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
		489	break;
		490	if (p)
		491	p->urld.last_visit = (time_t)atoi(s);
		492
		493	if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
		494	break;
		495	if (p)
		496	p->urld.type = (content_type)atoi(s);
		497
		498	if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
		499	break;
		500
		501
		502	if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
		503	break;
		504	length = strlen(s) - 1;
		505	if (p && length > 0) {
		506	s[length] = '\0';
		507	p->urld.title = malloc(length + 1);
		508	if (p->urld.title)
		509	memcpy(p->urld.title, s, length + 1);
		510	}
		511	}
		512	}
		513
		514	fclose(fp);
		515	LOG(("Successfully loaded URL file"));
		516	#undef MAXIMUM_URL_LENGTH
		517	}
		518
		519	/**
		520	* Export the current database to file
		521	*
		522	* \param filename Name of file to export to
		523	*/
		524	void urldb_save(const char *filename)
		525	{
		526	FILE *fp;
		527	int i;
		528
		529	assert(filename);
		530
		531	fp = fopen(filename, "w");
		532	if (!fp) {
		533	LOG(("Failed to open file '%s' for writing", filename));
		534	return;
		535	}
		536
		537	/* file format version number */
		538	fprintf(fp, "%d\n", URL_FILE_VERSION);
		539
		540	for (i = 0; i != NUM_SEARCH_TREES; i++) {
		541	urldb_save_search_tree(search_trees[i], fp);
		542	}
		543
		544	fclose(fp);
		545	}
		546
		547	/**
		548	* Save a search (sub)tree
		549	*
		550	* \param root Root of (sub)tree to save
		551	* \param fp File to write to
		552	*/
		553	void urldb_save_search_tree(struct search_node parent, FILE fp)
		554	{
		555	char host[256];
		556	const struct host_part *h;
		557	unsigned int path_count = 0;
		558	char path, p, *end;
		559	int path_alloc = 64, path_used = 1;
		560	time_t expiry;
		561
		562	expiry = time(NULL) - ((60 * 60 * 24) * nsoption_int(expire_url));
		563
		564	if (parent == &empty)
		565	return;
		566
		567	urldb_save_search_tree(parent->left, fp);
		568
		569	path = malloc(path_alloc);
		570	if (!path)
		571	return;
		572
		573	path[0] = '\0';
		574
		575	for (h = parent->data, p = host, end = host + sizeof host;
		576	h && h != &db_root && p < end; h = h->parent) {
		577	int written = snprintf(p, end - p, "%s%s", h->part,
		578	(h->parent && h->parent->parent) ? "." : "");
		579	if (written < 0) {
		580	free(path);
		581	return;
		582	}
		583	p += written;
		584	}
		585
		586	urldb_count_urls(&parent->data->paths, expiry, &path_count);
		587
		588	if (path_count > 0) {
		589	fprintf(fp, "%s\n%i\n", host, path_count);
		590
		591	urldb_write_paths(&parent->data->paths, host, fp,
		592	&path, &path_alloc, &path_used, expiry);
		593	}
		594
		595	free(path);
		596
		597	urldb_save_search_tree(parent->right, fp);
		598	}
		599
		600	/**
		601	* Count number of URLs associated with a host
		602	*
		603	* \param root Root of path data tree
		604	* \param expiry Expiry time for URLs
		605	* \param count Pointer to count
		606	*/
		607	void urldb_count_urls(const struct path_data *root, time_t expiry,
		608	unsigned int *count)
		609	{
		610	const struct path_data *p = root;
		611
		612	do {
		613	if (p->children != NULL) {
		614	/* Drill down into children */
		615	p = p->children;
		616	} else {
		617	/* No more children, increment count if required */
		618	if (p->persistent \|\| ((p->urld.last_visit > expiry) &&
		619	(p->urld.visits > 0)))
		620	(*count)++;
		621
		622	/* Now, find next node to process. */
		623	while (p != root) {
		624	if (p->next != NULL) {
		625	/* Have a sibling, process that */
		626	p = p->next;
		627	break;
		628	}
		629
		630	/* Ascend tree */
		631	p = p->parent;
		632	}
		633	}
		634	} while (p != root);
		635	}
		636
		637	/**
		638	* Write paths associated with a host
		639	*
		640	* \param parent Root of (sub)tree to write
		641	* \param host Current host name
		642	* \param fp File to write to
		643	* \param path Current path string
		644	* \param path_alloc Allocated size of path
		645	* \param path_used Used size of path
		646	* \param expiry Expiry time of URLs
		647	*/
		648	void urldb_write_paths(const struct path_data parent, const char host,
		649	FILE fp, char path, int path_alloc, int *path_used,
		650	time_t expiry)
		651	{
		652	const struct path_data *p = parent;
		653	int i;
		654
		655	do {
		656	int seglen = p->segment != NULL ? strlen(p->segment) : 0;
		657	int len = *path_used + seglen + 1;
		658
		659	if (*path_alloc < len) {
		660	char temp = realloc(path,
		661	(len > 64) ? len : *path_alloc + 64);
		662	if (!temp)
		663	return;
		664	*path = temp;
		665	path_alloc = (len > 64) ? len : path_alloc + 64;
		666	}
		667
		668	if (p->segment != NULL)
		669	memcpy(path + path_used - 1, p->segment, seglen);
		670
		671	if (p->children != NULL) {
		672	(path)[path_used + seglen - 1] = '/';
		673	(path)[path_used + seglen] = '\0';
		674	} else {
		675	(path)[path_used + seglen - 1] = '\0';
		676	len -= 1;
		677	}
		678
		679	*path_used = len;
		680
		681	if (p->children != NULL) {
		682	/* Drill down into children */
		683	p = p->children;
		684	} else {
		685	/* leaf node */
		686	if (p->persistent \|\|((p->urld.last_visit > expiry) &&
		687	(p->urld.visits > 0))) {
		688	fprintf(fp, "%s\n", lwc_string_data(p->scheme));
		689
		690	if (p->port)
		691	fprintf(fp,"%d\n", p->port);
		692	else
		693	fprintf(fp, "\n");
		694
		695	fprintf(fp, "%s\n", *path);
		696
		697	/** \todo handle fragments? */
		698
		699	fprintf(fp, "%i\n%i\n%i\n", p->urld.visits,
		700	(int)p->urld.last_visit,
		701	(int)p->urld.type);
		702
		703	fprintf(fp, "\n");
		704
		705	if (p->urld.title) {
		706	uint8_t s = (uint8_t ) p->urld.title;
		707
		708	for (i = 0; s[i] != '\0'; i++)
		709	if (s[i] < 32)
		710	s[i] = ' ';
		711	for (--i; ((i > 0) && (s[i] == ' '));
		712	i--)
		713	s[i] = '\0';
		714	fprintf(fp, "%s\n", p->urld.title);
		715	} else
		716	fprintf(fp, "\n");
		717	}
		718
		719	/* Now, find next node to process. */
		720	while (p != parent) {
		721	int seglen = p->segment != NULL
		722	? strlen(p->segment) : 0;
		723
		724	/* Remove our segment from the path */
		725	*path_used -= seglen;
		726	(path)[path_used - 1] = '\0';
		727
		728	if (p->next != NULL) {
		729	/* Have a sibling, process that */
		730	p = p->next;
		731	break;
		732	}
		733
		734	/* Going up, so remove '/' */
		735	*path_used -= 1;
		736	(path)[path_used - 1] = '\0';
		737
		738	/* Ascend tree */
		739	p = p->parent;
		740	}
		741	}
		742	} while (p != parent);
		743	}
		744
		745	/**
		746	* Set the cross-session persistence of the entry for an URL
		747	*
		748	* \param url Absolute URL to persist
		749	* \param persist True to persist, false otherwise
		750	*/
		751	void urldb_set_url_persistence(nsurl *url, bool persist)
		752	{
		753	struct path_data *p;
		754
		755	assert(url);
		756
		757	p = urldb_find_url(url);
		758	if (!p)
		759	return;
		760
		761	p->persistent = persist;
		762	}
		763
		764	/**
		765	* Insert an URL into the database
		766	*
		767	* \param url Absolute URL to insert
		768	* \return true on success, false otherwise
		769	*/
		770	bool urldb_add_url(nsurl *url)
		771	{
		772	struct host_part *h;
		773	struct path_data *p;
		774	lwc_string *scheme;
		775	lwc_string *port;
		776	lwc_string *host;
		777	lwc_string *fragment;
		778	const char *host_str;
		779	char *path_query;
		780	size_t len;
		781	bool match;
		782	unsigned int port_int;
		783
		784	assert(url);
		785
		786	/* Copy and merge path/query strings */
		787	if (nsurl_get(url, NSURL_PATH \| NSURL_QUERY, &path_query, &len) !=
		788	NSERROR_OK) {
		789	return false;
		790	}
		791
		792	scheme = nsurl_get_component(url, NSURL_SCHEME);
		793	if (scheme == NULL)
		794	return false;
		795
		796	host = nsurl_get_component(url, NSURL_HOST);
		797	if (host != NULL) {
		798	host_str = lwc_string_data(host);
		799	lwc_string_unref(host);
		800
		801	} else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
		802	lwc_error_ok && match == true) {
		803	host_str = "localhost";
		804
		805	} else {
		806	lwc_string_unref(scheme);
		807	return false;
		808	}
		809
		810	fragment = nsurl_get_component(url, NSURL_FRAGMENT);
		811
		812	port = nsurl_get_component(url, NSURL_PORT);
		813	if (port != NULL) {
		814	port_int = atoi(lwc_string_data(port));
		815	lwc_string_unref(port);
		816	} else {
		817	port_int = 0;
		818	}
		819
		820	/* Get host entry */
		821	h = urldb_add_host(host_str);
		822
		823	/* Get path entry */
		824	p = (h != NULL) ? urldb_add_path(scheme, port_int, h, path_query,
		825	fragment, url) : NULL;
		826
		827	lwc_string_unref(scheme);
		828	if (fragment != NULL)
		829	lwc_string_unref(fragment);
		830
		831	return (p != NULL);
		832	}
		833
		834	/**
		835	* Set an URL's title string, replacing any existing one
		836	*
		837	* \param url The URL to look for
		838	* \param title The title string to use (copied)
		839	*/
		840	void urldb_set_url_title(nsurl url, const char title)
		841	{
		842	struct path_data *p;
		843	char *temp;
		844
		845	assert(url && title);
		846
		847	p = urldb_find_url(url);
		848	if (!p)
		849	return;
		850
		851	temp = strdup(title);
		852	if (!temp)
		853	return;
		854
		855	free(p->urld.title);
		856	p->urld.title = temp;
		857	}
		858
		859	/**
		860	* Set an URL's content type
		861	*
		862	* \param url The URL to look for
		863	* \param type The type to set
		864	*/
		865	void urldb_set_url_content_type(nsurl *url, content_type type)
		866	{
		867	struct path_data *p;
		868
		869	assert(url);
		870
		871	p = urldb_find_url(url);
		872	if (!p)
		873	return;
		874
		875	p->urld.type = type;
		876	}
		877
		878	/**
		879	* Update an URL's visit data
		880	*
		881	* \param url The URL to update
		882	*/
		883	void urldb_update_url_visit_data(nsurl *url)
		884	{
		885	struct path_data *p;
		886
		887	assert(url);
		888
		889	p = urldb_find_url(url);
		890	if (!p)
		891	return;
		892
		893	p->urld.last_visit = time(NULL);
		894	p->urld.visits++;
		895	}
		896
		897	/**
		898	* Reset an URL's visit statistics
		899	*
		900	* \param url The URL to reset
		901	*/
		902	void urldb_reset_url_visit_data(nsurl *url)
		903	{
		904	struct path_data *p;
		905
		906	assert(url);
		907
		908	p = urldb_find_url(url);
		909	if (!p)
		910	return;
		911
		912	p->urld.last_visit = (time_t)0;
		913	p->urld.visits = 0;
		914	}
		915
		916
		917	/**
		918	* Find data for an URL.
		919	*
		920	* \param url Absolute URL to look for
		921	* \return Pointer to result struct, or NULL
		922	*/
		923	const struct url_data urldb_get_url_data(nsurl url)
		924	{
		925	struct path_data *p;
		926	struct url_internal_data *u;
		927
		928	assert(url);
		929
		930	p = urldb_find_url(url);
		931	if (!p)
		932	return NULL;
		933
		934	u = &p->urld;
		935
		936	return (const struct url_data *) u;
		937	}
		938
		939	/**
		940	* Extract an URL from the db
		941	*
		942	* \param url URL to extract
		943	* \return Pointer to database's copy of URL or NULL if not found
		944	*/
		945	nsurl urldb_get_url(nsurl url)
		946	{
		947	struct path_data *p;
		948
		949	assert(url);
		950
		951	p = urldb_find_url(url);
		952	if (!p)
		953	return NULL;
		954
		955	return p->url;
		956	}
		957
		958	/**
		959	* Look up authentication details in database
		960	*
		961	* \param url Absolute URL to search for
		962	* \param realm When non-NULL, it is realm which can be used to determine
		963	* the protection space when that's not been done before for given URL.
		964	* \return Pointer to authentication details, or NULL if not found
		965	*/
		966	const char urldb_get_auth_details(nsurl url, const char *realm)
		967	{
		968	struct path_data p, p_cur, *p_top;
		969
		970	assert(url);
		971
		972	/* add to the db, so our lookup will work */
		973	urldb_add_url(url);
		974
		975	p = urldb_find_url(url);
		976	if (!p)
		977	return NULL;
		978
		979	/* Check for any auth details attached to the path_data node or any of
		980	* its parents. */
		981	for (p_cur = p; p_cur != NULL; p_top = p_cur, p_cur = p_cur->parent) {
		982	if (p_cur->prot_space) {
		983	return p_cur->prot_space->auth;
		984	}
		985	}
		986
		987	/* Only when we have a realm (and canonical root of given URL), we can
		988	* uniquely locate the protection space. */
		989	if (realm != NULL) {
		990	const struct host_part h = (const struct host_part )p_top;
		991	const struct prot_space_data *space;
		992	bool match;
		993
		994	/* Search for a possible matching protection space. */
		995	for (space = h->prot_space; space != NULL;
		996	space = space->next) {
		997	if (!strcmp(space->realm, realm) &&
		998	lwc_string_isequal(space->scheme,
		999	p->scheme, &match) ==
		1000	lwc_error_ok &&
		1001	match == true &&
		1002	space->port == p->port) {
		1003	p->prot_space = space;
		1004	return p->prot_space->auth;
		1005	}
		1006	}
		1007	}
		1008
		1009	return NULL;
		1010	}
		1011
		1012	/**
		1013	* Retrieve certificate verification permissions from database
		1014	*
		1015	* \param url Absolute URL to search for
		1016	* \return true to permit connections to hosts with invalid certificates,
		1017	* false otherwise.
		1018	*/
		1019	bool urldb_get_cert_permissions(nsurl *url)
		1020	{
		1021	struct path_data *p;
		1022	const struct host_part *h;
		1023
		1024	assert(url);
		1025
		1026	p = urldb_find_url(url);
		1027	if (!p)
		1028	return false;
		1029
		1030	for (; p && p->parent; p = p->parent)
		1031	/* do nothing */;
		1032	assert(p);
		1033
		1034	h = (const struct host_part *)p;
		1035
		1036	return h->permit_invalid_certs;
		1037	}
		1038
		1039	/**
		1040	* Set authentication data for an URL
		1041	*
		1042	* \param url The URL to consider
		1043	* \param realm The authentication realm
		1044	* \param auth The authentication details (in form username:password)
		1045	*/
		1046	void urldb_set_auth_details(nsurl url, const char realm,
		1047	const char *auth)
		1048	{
		1049	struct path_data p, pi;
		1050	struct host_part *h;
		1051	struct prot_space_data space, space_alloc;
		1052	char realm_alloc, auth_alloc;
		1053	bool match;
		1054
		1055	assert(url && realm && auth);
		1056
		1057	/* add url, in case it's missing */
		1058	urldb_add_url(url);
		1059
		1060	p = urldb_find_url(url);
		1061
		1062	if (!p)
		1063	return;
		1064
		1065	/* Search for host_part */
		1066	for (pi = p; pi->parent != NULL; pi = pi->parent)
		1067	;
		1068	h = (struct host_part *)pi;
		1069
		1070	/* Search if given URL belongs to a protection space we already know of. */
		1071	for (space = h->prot_space; space; space = space->next) {
		1072	if (!strcmp(space->realm, realm) &&
		1073	lwc_string_isequal(space->scheme, p->scheme,
		1074	&match) == lwc_error_ok &&
		1075	match == true &&
		1076	space->port == p->port)
		1077	break;
		1078	}
		1079
		1080	if (space != NULL) {
		1081	/* Overrule existing auth. */
		1082	free(space->auth);
		1083	space->auth = strdup(auth);
		1084	} else {
		1085	/* Create a new protection space. */
		1086	space = space_alloc = malloc(sizeof(struct prot_space_data));
		1087	realm_alloc = strdup(realm);
		1088	auth_alloc = strdup(auth);
		1089
		1090	if (!space_alloc \|\| !realm_alloc \|\| !auth_alloc) {
		1091	free(space_alloc);
		1092	free(realm_alloc);
		1093	free(auth_alloc);
		1094	return;
		1095	}
		1096
		1097	space->scheme = lwc_string_ref(p->scheme);
		1098	space->port = p->port;
		1099	space->realm = realm_alloc;
		1100	space->auth = auth_alloc;
		1101	space->next = h->prot_space;
		1102	h->prot_space = space;
		1103	}
		1104
		1105	p->prot_space = space;
		1106	}
		1107
		1108	/**
		1109	* Set certificate verification permissions
		1110	*
		1111	* \param url URL to consider
		1112	* \param permit Set to true to allow invalid certificates
		1113	*/
		1114	void urldb_set_cert_permissions(nsurl *url, bool permit)
		1115	{
		1116	struct path_data *p;
		1117	struct host_part *h;
		1118
		1119	assert(url);
		1120
		1121	/* add url, in case it's missing */
		1122	urldb_add_url(url);
		1123
		1124	p = urldb_find_url(url);
		1125	if (!p)
		1126	return;
		1127
		1128	for (; p && p->parent; p = p->parent)
		1129	/* do nothing */;
		1130	assert(p);
		1131
		1132	h = (struct host_part *)p;
		1133
		1134	h->permit_invalid_certs = permit;
		1135	}
		1136
		1137	/**
		1138	* Set thumbnail for url, replacing any existing thumbnail
		1139	*
		1140	* \param url Absolute URL to consider
		1141	* \param bitmap Opaque pointer to thumbnail data, or NULL to invalidate
		1142	*/
		1143	void urldb_set_thumbnail(nsurl url, struct bitmap bitmap)
		1144	{
		1145	struct path_data *p;
		1146
		1147	assert(url);
		1148
		1149	p = urldb_find_url(url);
		1150	if (!p)
		1151	return;
		1152
		1153	if (p->thumb && p->thumb != bitmap)
		1154	bitmap_destroy(p->thumb);
		1155
		1156	p->thumb = bitmap;
		1157	}
		1158
		1159	/**
		1160	* Retrieve thumbnail data for given URL
		1161	*
		1162	* \param url Absolute URL to search for
		1163	* \return Pointer to thumbnail data, or NULL if not found.
		1164	*/
		1165	struct bitmap urldb_get_thumbnail(nsurl url)
		1166	{
		1167	struct path_data *p;
		1168
		1169	assert(url);
		1170
		1171	p = urldb_find_url(url);
		1172	if (!p)
		1173	return NULL;
		1174
		1175	return p->thumb;
		1176	}
		1177
		1178	/**
		1179	* Iterate over entries in the database which match the given prefix
		1180	*
		1181	* \param prefix Prefix to match
		1182	* \param callback Callback function
		1183	*/
		1184	void urldb_iterate_partial(const char *prefix,
		1185	bool (callback)(nsurl url,
		1186	const struct url_data *data))
		1187	{
		1188	char host[256];
		1189	char buf[260]; /* max domain + "www." */
		1190	const char slash, scheme_sep;
		1191	struct search_node *tree;
		1192	const struct host_part *h;
		1193
		1194	assert(prefix && callback);
		1195
		1196	/* strip scheme */
		1197	scheme_sep = strstr(prefix, "://");
		1198	if (scheme_sep)
		1199	prefix = scheme_sep + 3;
		1200
		1201	slash = strchr(prefix, '/');
		1202	tree = urldb_get_search_tree(prefix);
		1203
		1204	if (slash) {
		1205	/* if there's a slash in the input, then we can
		1206	* assume that we're looking for a path */
		1207	snprintf(host, sizeof host, "%.*s",
		1208	(int) (slash - prefix), prefix);
		1209
		1210	h = urldb_search_find(tree, host);
		1211	if (!h) {
		1212	int len = slash - prefix;
		1213
		1214	if (len <= 3 \|\| strncasecmp(host, "www.", 4) != 0) {
		1215	snprintf(buf, sizeof buf, "www.%s", host);
		1216	h = urldb_search_find(
		1217	search_trees[ST_DN + 'w' - 'a'],
		1218	buf);
		1219	if (!h)
		1220	return;
		1221	} else
		1222	return;
		1223	}
		1224
		1225	if (h->paths.children) {
		1226	/* Have paths, iterate them */
		1227	urldb_iterate_partial_path(&h->paths, slash + 1,
		1228	callback);
		1229	}
		1230
		1231	} else {
		1232	int len = strlen(prefix);
		1233
		1234	/* looking for hosts */
		1235	if (!urldb_iterate_partial_host(tree, prefix, callback))
		1236	return;
		1237
		1238	if (len <= 3 \|\| strncasecmp(prefix, "www.", 4) != 0) {
		1239	/* now look for www.prefix */
		1240	snprintf(buf, sizeof buf, "www.%s", prefix);
		1241	if(!urldb_iterate_partial_host(
		1242	search_trees[ST_DN + 'w' - 'a'],
		1243	buf, callback))
		1244	return;
		1245	}
		1246	}
		1247	}
		1248
		1249	/**
		1250	* Partial host iterator (internal)
		1251	*
		1252	* \param root Root of (sub)tree to traverse
		1253	* \param prefix Prefix to match
		1254	* \param callback Callback function
		1255	* \return true to continue, false otherwise
		1256	*/
		1257	bool urldb_iterate_partial_host(struct search_node root, const char prefix,
		1258	bool (callback)(nsurl url, const struct url_data *data))
		1259	{
		1260	int c;
		1261
		1262	assert(root && prefix && callback);
		1263
		1264	if (root == &empty)
		1265	return true;
		1266
		1267	c = urldb_search_match_prefix(root->data, prefix);
		1268
		1269	if (c > 0)
		1270	/* No match => look in left subtree */
		1271	return urldb_iterate_partial_host(root->left, prefix,
		1272	callback);
		1273	else if (c < 0)
		1274	/* No match => look in right subtree */
		1275	return urldb_iterate_partial_host(root->right, prefix,
		1276	callback);
		1277	else {
		1278	/* Match => iterate over l/r subtrees & process this node */
		1279	if (!urldb_iterate_partial_host(root->left, prefix,
		1280	callback))
		1281	return false;
		1282
		1283	if (root->data->paths.children) {
		1284	/* and extract all paths attached to this host */
		1285	if (!urldb_iterate_entries_path(&root->data->paths,
		1286	callback, NULL)) {
		1287	return false;
		1288	}
		1289	}
		1290
		1291	if (!urldb_iterate_partial_host(root->right, prefix,
		1292	callback))
		1293	return false;
		1294	}
		1295
		1296	return true;
		1297	}
		1298
		1299	/**
		1300	* Partial path iterator (internal)
		1301	*
		1302	* \param parent Root of (sub)tree to traverse
		1303	* \param prefix Prefix to match
		1304	* \param callback Callback function
		1305	* \return true to continue, false otherwise
		1306	*/
		1307	bool urldb_iterate_partial_path(const struct path_data *parent,
		1308	const char prefix, bool (callback)(nsurl *url,
		1309	const struct url_data *data))
		1310	{
		1311	const struct path_data *p = parent->children;
		1312	const char slash, end = prefix + strlen(prefix);
		1313
		1314	/*
		1315	* Given: http://www.example.org/a/b/c/d//e
		1316	* and assuming a path tree:
		1317	* .
		1318	* / \
		1319	* a1 b1
		1320	* / \
		1321	* a2 b2
		1322	* /\|\
		1323	* a b c
		1324	* 3 3 \|
		1325	* d
		1326	* \|
		1327	* e
		1328	* / \
		1329	* f g
		1330	*
		1331	* Prefix will be: p will be:
		1332	*
		1333	* a/b/c/d//e a1
		1334	* b/c/d//e a2
		1335	* b/c/d//e b3
		1336	* c/d//e a3
		1337	* c/d//e b3
		1338	* c/d//e c
		1339	* d//e d
		1340	* /e e (skip /)
		1341	* e e
		1342	*
		1343	* I.E. we perform a breadth-first search of the tree.
		1344	*/
		1345
		1346	do {
		1347	slash = strchr(prefix, '/');
		1348	if (!slash)
		1349	slash = end;
		1350
		1351	if (slash == prefix && *prefix == '/') {
		1352	/* Ignore "//" */
		1353	prefix++;
		1354	continue;
		1355	}
		1356
		1357	if (strncasecmp(p->segment, prefix, slash - prefix) == 0) {
		1358	/* prefix matches so far */
		1359	if (slash == end) {
		1360	/* we've run out of prefix, so all
		1361	* paths below this one match */
		1362	if (!urldb_iterate_entries_path(p, callback,
		1363	NULL))
		1364	return false;
		1365
		1366	/* Progress to next sibling */
		1367	p = p->next;
		1368	} else {
		1369	/* Skip over this segment */
		1370	prefix = slash + 1;
		1371
		1372	p = p->children;
		1373	}
		1374	} else {
		1375	/* Doesn't match this segment, try next sibling */
		1376	p = p->next;
		1377	}
		1378	} while (p != NULL);
		1379
		1380	return true;
		1381	}
		1382
		1383	/**
		1384	* Iterate over all entries in database
		1385	*
		1386	* \param callback Function to callback for each entry
		1387	*/
		1388	void urldb_iterate_entries(bool (callback)(nsurl url,
		1389	const struct url_data *data))
		1390	{
		1391	int i;
		1392
		1393	assert(callback);
		1394
		1395	for (i = 0; i < NUM_SEARCH_TREES; i++) {
		1396	if (!urldb_iterate_entries_host(search_trees[i],
		1397	callback, NULL))
		1398	break;
		1399	}
		1400	}
		1401
		1402	/**
		1403	* Iterate over all cookies in database
		1404	*
		1405	* \param callback Function to callback for each entry
		1406	*/
		1407	void urldb_iterate_cookies(bool (callback)(const struct cookie_data data))
		1408	{
		1409	int i;
		1410
		1411	assert(callback);
		1412
		1413	for (i = 0; i < NUM_SEARCH_TREES; i++) {
		1414	if (!urldb_iterate_entries_host(search_trees[i],
		1415	NULL, callback))
		1416	break;
		1417	}
		1418	}
		1419
		1420	/**
		1421	* Host data iterator (internal)
		1422	*
		1423	* \param parent Root of subtree to iterate over
		1424	* \param url_callback Callback function
		1425	* \param cookie_callback Callback function
		1426	* \return true to continue, false otherwise
		1427	*/
		1428	bool urldb_iterate_entries_host(struct search_node *parent,
		1429	bool (url_callback)(nsurl url,
		1430	const struct url_data *data),
		1431	bool (cookie_callback)(const struct cookie_data data))
		1432	{
		1433	if (parent == &empty)
		1434	return true;
		1435
		1436	if (!urldb_iterate_entries_host(parent->left,
		1437	url_callback, cookie_callback))
		1438	return false;
		1439
		1440	if ((parent->data->paths.children) \|\| ((cookie_callback) &&
		1441	(parent->data->paths.cookies))) {
		1442	/* We have paths (or domain cookies), so iterate them */
		1443	if (!urldb_iterate_entries_path(&parent->data->paths,
		1444	url_callback, cookie_callback)) {
		1445	return false;
		1446	}
		1447	}
		1448
		1449	if (!urldb_iterate_entries_host(parent->right,
		1450	url_callback, cookie_callback))
		1451	return false;
		1452
		1453	return true;
		1454	}
		1455
		1456	/**
		1457	* Path data iterator (internal)
		1458	*
		1459	* \param parent Root of subtree to iterate over
		1460	* \param url_callback Callback function
		1461	* \param cookie_callback Callback function
		1462	* \return true to continue, false otherwise
		1463	*/
		1464	bool urldb_iterate_entries_path(const struct path_data *parent,
		1465	bool (url_callback)(nsurl url,
		1466	const struct url_data *data),
		1467	bool (cookie_callback)(const struct cookie_data data))
		1468	{
		1469	const struct path_data *p = parent;
		1470	const struct cookie_data *c;
		1471
		1472	do {
		1473	if (p->children != NULL) {
		1474	/* Drill down into children */
		1475	p = p->children;
		1476	} else {
		1477	/* All leaf nodes in the path tree should have an URL or
		1478	* cookies attached to them. If this is not the case, it
		1479	* indicates that there's a bug in the file loader/URL
		1480	* insertion code. Therefore, assert this here. */
		1481	assert(url_callback \|\| cookie_callback);
		1482
		1483	/** \todo handle fragments? */
		1484	if (url_callback) {
		1485	const struct url_internal_data *u = &p->urld;
		1486
		1487	assert(p->url);
		1488
		1489	if (!url_callback(p->url,
		1490	(const struct url_data *) u))
		1491	return false;
		1492	} else {
		1493	c = (const struct cookie_data *)p->cookies;
		1494	for (; c != NULL; c = c->next)
		1495	if (!cookie_callback(c))
		1496	return false;
		1497	}
		1498
		1499	/* Now, find next node to process. */
		1500	while (p != parent) {
		1501	if (p->next != NULL) {
		1502	/* Have a sibling, process that */
		1503	p = p->next;
		1504	break;
		1505	}
		1506
		1507	/* Ascend tree */
		1508	p = p->parent;
		1509	}
		1510	}
		1511	} while (p != parent);
		1512
		1513	return true;
		1514	}
		1515
		1516	/**
		1517	* Add a host node to the tree
		1518	*
		1519	* \param part Host segment to add (or whole IP address) (copied)
		1520	* \param parent Parent node to add to
		1521	* \return Pointer to added node, or NULL on memory exhaustion
		1522	*/
		1523	struct host_part urldb_add_host_node(const char part,
		1524	struct host_part *parent)
		1525	{
		1526	struct host_part *d;
		1527
		1528	assert(part && parent);
		1529
		1530	d = calloc(1, sizeof(struct host_part));
		1531	if (!d)
		1532	return NULL;
		1533
		1534	d->part = strdup(part);
		1535	if (!d->part) {
		1536	free(d);
		1537	return NULL;
		1538	}
		1539
		1540	d->next = parent->children;
		1541	if (parent->children)
		1542	parent->children->prev = d;
		1543	d->parent = parent;
		1544	parent->children = d;
		1545
		1546	return d;
		1547	}
		1548
		1549	/**
		1550	* Add a host to the database, creating any intermediate entries
		1551	*
		1552	* \param host Hostname to add
		1553	* \return Pointer to leaf node, or NULL on memory exhaustion
		1554	*/
		1555	struct host_part urldb_add_host(const char host)
		1556	{
		1557	struct host_part d = (struct host_part ) &db_root, *e;
		1558	struct search_node *s;
		1559	char buf[256]; /* 256 bytes is sufficient - domain names are
		1560	* limited to 255 chars. */
		1561	char *part;
		1562
		1563	assert(host);
		1564
		1565	if (url_host_is_ip_address(host)) {
		1566	/* Host is an IP, so simply add as TLD */
		1567
		1568	/* Check for existing entry */
		1569	for (e = d->children; e; e = e->next)
		1570	if (strcasecmp(host, e->part) == 0)
		1571	/* found => return it */
		1572	return e;
		1573
		1574	d = urldb_add_host_node(host, d);
		1575
		1576	s = urldb_search_insert(search_trees[ST_IP], d);
		1577	if (!s) {
		1578	/* failed */
		1579	d = NULL;
		1580	} else {
		1581	search_trees[ST_IP] = s;
		1582	}
		1583
		1584	return d;
		1585	}
		1586
		1587	/* Copy host string, so we can corrupt it */
		1588	strncpy(buf, host, sizeof buf);
		1589	buf[sizeof buf - 1] = '\0';
		1590
		1591	/* Process FQDN segments backwards */
		1592	do {
		1593	part = strrchr(buf, '.');
		1594	if (!part) {
		1595	/* last segment */
		1596	/* Check for existing entry */
		1597	for (e = d->children; e; e = e->next)
		1598	if (strcasecmp(buf, e->part) == 0)
		1599	break;
		1600
		1601	if (e) {
		1602	d = e;
		1603	} else {
		1604	d = urldb_add_host_node(buf, d);
		1605	}
		1606
		1607	/* And insert into search tree */
		1608	if (d) {
		1609	struct search_node **r;
		1610
		1611	r = urldb_get_search_tree_direct(buf);
		1612	s = urldb_search_insert(*r, d);
		1613	if (!s) {
		1614	/* failed */
		1615	d = NULL;
		1616	} else {
		1617	*r = s;
		1618	}
		1619	}
		1620	break;
		1621	}
		1622
		1623	/* Check for existing entry */
		1624	for (e = d->children; e; e = e->next)
		1625	if (strcasecmp(part + 1, e->part) == 0)
		1626	break;
		1627
		1628	d = e ? e : urldb_add_host_node(part + 1, d);
		1629	if (!d)
		1630	break;
		1631
		1632	*part = '\0';
		1633	} while (1);
		1634
		1635	return d;
		1636	}
		1637
		1638	/**
		1639	* Add a path node to the tree
		1640	*
		1641	* \param scheme URL scheme associated with path (copied)
		1642	* \param port Port number on host associated with path
		1643	* \param segment Path segment to add (copied)
		1644	* \param fragment URL fragment (copied), or NULL
		1645	* \param parent Parent node to add to
		1646	* \return Pointer to added node, or NULL on memory exhaustion
		1647	*/
		1648	struct path_data urldb_add_path_node(lwc_string scheme, unsigned int port,
		1649	const char segment, lwc_string fragment,
		1650	struct path_data *parent)
		1651	{
		1652	struct path_data d, e;
		1653
		1654	assert(scheme && segment && parent);
		1655
		1656	d = calloc(1, sizeof(struct path_data));
		1657	if (!d)
		1658	return NULL;
		1659
		1660	d->scheme = lwc_string_ref(scheme);
		1661
		1662	d->port = port;
		1663
		1664	d->segment = strdup(segment);
		1665	if (!d->segment) {
		1666	lwc_string_unref(d->scheme);
		1667	free(d);
		1668	return NULL;
		1669	}
		1670
		1671	if (fragment) {
		1672	if (!urldb_add_path_fragment(d, fragment)) {
		1673	free(d->segment);
		1674	lwc_string_unref(d->scheme);
		1675	free(d);
		1676	return NULL;
		1677	}
		1678	}
		1679
		1680	for (e = parent->children; e; e = e->next)
		1681	if (strcmp(e->segment, d->segment) > 0)
		1682	break;
		1683
		1684	if (e) {
		1685	d->prev = e->prev;
		1686	d->next = e;
		1687	if (e->prev)
		1688	e->prev->next = d;
		1689	else
		1690	parent->children = d;
		1691	e->prev = d;
		1692	} else if (!parent->children) {
		1693	d->prev = d->next = NULL;
		1694	parent->children = parent->last = d;
		1695	} else {
		1696	d->next = NULL;
		1697	d->prev = parent->last;
		1698	parent->last->next = d;
		1699	parent->last = d;
		1700	}
		1701	d->parent = parent;
		1702
		1703	return d;
		1704	}
		1705
		1706	/**
		1707	* Add a path to the database, creating any intermediate entries
		1708	*
		1709	* \param scheme URL scheme associated with path
		1710	* \param port Port number on host associated with path
		1711	* \param host Host tree node to attach to
		1712	* \param path_query Absolute path plus query to add (freed)
		1713	* \param fragment URL fragment, or NULL
		1714	* \param url URL (fragment ignored)
		1715	* \return Pointer to leaf node, or NULL on memory exhaustion
		1716	*/
		1717	struct path_data urldb_add_path(lwc_string scheme, unsigned int port,
		1718	const struct host_part host, char path_query,
		1719	lwc_string fragment, nsurl url)
		1720	{
		1721	struct path_data d, e;
		1722	char *buf = path_query;
		1723	char segment, slash;
		1724	bool match;
		1725
		1726	assert(scheme && host && url);
		1727
		1728	d = (struct path_data *) &host->paths;
		1729
		1730	/* skip leading '/' */
		1731	segment = buf;
		1732	if (*segment == '/')
		1733	segment++;
		1734
		1735	/* Process path segments */
		1736	do {
		1737	slash = strchr(segment, '/');
		1738	if (!slash) {
		1739	/* last segment */
		1740	/* look for existing entry */
		1741	for (e = d->children; e; e = e->next)
		1742	if (strcmp(segment, e->segment) == 0 &&
		1743	lwc_string_isequal(scheme,
		1744	e->scheme, &match) ==
		1745	lwc_error_ok &&
		1746	match == true &&
		1747	e->port == port)
		1748	break;
		1749
		1750	d = e ? urldb_add_path_fragment(e, fragment) :
		1751	urldb_add_path_node(scheme, port,
		1752	segment, fragment, d);
		1753	break;
		1754	}
		1755
		1756	*slash = '\0';
		1757
		1758	/* look for existing entry */
		1759	for (e = d->children; e; e = e->next)
		1760	if (strcmp(segment, e->segment) == 0 &&
		1761	lwc_string_isequal(scheme, e->scheme,
		1762	&match) == lwc_error_ok &&
		1763	match == true &&
		1764	e->port == port)
		1765	break;
		1766
		1767	d = e ? e : urldb_add_path_node(scheme, port, segment, NULL, d);
		1768	if (!d)
		1769	break;
		1770
		1771	segment = slash + 1;
		1772	} while (1);
		1773
		1774	free(path_query);
		1775
		1776	if (d && !d->url) {
		1777	/* Insert URL */
		1778	if (nsurl_has_component(url, NSURL_FRAGMENT)) {
		1779	nserror err = nsurl_defragment(url, &d->url);
		1780	if (err != NSERROR_OK)
		1781	return NULL;
		1782	} else {
		1783	d->url = nsurl_ref(url);
		1784	}
		1785	}
		1786
		1787	return d;
		1788	}
		1789
		1790	/**
		1791	* Fragment comparator callback for qsort
		1792	*/
		1793	int urldb_add_path_fragment_cmp(const void a, const void b)
		1794	{
		1795	return strcasecmp(((const char ) a), ((const char **) b));
		1796	}
		1797
		1798	/**
		1799	* Add a fragment to a path segment
		1800	*
		1801	* \param segment Path segment to add to
		1802	* \param fragment Fragment to add (copied), or NULL
		1803	* \return segment or NULL on memory exhaustion
		1804	*/
		1805	struct path_data urldb_add_path_fragment(struct path_data segment,
		1806	lwc_string *fragment)
		1807	{
		1808	char **temp;
		1809
		1810	assert(segment);
		1811
		1812	/* If no fragment, this function is a NOP
		1813	* This may seem strange, but it makes the rest
		1814	* of the code cleaner */
		1815	if (!fragment)
		1816	return segment;
		1817
		1818	temp = realloc(segment->fragment,
		1819	(segment->frag_cnt + 1) * sizeof(char *));
		1820	if (!temp)
		1821	return NULL;
		1822
		1823	segment->fragment = temp;
		1824	segment->fragment[segment->frag_cnt] =
		1825	strdup(lwc_string_data(fragment));
		1826	if (!segment->fragment[segment->frag_cnt]) {
		1827	/* Don't free temp - it's now our buffer */
		1828	return NULL;
		1829	}
		1830
		1831	segment->frag_cnt++;
		1832
		1833	/* We want fragments in alphabetical order, so sort them
		1834	* It may prove better to insert in alphabetical order instead */
		1835	qsort(segment->fragment, segment->frag_cnt, sizeof (char *),
		1836	urldb_add_path_fragment_cmp);
		1837
		1838	return segment;
		1839	}
		1840
		1841	/**
		1842	* Find an URL in the database
		1843	*
		1844	* \param url Absolute URL to find
		1845	* \return Pointer to path data, or NULL if not found
		1846	*/
		1847	struct path_data urldb_find_url(nsurl url)
		1848	{
		1849	const struct host_part *h;
		1850	struct path_data *p;
		1851	struct search_node *tree;
		1852	char *plq;
		1853	const char *host_str;
		1854	lwc_string scheme, host, *port;
		1855	size_t len = 0;
		1856	unsigned int port_int;
		1857	bool match;
		1858
		1859	assert(url);
		1860
		1861	scheme = nsurl_get_component(url, NSURL_SCHEME);
		1862	if (scheme == NULL)
		1863	return NULL;
		1864
		1865	host = nsurl_get_component(url, NSURL_HOST);
		1866	if (host != NULL) {
		1867	host_str = lwc_string_data(host);
		1868	lwc_string_unref(host);
		1869
		1870	} else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
		1871	lwc_error_ok && match == true) {
		1872	host_str = "localhost";
		1873
		1874	} else {
		1875	lwc_string_unref(scheme);
		1876	return NULL;
		1877	}
		1878
		1879	tree = urldb_get_search_tree(host_str);
		1880	h = urldb_search_find(tree, host_str);
		1881	if (!h) {
		1882	lwc_string_unref(scheme);
		1883	return NULL;
		1884	}
		1885
		1886	/* generate plq (path, leaf, query) */
		1887	if (nsurl_get(url, NSURL_PATH \| NSURL_QUERY, &plq, &len) !=
		1888	NSERROR_OK) {
		1889	lwc_string_unref(scheme);
		1890	return NULL;
		1891	}
		1892
		1893	/* Get port */
		1894	port = nsurl_get_component(url, NSURL_PORT);
		1895	if (port != NULL) {
		1896	port_int = atoi(lwc_string_data(port));
		1897	lwc_string_unref(port);
		1898	} else {
		1899	port_int = 0;
		1900	}
		1901
		1902	p = urldb_match_path(&h->paths, plq, scheme, port_int);
		1903
		1904	free(plq);
		1905	lwc_string_unref(scheme);
		1906
		1907	return p;
		1908	}
		1909
		1910	/**
		1911	* Match a path string
		1912	*
		1913	* \param parent Path (sub)tree to look in
		1914	* \param path The path to search for
		1915	* \param scheme The URL scheme associated with the path
		1916	* \param port The port associated with the path
		1917	* \return Pointer to path data or NULL if not found.
		1918	*/
		1919	struct path_data urldb_match_path(const struct path_data parent,
		1920	const char path, lwc_string scheme, unsigned short port)
		1921	{
		1922	const struct path_data *p;
		1923	const char *slash;
		1924	bool match;
		1925
		1926	assert(parent != NULL);
		1927	assert(parent->segment == NULL);
		1928	assert(path[0] == '/');
		1929
		1930	/* Start with children, as parent has no segment */
		1931	p = parent->children;
		1932
		1933	while (p != NULL) {
		1934	slash = strchr(path + 1, '/');
		1935	if (!slash)
		1936	slash = path + strlen(path);
		1937
		1938	if (strncmp(p->segment, path + 1, slash - path - 1) == 0 &&
		1939	lwc_string_isequal(p->scheme, scheme, &match) ==
		1940	lwc_error_ok &&
		1941	match == true &&
		1942	p->port == port) {
		1943	if (*slash == '\0') {
		1944	/* Complete match */
		1945	return (struct path_data *) p;
		1946	}
		1947
		1948	/* Match so far, go down tree */
		1949	p = p->children;
		1950
		1951	path = slash;
		1952	} else {
		1953	/* No match, try next sibling */
		1954	p = p->next;
		1955	}
		1956	}
		1957
		1958	return NULL;
		1959	}
		1960
		1961	/**
		1962	* Get the search tree for a particular host
		1963	*
		1964	* \param host the host to lookup
		1965	* \return the corresponding search tree
		1966	*/
		1967	struct search_node *urldb_get_search_tree_direct(const char host) {
		1968	assert(host);
		1969
		1970	if (url_host_is_ip_address(host))
		1971	return &search_trees[ST_IP];
		1972	else if (isalpha(*host))
		1973	return &search_trees[ST_DN + tolower(*host) - 'a'];
		1974	return &search_trees[ST_EE];
		1975	}
		1976
		1977	/**
		1978	* Get the search tree for a particular host
		1979	*
		1980	* \param host the host to lookup
		1981	* \return the corresponding search tree
		1982	*/
		1983	struct search_node urldb_get_search_tree(const char host) {
		1984	return *urldb_get_search_tree_direct(host);
		1985	}
		1986
		1987	/**
		1988	* Dump URL database to stderr
		1989	*/
		1990	void urldb_dump(void)
		1991	{
		1992	int i;
		1993
		1994	urldb_dump_hosts(&db_root);
		1995
		1996	for (i = 0; i != NUM_SEARCH_TREES; i++)
		1997	urldb_dump_search(search_trees[i], 0);
		1998	}
		1999
		2000	/**
		2001	* Dump URL database hosts to stderr
		2002	*
		2003	* \param parent Parent node of tree to dump
		2004	*/
		2005	void urldb_dump_hosts(struct host_part *parent)
		2006	{
		2007	struct host_part *h;
		2008
		2009	if (parent->part) {
		2010	LOG(("%s", parent->part));
		2011
		2012	LOG(("\t%s invalid SSL certs",
		2013	parent->permit_invalid_certs ? "Permits" : "Denies"));
		2014	}
		2015
		2016	/* Dump path data */
		2017	urldb_dump_paths(&parent->paths);
		2018
		2019	/* and recurse */
		2020	for (h = parent->children; h; h = h->next)
		2021	urldb_dump_hosts(h);
		2022	}
		2023
		2024	/**
		2025	* Dump URL database paths to stderr
		2026	*
		2027	* \param parent Parent node of tree to dump
		2028	*/
		2029	void urldb_dump_paths(struct path_data *parent)
		2030	{
		2031	const struct path_data *p = parent;
		2032	unsigned int i;
		2033
		2034	do {
		2035	if (p->segment != NULL) {
		2036	LOG(("\t%s : %u", lwc_string_data(p->scheme), p->port));
		2037
		2038	LOG(("\t\t'%s'", p->segment));
		2039
		2040	for (i = 0; i != p->frag_cnt; i++)
		2041	LOG(("\t\t\t#%s", p->fragment[i]));
		2042	}
		2043
		2044	if (p->children != NULL) {
		2045	p = p->children;
		2046	} else {
		2047	while (p != parent) {
		2048	if (p->next != NULL) {
		2049	p = p->next;
		2050	break;
		2051	}
		2052
		2053	p = p->parent;
		2054	}
		2055	}
		2056	} while (p != parent);
		2057	}
		2058
		2059	/**
		2060	* Dump search tree
		2061	*
		2062	* \param parent Parent node of tree to dump
		2063	* \param depth Tree depth
		2064	*/
		2065	void urldb_dump_search(struct search_node *parent, int depth)
		2066	{
		2067	const struct host_part *h;
		2068	int i;
		2069
		2070	if (parent == &empty)
		2071	return;
		2072
		2073	urldb_dump_search(parent->left, depth + 1);
		2074
		2075	for (i = 0; i != depth; i++)
		2076	fputc(' ', stderr);
		2077
		2078	for (h = parent->data; h; h = h->parent) {
		2079	if (h->part)
		2080	fprintf(stderr, "%s", h->part);
		2081
		2082	if (h->parent && h->parent->parent)
		2083	fputc('.', stderr);
		2084	}
		2085
		2086	fputc('\n', stderr);
		2087
		2088	urldb_dump_search(parent->right, depth + 1);
		2089	}
		2090
		2091	/**
		2092	* Insert a node into the search tree
		2093	*
		2094	* \param root Root of tree to insert into
		2095	* \param data User data to insert
		2096	* \return Pointer to updated root, or NULL if failed
		2097	*/
		2098	struct search_node urldb_search_insert(struct search_node root,
		2099	const struct host_part *data)
		2100	{
		2101	struct search_node *n;
		2102
		2103	assert(root && data);
		2104
		2105	n = malloc(sizeof(struct search_node));
		2106	if (!n)
		2107	return NULL;
		2108
		2109	n->level = 1;
		2110	n->data = data;
		2111	n->left = n->right = ∅
		2112
		2113	root = urldb_search_insert_internal(root, n);
		2114
		2115	return root;
		2116	}
		2117
		2118	/**
		2119	* Insert node into search tree
		2120	*
		2121	* \param root Root of (sub)tree to insert into
		2122	* \param n Node to insert
		2123	* \return Pointer to updated root
		2124	*/
		2125	struct search_node urldb_search_insert_internal(struct search_node root,
		2126	struct search_node *n)
		2127	{
		2128	assert(root && n);
		2129
		2130	if (root == &empty) {
		2131	root = n;
		2132	} else {
		2133	int c = urldb_search_match_host(root->data, n->data);
		2134
		2135	if (c > 0) {
		2136	root->left = urldb_search_insert_internal(
		2137	root->left, n);
		2138	} else if (c < 0) {
		2139	root->right = urldb_search_insert_internal(
		2140	root->right, n);
		2141	} else {
		2142	/* exact match */
		2143	free(n);
		2144	return root;
		2145	}
		2146
		2147	root = urldb_search_skew(root);
		2148	root = urldb_search_split(root);
		2149	}
		2150
		2151	return root;
		2152	}
		2153
		2154	/**
		2155	* Find a node in a search tree
		2156	*
		2157	* \param root Tree to look in
		2158	* \param host Host to find
		2159	* \return Pointer to host tree node, or NULL if not found
		2160	*/
		2161	const struct host_part urldb_search_find(struct search_node root,
		2162	const char *host)
		2163	{
		2164	int c;
		2165
		2166	assert(root && host);
		2167
		2168	if (root == &empty) {
		2169	return NULL;
		2170	}
		2171
		2172	c = urldb_search_match_string(root->data, host);
		2173
		2174	if (c > 0)
		2175	return urldb_search_find(root->left, host);
		2176	else if (c < 0)
		2177	return urldb_search_find(root->right, host);
		2178	else
		2179	return root->data;
		2180	}
		2181
		2182	/**
		2183	* Compare a pair of host_parts
		2184	*
		2185	* \param a
		2186	* \param b
		2187	* \return 0 if match, non-zero, otherwise
		2188	*/
		2189	int urldb_search_match_host(const struct host_part *a,
		2190	const struct host_part *b)
		2191	{
		2192	int ret;
		2193
		2194	assert(a && b);
		2195
		2196	/* traverse up tree to root, comparing parts as we go. */
		2197	for (; a && a != &db_root && b && b != &db_root;
		2198	a = a->parent, b = b->parent)
		2199	if ((ret = strcasecmp(a->part, b->part)) != 0)
		2200	/* They differ => return the difference here */
		2201	return ret;
		2202
		2203	/* If we get here then either:
		2204	* a) The path lengths differ
		2205	* or b) The hosts are identical
		2206	*/
		2207	if (a && a != &db_root && (!b \|\| b == &db_root))
		2208	/* len(a) > len(b) */
		2209	return 1;
		2210	else if ((!a \|\| a == &db_root) && b && b != &db_root)
		2211	/* len(a) < len(b) */
		2212	return -1;
		2213
		2214	/* identical */
		2215	return 0;
		2216	}
		2217
		2218	/**
		2219	* Compare host_part with a string
		2220	*
		2221	* \param a
		2222	* \param b
		2223	* \return 0 if match, non-zero, otherwise
		2224	*/
		2225	int urldb_search_match_string(const struct host_part *a,
		2226	const char *b)
		2227	{
		2228	const char end, dot;
		2229	int plen, ret;
		2230
		2231	assert(a && a != &db_root && b);
		2232
		2233	if (url_host_is_ip_address(b)) {
		2234	/* IP address */
		2235	return strcasecmp(a->part, b);
		2236	}
		2237
		2238	end = b + strlen(b) + 1;
		2239
		2240	while (b < end && a && a != &db_root) {
		2241	dot = strchr(b, '.');
		2242	if (!dot) {
		2243	/* last segment */
		2244	dot = end - 1;
		2245	}
		2246
		2247	/* Compare strings (length limited) */
		2248	if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
		2249	/* didn't match => return difference */
		2250	return ret;
		2251
		2252	/* The strings matched, now check that the lengths do, too */
		2253	plen = strlen(a->part);
		2254
		2255	if (plen > dot - b)
		2256	/* len(a) > len(b) */
		2257	return 1;
		2258	else if (plen < dot - b)
		2259	/* len(a) < len(b) */
		2260	return -1;
		2261
		2262	b = dot + 1;
		2263	a = a->parent;
		2264	}
		2265
		2266	/* If we get here then either:
		2267	* a) The path lengths differ
		2268	* or b) The hosts are identical
		2269	*/
		2270	if (a && a != &db_root && b >= end)
		2271	/* len(a) > len(b) */
		2272	return 1;
		2273	else if ((!a \|\| a == &db_root) && b < end)
		2274	/* len(a) < len(b) */
		2275	return -1;
		2276
		2277	/* Identical */
		2278	return 0;
		2279	}
		2280
		2281	/**
		2282	* Compare host_part with prefix
		2283	*
		2284	* \param a
		2285	* \param b
		2286	* \return 0 if match, non-zero, otherwise
		2287	*/
		2288	int urldb_search_match_prefix(const struct host_part *a,
		2289	const char *b)
		2290	{
		2291	const char end, dot;
		2292	int plen, ret;
		2293
		2294	assert(a && a != &db_root && b);
		2295
		2296	if (url_host_is_ip_address(b)) {
		2297	/* IP address */
		2298	return strncasecmp(a->part, b, strlen(b));
		2299	}
		2300
		2301	end = b + strlen(b) + 1;
		2302
		2303	while (b < end && a && a != &db_root) {
		2304	dot = strchr(b, '.');
		2305	if (!dot) {
		2306	/* last segment */
		2307	dot = end - 1;
		2308	}
		2309
		2310	/* Compare strings (length limited) */
		2311	if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
		2312	/* didn't match => return difference */
		2313	return ret;
		2314
		2315	/* The strings matched */
		2316	if (dot < end - 1) {
		2317	/* Consider segment lengths only in the case
		2318	* where the prefix contains segments */
		2319	plen = strlen(a->part);
		2320	if (plen > dot - b)
		2321	/* len(a) > len(b) */
		2322	return 1;
		2323	else if (plen < dot - b)
		2324	/* len(a) < len(b) */
		2325	return -1;
		2326	}
		2327
		2328	b = dot + 1;
		2329	a = a->parent;
		2330	}
		2331
		2332	/* If we get here then either:
		2333	* a) The path lengths differ
		2334	* or b) The hosts are identical
		2335	*/
		2336	if (a && a != &db_root && b >= end)
		2337	/* len(a) > len(b) => prefix matches */
		2338	return 0;
		2339	else if ((!a \|\| a == &db_root) && b < end)
		2340	/* len(a) < len(b) => prefix does not match */
		2341	return -1;
		2342
		2343	/* Identical */
		2344	return 0;
		2345	}
		2346
		2347	/**
		2348	* Rotate a subtree right
		2349	*
		2350	* \param root Root of subtree to rotate
		2351	* \return new root of subtree
		2352	*/
		2353	struct search_node urldb_search_skew(struct search_node root)
		2354	{
		2355	struct search_node *temp;
		2356
		2357	assert(root);
		2358
		2359	if (root->left->level == root->level) {
		2360	temp = root->left;
		2361	root->left = temp->right;
		2362	temp->right = root;
		2363	root = temp;
		2364	}
		2365
		2366	return root;
		2367	}
		2368
		2369	/**
		2370	* Rotate a node left, increasing the parent's level
		2371	*
		2372	* \param root Root of subtree to rotate
		2373	* \return New root of subtree
		2374	*/
		2375	struct search_node urldb_search_split(struct search_node root)
		2376	{
		2377	struct search_node *temp;
		2378
		2379	assert(root);
		2380
		2381	if (root->right->right->level == root->level) {
		2382	temp = root->right;
		2383	root->right = temp->left;
		2384	temp->left = root;
		2385	root = temp;
		2386
		2387	root->level++;
		2388	}
		2389
		2390	return root;
		2391	}
		2392
		2393	/**
		2394	* Retrieve cookies for an URL
		2395	*
		2396	* \param url URL being fetched
		2397	* \param include_http_only Whether to include HTTP(S) only cookies.
		2398	* \return Cookies string for libcurl (on heap), or NULL on error/no cookies
		2399	*/
		2400	char urldb_get_cookie(nsurl url, bool include_http_only)
		2401	{
		2402	const struct path_data p, q;
		2403	const struct host_part *h;
		2404	lwc_string *path_lwc;
		2405	struct cookie_internal_data *c;
		2406	int count = 0, version = COOKIE_RFC2965;
		2407	struct cookie_internal_data **matched_cookies;
		2408	int matched_cookies_size = 20;
		2409	int ret_alloc = 4096, ret_used = 1;
		2410	const char *path;
		2411	char *ret;
		2412	lwc_string *scheme;
		2413	time_t now;
		2414	int i;
		2415	bool match;
		2416
		2417	assert(url != NULL);
		2418
		2419	/* The URL must exist in the db in order to find relevant cookies, since
		2420	* we search up the tree from the URL node, and cookies from further
		2421	* up also apply. */
		2422	urldb_add_url(url);
		2423
		2424	p = urldb_find_url(url);
		2425	if (!p)
		2426	return NULL;
		2427
		2428	scheme = p->scheme;
		2429
		2430	matched_cookies = malloc(matched_cookies_size *
		2431	sizeof(struct cookie_internal_data *));
		2432	if (!matched_cookies)
		2433	return NULL;
		2434
		2435	#define GROW_MATCHED_COOKIES \
		2436	do { \
		2437	if (count == matched_cookies_size) { \
		2438	struct cookie_internal_data **temp; \
		2439	temp = realloc(matched_cookies, \
		2440	(matched_cookies_size + 20) * \
		2441	sizeof(struct cookie_internal_data *)); \
		2442	\
		2443	if (temp == NULL) { \
		2444	free(ret); \
		2445	free(matched_cookies); \
		2446	return NULL; \
		2447	} \
		2448	\
		2449	matched_cookies = temp; \
		2450	matched_cookies_size += 20; \
		2451	} \
		2452	} while(0)
		2453
		2454	ret = malloc(ret_alloc);
		2455	if (!ret) {
		2456	free(matched_cookies);
		2457	return NULL;
		2458	}
		2459
		2460	ret[0] = '\0';
		2461
		2462	path_lwc = nsurl_get_component(url, NSURL_PATH);
		2463	if (path_lwc == NULL) {
		2464	free(ret);
		2465	free(matched_cookies);
		2466	return NULL;
		2467	}
		2468	path = lwc_string_data(path_lwc);
		2469	lwc_string_unref(path_lwc);
		2470
		2471	now = time(NULL);
		2472
		2473	if (*(p->segment) != '\0') {
		2474	/* Match exact path, unless directory, when prefix matching
		2475	* will handle this case for us. */
		2476	for (q = p->parent->children; q; q = q->next) {
		2477	if (strcmp(q->segment, p->segment))
		2478	continue;
		2479
		2480	/* Consider all cookies associated with
		2481	* this exact path */
		2482	for (c = q->cookies; c; c = c->next) {
		2483	if (c->expires != -1 && c->expires < now)
		2484	/* cookie has expired => ignore */
		2485	continue;
		2486
		2487	if (c->secure && lwc_string_isequal(
		2488	q->scheme,
		2489	corestring_lwc_https,
		2490	&match) &&
		2491	match == false)
		2492	/* secure cookie for insecure host.
		2493	* ignore */
		2494	continue;
		2495
		2496	if (c->http_only && !include_http_only)
		2497	/* Ignore HttpOnly */
		2498	continue;
		2499
		2500	matched_cookies[count++] = c;
		2501
		2502	GROW_MATCHED_COOKIES;
		2503
		2504	if (c->version < (unsigned int)version)
		2505	version = c->version;
		2506
		2507	c->last_used = now;
		2508	cookies_schedule_update((struct cookie_data *)c);
		2509	}
		2510	}
		2511	}
		2512
		2513	/* Now consider cookies whose paths prefix-match ours */
		2514	for (p = p->parent; p; p = p->parent) {
		2515	/* Find directory's path entry(ies) */
		2516	/* There are potentially multiple due to differing schemes */
		2517	for (q = p->children; q; q = q->next) {
		2518	if (*(q->segment) != '\0')
		2519	continue;
		2520
		2521	for (c = q->cookies; c; c = c->next) {
		2522	if (c->expires != -1 && c->expires < now)
		2523	/* cookie has expired => ignore */
		2524	continue;
		2525
		2526	if (c->secure && lwc_string_isequal(
		2527	q->scheme,
		2528	corestring_lwc_https,
		2529	&match) &&
		2530	match == false)
		2531	/* Secure cookie for insecure server
		2532	* => ignore */
		2533	continue;
		2534
		2535	matched_cookies[count++] = c;
		2536
		2537	GROW_MATCHED_COOKIES;
		2538
		2539	if (c->version < (unsigned int) version)
		2540	version = c->version;
		2541
		2542	c->last_used = now;
		2543	cookies_schedule_update((struct cookie_data *)c);
		2544	}
		2545	}
		2546
		2547	if (!p->parent) {
		2548	/* No parent, so bail here. This can't go in
		2549	* the loop exit condition as we also want to
		2550	* process the top-level node.
		2551	*
		2552	* If p->parent is NULL then p->cookies are
		2553	* the domain cookies and thus we don't even
		2554	* try matching against them.
		2555	*/
		2556	break;
		2557	}
		2558
		2559	/* Consider p itself - may be the result of Path=/foo */
		2560	for (c = p->cookies; c; c = c->next) {
		2561	if (c->expires != -1 && c->expires < now)
		2562	/* cookie has expired => ignore */
		2563	continue;
		2564
		2565	/* Ensure cookie path is a prefix of the resource */
		2566	if (strncmp(c->path, path, strlen(c->path)) != 0)
		2567	/* paths don't match => ignore */
		2568	continue;
		2569
		2570	if (c->secure && lwc_string_isequal(p->scheme,
		2571	corestring_lwc_https,
		2572	&match) &&
		2573	match == false)
		2574	/* Secure cookie for insecure server
		2575	* => ignore */
		2576	continue;
		2577
		2578	matched_cookies[count++] = c;
		2579
		2580	GROW_MATCHED_COOKIES;
		2581
		2582	if (c->version < (unsigned int) version)
		2583	version = c->version;
		2584
		2585	c->last_used = now;
		2586	cookies_schedule_update((struct cookie_data *)c);
		2587	}
		2588
		2589	}
		2590
		2591	/* Finally consider domain cookies for hosts which domain match ours */
		2592	for (h = (const struct host_part *)p; h && h != &db_root;
		2593	h = h->parent) {
		2594	for (c = h->paths.cookies; c; c = c->next) {
		2595	if (c->expires != -1 && c->expires < now)
		2596	/* cookie has expired => ignore */
		2597	continue;
		2598
		2599	/* Ensure cookie path is a prefix of the resource */
		2600	if (strncmp(c->path, path, strlen(c->path)) != 0)
		2601	/* paths don't match => ignore */
		2602	continue;
		2603
		2604	if (c->secure && lwc_string_isequal(scheme,
		2605	corestring_lwc_https,
		2606	&match) &&
		2607	match == false)
		2608	/* secure cookie for insecure host. ignore */
		2609	continue;
		2610
		2611	matched_cookies[count++] = c;
		2612
		2613	GROW_MATCHED_COOKIES;
		2614
		2615	if (c->version < (unsigned int)version)
		2616	version = c->version;
		2617
		2618	c->last_used = now;
		2619	cookies_schedule_update((struct cookie_data *)c);
		2620	}
		2621	}
		2622
		2623	if (count == 0) {
		2624	/* No cookies found */
		2625	free(ret);
		2626	free(matched_cookies);
		2627	return NULL;
		2628	}
		2629
		2630	/* and build output string */
		2631	if (version > COOKIE_NETSCAPE) {
		2632	sprintf(ret, "$Version=%d", version);
		2633	ret_used = strlen(ret) + 1;
		2634	}
		2635
		2636	for (i = 0; i < count; i++) {
		2637	if (!urldb_concat_cookie(matched_cookies[i], version,
		2638	&ret_used, &ret_alloc, &ret)) {
		2639	free(ret);
		2640	free(matched_cookies);
		2641	return NULL;
		2642	}
		2643	}
		2644
		2645	if (version == COOKIE_NETSCAPE) {
		2646	/* Old-style cookies => no version & skip "; " */
		2647	memmove(ret, ret + 2, ret_used - 2);
		2648	ret_used -= 2;
		2649	}
		2650
		2651	/* Now, shrink the output buffer to the required size */
		2652	{
		2653	char *temp = realloc(ret, ret_used);
		2654	if (!temp) {
		2655	free(ret);
		2656	free(matched_cookies);
		2657	return NULL;
		2658	}
		2659
		2660	ret = temp;
		2661	}
		2662
		2663	free(matched_cookies);
		2664
		2665	return ret;
		2666
		2667	#undef GROW_MATCHED_COOKIES
		2668	}
		2669
		2670	/**
		2671	* Parse Set-Cookie header and insert cookie(s) into database
		2672	*
		2673	* \param header Header to parse, with Set-Cookie: stripped
		2674	* \param url URL being fetched
		2675	* \param referer Referring resource, or 0 for verifiable transaction
		2676	* \return true on success, false otherwise
		2677	*/
		2678	bool urldb_set_cookie(const char header, nsurl url, nsurl *referer)
		2679	{
		2680	const char cur = header, end;
		2681	lwc_string path, host, *scheme;
		2682	nsurl *urlt;
		2683	bool match;
		2684
		2685	assert(url && header);
		2686
		2687	/* Get defragmented URL, as 'urlt' */
		2688	if (nsurl_has_component(url, NSURL_FRAGMENT)) {
		2689	if (nsurl_defragment(url, &urlt) != NSERROR_OK)
		2690	return NULL;
		2691	} else {
		2692	urlt = nsurl_ref(url);
		2693	}
		2694
		2695	scheme = nsurl_get_component(url, NSURL_SCHEME);
		2696	if (scheme == NULL) {
		2697	nsurl_unref(urlt);
		2698	return false;
		2699	}
		2700
		2701	path = nsurl_get_component(url, NSURL_PATH);
		2702	if (path == NULL) {
		2703	lwc_string_unref(scheme);
		2704	nsurl_unref(urlt);
		2705	return false;
		2706	}
		2707
		2708	host = nsurl_get_component(url, NSURL_HOST);
		2709	if (host == NULL) {
		2710	lwc_string_unref(path);
		2711	lwc_string_unref(scheme);
		2712	nsurl_unref(urlt);
		2713	return false;
		2714	}
		2715
		2716	if (referer) {
		2717	lwc_string *rhost;
		2718
		2719	/* Ensure that url's host name domain matches
		2720	* referer's (4.3.5) */
		2721	rhost = nsurl_get_component(url, NSURL_HOST);
		2722	if (rhost == NULL) {
		2723	goto error;
		2724	}
		2725
		2726	/* Domain match host names */
		2727	if (lwc_string_isequal(host, rhost, &match) == lwc_error_ok &&
		2728	match == false) {
		2729	const char *hptr;
		2730	const char *rptr;
		2731	const char *dot;
		2732	const char *host_data = lwc_string_data(host);
		2733	const char *rhost_data = lwc_string_data(rhost);
		2734
		2735	/* Ensure neither host nor rhost are IP addresses */
		2736	if (url_host_is_ip_address(host_data) \|\|
		2737	url_host_is_ip_address(rhost_data)) {
		2738	/* IP address, so no partial match */
		2739	lwc_string_unref(rhost);
		2740	goto error;
		2741	}
		2742
		2743	/* Not exact match, so try the following:
		2744	*
		2745	* 1) Find the longest common suffix of host and rhost
		2746	* (may be all of host/rhost)
		2747	* 2) Discard characters from the start of the suffix
		2748	* until the suffix starts with a dot
		2749	* (prevents foobar.com matching bar.com)
		2750	* 3) Ensure the suffix is non-empty and contains
		2751	* embedded dots (to avoid permitting .com as a
		2752	* suffix)
		2753	*
		2754	* Note that the above in no way resembles the
		2755	* domain matching algorithm found in RFC2109.
		2756	* It does, however, model the real world rather
		2757	* more accurately.
		2758	*/
		2759
		2760	/** \todo In future, we should consult a TLD service
		2761	* instead of just looking for embedded dots.
		2762	*/
		2763
		2764	hptr = host_data + lwc_string_length(host) - 1;
		2765	rptr = rhost_data + lwc_string_length(rhost) - 1;
		2766
		2767	/* 1 */
		2768	while (hptr >= host_data && rptr >= rhost_data) {
		2769	if (hptr != rptr)
		2770	break;
		2771	hptr--;
		2772	rptr--;
		2773	}
		2774	/* Ensure we end up pointing at the start of the
		2775	* common suffix. The above loop will exit pointing
		2776	* to the byte before the start of the suffix. */
		2777	hptr++;
		2778
		2779	/* 2 */
		2780	while (hptr != '\0' && hptr != '.')
		2781	hptr++;
		2782
		2783	/* 3 */
		2784	if (*hptr == '\0' \|\|
		2785	(dot = strchr(hptr + 1, '.')) == NULL \|\|
		2786	*(dot + 1) == '\0') {
		2787	lwc_string_unref(rhost);
		2788	goto error;
		2789	}
		2790	}
		2791
		2792	lwc_string_unref(rhost);
		2793	}
		2794
		2795	end = cur + strlen(cur) - 2 /* Trailing CRLF */;
		2796
		2797	do {
		2798	struct cookie_internal_data *c;
		2799	char *dot;
		2800	size_t len;
		2801
		2802	c = urldb_parse_cookie(url, &cur);
		2803	if (!c) {
		2804	/* failed => stop parsing */
		2805	goto error;
		2806	}
		2807
		2808	/* validate cookie */
		2809
		2810	/* 4.2.2:i Cookie must have NAME and VALUE */
		2811	if (!c->name \|\| !c->value) {
		2812	urldb_free_cookie(c);
		2813	goto error;
		2814	}
		2815
		2816	/* 4.3.2:i Cookie path must be a prefix of URL path */
		2817	len = strlen(c->path);
		2818	if (len > lwc_string_length(path) \|\|
		2819	strncmp(c->path, lwc_string_data(path),
		2820	len) != 0) {
		2821	urldb_free_cookie(c);
		2822	goto error;
		2823	}
		2824
		2825	/* 4.3.2:ii Cookie domain must contain embedded dots */
		2826	dot = strchr(c->domain + 1, '.');
		2827	if (!dot \|\| *(dot + 1) == '\0') {
		2828	/* no embedded dots */
		2829	urldb_free_cookie(c);
		2830	goto error;
		2831	}
		2832
		2833	/* Domain match fetch host with cookie domain */
		2834	if (strcasecmp(lwc_string_data(host), c->domain) != 0) {
		2835	int hlen, dlen;
		2836	char *domain = c->domain;
		2837
		2838	/* c->domain must be a domain cookie here because:
		2839	* c->domain is either:
		2840	* + specified in the header as a domain cookie
		2841	* (non-domain cookies in the header are ignored
		2842	* by urldb_parse_cookie / urldb_parse_avpair)
		2843	* + defaulted to the URL's host part
		2844	* (by urldb_parse_cookie if no valid domain was
		2845	* specified in the header)
		2846	*
		2847	* The latter will pass the strcasecmp above, which
		2848	* leaves the former (i.e. a domain cookie)
		2849	*/
		2850	assert(c->domain[0] == '.');
		2851
		2852	/* 4.3.2:iii */
		2853	if (url_host_is_ip_address(lwc_string_data(host))) {
		2854	/* IP address, so no partial match */
		2855	urldb_free_cookie(c);
		2856	goto error;
		2857	}
		2858
		2859	hlen = lwc_string_length(host);
		2860	dlen = strlen(c->domain);
		2861
		2862	if (hlen <= dlen && hlen != dlen - 1) {
		2863	/* Partial match not possible */
		2864	urldb_free_cookie(c);
		2865	goto error;
		2866	}
		2867
		2868	if (hlen == dlen - 1) {
		2869	/* Relax matching to allow
		2870	* host a.com to match .a.com */
		2871	domain++;
		2872	dlen--;
		2873	}
		2874
		2875	if (strcasecmp(lwc_string_data(host) + (hlen - dlen),
		2876	domain)) {
		2877	urldb_free_cookie(c);
		2878	goto error;
		2879	}
		2880
		2881	/* 4.3.2:iv Ensure H contains no dots
		2882	*
		2883	* If you believe the spec, H should contain no
		2884	* dots in _any_ cookie. Unfortunately, however,
		2885	* reality differs in that many sites send domain
		2886	* cookies of the form .foo.com from hosts such
		2887	* as bar.bat.foo.com and then expect domain
		2888	* matching to work. Thus we have to do what they
		2889	* expect, regardless of any potential security
		2890	* implications.
		2891	*
		2892	* This is what code conforming to the spec would
		2893	* look like:
		2894	*
		2895	* for (int i = 0; i < (hlen - dlen); i++) {
		2896	* if (host[i] == '.') {
		2897	* urldb_free_cookie(c);
		2898	* goto error;
		2899	* }
		2900	* }
		2901	*/
		2902	}
		2903
		2904	/* Now insert into database */
		2905	if (!urldb_insert_cookie(c, scheme, urlt))
		2906	goto error;
		2907	} while (cur < end);
		2908
		2909	lwc_string_unref(host);
		2910	lwc_string_unref(path);
		2911	lwc_string_unref(scheme);
		2912	nsurl_unref(urlt);
		2913
		2914	return true;
		2915
		2916	error:
		2917	lwc_string_unref(host);
		2918	lwc_string_unref(path);
		2919	lwc_string_unref(scheme);
		2920	nsurl_unref(urlt);
		2921
		2922	return false;
		2923	}
		2924
		2925	/**
		2926	* Parse a cookie
		2927	*
		2928	* \param url URL being fetched
		2929	* \param cookie Pointer to cookie string (updated on exit)
		2930	* \return Pointer to cookie structure (on heap, caller frees) or NULL
		2931	*/
		2932	struct cookie_internal_data urldb_parse_cookie(nsurl url,
		2933	const char **cookie)
		2934	{
		2935	struct cookie_internal_data *c;
		2936	const char *cur;
		2937	char name[1024], value[4096];
		2938	char n = name, v = value;
		2939	bool in_value = false;
		2940	bool had_value_data = false;
		2941	bool value_verbatim = false;
		2942	bool quoted = false;
		2943	bool was_quoted = false;
		2944
		2945	assert(url && cookie && *cookie);
		2946
		2947	c = calloc(1, sizeof(struct cookie_internal_data));
		2948	if (c == NULL)
		2949	return NULL;
		2950
		2951	c->expires = -1;
		2952
		2953	name[0] = '\0';
		2954	value[0] = '\0';
		2955
		2956	for (cur = cookie; cur; cur++) {
		2957	if (cur == '\r' && (cur + 1) == '\n') {
		2958	/* End of header */
		2959	if (quoted) {
		2960	/* Unmatched quote encountered */
		2961
		2962	/* Match Firefox 2.0.0.11 */
		2963	value[0] = '\0';
		2964
		2965	#if 0
		2966	/* This is what IE6/7 & Safari 3 do */
		2967	/* Opera 9.25 discards the entire cookie */
		2968
		2969	/* Shuffle value up by 1 */
		2970	memmove(value + 1, value,
		2971	min(v - value, sizeof(value) - 2));
		2972	v++;
		2973	/* And insert " character at the start */
		2974	value[0] = '"';
		2975
		2976	/* Now, run forwards through the value
		2977	* looking for a semicolon. If one exists,
		2978	* terminate the value at this point. */
		2979	for (char *s = value; s < v; s++) {
		2980	if (*s == ';') {
		2981	*s = '\0';
		2982	v = s;
		2983	break;
		2984	}
		2985	}
		2986	#endif
		2987	}
		2988
		2989	break;
		2990	} else if (*cur == '\r') {
		2991	/* Spurious linefeed */
		2992	continue;
		2993	} else if (*cur == '\n') {
		2994	/* Spurious newline */
		2995	continue;
		2996	}
		2997
		2998	if (in_value && !had_value_data) {
		2999	if (cur == ' ' \|\| cur == '\t') {
		3000	/* Strip leading whitespace from value */
		3001	continue;
		3002	} else {
		3003	had_value_data = true;
		3004
		3005	/* Value is taken verbatim if first non-space
		3006	* character is not a " */
		3007	if (*cur != '"') {
		3008	value_verbatim = true;
		3009	}
		3010	}
		3011	}
		3012
		3013	if (in_value && !value_verbatim && (*cur == '"')) {
		3014	/* Only non-verbatim values may be quoted */
		3015	if (cur == cookie \|\| (cur - 1) != '\\') {
		3016	/* Only unescaped quotes count */
		3017	was_quoted = quoted;
		3018	quoted = !quoted;
		3019
		3020	continue;
		3021	}
		3022	}
		3023
		3024	if (!quoted && !in_value && *cur == '=') {
		3025	/* First equals => attr-value separator */
		3026	in_value = true;
		3027	continue;
		3028	}
		3029
		3030	if (!quoted && (was_quoted \|\| *cur == ';')) {
		3031	/* Semicolon or after quoted value
		3032	* => end of current avpair */
		3033
		3034	/* NUL-terminate tokens */
		3035	*n = '\0';
		3036	*v = '\0';
		3037
		3038	if (!urldb_parse_avpair(c, name, value, was_quoted)) {
		3039	/* Memory exhausted */
		3040	urldb_free_cookie(c);
		3041	return NULL;
		3042	}
		3043
		3044	/* And reset to start */
		3045	n = name;
		3046	v = value;
		3047	in_value = false;
		3048	had_value_data = false;
		3049	value_verbatim = false;
		3050	was_quoted = false;
		3051
		3052	/* Now, if the current input is anything other than a
		3053	* semicolon, we must be sure to reprocess it */
		3054	if (*cur != ';') {
		3055	cur--;
		3056	}
		3057
		3058	continue;
		3059	}
		3060
		3061	/* And now handle commas. These are a pain as they may mean
		3062	* any of the following:
		3063	*
		3064	* + End of cookie
		3065	* + Day separator in Expires avpair
		3066	* + (Invalid) comma in unquoted value
		3067	*
		3068	* Therefore, in order to handle all 3 cases (2 and 3 are
		3069	* identical, the difference being that 2 is in the spec and
		3070	* 3 isn't), we need to determine where the comma actually
		3071	* lies. We use the following heuristic:
		3072	*
		3073	* Given a comma at the current input position, find the
		3074	* immediately following semicolon (or end of input if none
		3075	* found). Then, consider the input characters between
		3076	* these two positions. If any of these characters is an
		3077	* '=', we must assume that the comma signified the end of
		3078	* the current cookie.
		3079	*
		3080	* This holds as the first avpair of any cookie must be
		3081	* NAME=VALUE, so the '=' is guaranteed to appear in the
		3082	* case where the comma marks the end of a cookie.
		3083	*
		3084	* This will fail, however, in the case where '=' appears in
		3085	* the value of the current avpair after the comma or the
		3086	* subsequent cookie does not start with NAME=VALUE. Neither
		3087	* of these is particularly likely and if they do occur, the
		3088	* website is more broken than we can be bothered to handle.
		3089	*/
		3090	if (!quoted && *cur == ',') {
		3091	/* Find semi-colon, if any */
		3092	const char *p;
		3093	const char *semi = strchr(cur + 1, ';');
		3094	if (!semi)
		3095	semi = cur + strlen(cur) - 2 /* CRLF */;
		3096
		3097	/* Look for equals sign between comma and semi */
		3098	for (p = cur + 1; p < semi; p++)
		3099	if (*p == '=')
		3100	break;
		3101
		3102	if (p == semi) {
		3103	/* none found => comma internal to value */
		3104	/* do nothing */
		3105	} else {
		3106	/* found one => comma marks end of cookie */
		3107	cur++;
		3108	break;
		3109	}
		3110	}
		3111
		3112	/* Accumulate into buffers, always leaving space for a NUL */
		3113	/** \todo is silently truncating overlong names/values wise? */
		3114	if (!in_value) {
		3115	if (n < name + (sizeof(name) - 1))
		3116	n++ = cur;
		3117	} else {
		3118	if (v < value + (sizeof(value) - 1))
		3119	v++ = cur;
		3120	}
		3121	}
		3122
		3123	/* Parse final avpair */
		3124	*n = '\0';
		3125	*v = '\0';
		3126
		3127	if (!urldb_parse_avpair(c, name, value, was_quoted)) {
		3128	/* Memory exhausted */
		3129	urldb_free_cookie(c);
		3130	return NULL;
		3131	}
		3132
		3133	/* Now fix-up default values */
		3134	if (c->domain == NULL) {
		3135	lwc_string *host = nsurl_get_component(url, NSURL_HOST);
		3136	if (host == NULL) {
		3137	urldb_free_cookie(c);
		3138	return NULL;
		3139	}
		3140	c->domain = strdup(lwc_string_data(host));
		3141	lwc_string_unref(host);
		3142	}
		3143
		3144	if (c->path == NULL) {
		3145	const char *path_data;
		3146	char path, slash;
		3147	lwc_string *path_lwc;
		3148
		3149	path_lwc = nsurl_get_component(url, NSURL_PATH);
		3150	if (path_lwc == NULL) {
		3151	urldb_free_cookie(c);
		3152	return NULL;
		3153	}
		3154	path_data = lwc_string_data(path_lwc);
		3155
		3156	/* Strip leafname and trailing slash (4.3.1) */
		3157	slash = strrchr(path_data, '/');
		3158	if (slash != NULL) {
		3159	/* Special case: retain first slash in path */
		3160	if (slash == path_data)
		3161	slash++;
		3162
		3163	slash = strndup(path_data, slash - path_data);
		3164	if (slash == NULL) {
		3165	lwc_string_unref(path_lwc);
		3166	urldb_free_cookie(c);
		3167	return NULL;
		3168	}
		3169
		3170	path = slash;
		3171	lwc_string_unref(path_lwc);
		3172	} else {
		3173	path = strdup(lwc_string_data(path_lwc));
		3174	lwc_string_unref(path_lwc);
		3175	if (path == NULL) {
		3176	urldb_free_cookie(c);
		3177	return NULL;
		3178	}
		3179	}
		3180
		3181	c->path = path;
		3182	}
		3183
		3184	/* Write back current position */
		3185	*cookie = cur;
		3186
		3187	return c;
		3188	}
		3189
		3190	/**
		3191	* Parse a cookie avpair
		3192	*
		3193	* \param c Cookie struct to populate
		3194	* \param n Name component
		3195	* \param v Value component
		3196	* \param was_quoted Whether ::v was quoted in the input
		3197	* \return true on success, false on memory exhaustion
		3198	*/
		3199	bool urldb_parse_avpair(struct cookie_internal_data c, char n, char *v,
		3200	bool was_quoted)
		3201	{
		3202	int vlen;
		3203
		3204	assert(c && n && v);
		3205
		3206	/* Strip whitespace from start of name */
		3207	for (; *n; n++) {
		3208	if (n != ' ' && n != '\t')
		3209	break;
		3210	}
		3211
		3212	/* Strip whitespace from end of name */
		3213	for (vlen = strlen(n); vlen; vlen--) {
		3214	if (n[vlen] == ' ' \|\| n[vlen] == '\t')
		3215	n[vlen] = '\0';
		3216	else
		3217	break;
		3218	}
		3219
		3220	/* Strip whitespace from start of value */
		3221	for (; *v; v++) {
		3222	if (v != ' ' && v != '\t')
		3223	break;
		3224	}
		3225
		3226	/* Strip whitespace from end of value */
		3227	for (vlen = strlen(v); vlen; vlen--) {
		3228	if (v[vlen] == ' ' \|\| v[vlen] == '\t')
		3229	v[vlen] = '\0';
		3230	else
		3231	break;
		3232	}
		3233
		3234	if (!c->comment && strcasecmp(n, "Comment") == 0) {
		3235	c->comment = strdup(v);
		3236	if (!c->comment)
		3237	return false;
		3238	} else if (!c->domain && strcasecmp(n, "Domain") == 0) {
		3239	if (v[0] == '.') {
		3240	/* Domain must start with a dot */
		3241	c->domain_from_set = true;
		3242	c->domain = strdup(v);
		3243	if (!c->domain)
		3244	return false;
		3245	}
		3246	} else if (strcasecmp(n, "Max-Age") == 0) {
		3247	int temp = atoi(v);
		3248	if (temp == 0)
		3249	/* Special case - 0 means delete */
		3250	c->expires = 0;
		3251	else
		3252	c->expires = time(NULL) + temp;
		3253	} else if (!c->path && strcasecmp(n, "Path") == 0) {
		3254	c->path_from_set = true;
		3255	c->path = strdup(v);
		3256	if (!c->path)
		3257	return false;
		3258	} else if (strcasecmp(n, "Version") == 0) {
		3259	c->version = atoi(v);
		3260	} else if (strcasecmp(n, "Expires") == 0) {
		3261	char *datenoday;
		3262	time_t expires;
		3263
		3264	/* Strip dayname from date (these are hugely
		3265	* variable and liable to break the parser.
		3266	* They also serve no useful purpose) */
		3267	for (datenoday = v; datenoday && !isdigit(datenoday);
		3268	datenoday++)
		3269	; /* do nothing */
		3270
5043	ashmew2	3271	/* TODO: expires = curl_getdate(datenoday, NULL); */
		3272	expires = (time_t) 100123123;
		3273
3584	sourcerer	3274	if (expires == -1) {
		3275	/* assume we have an unrepresentable
		3276	* date => force it to the maximum
		3277	* possible value of a 32bit time_t
		3278	* (this may break in 2038. We'll
		3279	* deal with that once we come to
		3280	* it) */
		3281	expires = (time_t)0x7fffffff;
		3282	}
		3283	c->expires = expires;
		3284	} else if (strcasecmp(n, "Secure") == 0) {
		3285	c->secure = true;
		3286	} else if (strcasecmp(n, "HttpOnly") == 0) {
		3287	c->http_only = true;
		3288	} else if (!c->name) {
		3289	c->name = strdup(n);
		3290	c->value = strdup(v);
		3291	c->value_was_quoted = was_quoted;
		3292	if (!c->name \|\| !c->value)
		3293	return false;
		3294	}
		3295
		3296	return true;
		3297	}
		3298
		3299	/**
		3300	* Insert a cookie into the database
		3301	*
		3302	* \param c The cookie to insert
		3303	* \param scheme URL scheme associated with cookie path
		3304	* \param url URL (sans fragment) associated with cookie
		3305	* \return true on success, false on memory exhaustion (c will be freed)
		3306	*/
		3307	bool urldb_insert_cookie(struct cookie_internal_data c, lwc_string scheme,
		3308	nsurl *url)
		3309	{
		3310	struct cookie_internal_data *d;
		3311	const struct host_part *h;
		3312	struct path_data *p;
		3313	time_t now = time(NULL);
		3314
		3315	assert(c);
		3316
		3317	if (c->domain[0] == '.') {
		3318	h = urldb_search_find(
		3319	urldb_get_search_tree(&(c->domain[1])),
		3320	c->domain + 1);
		3321	if (!h) {
		3322	h = urldb_add_host(c->domain + 1);
		3323	if (!h) {
		3324	urldb_free_cookie(c);
		3325	return false;
		3326	}
		3327	}
		3328
		3329	p = (struct path_data *) &h->paths;
		3330	} else {
		3331	/* Need to have a URL and scheme, if it's not a domain cookie */
		3332	assert(url != NULL);
		3333	assert(scheme != NULL);
		3334
		3335	h = urldb_search_find(
		3336	urldb_get_search_tree(c->domain),
		3337	c->domain);
		3338
		3339	if (!h) {
		3340	h = urldb_add_host(c->domain);
		3341	if (!h) {
		3342	urldb_free_cookie(c);
		3343	return false;
		3344	}
		3345	}
		3346
		3347	/* find path */
		3348	p = urldb_add_path(scheme, 0, h,
		3349	strdup(c->path), NULL, url);
		3350	if (!p) {
		3351	urldb_free_cookie(c);
		3352	return false;
		3353	}
		3354	}
		3355
		3356	/* add cookie */
		3357	for (d = p->cookies; d; d = d->next) {
		3358	if (!strcmp(d->domain, c->domain) &&
		3359	!strcmp(d->path, c->path) &&
		3360	!strcmp(d->name, c->name))
		3361	break;
		3362	}
		3363
		3364	if (d) {
		3365	if (c->expires != -1 && c->expires < now) {
		3366	/* remove cookie */
		3367	if (d->next)
		3368	d->next->prev = d->prev;
		3369	else
		3370	p->cookies_end = d->prev;
		3371	if (d->prev)
		3372	d->prev->next = d->next;
		3373	else
		3374	p->cookies = d->next;
		3375
		3376	cookies_remove((struct cookie_data *)d);
		3377	urldb_free_cookie(d);
		3378	urldb_free_cookie(c);
		3379	} else {
		3380	/* replace d with c */
		3381	c->prev = d->prev;
		3382	c->next = d->next;
		3383	if (c->next)
		3384	c->next->prev = c;
		3385	else
		3386	p->cookies_end = c;
		3387	if (c->prev)
		3388	c->prev->next = c;
		3389	else
		3390	p->cookies = c;
		3391
		3392	cookies_remove((struct cookie_data *)d);
		3393	urldb_free_cookie(d);
		3394
		3395	cookies_schedule_update((struct cookie_data *)c);
		3396	}
		3397	} else {
		3398	c->prev = p->cookies_end;
		3399	c->next = NULL;
		3400	if (p->cookies_end)
		3401	p->cookies_end->next = c;
		3402	else
		3403	p->cookies = c;
		3404	p->cookies_end = c;
		3405
		3406	cookies_schedule_update((struct cookie_data *)c);
		3407	}
		3408
		3409	return true;
		3410	}
		3411
		3412	/**
		3413	* Free a cookie
		3414	*
		3415	* \param c The cookie to free
		3416	*/
		3417	void urldb_free_cookie(struct cookie_internal_data *c)
		3418	{
		3419	assert(c);
		3420
		3421	free(c->comment);
		3422	free(c->domain);
		3423	free(c->path);
		3424	free(c->name);
		3425	free(c->value);
		3426	free(c);
		3427	}
		3428
		3429	/**
		3430	* Concatenate a cookie into the provided buffer
		3431	*
		3432	* \param c Cookie to concatenate
		3433	* \param version The version of the cookie string to output
		3434	* \param used Pointer to amount of buffer used (updated)
		3435	* \param alloc Pointer to allocated size of buffer (updated)
		3436	* \param buf Pointer to Pointer to buffer (updated)
		3437	* \return true on success, false on memory exhaustion
		3438	*/
		3439	bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
		3440	int used, int alloc, char **buf)
		3441	{
		3442	/* Combined (A)BNF for the Cookie: request header:
		3443	*
		3444	* CHAR =
		3445	* CTL =
		3446	* (octets 0 - 31) and DEL (127)>
		3447	* CR =
		3448	* LF =
		3449	* SP =
		3450	* HT =
		3451	* <"> =
		3452	*
		3453	* CRLF = CR LF
		3454	*
		3455	* LWS = [CRLF] 1*( SP \| HT )
		3456	*
		3457	* TEXT =
		3458	* but including LWS>
		3459	*
		3460	* token = 1*
		3461	* separators = "(" \| ")" \| "<" \| ">" \| "@"
		3462	* \| "," \| ";" \| ":" \| "\" \| <">
		3463	* \| "/" \| "[" \| "]" \| "?" \| "="
		3464	* \| "{" \| "}" \| SP \| HT
		3465	*
		3466	* quoted-string = ( <"> *(qdtext \| quoted-pair ) <"> )
		3467	* qdtext = >
		3468	* quoted-pair = "\" CHAR
		3469	*
		3470	* attr = token
		3471	* value = word
		3472	* word = token \| quoted-string
		3473	*
		3474	* cookie = "Cookie:" cookie-version
		3475	* 1*((";" \| ",") cookie-value)
		3476	* cookie-value = NAME "=" VALUE [";" path] [";" domain]
		3477	* cookie-version = "$Version" "=" value
		3478	* NAME = attr
		3479	* VALUE = value
		3480	* path = "$Path" "=" value
		3481	* domain = "$Domain" "=" value
		3482	*
		3483	* A note on quoted-string handling:
		3484	* The cookie data stored in the db is verbatim (i.e. sans enclosing
		3485	* <">, if any, and with all quoted-pairs intact) thus all that we
		3486	* need to do here is ensure that value strings which were quoted
		3487	* in Set-Cookie or which include any of the separators are quoted
		3488	* before use.
		3489	*
		3490	* A note on cookie-value separation:
		3491	* We use semicolons for all separators, including between
		3492	* cookie-values. This simplifies things and is backwards compatible.
		3493	*/
		3494	const char * const separators = "()<>@,;:\\\"/[]?={} \t";
		3495
		3496	int max_len;
		3497
		3498	assert(c && used && alloc && buf && *buf);
		3499
		3500	/* "; " cookie-value
		3501	* We allow for the possibility that values are quoted
		3502	*/
		3503	max_len = 2 + strlen(c->name) + 1 + strlen(c->value) + 2 +
		3504	(c->path_from_set ?
		3505	8 + strlen(c->path) + 2 : 0) +
		3506	(c->domain_from_set ?
		3507	10 + strlen(c->domain) + 2 : 0);
		3508
		3509	if (used + max_len >= alloc) {
		3510	char temp = realloc(buf, *alloc + 4096);
		3511	if (!temp) {
		3512	return false;
		3513	}
		3514	*buf = temp;
		3515	*alloc += 4096;
		3516	}
		3517
		3518	if (version == COOKIE_NETSCAPE) {
		3519	/* Original Netscape cookie */
		3520	sprintf(buf + used - 1, "; %s=", c->name);
		3521	*used += 2 + strlen(c->name) + 1;
		3522
		3523	/* The Netscape spec doesn't mention quoting of cookie values.
		3524	* RFC 2109 $10.1.3 indicates that values must not be quoted.
		3525	*
		3526	* However, other browsers preserve quoting, so we should, too
		3527	*/
		3528	if (c->value_was_quoted) {
		3529	sprintf(buf + used - 1, "\"%s\"", c->value);
		3530	*used += 1 + strlen(c->value) + 1;
		3531	} else {
		3532	/** \todo should we %XX-encode [;HT,SP] ? */
		3533	/** \todo Should we strip escaping backslashes? */
		3534	sprintf(buf + used - 1, "%s", c->value);
		3535	*used += strlen(c->value);
		3536	}
		3537
		3538	/* We don't send path/domain information -- that's what the
		3539	* Netscape spec suggests we should do, anyway. */
		3540	} else {
		3541	/* RFC2109 or RFC2965 cookie */
		3542	sprintf(buf + used - 1, "; %s=", c->name);
		3543	*used += 2 + strlen(c->name) + 1;
		3544
		3545	/* Value needs quoting if it contains any separator or if
		3546	* it needs preserving from the Set-Cookie header */
		3547	if (c->value_was_quoted \|\|
		3548	strpbrk(c->value, separators) != NULL) {
		3549	sprintf(buf + used - 1, "\"%s\"", c->value);
		3550	*used += 1 + strlen(c->value) + 1;
		3551	} else {
		3552	sprintf(buf + used - 1, "%s", c->value);
		3553	*used += strlen(c->value);
		3554	}
		3555
		3556	if (c->path_from_set) {
		3557	/* Path, quoted if necessary */
		3558	sprintf(buf + used - 1, "; $Path=");
		3559	*used += 8;
		3560
		3561	if (strpbrk(c->path, separators) != NULL) {
		3562	sprintf(buf + used - 1, "\"%s\"", c->path);
		3563	*used += 1 + strlen(c->path) + 1;
		3564	} else {
		3565	sprintf(buf + used - 1, "%s", c->path);
		3566	*used += strlen(c->path);
		3567	}
		3568	}
		3569
		3570	if (c->domain_from_set) {
		3571	/* Domain, quoted if necessary */
		3572	sprintf(buf + used - 1, "; $Domain=");
		3573	*used += 10;
		3574
		3575	if (strpbrk(c->domain, separators) != NULL) {
		3576	sprintf(buf + used - 1, "\"%s\"", c->domain);
		3577	*used += 1 + strlen(c->domain) + 1;
		3578	} else {
		3579	sprintf(buf + used - 1, "%s", c->domain);
		3580	*used += strlen(c->domain);
		3581	}
		3582	}
		3583	}
		3584
		3585	return true;
		3586	}
		3587
		3588	/**
		3589	* Load a cookie file into the database
		3590	*
		3591	* \param filename File to load
		3592	*/
		3593	void urldb_load_cookies(const char *filename)
		3594	{
		3595	FILE *fp;
		3596	char s[16*1024];
		3597
		3598	assert(filename);
		3599
		3600	fp = fopen(filename, "r");
		3601	if (!fp)
		3602	return;
		3603
		3604	#define FIND_T { \
		3605	for (; p && p != '\t'; p++) \
		3606	; /* do nothing */ \
		3607	if (p >= end) { \
		3608	LOG(("Overran input")); \
		3609	continue; \
		3610	} \
		3611	*p++ = '\0'; \
		3612	}
		3613
		3614	#define SKIP_T { \
		3615	for (; p && p == '\t'; p++) \
		3616	; /* do nothing */ \
		3617	if (p >= end) { \
		3618	LOG(("Overran input")); \
		3619	continue; \
		3620	} \
		3621	}
		3622
		3623	while (fgets(s, sizeof s, fp)) {
		3624	char p = s, end = 0,
		3625	domain, path, name, value, scheme, url,
		3626	*comment;
		3627	int version, domain_specified, path_specified,
		3628	secure, http_only, no_destroy, value_quoted;
		3629	time_t expires, last_used;
		3630	struct cookie_internal_data *c;
		3631
		3632	if(s[0] == 0 \|\| s[0] == '#')
		3633	/* Skip blank lines or comments */
		3634	continue;
		3635
		3636	s[strlen(s) - 1] = '\0'; /* lose terminating newline */
		3637	end = s + strlen(s);
		3638
		3639	/* Look for file version first
		3640	* (all input is ignored until this is read)
		3641	*/
		3642	if (strncasecmp(s, "Version:", 8) == 0) {
		3643	FIND_T; SKIP_T; loaded_cookie_file_version = atoi(p);
		3644
		3645	if (loaded_cookie_file_version <
		3646	MIN_COOKIE_FILE_VERSION) {
		3647	LOG(("Unsupported Cookie file version"));
		3648	break;
		3649	}
		3650
		3651	continue;
		3652	} else if (loaded_cookie_file_version == 0) {
		3653	/* Haven't yet seen version; skip this input */
		3654	continue;
		3655	}
		3656
		3657	/* One cookie/line */
		3658
		3659	/* Parse input */
		3660	FIND_T; version = atoi(s);
		3661	SKIP_T; domain = p; FIND_T;
		3662	SKIP_T; domain_specified = atoi(p); FIND_T;
		3663	SKIP_T; path = p; FIND_T;
		3664	SKIP_T; path_specified = atoi(p); FIND_T;
		3665	SKIP_T; secure = atoi(p); FIND_T;
		3666	if (loaded_cookie_file_version > 101) {
		3667	/* Introduced in version 1.02 */
		3668	SKIP_T; http_only = atoi(p); FIND_T;
		3669	} else {
		3670	http_only = 0;
		3671	}
		3672	SKIP_T; expires = (time_t)atoi(p); FIND_T;
		3673	SKIP_T; last_used = (time_t)atoi(p); FIND_T;
		3674	SKIP_T; no_destroy = atoi(p); FIND_T;
		3675	SKIP_T; name = p; FIND_T;
		3676	SKIP_T; value = p; FIND_T;
		3677	if (loaded_cookie_file_version > 100) {
		3678	/* Introduced in version 1.01 */
		3679	SKIP_T; value_quoted = atoi(p); FIND_T;
		3680	} else {
		3681	value_quoted = 0;
		3682	}
		3683	SKIP_T; scheme = p; FIND_T;
		3684	SKIP_T; url = p; FIND_T;
		3685
		3686	/* Comment may have no content, so don't
		3687	* use macros as they'll break */
		3688	for (; p && p == '\t'; p++)
		3689	; /* do nothing */
		3690	comment = p;
		3691
		3692	assert(p <= end);
		3693
		3694	/* Now create cookie */
		3695	c = malloc(sizeof(struct cookie_internal_data));
		3696	if (!c)
		3697	break;
		3698
		3699	c->name = strdup(name);
		3700	c->value = strdup(value);
		3701	c->value_was_quoted = value_quoted;
		3702	c->comment = strdup(comment);
		3703	c->domain_from_set = domain_specified;
		3704	c->domain = strdup(domain);
		3705	c->path_from_set = path_specified;
		3706	c->path = strdup(path);
		3707	c->expires = expires;
		3708	c->last_used = last_used;
		3709	c->secure = secure;
		3710	c->http_only = http_only;
		3711	c->version = version;
		3712	c->no_destroy = no_destroy;
		3713
		3714	if (!(c->name && c->value && c->comment &&
		3715	c->domain && c->path)) {
		3716	urldb_free_cookie(c);
		3717	break;
		3718	}
		3719
		3720	if (c->domain[0] != '.') {
		3721	lwc_string *scheme_lwc = NULL;
		3722	nsurl *url_nsurl = NULL;
		3723
		3724	assert(scheme[0] != 'u');
		3725
		3726	if (nsurl_create(url, &url_nsurl) != NSERROR_OK) {
		3727	urldb_free_cookie(c);
		3728	break;
		3729	}
		3730	scheme_lwc = nsurl_get_component(url_nsurl,
		3731	NSURL_SCHEME);
		3732
		3733	/* And insert it into database */
		3734	if (!urldb_insert_cookie(c, scheme_lwc, url_nsurl)) {
		3735	/* Cookie freed for us */
		3736	nsurl_unref(url_nsurl);
		3737	lwc_string_unref(scheme_lwc);
		3738	break;
		3739	}
		3740	nsurl_unref(url_nsurl);
		3741	lwc_string_unref(scheme_lwc);
		3742
		3743	} else {
		3744	if (!urldb_insert_cookie(c, NULL, NULL)) {
		3745	/* Cookie freed for us */
		3746	break;
		3747	}
		3748	}
		3749	}
		3750
		3751	#undef SKIP_T
		3752	#undef FIND_T
		3753
		3754	fclose(fp);
		3755	}
		3756
		3757	/**
		3758	* Delete a cookie
		3759	*
		3760	* \param domain The cookie's domain
		3761	* \param path The cookie's path
		3762	* \param name The cookie's name
		3763	*/
		3764	void urldb_delete_cookie(const char domain, const char path,
		3765	const char *name)
		3766	{
		3767	urldb_delete_cookie_hosts(domain, path, name, &db_root);
		3768	}
		3769
		3770	void urldb_delete_cookie_hosts(const char domain, const char path,
		3771	const char name, struct host_part parent)
		3772	{
		3773	struct host_part *h;
		3774	assert(parent);
		3775
		3776	urldb_delete_cookie_paths(domain, path, name, &parent->paths);
		3777
		3778	for (h = parent->children; h; h = h->next)
		3779	urldb_delete_cookie_hosts(domain, path, name, h);
		3780	}
		3781
		3782	void urldb_delete_cookie_paths(const char domain, const char path,
		3783	const char name, struct path_data parent)
		3784	{
		3785	struct cookie_internal_data *c;
		3786	struct path_data *p = parent;
		3787
		3788	assert(parent);
		3789
		3790	do {
		3791	for (c = p->cookies; c; c = c->next) {
		3792	if (strcmp(c->domain, domain) == 0 &&
		3793	strcmp(c->path, path) == 0 &&
		3794	strcmp(c->name, name) == 0) {
		3795	if (c->prev)
		3796	c->prev->next = c->next;
		3797	else
		3798	p->cookies = c->next;
		3799
		3800	if (c->next)
		3801	c->next->prev = c->prev;
		3802	else
		3803	p->cookies_end = c->prev;
		3804
		3805	cookies_remove((struct cookie_data *)c);
		3806	urldb_free_cookie(c);
		3807
		3808	return;
		3809	}
		3810	}
		3811
		3812	if (p->children) {
		3813	p = p->children;
		3814	} else {
		3815	while (p != parent) {
		3816	if (p->next != NULL) {
		3817	p = p->next;
		3818	break;
		3819	}
		3820
		3821	p = p->parent;
		3822	}
		3823	}
		3824	} while(p != parent);
		3825	}
		3826
		3827	/**
		3828	* Save persistent cookies to file
		3829	*
		3830	* \param filename Path to save to
		3831	*/
		3832	void urldb_save_cookies(const char *filename)
		3833	{
		3834	FILE *fp;
		3835	int cookie_file_version = max(loaded_cookie_file_version,
		3836	COOKIE_FILE_VERSION);
		3837
		3838	assert(filename);
		3839
		3840	fp = fopen(filename, "w");
		3841	if (!fp)
		3842	return;
		3843
		3844	fprintf(fp, "# >%s\n", filename);
		3845	fprintf(fp, "# NetSurf cookies file.\n"
		3846	"#\n"
		3847	"# Lines starting with a '#' are comments, "
		3848	"blank lines are ignored.\n"
		3849	"#\n"
		3850	"# All lines prior to \"Version:\t%d\" are discarded.\n"
		3851	"#\n"
		3852	"# Version\tDomain\tDomain from Set-Cookie\tPath\t"
		3853	"Path from Set-Cookie\tSecure\tHTTP-Only\tExpires\tLast used\t"
		3854	"No destroy\tName\tValue\tValue was quoted\tScheme\t"
		3855	"URL\tComment\n",
		3856	cookie_file_version);
		3857	fprintf(fp, "Version:\t%d\n", cookie_file_version);
		3858
		3859	urldb_save_cookie_hosts(fp, &db_root);
		3860
		3861	fclose(fp);
		3862	}
		3863
		3864	/**
		3865	* Save a host subtree's cookies
		3866	*
		3867	* \param fp File pointer to write to
		3868	* \param parent Parent host
		3869	*/
		3870	void urldb_save_cookie_hosts(FILE fp, struct host_part parent)
		3871	{
		3872	struct host_part *h;
		3873	assert(fp && parent);
		3874
		3875	urldb_save_cookie_paths(fp, &parent->paths);
		3876
		3877	for (h = parent->children; h; h = h->next)
		3878	urldb_save_cookie_hosts(fp, h);
		3879	}
		3880
		3881	/**
		3882	* Save a path subtree's cookies
		3883	*
		3884	* \param fp File pointer to write to
		3885	* \param parent Parent path
		3886	*/
		3887	void urldb_save_cookie_paths(FILE fp, struct path_data parent)
		3888	{
		3889	struct path_data *p = parent;
		3890	time_t now = time(NULL);
		3891
		3892	assert(fp && parent);
		3893
		3894	do {
		3895	if (p->cookies != NULL) {
		3896	struct cookie_internal_data *c;
		3897
		3898	for (c = p->cookies; c != NULL; c = c->next) {
		3899	if (c->expires == -1 \|\| c->expires < now)
		3900	/* Skip expired & session cookies */
		3901	continue;
		3902
		3903	fprintf(fp,
		3904	"%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t"
		3905	"%s\t%s\t%d\t%s\t%s\t%s\n",
		3906	c->version, c->domain,
		3907	c->domain_from_set, c->path,
		3908	c->path_from_set, c->secure,
		3909	c->http_only,
		3910	(int)c->expires, (int)c->last_used,
		3911	c->no_destroy, c->name, c->value,
		3912	c->value_was_quoted,
		3913	p->scheme ? lwc_string_data(p->scheme) :
		3914	"unused",
		3915	p->url ? nsurl_access(p->url) :
		3916	"unused",
		3917	c->comment ? c->comment : "");
		3918	}
		3919	}
		3920
		3921	if (p->children != NULL) {
		3922	p = p->children;
		3923	} else {
		3924	while (p != parent) {
		3925	if (p->next != NULL) {
		3926	p = p->next;
		3927	break;
		3928	}
		3929
		3930	p = p->parent;
		3931	}
		3932	}
		3933	} while (p != parent);
		3934	}
		3935
		3936
		3937	/**
		3938	* Destroy urldb
		3939	*/
		3940	void urldb_destroy(void)
		3941	{
		3942	struct host_part a, b;
		3943	int i;
		3944
		3945	/* Clean up search trees */
		3946	for (i = 0; i < NUM_SEARCH_TREES; i++) {
		3947	if (search_trees[i] != &empty)
		3948	urldb_destroy_search_tree(search_trees[i]);
		3949	}
		3950
		3951	/* And database */
		3952	for (a = db_root.children; a; a = b) {
		3953	b = a->next;
		3954	urldb_destroy_host_tree(a);
		3955	}
		3956	}
		3957
		3958	/**
		3959	* Destroy a host tree
		3960	*
		3961	* \param root Root node of tree to destroy
		3962	*/
		3963	void urldb_destroy_host_tree(struct host_part *root)
		3964	{
		3965	struct host_part a, b;
		3966	struct path_data p, q;
		3967	struct prot_space_data s, t;
		3968
		3969	/* Destroy children */
		3970	for (a = root->children; a; a = b) {
		3971	b = a->next;
		3972	urldb_destroy_host_tree(a);
		3973	}
		3974
		3975	/* Now clean up paths */
		3976	for (p = root->paths.children; p; p = q) {
		3977	q = p->next;
		3978	urldb_destroy_path_tree(p);
		3979	}
		3980
		3981	/* Root path */
		3982	urldb_destroy_path_node_content(&root->paths);
		3983
		3984	/* Proctection space data */
		3985	for (s = root->prot_space; s; s = t) {
		3986	t = s->next;
		3987	urldb_destroy_prot_space(s);
		3988	}
		3989
		3990	/* And ourselves */
		3991	free(root->part);
		3992	free(root);
		3993	}
		3994
		3995	/**
		3996	* Destroy a path tree
		3997	*
		3998	* \param root Root node of tree to destroy
		3999	*/
		4000	void urldb_destroy_path_tree(struct path_data *root)
		4001	{
		4002	struct path_data *p = root;
		4003
		4004	do {
		4005	if (p->children != NULL) {
		4006	p = p->children;
		4007	} else {
		4008	struct path_data *q = p;
		4009
		4010	while (p != root) {
		4011	if (p->next != NULL) {
		4012	p = p->next;
		4013	break;
		4014	}
		4015
		4016	p = p->parent;
		4017
		4018	urldb_destroy_path_node_content(q);
		4019	free(q);
		4020
		4021	q = p;
		4022	}
		4023
		4024	urldb_destroy_path_node_content(q);
		4025	free(q);
		4026	}
		4027	} while (p != root);
		4028	}
		4029
		4030	/**
		4031	* Destroy the contents of a path node
		4032	*
		4033	* \param node Node to destroy contents of (does not destroy node)
		4034	*/
		4035	void urldb_destroy_path_node_content(struct path_data *node)
		4036	{
		4037	struct cookie_internal_data a, b;
		4038	unsigned int i;
		4039
		4040	if (node->url != NULL)
		4041	nsurl_unref(node->url);
		4042
		4043	if (node->scheme != NULL)
		4044	lwc_string_unref(node->scheme);
		4045
		4046	free(node->segment);
		4047	for (i = 0; i < node->frag_cnt; i++)
		4048	free(node->fragment[i]);
		4049	free(node->fragment);
		4050
		4051	if (node->thumb)
		4052	bitmap_destroy(node->thumb);
		4053
		4054	free(node->urld.title);
		4055
		4056	for (a = node->cookies; a; a = b) {
		4057	b = a->next;
		4058	urldb_destroy_cookie(a);
		4059	}
		4060	}
		4061
		4062	/**
		4063	* Destroy a cookie node
		4064	*
		4065	* \param c Cookie to destroy
		4066	*/
		4067	void urldb_destroy_cookie(struct cookie_internal_data *c)
		4068	{
		4069	free(c->name);
		4070	free(c->value);
		4071	free(c->comment);
		4072	free(c->domain);
		4073	free(c->path);
		4074
		4075	free(c);
		4076	}
		4077
		4078	/**
		4079	* Destroy protection space data
		4080	*
		4081	* \param space Protection space to destroy
		4082	*/
		4083	void urldb_destroy_prot_space(struct prot_space_data *space)
		4084	{
		4085	lwc_string_unref(space->scheme);
		4086	free(space->realm);
		4087	free(space->auth);
		4088
		4089	free(space);
		4090	}
		4091
		4092
		4093	/**
		4094	* Destroy a search tree
		4095	*
		4096	* \param root Root node of tree to destroy
		4097	*/
		4098	void urldb_destroy_search_tree(struct search_node *root)
		4099	{
		4100	/* Destroy children */
		4101	if (root->left != &empty)
		4102	urldb_destroy_search_tree(root->left);
		4103	if (root->right != &empty)
		4104	urldb_destroy_search_tree(root->right);
		4105
		4106	/* And destroy ourselves */
		4107	free(root);
		4108	}
		4109

Subversion Repositories Kolibri OS

(root)/contrib/network/netsurf/netsurf/content/urldb.c – Rev 5043