Subversion Repositories Kolibri OS

Rev

Rev 4364 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3584 sourcerer 1
/*
2
 * Copyright 2006 John M Bell 
3
 * Copyright 2009 John Tytgat 
4
 *
5
 * This file is part of NetSurf, http://www.netsurf-browser.org/
6
 *
7
 * NetSurf is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; version 2 of the License.
10
 *
11
 * NetSurf is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
 */
19
 
20
/** \file
21
 * Unified URL information database (implementation)
22
 *
23
 * URLs are stored in a tree-based structure as follows:
24
 *
25
 * The host component is extracted from each URL and, if a FQDN, split on
26
 * every '.'. The tree is constructed by inserting each FQDN segment in
27
 * reverse order. Duplicate nodes are merged.
28
 *
29
 * If the host part of an URL is an IP address, then this is added to the
30
 * tree verbatim (as if it were a TLD).
31
 *
32
 * This provides something looking like:
33
 *
34
 * 			      root (a sentinel)
35
 * 				|
36
 * 	-------------------------------------------------
37
 * 	|	|	|	|	|	|	|
38
 *     com     edu     gov  127.0.0.1  net     org     uk	TLDs
39
 * 	|	|	|		|	|	|
40
 *    google   ...     ...             ...     ...     co	2LDs
41
 * 	|						|
42
 *     www					       bbc  Hosts/Subdomains
43
 *							|
44
 *						       www	...
45
 *
46
 * Each of the nodes in this tree is a struct host_part. This stores the
47
 * FQDN segment (or IP address) with which the node is concerned. Each node
48
 * may contain further information about paths on a host (struct path_data)
49
 * or SSL certificate processing on a host-wide basis
50
 * (host_part::permit_invalid_certs).
51
 *
52
 * Path data is concerned with storing various metadata about the path in
53
 * question. This includes global history data, HTTP authentication details
54
 * and any associated HTTP cookies. This is stored as a tree of path segments
55
 * hanging off the relevant host_part node.
56
 *
57
 * Therefore, to find the last visited time of the URL
58
 * http://www.example.com/path/to/resource.html, the FQDN tree would be
59
 * traversed in the order root -> "com" -> "example" -> "www". The "www"
60
 * node would have attached to it a tree of struct path_data:
61
 *
62
 *			    (sentinel)
63
 *				|
64
 * 			       path
65
 * 				|
66
 * 			       to
67
 * 				|
68
 * 			   resource.html
69
 *
70
 * This represents the absolute path "/path/to/resource.html". The leaf node
71
 * "resource.html" contains the last visited time of the resource.
72
 *
73
 * The mechanism described above is, however, not particularly conducive to
74
 * fast searching of the database for a given URL (or URLs beginning with a
75
 * given prefix). Therefore, an ancillary data structure is used to enable
76
 * fast searching. This structure simply reflects the contents of the
77
 * database, with entries being added/removed at the same time as for the
78
 * core database. In order to ensure that degenerate cases are kept to a
79
 * minimum, we use an AAtree. This is an approximation of a Red-Black tree
80
 * with similar performance characteristics, but with a significantly
81
 * simpler implementation. Entries in this tree comprise pointers to the
82
 * leaf nodes of the host tree described above.
83
 *
84
 * REALLY IMPORTANT NOTE: urldb expects all URLs to be normalised. Use of
85
 * non-normalised URLs with urldb will result in undefined behaviour and
86
 * potential crashes.
87
 */
88
 
89
#include 
90
#include 
91
#include 
92
#include 
93
#include 
94
#include 
95
#include 
96
#include 
97
 
98
#include 
99
 
100
#include "image/bitmap.h"
101
#include "content/content.h"
102
#include "content/urldb.h"
103
#include "desktop/cookies.h"
104
#include "desktop/options.h"
105
#include "utils/log.h"
106
#include "utils/corestrings.h"
107
#include "utils/filename.h"
108
#include "utils/url.h"
109
#include "utils/utils.h"
110
 
111
struct cookie_internal_data {
112
	char *name;		/**< Cookie name */
113
	char *value;		/**< Cookie value */
114
	bool value_was_quoted;	/**< Value was quoted in Set-Cookie: */
115
	char *comment;		/**< Cookie comment */
116
	bool domain_from_set;	/**< Domain came from Set-Cookie: header */
117
	char *domain;		/**< Domain */
118
	bool path_from_set;	/**< Path came from Set-Cookie: header */
119
	char *path;		/**< Path */
120
	time_t expires;		/**< Expiry timestamp, or -1 for session */
121
	time_t last_used;	/**< Last used time */
122
	bool secure;		/**< Only send for HTTPS requests */
123
	bool http_only;		/**< Only expose to HTTP(S) requests */
124
	cookie_version version;	/**< Specification compliance */
125
	bool no_destroy;	/**< Never destroy this cookie,
126
				 * unless it's expired */
127
 
128
	struct cookie_internal_data *prev;	/**< Previous in list */
129
	struct cookie_internal_data *next;	/**< Next in list */
130
};
131
 
132
/* A protection space is defined as a tuple canonical_root_url and realm.
133
 * This structure lives as linked list element in a leaf host_part struct
134
 * so we need additional scheme and port to have a canonical_root_url.  */
135
struct prot_space_data {
136
	lwc_string *scheme;	/**< URL scheme of canonical hostname of this
137
				 * protection space. */
138
	unsigned int port;	/**< Port number of canonical hostname of this
139
				 * protection space. When 0, it means the
140
				 * default port for given scheme, i.e. 80
141
				 * (http), 443 (https). */
142
	char *realm;		/**< Protection realm */
143
 
144
	char *auth;		/**< Authentication details for this
145
				 * protection space in form
146
				 * username:password */
147
	struct prot_space_data *next;	/**< Next sibling */
148
};
149
 
150
/**
 * Cache details for a resource.
 */
struct cache_internal_data {
	/** Cached filename, or first byte 0 for none */
	char filename[12];
};
153
 
154
struct url_internal_data {
155
	char *title;		/**< Resource title */
156
	unsigned int visits;	/**< Visit count */
157
	time_t last_visit;	/**< Last visit time */
158
	content_type type;	/**< Type of resource */
159
};
160
 
161
struct path_data {
162
	nsurl *url;		/**< Full URL */
163
	lwc_string *scheme;	/**< URL scheme for data */
164
	unsigned int port;	/**< Port number for data. When 0, it means
165
				 * the default port for given scheme, i.e.
166
				 * 80 (http), 443 (https). */
167
	char *segment;		/**< Path segment for this node */
168
	unsigned int frag_cnt;	/**< Number of entries in path_data::fragment */
169
	char **fragment;	/**< Array of fragments */
170
	bool persistent;	/**< This entry should persist */
171
 
172
	struct bitmap *thumb;	/**< Thumbnail image of resource */
173
	struct url_internal_data urld;	/**< URL data for resource */
174
	struct cache_internal_data cache;	/**< Cache data for resource */
175
	const struct prot_space_data *prot_space;	/**< Protection space
176
				 * to which this resource belongs too. Can be
177
				 * NULL when it does not belong to a protection
178
				 * space or when it is not known. No
179
				 * ownership (is with struct host_part::prot_space). */
180
	struct cookie_internal_data *cookies;	/**< Cookies associated with resource */
181
	struct cookie_internal_data *cookies_end;	/**< Last cookie in list */
182
 
183
	struct path_data *next;	/**< Next sibling */
184
	struct path_data *prev;	/**< Previous sibling */
185
	struct path_data *parent;	/**< Parent path segment */
186
	struct path_data *children;	/**< Child path segments */
187
	struct path_data *last;		/**< Last child */
188
};
189
 
190
struct host_part {
191
	/**< Known paths on this host. This _must_ be first so that
192
	 * struct host_part *h = (struct host_part *)mypath; works */
193
	struct path_data paths;
194
	bool permit_invalid_certs;	/**< Allow access to SSL protected
195
					 * resources on this host without
196
					 * verifying certificate authenticity
197
					 */
198
 
199
	char *part;		/**< Part of host string */
200
 
201
	struct prot_space_data *prot_space;	/**< Linked list of all known
202
				 * proctection spaces known for his host and
203
				 * all its schems and ports. */
204
 
205
	struct host_part *next;	/**< Next sibling */
206
	struct host_part *prev;	/**< Previous sibling */
207
	struct host_part *parent;	/**< Parent host part */
208
	struct host_part *children;	/**< Child host parts */
209
};
210
 
211
/**
 * Node of a search tree; each node refers to an entry in the host tree.
 */
struct search_node {
	/** Host tree entry */
	const struct host_part *data;

	/** Node level */
	unsigned int level;

	/** Left subtree */
	struct search_node *left;
	/** Right subtree */
	struct search_node *right;
};
219
 
220
/* Destruction */
221
static void urldb_destroy_host_tree(struct host_part *root);
222
static void urldb_destroy_path_tree(struct path_data *root);
223
static void urldb_destroy_path_node_content(struct path_data *node);
224
static void urldb_destroy_cookie(struct cookie_internal_data *c);
225
static void urldb_destroy_prot_space(struct prot_space_data *space);
226
static void urldb_destroy_search_tree(struct search_node *root);
227
 
228
/* Saving */
229
static void urldb_save_search_tree(struct search_node *root, FILE *fp);
230
static void urldb_count_urls(const struct path_data *root, time_t expiry,
231
		unsigned int *count);
232
static void urldb_write_paths(const struct path_data *parent,
233
		const char *host, FILE *fp, char **path, int *path_alloc,
234
		int *path_used, time_t expiry);
235
 
236
/* Iteration */
237
static bool urldb_iterate_partial_host(struct search_node *root,
238
		const char *prefix, bool (*callback)(nsurl *url,
239
		const struct url_data *data));
240
static bool urldb_iterate_partial_path(const struct path_data *parent,
241
		const char *prefix, bool (*callback)(nsurl *url,
242
		const struct url_data *data));
243
static bool urldb_iterate_entries_host(struct search_node *parent,
244
		bool (*url_callback)(nsurl *url,
245
		const struct url_data *data),
246
		bool (*cookie_callback)(const struct cookie_data *data));
247
static bool urldb_iterate_entries_path(const struct path_data *parent,
248
		bool (*url_callback)(nsurl *url,
249
		const struct url_data *data),
250
		bool (*cookie_callback)(const struct cookie_data *data));
251
 
252
/* Insertion */
253
static struct host_part *urldb_add_host_node(const char *part,
254
		struct host_part *parent);
255
static struct path_data *urldb_add_path_node(lwc_string *scheme,
256
		unsigned int port, const char *segment, lwc_string *fragment,
257
		struct path_data *parent);
258
static int urldb_add_path_fragment_cmp(const void *a, const void *b);
259
static struct path_data *urldb_add_path_fragment(struct path_data *segment,
260
		lwc_string *fragment);
261
 
262
/* Lookup */
263
static struct path_data *urldb_find_url(nsurl *url);
264
static struct path_data *urldb_match_path(const struct path_data *parent,
265
		const char *path, lwc_string *scheme, unsigned short port);
266
static struct search_node **urldb_get_search_tree_direct(const char *host);
267
static struct search_node *urldb_get_search_tree(const char *host);
268
 
269
/* Dump */
270
static void urldb_dump_hosts(struct host_part *parent);
271
static void urldb_dump_paths(struct path_data *parent);
272
static void urldb_dump_search(struct search_node *parent, int depth);
273
 
274
/* Search tree */
275
static struct search_node *urldb_search_insert(struct search_node *root,
276
		const struct host_part *data);
277
static struct search_node *urldb_search_insert_internal(
278
		struct search_node *root, struct search_node *n);
279
/* for urldb_search_remove, see r5531 which removed it */
280
static const struct host_part *urldb_search_find(struct search_node *root,
281
		const char *host);
282
static struct search_node *urldb_search_skew(struct search_node *root);
283
static struct search_node *urldb_search_split(struct search_node *root);
284
static int urldb_search_match_host(const struct host_part *a,
285
		const struct host_part *b);
286
static int urldb_search_match_string(const struct host_part *a,
287
		const char *b);
288
static int urldb_search_match_prefix(const struct host_part *a,
289
		const char *b);
290
 
291
/* Cookies */
292
static struct cookie_internal_data *urldb_parse_cookie(nsurl *url,
293
		const char **cookie);
294
static bool urldb_parse_avpair(struct cookie_internal_data *c, char *n,
295
		char *v, bool was_quoted);
296
static bool urldb_insert_cookie(struct cookie_internal_data *c,
297
		lwc_string *scheme, nsurl *url);
298
static void urldb_free_cookie(struct cookie_internal_data *c);
299
static bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
300
		int *used, int *alloc, char **buf);
301
static void urldb_delete_cookie_hosts(const char *domain, const char *path,
302
		const char *name, struct host_part *parent);
303
static void urldb_delete_cookie_paths(const char *domain, const char *path,
304
		const char *name, struct path_data *parent);
305
static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent);
306
static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent);
307
 
308
/** Root database handle */
309
static struct host_part db_root;
310
 
311
/** Search trees - one per letter + 1 for IPs + 1 for Everything Else */
312
#define NUM_SEARCH_TREES 28
313
#define ST_IP 0
314
#define ST_EE 1
315
#define ST_DN 2
316
static struct search_node empty = { 0, 0, &empty, &empty };
317
static struct search_node *search_trees[NUM_SEARCH_TREES] = {
318
	&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
319
	&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
320
	&empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
321
	&empty, &empty, &empty, &empty
322
};
323
 
324
#define MIN_COOKIE_FILE_VERSION 100
325
#define COOKIE_FILE_VERSION 102
326
static int loaded_cookie_file_version;
327
#define MIN_URL_FILE_VERSION 106
328
#define URL_FILE_VERSION 106
329
 
330
/**
331
 * Import an URL database from file, replacing any existing database
332
 *
333
 * \param filename Name of file containing data
334
 */
335
void urldb_load(const char *filename)
336
{
337
#define MAXIMUM_URL_LENGTH 4096
338
	char s[MAXIMUM_URL_LENGTH];
339
	char host[256];
340
	struct host_part *h;
341
	int urls;
342
	int i;
343
	int version;
344
	int length;
345
	FILE *fp;
346
 
347
	assert(filename);
348
 
349
	LOG(("Loading URL file"));
350
 
351
	fp = fopen(filename, "r");
352
	if (!fp) {
353
		LOG(("Failed to open file '%s' for reading", filename));
354
		return;
355
	}
356
 
357
	if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
358
		fclose(fp);
359
		return;
360
	}
361
 
362
	version = atoi(s);
363
	if (version < MIN_URL_FILE_VERSION) {
364
		LOG(("Unsupported URL file version."));
365
		fclose(fp);
366
		return;
367
	}
368
	if (version > URL_FILE_VERSION) {
369
		LOG(("Unknown URL file version."));
370
		fclose(fp);
371
		return;
372
	}
373
 
374
	while (fgets(host, sizeof host, fp)) {
375
		/* get the hostname */
376
		length = strlen(host) - 1;
377
		host[length] = '\0';
378
 
379
		/* skip data that has ended up with a host of '' */
380
		if (length == 0) {
381
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
382
				break;
383
			urls = atoi(s);
384
			/* Eight fields/url */
385
			for (i = 0; i < (8 * urls); i++) {
386
				if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
387
					break;
388
			}
389
			continue;
390
		}
391
 
392
		/* read number of URLs */
393
		if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
394
			break;
395
		urls = atoi(s);
396
 
397
		/* no URLs => try next host */
398
		if (urls == 0) {
399
			LOG(("No URLs for '%s'", host));
400
			continue;
401
		}
402
 
403
		h = urldb_add_host(host);
404
		if (!h) {
405
			LOG(("Failed adding host: '%s'", host));
406
			die("Memory exhausted whilst loading URL file");
407
		}
408
 
409
		/* load the non-corrupt data */
410
		for (i = 0; i < urls; i++) {
411
			struct path_data *p = NULL;
412
			char scheme[64], ports[10];
413
			char url[64 + 3 + 256 + 6 + 4096 + 1];
414
			unsigned int port;
415
			bool is_file = false;
416
			nsurl *nsurl;
417
			lwc_string *scheme_lwc, *fragment_lwc;
418
			char *path_query;
419
			size_t len;
420
 
421
			if (!fgets(scheme, sizeof scheme, fp))
422
				break;
423
			length = strlen(scheme) - 1;
424
			scheme[length] = '\0';
425
 
426
			if (!fgets(ports, sizeof ports, fp))
427
				break;
428
			length = strlen(ports) - 1;
429
			ports[length] = '\0';
430
			port = atoi(ports);
431
 
432
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
433
				break;
434
			length = strlen(s) - 1;
435
			s[length] = '\0';
436
 
437
			if (!strcasecmp(host, "localhost") &&
438
					!strcasecmp(scheme, "file"))
439
				is_file = true;
440
 
441
			snprintf(url, sizeof url, "%s://%s%s%s%s",
442
					scheme,
443
					/* file URLs have no host */
444
					(is_file ? "" : host),
445
					(port ? ":" : ""),
446
					(port ? ports : ""),
447
					s);
448
 
449
			/* TODO: store URLs in pre-parsed state, and make
450
			 *       a nsurl_load to generate the nsurl more
451
			 *       swiftly.
452
			 *       Need a nsurl_save too.
453
			 */
454
			if (nsurl_create(url, &nsurl) != NSERROR_OK) {
455
				LOG(("Failed inserting '%s'", url));
456
				die("Memory exhausted whilst loading "
457
						"URL file");
458
			}
459
 
460
			/* Copy and merge path/query strings */
461
			if (nsurl_get(nsurl, NSURL_PATH | NSURL_QUERY,
462
					&path_query, &len) != NSERROR_OK) {
463
				LOG(("Failed inserting '%s'", url));
464
				die("Memory exhausted whilst loading "
465
						"URL file");
466
			}
467
 
468
			scheme_lwc = nsurl_get_component(nsurl, NSURL_SCHEME);
469
			fragment_lwc = nsurl_get_component(nsurl,
470
					NSURL_FRAGMENT);
471
			p = urldb_add_path(scheme_lwc, port, h, path_query,
472
					fragment_lwc, nsurl);
473
			if (!p) {
474
				LOG(("Failed inserting '%s'", url));
475
				die("Memory exhausted whilst loading "
476
						"URL file");
477
			}
478
			nsurl_unref(nsurl);
479
			lwc_string_unref(scheme_lwc);
480
			if (fragment_lwc != NULL)
481
				lwc_string_unref(fragment_lwc);
482
 
483
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
484
				break;
485
			if (p)
486
				p->urld.visits = (unsigned int)atoi(s);
487
 
488
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
489
				break;
490
			if (p)
491
				p->urld.last_visit = (time_t)atoi(s);
492
 
493
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
494
				break;
495
			if (p)
496
				p->urld.type = (content_type)atoi(s);
497
 
498
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
499
				break;
500
 
501
 
502
			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
503
				break;
504
			length = strlen(s) - 1;
505
			if (p && length > 0) {
506
				s[length] = '\0';
507
				p->urld.title = malloc(length + 1);
508
				if (p->urld.title)
509
					memcpy(p->urld.title, s, length + 1);
510
			}
511
		}
512
	}
513
 
514
	fclose(fp);
515
	LOG(("Successfully loaded URL file"));
516
#undef MAXIMUM_URL_LENGTH
517
}
518
 
519
/**
520
 * Export the current database to file
521
 *
522
 * \param filename Name of file to export to
523
 */
524
void urldb_save(const char *filename)
525
{
526
	FILE *fp;
527
	int i;
528
 
529
	assert(filename);
530
 
531
	fp = fopen(filename, "w");
532
	if (!fp) {
533
		LOG(("Failed to open file '%s' for writing", filename));
534
		return;
535
	}
536
 
537
	/* file format version number */
538
	fprintf(fp, "%d\n", URL_FILE_VERSION);
539
 
540
	for (i = 0; i != NUM_SEARCH_TREES; i++) {
541
		urldb_save_search_tree(search_trees[i], fp);
542
	}
543
 
544
	fclose(fp);
545
}
546
 
547
/**
548
 * Save a search (sub)tree
549
 *
550
 * \param root Root of (sub)tree to save
551
 * \param fp File to write to
552
 */
553
void urldb_save_search_tree(struct search_node *parent, FILE *fp)
554
{
555
	char host[256];
556
	const struct host_part *h;
557
	unsigned int path_count = 0;
558
	char *path, *p, *end;
559
	int path_alloc = 64, path_used = 1;
560
	time_t expiry;
561
 
562
	expiry = time(NULL) - ((60 * 60 * 24) * nsoption_int(expire_url));
563
 
564
	if (parent == &empty)
565
		return;
566
 
567
	urldb_save_search_tree(parent->left, fp);
568
 
569
	path = malloc(path_alloc);
570
	if (!path)
571
		return;
572
 
573
	path[0] = '\0';
574
 
575
	for (h = parent->data, p = host, end = host + sizeof host;
576
			h && h != &db_root && p < end; h = h->parent) {
577
		int written = snprintf(p, end - p, "%s%s", h->part,
578
				(h->parent && h->parent->parent) ? "." : "");
579
		if (written < 0) {
580
			free(path);
581
			return;
582
		}
583
		p += written;
584
	}
585
 
586
	urldb_count_urls(&parent->data->paths, expiry, &path_count);
587
 
588
	if (path_count > 0) {
589
		fprintf(fp, "%s\n%i\n", host, path_count);
590
 
591
		urldb_write_paths(&parent->data->paths, host, fp,
592
				&path, &path_alloc, &path_used, expiry);
593
	}
594
 
595
	free(path);
596
 
597
	urldb_save_search_tree(parent->right, fp);
598
}
599
 
600
/**
601
 * Count number of URLs associated with a host
602
 *
603
 * \param root Root of path data tree
604
 * \param expiry Expiry time for URLs
605
 * \param count Pointer to count
606
 */
607
void urldb_count_urls(const struct path_data *root, time_t expiry,
608
		unsigned int *count)
609
{
610
	const struct path_data *p = root;
611
 
612
	do {
613
		if (p->children != NULL) {
614
			/* Drill down into children */
615
			p = p->children;
616
		} else {
617
			/* No more children, increment count if required */
618
			if (p->persistent || ((p->urld.last_visit > expiry) &&
619
					(p->urld.visits > 0)))
620
				(*count)++;
621
 
622
			/* Now, find next node to process. */
623
			while (p != root) {
624
				if (p->next != NULL) {
625
					/* Have a sibling, process that */
626
					p = p->next;
627
					break;
628
				}
629
 
630
				/* Ascend tree */
631
				p = p->parent;
632
			}
633
		}
634
	} while (p != root);
635
}
636
 
637
/**
638
 * Write paths associated with a host
639
 *
640
 * \param parent Root of (sub)tree to write
641
 * \param host Current host name
642
 * \param fp File to write to
643
 * \param path Current path string
644
 * \param path_alloc Allocated size of path
645
 * \param path_used Used size of path
646
 * \param expiry Expiry time of URLs
647
 */
648
void urldb_write_paths(const struct path_data *parent, const char *host,
649
		FILE *fp, char **path, int *path_alloc, int *path_used,
650
		time_t expiry)
651
{
652
	const struct path_data *p = parent;
653
	int i;
654
 
655
	do {
656
		int seglen = p->segment != NULL ? strlen(p->segment) : 0;
657
		int len = *path_used + seglen + 1;
658
 
659
		if (*path_alloc < len) {
660
			char *temp = realloc(*path,
661
					(len > 64) ? len : *path_alloc + 64);
662
			if (!temp)
663
				return;
664
			*path = temp;
665
			*path_alloc = (len > 64) ? len : *path_alloc + 64;
666
		}
667
 
668
		if (p->segment != NULL)
669
			memcpy(*path + *path_used - 1, p->segment, seglen);
670
 
671
		if (p->children != NULL) {
672
			(*path)[*path_used + seglen - 1] = '/';
673
			(*path)[*path_used + seglen] = '\0';
674
		} else {
675
			(*path)[*path_used + seglen - 1] = '\0';
676
			len -= 1;
677
		}
678
 
679
		*path_used = len;
680
 
681
		if (p->children != NULL) {
682
			/* Drill down into children */
683
			p = p->children;
684
		} else {
685
			/* leaf node */
686
			if (p->persistent ||((p->urld.last_visit > expiry) &&
687
					(p->urld.visits > 0))) {
688
				fprintf(fp, "%s\n", lwc_string_data(p->scheme));
689
 
690
				if (p->port)
691
					fprintf(fp,"%d\n", p->port);
692
				else
693
					fprintf(fp, "\n");
694
 
695
				fprintf(fp, "%s\n", *path);
696
 
697
				/** \todo handle fragments? */
698
 
699
				fprintf(fp, "%i\n%i\n%i\n", p->urld.visits,
700
						(int)p->urld.last_visit,
701
						(int)p->urld.type);
702
 
703
				fprintf(fp, "\n");
704
 
705
				if (p->urld.title) {
706
					uint8_t *s = (uint8_t *) p->urld.title;
707
 
708
					for (i = 0; s[i] != '\0'; i++)
709
						if (s[i] < 32)
710
							s[i] = ' ';
711
					for (--i; ((i > 0) && (s[i] == ' '));
712
							i--)
713
						s[i] = '\0';
714
					fprintf(fp, "%s\n", p->urld.title);
715
				} else
716
					fprintf(fp, "\n");
717
			}
718
 
719
			/* Now, find next node to process. */
720
			while (p != parent) {
721
				int seglen = p->segment != NULL
722
						? strlen(p->segment) : 0;
723
 
724
				/* Remove our segment from the path */
725
				*path_used -= seglen;
726
				(*path)[*path_used - 1] = '\0';
727
 
728
				if (p->next != NULL) {
729
					/* Have a sibling, process that */
730
					p = p->next;
731
					break;
732
				}
733
 
734
				/* Going up, so remove '/' */
735
				*path_used -= 1;
736
				(*path)[*path_used - 1] = '\0';
737
 
738
				/* Ascend tree */
739
				p = p->parent;
740
			}
741
		}
742
	} while (p != parent);
743
}
744
 
745
/**
746
 * Set the cross-session persistence of the entry for an URL
747
 *
748
 * \param url Absolute URL to persist
749
 * \param persist True to persist, false otherwise
750
 */
751
void urldb_set_url_persistence(nsurl *url, bool persist)
752
{
753
	struct path_data *p;
754
 
755
	assert(url);
756
 
757
	p = urldb_find_url(url);
758
	if (!p)
759
		return;
760
 
761
	p->persistent = persist;
762
}
763
 
764
/**
765
 * Insert an URL into the database
766
 *
767
 * \param url Absolute URL to insert
768
 * \return true on success, false otherwise
769
 */
770
bool urldb_add_url(nsurl *url)
771
{
772
	struct host_part *h;
773
	struct path_data *p;
774
	lwc_string *scheme;
775
	lwc_string *port;
776
	lwc_string *host;
777
	lwc_string *fragment;
778
	const char *host_str;
779
	char *path_query;
780
	size_t len;
781
	bool match;
782
	unsigned int port_int;
783
 
784
	assert(url);
785
 
786
	/* Copy and merge path/query strings */
787
	if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) !=
788
			NSERROR_OK) {
789
		return false;
790
	}
791
 
792
	scheme = nsurl_get_component(url, NSURL_SCHEME);
793
	if (scheme == NULL)
794
		return false;
795
 
796
	host = nsurl_get_component(url, NSURL_HOST);
797
	if (host != NULL) {
798
		host_str = lwc_string_data(host);
799
		lwc_string_unref(host);
800
 
801
	} else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
802
			lwc_error_ok && match == true) {
803
		host_str = "localhost";
804
 
805
	} else {
806
		lwc_string_unref(scheme);
807
		return false;
808
	}
809
 
810
	fragment = nsurl_get_component(url, NSURL_FRAGMENT);
811
 
812
	port = nsurl_get_component(url, NSURL_PORT);
813
	if (port != NULL) {
814
		port_int = atoi(lwc_string_data(port));
815
		lwc_string_unref(port);
816
	} else {
817
		port_int = 0;
818
	}
819
 
820
	/* Get host entry */
821
	h = urldb_add_host(host_str);
822
 
823
	/* Get path entry */
824
	p = (h != NULL) ? urldb_add_path(scheme, port_int, h, path_query,
825
			fragment, url) : NULL;
826
 
827
	lwc_string_unref(scheme);
828
	if (fragment != NULL)
829
		lwc_string_unref(fragment);
830
 
831
	return (p != NULL);
832
}
833
 
834
/**
835
 * Set an URL's title string, replacing any existing one
836
 *
837
 * \param url The URL to look for
838
 * \param title The title string to use (copied)
839
 */
840
void urldb_set_url_title(nsurl *url, const char *title)
841
{
842
	struct path_data *p;
843
	char *temp;
844
 
845
	assert(url && title);
846
 
847
	p = urldb_find_url(url);
848
	if (!p)
849
		return;
850
 
851
	temp = strdup(title);
852
	if (!temp)
853
		return;
854
 
855
	free(p->urld.title);
856
	p->urld.title = temp;
857
}
858
 
859
/**
860
 * Set an URL's content type
861
 *
862
 * \param url The URL to look for
863
 * \param type The type to set
864
 */
865
void urldb_set_url_content_type(nsurl *url, content_type type)
866
{
867
	struct path_data *p;
868
 
869
	assert(url);
870
 
871
	p = urldb_find_url(url);
872
	if (!p)
873
		return;
874
 
875
	p->urld.type = type;
876
}
877
 
878
/**
879
 * Update an URL's visit data
880
 *
881
 * \param url The URL to update
882
 */
883
void urldb_update_url_visit_data(nsurl *url)
884
{
885
	struct path_data *p;
886
 
887
	assert(url);
888
 
889
	p = urldb_find_url(url);
890
	if (!p)
891
		return;
892
 
893
	p->urld.last_visit = time(NULL);
894
	p->urld.visits++;
895
}
896
 
897
/**
898
 * Reset an URL's visit statistics
899
 *
900
 * \param url The URL to reset
901
 */
902
void urldb_reset_url_visit_data(nsurl *url)
903
{
904
	struct path_data *p;
905
 
906
	assert(url);
907
 
908
	p = urldb_find_url(url);
909
	if (!p)
910
		return;
911
 
912
	p->urld.last_visit = (time_t)0;
913
	p->urld.visits = 0;
914
}
915
 
916
 
917
/**
918
 * Find data for an URL.
919
 *
920
 * \param url Absolute URL to look for
921
 * \return Pointer to result struct, or NULL
922
 */
923
const struct url_data *urldb_get_url_data(nsurl *url)
924
{
925
	struct path_data *p;
926
	struct url_internal_data *u;
927
 
928
	assert(url);
929
 
930
	p = urldb_find_url(url);
931
	if (!p)
932
		return NULL;
933
 
934
	u = &p->urld;
935
 
936
	return (const struct url_data *) u;
937
}
938
 
939
/**
940
 * Extract an URL from the db
941
 *
942
 * \param url URL to extract
943
 * \return Pointer to database's copy of URL or NULL if not found
944
 */
945
nsurl *urldb_get_url(nsurl *url)
946
{
947
	struct path_data *p;
948
 
949
	assert(url);
950
 
951
	p = urldb_find_url(url);
952
	if (!p)
953
		return NULL;
954
 
955
	return p->url;
956
}
957
 
958
/**
959
 * Look up authentication details in database
960
 *
961
 * \param url Absolute URL to search for
962
 * \param realm When non-NULL, it is realm which can be used to determine
963
 * the protection space when that's not been done before for given URL.
964
 * \return Pointer to authentication details, or NULL if not found
965
 */
966
const char *urldb_get_auth_details(nsurl *url, const char *realm)
967
{
968
	struct path_data *p, *p_cur, *p_top;
969
 
970
	assert(url);
971
 
972
	/* add to the db, so our lookup will work */
973
	urldb_add_url(url);
974
 
975
	p = urldb_find_url(url);
976
	if (!p)
977
		return NULL;
978
 
979
	/* Check for any auth details attached to the path_data node or any of
980
	 * its parents. */
981
	for (p_cur = p; p_cur != NULL; p_top = p_cur, p_cur = p_cur->parent) {
982
		if (p_cur->prot_space) {
983
			return p_cur->prot_space->auth;
984
		}
985
	}
986
 
987
	/* Only when we have a realm (and canonical root of given URL), we can
988
	 * uniquely locate the protection space. */
989
	if (realm != NULL) {
990
		const struct host_part *h = (const struct host_part *)p_top;
991
		const struct prot_space_data *space;
992
		bool match;
993
 
994
		/* Search for a possible matching protection space. */
995
		for (space = h->prot_space; space != NULL;
996
				space = space->next) {
997
			if (!strcmp(space->realm, realm) &&
998
					lwc_string_isequal(space->scheme,
999
							p->scheme, &match) ==
1000
							lwc_error_ok &&
1001
					match == true &&
1002
					space->port == p->port) {
1003
				p->prot_space = space;
1004
				return p->prot_space->auth;
1005
			}
1006
		}
1007
	}
1008
 
1009
	return NULL;
1010
}
1011
 
1012
/**
1013
 * Retrieve certificate verification permissions from database
1014
 *
1015
 * \param url Absolute URL to search for
1016
 * \return true to permit connections to hosts with invalid certificates,
1017
 * false otherwise.
1018
 */
1019
bool urldb_get_cert_permissions(nsurl *url)
1020
{
1021
	struct path_data *p;
1022
	const struct host_part *h;
1023
 
1024
	assert(url);
1025
 
1026
	p = urldb_find_url(url);
1027
	if (!p)
1028
		return false;
1029
 
1030
	for (; p && p->parent; p = p->parent)
1031
		/* do nothing */;
1032
	assert(p);
1033
 
1034
	h = (const struct host_part *)p;
1035
 
1036
	return h->permit_invalid_certs;
1037
}
1038
 
1039
/**
1040
 * Set authentication data for an URL
1041
 *
1042
 * \param url The URL to consider
1043
 * \param realm The authentication realm
1044
 * \param auth The authentication details (in form username:password)
1045
 */
1046
void urldb_set_auth_details(nsurl *url, const char *realm,
1047
		const char *auth)
1048
{
1049
	struct path_data *p, *pi;
1050
	struct host_part *h;
1051
	struct prot_space_data *space, *space_alloc;
1052
	char *realm_alloc, *auth_alloc;
1053
	bool match;
1054
 
1055
	assert(url && realm && auth);
1056
 
1057
	/* add url, in case it's missing */
1058
	urldb_add_url(url);
1059
 
1060
	p = urldb_find_url(url);
1061
 
1062
	if (!p)
1063
		return;
1064
 
1065
	/* Search for host_part */
1066
	for (pi = p; pi->parent != NULL; pi = pi->parent)
1067
		;
1068
	h = (struct host_part *)pi;
1069
 
1070
	/* Search if given URL belongs to a protection space we already know of. */
1071
	for (space = h->prot_space; space; space = space->next) {
1072
		if (!strcmp(space->realm, realm) &&
1073
				lwc_string_isequal(space->scheme, p->scheme,
1074
						&match) == lwc_error_ok &&
1075
				match == true &&
1076
				space->port == p->port)
1077
			break;
1078
	}
1079
 
1080
	if (space != NULL) {
1081
		/* Overrule existing auth. */
1082
		free(space->auth);
1083
		space->auth = strdup(auth);
1084
	} else {
1085
		/* Create a new protection space. */
1086
		space = space_alloc = malloc(sizeof(struct prot_space_data));
1087
		realm_alloc = strdup(realm);
1088
		auth_alloc = strdup(auth);
1089
 
1090
		if (!space_alloc || !realm_alloc || !auth_alloc) {
1091
			free(space_alloc);
1092
			free(realm_alloc);
1093
			free(auth_alloc);
1094
			return;
1095
		}
1096
 
1097
		space->scheme = lwc_string_ref(p->scheme);
1098
		space->port = p->port;
1099
		space->realm = realm_alloc;
1100
		space->auth = auth_alloc;
1101
		space->next = h->prot_space;
1102
		h->prot_space = space;
1103
	}
1104
 
1105
	p->prot_space = space;
1106
}
1107
 
1108
/**
1109
 * Set certificate verification permissions
1110
 *
1111
 * \param url URL to consider
1112
 * \param permit Set to true to allow invalid certificates
1113
 */
1114
void urldb_set_cert_permissions(nsurl *url, bool permit)
1115
{
1116
	struct path_data *p;
1117
	struct host_part *h;
1118
 
1119
	assert(url);
1120
 
1121
	/* add url, in case it's missing */
1122
	urldb_add_url(url);
1123
 
1124
	p = urldb_find_url(url);
1125
	if (!p)
1126
		return;
1127
 
1128
	for (; p && p->parent; p = p->parent)
1129
		/* do nothing */;
1130
	assert(p);
1131
 
1132
	h = (struct host_part *)p;
1133
 
1134
	h->permit_invalid_certs = permit;
1135
}
1136
 
1137
/**
1138
 * Set thumbnail for url, replacing any existing thumbnail
1139
 *
1140
 * \param url Absolute URL to consider
1141
 * \param bitmap Opaque pointer to thumbnail data, or NULL to invalidate
1142
 */
1143
void urldb_set_thumbnail(nsurl *url, struct bitmap *bitmap)
1144
{
1145
	struct path_data *p;
1146
 
1147
	assert(url);
1148
 
1149
	p = urldb_find_url(url);
1150
	if (!p)
1151
		return;
1152
 
1153
	if (p->thumb && p->thumb != bitmap)
1154
		bitmap_destroy(p->thumb);
1155
 
1156
	p->thumb = bitmap;
1157
}
1158
 
1159
/**
1160
 * Retrieve thumbnail data for given URL
1161
 *
1162
 * \param url Absolute URL to search for
1163
 * \return Pointer to thumbnail data, or NULL if not found.
1164
 */
1165
struct bitmap *urldb_get_thumbnail(nsurl *url)
1166
{
1167
	struct path_data *p;
1168
 
1169
	assert(url);
1170
 
1171
	p = urldb_find_url(url);
1172
	if (!p)
1173
		return NULL;
1174
 
1175
	return p->thumb;
1176
}
1177
 
1178
/**
1179
 * Iterate over entries in the database which match the given prefix
1180
 *
1181
 * \param prefix Prefix to match
1182
 * \param callback Callback function
1183
 */
1184
void urldb_iterate_partial(const char *prefix,
1185
		bool (*callback)(nsurl *url,
1186
		const struct url_data *data))
1187
{
1188
	char host[256];
1189
	char buf[260]; /* max domain + "www." */
1190
	const char *slash, *scheme_sep;
1191
	struct search_node *tree;
1192
	const struct host_part *h;
1193
 
1194
	assert(prefix && callback);
1195
 
1196
	/* strip scheme */
1197
	scheme_sep = strstr(prefix, "://");
1198
	if (scheme_sep)
1199
		prefix = scheme_sep + 3;
1200
 
1201
	slash = strchr(prefix, '/');
1202
	tree = urldb_get_search_tree(prefix);
1203
 
1204
	if (slash) {
1205
		/* if there's a slash in the input, then we can
1206
		 * assume that we're looking for a path */
1207
		snprintf(host, sizeof host, "%.*s",
1208
				(int) (slash - prefix), prefix);
1209
 
1210
		h = urldb_search_find(tree, host);
1211
		if (!h) {
1212
			int len = slash - prefix;
1213
 
1214
			if (len <= 3 || strncasecmp(host, "www.", 4) != 0) {
1215
				snprintf(buf, sizeof buf, "www.%s", host);
1216
				h = urldb_search_find(
1217
					search_trees[ST_DN + 'w' - 'a'],
1218
					buf);
1219
				if (!h)
1220
					return;
1221
			} else
1222
				return;
1223
		}
1224
 
1225
		if (h->paths.children) {
1226
			/* Have paths, iterate them */
1227
			urldb_iterate_partial_path(&h->paths, slash + 1,
1228
					callback);
1229
		}
1230
 
1231
	} else {
1232
		int len = strlen(prefix);
1233
 
1234
		/* looking for hosts */
1235
		if (!urldb_iterate_partial_host(tree, prefix, callback))
1236
			return;
1237
 
1238
		if (len <= 3 || strncasecmp(prefix, "www.", 4) != 0) {
1239
			/* now look for www.prefix */
1240
			snprintf(buf, sizeof buf, "www.%s", prefix);
1241
			if(!urldb_iterate_partial_host(
1242
					search_trees[ST_DN + 'w' - 'a'],
1243
					buf, callback))
1244
				return;
1245
		}
1246
	}
1247
}
1248
 
1249
/**
1250
 * Partial host iterator (internal)
1251
 *
1252
 * \param root Root of (sub)tree to traverse
1253
 * \param prefix Prefix to match
1254
 * \param callback Callback function
1255
 * \return true to continue, false otherwise
1256
 */
1257
bool urldb_iterate_partial_host(struct search_node *root, const char *prefix,
1258
		bool (*callback)(nsurl *url, const struct url_data *data))
1259
{
1260
	int c;
1261
 
1262
	assert(root && prefix && callback);
1263
 
1264
	if (root == &empty)
1265
		return true;
1266
 
1267
	c = urldb_search_match_prefix(root->data, prefix);
1268
 
1269
	if (c > 0)
1270
		/* No match => look in left subtree */
1271
		return urldb_iterate_partial_host(root->left, prefix,
1272
				callback);
1273
	else if (c < 0)
1274
		/* No match => look in right subtree */
1275
		return urldb_iterate_partial_host(root->right, prefix,
1276
				callback);
1277
	else {
1278
		/* Match => iterate over l/r subtrees & process this node */
1279
		if (!urldb_iterate_partial_host(root->left, prefix,
1280
				callback))
1281
			return false;
1282
 
1283
		if (root->data->paths.children) {
1284
			/* and extract all paths attached to this host */
1285
			if (!urldb_iterate_entries_path(&root->data->paths,
1286
					callback, NULL)) {
1287
				return false;
1288
			}
1289
		}
1290
 
1291
		if (!urldb_iterate_partial_host(root->right, prefix,
1292
				callback))
1293
			return false;
1294
	}
1295
 
1296
	return true;
1297
}
1298
 
1299
/**
1300
 * Partial path iterator (internal)
1301
 *
1302
 * \param parent Root of (sub)tree to traverse
1303
 * \param prefix Prefix to match
1304
 * \param callback Callback function
1305
 * \return true to continue, false otherwise
1306
 */
1307
bool urldb_iterate_partial_path(const struct path_data *parent,
1308
		const char *prefix, bool (*callback)(nsurl *url,
1309
		const struct url_data *data))
1310
{
1311
	const struct path_data *p = parent->children;
1312
	const char *slash, *end = prefix + strlen(prefix);
1313
 
1314
	/*
1315
	 * Given: http://www.example.org/a/b/c/d//e
1316
	 * and assuming a path tree:
1317
	 *     .
1318
	 *    / \
1319
	 *   a1 b1
1320
	 *  / \
1321
	 * a2 b2
1322
	 *    /|\
1323
	 *   a b c
1324
	 *   3 3 |
1325
	 *       d
1326
	 *       |
1327
	 *       e
1328
	 *      / \
1329
	 *      f g
1330
	 *
1331
	 * Prefix will be:	p will be:
1332
	 *
1333
	 * a/b/c/d//e		a1
1334
	 *   b/c/d//e		a2
1335
	 *   b/c/d//e		b3
1336
	 *     c/d//e		a3
1337
	 *     c/d//e		b3
1338
	 *     c/d//e		c
1339
	 *       d//e		d
1340
	 *         /e		e		(skip /)
1341
	 *          e		e
1342
	 *
1343
	 * I.E. we perform a breadth-first search of the tree.
1344
	 */
1345
 
1346
	do {
1347
		slash = strchr(prefix, '/');
1348
		if (!slash)
1349
			slash = end;
1350
 
1351
		if (slash == prefix && *prefix == '/') {
1352
			/* Ignore "//" */
1353
			prefix++;
1354
			continue;
1355
		}
1356
 
1357
		if (strncasecmp(p->segment, prefix, slash - prefix) == 0) {
1358
			/* prefix matches so far */
1359
			if (slash == end) {
1360
				/* we've run out of prefix, so all
1361
				 * paths below this one match */
1362
				if (!urldb_iterate_entries_path(p, callback,
1363
						NULL))
1364
					return false;
1365
 
1366
				/* Progress to next sibling */
1367
				p = p->next;
1368
			} else {
1369
				/* Skip over this segment */
1370
				prefix = slash + 1;
1371
 
1372
				p = p->children;
1373
			}
1374
		} else {
1375
			/* Doesn't match this segment, try next sibling */
1376
			p = p->next;
1377
		}
1378
	} while (p != NULL);
1379
 
1380
	return true;
1381
}
1382
 
1383
/**
1384
 * Iterate over all entries in database
1385
 *
1386
 * \param callback Function to callback for each entry
1387
 */
1388
void urldb_iterate_entries(bool (*callback)(nsurl *url,
1389
		const struct url_data *data))
1390
{
1391
	int i;
1392
 
1393
	assert(callback);
1394
 
1395
	for (i = 0; i < NUM_SEARCH_TREES; i++) {
1396
		if (!urldb_iterate_entries_host(search_trees[i],
1397
				callback, NULL))
1398
			break;
1399
	}
1400
}
1401
 
1402
/**
1403
 * Iterate over all cookies in database
1404
 *
1405
 * \param callback Function to callback for each entry
1406
 */
1407
void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *data))
1408
{
1409
	int i;
1410
 
1411
	assert(callback);
1412
 
1413
	for (i = 0; i < NUM_SEARCH_TREES; i++) {
1414
		if (!urldb_iterate_entries_host(search_trees[i],
1415
				NULL, callback))
1416
			break;
1417
	}
1418
}
1419
 
1420
/**
1421
 * Host data iterator (internal)
1422
 *
1423
 * \param parent Root of subtree to iterate over
1424
 * \param url_callback Callback function
1425
 * \param cookie_callback Callback function
1426
 * \return true to continue, false otherwise
1427
 */
1428
bool urldb_iterate_entries_host(struct search_node *parent,
1429
		bool (*url_callback)(nsurl *url,
1430
				const struct url_data *data),
1431
		bool (*cookie_callback)(const struct cookie_data *data))
1432
{
1433
	if (parent == &empty)
1434
		return true;
1435
 
1436
	if (!urldb_iterate_entries_host(parent->left,
1437
			url_callback, cookie_callback))
1438
		return false;
1439
 
1440
	if ((parent->data->paths.children) || ((cookie_callback) &&
1441
			(parent->data->paths.cookies))) {
1442
		/* We have paths (or domain cookies), so iterate them */
1443
		if (!urldb_iterate_entries_path(&parent->data->paths,
1444
				url_callback, cookie_callback)) {
1445
			return false;
1446
		}
1447
	}
1448
 
1449
	if (!urldb_iterate_entries_host(parent->right,
1450
			url_callback, cookie_callback))
1451
		return false;
1452
 
1453
	return true;
1454
}
1455
 
1456
/**
1457
 * Path data iterator (internal)
1458
 *
1459
 * \param parent Root of subtree to iterate over
1460
 * \param url_callback Callback function
1461
 * \param cookie_callback Callback function
1462
 * \return true to continue, false otherwise
1463
 */
1464
bool urldb_iterate_entries_path(const struct path_data *parent,
1465
		bool (*url_callback)(nsurl *url,
1466
				const struct url_data *data),
1467
		bool (*cookie_callback)(const struct cookie_data *data))
1468
{
1469
	const struct path_data *p = parent;
1470
	const struct cookie_data *c;
1471
 
1472
	do {
1473
		if (p->children != NULL) {
1474
			/* Drill down into children */
1475
			p = p->children;
1476
		} else {
1477
			/* All leaf nodes in the path tree should have an URL or
1478
			 * cookies attached to them. If this is not the case, it
1479
			 * indicates that there's a bug in the file loader/URL
1480
			 * insertion code. Therefore, assert this here. */
1481
			assert(url_callback || cookie_callback);
1482
 
1483
			/** \todo handle fragments? */
1484
			if (url_callback) {
1485
				const struct url_internal_data *u = &p->urld;
1486
 
1487
				assert(p->url);
1488
 
1489
				if (!url_callback(p->url,
1490
						(const struct url_data *) u))
1491
					return false;
1492
			} else {
1493
				c = (const struct cookie_data *)p->cookies;
1494
				for (; c != NULL; c = c->next)
1495
					if (!cookie_callback(c))
1496
						return false;
1497
			}
1498
 
1499
			/* Now, find next node to process. */
1500
			while (p != parent) {
1501
				if (p->next != NULL) {
1502
					/* Have a sibling, process that */
1503
					p = p->next;
1504
					break;
1505
				}
1506
 
1507
				/* Ascend tree */
1508
				p = p->parent;
1509
			}
1510
		}
1511
	} while (p != parent);
1512
 
1513
	return true;
1514
}
1515
 
1516
/**
1517
 * Add a host node to the tree
1518
 *
1519
 * \param part Host segment to add (or whole IP address) (copied)
1520
 * \param parent Parent node to add to
1521
 * \return Pointer to added node, or NULL on memory exhaustion
1522
 */
1523
struct host_part *urldb_add_host_node(const char *part,
1524
		struct host_part *parent)
1525
{
1526
	struct host_part *d;
1527
 
1528
	assert(part && parent);
1529
 
1530
	d = calloc(1, sizeof(struct host_part));
1531
	if (!d)
1532
		return NULL;
1533
 
1534
	d->part = strdup(part);
1535
	if (!d->part) {
1536
		free(d);
1537
		return NULL;
1538
	}
1539
 
1540
	d->next = parent->children;
1541
	if (parent->children)
1542
		parent->children->prev = d;
1543
	d->parent = parent;
1544
	parent->children = d;
1545
 
1546
	return d;
1547
}
1548
 
1549
/**
1550
 * Add a host to the database, creating any intermediate entries
1551
 *
1552
 * \param host Hostname to add
1553
 * \return Pointer to leaf node, or NULL on memory exhaustion
1554
 */
1555
struct host_part *urldb_add_host(const char *host)
1556
{
1557
	struct host_part *d = (struct host_part *) &db_root, *e;
1558
	struct search_node *s;
1559
	char buf[256]; /* 256 bytes is sufficient - domain names are
1560
			* limited to 255 chars. */
1561
	char *part;
1562
 
1563
	assert(host);
1564
 
1565
	if (url_host_is_ip_address(host)) {
1566
		/* Host is an IP, so simply add as TLD */
1567
 
1568
		/* Check for existing entry */
1569
		for (e = d->children; e; e = e->next)
1570
			if (strcasecmp(host, e->part) == 0)
1571
				/* found => return it */
1572
				return e;
1573
 
1574
		d = urldb_add_host_node(host, d);
1575
 
1576
		s = urldb_search_insert(search_trees[ST_IP], d);
1577
		if (!s) {
1578
			/* failed */
1579
			d = NULL;
1580
		} else {
1581
			search_trees[ST_IP] = s;
1582
		}
1583
 
1584
		return d;
1585
	}
1586
 
1587
	/* Copy host string, so we can corrupt it */
1588
	strncpy(buf, host, sizeof buf);
1589
	buf[sizeof buf - 1] = '\0';
1590
 
1591
	/* Process FQDN segments backwards */
1592
	do {
1593
		part = strrchr(buf, '.');
1594
		if (!part) {
1595
			/* last segment */
1596
			/* Check for existing entry */
1597
			for (e = d->children; e; e = e->next)
1598
				if (strcasecmp(buf, e->part) == 0)
1599
					break;
1600
 
1601
			if (e) {
1602
				d = e;
1603
			} else {
1604
				d = urldb_add_host_node(buf, d);
1605
			}
1606
 
1607
			/* And insert into search tree */
1608
			if (d) {
1609
				struct search_node **r;
1610
 
1611
				r = urldb_get_search_tree_direct(buf);
1612
				s = urldb_search_insert(*r, d);
1613
				if (!s) {
1614
					/* failed */
1615
					d = NULL;
1616
				} else {
1617
					*r = s;
1618
				}
1619
			}
1620
			break;
1621
		}
1622
 
1623
		/* Check for existing entry */
1624
		for (e = d->children; e; e = e->next)
1625
			if (strcasecmp(part + 1, e->part) == 0)
1626
				break;
1627
 
1628
		d = e ? e : urldb_add_host_node(part + 1, d);
1629
		if (!d)
1630
			break;
1631
 
1632
		*part = '\0';
1633
	} while (1);
1634
 
1635
	return d;
1636
}
1637
 
1638
/**
1639
 * Add a path node to the tree
1640
 *
1641
 * \param scheme URL scheme associated with path (copied)
1642
 * \param port Port number on host associated with path
1643
 * \param segment Path segment to add (copied)
1644
 * \param fragment URL fragment (copied), or NULL
1645
 * \param parent Parent node to add to
1646
 * \return Pointer to added node, or NULL on memory exhaustion
1647
 */
1648
struct path_data *urldb_add_path_node(lwc_string *scheme, unsigned int port,
1649
		const char *segment, lwc_string *fragment,
1650
		struct path_data *parent)
1651
{
1652
	struct path_data *d, *e;
1653
 
1654
	assert(scheme && segment && parent);
1655
 
1656
	d = calloc(1, sizeof(struct path_data));
1657
	if (!d)
1658
		return NULL;
1659
 
1660
	d->scheme = lwc_string_ref(scheme);
1661
 
1662
	d->port = port;
1663
 
1664
	d->segment = strdup(segment);
1665
	if (!d->segment) {
1666
		lwc_string_unref(d->scheme);
1667
		free(d);
1668
		return NULL;
1669
	}
1670
 
1671
	if (fragment) {
1672
		if (!urldb_add_path_fragment(d, fragment)) {
1673
			free(d->segment);
1674
			lwc_string_unref(d->scheme);
1675
			free(d);
1676
			return NULL;
1677
		}
1678
	}
1679
 
1680
	for (e = parent->children; e; e = e->next)
1681
		if (strcmp(e->segment, d->segment) > 0)
1682
			break;
1683
 
1684
	if (e) {
1685
		d->prev = e->prev;
1686
		d->next = e;
1687
		if (e->prev)
1688
			e->prev->next = d;
1689
		else
1690
			parent->children = d;
1691
		e->prev = d;
1692
	} else if (!parent->children) {
1693
		d->prev = d->next = NULL;
1694
		parent->children = parent->last = d;
1695
	} else {
1696
		d->next = NULL;
1697
		d->prev = parent->last;
1698
		parent->last->next = d;
1699
		parent->last = d;
1700
	}
1701
	d->parent = parent;
1702
 
1703
	return d;
1704
}
1705
 
1706
/**
1707
 * Add a path to the database, creating any intermediate entries
1708
 *
1709
 * \param scheme URL scheme associated with path
1710
 * \param port Port number on host associated with path
1711
 * \param host Host tree node to attach to
1712
 * \param path_query Absolute path plus query to add (freed)
1713
 * \param fragment URL fragment, or NULL
1714
 * \param url URL (fragment ignored)
1715
 * \return Pointer to leaf node, or NULL on memory exhaustion
1716
 */
1717
struct path_data *urldb_add_path(lwc_string *scheme, unsigned int port,
1718
		const struct host_part *host, char *path_query,
1719
		lwc_string *fragment, nsurl *url)
1720
{
1721
	struct path_data *d, *e;
1722
	char *buf = path_query;
1723
	char *segment, *slash;
1724
	bool match;
1725
 
1726
	assert(scheme && host && url);
1727
 
1728
	d = (struct path_data *) &host->paths;
1729
 
1730
	/* skip leading '/' */
1731
	segment = buf;
1732
	if (*segment == '/')
1733
		segment++;
1734
 
1735
	/* Process path segments */
1736
	do {
1737
		slash = strchr(segment, '/');
1738
		if (!slash) {
1739
			/* last segment */
1740
			/* look for existing entry */
1741
			for (e = d->children; e; e = e->next)
1742
				if (strcmp(segment, e->segment) == 0 &&
1743
						lwc_string_isequal(scheme,
1744
						e->scheme, &match) ==
1745
						lwc_error_ok &&
1746
						match == true &&
1747
						e->port == port)
1748
					break;
1749
 
1750
			d = e ? urldb_add_path_fragment(e, fragment) :
1751
					urldb_add_path_node(scheme, port,
1752
					segment, fragment, d);
1753
			break;
1754
		}
1755
 
1756
		*slash = '\0';
1757
 
1758
		/* look for existing entry */
1759
		for (e = d->children; e; e = e->next)
1760
			if (strcmp(segment, e->segment) == 0 &&
1761
					lwc_string_isequal(scheme, e->scheme,
1762
						&match) == lwc_error_ok &&
1763
						match == true &&
1764
					e->port == port)
1765
				break;
1766
 
1767
		d = e ? e : urldb_add_path_node(scheme, port, segment, NULL, d);
1768
		if (!d)
1769
			break;
1770
 
1771
		segment = slash + 1;
1772
	} while (1);
1773
 
1774
	free(path_query);
1775
 
1776
	if (d && !d->url) {
1777
		/* Insert URL */
1778
		if (nsurl_has_component(url, NSURL_FRAGMENT)) {
1779
			nserror err = nsurl_defragment(url, &d->url);
1780
			if (err != NSERROR_OK)
1781
				return NULL;
1782
		} else {
1783
			d->url = nsurl_ref(url);
1784
		}
1785
	}
1786
 
1787
	return d;
1788
}
1789
 
1790
/**
 * Fragment comparator callback for qsort
 *
 * Both arguments point at array elements of type (char *); the strings
 * they refer to are compared without regard to case.
 */
int urldb_add_path_fragment_cmp(const void *a, const void *b)
{
	const char *const *lhs = a;
	const char *const *rhs = b;

	return strcasecmp(*lhs, *rhs);
}
1797
 
1798
/**
1799
 * Add a fragment to a path segment
1800
 *
1801
 * \param segment Path segment to add to
1802
 * \param fragment Fragment to add (copied), or NULL
1803
 * \return segment or NULL on memory exhaustion
1804
 */
1805
struct path_data *urldb_add_path_fragment(struct path_data *segment,
1806
		lwc_string *fragment)
1807
{
1808
	char **temp;
1809
 
1810
	assert(segment);
1811
 
1812
	/* If no fragment, this function is a NOP
1813
	 * This may seem strange, but it makes the rest
1814
	 * of the code cleaner */
1815
	if (!fragment)
1816
		return segment;
1817
 
1818
	temp = realloc(segment->fragment,
1819
			(segment->frag_cnt + 1) * sizeof(char *));
1820
	if (!temp)
1821
		return NULL;
1822
 
1823
	segment->fragment = temp;
1824
	segment->fragment[segment->frag_cnt] =
1825
			strdup(lwc_string_data(fragment));
1826
	if (!segment->fragment[segment->frag_cnt]) {
1827
		/* Don't free temp - it's now our buffer */
1828
		return NULL;
1829
	}
1830
 
1831
	segment->frag_cnt++;
1832
 
1833
	/* We want fragments in alphabetical order, so sort them
1834
	 * It may prove better to insert in alphabetical order instead */
1835
	qsort(segment->fragment, segment->frag_cnt, sizeof (char *),
1836
			urldb_add_path_fragment_cmp);
1837
 
1838
	return segment;
1839
}
1840
 
1841
/**
1842
 * Find an URL in the database
1843
 *
1844
 * \param url Absolute URL to find
1845
 * \return Pointer to path data, or NULL if not found
1846
 */
1847
struct path_data *urldb_find_url(nsurl *url)
1848
{
1849
	const struct host_part *h;
1850
	struct path_data *p;
1851
	struct search_node *tree;
1852
	char *plq;
1853
	const char *host_str;
1854
	lwc_string *scheme, *host, *port;
1855
	size_t len = 0;
1856
	unsigned int port_int;
1857
	bool match;
1858
 
1859
	assert(url);
1860
 
1861
	scheme = nsurl_get_component(url, NSURL_SCHEME);
1862
	if (scheme == NULL)
1863
		return NULL;
1864
 
1865
	host = nsurl_get_component(url, NSURL_HOST);
1866
	if (host != NULL) {
1867
		host_str = lwc_string_data(host);
1868
		lwc_string_unref(host);
1869
 
1870
	} else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
1871
			lwc_error_ok && match == true) {
1872
		host_str = "localhost";
1873
 
1874
	} else {
1875
		lwc_string_unref(scheme);
1876
		return NULL;
1877
	}
1878
 
1879
	tree = urldb_get_search_tree(host_str);
1880
	h = urldb_search_find(tree, host_str);
1881
	if (!h) {
1882
		lwc_string_unref(scheme);
1883
		return NULL;
1884
	}
1885
 
1886
	/* generate plq (path, leaf, query) */
1887
	if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &plq, &len) !=
1888
			NSERROR_OK) {
1889
		lwc_string_unref(scheme);
1890
		return NULL;
1891
	}
1892
 
1893
	/* Get port */
1894
	port = nsurl_get_component(url, NSURL_PORT);
1895
	if (port != NULL) {
1896
		port_int = atoi(lwc_string_data(port));
1897
		lwc_string_unref(port);
1898
	} else {
1899
		port_int = 0;
1900
	}
1901
 
1902
	p = urldb_match_path(&h->paths, plq, scheme, port_int);
1903
 
1904
	free(plq);
1905
	lwc_string_unref(scheme);
1906
 
1907
	return p;
1908
}
1909
 
1910
/**
1911
 * Match a path string
1912
 *
1913
 * \param parent Path (sub)tree to look in
1914
 * \param path The path to search for
1915
 * \param scheme The URL scheme associated with the path
1916
 * \param port The port associated with the path
1917
 * \return Pointer to path data or NULL if not found.
1918
 */
1919
struct path_data *urldb_match_path(const struct path_data *parent,
1920
		const char *path, lwc_string *scheme, unsigned short port)
1921
{
1922
	const struct path_data *p;
1923
	const char *slash;
1924
	bool match;
1925
 
1926
	assert(parent != NULL);
1927
	assert(parent->segment == NULL);
1928
	assert(path[0] == '/');
1929
 
1930
	/* Start with children, as parent has no segment */
1931
	p = parent->children;
1932
 
1933
	while (p != NULL) {
1934
		slash = strchr(path + 1, '/');
1935
		if (!slash)
1936
			slash = path + strlen(path);
1937
 
1938
		if (strncmp(p->segment, path + 1, slash - path - 1) == 0 &&
1939
				lwc_string_isequal(p->scheme, scheme, &match) ==
1940
						lwc_error_ok &&
1941
				match == true &&
1942
				p->port == port) {
1943
			if (*slash == '\0') {
1944
				/* Complete match */
1945
				return (struct path_data *) p;
1946
			}
1947
 
1948
			/* Match so far, go down tree */
1949
			p = p->children;
1950
 
1951
			path = slash;
1952
		} else {
1953
			/* No match, try next sibling */
1954
			p = p->next;
1955
		}
1956
	}
1957
 
1958
	return NULL;
1959
}
1960
 
1961
/**
1962
 * Get the search tree for a particular host
1963
 *
1964
 * \param host  the host to lookup
1965
 * \return the corresponding search tree
1966
 */
1967
struct search_node **urldb_get_search_tree_direct(const char *host) {
1968
	assert(host);
1969
 
1970
	if (url_host_is_ip_address(host))
1971
		return &search_trees[ST_IP];
1972
	else if (isalpha(*host))
1973
		return &search_trees[ST_DN + tolower(*host) - 'a'];
1974
	return &search_trees[ST_EE];
1975
}
1976
 
1977
/**
 * Get the root of the search tree for a particular host
 *
 * Reads the tree root out of the slot chosen by
 * urldb_get_search_tree_direct.
 *
 * \param host  the host to lookup
 * \return the corresponding search tree
 */
struct search_node *urldb_get_search_tree(const char *host) {
	struct search_node **slot = urldb_get_search_tree_direct(host);

	return *slot;
}
1986
 
1987
/**
1988
 * Dump URL database to stderr
1989
 */
1990
void urldb_dump(void)
1991
{
1992
	int i;
1993
 
1994
	urldb_dump_hosts(&db_root);
1995
 
1996
	for (i = 0; i != NUM_SEARCH_TREES; i++)
1997
		urldb_dump_search(search_trees[i], 0);
1998
}
1999
 
2000
/**
2001
 * Dump URL database hosts to stderr
2002
 *
2003
 * \param parent Parent node of tree to dump
2004
 */
2005
void urldb_dump_hosts(struct host_part *parent)
2006
{
2007
	struct host_part *h;
2008
 
2009
	if (parent->part) {
2010
		LOG(("%s", parent->part));
2011
 
2012
		LOG(("\t%s invalid SSL certs",
2013
			parent->permit_invalid_certs ? "Permits" : "Denies"));
2014
	}
2015
 
2016
	/* Dump path data */
2017
	urldb_dump_paths(&parent->paths);
2018
 
2019
	/* and recurse */
2020
	for (h = parent->children; h; h = h->next)
2021
		urldb_dump_hosts(h);
2022
}
2023
 
2024
/**
2025
 * Dump URL database paths to stderr
2026
 *
2027
 * \param parent Parent node of tree to dump
2028
 */
2029
void urldb_dump_paths(struct path_data *parent)
2030
{
2031
	const struct path_data *p = parent;
2032
	unsigned int i;
2033
 
2034
	do {
2035
		if (p->segment != NULL) {
2036
			LOG(("\t%s : %u", lwc_string_data(p->scheme), p->port));
2037
 
2038
			LOG(("\t\t'%s'", p->segment));
2039
 
2040
			for (i = 0; i != p->frag_cnt; i++)
2041
				LOG(("\t\t\t#%s", p->fragment[i]));
2042
		}
2043
 
2044
		if (p->children != NULL) {
2045
			p = p->children;
2046
		} else {
2047
			while (p != parent) {
2048
				if (p->next != NULL) {
2049
					p = p->next;
2050
					break;
2051
				}
2052
 
2053
				p = p->parent;
2054
			}
2055
		}
2056
	} while (p != parent);
2057
}
2058
 
2059
/**
2060
 * Dump search tree
2061
 *
2062
 * \param parent Parent node of tree to dump
2063
 * \param depth Tree depth
2064
 */
2065
void urldb_dump_search(struct search_node *parent, int depth)
2066
{
2067
	const struct host_part *h;
2068
	int i;
2069
 
2070
	if (parent == &empty)
2071
		return;
2072
 
2073
	urldb_dump_search(parent->left, depth + 1);
2074
 
2075
	for (i = 0; i != depth; i++)
2076
			fputc(' ', stderr);
2077
 
2078
	for (h = parent->data; h; h = h->parent) {
2079
		if (h->part)
2080
			fprintf(stderr, "%s", h->part);
2081
 
2082
		if (h->parent && h->parent->parent)
2083
			fputc('.', stderr);
2084
	}
2085
 
2086
	fputc('\n', stderr);
2087
 
2088
	urldb_dump_search(parent->right, depth + 1);
2089
}
2090
 
2091
/**
2092
 * Insert a node into the search tree
2093
 *
2094
 * \param root Root of tree to insert into
2095
 * \param data User data to insert
2096
 * \return Pointer to updated root, or NULL if failed
2097
 */
2098
struct search_node *urldb_search_insert(struct search_node *root,
2099
		const struct host_part *data)
2100
{
2101
	struct search_node *n;
2102
 
2103
	assert(root && data);
2104
 
2105
	n = malloc(sizeof(struct search_node));
2106
	if (!n)
2107
		return NULL;
2108
 
2109
	n->level = 1;
2110
	n->data = data;
2111
	n->left = n->right = ∅
2112
 
2113
	root = urldb_search_insert_internal(root, n);
2114
 
2115
	return root;
2116
}
2117
 
2118
/**
2119
 * Insert node into search tree
2120
 *
2121
 * \param root Root of (sub)tree to insert into
2122
 * \param n Node to insert
2123
 * \return Pointer to updated root
2124
 */
2125
struct search_node *urldb_search_insert_internal(struct search_node *root,
2126
		struct search_node *n)
2127
{
2128
	assert(root && n);
2129
 
2130
	if (root == &empty) {
2131
		root = n;
2132
	} else {
2133
		int c = urldb_search_match_host(root->data, n->data);
2134
 
2135
		if (c > 0) {
2136
			root->left = urldb_search_insert_internal(
2137
					root->left, n);
2138
		} else if (c < 0) {
2139
			root->right = urldb_search_insert_internal(
2140
					root->right, n);
2141
		} else {
2142
			/* exact match */
2143
			free(n);
2144
			return root;
2145
		}
2146
 
2147
		root = urldb_search_skew(root);
2148
		root = urldb_search_split(root);
2149
	}
2150
 
2151
	return root;
2152
}
2153
 
2154
/**
2155
 * Find a node in a search tree
2156
 *
2157
 * \param root Tree to look in
2158
 * \param host Host to find
2159
 * \return Pointer to host tree node, or NULL if not found
2160
 */
2161
const struct host_part *urldb_search_find(struct search_node *root,
2162
		const char *host)
2163
{
2164
	int c;
2165
 
2166
	assert(root && host);
2167
 
2168
	if (root == &empty) {
2169
		return NULL;
2170
	}
2171
 
2172
	c = urldb_search_match_string(root->data, host);
2173
 
2174
	if (c > 0)
2175
		return urldb_search_find(root->left, host);
2176
	else if (c < 0)
2177
		return urldb_search_find(root->right, host);
2178
	else
2179
		return root->data;
2180
}
2181
 
2182
/**
2183
 * Compare a pair of host_parts
2184
 *
2185
 * \param a
2186
 * \param b
2187
 * \return 0 if match, non-zero, otherwise
2188
 */
2189
int urldb_search_match_host(const struct host_part *a,
2190
		const struct host_part *b)
2191
{
2192
	int ret;
2193
 
2194
	assert(a && b);
2195
 
2196
	/* traverse up tree to root, comparing parts as we go. */
2197
	for (; a && a != &db_root && b && b != &db_root;
2198
			a = a->parent, b = b->parent)
2199
		if ((ret = strcasecmp(a->part, b->part)) != 0)
2200
			/* They differ => return the difference here */
2201
			return ret;
2202
 
2203
	/* If we get here then either:
2204
	 *    a) The path lengths differ
2205
	 * or b) The hosts are identical
2206
	 */
2207
	if (a && a != &db_root && (!b || b == &db_root))
2208
		/* len(a) > len(b) */
2209
		return 1;
2210
	else if ((!a || a == &db_root) && b && b != &db_root)
2211
		/* len(a) < len(b) */
2212
		return -1;
2213
 
2214
	/* identical */
2215
	return 0;
2216
}
2217
 
2218
/**
2219
 * Compare host_part with a string
2220
 *
2221
 * \param a
2222
 * \param b
2223
 * \return 0 if match, non-zero, otherwise
2224
 */
2225
int urldb_search_match_string(const struct host_part *a,
2226
		const char *b)
2227
{
2228
	const char *end, *dot;
2229
	int plen, ret;
2230
 
2231
	assert(a && a != &db_root && b);
2232
 
2233
	if (url_host_is_ip_address(b)) {
2234
		/* IP address */
2235
		return strcasecmp(a->part, b);
2236
	}
2237
 
2238
	end = b + strlen(b) + 1;
2239
 
2240
	while (b < end && a && a != &db_root) {
2241
		dot = strchr(b, '.');
2242
		if (!dot) {
2243
			/* last segment */
2244
			dot = end - 1;
2245
		}
2246
 
2247
		/* Compare strings (length limited) */
2248
		if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
2249
			/* didn't match => return difference */
2250
			return ret;
2251
 
2252
		/* The strings matched, now check that the lengths do, too */
2253
		plen = strlen(a->part);
2254
 
2255
		if (plen > dot - b)
2256
			/* len(a) > len(b) */
2257
			return 1;
2258
		else if (plen < dot - b)
2259
			/* len(a) < len(b) */
2260
			return -1;
2261
 
2262
		b = dot + 1;
2263
		a = a->parent;
2264
	}
2265
 
2266
	/* If we get here then either:
2267
	 *    a) The path lengths differ
2268
	 * or b) The hosts are identical
2269
	 */
2270
	if (a && a != &db_root && b >= end)
2271
		/* len(a) > len(b) */
2272
		return 1;
2273
	else if ((!a || a == &db_root) && b < end)
2274
		/* len(a) < len(b) */
2275
		return -1;
2276
 
2277
	/* Identical */
2278
	return 0;
2279
}
2280
 
2281
/**
2282
 * Compare host_part with prefix
2283
 *
2284
 * \param a
2285
 * \param b
2286
 * \return 0 if match, non-zero, otherwise
2287
 */
2288
int urldb_search_match_prefix(const struct host_part *a,
2289
		const char *b)
2290
{
2291
	const char *end, *dot;
2292
	int plen, ret;
2293
 
2294
	assert(a && a != &db_root && b);
2295
 
2296
	if (url_host_is_ip_address(b)) {
2297
		/* IP address */
2298
		return strncasecmp(a->part, b, strlen(b));
2299
	}
2300
 
2301
	end = b + strlen(b) + 1;
2302
 
2303
	while (b < end && a && a != &db_root) {
2304
		dot = strchr(b, '.');
2305
		if (!dot) {
2306
			/* last segment */
2307
			dot = end - 1;
2308
		}
2309
 
2310
		/* Compare strings (length limited) */
2311
		if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
2312
			/* didn't match => return difference */
2313
			return ret;
2314
 
2315
		/* The strings matched */
2316
		if (dot < end - 1) {
2317
			/* Consider segment lengths only in the case
2318
			 * where the prefix contains segments */
2319
			plen = strlen(a->part);
2320
			if (plen > dot - b)
2321
				/* len(a) > len(b) */
2322
				return 1;
2323
			else if (plen < dot - b)
2324
				/* len(a) < len(b) */
2325
				return -1;
2326
		}
2327
 
2328
		b = dot + 1;
2329
		a = a->parent;
2330
	}
2331
 
2332
	/* If we get here then either:
2333
	 *    a) The path lengths differ
2334
	 * or b) The hosts are identical
2335
	 */
2336
	if (a && a != &db_root && b >= end)
2337
		/* len(a) > len(b) => prefix matches */
2338
		return 0;
2339
	else if ((!a || a == &db_root) && b < end)
2340
		/* len(a) < len(b) => prefix does not match */
2341
		return -1;
2342
 
2343
	/* Identical */
2344
	return 0;
2345
}
2346
 
2347
/**
2348
 * Rotate a subtree right
2349
 *
2350
 * \param root Root of subtree to rotate
2351
 * \return new root of subtree
2352
 */
2353
struct search_node *urldb_search_skew(struct search_node *root)
2354
{
2355
	struct search_node *temp;
2356
 
2357
	assert(root);
2358
 
2359
	if (root->left->level == root->level) {
2360
		temp = root->left;
2361
		root->left = temp->right;
2362
		temp->right = root;
2363
		root = temp;
2364
	}
2365
 
2366
	return root;
2367
}
2368
 
2369
/**
2370
 * Rotate a node left, increasing the parent's level
2371
 *
2372
 * \param root Root of subtree to rotate
2373
 * \return New root of subtree
2374
 */
2375
struct search_node *urldb_search_split(struct search_node *root)
2376
{
2377
	struct search_node *temp;
2378
 
2379
	assert(root);
2380
 
2381
	if (root->right->right->level == root->level) {
2382
		temp = root->right;
2383
		root->right = temp->left;
2384
		temp->left = root;
2385
		root = temp;
2386
 
2387
		root->level++;
2388
	}
2389
 
2390
	return root;
2391
}
2392
 
2393
/**
2394
 * Retrieve cookies for an URL
2395
 *
2396
 * \param url URL being fetched
2397
 * \param include_http_only Whether to include HTTP(S) only cookies.
2398
 * \return Cookies string for libcurl (on heap), or NULL on error/no cookies
2399
 */
2400
char *urldb_get_cookie(nsurl *url, bool include_http_only)
2401
{
2402
	const struct path_data *p, *q;
2403
	const struct host_part *h;
2404
	lwc_string *path_lwc;
2405
	struct cookie_internal_data *c;
2406
	int count = 0, version = COOKIE_RFC2965;
2407
	struct cookie_internal_data **matched_cookies;
2408
	int matched_cookies_size = 20;
2409
	int ret_alloc = 4096, ret_used = 1;
2410
	const char *path;
2411
	char *ret;
2412
	lwc_string *scheme;
2413
	time_t now;
2414
	int i;
2415
	bool match;
2416
 
2417
	assert(url != NULL);
2418
 
2419
	/* The URL must exist in the db in order to find relevant cookies, since
2420
	 * we search up the tree from the URL node, and cookies from further
2421
	 * up also apply. */
2422
	urldb_add_url(url);
2423
 
2424
	p = urldb_find_url(url);
2425
	if (!p)
2426
		return NULL;
2427
 
2428
	scheme = p->scheme;
2429
 
2430
	matched_cookies = malloc(matched_cookies_size *
2431
			sizeof(struct cookie_internal_data *));
2432
	if (!matched_cookies)
2433
		return NULL;
2434
 
2435
#define GROW_MATCHED_COOKIES						\
2436
	do {								\
2437
		if (count == matched_cookies_size) {			\
2438
			struct cookie_internal_data **temp;		\
2439
			temp = realloc(matched_cookies,			\
2440
				(matched_cookies_size + 20) *		\
2441
				sizeof(struct cookie_internal_data *));	\
2442
									\
2443
			if (temp == NULL) {				\
2444
				free(ret);				\
2445
				free(matched_cookies);			\
2446
				return NULL;				\
2447
			}						\
2448
									\
2449
			matched_cookies = temp;				\
2450
			matched_cookies_size += 20;			\
2451
		}							\
2452
	} while(0)
2453
 
2454
	ret = malloc(ret_alloc);
2455
	if (!ret) {
2456
		free(matched_cookies);
2457
		return NULL;
2458
	}
2459
 
2460
	ret[0] = '\0';
2461
 
2462
	path_lwc = nsurl_get_component(url, NSURL_PATH);
2463
	if (path_lwc == NULL) {
2464
		free(ret);
2465
		free(matched_cookies);
2466
		return NULL;
2467
	}
2468
	path = lwc_string_data(path_lwc);
2469
	lwc_string_unref(path_lwc);
2470
 
2471
	now = time(NULL);
2472
 
2473
	if (*(p->segment) != '\0') {
2474
		/* Match exact path, unless directory, when prefix matching
2475
		 * will handle this case for us. */
2476
		for (q = p->parent->children; q; q = q->next) {
2477
			if (strcmp(q->segment, p->segment))
2478
				continue;
2479
 
2480
			/* Consider all cookies associated with
2481
			 * this exact path */
2482
			for (c = q->cookies; c; c = c->next) {
2483
				if (c->expires != -1 && c->expires < now)
2484
					/* cookie has expired => ignore */
2485
					continue;
2486
 
2487
				if (c->secure && lwc_string_isequal(
2488
							q->scheme,
2489
							corestring_lwc_https,
2490
							&match) &&
2491
						match == false)
2492
					/* secure cookie for insecure host.
2493
					 * ignore */
2494
					continue;
2495
 
2496
				if (c->http_only && !include_http_only)
2497
					/* Ignore HttpOnly */
2498
					continue;
2499
 
2500
				matched_cookies[count++] = c;
2501
 
2502
				GROW_MATCHED_COOKIES;
2503
 
2504
				if (c->version < (unsigned int)version)
2505
					version = c->version;
2506
 
2507
				c->last_used = now;
2508
				cookies_schedule_update((struct cookie_data *)c);
2509
			}
2510
		}
2511
	}
2512
 
2513
	/* Now consider cookies whose paths prefix-match ours */
2514
	for (p = p->parent; p; p = p->parent) {
2515
		/* Find directory's path entry(ies) */
2516
		/* There are potentially multiple due to differing schemes */
2517
		for (q = p->children; q; q = q->next) {
2518
			if (*(q->segment) != '\0')
2519
				continue;
2520
 
2521
			for (c = q->cookies; c; c = c->next) {
2522
				if (c->expires != -1 && c->expires < now)
2523
					/* cookie has expired => ignore */
2524
					continue;
2525
 
2526
				if (c->secure && lwc_string_isequal(
2527
							q->scheme,
2528
							corestring_lwc_https,
2529
							&match) &&
2530
						match == false)
2531
					/* Secure cookie for insecure server
2532
					 * => ignore */
2533
					continue;
2534
 
2535
				matched_cookies[count++] = c;
2536
 
2537
				GROW_MATCHED_COOKIES;
2538
 
2539
				if (c->version < (unsigned int) version)
2540
					version = c->version;
2541
 
2542
				c->last_used = now;
2543
				cookies_schedule_update((struct cookie_data *)c);
2544
			}
2545
		}
2546
 
2547
		if (!p->parent) {
2548
			/* No parent, so bail here. This can't go in
2549
			 * the loop exit condition as we also want to
2550
			 * process the top-level node.
2551
                         *
2552
                         * If p->parent is NULL then p->cookies are
2553
                         * the domain cookies and thus we don't even
2554
                         * try matching against them.
2555
                         */
2556
			break;
2557
		}
2558
 
2559
		/* Consider p itself - may be the result of Path=/foo */
2560
		for (c = p->cookies; c; c = c->next) {
2561
			if (c->expires != -1 && c->expires < now)
2562
				/* cookie has expired => ignore */
2563
				continue;
2564
 
2565
			/* Ensure cookie path is a prefix of the resource */
2566
			if (strncmp(c->path, path, strlen(c->path)) != 0)
2567
				/* paths don't match => ignore */
2568
				continue;
2569
 
2570
			if (c->secure && lwc_string_isequal(p->scheme,
2571
						corestring_lwc_https,
2572
						&match) &&
2573
					match == false)
2574
				/* Secure cookie for insecure server
2575
				 * => ignore */
2576
				continue;
2577
 
2578
			matched_cookies[count++] = c;
2579
 
2580
			GROW_MATCHED_COOKIES;
2581
 
2582
			if (c->version < (unsigned int) version)
2583
				version = c->version;
2584
 
2585
			c->last_used = now;
2586
			cookies_schedule_update((struct cookie_data *)c);
2587
		}
2588
 
2589
	}
2590
 
2591
	/* Finally consider domain cookies for hosts which domain match ours */
2592
	for (h = (const struct host_part *)p; h && h != &db_root;
2593
			h = h->parent) {
2594
		for (c = h->paths.cookies; c; c = c->next) {
2595
			if (c->expires != -1 && c->expires < now)
2596
				/* cookie has expired => ignore */
2597
				continue;
2598
 
2599
			/* Ensure cookie path is a prefix of the resource */
2600
			if (strncmp(c->path, path, strlen(c->path)) != 0)
2601
				/* paths don't match => ignore */
2602
				continue;
2603
 
2604
			if (c->secure && lwc_string_isequal(scheme,
2605
						corestring_lwc_https,
2606
						&match) &&
2607
					match == false)
2608
				/* secure cookie for insecure host. ignore */
2609
				continue;
2610
 
2611
			matched_cookies[count++] = c;
2612
 
2613
			GROW_MATCHED_COOKIES;
2614
 
2615
			if (c->version < (unsigned int)version)
2616
				version = c->version;
2617
 
2618
			c->last_used = now;
2619
			cookies_schedule_update((struct cookie_data *)c);
2620
		}
2621
	}
2622
 
2623
	if (count == 0) {
2624
		/* No cookies found */
2625
		free(ret);
2626
		free(matched_cookies);
2627
		return NULL;
2628
	}
2629
 
2630
	/* and build output string */
2631
	if (version > COOKIE_NETSCAPE) {
2632
		sprintf(ret, "$Version=%d", version);
2633
		ret_used = strlen(ret) + 1;
2634
	}
2635
 
2636
	for (i = 0; i < count; i++) {
2637
		if (!urldb_concat_cookie(matched_cookies[i], version,
2638
				&ret_used, &ret_alloc, &ret)) {
2639
			free(ret);
2640
			free(matched_cookies);
2641
			return NULL;
2642
		}
2643
	}
2644
 
2645
	if (version == COOKIE_NETSCAPE) {
2646
		/* Old-style cookies => no version & skip "; " */
2647
		memmove(ret, ret + 2, ret_used - 2);
2648
		ret_used -= 2;
2649
	}
2650
 
2651
	/* Now, shrink the output buffer to the required size */
2652
	{
2653
		char *temp = realloc(ret, ret_used);
2654
		if (!temp) {
2655
			free(ret);
2656
			free(matched_cookies);
2657
			return NULL;
2658
		}
2659
 
2660
		ret = temp;
2661
	}
2662
 
2663
	free(matched_cookies);
2664
 
2665
	return ret;
2666
 
2667
#undef GROW_MATCHED_COOKIES
2668
}
2669
 
2670
/**
2671
 * Parse Set-Cookie header and insert cookie(s) into database
2672
 *
2673
 * \param header Header to parse, with Set-Cookie: stripped
2674
 * \param url URL being fetched
2675
 * \param referer Referring resource, or 0 for verifiable transaction
2676
 * \return true on success, false otherwise
2677
 */
2678
bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
2679
{
2680
	const char *cur = header, *end;
2681
	lwc_string *path, *host, *scheme;
2682
	nsurl *urlt;
2683
	bool match;
2684
 
2685
	assert(url && header);
2686
 
2687
	/* Get defragmented URL, as 'urlt' */
2688
	if (nsurl_has_component(url, NSURL_FRAGMENT)) {
2689
		if (nsurl_defragment(url, &urlt) != NSERROR_OK)
2690
			return NULL;
2691
	} else {
2692
		urlt = nsurl_ref(url);
2693
	}
2694
 
2695
	scheme = nsurl_get_component(url, NSURL_SCHEME);
2696
	if (scheme == NULL) {
2697
		nsurl_unref(urlt);
2698
		return false;
2699
	}
2700
 
2701
	path = nsurl_get_component(url, NSURL_PATH);
2702
	if (path == NULL) {
2703
		lwc_string_unref(scheme);
2704
		nsurl_unref(urlt);
2705
		return false;
2706
	}
2707
 
2708
	host = nsurl_get_component(url, NSURL_HOST);
2709
	if (host == NULL) {
2710
		lwc_string_unref(path);
2711
		lwc_string_unref(scheme);
2712
		nsurl_unref(urlt);
2713
		return false;
2714
	}
2715
 
2716
	if (referer) {
2717
		lwc_string *rhost;
2718
 
2719
		/* Ensure that url's host name domain matches
2720
		 * referer's (4.3.5) */
2721
		rhost = nsurl_get_component(url, NSURL_HOST);
2722
		if (rhost == NULL) {
2723
			goto error;
2724
		}
2725
 
2726
		/* Domain match host names */
2727
		if (lwc_string_isequal(host, rhost, &match) == lwc_error_ok &&
2728
				match == false) {
2729
			const char *hptr;
2730
			const char *rptr;
2731
			const char *dot;
2732
			const char *host_data = lwc_string_data(host);
2733
			const char *rhost_data = lwc_string_data(rhost);
2734
 
2735
			/* Ensure neither host nor rhost are IP addresses */
2736
			if (url_host_is_ip_address(host_data) ||
2737
					url_host_is_ip_address(rhost_data)) {
2738
				/* IP address, so no partial match */
2739
				lwc_string_unref(rhost);
2740
				goto error;
2741
			}
2742
 
2743
			/* Not exact match, so try the following:
2744
			 *
2745
			 * 1) Find the longest common suffix of host and rhost
2746
			 *    (may be all of host/rhost)
2747
			 * 2) Discard characters from the start of the suffix
2748
			 *    until the suffix starts with a dot
2749
			 *    (prevents foobar.com matching bar.com)
2750
			 * 3) Ensure the suffix is non-empty and contains
2751
			 *    embedded dots (to avoid permitting .com as a
2752
			 *    suffix)
2753
			 *
2754
			 * Note that the above in no way resembles the
2755
			 * domain matching algorithm found in RFC2109.
2756
			 * It does, however, model the real world rather
2757
			 * more accurately.
2758
			 */
2759
 
2760
			/** \todo In future, we should consult a TLD service
2761
			 * instead of just looking for embedded dots.
2762
			 */
2763
 
2764
			hptr = host_data + lwc_string_length(host) - 1;
2765
			rptr = rhost_data + lwc_string_length(rhost) - 1;
2766
 
2767
			/* 1 */
2768
			while (hptr >= host_data && rptr >= rhost_data) {
2769
				if (*hptr != *rptr)
2770
					break;
2771
				hptr--;
2772
				rptr--;
2773
			}
2774
			/* Ensure we end up pointing at the start of the
2775
			 * common suffix. The above loop will exit pointing
2776
			 * to the byte before the start of the suffix. */
2777
			hptr++;
2778
 
2779
			/* 2 */
2780
			while (*hptr != '\0' && *hptr != '.')
2781
				hptr++;
2782
 
2783
			/* 3 */
2784
			if (*hptr == '\0' ||
2785
				(dot = strchr(hptr + 1, '.')) == NULL ||
2786
					*(dot + 1) == '\0') {
2787
				lwc_string_unref(rhost);
2788
				goto error;
2789
			}
2790
		}
2791
 
2792
		lwc_string_unref(rhost);
2793
	}
2794
 
2795
	end = cur + strlen(cur) - 2 /* Trailing CRLF */;
2796
 
2797
	do {
2798
		struct cookie_internal_data *c;
2799
		char *dot;
2800
		size_t len;
2801
 
2802
		c = urldb_parse_cookie(url, &cur);
2803
		if (!c) {
2804
			/* failed => stop parsing */
2805
			goto error;
2806
		}
2807
 
2808
		/* validate cookie */
2809
 
2810
		/* 4.2.2:i Cookie must have NAME and VALUE */
2811
		if (!c->name || !c->value) {
2812
			urldb_free_cookie(c);
2813
			goto error;
2814
		}
2815
 
2816
		/* 4.3.2:i Cookie path must be a prefix of URL path */
2817
		len = strlen(c->path);
2818
		if (len > lwc_string_length(path) ||
2819
				strncmp(c->path, lwc_string_data(path),
2820
						len) != 0) {
2821
			urldb_free_cookie(c);
2822
			goto error;
2823
		}
2824
 
2825
		/* 4.3.2:ii Cookie domain must contain embedded dots */
2826
		dot = strchr(c->domain + 1, '.');
2827
		if (!dot || *(dot + 1) == '\0') {
2828
			/* no embedded dots */
2829
			urldb_free_cookie(c);
2830
			goto error;
2831
		}
2832
 
2833
		/* Domain match fetch host with cookie domain */
2834
		if (strcasecmp(lwc_string_data(host), c->domain) != 0) {
2835
			int hlen, dlen;
2836
			char *domain = c->domain;
2837
 
2838
			/* c->domain must be a domain cookie here because:
2839
			 * c->domain is either:
2840
			 *   + specified in the header as a domain cookie
2841
			 *     (non-domain cookies in the header are ignored
2842
			 *      by urldb_parse_cookie / urldb_parse_avpair)
2843
			 *   + defaulted to the URL's host part
2844
			 *     (by urldb_parse_cookie if no valid domain was
2845
			 *      specified in the header)
2846
			 *
2847
			 * The latter will pass the strcasecmp above, which
2848
			 * leaves the former (i.e. a domain cookie)
2849
			 */
2850
			assert(c->domain[0] == '.');
2851
 
2852
			/* 4.3.2:iii */
2853
			if (url_host_is_ip_address(lwc_string_data(host))) {
2854
				/* IP address, so no partial match */
2855
				urldb_free_cookie(c);
2856
				goto error;
2857
			}
2858
 
2859
			hlen = lwc_string_length(host);
2860
			dlen = strlen(c->domain);
2861
 
2862
			if (hlen <= dlen && hlen != dlen - 1) {
2863
				/* Partial match not possible */
2864
				urldb_free_cookie(c);
2865
				goto error;
2866
			}
2867
 
2868
			if (hlen == dlen - 1) {
2869
				/* Relax matching to allow
2870
				 * host a.com to match .a.com */
2871
				domain++;
2872
				dlen--;
2873
			}
2874
 
2875
			if (strcasecmp(lwc_string_data(host) + (hlen - dlen),
2876
					domain)) {
2877
				urldb_free_cookie(c);
2878
				goto error;
2879
			}
2880
 
2881
			/* 4.3.2:iv Ensure H contains no dots
2882
			 *
2883
			 * If you believe the spec, H should contain no
2884
			 * dots in _any_ cookie. Unfortunately, however,
2885
			 * reality differs in that many sites send domain
2886
			 * cookies of the form .foo.com from hosts such
2887
			 * as bar.bat.foo.com and then expect domain
2888
			 * matching to work. Thus we have to do what they
2889
			 * expect, regardless of any potential security
2890
			 * implications.
2891
			 *
2892
			 * This is what code conforming to the spec would
2893
			 * look like:
2894
			 *
2895
			 * for (int i = 0; i < (hlen - dlen); i++) {
2896
			 *	if (host[i] == '.') {
2897
			 *		urldb_free_cookie(c);
2898
			 *		goto error;
2899
			 *	}
2900
			 * }
2901
			 */
2902
		}
2903
 
2904
		/* Now insert into database */
2905
		if (!urldb_insert_cookie(c, scheme, urlt))
2906
			goto error;
2907
	} while (cur < end);
2908
 
2909
	lwc_string_unref(host);
2910
	lwc_string_unref(path);
2911
	lwc_string_unref(scheme);
2912
	nsurl_unref(urlt);
2913
 
2914
	return true;
2915
 
2916
error:
2917
	lwc_string_unref(host);
2918
	lwc_string_unref(path);
2919
	lwc_string_unref(scheme);
2920
	nsurl_unref(urlt);
2921
 
2922
	return false;
2923
}
2924
 
2925
/**
2926
 * Parse a cookie
2927
 *
2928
 * \param url URL being fetched
2929
 * \param cookie Pointer to cookie string (updated on exit)
2930
 * \return Pointer to cookie structure (on heap, caller frees) or NULL
2931
 */
2932
struct cookie_internal_data *urldb_parse_cookie(nsurl *url,
2933
		const char **cookie)
2934
{
2935
	struct cookie_internal_data *c;
2936
	const char *cur;
2937
	char name[1024], value[4096];
2938
	char *n = name, *v = value;
2939
	bool in_value = false;
2940
	bool had_value_data = false;
2941
	bool value_verbatim = false;
2942
	bool quoted = false;
2943
	bool was_quoted = false;
2944
 
2945
	assert(url && cookie && *cookie);
2946
 
2947
	c = calloc(1, sizeof(struct cookie_internal_data));
2948
	if (c == NULL)
2949
		return NULL;
2950
 
2951
	c->expires = -1;
2952
 
2953
	name[0] = '\0';
2954
	value[0] = '\0';
2955
 
2956
	for (cur = *cookie; *cur; cur++) {
2957
		if (*cur == '\r' && *(cur + 1) == '\n') {
2958
			/* End of header */
2959
			if (quoted) {
2960
				/* Unmatched quote encountered */
2961
 
2962
				/* Match Firefox 2.0.0.11 */
2963
				value[0] = '\0';
2964
 
2965
#if 0
2966
				/* This is what IE6/7 & Safari 3 do */
2967
				/* Opera 9.25 discards the entire cookie */
2968
 
2969
				/* Shuffle value up by 1 */
2970
				memmove(value + 1, value,
2971
					min(v - value, sizeof(value) - 2));
2972
				v++;
2973
				/* And insert " character at the start */
2974
				value[0] = '"';
2975
 
2976
				/* Now, run forwards through the value
2977
				 * looking for a semicolon. If one exists,
2978
				 * terminate the value at this point. */
2979
				for (char *s = value; s < v; s++) {
2980
					if (*s == ';') {
2981
						*s = '\0';
2982
						v = s;
2983
						break;
2984
					}
2985
				}
2986
#endif
2987
			}
2988
 
2989
			break;
2990
		} else if (*cur == '\r') {
2991
			/* Spurious linefeed */
2992
			continue;
2993
		} else if (*cur == '\n') {
2994
			/* Spurious newline */
2995
			continue;
2996
		}
2997
 
2998
		if (in_value && !had_value_data) {
2999
			if (*cur == ' ' || *cur == '\t') {
3000
				/* Strip leading whitespace from value */
3001
				continue;
3002
			} else {
3003
				had_value_data = true;
3004
 
3005
				/* Value is taken verbatim if first non-space
3006
				 * character is not a " */
3007
				if (*cur != '"') {
3008
					value_verbatim = true;
3009
				}
3010
			}
3011
		}
3012
 
3013
		if (in_value && !value_verbatim && (*cur == '"')) {
3014
			/* Only non-verbatim values may be quoted */
3015
			if (cur == *cookie || *(cur - 1) != '\\') {
3016
				/* Only unescaped quotes count */
3017
				was_quoted = quoted;
3018
				quoted = !quoted;
3019
 
3020
				continue;
3021
			}
3022
		}
3023
 
3024
		if (!quoted && !in_value && *cur == '=') {
3025
			/* First equals => attr-value separator */
3026
			in_value = true;
3027
			continue;
3028
		}
3029
 
3030
		if (!quoted && (was_quoted || *cur == ';')) {
3031
			/* Semicolon or after quoted value
3032
			 * => end of current avpair */
3033
 
3034
			/* NUL-terminate tokens */
3035
			*n = '\0';
3036
			*v = '\0';
3037
 
3038
			if (!urldb_parse_avpair(c, name, value, was_quoted)) {
3039
				/* Memory exhausted */
3040
				urldb_free_cookie(c);
3041
				return NULL;
3042
			}
3043
 
3044
			/* And reset to start */
3045
			n = name;
3046
			v = value;
3047
			in_value = false;
3048
			had_value_data = false;
3049
			value_verbatim = false;
3050
			was_quoted = false;
3051
 
3052
			/* Now, if the current input is anything other than a
3053
			 * semicolon, we must be sure to reprocess it */
3054
			if (*cur != ';') {
3055
				cur--;
3056
			}
3057
 
3058
			continue;
3059
		}
3060
 
3061
		/* And now handle commas. These are a pain as they may mean
3062
		 * any of the following:
3063
		 *
3064
		 * + End of cookie
3065
		 * + Day separator in Expires avpair
3066
		 * + (Invalid) comma in unquoted value
3067
		 *
3068
		 * Therefore, in order to handle all 3 cases (2 and 3 are
3069
		 * identical, the difference being that 2 is in the spec and
3070
		 * 3 isn't), we need to determine where the comma actually
3071
		 * lies. We use the following heuristic:
3072
		 *
3073
		 *   Given a comma at the current input position, find the
3074
		 *   immediately following semicolon (or end of input if none
3075
		 *   found). Then, consider the input characters between
3076
		 *   these two positions. If any of these characters is an
3077
		 *   '=', we must assume that the comma signified the end of
3078
		 *   the current cookie.
3079
		 *
3080
		 * This holds as the first avpair of any cookie must be
3081
		 * NAME=VALUE, so the '=' is guaranteed to appear in the
3082
		 * case where the comma marks the end of a cookie.
3083
		 *
3084
		 * This will fail, however, in the case where '=' appears in
3085
		 * the value of the current avpair after the comma or the
3086
		 * subsequent cookie does not start with NAME=VALUE. Neither
3087
		 * of these is particularly likely and if they do occur, the
3088
		 * website is more broken than we can be bothered to handle.
3089
		 */
3090
		if (!quoted && *cur == ',') {
3091
			/* Find semi-colon, if any */
3092
			const char *p;
3093
			const char *semi = strchr(cur + 1, ';');
3094
			if (!semi)
3095
				semi = cur + strlen(cur) - 2 /* CRLF */;
3096
 
3097
			/* Look for equals sign between comma and semi */
3098
			for (p = cur + 1; p < semi; p++)
3099
				if (*p == '=')
3100
					break;
3101
 
3102
			if (p == semi) {
3103
				/* none found => comma internal to value */
3104
				/* do nothing */
3105
			} else {
3106
				/* found one => comma marks end of cookie */
3107
				cur++;
3108
				break;
3109
			}
3110
		}
3111
 
3112
		/* Accumulate into buffers, always leaving space for a NUL */
3113
		/** \todo is silently truncating overlong names/values wise? */
3114
		if (!in_value) {
3115
			if (n < name + (sizeof(name) - 1))
3116
				*n++ = *cur;
3117
		} else {
3118
			if (v < value + (sizeof(value) - 1))
3119
				*v++ = *cur;
3120
		}
3121
	}
3122
 
3123
	/* Parse final avpair */
3124
	*n = '\0';
3125
	*v = '\0';
3126
 
3127
	if (!urldb_parse_avpair(c, name, value, was_quoted)) {
3128
		/* Memory exhausted */
3129
		urldb_free_cookie(c);
3130
		return NULL;
3131
	}
3132
 
3133
	/* Now fix-up default values */
3134
	if (c->domain == NULL) {
3135
		lwc_string *host = nsurl_get_component(url, NSURL_HOST);
3136
		if (host == NULL) {
3137
			urldb_free_cookie(c);
3138
			return NULL;
3139
		}
3140
		c->domain = strdup(lwc_string_data(host));
3141
		lwc_string_unref(host);
3142
	}
3143
 
3144
	if (c->path == NULL) {
3145
		const char *path_data;
3146
		char *path, *slash;
3147
		lwc_string *path_lwc;
3148
 
3149
		path_lwc = nsurl_get_component(url, NSURL_PATH);
3150
		if (path_lwc == NULL) {
3151
			urldb_free_cookie(c);
3152
			return NULL;
3153
		}
3154
		path_data = lwc_string_data(path_lwc);
3155
 
3156
		/* Strip leafname and trailing slash (4.3.1) */
3157
		slash = strrchr(path_data, '/');
3158
		if (slash != NULL) {
3159
			/* Special case: retain first slash in path */
3160
			if (slash == path_data)
3161
				slash++;
3162
 
3163
			slash = strndup(path_data, slash - path_data);
3164
			if (slash == NULL) {
3165
				lwc_string_unref(path_lwc);
3166
				urldb_free_cookie(c);
3167
				return NULL;
3168
			}
3169
 
3170
			path = slash;
3171
			lwc_string_unref(path_lwc);
3172
		} else {
3173
			path = strdup(lwc_string_data(path_lwc));
3174
			lwc_string_unref(path_lwc);
3175
			if (path == NULL) {
3176
				urldb_free_cookie(c);
3177
				return NULL;
3178
			}
3179
		}
3180
 
3181
		c->path = path;
3182
	}
3183
 
3184
	/* Write back current position */
3185
	*cookie = cur;
3186
 
3187
	return c;
3188
}
3189
 
3190
/**
3191
 * Parse a cookie avpair
3192
 *
3193
 * \param c Cookie struct to populate
3194
 * \param n Name component
3195
 * \param v Value component
3196
 * \param was_quoted Whether ::v was quoted in the input
3197
 * \return true on success, false on memory exhaustion
3198
 */
3199
bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, char *v,
3200
		bool was_quoted)
3201
{
3202
	int vlen;
3203
 
3204
	assert(c && n && v);
3205
 
3206
	/* Strip whitespace from start of name */
3207
	for (; *n; n++) {
3208
		if (*n != ' ' && *n != '\t')
3209
			break;
3210
	}
3211
 
3212
	/* Strip whitespace from end of name */
3213
	for (vlen = strlen(n); vlen; vlen--) {
3214
		if (n[vlen] == ' ' || n[vlen] == '\t')
3215
			n[vlen] = '\0';
3216
		else
3217
			break;
3218
	}
3219
 
3220
	/* Strip whitespace from start of value */
3221
	for (; *v; v++) {
3222
		if (*v != ' ' && *v != '\t')
3223
			break;
3224
	}
3225
 
3226
	/* Strip whitespace from end of value */
3227
	for (vlen = strlen(v); vlen; vlen--) {
3228
		if (v[vlen] == ' ' || v[vlen] == '\t')
3229
			v[vlen] = '\0';
3230
		else
3231
			break;
3232
	}
3233
 
3234
	if (!c->comment && strcasecmp(n, "Comment") == 0) {
3235
		c->comment = strdup(v);
3236
		if (!c->comment)
3237
			return false;
3238
	} else if (!c->domain && strcasecmp(n, "Domain") == 0) {
3239
		if (v[0] == '.') {
3240
			/* Domain must start with a dot */
3241
			c->domain_from_set = true;
3242
			c->domain = strdup(v);
3243
			if (!c->domain)
3244
				return false;
3245
		}
3246
	} else if (strcasecmp(n, "Max-Age") == 0) {
3247
		int temp = atoi(v);
3248
		if (temp == 0)
3249
			/* Special case - 0 means delete */
3250
			c->expires = 0;
3251
		else
3252
			c->expires = time(NULL) + temp;
3253
	} else if (!c->path && strcasecmp(n, "Path") == 0) {
3254
		c->path_from_set = true;
3255
		c->path = strdup(v);
3256
		if (!c->path)
3257
			return false;
3258
	} else if (strcasecmp(n, "Version") == 0) {
3259
		c->version = atoi(v);
3260
	} else if (strcasecmp(n, "Expires") == 0) {
3261
		char *datenoday;
3262
		time_t expires;
3263
 
3264
		/* Strip dayname from date (these are hugely
3265
		 * variable and liable to break the parser.
3266
		 * They also serve no useful purpose) */
3267
		for (datenoday = v; *datenoday && !isdigit(*datenoday);
3268
				datenoday++)
3269
			; /* do nothing */
3270
 
5043 ashmew2 3271
		/* TODO: expires = curl_getdate(datenoday, NULL); */
3272
		expires = (time_t) 100123123;
3273
 
3584 sourcerer 3274
		if (expires == -1) {
3275
			/* assume we have an unrepresentable
3276
			 * date => force it to the maximum
3277
			 * possible value of a 32bit time_t
3278
			 * (this may break in 2038. We'll
3279
			 * deal with that once we come to
3280
			 * it) */
3281
			expires = (time_t)0x7fffffff;
3282
		}
3283
		c->expires = expires;
3284
	} else if (strcasecmp(n, "Secure") == 0) {
3285
		c->secure = true;
3286
	} else if (strcasecmp(n, "HttpOnly") == 0) {
3287
		c->http_only = true;
3288
	} else if (!c->name) {
3289
		c->name = strdup(n);
3290
		c->value = strdup(v);
3291
		c->value_was_quoted = was_quoted;
3292
		if (!c->name || !c->value)
3293
			return false;
3294
	}
3295
 
3296
	return true;
3297
}
3298
 
3299
/**
3300
 * Insert a cookie into the database
3301
 *
3302
 * \param c The cookie to insert
3303
 * \param scheme URL scheme associated with cookie path
3304
 * \param url URL (sans fragment) associated with cookie
3305
 * \return true on success, false on memory exhaustion (c will be freed)
3306
 */
3307
bool urldb_insert_cookie(struct cookie_internal_data *c, lwc_string *scheme,
3308
		nsurl *url)
3309
{
3310
	struct cookie_internal_data *d;
3311
	const struct host_part *h;
3312
	struct path_data *p;
3313
	time_t now = time(NULL);
3314
 
3315
	assert(c);
3316
 
3317
	if (c->domain[0] == '.') {
3318
		h = urldb_search_find(
3319
			urldb_get_search_tree(&(c->domain[1])),
3320
			c->domain + 1);
3321
		if (!h) {
3322
			h = urldb_add_host(c->domain + 1);
3323
			if (!h) {
3324
				urldb_free_cookie(c);
3325
				return false;
3326
			}
3327
		}
3328
 
3329
		p = (struct path_data *) &h->paths;
3330
	} else {
3331
		/* Need to have a URL and scheme, if it's not a domain cookie */
3332
		assert(url != NULL);
3333
		assert(scheme != NULL);
3334
 
3335
		h = urldb_search_find(
3336
				urldb_get_search_tree(c->domain),
3337
				c->domain);
3338
 
3339
		if (!h) {
3340
			h = urldb_add_host(c->domain);
3341
			if (!h) {
3342
				urldb_free_cookie(c);
3343
				return false;
3344
			}
3345
		}
3346
 
3347
		/* find path */
3348
		p = urldb_add_path(scheme, 0, h,
3349
				strdup(c->path), NULL, url);
3350
		if (!p) {
3351
			urldb_free_cookie(c);
3352
			return false;
3353
		}
3354
	}
3355
 
3356
	/* add cookie */
3357
	for (d = p->cookies; d; d = d->next) {
3358
		if (!strcmp(d->domain, c->domain) &&
3359
				!strcmp(d->path, c->path) &&
3360
				!strcmp(d->name, c->name))
3361
			break;
3362
	}
3363
 
3364
	if (d) {
3365
		if (c->expires != -1 && c->expires < now) {
3366
			/* remove cookie */
3367
			if (d->next)
3368
				d->next->prev = d->prev;
3369
			else
3370
				p->cookies_end = d->prev;
3371
			if (d->prev)
3372
				d->prev->next = d->next;
3373
			else
3374
				p->cookies = d->next;
3375
 
3376
			cookies_remove((struct cookie_data *)d);
3377
			urldb_free_cookie(d);
3378
			urldb_free_cookie(c);
3379
		} else {
3380
			/* replace d with c */
3381
			c->prev = d->prev;
3382
			c->next = d->next;
3383
			if (c->next)
3384
				c->next->prev = c;
3385
			else
3386
				p->cookies_end = c;
3387
			if (c->prev)
3388
				c->prev->next = c;
3389
			else
3390
				p->cookies = c;
3391
 
3392
			cookies_remove((struct cookie_data *)d);
3393
			urldb_free_cookie(d);
3394
 
3395
			cookies_schedule_update((struct cookie_data *)c);
3396
		}
3397
	} else {
3398
		c->prev = p->cookies_end;
3399
		c->next = NULL;
3400
		if (p->cookies_end)
3401
			p->cookies_end->next = c;
3402
		else
3403
			p->cookies = c;
3404
		p->cookies_end = c;
3405
 
3406
		cookies_schedule_update((struct cookie_data *)c);
3407
	}
3408
 
3409
	return true;
3410
}
3411
 
3412
/**
3413
 * Free a cookie
3414
 *
3415
 * \param c The cookie to free
3416
 */
3417
void urldb_free_cookie(struct cookie_internal_data *c)
3418
{
3419
	assert(c);
3420
 
3421
	free(c->comment);
3422
	free(c->domain);
3423
	free(c->path);
3424
	free(c->name);
3425
	free(c->value);
3426
	free(c);
3427
}
3428
 
3429
/**
3430
 * Concatenate a cookie into the provided buffer
3431
 *
3432
 * \param c Cookie to concatenate
3433
 * \param version The version of the cookie string to output
3434
 * \param used Pointer to amount of buffer used (updated)
3435
 * \param alloc Pointer to allocated size of buffer (updated)
3436
 * \param buf Pointer to Pointer to buffer (updated)
3437
 * \return true on success, false on memory exhaustion
3438
 */
3439
bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
3440
		int *used, int *alloc, char **buf)
3441
{
3442
	/* Combined (A)BNF for the Cookie: request header:
3443
	 *
3444
	 * CHAR           = 
3445
	 * CTL            = 
3446
	 *                  (octets 0 - 31) and DEL (127)>
3447
	 * CR             = 
3448
	 * LF             = 
3449
	 * SP             = 
3450
	 * HT             = 
3451
	 * <">            = 
3452
	 *
3453
	 * CRLF           = CR LF
3454
	 *
3455
	 * LWS            = [CRLF] 1*( SP | HT )
3456
	 *
3457
	 * TEXT           = 
3458
	 *                  but including LWS>
3459
	 *
3460
	 * token          = 1*
3461
	 * separators     = "(" | ")" | "<" | ">" | "@"
3462
	 *                | "," | ";" | ":" | "\" | <">
3463
	 *                | "/" | "[" | "]" | "?" | "="
3464
	 *                | "{" | "}" | SP | HT
3465
	 *
3466
	 * quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
3467
	 * qdtext         = >
3468
	 * quoted-pair    = "\" CHAR
3469
	 *
3470
	 * attr            =       token
3471
	 * value           =       word
3472
	 * word            =       token | quoted-string
3473
	 *
3474
	 * cookie          =       "Cookie:" cookie-version
3475
	 *                         1*((";" | ",") cookie-value)
3476
	 * cookie-value    =       NAME "=" VALUE [";" path] [";" domain]
3477
	 * cookie-version  =       "$Version" "=" value
3478
	 * NAME            =       attr
3479
	 * VALUE           =       value
3480
	 * path            =       "$Path" "=" value
3481
	 * domain          =       "$Domain" "=" value
3482
	 *
3483
	 * A note on quoted-string handling:
3484
	 *   The cookie data stored in the db is verbatim (i.e. sans enclosing
3485
	 *   <">, if any, and with all quoted-pairs intact) thus all that we
3486
	 *   need to do here is ensure that value strings which were quoted
3487
	 *   in Set-Cookie or which include any of the separators are quoted
3488
	 *   before use.
3489
	 *
3490
	 * A note on cookie-value separation:
3491
	 *   We use semicolons for all separators, including between
3492
	 *   cookie-values. This simplifies things and is backwards compatible.
3493
	 */
3494
	const char * const separators = "()<>@,;:\\\"/[]?={} \t";
3495
 
3496
	int max_len;
3497
 
3498
	assert(c && used && alloc && buf && *buf);
3499
 
3500
	/* "; " cookie-value
3501
	 * We allow for the possibility that values are quoted
3502
	 */
3503
	max_len = 2 + strlen(c->name) + 1 + strlen(c->value) + 2 +
3504
			(c->path_from_set ?
3505
				8 + strlen(c->path) + 2 : 0) +
3506
			(c->domain_from_set ?
3507
				10 + strlen(c->domain) + 2 : 0);
3508
 
3509
	if (*used + max_len >= *alloc) {
3510
		char *temp = realloc(*buf, *alloc + 4096);
3511
		if (!temp) {
3512
			return false;
3513
		}
3514
		*buf = temp;
3515
		*alloc += 4096;
3516
	}
3517
 
3518
	if (version == COOKIE_NETSCAPE) {
3519
		/* Original Netscape cookie */
3520
		sprintf(*buf + *used - 1, "; %s=", c->name);
3521
		*used += 2 + strlen(c->name) + 1;
3522
 
3523
		/* The Netscape spec doesn't mention quoting of cookie values.
3524
		 * RFC 2109 $10.1.3 indicates that values must not be quoted.
3525
		 *
3526
		 * However, other browsers preserve quoting, so we should, too
3527
		 */
3528
		if (c->value_was_quoted) {
3529
			sprintf(*buf + *used - 1, "\"%s\"", c->value);
3530
			*used += 1 + strlen(c->value) + 1;
3531
		} else {
3532
			/** \todo should we %XX-encode [;HT,SP] ? */
3533
			/** \todo Should we strip escaping backslashes? */
3534
			sprintf(*buf + *used - 1, "%s", c->value);
3535
			*used += strlen(c->value);
3536
		}
3537
 
3538
		/* We don't send path/domain information -- that's what the
3539
		 * Netscape spec suggests we should do, anyway. */
3540
	} else {
3541
		/* RFC2109 or RFC2965 cookie */
3542
		sprintf(*buf + *used - 1, "; %s=", c->name);
3543
		*used += 2 + strlen(c->name) + 1;
3544
 
3545
		/* Value needs quoting if it contains any separator or if
3546
		 * it needs preserving from the Set-Cookie header */
3547
		if (c->value_was_quoted ||
3548
				strpbrk(c->value, separators) != NULL) {
3549
			sprintf(*buf + *used - 1, "\"%s\"", c->value);
3550
			*used += 1 + strlen(c->value) + 1;
3551
		} else {
3552
			sprintf(*buf + *used - 1, "%s", c->value);
3553
			*used += strlen(c->value);
3554
		}
3555
 
3556
		if (c->path_from_set) {
3557
			/* Path, quoted if necessary */
3558
			sprintf(*buf + *used - 1, "; $Path=");
3559
			*used += 8;
3560
 
3561
			if (strpbrk(c->path, separators) != NULL) {
3562
				sprintf(*buf + *used - 1, "\"%s\"", c->path);
3563
				*used += 1 + strlen(c->path) + 1;
3564
			} else {
3565
				sprintf(*buf + *used - 1, "%s", c->path);
3566
				*used += strlen(c->path);
3567
			}
3568
		}
3569
 
3570
		if (c->domain_from_set) {
3571
			/* Domain, quoted if necessary */
3572
			sprintf(*buf + *used - 1, "; $Domain=");
3573
			*used += 10;
3574
 
3575
			if (strpbrk(c->domain, separators) != NULL) {
3576
				sprintf(*buf + *used - 1, "\"%s\"", c->domain);
3577
				*used += 1 + strlen(c->domain) + 1;
3578
			} else {
3579
				sprintf(*buf + *used - 1, "%s", c->domain);
3580
				*used += strlen(c->domain);
3581
			}
3582
		}
3583
	}
3584
 
3585
	return true;
3586
}
3587
 
3588
/**
3589
 * Load a cookie file into the database
3590
 *
3591
 * \param filename File to load
3592
 */
3593
void urldb_load_cookies(const char *filename)
3594
{
3595
	FILE *fp;
3596
	char s[16*1024];
3597
 
3598
	assert(filename);
3599
 
3600
	fp = fopen(filename, "r");
3601
	if (!fp)
3602
		return;
3603
 
3604
#define FIND_T {							\
3605
		for (; *p && *p != '\t'; p++)				\
3606
			; /* do nothing */				\
3607
		if (p >= end) {						\
3608
			LOG(("Overran input"));				\
3609
			continue;					\
3610
		}							\
3611
		*p++ = '\0';						\
3612
}
3613
 
3614
#define SKIP_T {							\
3615
		for (; *p && *p == '\t'; p++)				\
3616
			; /* do nothing */				\
3617
		if (p >= end) {						\
3618
			LOG(("Overran input"));				\
3619
			continue;					\
3620
		}							\
3621
}
3622
 
3623
	while (fgets(s, sizeof s, fp)) {
3624
		char *p = s, *end = 0,
3625
			*domain, *path, *name, *value, *scheme, *url,
3626
			*comment;
3627
		int version, domain_specified, path_specified,
3628
			secure, http_only, no_destroy, value_quoted;
3629
		time_t expires, last_used;
3630
		struct cookie_internal_data *c;
3631
 
3632
		if(s[0] == 0 || s[0] == '#')
3633
			/* Skip blank lines or comments */
3634
			continue;
3635
 
3636
		s[strlen(s) - 1] = '\0'; /* lose terminating newline */
3637
		end = s + strlen(s);
3638
 
3639
		/* Look for file version first
3640
		 * (all input is ignored until this is read)
3641
		 */
3642
		if (strncasecmp(s, "Version:", 8) == 0) {
3643
			FIND_T; SKIP_T; loaded_cookie_file_version = atoi(p);
3644
 
3645
			if (loaded_cookie_file_version <
3646
					MIN_COOKIE_FILE_VERSION) {
3647
				LOG(("Unsupported Cookie file version"));
3648
				break;
3649
			}
3650
 
3651
			continue;
3652
		} else if (loaded_cookie_file_version == 0) {
3653
			/* Haven't yet seen version; skip this input */
3654
			continue;
3655
		}
3656
 
3657
		/* One cookie/line */
3658
 
3659
		/* Parse input */
3660
		FIND_T; version = atoi(s);
3661
		SKIP_T; domain = p; FIND_T;
3662
		SKIP_T; domain_specified = atoi(p); FIND_T;
3663
		SKIP_T; path = p; FIND_T;
3664
		SKIP_T; path_specified = atoi(p); FIND_T;
3665
		SKIP_T; secure = atoi(p); FIND_T;
3666
		if (loaded_cookie_file_version > 101) {
3667
			/* Introduced in version 1.02 */
3668
			SKIP_T; http_only = atoi(p); FIND_T;
3669
		} else {
3670
			http_only = 0;
3671
		}
3672
		SKIP_T; expires = (time_t)atoi(p); FIND_T;
3673
		SKIP_T; last_used = (time_t)atoi(p); FIND_T;
3674
		SKIP_T; no_destroy = atoi(p); FIND_T;
3675
		SKIP_T; name = p; FIND_T;
3676
		SKIP_T; value = p; FIND_T;
3677
		if (loaded_cookie_file_version > 100) {
3678
			/* Introduced in version 1.01 */
3679
			SKIP_T;	value_quoted = atoi(p); FIND_T;
3680
		} else {
3681
			value_quoted = 0;
3682
		}
3683
		SKIP_T; scheme = p; FIND_T;
3684
		SKIP_T; url = p; FIND_T;
3685
 
3686
		/* Comment may have no content, so don't
3687
		 * use macros as they'll break */
3688
		for (; *p && *p == '\t'; p++)
3689
			; /* do nothing */
3690
		comment = p;
3691
 
3692
		assert(p <= end);
3693
 
3694
		/* Now create cookie */
3695
		c = malloc(sizeof(struct cookie_internal_data));
3696
		if (!c)
3697
			break;
3698
 
3699
		c->name = strdup(name);
3700
		c->value = strdup(value);
3701
		c->value_was_quoted = value_quoted;
3702
		c->comment = strdup(comment);
3703
		c->domain_from_set = domain_specified;
3704
		c->domain = strdup(domain);
3705
		c->path_from_set = path_specified;
3706
		c->path = strdup(path);
3707
		c->expires = expires;
3708
		c->last_used = last_used;
3709
		c->secure = secure;
3710
		c->http_only = http_only;
3711
		c->version = version;
3712
		c->no_destroy = no_destroy;
3713
 
3714
		if (!(c->name && c->value && c->comment &&
3715
				c->domain && c->path)) {
3716
			urldb_free_cookie(c);
3717
			break;
3718
		}
3719
 
3720
		if (c->domain[0] != '.') {
3721
			lwc_string *scheme_lwc = NULL;
3722
			nsurl *url_nsurl = NULL;
3723
 
3724
			assert(scheme[0] != 'u');
3725
 
3726
			if (nsurl_create(url, &url_nsurl) != NSERROR_OK) {
3727
				urldb_free_cookie(c);
3728
				break;
3729
			}
3730
			scheme_lwc = nsurl_get_component(url_nsurl,
3731
					NSURL_SCHEME);
3732
 
3733
			/* And insert it into database */
3734
			if (!urldb_insert_cookie(c, scheme_lwc, url_nsurl)) {
3735
				/* Cookie freed for us */
3736
				nsurl_unref(url_nsurl);
3737
				lwc_string_unref(scheme_lwc);
3738
				break;
3739
			}
3740
			nsurl_unref(url_nsurl);
3741
			lwc_string_unref(scheme_lwc);
3742
 
3743
		} else {
3744
			if (!urldb_insert_cookie(c, NULL, NULL)) {
3745
				/* Cookie freed for us */
3746
				break;
3747
			}
3748
		}
3749
	}
3750
 
3751
#undef SKIP_T
3752
#undef FIND_T
3753
 
3754
	fclose(fp);
3755
}
3756
 
3757
/**
3758
 * Delete a cookie
3759
 *
3760
 * \param domain The cookie's domain
3761
 * \param path The cookie's path
3762
 * \param name The cookie's name
3763
 */
3764
void urldb_delete_cookie(const char *domain, const char *path,
3765
		const char *name)
3766
{
3767
	urldb_delete_cookie_hosts(domain, path, name, &db_root);
3768
}
3769
 
3770
void urldb_delete_cookie_hosts(const char *domain, const char *path,
3771
		const char *name, struct host_part *parent)
3772
{
3773
	struct host_part *h;
3774
	assert(parent);
3775
 
3776
	urldb_delete_cookie_paths(domain, path, name, &parent->paths);
3777
 
3778
	for (h = parent->children; h; h = h->next)
3779
		urldb_delete_cookie_hosts(domain, path, name, h);
3780
}
3781
 
3782
void urldb_delete_cookie_paths(const char *domain, const char *path,
3783
		const char *name, struct path_data *parent)
3784
{
3785
	struct cookie_internal_data *c;
3786
	struct path_data *p = parent;
3787
 
3788
	assert(parent);
3789
 
3790
	do {
3791
		for (c = p->cookies; c; c = c->next) {
3792
			if (strcmp(c->domain, domain) == 0 &&
3793
					strcmp(c->path, path) == 0 &&
3794
					strcmp(c->name, name) == 0) {
3795
				if (c->prev)
3796
					c->prev->next = c->next;
3797
				else
3798
					p->cookies = c->next;
3799
 
3800
				if (c->next)
3801
					c->next->prev = c->prev;
3802
				else
3803
					p->cookies_end = c->prev;
3804
 
3805
				cookies_remove((struct cookie_data *)c);
3806
				urldb_free_cookie(c);
3807
 
3808
				return;
3809
			}
3810
		}
3811
 
3812
		if (p->children) {
3813
			p = p->children;
3814
		} else {
3815
			while (p != parent) {
3816
				if (p->next != NULL) {
3817
					p = p->next;
3818
					break;
3819
				}
3820
 
3821
				p = p->parent;
3822
			}
3823
		}
3824
	} while(p != parent);
3825
}
3826
 
3827
/**
3828
 * Save persistent cookies to file
3829
 *
3830
 * \param filename Path to save to
3831
 */
3832
void urldb_save_cookies(const char *filename)
3833
{
3834
	FILE *fp;
3835
	int cookie_file_version = max(loaded_cookie_file_version,
3836
			COOKIE_FILE_VERSION);
3837
 
3838
	assert(filename);
3839
 
3840
	fp = fopen(filename, "w");
3841
	if (!fp)
3842
		return;
3843
 
3844
	fprintf(fp, "# >%s\n", filename);
3845
	fprintf(fp, "# NetSurf cookies file.\n"
3846
		    "#\n"
3847
		    "# Lines starting with a '#' are comments, "
3848
						"blank lines are ignored.\n"
3849
		    "#\n"
3850
		    "# All lines prior to \"Version:\t%d\" are discarded.\n"
3851
		    "#\n"
3852
		    "# Version\tDomain\tDomain from Set-Cookie\tPath\t"
3853
			"Path from Set-Cookie\tSecure\tHTTP-Only\tExpires\tLast used\t"
3854
			"No destroy\tName\tValue\tValue was quoted\tScheme\t"
3855
			"URL\tComment\n",
3856
			cookie_file_version);
3857
	fprintf(fp, "Version:\t%d\n", cookie_file_version);
3858
 
3859
	urldb_save_cookie_hosts(fp, &db_root);
3860
 
3861
	fclose(fp);
3862
}
3863
 
3864
/**
3865
 * Save a host subtree's cookies
3866
 *
3867
 * \param fp File pointer to write to
3868
 * \param parent Parent host
3869
 */
3870
void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent)
3871
{
3872
	struct host_part *h;
3873
	assert(fp && parent);
3874
 
3875
	urldb_save_cookie_paths(fp, &parent->paths);
3876
 
3877
	for (h = parent->children; h; h = h->next)
3878
		urldb_save_cookie_hosts(fp, h);
3879
}
3880
 
3881
/**
3882
 * Save a path subtree's cookies
3883
 *
3884
 * \param fp File pointer to write to
3885
 * \param parent Parent path
3886
 */
3887
void urldb_save_cookie_paths(FILE *fp, struct path_data *parent)
3888
{
3889
	struct path_data *p = parent;
3890
	time_t now = time(NULL);
3891
 
3892
	assert(fp && parent);
3893
 
3894
	do {
3895
		if (p->cookies != NULL) {
3896
			struct cookie_internal_data *c;
3897
 
3898
			for (c = p->cookies; c != NULL; c = c->next) {
3899
				if (c->expires == -1 || c->expires < now)
3900
					/* Skip expired & session cookies */
3901
					continue;
3902
 
3903
				fprintf(fp,
3904
					"%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t"
3905
					"%s\t%s\t%d\t%s\t%s\t%s\n",
3906
					c->version, c->domain,
3907
					c->domain_from_set, c->path,
3908
					c->path_from_set, c->secure,
3909
					c->http_only,
3910
					(int)c->expires, (int)c->last_used,
3911
					c->no_destroy, c->name, c->value,
3912
					c->value_was_quoted,
3913
					p->scheme ? lwc_string_data(p->scheme) :
3914
							"unused",
3915
					p->url ? nsurl_access(p->url) :
3916
							"unused",
3917
					c->comment ? c->comment : "");
3918
			}
3919
		}
3920
 
3921
		if (p->children != NULL) {
3922
			p = p->children;
3923
		} else {
3924
			while (p != parent) {
3925
				if (p->next != NULL) {
3926
					p = p->next;
3927
					break;
3928
				}
3929
 
3930
				p = p->parent;
3931
			}
3932
		}
3933
	} while (p != parent);
3934
}
3935
 
3936
 
3937
/**
3938
 * Destroy urldb
3939
 */
3940
void urldb_destroy(void)
3941
{
3942
	struct host_part *a, *b;
3943
	int i;
3944
 
3945
	/* Clean up search trees */
3946
	for (i = 0; i < NUM_SEARCH_TREES; i++) {
3947
		if (search_trees[i] != &empty)
3948
			urldb_destroy_search_tree(search_trees[i]);
3949
	}
3950
 
3951
	/* And database */
3952
	for (a = db_root.children; a; a = b) {
3953
		b = a->next;
3954
		urldb_destroy_host_tree(a);
3955
	}
3956
}
3957
 
3958
/**
3959
 * Destroy a host tree
3960
 *
3961
 * \param root Root node of tree to destroy
3962
 */
3963
void urldb_destroy_host_tree(struct host_part *root)
3964
{
3965
	struct host_part *a, *b;
3966
	struct path_data *p, *q;
3967
	struct prot_space_data *s, *t;
3968
 
3969
	/* Destroy children */
3970
	for (a = root->children; a; a = b) {
3971
		b = a->next;
3972
		urldb_destroy_host_tree(a);
3973
	}
3974
 
3975
	/* Now clean up paths */
3976
	for (p = root->paths.children; p; p = q) {
3977
		q = p->next;
3978
		urldb_destroy_path_tree(p);
3979
	}
3980
 
3981
	/* Root path */
3982
	urldb_destroy_path_node_content(&root->paths);
3983
 
3984
	/* Proctection space data */
3985
	for (s = root->prot_space; s; s = t) {
3986
		t = s->next;
3987
		urldb_destroy_prot_space(s);
3988
	}
3989
 
3990
	/* And ourselves */
3991
	free(root->part);
3992
	free(root);
3993
}
3994
 
3995
/**
3996
 * Destroy a path tree
3997
 *
3998
 * \param root Root node of tree to destroy
3999
 */
4000
void urldb_destroy_path_tree(struct path_data *root)
4001
{
4002
	struct path_data *p = root;
4003
 
4004
	do {
4005
		if (p->children != NULL) {
4006
			p = p->children;
4007
		} else {
4008
			struct path_data *q = p;
4009
 
4010
			while (p != root) {
4011
				if (p->next != NULL) {
4012
					p = p->next;
4013
					break;
4014
				}
4015
 
4016
				p = p->parent;
4017
 
4018
				urldb_destroy_path_node_content(q);
4019
				free(q);
4020
 
4021
				q = p;
4022
			}
4023
 
4024
			urldb_destroy_path_node_content(q);
4025
			free(q);
4026
		}
4027
	} while (p != root);
4028
}
4029
 
4030
/**
4031
 * Destroy the contents of a path node
4032
 *
4033
 * \param node Node to destroy contents of (does not destroy node)
4034
 */
4035
void urldb_destroy_path_node_content(struct path_data *node)
4036
{
4037
	struct cookie_internal_data *a, *b;
4038
	unsigned int i;
4039
 
4040
	if (node->url != NULL)
4041
		nsurl_unref(node->url);
4042
 
4043
	if (node->scheme != NULL)
4044
		lwc_string_unref(node->scheme);
4045
 
4046
	free(node->segment);
4047
	for (i = 0; i < node->frag_cnt; i++)
4048
		free(node->fragment[i]);
4049
	free(node->fragment);
4050
 
4051
	if (node->thumb)
4052
		bitmap_destroy(node->thumb);
4053
 
4054
	free(node->urld.title);
4055
 
4056
	for (a = node->cookies; a; a = b) {
4057
		b = a->next;
4058
		urldb_destroy_cookie(a);
4059
	}
4060
}
4061
 
4062
/**
 * Destroy a cookie node
 *
 * Frees the cookie's strings and the cookie itself.  This is exactly
 * what urldb_free_cookie() does, so the work is delegated to it rather
 * than keeping a second copy of the list of owned members.
 *
 * \param c Cookie to destroy (must not be NULL)
 */
void urldb_destroy_cookie(struct cookie_internal_data *c)
{
	urldb_free_cookie(c);
}
4077
 
4078
/**
4079
 * Destroy protection space data
4080
 *
4081
 * \param space Protection space to destroy
4082
 */
4083
void urldb_destroy_prot_space(struct prot_space_data *space)
4084
{
4085
	lwc_string_unref(space->scheme);
4086
	free(space->realm);
4087
	free(space->auth);
4088
 
4089
	free(space);
4090
}
4091
 
4092
 
4093
/**
4094
 * Destroy a search tree
4095
 *
4096
 * \param root Root node of tree to destroy
4097
 */
4098
void urldb_destroy_search_tree(struct search_node *root)
4099
{
4100
	/* Destroy children */
4101
	if (root->left != &empty)
4102
		urldb_destroy_search_tree(root->left);
4103
	if (root->right != &empty)
4104
		urldb_destroy_search_tree(root->right);
4105
 
4106
	/* And destroy ourselves */
4107
	free(root);
4108
}
4109