Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
3584 sourcerer 1
/*
2
 * Copyright 2012 John-Mark Bell 
3
 * Copyright 2004-2007 James Bursa 
4
 *
5
 * This file is part of NetSurf, http://www.netsurf-browser.org/
6
 *
7
 * NetSurf is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License as published by
9
 * the Free Software Foundation; version 2 of the License.
10
 *
11
 * NetSurf is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
 */
19
 
20
/** \file
21
 * Save HTML document with dependencies (implementation).
22
 */
23
 
24
#include "utils/config.h"
25
 
26
#include 
27
#include 
28
#include 
29
#include 
30
#include 
31
#include 
32
#include 
33
 
34
#include 
35
 
36
#include "content/content.h"
37
#include "content/hlcache.h"
38
#include "css/css.h"
39
#include "desktop/save_complete.h"
40
#include "render/box.h"
41
#include "render/html.h"
42
#include "utils/corestrings.h"
43
#include "utils/log.h"
44
#include "utils/nsurl.h"
45
#include "utils/utf8.h"
46
#include "utils/utils.h"
47
 
48
/**
 * Compiled regular expression that save_complete_rewrite_stylesheet_urls()
 * hands to regexec() to find import rules in stylesheet source; that
 * function reads the imported URL from capture group 2, 4, 6, 8 or 10.
 *
 * NOTE(review): the regcomp() call is not in this part of the file --
 * presumably it runs during initialisation; confirm before relying on it.
 */
regex_t save_complete_import_re;
49
 
50
/** An entry in save_complete_list. */
51
typedef struct save_complete_entry {
52
	hlcache_handle *content;
53
	struct save_complete_entry *next; /**< Next entry in list */
54
} save_complete_entry;
55
 
56
typedef struct save_complete_ctx {
57
    const char *path;
58
    save_complete_entry *list;
59
    save_complete_set_type_cb set_type;
60
 
61
    nsurl *base;
62
    FILE *fp;
63
    enum { STATE_NORMAL, STATE_IN_STYLE } iter_state;
64
} save_complete_ctx;
65
 
66
/** Kind of notification the DOM tree walker passes to its callback. */
typedef enum {
	EVENT_ENTER,	/**< The walk has just moved onto the node */
	EVENT_LEAVE	/**< The walk is about to move off the node */
} save_complete_event_type;
70
 
71
 
72
/*
 * Forward declarations: both functions are called by code that appears
 * earlier in the file than their definitions.
 */
static bool save_complete_save_html(save_complete_ctx *ctx,
		hlcache_handle *c, bool index);

static bool save_complete_save_imported_sheets(save_complete_ctx *ctx,
		struct nscss_import *imports, uint32_t import_count);
76
 
77
 
78
/**
 * Prepare a save-complete context for use.
 *
 * Every member is given a defined value so that no later code can read an
 * indeterminate pointer or state; members not supplied by the caller start
 * out as NULL / STATE_NORMAL.
 *
 * \param ctx       Context to initialise
 * \param path      Path that saved files' leafnames are appended to
 * \param set_type  Optional hook run on each saved file, or NULL
 */
static void save_complete_ctx_initialise(save_complete_ctx *ctx,
		const char *path, save_complete_set_type_cb set_type)
{
	ctx->path = path;
	ctx->list = NULL;
	ctx->set_type = set_type;

	ctx->base = NULL;
	ctx->fp = NULL;
	ctx->iter_state = STATE_NORMAL;
}
85
 
86
/**
 * Release the list of recorded contents held by a context.
 *
 * Only the list nodes are freed; the content handles they point at are
 * left untouched.  The list head is reset so the context never holds a
 * dangling pointer and finalising twice is harmless.
 *
 * \param ctx  Context to finalise
 */
static void save_complete_ctx_finalise(save_complete_ctx *ctx)
{
	save_complete_entry *entry = ctx->list;

	while (entry != NULL) {
		save_complete_entry *next = entry->next;

		free(entry);
		entry = next;
	}

	ctx->list = NULL;
}
96
 
97
static bool save_complete_ctx_add_content(save_complete_ctx *ctx,
98
		hlcache_handle *content)
99
{
100
	save_complete_entry *entry;
101
 
102
	entry = malloc(sizeof (*entry));
103
	if (entry == NULL)
104
		return false;
105
 
106
	entry->content = content;
107
	entry->next = ctx->list;
108
	ctx->list = entry;
109
 
110
	return true;
111
}
112
 
113
 
114
static hlcache_handle *save_complete_ctx_find_content(save_complete_ctx *ctx,
115
		const nsurl *url)
116
{
117
	save_complete_entry *entry;
118
 
119
	for (entry = ctx->list; entry != NULL; entry = entry->next)
120
		if (nsurl_compare(url,
121
				hlcache_handle_get_url(entry->content),
122
				NSURL_COMPLETE))
123
			return entry->content;
124
 
125
	return NULL;
126
}
127
 
128
 
129
static bool save_complete_ctx_has_content(save_complete_ctx *ctx,
130
		hlcache_handle *content)
131
{
132
	save_complete_entry *entry;
133
 
134
	for (entry = ctx->list; entry != NULL; entry = entry->next)
135
		if (entry->content == content)
136
			return true;
137
 
138
	return false;
139
}
140
 
141
static bool save_complete_save_buffer(save_complete_ctx *ctx,
142
		const char *leafname, const char *data, size_t data_len,
143
		lwc_string *mime_type)
144
{
145
	FILE *fp;
146
	bool error;
147
	char fullpath[PATH_MAX];
148
 
149
	strncpy(fullpath, ctx->path, sizeof fullpath);
150
	error = path_add_part(fullpath, sizeof fullpath, leafname);
151
	if (error == false) {
152
		warn_user("NoMemory", NULL);
153
		return false;
154
	}
155
 
156
	fp = fopen(fullpath, "wb");
157
	if (fp == NULL) {
158
		LOG(("fopen(): errno = %i", errno));
159
		warn_user("SaveError", strerror(errno));
160
		return false;
161
	}
162
 
163
	fwrite(data, sizeof(*data), data_len, fp);
164
 
165
	fclose(fp);
166
 
167
	if (ctx->set_type != NULL)
168
		ctx->set_type(fullpath, mime_type);
169
 
170
	return true;
171
}
172
 
173
/**
174
 * Rewrite stylesheet \@import rules for save complete.
175
 *
176
 * \param  source  stylesheet source
177
 * \param  size    size of source
178
 * \param  base    url of stylesheet
179
 * \param  osize   updated with the size of the result
180
 * \return  converted source, or NULL on out of memory
181
 */
182
 
183
static char *save_complete_rewrite_stylesheet_urls(save_complete_ctx *ctx,
184
		const char *source, unsigned long size, const nsurl *base,
185
		unsigned long *osize)
186
{
187
	char *rewritten;
188
	unsigned long offset = 0;
189
	unsigned int imports = 0;
190
	nserror error;
191
 
192
	/* count number occurrences of @import to (over)estimate result size */
193
	/* can't use strstr because source is not 0-terminated string */
194
	for (offset = 0; SLEN("@import") < size &&
195
			offset <= size - SLEN("@import"); offset++) {
196
		if (source[offset] == '@' &&
197
				tolower(source[offset + 1]) == 'i' &&
198
				tolower(source[offset + 2]) == 'm' &&
199
				tolower(source[offset + 3]) == 'p' &&
200
				tolower(source[offset + 4]) == 'o' &&
201
				tolower(source[offset + 5]) == 'r' &&
202
				tolower(source[offset + 6]) == 't')
203
			imports++;
204
	}
205
 
206
	rewritten = malloc(size + imports * 20);
207
	if (rewritten == NULL)
208
		return NULL;
209
	*osize = 0;
210
 
211
	offset = 0;
212
	while (offset < size) {
213
		const char *import_url = NULL;
214
		char *import_url_copy;
215
		int import_url_len = 0;
216
		nsurl *url = NULL;
217
		regmatch_t match[11];
218
		int m = regexec(&save_complete_import_re, source + offset,
219
				11, match, 0);
220
		if (m)
221
			break;
222
 
223
		if (match[2].rm_so != -1) {
224
			import_url = source + offset + match[2].rm_so;
225
			import_url_len = match[2].rm_eo - match[2].rm_so;
226
		} else if (match[4].rm_so != -1) {
227
			import_url = source + offset + match[4].rm_so;
228
			import_url_len = match[4].rm_eo - match[4].rm_so;
229
		} else if (match[6].rm_so != -1) {
230
			import_url = source + offset + match[6].rm_so;
231
			import_url_len = match[6].rm_eo - match[6].rm_so;
232
		} else if (match[8].rm_so != -1) {
233
			import_url = source + offset + match[8].rm_so;
234
			import_url_len = match[8].rm_eo - match[8].rm_so;
235
		} else if (match[10].rm_so != -1) {
236
			import_url = source + offset + match[10].rm_so;
237
			import_url_len = match[10].rm_eo - match[10].rm_so;
238
		}
239
		assert(import_url != NULL);
240
 
241
		import_url_copy = strndup(import_url, import_url_len);
242
		if (import_url_copy == NULL) {
243
			free(rewritten);
244
			return NULL;
245
		}
246
 
247
		error = nsurl_join(base, import_url_copy, &url);
248
		free(import_url_copy);
249
		if (error == NSERROR_NOMEM) {
250
			free(rewritten);
251
			return NULL;
252
		}
253
 
254
		/* copy data before match */
255
		memcpy(rewritten + *osize, source + offset, match[0].rm_so);
256
		*osize += match[0].rm_so;
257
 
258
		if (url != NULL) {
259
			hlcache_handle *content;
260
			content = save_complete_ctx_find_content(ctx, url);
261
			if (content != NULL) {
262
				/* replace import */
263
				char buf[64];
264
				snprintf(buf, sizeof buf, "@import '%p'",
265
						content);
266
				memcpy(rewritten + *osize, buf, strlen(buf));
267
				*osize += strlen(buf);
268
			} else {
269
				/* copy import */
270
				memcpy(rewritten + *osize,
271
					source + offset + match[0].rm_so,
272
					match[0].rm_eo - match[0].rm_so);
273
				*osize += match[0].rm_eo - match[0].rm_so;
274
			}
275
			nsurl_unref(url);
276
		} else {
277
			/* copy import */
278
			memcpy(rewritten + *osize,
279
				source + offset + match[0].rm_so,
280
				match[0].rm_eo - match[0].rm_so);
281
			*osize += match[0].rm_eo - match[0].rm_so;
282
		}
283
 
284
		assert(0 < match[0].rm_eo);
285
		offset += match[0].rm_eo;
286
	}
287
 
288
	/* copy rest of source */
289
	if (offset < size) {
290
		memcpy(rewritten + *osize, source + offset, size - offset);
291
		*osize += size - offset;
292
	}
293
 
294
	return rewritten;
295
}
296
 
297
static bool save_complete_save_stylesheet(save_complete_ctx *ctx,
298
		hlcache_handle *css)
299
{
300
	const char *css_data;
301
	unsigned long css_size;
302
	char *source;
303
	unsigned long source_len;
304
	struct nscss_import *imports;
305
	uint32_t import_count;
306
	lwc_string *type;
307
	char filename[32];
308
	bool result;
309
 
310
	if (save_complete_ctx_has_content(ctx, css))
311
		return true;
312
 
313
	if (save_complete_ctx_add_content(ctx, css) == false) {
314
		warn_user("NoMemory", 0);
315
		return false;
316
	}
317
 
318
	imports = nscss_get_imports(css, &import_count);
319
	if (save_complete_save_imported_sheets(ctx,
320
			imports, import_count) == false)
321
		return false;
322
 
323
	css_data = content_get_source_data(css, &css_size);
324
	source = save_complete_rewrite_stylesheet_urls(ctx, css_data, css_size,
325
			hlcache_handle_get_url(css), &source_len);
326
	if (source == NULL) {
327
		warn_user("NoMemory", 0);
328
		return false;
329
	}
330
 
331
	type = content_get_mime_type(css);
332
	if (type == NULL) {
333
		free(source);
334
		return false;
335
	}
336
 
337
	snprintf(filename, sizeof filename, "%p", css);
338
 
339
	result = save_complete_save_buffer(ctx, filename,
340
			source, source_len, type);
341
 
342
	lwc_string_unref(type);
343
	free(source);
344
 
345
	return result;
346
}
347
 
348
/**
 * Save every stylesheet in an array of imports.
 *
 * Entries without a content handle are skipped, matching how the callers
 * in this file treat a NULL external stylesheet or a NULL object content.
 *
 * \param ctx           Save-complete context
 * \param imports       Array of imports
 * \param import_count  Number of entries in \a imports
 * \return true if every present sheet was saved, false on first failure
 */
static bool save_complete_save_imported_sheets(save_complete_ctx *ctx,
		struct nscss_import *imports, uint32_t import_count)
{
	uint32_t i;

	for (i = 0; i < import_count; i++) {
		/* nothing to save for an import that has no content */
		if (imports[i].c == NULL)
			continue;

		if (save_complete_save_stylesheet(ctx, imports[i].c) == false)
			return false;
	}

	return true;
}
360
 
361
/**
 * Save one stylesheet belonging to an HTML document.
 *
 * For an internal sheet only its imports are saved here; for an external
 * sheet the sheet content itself is saved.  An external sheet with no
 * content counts as success.
 *
 * \param ctx    Save-complete context
 * \param sheet  Stylesheet descriptor
 * \return true on success, false on failure
 */
static bool save_complete_save_html_stylesheet(save_complete_ctx *ctx,
		struct html_stylesheet *sheet)
{
	if (sheet->type == HTML_STYLESHEET_INTERNAL) {
		return save_complete_save_imported_sheets(ctx,
				sheet->data.internal->imports,
				sheet->data.internal->import_count);
	}

	if (sheet->data.external != NULL)
		return save_complete_save_stylesheet(ctx, sheet->data.external);

	return true;
}
378
 
379
static bool save_complete_save_html_stylesheets(save_complete_ctx *ctx,
380
		hlcache_handle *c)
381
{
382
	struct html_stylesheet *sheets;
383
	unsigned int i, count;
384
 
385
	sheets = html_get_stylesheets(c, &count);
386
 
387
	for (i = STYLESHEET_START; i != count; i++) {
388
		if (save_complete_save_html_stylesheet(ctx,
389
				&sheets[i]) == false)
390
			return false;
391
	}
392
 
393
	return true;
394
}
395
 
396
static bool save_complete_save_html_object(save_complete_ctx *ctx,
397
		hlcache_handle *obj)
398
{
399
	const char *obj_data;
400
	unsigned long obj_size;
401
	lwc_string *type;
402
	bool result;
403
	char filename[32];
404
 
405
	if (content_get_type(obj) == CONTENT_NONE)
406
		return true;
407
 
408
	obj_data = content_get_source_data(obj, &obj_size);
409
	if (obj_data == NULL)
410
		return true;
411
 
412
	if (save_complete_ctx_has_content(ctx, obj))
413
		return true;
414
 
415
	if (save_complete_ctx_add_content(ctx, obj) == false) {
416
		warn_user("NoMemory", 0);
417
		return false;
418
	}
419
 
420
	if (content_get_type(obj) == CONTENT_HTML) {
421
		return save_complete_save_html(ctx, obj, false);
422
	}
423
 
424
	snprintf(filename, sizeof filename, "%p", obj);
425
 
426
	type = content_get_mime_type(obj);
427
	if (type == NULL)
428
		return false;
429
 
430
	result = save_complete_save_buffer(ctx, filename,
431
			obj_data, obj_size, type);
432
 
433
	lwc_string_unref(type);
434
 
435
	return result;
436
}
437
 
438
static bool save_complete_save_html_objects(save_complete_ctx *ctx,
439
		hlcache_handle *c)
440
{
441
	struct content_html_object *object;
442
	unsigned int count;
443
 
444
	object = html_get_objects(c, &count);
445
 
446
	for (; object != NULL; object = object->next) {
447
		if (object->content != NULL) {
448
			if (save_complete_save_html_object(ctx,
449
					object->content) == false)
450
				return false;
451
		}
452
	}
453
 
454
	return true;
455
}
456
 
457
static bool save_complete_libdom_treewalk(dom_node *root,
458
		bool (*callback)(dom_node *node,
459
				save_complete_event_type event_type, void *ctx),
460
		void *ctx)
461
{
462
	dom_node *node;
463
 
464
	node = dom_node_ref(root); /* tree root */
465
 
466
	while (node != NULL) {
467
		dom_node *next = NULL;
468
		dom_exception exc;
469
 
470
		exc = dom_node_get_first_child(node, &next);
471
		if (exc != DOM_NO_ERR) {
472
			dom_node_unref(node);
473
			break;
474
		}
475
 
476
		if (next != NULL) {  /* 1. children */
477
			dom_node_unref(node);
478
			node = next;
479
		} else {
480
			exc = dom_node_get_next_sibling(node, &next);
481
			if (exc != DOM_NO_ERR) {
482
				dom_node_unref(node);
483
				break;
484
			}
485
 
486
			if (next != NULL) {  /* 2. siblings */
487
				if (callback(node, EVENT_LEAVE, ctx) == false) {
488
					return false;
489
				}
490
				dom_node_unref(node);
491
				node = next;
492
			} else {  /* 3. ancestor siblings */
493
				while (node != NULL) {
494
					exc = dom_node_get_next_sibling(node,
495
							&next);
496
					if (exc != DOM_NO_ERR) {
497
						dom_node_unref(node);
498
						node = NULL;
499
						break;
500
					}
501
 
502
					if (next != NULL) {
503
						dom_node_unref(next);
504
						break;
505
					}
506
 
507
					exc = dom_node_get_parent_node(node,
508
							&next);
509
					if (exc != DOM_NO_ERR) {
510
						dom_node_unref(node);
511
						node = NULL;
512
						break;
513
					}
514
 
515
					if (callback(node, EVENT_LEAVE,
516
							ctx) == false) {
517
						return false;
518
					}
519
					dom_node_unref(node);
520
					node = next;
521
				}
522
 
523
				if (node == NULL)
524
					break;
525
 
526
				exc = dom_node_get_next_sibling(node, &next);
527
				if (exc != DOM_NO_ERR) {
528
					dom_node_unref(node);
529
					break;
530
				}
531
 
532
				if (callback(node, EVENT_LEAVE, ctx) == false) {
533
					return false;
534
				}
535
				dom_node_unref(node);
536
				node = next;
537
			}
538
		}
539
 
540
		assert(node != NULL);
541
 
542
		if (callback(node, EVENT_ENTER, ctx) == false) {
543
			return false; /* callback caused early termination */
544
		}
545
 
546
	}
547
 
548
	return true;
549
}
550
 
551
/**
 * Print a URL-valued attribute value, in double quotes, to ctx->fp.
 *
 * The value is joined against ctx->base.  If the resulting URL belongs to
 * a recorded content, that content's handle address ("%p") is printed;
 * otherwise the joined URL is printed after utf8_to_html() conversion.  If
 * no URL could be produced, the original value is converted and printed.
 *
 * \param ctx        Save-complete context (supplies base, fp and the list)
 * \param value      Attribute value, as a C string
 * \param value_len  Length of \a value in bytes
 * \return true on success, false on out of memory or conversion failure
 */
static bool save_complete_rewrite_url_value(save_complete_ctx *ctx,
		const char *value, size_t value_len)
{
	/* initialised so a failed join that leaves it unwritten is seen as
	 * "no URL" rather than read as an indeterminate pointer */
	nsurl *url = NULL;
	hlcache_handle *content;
	char *escaped;
	nserror error;
	utf8_convert_ret ret;

	error = nsurl_join(ctx->base, value, &url);
	if (error == NSERROR_NOMEM)
		return false;

	if (url != NULL) {
		content = save_complete_ctx_find_content(ctx, url);
		if (content != NULL) {
			/* found a match */
			nsurl_unref(url);

			fprintf(ctx->fp, "\"%p\"", (void *) content);
			return true;
		}

		/* no match found */
		ret = utf8_to_html(nsurl_access(url), "UTF-8",
				nsurl_length(url), &escaped);
		nsurl_unref(url);
	} else {
		ret = utf8_to_html(value, "UTF-8", value_len, &escaped);
	}

	if (ret != UTF8_CONVERT_OK)
		return false;

	fprintf(ctx->fp, "\"%s\"", escaped);

	free(escaped);

	return true;
}
596
 
597
/**
 * Print an attribute value, in double quotes, to ctx->fp after passing it
 * through utf8_to_html().
 *
 * \param ctx        Save-complete context (supplies fp)
 * \param value      Attribute value
 * \param value_len  Length of \a value in bytes
 * \return true on success, false if the conversion failed
 */
static bool save_complete_write_value(save_complete_ctx *ctx,
		const char *value, size_t value_len)
{
	char *escaped = NULL;

	if (utf8_to_html(value, "UTF-8", value_len, &escaped) !=
			UTF8_CONVERT_OK) {
		return false;
	}

	fprintf(ctx->fp, "\"%s\"", escaped);
	free(escaped);

	return true;
}
613
 
614
static bool save_complete_handle_attr_value(save_complete_ctx *ctx,
615
		dom_string *node_name, dom_string *attr_name,
616
		dom_string *attr_value)
617
{
618
	const char *node_data = dom_string_data(node_name);
619
	size_t node_len = dom_string_byte_length(node_name);
620
	const char *name_data = dom_string_data(attr_name);
621
	size_t name_len = dom_string_byte_length(attr_name);
622
	const char *value_data = dom_string_data(attr_value);
623
	size_t value_len = dom_string_byte_length(attr_value);
624
 
625
	/**
626
	 * We only need to consider the following cases:
627
	 *
628
	 * Attribute:      Elements:
629
	 *
630
	 * 1)   data         
631
	 * 2)   href           
632
	 * 3)   src          
1212