Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
3584 sourcerer 1
/*
2
 * Copyright 2011 John-Mark Bell 
3
 *
4
 * This file is part of NetSurf, http://www.netsurf-browser.org/
5
 *
6
 * NetSurf is free software; you can redistribute it and/or modify
7
 * it under the terms of the GNU General Public License as published by
8
 * the Free Software Foundation; version 2 of the License.
9
 *
10
 * NetSurf is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
 * GNU General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU General Public License
16
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17
 */
18
 
19
/** \file
20
 * MIME type sniffer (implementation)
21
 *
22
 * Spec version: 2011-11-27
23
 */
24
 
25
#include
26
 
27
#include "content/content_factory.h"
28
#include "content/llcache.h"
29
#include "content/mimesniff.h"
30
#include "utils/http.h"
31
#include "utils/utils.h"
32
 
33
struct map_s {
34
	const uint8_t *sig;
35
	size_t len;
36
	bool safe;
37
	lwc_string **type;
38
};
39
 
40
/*
 * Interned MIME type strings.
 *
 * Each of these is interned by mimesniff_init() and released again by
 * mimesniff_fini().
 */

/* Placeholder and wildcard types */
static lwc_string *unknown_unknown;
static lwc_string *application_unknown;
static lwc_string *any;

/* XML, HTML, plain text and generic binary */
static lwc_string *text_xml;
static lwc_string *application_xml;
static lwc_string *text_html;
static lwc_string *text_plain;
static lwc_string *application_octet_stream;

/* Images */
static lwc_string *image_gif;
static lwc_string *image_png;
static lwc_string *image_jpeg;
static lwc_string *image_bmp;
static lwc_string *image_vnd_microsoft_icon;
static lwc_string *image_webp;

/* Feeds */
static lwc_string *application_rss_xml;
static lwc_string *application_atom_xml;

/* Audio and video */
static lwc_string *audio_wave;
static lwc_string *application_ogg;
static lwc_string *video_webm;

/* Archives */
static lwc_string *application_x_rar_compressed;
static lwc_string *application_zip;
static lwc_string *application_x_gzip;

/* Documents */
static lwc_string *application_postscript;
static lwc_string *application_pdf;

/* Remaining types */
static lwc_string *video_mp4;
static lwc_string *image_svg;
66
 
67
nserror mimesniff_init(void)
68
{
69
	lwc_error lerror;
70
 
71
#define SINIT(v, s) \
72
	lerror = lwc_intern_string(s, SLEN(s), &v); \
73
	if (lerror != lwc_error_ok) \
74
		return NSERROR_NOMEM
75
 
76
	SINIT(unknown_unknown,              "unknown/unknown");
77
	SINIT(application_unknown,          "application/unknown");
78
	SINIT(any,                          "*/*");
79
	SINIT(text_xml,                     "text/xml");
80
	SINIT(application_xml,              "application/xml");
81
	SINIT(text_html,                    "text/html");
82
	SINIT(text_plain,                   "text/plain");
83
	SINIT(application_octet_stream,     "application/octet-stream");
84
	SINIT(image_gif,                    "image/gif");
85
	SINIT(image_png,                    "image/png");
86
	SINIT(image_jpeg,                   "image/jpeg");
87
	SINIT(image_bmp,                    "image/bmp");
88
	SINIT(image_vnd_microsoft_icon,     "image/vnd.microsoft.icon");
89
	SINIT(image_webp,                   "image/webp");
90
	SINIT(application_rss_xml,          "application/rss+xml");
91
	SINIT(application_atom_xml,         "application/atom+xml");
92
	SINIT(audio_wave,                   "audio/wave");
93
	SINIT(application_ogg,              "application/ogg");
94
	SINIT(video_webm,                   "video/webm");
95
	SINIT(application_x_rar_compressed, "application/x-rar-compressed");
96
	SINIT(application_zip,              "application/zip");
97
	SINIT(application_x_gzip,           "application/x-gzip");
98
	SINIT(application_postscript,       "application/postscript");
99
	SINIT(application_pdf,              "application/pdf");
100
	SINIT(video_mp4,                    "video/mp4");
101
	SINIT(image_svg,                    "image/svg+xml");
102
#undef SINIT
103
 
104
	return NSERROR_OK;
105
}
106
 
107
void mimesniff_fini(void)
108
{
109
	lwc_string_unref(image_svg);
110
	lwc_string_unref(video_mp4);
111
	lwc_string_unref(application_pdf);
112
	lwc_string_unref(application_postscript);
113
	lwc_string_unref(application_x_gzip);
114
	lwc_string_unref(application_zip);
115
	lwc_string_unref(application_x_rar_compressed);
116
	lwc_string_unref(video_webm);
117
	lwc_string_unref(application_ogg);
118
	lwc_string_unref(audio_wave);
119
	lwc_string_unref(application_atom_xml);
120
	lwc_string_unref(application_rss_xml);
121
	lwc_string_unref(image_webp);
122
	lwc_string_unref(image_vnd_microsoft_icon);
123
	lwc_string_unref(image_bmp);
124
	lwc_string_unref(image_jpeg);
125
	lwc_string_unref(image_png);
126
	lwc_string_unref(image_gif);
127
	lwc_string_unref(application_octet_stream);
128
	lwc_string_unref(text_plain);
129
	lwc_string_unref(text_html);
130
	lwc_string_unref(application_xml);
131
	lwc_string_unref(text_xml);
132
	lwc_string_unref(any);
133
	lwc_string_unref(application_unknown);
134
	lwc_string_unref(unknown_unknown);
135
}
136
 
137
/**
 * Determine whether a buffer contains at least one binary octet.
 *
 * An octet counts as binary when it lies in the C0 control range
 * (0x00-0x1f) and is not one of ESC, CR, FF, LF or HT.
 *
 * \param data  Buffer to inspect
 * \param len   Number of octets in the buffer
 * \return true if a binary octet is present, false otherwise
 *         (including when the buffer is empty)
 */
static bool mimesniff__has_binary_octets(const uint8_t *data, size_t len)
{
	size_t pos;

	for (pos = 0; pos < len; pos++) {
		switch (data[pos]) {
		case 0x1b: /* ESC */
		case '\r':
		case '\f':
		case '\n':
		case '\t':
			/* Control characters that are permitted in text */
			continue;
		default:
			/* Any other C0 control marks the data as binary */
			if (data[pos] <= 0x1f)
				return true;
			break;
		}
	}

	return false;
}
154
 
155
static nserror mimesniff__match_mp4(const uint8_t *data, size_t len,
156
		lwc_string **effective_type)
157
{
158
	size_t box_size, i;
159
 
160
	/* ISO/IEC 14496-12:2008 $4.3 says (effectively):
161
	 *
162
	 * struct ftyp_box {
163
	 *   uint32_t size; (in octets, including size+type words)
164
	 *   uint32_t type; (== 'ftyp')
165
	 *   uint32_t major_brand;
166
	 *   uint32_t minor_version;
167
	 *   uint32_t compatible_brands[];
168
	 * }
169
	 *
170
	 * Note 1: A size of 0 implies that the length of the box is designated
171
	 * by the remaining input data (and thus may only occur in the last
172
	 * box in the input). We'll reject this below, as it's pointless
173
	 * sniffing input that contains no boxes other than 'ftyp'.
174
	 *
175
	 * Note 2: A size of 1 implies an additional uint64_t field after
176
	 * the type which contains the extended box size. We'll reject this,
177
	 * too, as it implies a minimum of (2^32 - 24) / 4 compatible brands,
178
	 * which is decidely unlikely.
179
	 */
180
 
181
	/* 12 reflects the minimum number of octets needed to sniff useful
182
	 * information out of an 'ftyp' box (i.e. the size, type,
183
	 * and major_brand words). */
184
	if (len < 12)
185
		return NSERROR_NOT_FOUND;
186
 
187
	/* Box size is big-endian */
188
	box_size = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3];
189
 
190
	/* Require that we can read the entire box, and reject bad box sizes */
191
	if (len < box_size || box_size % 4 != 0)
192
		return NSERROR_NOT_FOUND;
193
 
194
	/* Ensure this is an 'ftyp' box */
195
	if (data[4] != 'f' || data[5] != 't' ||
196
			data[6] != 'y' || data[7] != 'p')
197
		return NSERROR_NOT_FOUND;
198
 
199
	/* Check if major brand begins with 'mp4' */
200
	if (data[8] == 'm' && data[9] == 'p' && data[10] == '4') {
201
		*effective_type = lwc_string_ref(video_mp4);
202
		return NSERROR_OK;
203
	}
204
 
205
	/* Search each compatible brand in the box for "mp4" */
206
	for (i = 16; i <= box_size - 4; i += 4) {
207
		if (data[i] == 'm' && data[i+1] == 'p' && data[i+2] == '4') {
208
			*effective_type = lwc_string_ref(video_mp4);
209
			return NSERROR_OK;
210
		}
211
	}
212
 
213
	return NSERROR_NOT_FOUND;
214
}
215
 
216
static nserror mimesniff__match_unknown_ws(const uint8_t *data, size_t len,
217
		lwc_string **effective_type)
218
{
219
#define SIG(t, s, x) { (const uint8_t *) s, SLEN(s), x, t }
220
	static const struct map_s ws_exact_match_types[] = {
221
		SIG(&text_xml, "
222
		{ NULL, 0, false, NULL }
223
	};
224
 
225
	static const struct map_s ws_inexact_match_types[] = {
226
		SIG(&text_html, "
227
		SIG(&text_html, "
228
		SIG(&text_html, "
229
		SIG(&text_html, "
230
		SIG(&text_html, "
231
		SIG(&text_html, "
232
		SIG(&text_html, "
233
		SIG(&text_html, "
234
		SIG(&text_html, "
235
		SIG(&text_html, "
236
		SIG(&text_html, "
237
		SIG(&text_html, "
238
		SIG(&text_html, "
239
		SIG(&text_html, "
240
		SIG(&text_html, "
241
		SIG(&text_html, "
242
		SIG(&text_html, "