Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
3584 sourcerer 1
/*
2
 * Copyright 2004 John M Bell 
3
 * Copyright 2005 Adrian Lees 
4
 * Copyright 2009 Mark Benjamin 
5
 *
6
 * This file is part of NetSurf, http://www.netsurf-browser.org/
7
 *
8
 * NetSurf is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; version 2 of the License.
11
 *
12
 * NetSurf is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
 * GNU General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU General Public License
18
 * along with this program.  If not, see .
19
 */
20
 
21
 /** \file
22
 * Free text search (core)
23
 */
24
#include "utils/config.h"
25
 
26
#include 
27
#include 
28
 
29
#include 
30
 
31
#include "content/content.h"
32
#include "content/hlcache.h"
33
#include "desktop/gui.h"
34
#include "desktop/selection.h"
35
#include "render/box.h"
36
#include "render/html.h"
37
#include "render/html_internal.h"
38
#include "render/search.h"
39
#include "render/textplain.h"
40
#include "utils/config.h"
41
#include "utils/log.h"
42
#include "utils/messages.h"
43
#include "utils/url.h"
44
#include "utils/utils.h"
45
 
46
 
47
#ifndef NOF_ELEMENTS
48
#define NOF_ELEMENTS(array) (sizeof(array)/sizeof(*(array)))
49
#endif
50
 
51
 
52
struct list_entry {
53
	unsigned start_idx;	/* start position of match */
54
	unsigned end_idx;	/* end of match */
55
 
56
	struct box *start_box;	/* used only for html contents */
57
	struct box *end_box;
58
 
59
	struct selection *sel;
60
 
61
	struct list_entry *prev;
62
	struct list_entry *next;
63
};
64
 
65
struct search_context {
66
	struct search_callbacks		callbacks;
67
	struct content	 		*c;
68
	struct list_entry 		*found;
69
	struct list_entry 		*current; /* first for select all */
70
	char 				*string;
71
	bool 				prev_case_sens;
72
	bool 				newsearch;
73
	bool				is_html;
74
};
75
 
76
 
77
/**
78
 * create a search_context
79
 * \param h the hlcache_handle the search_context is connected to
80
 * \param callbacks the callbacks to modify appearance according to results
81
 * \param p the pointer to send to the callbacks
82
 * \return true for success
83
 */
84
struct search_context * search_create_context(hlcache_handle *h,
85
		struct search_callbacks callbacks)
86
{
87
	struct search_context *context;
88
	struct list_entry *search_head;
89
	struct content *c = hlcache_handle_get_content(h);
90
 
91
	if (h == NULL)
92
		return NULL;
93
 
94
	if (content_get_type(h) != CONTENT_HTML &&
95
			content_get_type(h) != CONTENT_TEXTPLAIN) {
96
		return NULL;
97
	}
98
 
99
	context = malloc(sizeof(struct search_context));
100
	if (context == NULL) {
101
		warn_user("NoMemory", 0);
102
		return NULL;
103
	}
104
 
105
	search_head = malloc(sizeof(struct list_entry));
106
	if (search_head == NULL) {
107
		warn_user("NoMemory", 0);
108
		free(context);
109
		return NULL;
110
	}
111
 
112
	search_head->start_idx = 0;
113
	search_head->end_idx = 0;
114
	search_head->start_box = NULL;
115
	search_head->end_box = NULL;
116
	search_head->sel = NULL;
117
	search_head->prev = NULL;
118
	search_head->next = NULL;
119
 
120
	context->found = search_head;
121
	context->current = NULL;
122
	context->string = NULL;
123
	context->prev_case_sens = false;
124
	context->newsearch = true;
125
	context->c = c;
126
	context->is_html = (content_get_type(h) == CONTENT_HTML) ? true : false;
127
	context->callbacks = callbacks;
128
 
129
	if (context->is_html) {
130
		html_set_search(context->c, context);
131
	} else {
132
		textplain_set_search(context->c, context);
133
	}
134
 
135
	return context;
136
}
137
 
138
 
139
/**
140
 * Release the memory used by the list of matches,
141
 * deleting selection objects too
142
 */
143
 
144
static void free_matches(struct search_context *context)
145
{
146
	struct list_entry *a;
147
	struct list_entry *b;
148
 
149
	a = context->found->next;
150
 
151
	/* empty the list before clearing and deleting the
152
	   selections because the the clearing updates the
153
	   screen immediately, causing nested accesses to the list */
154
 
155
	context->found->prev = NULL;
156
	context->found->next = NULL;
157
 
158
	for (; a; a = b) {
159
		b = a->next;
160
		if (a->sel) {
161
			selection_clear(a->sel, true);
162
			selection_destroy(a->sel);
163
		}
164
		free(a);
165
	}
166
}
167
 
168
 
169
/**
170
 * Find the first occurrence of 'match' in 'string' and return its index
171
 *
172
 * \param  string     the string to be searched (unterminated)
173
 * \param  s_len      length of the string to be searched
174
 * \param  pattern    the pattern for which we are searching (unterminated)
175
 * \param  p_len      length of pattern
176
 * \param  case_sens  true iff case sensitive match required
177
 * \param  m_len      accepts length of match in bytes
178
 * \return pointer to first match, NULL if none
179
 */
180
 
181
static const char *find_pattern(const char *string, int s_len,
182
		const char *pattern, int p_len, bool case_sens,
183
		unsigned int *m_len)
184
{
185
	struct { const char *ss, *s, *p; bool first; } context[16];
186
	const char *ep = pattern + p_len;
187
	const char *es = string  + s_len;
188
	const char *p = pattern - 1;  /* a virtual '*' before the pattern */
189
	const char *ss = string;
190
	const char *s = string;
191
	bool first = true;
192
	int top = 0;
193
 
194
	while (p < ep) {
195
		bool matches;
196
		if (p < pattern || *p == '*') {
197
			char ch;
198
 
199
			/* skip any further asterisks; one is the same as many
200
			*/
201
			do p++; while (p < ep && *p == '*');
202
 
203
			/* if we're at the end of the pattern, yes, it matches
204
			*/
205
			if (p >= ep) break;
206
 
207
			/* anything matches a # so continue matching from
208
			   here, and stack a context that will try to match
209
			   the wildcard against the next character */
210
 
211
			ch = *p;
212
			if (ch != '#') {
213
				/* scan forwards until we find a match for
214
				   this char */
215
				if (!case_sens) ch = toupper(ch);
216
				while (s < es) {
217
					if (case_sens) {
218
						if (*s == ch) break;
219
					} else if (toupper(*s) == ch)
220
						break;
221
					s++;
222
				}
223
			}
224
 
225
			if (s < es) {
226
				/* remember where we are in case the match
227
				   fails; we may then resume */
228
				if (top < (int)NOF_ELEMENTS(context)) {
229
					context[top].ss = ss;
230
					context[top].s  = s + 1;
231
					context[top].p  = p - 1;
232
					/* ptr to last asterisk */
233
					context[top].first = first;
234
					top++;
235
				}
236
 
237
				if (first) {
238
					ss = s;
239
					/* remember first non-'*' char */
240
					first = false;
241
				}
242
 
243
				matches = true;
244
			}
245
			else
246
				matches = false;
247
		}
248
		else if (s < es) {
249
			char ch = *p;
250
			if (ch == '#')
251
				matches = true;
252
			else {
253
				if (case_sens)
254
					matches = (*s == ch);
255
				else
256
					matches = (toupper(*s) == toupper(ch));
257
			}
258
			if (matches && first) {
259
				ss = s;  /* remember first non-'*' char */
260
				first = false;
261
			}
262
		}
263
		else
264
			matches = false;
265
 
266
		if (matches) {
267
			p++; s++;
268
		}
269
		else {
270
			/* doesn't match, resume with stacked context if we have one */
271
			if (--top < 0) return NULL;  /* no match, give up */
272
 
273
			ss = context[top].ss;
274
			s  = context[top].s;
275
			p  = context[top].p;
276
			first = context[top].first;
277
		}
278
	}
279
 
280
	/* end of pattern reached */
281
	*m_len = max(s - ss, 1);
282
	return ss;
283
}
284
 
285
 
286
/**
287
 * Add a new entry to the list of matches
288
 *
289
 * \param  start_idx  offset of match start within textual representation
290
 * \param  end_idx    offset of match end
291
 * \return pointer to added entry, NULL iff failed
292
 */
293
 
294
static struct list_entry *add_entry(unsigned start_idx, unsigned end_idx,
295
		struct search_context *context)
296
{
297
	struct list_entry *entry;
298
 
299
	/* found string in box => add to list */
300
	entry = calloc(1, sizeof(*entry));
301
	if (!entry) {
302
		warn_user("NoMemory", 0);
303
		return NULL;
304
	}
305
 
306
	entry->start_idx = start_idx;
307
	entry->end_idx = end_idx;
308
	entry->sel = NULL;
309
 
310
	entry->next = 0;
311
	entry->prev = context->found->prev;
312
	if (context->found->prev == NULL)
313
		context->found->next = entry;
314
	else
315
		context->found->prev->next = entry;
316
	context->found->prev = entry;
317
 
318
	return entry;
319
}
320
 
321
 
322
/**
323
 * Finds all occurrences of a given string in the html box tree
324
 *
325
 * \param pattern   the string pattern to search for
326
 * \param p_len     pattern length
327
 * \param cur       pointer to the current box
328
 * \param case_sens whether to perform a case sensitive search
329
 * \return true on success, false on memory allocation failure
330
 */
331
static bool find_occurrences_html(const char *pattern, int p_len,
332
		struct box *cur, bool case_sens,
333
		struct search_context *context)
334
{
335
	struct box *a;
336
 
337
	/* ignore this box, if there's no visible text */
338
	if (!cur->object && cur->text) {
339
		const char *text = cur->text;
340
		unsigned length = cur->length;
341
 
342
		while (length > 0) {
343
			struct list_entry *entry;
344
			unsigned match_length;
345
			unsigned match_offset;
346
			const char *new_text;
347
			const char *pos = find_pattern(text, length,
348
					pattern, p_len, case_sens,
349
					&match_length);
350
			if (!pos) break;
351
 
352
			/* found string in box => add to list */
353
			match_offset = pos - cur->text;
354
 
355
			entry = add_entry(cur->byte_offset + match_offset,
356
						cur->byte_offset +
357
							match_offset +
358
							match_length, context);
359
			if (!entry)
360
				return false;
361
 
362
			entry->start_box = cur;
363
			entry->end_box = cur;
364
 
365
			new_text = pos + match_length;
366
			length -= (new_text - text);
367
			text = new_text;
368
		}
369
	}
370
 
371
	/* and recurse */
372
	for (a = cur->children; a; a = a->next) {
373
		if (!find_occurrences_html(pattern, p_len, a, case_sens,
374
				context))
375
			return false;
376
	}
377
 
378
	return true;
379
}
380
 
381
 
382
/**
383
 * Finds all occurrences of a given string in a textplain content
384
 *
385
 * \param pattern   the string pattern to search for
386
 * \param p_len     pattern length
387
 * \param c         the content to be searched
388
 * \param case_sens wheteher to perform a case sensitive search
389
 * \return true on success, false on memory allocation failure
390
 */
391
 
392
static bool find_occurrences_text(const char *pattern, int p_len,
393
		struct content *c, bool case_sens,
394
		struct search_context *context)
395
{
396
	int nlines = textplain_line_count(c);
397
	int line;
398
 
399
	for(line = 0; line < nlines; line++) {
400
		size_t offset, length;
401
		const char *text = textplain_get_line(c, line,
402
				&offset, &length);
403
		if (text) {
404
			while (length > 0) {
405
				struct list_entry *entry;
406
				unsigned match_length;
407
				size_t start_idx;
408
				const char *new_text;
409
				const char *pos = find_pattern(text, length,
410
						pattern, p_len, case_sens,
411
						&match_length);
412
				if (!pos) break;
413
 
414
				/* found string in line => add to list */
415
				start_idx = offset + (pos - text);
416
				entry = add_entry(start_idx, start_idx +
417
						match_length, context);
418
				if (!entry)
419
					return false;
420
 
421
				new_text = pos + match_length;
422
				offset += (new_text - text);
423
				length -= (new_text - text);
424
				text = new_text;
425
			}
426
		}
427
	}
428
 
429
	return true;
430
}
431
 
432
 
433
/**
434
 * Search for a string in the box tree
435
 *
436
 * \param string the string to search for
437
 * \param string_len length of search string
438
 */
439
static void search_text(const char *string, int string_len,
440
		struct search_context *context, search_flags_t flags)
441
{
442
	struct rect bounds;
443
	struct box *box = NULL;
444
	union content_msg_data msg_data;
445
	bool case_sensitive, forwards, showall;
446
 
447
	case_sensitive = ((flags & SEARCH_FLAG_CASE_SENSITIVE) != 0) ?
448
			true : false;
449
	forwards = ((flags & SEARCH_FLAG_FORWARDS) != 0) ? true : false;
450
	showall = ((flags & SEARCH_FLAG_SHOWALL) != 0) ? true : false;
451
 
452
	if (context->c == NULL)
453
		return;
454
 
455
	if (context->is_html == true) {
456
		html_content *html = (html_content *)context->c;
457
 
458
		box = html->layout;
459
 
460
		if (!box)
461
			return;
462
	}
463
 
464
	/* LOG(("do_search '%s' - '%s' (%p, %p) %p (%d, %d) %d",
465
		search_data.string, string, search_data.content, c, search_data.found->next,
466
		search_data.prev_case_sens, case_sens, forwards)); */
467
 
468
	/* check if we need to start a new search or continue an old one */
469
	if (context->newsearch) {
470
		bool res;
471
 
472
		if (context->string != NULL)
473
			free(context->string);
474
		context->current = NULL;
475
		free_matches(context);
476
 
477
		context->string = malloc(string_len + 1);
478
		if (context->string != NULL) {
479
			memcpy(context->string, string, string_len);
480
			context->string[string_len] = '\0';
481
		}
482
 
483
		if ((context->callbacks.gui != NULL) &&
484
				(context->callbacks.gui->hourglass != NULL))
485
			context->callbacks.gui->hourglass(true,
486
					context->callbacks.gui_p);
487
 
488
		if (context->is_html == true) {
489
			res = find_occurrences_html(string, string_len,
490
					box, case_sensitive, context);
491
		} else {
492
			res = find_occurrences_text(string, string_len,
493
					context->c, case_sensitive, context);
494
		}
495
 
496
		if (!res) {
497
			free_matches(context);
498
			if ((context->callbacks.gui != NULL) &&
499
					(context->callbacks.gui->hourglass !=
500
					NULL))
501
				context->callbacks.gui->hourglass(false,
502
						context->callbacks.gui_p);
503
			return;
504
		}
505
		if ((context->callbacks.gui != NULL) &&
506
				(context->callbacks.gui->hourglass != NULL))
507
			context->callbacks.gui->hourglass(false,
508
					context->callbacks.gui_p);
509
 
510
		context->prev_case_sens = case_sensitive;
511
/* LOG(("%d %p %p (%p, %p)", new, search_data.found->next, search_data.current,
512
		search_data.current->prev, search_data.current->next)); */
513
		/* new search, beginning at the top of the page */
514
		context->current = context->found->next;
515
		context->newsearch = false;
516
	}
517
	else if (context->current != NULL) {
518
		/* continued search in the direction specified */
519
		if (forwards) {
520
			if (context->current->next)
521
				context->current = context->current->next;
522
		}
523
		else {
524
			if (context->current->prev)
525
				context->current = context->current->prev;
526
		}
527
	}
528
 
529
	if (context->callbacks.gui == NULL)
530
		return;
531
	if (context->callbacks.gui->status != NULL)
532
		context->callbacks.gui->status((context->current != NULL),
533
				context->callbacks.gui_p);
534
	search_show_all(showall, context);
535
 
536
	if (context->callbacks.gui->back_state != NULL)
537
		context->callbacks.gui->back_state((context->current != NULL) &&
538
				(context->current->prev != NULL),
539
				context->callbacks.gui_p);
540
	if (context->callbacks.gui->forward_state != NULL)
541
		context->callbacks.gui->forward_state(
542
				(context->current != NULL) &&
543
					(context->current->next != NULL),
544
				context->callbacks.gui_p);
545
 
546
	if (context->current == NULL)
547
		return;
548
 
549
	if (context->is_html == true) {
550
		/* get box position and jump to it */
551
		box_coords(context->current->start_box, &bounds.x0, &bounds.y0);
552
		/* \todo: move x0 in by correct idx */
553
		box_coords(context->current->end_box, &bounds.x1, &bounds.y1);
554
		/* \todo: move x1 in by correct idx */
555
		bounds.x1 += context->current->end_box->width;
556
		bounds.y1 += context->current->end_box->height;
557
	} else {
558
		textplain_coords_from_range(context->c,
559
				context->current->start_idx,
560
				context->current->end_idx, &bounds);
561
	}
562
 
563
	msg_data.scroll.area = true;
564
	msg_data.scroll.x0 = bounds.x0;
565
	msg_data.scroll.y0 = bounds.y0;
566
	msg_data.scroll.x1 = bounds.x1;
567
	msg_data.scroll.y1 = bounds.y1;
568
	content_broadcast(context->c, CONTENT_MSG_SCROLL, msg_data);
569
}
570
 
571
 
572
/**
573
 * Begins/continues the search process
574
 * Note that this may be called many times for a single search.
575
 *
576
 * \param bw the browser_window to search in
577
 * \param flags the flags forward/back etc
578
 * \param string the string to match
579
 */
580
 
581
void search_step(struct search_context *context, search_flags_t flags,
582
		const char *string)
583
{
584
	int string_len;
585
	int i = 0;
586
 
587
	if ((context == NULL) || (context->callbacks.gui == NULL)) {
588
		warn_user("SearchError", 0);
589
		return;
590
	}
591
 
592
	if (context->callbacks.gui->add_recent != NULL)
593
		context->callbacks.gui->add_recent(string,
594
				context->callbacks.gui_p);
595
 
596
	string_len = strlen(string);
597
	for(i = 0; i < string_len; i++)
598
		if (string[i] != '#' && string[i] != '*') break;
599
	if (i >= string_len) {
600
		union content_msg_data msg_data;
601
		free_matches(context);
602
		if (context->callbacks.gui->status != NULL)
603
			context->callbacks.gui->status(true,
604
					context->callbacks.gui_p);
605
		if (context->callbacks.gui->back_state != NULL)
606
			context->callbacks.gui->back_state(false,
607
					context->callbacks.gui_p);
608
		if (context->callbacks.gui->forward_state != NULL)
609
			context->callbacks.gui->forward_state(false,
610
					context->callbacks.gui_p);
611
 
612
		msg_data.scroll.area = false;
613
		msg_data.scroll.x0 = 0;
614
		msg_data.scroll.y0 = 0;
615
		content_broadcast(context->c, CONTENT_MSG_SCROLL, msg_data);
616
		return;
617
	}
618
	search_text(string, string_len, context, flags);
619
}
620
 
621
 
622
/**
623
 * Determines whether any portion of the given text box should be
624
 * selected because it matches the current search string.
625
 *
626
 * \param  bw            browser window
627
 * \param  start_offset  byte offset within text of string to be checked
628
 * \param  end_offset    byte offset within text
629
 * \param  start_idx     byte offset within string of highlight start
630
 * \param  end_idx       byte offset of highlight end
631
 * \return true iff part of the box should be highlighted
632
 */
633
 
634
bool search_term_highlighted(struct content *c,
635
		unsigned start_offset, unsigned end_offset,
636
		unsigned *start_idx, unsigned *end_idx,
637
		struct search_context *context)
638
{
639
	if (c == context->c) {
640
		struct list_entry *a;
641
		for(a = context->found->next; a; a = a->next)
642
			if (a->sel && selection_defined(a->sel) &&
643
				selection_highlighted(a->sel,
644
					start_offset, end_offset,
645
					start_idx, end_idx))
646
				return true;
647
	}
648
 
649
	return false;
650
}
651
 
652
 
653
/**
654
 * Specifies whether all matches or just the current match should
655
 * be highlighted in the search text.
656
 */
657
 
658
void search_show_all(bool all, struct search_context *context)
659
{
660
	struct list_entry *a;
661
 
662
	for (a = context->found->next; a; a = a->next) {
663
		bool add = true;
664
		if (!all && a != context->current) {
665
			add = false;
666
			if (a->sel) {
667
				selection_clear(a->sel, true);
668
				selection_destroy(a->sel);
669
				a->sel = NULL;
670
			}
671
		}
672
		if (add && !a->sel) {
673
 
674
			if (context->is_html == true) {
675
				html_content *html = (html_content *)context->c;
676
				a->sel = selection_create(context->c, true);
677
				if (!a->sel)
678
					continue;
679
 
680
				selection_init(a->sel, html->layout);
681
			} else {
682
				a->sel = selection_create(context->c, false);
683
				if (!a->sel)
684
					continue;
685
 
686
				selection_init(a->sel, NULL);
687
			}
688
 
689
			selection_set_start(a->sel, a->start_idx);
690
			selection_set_end(a->sel, a->end_idx);
691
		}
692
	}
693
}
694
 
695
 
696
/**
697
 * Ends the search process, invalidating all state
698
 * freeing the list of found boxes
699
 */
700
void search_destroy_context(struct search_context *context)
701
{
702
	assert(context != NULL);
703
 
704
	if (context->callbacks.invalidate != NULL) {
705
		context->callbacks.invalidate(context, context->callbacks.p);
706
	}
707
 
708
	if (context->c != NULL) {
709
 
710
		if (context->is_html)
711
			html_set_search(context->c, NULL);
712
		else
713
			textplain_set_search(context->c, NULL);
714
	}
715
	if ((context->string != NULL) && (context->callbacks.gui != NULL) &&
716
			(context->callbacks.gui->add_recent != NULL)) {
717
		context->callbacks.gui->add_recent(context->string,
718
				context->callbacks.gui_p);
719
		free(context->string);
720
	}
721
	free_matches(context);
722
	free(context);
723
}