/*
2 *
3 * Copyright (c) 2002
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12#ifndef NDNBOOST_REGEX_MATCHER_HPP
13#define NDNBOOST_REGEX_MATCHER_HPP
14
15#include <ndnboost/regex/v4/iterator_category.hpp>
16
17#ifdef NDNBOOST_MSVC
18#pragma warning(push)
19#pragma warning(disable: 4103)
20#endif
21#ifdef NDNBOOST_HAS_ABI_HEADERS
22# include NDNBOOST_ABI_PREFIX
23#endif
24#ifdef NDNBOOST_MSVC
25#pragma warning(pop)
26#endif
27
28#ifdef NDNBOOST_MSVC
29# pragma warning(push)
30# pragma warning(disable: 4800)
31#endif
32
33namespace ndnboost{
34namespace re_detail{
35
36//
37// error checking API:
38//
39NDNBOOST_REGEX_DECL void NDNBOOST_REGEX_CALL verify_options(ndnboost::regex_constants::syntax_option_type ef, match_flag_type mf);
40//
41// function can_start:
42//
//
// Generic fallback used for character types that have no dedicated overload.
// Returns true without consulting the table when c is negative or is at or
// above (1 << CHAR_BIT); otherwise returns whether map[c] shares any bit
// with mask.
//
template <class charT>
inline bool can_start(charT c, const unsigned char* map, unsigned char mask)
{
   if(c < static_cast<charT>(0))
      return true;
   if(c >= static_cast<charT>(1 << CHAR_BIT))
      return true;
   // explicit comparison avoids an implicit int -> bool conversion:
   return (map[c] & mask) != 0;
}
// char overload: c is reinterpreted as unsigned char before indexing so that
// a negative char value never produces a negative index.
inline bool can_start(char c, const unsigned char* map, unsigned char mask)
{
   return (map[static_cast<unsigned char>(c)] & mask) != 0;
}
// signed char overload: c is converted to unsigned char before indexing so
// that negative values never produce a negative index.
inline bool can_start(signed char c, const unsigned char* map, unsigned char mask)
{
   return (map[static_cast<unsigned char>(c)] & mask) != 0;
}
// unsigned char overload: c indexes the table directly.
inline bool can_start(unsigned char c, const unsigned char* map, unsigned char mask)
{
   return (map[c] & mask) != 0;
}
// unsigned short overload: values at or above (1 << CHAR_BIT) are not looked
// up and always report true; smaller values are tested against map[c].
inline bool can_start(unsigned short c, const unsigned char* map, unsigned char mask)
{
   if(c >= (1 << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
64#if !defined(__hpux) && !defined(__WINSCW__)// WCHAR_MIN not usable in pp-directives.
65#if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(NDNBOOST_NO_INTRINSIC_WCHAR_T)
// wchar_t overload: values at or above (1 << CHAR_BIT) are not looked up and
// always report true; smaller values are tested against map[c].
inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask)
{
   if(c >= static_cast<wchar_t>(1u << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
70#endif
71#endif
72#if !defined(NDNBOOST_NO_INTRINSIC_WCHAR_T)
// unsigned int overload: values at or above (1 << CHAR_BIT) are not looked up
// and always report true; smaller values are tested against map[c].
inline bool can_start(unsigned int c, const unsigned char* map, unsigned char mask)
{
   if(c >= static_cast<unsigned int>(1u << CHAR_BIT))
      return true;
   return (map[c] & mask) != 0;
}
77#endif
78
79
80//
// Unfortunately Rogue Wave's standard library appears to have a bug
// in std::basic_string::compare that results in erroneous answers
// in some cases (tested with Borland C++ 5.1, Rogue Wave lib version
// 0x020101). The test case was:
// {39135,0} < {0xff,0}
// which succeeds when it should not.
//
88#ifndef _RWSTD_VER
89#if !NDNBOOST_WORKAROUND(NDNBOOST_MSVC, < 1310)
// Compares s with the null-terminated string p.
// When p is empty, an s that is either empty or holds exactly one null
// character is treated as equal (returns 0); every other case defers to
// std::basic_string::compare.
template <class C, class T, class A>
inline int string_compare(const std::basic_string<C,T,A>& s, const C* p)
{
   const bool p_is_empty = (0 == *p);
   if(p_is_empty && (s.empty() || ((s.size() == 1) && (s[0] == 0))))
      return 0;
   return s.compare(p);
}
100#endif
101#else
102#if !NDNBOOST_WORKAROUND(NDNBOOST_MSVC, < 1310)
// Compares s with the null-terminated string p.
// When p is empty, an s that is either empty or holds exactly one null
// character is treated as equal (returns 0); every other case defers to
// std::basic_string::compare.
template <class C, class T, class A>
inline int string_compare(const std::basic_string<C,T,A>& s, const C* p)
{
   const bool p_is_empty = (0 == *p);
   if(p_is_empty && (s.empty() || ((s.size() == 1) && (s[0] == 0))))
      return 0;
   return s.compare(p);
}
113#endif
// Narrow-string overload: compares via std::strcmp rather than
// std::string::compare, so the comparison stops at the first null character
// in either string.
inline int string_compare(const std::string& s, const char* p)
{
   return std::strcmp(s.c_str(), p);
}
116# ifndef NDNBOOST_NO_WREGEX
// Wide-string overload: compares via std::wcscmp rather than
// std::wstring::compare, so the comparison stops at the first null character
// in either string.
inline int string_compare(const std::wstring& s, const wchar_t* p)
{
   return std::wcscmp(s.c_str(), p);
}
119#endif
120#endif
// Generic comparison of an indexable sequence s (needs size() and
// operator[]) against the null-terminated string p.
// Walks both while the elements are equal. If all of s was consumed the
// result is -p[i] (zero when p ends at the same place, otherwise the negated
// next element of p); otherwise it is the difference s[i] - p[i] at the
// first mismatch.
template <class Seq, class C>
inline int string_compare(const Seq& s, const C* p)
{
   const std::size_t len = s.size();
   std::size_t i = 0;
   for(; i < len; ++i)
   {
      if(!(p[i] == s[i]))
         break;
   }
   return (i == len) ? -p[i] : s[i] - p[i];
}
131# define STR_COMP(s,p) string_compare(s,p)
132
// Advances p to the terminating null of the string it points into and
// returns a pointer to the element immediately after that null.
// p must point into a null-terminated sequence.
template<class charT>
inline const charT* re_skip_past_null(const charT* p)
{
   while(*p != static_cast<charT>(0))
      ++p;
   return p + 1;
}
139
140template <class iterator, class charT, class traits_type, class char_classT>
141iterator NDNBOOST_REGEX_CALL re_is_set_member(iterator next,
142 iterator last,
143 const re_set_long<char_classT>* set_,
144 const regex_data<charT, traits_type>& e, bool icase)
145{
146 const charT* p = reinterpret_cast<const charT*>(set_+1);
147 iterator ptr;
148 unsigned int i;
149 //bool icase = e.m_flags & regex_constants::icase;
150
151 if(next == last) return next;
152
153 typedef typename traits_type::string_type traits_string_type;
154 const ::ndnboost::regex_traits_wrapper<traits_type>& traits_inst = *(e.m_ptraits);
155
156 // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never
157 // referenced
158 (void)traits_inst;
159
160 // try and match a single character, could be a multi-character
161 // collating element...
162 for(i = 0; i < set_->csingles; ++i)
163 {
164 ptr = next;
165 if(*p == static_cast<charT>(0))
166 {
167 // treat null string as special case:
168 if(traits_inst.translate(*ptr, icase) != *p)
169 {
170 while(*p == static_cast<charT>(0))++p;
171 continue;
172 }
173 return set_->isnot ? next : (ptr == next) ? ++next : ptr;
174 }
175 else
176 {
177 while(*p && (ptr != last))
178 {
179 if(traits_inst.translate(*ptr, icase) != *p)
180 break;
181 ++p;
182 ++ptr;
183 }
184
185 if(*p == static_cast<charT>(0)) // if null we've matched
186 return set_->isnot ? next : (ptr == next) ? ++next : ptr;
187
188 p = re_skip_past_null(p); // skip null
189 }
190 }
191
192 charT col = traits_inst.translate(*next, icase);
193
194
195 if(set_->cranges || set_->cequivalents)
196 {
197 traits_string_type s1;
198 //
199 // try and match a range, NB only a single character can match
200 if(set_->cranges)
201 {
202 if((e.m_flags & regex_constants::collate) == 0)
203 s1.assign(1, col);
204 else
205 {
206 charT a[2] = { col, charT(0), };
207 s1 = traits_inst.transform(a, a + 1);
208 }
209 for(i = 0; i < set_->cranges; ++i)
210 {
211 if(STR_COMP(s1, p) >= 0)
212 {
213 do{ ++p; }while(*p);
214 ++p;
215 if(STR_COMP(s1, p) <= 0)
216 return set_->isnot ? next : ++next;
217 }
218 else
219 {
220 // skip first string
221 do{ ++p; }while(*p);
222 ++p;
223 }
224 // skip second string
225 do{ ++p; }while(*p);
226 ++p;
227 }
228 }
229 //
230 // try and match an equivalence class, NB only a single character can match
231 if(set_->cequivalents)
232 {
233 charT a[2] = { col, charT(0), };
234 s1 = traits_inst.transform_primary(a, a +1);
235 for(i = 0; i < set_->cequivalents; ++i)
236 {
237 if(STR_COMP(s1, p) == 0)
238 return set_->isnot ? next : ++next;
239 // skip string
240 do{ ++p; }while(*p);
241 ++p;
242 }
243 }
244 }
245 if(traits_inst.isctype(col, set_->cclasses) == true)
246 return set_->isnot ? next : ++next;
247 if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false))
248 return set_->isnot ? next : ++next;
249 return set_->isnot ? ++next : next;
250}
251
//
// repeater_count:
// One node in a singly linked stack of repeat counters. The two-argument
// and three-argument constructors both record the address of the stack-top
// pointer; the three-argument form additionally pushes itself onto that
// stack, and the destructor pops it again.
//
template <class BidiIterator>
class repeater_count
{
   repeater_count** stack;    // address of the pointer holding the top of the stack
   repeater_count* next;      // the node that was on top when this one was pushed (0 if none)
   int state_id;              // id of the repeat this counter belongs to (-1 for the base node)
   std::size_t count;         // the number of iterations so far
   BidiIterator start_pos;    // where the last repeat started
public:
   // Creates a base node that is not linked to anything: it has id -1,
   // a zero count and no predecessor. *s is not modified.
   repeater_count(repeater_count** s)
   {
      stack = s;
      next = 0;
      state_id = -1;
      count = 0;
   }
   // Pushes a counter for repeat i onto the stack at *s.
   // *s must be non-null on entry (it is dereferenced unconditionally).
   // If i is greater than the id of the previous top node the count starts
   // at zero; otherwise the list is searched for an existing node with the
   // same id and, if found, its count and start position are inherited.
   repeater_count(int i, repeater_count** s, BidiIterator start)
      : start_pos(start)
   {
      state_id = i;
      stack = s;
      next = *stack;
      *stack = this;
      if(state_id > next->state_id)
         count = 0;
      else
      {
         repeater_count* p = next;
         while(p && (p->state_id != state_id))
            p = p->next;
         if(p)
         {
            count = p->count;
            start_pos = p->start_pos;
         }
         else
            count = 0;
      }
   }
   // Restores the previous stack top; a base node (next == 0) leaves the
   // stack pointer untouched.
   ~repeater_count()
   {
      if(next)
         *stack = next;
   }
   std::size_t get_count() { return count; }
   int get_id() { return state_id; }
   // Increments the iteration count and returns the new value.
   std::size_t operator++() { return ++count; }
   bool check_null_repeat(const BidiIterator& pos, std::size_t max)
   {
      // this is called when we are about to start a new repeat,
      // if the last one was NULL move our count to max,
      // otherwise save the current position.
      bool result = (count == 0) ? false : (pos == start_pos);
      if(result)
         count = max;
      else
         start_pos = pos;
      return result;
   }
};
312
313struct saved_state;
314
//
// Integer tags identifying the kinds of saved state record.
// The values are consecutive from 0; saved_state_count equals the number
// of tags that precede it.
// NOTE(review): presumably these index the unwind handler table of the
// non-recursive matcher - confirm against perl_matcher_non_recursive.hpp.
//
enum saved_state_type
{
   saved_type_end = 0,
   saved_type_paren = 1,
   saved_type_recurse = 2,
   saved_type_assertion = 3,
   saved_state_alt = 4,
   saved_state_repeater_count = 5,
   saved_state_extra_block = 6,
   saved_state_greedy_single_repeat = 7,
   saved_state_rep_slow_dot = 8,
   saved_state_rep_fast_dot = 9,
   saved_state_rep_char = 10,
   saved_state_rep_short_set = 11,
   saved_state_rep_long_set = 12,
   saved_state_non_greedy_long_repeat = 13,
   saved_state_count = 14
};
333
334template <class Results>
335struct recursion_info
336{
337 typedef typename Results::value_type value_type;
338 typedef typename value_type::iterator iterator;
339 int idx;
340 const re_syntax_base* preturn_address;
341 Results results;
342 repeater_count<iterator>* repeater_stack;
343};
344
345#ifdef NDNBOOST_MSVC
346#pragma warning(push)
347#pragma warning(disable : 4251 4231)
348# if NDNBOOST_MSVC < 1600
349# pragma warning(disable : 4660)
350# endif
351#endif
352
353template <class BidiIterator, class Allocator, class traits>
354class perl_matcher
355{
356public:
357 typedef typename traits::char_type char_type;
358 typedef perl_matcher<BidiIterator, Allocator, traits> self_type;
359 typedef bool (self_type::*matcher_proc_type)(void);
360 typedef std::size_t traits_size_type;
361 typedef typename is_byte<char_type>::width_type width_type;
362 typedef typename regex_iterator_traits<BidiIterator>::difference_type difference_type;
363 typedef match_results<BidiIterator, Allocator> results_type;
364
365 perl_matcher(BidiIterator first, BidiIterator end,
366 match_results<BidiIterator, Allocator>& what,
367 const basic_regex<char_type, traits>& e,
368 match_flag_type f,
369 BidiIterator l_base)
370 : m_result(what), base(first), last(end),
371 position(first), backstop(l_base), re(e), traits_inst(e.get_traits()),
372 m_independent(false), next_count(&rep_obj), rep_obj(&next_count)
373 {
374 construct_init(e, f);
375 }
376
377 bool match();
378 bool find();
379
380 void setf(match_flag_type f)
381 { m_match_flags |= f; }
382 void unsetf(match_flag_type f)
383 { m_match_flags &= ~f; }
384
385private:
386 void construct_init(const basic_regex<char_type, traits>& e, match_flag_type f);
387
388 bool find_imp();
389 bool match_imp();
390#ifdef NDNBOOST_REGEX_HAS_MS_STACK_GUARD
391 typedef bool (perl_matcher::*protected_proc_type)();
392 bool protected_call(protected_proc_type);
393#endif
394 void estimate_max_state_count(std::random_access_iterator_tag*);
395 void estimate_max_state_count(void*);
396 bool match_prefix();
397 bool match_all_states();
398
399 // match procs, stored in s_match_vtable:
400 bool match_startmark();
401 bool match_endmark();
402 bool match_literal();
403 bool match_start_line();
404 bool match_end_line();
405 bool match_wild();
406 bool match_match();
407 bool match_word_boundary();
408 bool match_within_word();
409 bool match_word_start();
410 bool match_word_end();
411 bool match_buffer_start();
412 bool match_buffer_end();
413 bool match_backref();
414 bool match_long_set();
415 bool match_set();
416 bool match_jump();
417 bool match_alt();
418 bool match_rep();
419 bool match_combining();
420 bool match_soft_buffer_end();
421 bool match_restart_continue();
422 bool match_long_set_repeat();
423 bool match_set_repeat();
424 bool match_char_repeat();
425 bool match_dot_repeat_fast();
426 bool match_dot_repeat_slow();
427 bool match_dot_repeat_dispatch()
428 {
429 return ::ndnboost::is_random_access_iterator<BidiIterator>::value ? match_dot_repeat_fast() : match_dot_repeat_slow();
430 }
431 bool match_backstep();
432 bool match_assert_backref();
433 bool match_toggle_case();
434#ifdef NDNBOOST_REGEX_RECURSIVE
435 bool backtrack_till_match(std::size_t count);
436#endif
437 bool match_recursion();
438
439 // find procs stored in s_find_vtable:
440 bool find_restart_any();
441 bool find_restart_word();
442 bool find_restart_line();
443 bool find_restart_buf();
444 bool find_restart_lit();
445
446private:
447 // final result structure to be filled in:
448 match_results<BidiIterator, Allocator>& m_result;
449 // temporary result for POSIX matches:
450 scoped_ptr<match_results<BidiIterator, Allocator> > m_temp_match;
451 // pointer to actual result structure to fill in:
452 match_results<BidiIterator, Allocator>* m_presult;
453 // start of sequence being searched:
454 BidiIterator base;
455 // end of sequence being searched:
456 BidiIterator last;
457 // current character being examined:
458 BidiIterator position;
459 // where to restart next search after failed match attempt:
460 BidiIterator restart;
461 // where the current search started from, acts as base for $` during grep:
462 BidiIterator search_base;
463 // how far we can go back when matching lookbehind:
464 BidiIterator backstop;
465 // the expression being examined:
466 const basic_regex<char_type, traits>& re;
467 // the expression's traits class:
468 const ::ndnboost::regex_traits_wrapper<traits>& traits_inst;
469 // the next state in the machine being matched:
470 const re_syntax_base* pstate;
471 // matching flags in use:
472 match_flag_type m_match_flags;
473 // how many states we have examined so far:
474 std::ptrdiff_t state_count;
475 // max number of states to examine before giving up:
476 std::ptrdiff_t max_state_count;
477 // whether we should ignore case or not:
478 bool icase;
479 // set to true when (position == last), indicates that we may have a partial match:
480 bool m_has_partial_match;
481 // set to true whenever we get a match:
482 bool m_has_found_match;
483 // set to true whenever we're inside an independent sub-expression:
484 bool m_independent;
485 // the current repeat being examined:
486 repeater_count<BidiIterator>* next_count;
487 // the first repeat being examined (top of linked list):
488 repeater_count<BidiIterator> rep_obj;
489 // the mask to pass when matching word boundaries:
490 typename traits::char_class_type m_word_mask;
491 // the bitmask to use when determining whether a match_any matches a newline or not:
492 unsigned char match_any_mask;
493 // recursion information:
494 std::vector<recursion_info<results_type> > recursion_stack;
495
496#ifdef NDNBOOST_REGEX_NON_RECURSIVE
497 //
498 // additional members for non-recursive version:
499 //
500 typedef bool (self_type::*unwind_proc_type)(bool);
501
502 void extend_stack();
503 bool unwind(bool);
504 bool unwind_end(bool);
505 bool unwind_paren(bool);
506 bool unwind_recursion_stopper(bool);
507 bool unwind_assertion(bool);
508 bool unwind_alt(bool);
509 bool unwind_repeater_counter(bool);
510 bool unwind_extra_block(bool);
511 bool unwind_greedy_single_repeat(bool);
512 bool unwind_slow_dot_repeat(bool);
513 bool unwind_fast_dot_repeat(bool);
514 bool unwind_char_repeat(bool);
515 bool unwind_short_set_repeat(bool);
516 bool unwind_long_set_repeat(bool);
517 bool unwind_non_greedy_repeat(bool);
518 bool unwind_recursion(bool);
519 bool unwind_recursion_pop(bool);
520 void destroy_single_repeat();
521 void push_matched_paren(int index, const sub_match<BidiIterator>& sub);
522 void push_recursion_stopper();
523 void push_assertion(const re_syntax_base* ps, bool positive);
524 void push_alt(const re_syntax_base* ps);
525 void push_repeater_count(int i, repeater_count<BidiIterator>** s);
526 void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int state_id);
527 void push_non_greedy_repeat(const re_syntax_base* ps);
528 void push_recursion(int idx, const re_syntax_base* p, results_type* presults);
529 void push_recursion_pop();
530
531 // pointer to base of stack:
532 saved_state* m_stack_base;
533 // pointer to current stack position:
534 saved_state* m_backup_state;
535 // determines what value to return when unwinding from recursion,
536 // allows for mixed recursive/non-recursive algorithm:
537 bool m_recursive_result;
538 // how many memory blocks have we used up?:
539 unsigned used_block_count;
540#endif
541
542 // these operations aren't allowed, so are declared private,
543 // bodies are provided to keep explicit-instantiation requests happy:
544 perl_matcher& operator=(const perl_matcher&)
545 {
546 return *this;
547 }
548 perl_matcher(const perl_matcher& that)
549 : m_result(that.m_result), re(that.re), traits_inst(that.traits_inst), rep_obj(0) {}
550};
551
552#ifdef NDNBOOST_MSVC
553#pragma warning(pop)
554#endif
555
556} // namespace re_detail
557
558#ifdef NDNBOOST_MSVC
559#pragma warning(push)
560#pragma warning(disable: 4103)
561#endif
562#ifdef NDNBOOST_HAS_ABI_HEADERS
563# include NDNBOOST_ABI_SUFFIX
564#endif
565#ifdef NDNBOOST_MSVC
566#pragma warning(pop)
567#endif
568
569} // namespace ndnboost
570
571#ifdef NDNBOOST_MSVC
572# pragma warning(pop)
573#endif
574
575//
576// include the implementation of perl_matcher:
577//
578#ifdef NDNBOOST_REGEX_RECURSIVE
579#include <ndnboost/regex/v4/perl_matcher_recursive.hpp>
580#else
581#include <ndnboost/regex/v4/perl_matcher_non_recursive.hpp>
582#endif
583// this one has to be last:
584#include <ndnboost/regex/v4/perl_matcher_common.hpp>
585
586#endif
587