00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031 namespace std
00032 {
00033 namespace __regex
00034 {
/// Scanner state flags that do not depend on the iterator type.
struct _Scanner_base
{
  /// Bit mask holding any combination of the _S_state_* flags below.
  typedef unsigned int _StateT;

  // One bit per flag; bit 1 (0x02) is not assigned in this struct.
  static const _StateT _S_state_at_start   = 0x01;
  static const _StateT _S_state_in_brace   = 0x04;
  static const _StateT _S_state_in_bracket = 0x08;
};
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055 template<typename _InputIterator>
00056 class _Scanner: public _Scanner_base
00057 {
00058 public:
00059 typedef _InputIterator _IteratorT;
00060 typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
00061 typedef std::basic_string<_CharT> _StringT;
00062 typedef regex_constants::syntax_option_type _FlagT;
00063 typedef const std::ctype<_CharT> _CtypeT;
00064
00065
00066 enum _TokenT
00067 {
00068 _S_token_anychar,
00069 _S_token_backref,
00070 _S_token_bracket_begin,
00071 _S_token_bracket_end,
00072 _S_token_inverse_class,
00073 _S_token_char_class_name,
00074 _S_token_closure0,
00075 _S_token_closure1,
00076 _S_token_collelem_multi,
00077 _S_token_collelem_single,
00078 _S_token_collsymbol,
00079 _S_token_comma,
00080 _S_token_dash,
00081 _S_token_dup_count,
00082 _S_token_eof,
00083 _S_token_equiv_class_name,
00084 _S_token_interval_begin,
00085 _S_token_interval_end,
00086 _S_token_line_begin,
00087 _S_token_line_end,
00088 _S_token_opt,
00089 _S_token_or,
00090 _S_token_ord_char,
00091 _S_token_quoted_char,
00092 _S_token_subexpr_begin,
00093 _S_token_subexpr_end,
00094 _S_token_word_begin,
00095 _S_token_word_end,
00096 _S_token_unknown
00097 };
00098
00099 public:
00100 _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
00101 std::locale __loc)
00102 : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
00103 _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
00104 { _M_advance(); }
00105
00106 void
00107 _M_advance();
00108
00109 _TokenT
00110 _M_token() const
00111 { return _M_curToken; }
00112
00113 const _StringT&
00114 _M_value() const
00115 { return _M_curValue; }
00116
00117 #ifdef _GLIBCXX_DEBUG
00118 std::ostream&
00119 _M_print(std::ostream&);
00120 #endif
00121
00122 private:
00123 void
00124 _M_eat_escape();
00125
00126 void
00127 _M_scan_in_brace();
00128
00129 void
00130 _M_scan_in_bracket();
00131
00132 void
00133 _M_eat_charclass();
00134
00135 void
00136 _M_eat_equivclass();
00137
00138 void
00139 _M_eat_collsymbol();
00140
00141 private:
00142 _IteratorT _M_current;
00143 _IteratorT _M_end;
00144 _FlagT _M_flags;
00145 _CtypeT& _M_ctype;
00146 _TokenT _M_curToken;
00147 _StringT _M_curValue;
00148 _StateT _M_state;
00149 };
00150
00151 template<typename _InputIterator>
00152 void
00153 _Scanner<_InputIterator>::
00154 _M_advance()
00155 {
00156 if (_M_current == _M_end)
00157 {
00158 _M_curToken = _S_token_eof;
00159 return;
00160 }
00161
00162 _CharT __c = *_M_current;
00163 if (_M_state & _S_state_in_bracket)
00164 {
00165 _M_scan_in_bracket();
00166 return;
00167 }
00168 if (_M_state & _S_state_in_brace)
00169 {
00170 _M_scan_in_brace();
00171 return;
00172 }
00173 else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
00174 {
00175 _M_curToken = _S_token_line_begin;
00176 ++_M_current;
00177 return;
00178 }
00179 else if (__c == _M_ctype.widen('$'))
00180 {
00181 _M_curToken = _S_token_line_end;
00182 ++_M_current;
00183 return;
00184 }
00185 else if (__c == _M_ctype.widen('.'))
00186 {
00187 _M_curToken = _S_token_anychar;
00188 ++_M_current;
00189 return;
00190 }
00191 else if (__c == _M_ctype.widen('*'))
00192 {
00193 _M_curToken = _S_token_closure0;
00194 ++_M_current;
00195 return;
00196 }
00197 else if (__c == _M_ctype.widen('+'))
00198 {
00199 _M_curToken = _S_token_closure1;
00200 ++_M_current;
00201 return;
00202 }
00203 else if (__c == _M_ctype.widen('|'))
00204 {
00205 _M_curToken = _S_token_or;
00206 ++_M_current;
00207 return;
00208 }
00209 else if (__c == _M_ctype.widen('['))
00210 {
00211 _M_curToken = _S_token_bracket_begin;
00212 _M_state |= (_S_state_in_bracket | _S_state_at_start);
00213 ++_M_current;
00214 return;
00215 }
00216 else if (__c == _M_ctype.widen('\\'))
00217 {
00218 _M_eat_escape();
00219 return;
00220 }
00221 else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00222 {
00223 if (__c == _M_ctype.widen('('))
00224 {
00225 _M_curToken = _S_token_subexpr_begin;
00226 ++_M_current;
00227 return;
00228 }
00229 else if (__c == _M_ctype.widen(')'))
00230 {
00231 _M_curToken = _S_token_subexpr_end;
00232 ++_M_current;
00233 return;
00234 }
00235 else if (__c == _M_ctype.widen('{'))
00236 {
00237 _M_curToken = _S_token_interval_begin;
00238 _M_state |= _S_state_in_brace;
00239 ++_M_current;
00240 return;
00241 }
00242 }
00243
00244 _M_curToken = _S_token_ord_char;
00245 _M_curValue.assign(1, __c);
00246 ++_M_current;
00247 }
00248
00249
00250 template<typename _InputIterator>
00251 void
00252 _Scanner<_InputIterator>::
00253 _M_scan_in_brace()
00254 {
00255 if (_M_ctype.is(_CtypeT::digit, *_M_current))
00256 {
00257 _M_curToken = _S_token_dup_count;
00258 _M_curValue.assign(1, *_M_current);
00259 ++_M_current;
00260 while (_M_current != _M_end
00261 && _M_ctype.is(_CtypeT::digit, *_M_current))
00262 {
00263 _M_curValue += *_M_current;
00264 ++_M_current;
00265 }
00266 return;
00267 }
00268 else if (*_M_current == _M_ctype.widen(','))
00269 {
00270 _M_curToken = _S_token_comma;
00271 ++_M_current;
00272 return;
00273 }
00274 if (_M_flags & (regex_constants::basic | regex_constants::grep))
00275 {
00276 if (*_M_current == _M_ctype.widen('\\'))
00277 _M_eat_escape();
00278 }
00279 else
00280 {
00281 if (*_M_current == _M_ctype.widen('}'))
00282 {
00283 _M_curToken = _S_token_interval_end;
00284 _M_state &= ~_S_state_in_brace;
00285 ++_M_current;
00286 return;
00287 }
00288 }
00289 }
00290
00291 template<typename _InputIterator>
00292 void
00293 _Scanner<_InputIterator>::
00294 _M_scan_in_bracket()
00295 {
00296 if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
00297 {
00298 _M_curToken = _S_token_inverse_class;
00299 _M_state &= ~_S_state_at_start;
00300 ++_M_current;
00301 return;
00302 }
00303 else if (*_M_current == _M_ctype.widen('['))
00304 {
00305 ++_M_current;
00306 if (_M_current == _M_end)
00307 {
00308 _M_curToken = _S_token_eof;
00309 return;
00310 }
00311
00312 if (*_M_current == _M_ctype.widen('.'))
00313 {
00314 _M_curToken = _S_token_collsymbol;
00315 _M_eat_collsymbol();
00316 return;
00317 }
00318 else if (*_M_current == _M_ctype.widen(':'))
00319 {
00320 _M_curToken = _S_token_char_class_name;
00321 _M_eat_charclass();
00322 return;
00323 }
00324 else if (*_M_current == _M_ctype.widen('='))
00325 {
00326 _M_curToken = _S_token_equiv_class_name;
00327 _M_eat_equivclass();
00328 return;
00329 }
00330 }
00331 else if (*_M_current == _M_ctype.widen('-'))
00332 {
00333 _M_curToken = _S_token_dash;
00334 ++_M_current;
00335 return;
00336 }
00337 else if (*_M_current == _M_ctype.widen(']'))
00338 {
00339 if (!(_M_flags & regex_constants::ECMAScript)
00340 || !(_M_state & _S_state_at_start))
00341 {
00342
00343
00344 _M_curToken = _S_token_bracket_end;
00345 ++_M_current;
00346 return;
00347 }
00348 }
00349 _M_curToken = _S_token_collelem_single;
00350 _M_curValue.assign(1, *_M_current);
00351 ++_M_current;
00352 }
00353
00354 template<typename _InputIterator>
00355 void
00356 _Scanner<_InputIterator>::
00357 _M_eat_escape()
00358 {
00359 ++_M_current;
00360 if (_M_current == _M_end)
00361 {
00362 _M_curToken = _S_token_eof;
00363 return;
00364 }
00365 _CharT __c = *_M_current;
00366 ++_M_current;
00367
00368 if (__c == _M_ctype.widen('('))
00369 {
00370 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00371 {
00372 _M_curToken = _S_token_ord_char;
00373 _M_curValue.assign(1, __c);
00374 }
00375 else
00376 _M_curToken = _S_token_subexpr_begin;
00377 }
00378 else if (__c == _M_ctype.widen(')'))
00379 {
00380 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00381 {
00382 _M_curToken = _S_token_ord_char;
00383 _M_curValue.assign(1, __c);
00384 }
00385 else
00386 _M_curToken = _S_token_subexpr_end;
00387 }
00388 else if (__c == _M_ctype.widen('{'))
00389 {
00390 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00391 {
00392 _M_curToken = _S_token_ord_char;
00393 _M_curValue.assign(1, __c);
00394 }
00395 else
00396 {
00397 _M_curToken = _S_token_interval_begin;
00398 _M_state |= _S_state_in_brace;
00399 }
00400 }
00401 else if (__c == _M_ctype.widen('}'))
00402 {
00403 if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00404 {
00405 _M_curToken = _S_token_ord_char;
00406 _M_curValue.assign(1, __c);
00407 }
00408 else
00409 {
00410 if (!(_M_state && _S_state_in_brace))
00411 __throw_regex_error(regex_constants::error_badbrace);
00412 _M_state &= ~_S_state_in_brace;
00413 _M_curToken = _S_token_interval_end;
00414 }
00415 }
00416 else if (__c == _M_ctype.widen('x'))
00417 {
00418 ++_M_current;
00419 if (_M_current == _M_end)
00420 {
00421 _M_curToken = _S_token_eof;
00422 return;
00423 }
00424 if (_M_ctype.is(_CtypeT::digit, *_M_current))
00425 {
00426 _M_curValue.assign(1, *_M_current);
00427 ++_M_current;
00428 if (_M_current == _M_end)
00429 {
00430 _M_curToken = _S_token_eof;
00431 return;
00432 }
00433 if (_M_ctype.is(_CtypeT::digit, *_M_current))
00434 {
00435 _M_curValue += *_M_current;
00436 ++_M_current;
00437 return;
00438 }
00439 }
00440 }
00441 else if (__c == _M_ctype.widen('^')
00442 || __c == _M_ctype.widen('.')
00443 || __c == _M_ctype.widen('*')
00444 || __c == _M_ctype.widen('$')
00445 || __c == _M_ctype.widen('\\'))
00446 {
00447 _M_curToken = _S_token_ord_char;
00448 _M_curValue.assign(1, __c);
00449 }
00450 else if (_M_ctype.is(_CtypeT::digit, __c))
00451 {
00452 _M_curToken = _S_token_backref;
00453 _M_curValue.assign(1, __c);
00454 }
00455 else
00456 __throw_regex_error(regex_constants::error_escape);
00457 }
00458
00459
00460
00461
00462 template<typename _InputIterator>
00463 void
00464 _Scanner<_InputIterator>::
00465 _M_eat_charclass()
00466 {
00467 ++_M_current;
00468 if (_M_current == _M_end)
00469 __throw_regex_error(regex_constants::error_ctype);
00470 for (_M_curValue.clear();
00471 _M_current != _M_end && *_M_current != _M_ctype.widen(':');
00472 ++_M_current)
00473 _M_curValue += *_M_current;
00474 if (_M_current == _M_end)
00475 __throw_regex_error(regex_constants::error_ctype);
00476 ++_M_current;
00477 if (*_M_current != _M_ctype.widen(']'))
00478 __throw_regex_error(regex_constants::error_ctype);
00479 ++_M_current;
00480 }
00481
00482
00483 template<typename _InputIterator>
00484 void
00485 _Scanner<_InputIterator>::
00486 _M_eat_equivclass()
00487 {
00488 ++_M_current;
00489 if (_M_current == _M_end)
00490 __throw_regex_error(regex_constants::error_collate);
00491 for (_M_curValue.clear();
00492 _M_current != _M_end && *_M_current != _M_ctype.widen('=');
00493 ++_M_current)
00494 _M_curValue += *_M_current;
00495 if (_M_current == _M_end)
00496 __throw_regex_error(regex_constants::error_collate);
00497 ++_M_current;
00498 if (*_M_current != _M_ctype.widen(']'))
00499 __throw_regex_error(regex_constants::error_collate);
00500 ++_M_current;
00501 }
00502
00503
00504 template<typename _InputIterator>
00505 void
00506 _Scanner<_InputIterator>::
00507 _M_eat_collsymbol()
00508 {
00509 ++_M_current;
00510 if (_M_current == _M_end)
00511 __throw_regex_error(regex_constants::error_collate);
00512 for (_M_curValue.clear();
00513 _M_current != _M_end && *_M_current != _M_ctype.widen('.');
00514 ++_M_current)
00515 _M_curValue += *_M_current;
00516 if (_M_current == _M_end)
00517 __throw_regex_error(regex_constants::error_collate);
00518 ++_M_current;
00519 if (*_M_current != _M_ctype.widen(']'))
00520 __throw_regex_error(regex_constants::error_collate);
00521 ++_M_current;
00522 }
00523
00524 #ifdef _GLIBCXX_DEBUG
00525 template<typename _InputIterator>
00526 std::ostream&
00527 _Scanner<_InputIterator>::
00528 _M_print(std::ostream& ostr)
00529 {
00530 switch (_M_curToken)
00531 {
00532 case _S_token_anychar:
00533 ostr << "any-character\n";
00534 break;
00535 case _S_token_backref:
00536 ostr << "backref\n";
00537 break;
00538 case _S_token_bracket_begin:
00539 ostr << "bracket-begin\n";
00540 break;
00541 case _S_token_bracket_end:
00542 ostr << "bracket-end\n";
00543 break;
00544 case _S_token_char_class_name:
00545 ostr << "char-class-name \"" << _M_curValue << "\"\n";
00546 break;
00547 case _S_token_closure0:
00548 ostr << "closure0\n";
00549 break;
00550 case _S_token_closure1:
00551 ostr << "closure1\n";
00552 break;
00553 case _S_token_collelem_multi:
00554 ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
00555 break;
00556 case _S_token_collelem_single:
00557 ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
00558 break;
00559 case _S_token_collsymbol:
00560 ostr << "collsymbol \"" << _M_curValue << "\"\n";
00561 break;
00562 case _S_token_comma:
00563 ostr << "comma\n";
00564 break;
00565 case _S_token_dash:
00566 ostr << "dash\n";
00567 break;
00568 case _S_token_dup_count:
00569 ostr << "dup count: " << _M_curValue << "\n";
00570 break;
00571 case _S_token_eof:
00572 ostr << "EOF\n";
00573 break;
00574 case _S_token_equiv_class_name:
00575 ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
00576 break;
00577 case _S_token_interval_begin:
00578 ostr << "interval begin\n";
00579 break;
00580 case _S_token_interval_end:
00581 ostr << "interval end\n";
00582 break;
00583 case _S_token_line_begin:
00584 ostr << "line begin\n";
00585 break;
00586 case _S_token_line_end:
00587 ostr << "line end\n";
00588 break;
00589 case _S_token_opt:
00590 ostr << "opt\n";
00591 break;
00592 case _S_token_or:
00593 ostr << "or\n";
00594 break;
00595 case _S_token_ord_char:
00596 ostr << "ordinary character: \"" << _M_value() << "\"\n";
00597 break;
00598 case _S_token_quoted_char:
00599 ostr << "quoted char\n";
00600 break;
00601 case _S_token_subexpr_begin:
00602 ostr << "subexpr begin\n";
00603 break;
00604 case _S_token_subexpr_end:
00605 ostr << "subexpr end\n";
00606 break;
00607 case _S_token_word_begin:
00608 ostr << "word begin\n";
00609 break;
00610 case _S_token_word_end:
00611 ostr << "word end\n";
00612 break;
00613 case _S_token_unknown:
00614 ostr << "-- unknown token --\n";
00615 break;
00616 }
00617 return ostr;
00618 }
00619 #endif
00620
00621
00622 template<typename _InIter, typename _TraitsT>
00623 class _Compiler
00624 {
00625 public:
00626 typedef _InIter _IterT;
00627 typedef typename std::iterator_traits<_InIter>::value_type _CharT;
00628 typedef std::basic_string<_CharT> _StringT;
00629 typedef regex_constants::syntax_option_type _FlagT;
00630
00631 public:
00632 _Compiler(const _InIter& __b, const _InIter& __e,
00633 _TraitsT& __traits, _FlagT __flags);
00634
00635 const _Nfa&
00636 _M_nfa() const
00637 { return _M_state_store; }
00638
00639 private:
00640 typedef _Scanner<_InIter> _ScannerT;
00641 typedef typename _ScannerT::_TokenT _TokenT;
00642 typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
00643 typedef _RangeMatcher<_InIter, _TraitsT> _RMatcherT;
00644
00645
00646 bool
00647 _M_match_token(_TokenT __token);
00648
00649 void
00650 _M_disjunction();
00651
00652 bool
00653 _M_alternative();
00654
00655 bool
00656 _M_term();
00657
00658 bool
00659 _M_assertion();
00660
00661 bool
00662 _M_quantifier();
00663
00664 bool
00665 _M_atom();
00666
00667 bool
00668 _M_bracket_expression();
00669
00670 bool
00671 _M_bracket_list(_RMatcherT& __matcher);
00672
00673 bool
00674 _M_follow_list(_RMatcherT& __matcher);
00675
00676 bool
00677 _M_follow_list2(_RMatcherT& __matcher);
00678
00679 bool
00680 _M_expression_term(_RMatcherT& __matcher);
00681
00682 bool
00683 _M_range_expression(_RMatcherT& __matcher);
00684
00685 bool
00686 _M_start_range(_RMatcherT& __matcher);
00687
00688 bool
00689 _M_collating_symbol(_RMatcherT& __matcher);
00690
00691 bool
00692 _M_equivalence_class(_RMatcherT& __matcher);
00693
00694 bool
00695 _M_character_class(_RMatcherT& __matcher);
00696
00697 int
00698 _M_cur_int_value(int __radix);
00699
00700 private:
00701 _TraitsT& _M_traits;
00702 _ScannerT _M_scanner;
00703 _StringT _M_cur_value;
00704 _Nfa _M_state_store;
00705 _StackT _M_stack;
00706 };
00707
00708 template<typename _InIter, typename _TraitsT>
00709 _Compiler<_InIter, _TraitsT>::
00710 _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
00711 _Compiler<_InIter, _TraitsT>::_FlagT __flags)
00712 : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
00713 _M_state_store(__flags)
00714 {
00715 using std::bind;
00716 using std::placeholders::_1;
00717 using std::placeholders::_2;
00718 typedef _StartTagger<_InIter, _TraitsT> _Start;
00719 typedef _EndTagger<_InIter, _TraitsT> _End;
00720
00721 _StateSeq __r(_M_state_store,
00722 _M_state_store._M_insert_subexpr_begin(
00723 bind(_Start(0), _1, _2)));
00724 _M_disjunction();
00725 if (!_M_stack.empty())
00726 {
00727 __r._M_append(_M_stack.top());
00728 _M_stack.pop();
00729 }
00730 __r._M_append(_M_state_store.
00731 _M_insert_subexpr_end(0, bind(_End(0), _1, _2)));
00732 __r._M_append(_M_state_store._M_insert_accept());
00733 }
00734
00735 template<typename _InIter, typename _TraitsT>
00736 bool
00737 _Compiler<_InIter, _TraitsT>::
00738 _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
00739 {
00740 if (token == _M_scanner._M_token())
00741 {
00742 _M_cur_value = _M_scanner._M_value();
00743 _M_scanner._M_advance();
00744 return true;
00745 }
00746 return false;
00747 }
00748
00749 template<typename _InIter, typename _TraitsT>
00750 void
00751 _Compiler<_InIter, _TraitsT>::
00752 _M_disjunction()
00753 {
00754 this->_M_alternative();
00755 if (_M_match_token(_ScannerT::_S_token_or))
00756 {
00757 _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
00758 this->_M_disjunction();
00759 _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
00760 _M_stack.push(_StateSeq(__alt1, __alt2));
00761 }
00762 }
00763
00764 template<typename _InIter, typename _TraitsT>
00765 bool
00766 _Compiler<_InIter, _TraitsT>::
00767 _M_alternative()
00768 {
00769 if (this->_M_term())
00770 {
00771 _StateSeq __re = _M_stack.top(); _M_stack.pop();
00772 this->_M_alternative();
00773 if (!_M_stack.empty())
00774 {
00775 __re._M_append(_M_stack.top());
00776 _M_stack.pop();
00777 }
00778 _M_stack.push(__re);
00779 return true;
00780 }
00781 return false;
00782 }
00783
00784 template<typename _InIter, typename _TraitsT>
00785 bool
00786 _Compiler<_InIter, _TraitsT>::
00787 _M_term()
00788 {
00789 if (this->_M_assertion())
00790 return true;
00791 if (this->_M_atom())
00792 {
00793 this->_M_quantifier();
00794 return true;
00795 }
00796 return false;
00797 }
00798
00799 template<typename _InIter, typename _TraitsT>
00800 bool
00801 _Compiler<_InIter, _TraitsT>::
00802 _M_assertion()
00803 {
00804 if (_M_match_token(_ScannerT::_S_token_line_begin))
00805 {
00806
00807 return true;
00808 }
00809 if (_M_match_token(_ScannerT::_S_token_line_end))
00810 {
00811
00812 return true;
00813 }
00814 if (_M_match_token(_ScannerT::_S_token_word_begin))
00815 {
00816
00817 return true;
00818 }
00819 if (_M_match_token(_ScannerT::_S_token_word_end))
00820 {
00821
00822 return true;
00823 }
00824 return false;
00825 }
00826
00827 template<typename _InIter, typename _TraitsT>
00828 bool
00829 _Compiler<_InIter, _TraitsT>::
00830 _M_quantifier()
00831 {
00832 if (_M_match_token(_ScannerT::_S_token_closure0))
00833 {
00834 if (_M_stack.empty())
00835 __throw_regex_error(regex_constants::error_badrepeat);
00836 _StateSeq __r(_M_stack.top(), -1);
00837 __r._M_append(__r._M_front());
00838 _M_stack.pop();
00839 _M_stack.push(__r);
00840 return true;
00841 }
00842 if (_M_match_token(_ScannerT::_S_token_closure1))
00843 {
00844 if (_M_stack.empty())
00845 __throw_regex_error(regex_constants::error_badrepeat);
00846 _StateSeq __r(_M_state_store,
00847 _M_state_store.
00848 _M_insert_alt(_S_invalid_state_id,
00849 _M_stack.top()._M_front()));
00850 _M_stack.top()._M_append(__r);
00851 return true;
00852 }
00853 if (_M_match_token(_ScannerT::_S_token_opt))
00854 {
00855 if (_M_stack.empty())
00856 __throw_regex_error(regex_constants::error_badrepeat);
00857 _StateSeq __r(_M_stack.top(), -1);
00858 _M_stack.pop();
00859 _M_stack.push(__r);
00860 return true;
00861 }
00862 if (_M_match_token(_ScannerT::_S_token_interval_begin))
00863 {
00864 if (_M_stack.empty())
00865 __throw_regex_error(regex_constants::error_badrepeat);
00866 if (!_M_match_token(_ScannerT::_S_token_dup_count))
00867 __throw_regex_error(regex_constants::error_badbrace);
00868 _StateSeq __r(_M_stack.top());
00869 int __min_rep = _M_cur_int_value(10);
00870 for (int __i = 1; __i < __min_rep; ++__i)
00871 _M_stack.top()._M_append(__r._M_clone());
00872 if (_M_match_token(_ScannerT::_S_token_comma))
00873 if (_M_match_token(_ScannerT::_S_token_dup_count))
00874 {
00875 int __n = _M_cur_int_value(10) - __min_rep;
00876 if (__n < 0)
00877 __throw_regex_error(regex_constants::error_badbrace);
00878 for (int __i = 0; __i < __n; ++__i)
00879 {
00880 _StateSeq __r(_M_state_store,
00881 _M_state_store.
00882 _M_insert_alt(_S_invalid_state_id,
00883 _M_stack.top()._M_front()));
00884 _M_stack.top()._M_append(__r);
00885 }
00886 }
00887 else
00888 {
00889 _StateSeq __r(_M_stack.top(), -1);
00890 __r._M_push_back(__r._M_front());
00891 _M_stack.pop();
00892 _M_stack.push(__r);
00893 }
00894 if (!_M_match_token(_ScannerT::_S_token_interval_end))
00895 __throw_regex_error(regex_constants::error_brace);
00896 return true;
00897 }
00898 return false;
00899 }
00900
00901 template<typename _InIter, typename _TraitsT>
00902 bool
00903 _Compiler<_InIter, _TraitsT>::
00904 _M_atom()
00905 {
00906 using std::bind;
00907 using std::placeholders::_1;
00908 using std::placeholders::_2;
00909 typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
00910 typedef _StartTagger<_InIter, _TraitsT> _Start;
00911 typedef _EndTagger<_InIter, _TraitsT> _End;
00912
00913 if (_M_match_token(_ScannerT::_S_token_anychar))
00914 {
00915 _M_stack.push(_StateSeq(_M_state_store,
00916 _M_state_store.
00917 _M_insert_matcher(bind(_AnyMatcher, _1))));
00918 return true;
00919 }
00920 if (_M_match_token(_ScannerT::_S_token_ord_char))
00921 {
00922 _M_stack.push(_StateSeq
00923 (_M_state_store, _M_state_store.
00924 _M_insert_matcher
00925 (bind(_CMatcher(_M_cur_value[0], _M_traits), _1))));
00926 return true;
00927 }
00928 if (_M_match_token(_ScannerT::_S_token_quoted_char))
00929 {
00930
00931 _M_stack.push(_StateSeq(_M_state_store,
00932 _M_state_store.
00933 _M_insert_matcher
00934 (bind(_CMatcher(_M_cur_value[0], _M_traits),
00935 _1))));
00936 return true;
00937 }
00938 if (_M_match_token(_ScannerT::_S_token_backref))
00939 {
00940
00941 return true;
00942 }
00943 if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
00944 {
00945 int __mark = _M_state_store._M_sub_count();
00946 _StateSeq __r(_M_state_store,
00947 _M_state_store.
00948 _M_insert_subexpr_begin(bind(_Start(__mark), _1, _2)));
00949 this->_M_disjunction();
00950 if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
00951 __throw_regex_error(regex_constants::error_paren);
00952 if (!_M_stack.empty())
00953 {
00954 __r._M_append(_M_stack.top());
00955 _M_stack.pop();
00956 }
00957 __r._M_append(_M_state_store._M_insert_subexpr_end
00958 (__mark, bind(_End(__mark), _1, _2)));
00959 _M_stack.push(__r);
00960 return true;
00961 }
00962 return _M_bracket_expression();
00963 }
00964
00965 template<typename _InIter, typename _TraitsT>
00966 bool
00967 _Compiler<_InIter, _TraitsT>::
00968 _M_bracket_expression()
00969 {
00970 using std::bind;
00971 using std::placeholders::_1;
00972 if (_M_match_token(_ScannerT::_S_token_bracket_begin))
00973 {
00974 _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
00975 _M_traits);
00976 if (!_M_bracket_list(__matcher)
00977 || !_M_match_token(_ScannerT::_S_token_bracket_end))
00978 __throw_regex_error(regex_constants::error_brack);
00979 _M_stack.push(_StateSeq(_M_state_store,
00980 _M_state_store._M_insert_matcher
00981 (bind(__matcher, _1))));
00982 return true;
00983 }
00984 return false;
00985 }
00986
00987
00988
00989 template<typename _InIter, typename _TraitsT>
00990 bool
00991 _Compiler<_InIter, _TraitsT>::
00992 _M_bracket_list(_RMatcherT& __matcher)
00993 {
00994 if (_M_follow_list(__matcher))
00995 {
00996 if (_M_match_token(_ScannerT::_S_token_dash))
00997 __matcher._M_add_char(_M_cur_value[0]);
00998 return true;
00999 }
01000 return false;
01001 }
01002
01003 template<typename _InIter, typename _TraitsT>
01004 bool
01005 _Compiler<_InIter, _TraitsT>::
01006 _M_follow_list(_RMatcherT& __matcher)
01007 { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
01008
01009 template<typename _InIter, typename _TraitsT>
01010 bool
01011 _Compiler<_InIter, _TraitsT>::
01012 _M_follow_list2(_RMatcherT& __matcher)
01013 {
01014 if (_M_expression_term(__matcher))
01015 return _M_follow_list2(__matcher);
01016 return true;
01017 }
01018
01019 template<typename _InIter, typename _TraitsT>
01020 bool
01021 _Compiler<_InIter, _TraitsT>::
01022 _M_expression_term(_RMatcherT& __matcher)
01023 {
01024 return (_M_collating_symbol(__matcher)
01025 || _M_character_class(__matcher)
01026 || _M_equivalence_class(__matcher)
01027 || (_M_start_range(__matcher)
01028 && _M_range_expression(__matcher)));
01029 }
01030
01031 template<typename _InIter, typename _TraitsT>
01032 bool
01033 _Compiler<_InIter, _TraitsT>::
01034 _M_range_expression(_RMatcherT& __matcher)
01035 {
01036 if (!_M_collating_symbol(__matcher))
01037 if (!_M_match_token(_ScannerT::_S_token_dash))
01038 __throw_regex_error(regex_constants::error_range);
01039 __matcher._M_make_range();
01040 return true;
01041 }
01042
01043 template<typename _InIter, typename _TraitsT>
01044 bool
01045 _Compiler<_InIter, _TraitsT>::
01046 _M_start_range(_RMatcherT& __matcher)
01047 { return _M_match_token(_ScannerT::_S_token_dash); }
01048
01049 template<typename _InIter, typename _TraitsT>
01050 bool
01051 _Compiler<_InIter, _TraitsT>::
01052 _M_collating_symbol(_RMatcherT& __matcher)
01053 {
01054 if (_M_match_token(_ScannerT::_S_token_collelem_single))
01055 {
01056 __matcher._M_add_char(_M_cur_value[0]);
01057 return true;
01058 }
01059 if (_M_match_token(_ScannerT::_S_token_collsymbol))
01060 {
01061 __matcher._M_add_collating_element(_M_cur_value);
01062 return true;
01063 }
01064 return false;
01065 }
01066
01067 template<typename _InIter, typename _TraitsT>
01068 bool
01069 _Compiler<_InIter, _TraitsT>::
01070 _M_equivalence_class(_RMatcherT& __matcher)
01071 {
01072 if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
01073 {
01074 __matcher._M_add_equivalence_class(_M_cur_value);
01075 return true;
01076 }
01077 return false;
01078 }
01079
01080 template<typename _InIter, typename _TraitsT>
01081 bool
01082 _Compiler<_InIter, _TraitsT>::
01083 _M_character_class(_RMatcherT& __matcher)
01084 {
01085 if (_M_match_token(_ScannerT::_S_token_char_class_name))
01086 {
01087 __matcher._M_add_character_class(_M_cur_value);
01088 return true;
01089 }
01090 return false;
01091 }
01092
01093 template<typename _InIter, typename _TraitsT>
01094 int
01095 _Compiler<_InIter, _TraitsT>::
01096 _M_cur_int_value(int __radix)
01097 {
01098 int __v = 0;
01099 for (typename _StringT::size_type __i = 0;
01100 __i < _M_cur_value.length(); ++__i)
01101 __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
01102 return __v;
01103 }
01104
01105 template<typename _InIter, typename _TraitsT>
01106 _AutomatonPtr
01107 __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
01108 regex_constants::syntax_option_type __f)
01109 { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
01110 __f)._M_nfa())); }
01111
01112 }
01113 }
01114
01115