regex_compiler.h

Go to the documentation of this file.
00001 // class template regex -*- C++ -*-
00002 
00003 // Copyright (C) 2010 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /**
00026  * @file bits/regex_compiler.h
00027  * This is an internal header file, included by other library headers.
00028  * You should not attempt to use it directly.
00029  */
00030 
00031 namespace std
00032 {
00033 namespace __regex
00034 {
00035   struct _Scanner_base
00036   {
00037     // FIXME: replace these constanst with constexpr
00038     typedef unsigned int _StateT;
00039 
00040     static const _StateT _S_state_at_start    = 1 << 0;
00041     static const _StateT _S_state_in_brace    = 1 << 2;
00042     static const _StateT _S_state_in_bracket  = 1 << 3;
00043   };
00044 
  //
  // @brief Scans an input range for regex tokens.
  //
  // The %_Scanner class interprets the regular expression pattern in the input
  // range passed to its constructor as a sequence of parse tokens passed to
  // the regular expression compiler.  The sequence of tokens provided depends
  // on the flag settings passed to the constructor:  different regular
  // expression grammars will interpret the same input pattern in syntactically
  // different ways.
  //
  template<typename _InputIterator>
    class _Scanner: public _Scanner_base
    {
    public:
      typedef _InputIterator                                        _IteratorT;
      typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
      typedef std::basic_string<_CharT>                             _StringT;
      typedef regex_constants::syntax_option_type                   _FlagT;
      typedef const std::ctype<_CharT>                              _CtypeT;

      // Token types returned from the scanner.
      enum _TokenT
      {
        _S_token_anychar,
        _S_token_backref,
        _S_token_bracket_begin,
        _S_token_bracket_end,
        _S_token_inverse_class,
        _S_token_char_class_name,
        _S_token_closure0,
        _S_token_closure1,
        _S_token_collelem_multi,
        _S_token_collelem_single,
        _S_token_collsymbol,
        _S_token_comma,
        _S_token_dash,
        _S_token_dup_count,
        _S_token_eof,
        _S_token_equiv_class_name,
        _S_token_interval_begin,
        _S_token_interval_end,
        _S_token_line_begin,
        _S_token_line_end,
        _S_token_opt,
        _S_token_or,
        _S_token_ord_char,
        _S_token_quoted_char,
        _S_token_subexpr_begin,
        _S_token_subexpr_end,
        _S_token_word_begin,
        _S_token_word_end,
        _S_token_unknown
      };

    public:
      // Stores the pattern range [__begin, __end) and the syntax flags,
      // fetches the ctype facet from __loc, starts in the "at start" state
      // and immediately scans the first token.
      _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
               std::locale __loc)
      : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
        _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
      { _M_advance(); }

      // Scans the next token from the pattern.
      void
      _M_advance();

      // Returns the token most recently produced by _M_advance().
      _TokenT
      _M_token() const
      { return _M_curToken; }

      // Returns the text stored by the scanner for the last token that
      // carried a value; tokens without a value leave it unchanged.
      const _StringT&
      _M_value() const
      { return _M_curValue; }

#ifdef _GLIBCXX_DEBUG
      // Writes a description of the current token to the given stream.
      std::ostream&
      _M_print(std::ostream&);
#endif

    private:
      // Handles the character(s) following a backslash.
      void
      _M_eat_escape();

      // Scans one token while inside an interval expression "{...}".
      void
      _M_scan_in_brace();

      // Scans one token while inside a bracket expression "[...]".
      void
      _M_scan_in_bracket();

      // Reads the name in "[:name:]" into the current value.
      void
      _M_eat_charclass();

      // Reads the name in "[=name=]" into the current value.
      void
      _M_eat_equivclass();

      // Reads the name in "[.name.]" into the current value.
      void
      _M_eat_collsymbol();

    private:
      _IteratorT  _M_current;   // next pattern character to be scanned
      _IteratorT  _M_end;       // end of the pattern
      _FlagT      _M_flags;     // syntax options selecting the grammar
      _CtypeT&    _M_ctype;     // facet used for widen() and is()
      _TokenT     _M_curToken;  // token produced by the last scan
      _StringT    _M_curValue;  // text belonging to the last valued token
      _StateT     _M_state;     // combination of _S_state_* flags
    };
00150 
00151   template<typename _InputIterator>
00152     void
00153     _Scanner<_InputIterator>::
00154     _M_advance()
00155     {
00156       if (_M_current == _M_end)
00157     {
00158       _M_curToken = _S_token_eof;
00159       return;
00160     }
00161 
00162       _CharT __c = *_M_current;
00163       if (_M_state & _S_state_in_bracket)
00164     {
00165       _M_scan_in_bracket();
00166       return;
00167     }
00168       if (_M_state & _S_state_in_brace)
00169     {
00170       _M_scan_in_brace();
00171       return;
00172     }
00173       else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
00174     {
00175       _M_curToken = _S_token_line_begin;
00176       ++_M_current;
00177       return;
00178     }
00179       else if (__c == _M_ctype.widen('$'))
00180     {
00181       _M_curToken = _S_token_line_end;
00182       ++_M_current;
00183       return;
00184     }
00185       else if (__c == _M_ctype.widen('.'))
00186     {
00187       _M_curToken = _S_token_anychar;
00188       ++_M_current;
00189       return;
00190     }
00191       else if (__c == _M_ctype.widen('*'))
00192     {
00193       _M_curToken = _S_token_closure0;
00194       ++_M_current;
00195       return;
00196     }
00197       else if (__c == _M_ctype.widen('+'))
00198     {
00199       _M_curToken = _S_token_closure1;
00200       ++_M_current;
00201       return;
00202     }
00203       else if (__c == _M_ctype.widen('|'))
00204     {
00205       _M_curToken = _S_token_or;
00206       ++_M_current;
00207       return;
00208     }
00209       else if (__c == _M_ctype.widen('['))
00210     {
00211       _M_curToken = _S_token_bracket_begin;
00212       _M_state |= (_S_state_in_bracket | _S_state_at_start);
00213       ++_M_current;
00214       return;
00215     }
00216       else if (__c == _M_ctype.widen('\\'))
00217     {
00218       _M_eat_escape();
00219       return;
00220     }
00221       else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00222     {
00223       if (__c == _M_ctype.widen('('))
00224         {
00225           _M_curToken = _S_token_subexpr_begin;
00226           ++_M_current;
00227           return;
00228         }
00229       else if (__c == _M_ctype.widen(')'))
00230         {
00231           _M_curToken = _S_token_subexpr_end;
00232           ++_M_current;
00233           return;
00234         }
00235       else if (__c == _M_ctype.widen('{'))
00236         {
00237           _M_curToken = _S_token_interval_begin;
00238           _M_state |= _S_state_in_brace;
00239           ++_M_current;
00240           return;
00241         }
00242     }
00243 
00244       _M_curToken = _S_token_ord_char;
00245       _M_curValue.assign(1, __c);
00246       ++_M_current;
00247     }
00248 
00249 
00250   template<typename _InputIterator>
00251     void
00252     _Scanner<_InputIterator>::
00253     _M_scan_in_brace()
00254     {
00255       if (_M_ctype.is(_CtypeT::digit, *_M_current))
00256     {
00257       _M_curToken = _S_token_dup_count;
00258       _M_curValue.assign(1, *_M_current);
00259       ++_M_current;
00260       while (_M_current != _M_end
00261          && _M_ctype.is(_CtypeT::digit, *_M_current))
00262         {
00263           _M_curValue += *_M_current;
00264           ++_M_current;
00265         }
00266       return;
00267     }
00268       else if (*_M_current == _M_ctype.widen(','))
00269     {
00270       _M_curToken = _S_token_comma;
00271       ++_M_current;
00272       return;
00273     }
00274       if (_M_flags & (regex_constants::basic | regex_constants::grep))
00275     {
00276       if (*_M_current == _M_ctype.widen('\\'))
00277         _M_eat_escape();
00278     }
00279       else 
00280     {
00281       if (*_M_current == _M_ctype.widen('}'))
00282         {
00283           _M_curToken = _S_token_interval_end;
00284           _M_state &= ~_S_state_in_brace;
00285           ++_M_current;
00286           return;
00287         }
00288     }
00289     }
00290 
00291   template<typename _InputIterator>
00292     void
00293     _Scanner<_InputIterator>::
00294     _M_scan_in_bracket()
00295     {
00296       if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
00297     {
00298       _M_curToken = _S_token_inverse_class;
00299       _M_state &= ~_S_state_at_start;
00300       ++_M_current;
00301       return;
00302     }
00303       else if (*_M_current == _M_ctype.widen('['))
00304     {
00305       ++_M_current;
00306       if (_M_current == _M_end)
00307         {
00308           _M_curToken = _S_token_eof;
00309           return;
00310         }
00311 
00312       if (*_M_current == _M_ctype.widen('.'))
00313         {
00314           _M_curToken = _S_token_collsymbol;
00315           _M_eat_collsymbol();
00316           return;
00317         }
00318       else if (*_M_current == _M_ctype.widen(':'))
00319         {
00320           _M_curToken = _S_token_char_class_name;
00321           _M_eat_charclass();
00322           return;
00323         }
00324       else if (*_M_current == _M_ctype.widen('='))
00325         {
00326           _M_curToken = _S_token_equiv_class_name;
00327           _M_eat_equivclass();
00328           return;
00329         }
00330     }
00331       else if (*_M_current == _M_ctype.widen('-'))
00332     {
00333       _M_curToken = _S_token_dash;
00334       ++_M_current;
00335       return;
00336     }
00337       else if (*_M_current == _M_ctype.widen(']'))
00338     {
00339       if (!(_M_flags & regex_constants::ECMAScript)
00340           || !(_M_state & _S_state_at_start))
00341         {
00342           // special case: only if  _not_ chr first after
00343           // '[' or '[^' and if not ECMAscript
00344           _M_curToken = _S_token_bracket_end;
00345           ++_M_current;
00346           return;
00347         }
00348     }
00349       _M_curToken = _S_token_collelem_single;
00350       _M_curValue.assign(1, *_M_current);
00351       ++_M_current;
00352     }
00353 
00354   template<typename _InputIterator>
00355     void
00356     _Scanner<_InputIterator>::
00357     _M_eat_escape()
00358     {
00359       ++_M_current;
00360       if (_M_current == _M_end)
00361     {
00362       _M_curToken = _S_token_eof;
00363       return;
00364     }
00365       _CharT __c = *_M_current;
00366       ++_M_current;
00367 
00368       if (__c == _M_ctype.widen('('))
00369     {
00370       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00371         {
00372           _M_curToken = _S_token_ord_char;
00373           _M_curValue.assign(1, __c);
00374         }
00375       else
00376         _M_curToken = _S_token_subexpr_begin;
00377     }
00378       else if (__c == _M_ctype.widen(')'))
00379     {
00380       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00381         {
00382           _M_curToken = _S_token_ord_char;
00383           _M_curValue.assign(1, __c);
00384         }
00385       else
00386         _M_curToken = _S_token_subexpr_end;
00387     }
00388       else if (__c == _M_ctype.widen('{'))
00389     {
00390       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00391         {
00392           _M_curToken = _S_token_ord_char;
00393           _M_curValue.assign(1, __c);
00394         }
00395       else
00396         {
00397           _M_curToken = _S_token_interval_begin;
00398           _M_state |= _S_state_in_brace;
00399         }
00400     }
00401       else if (__c == _M_ctype.widen('}'))
00402     {
00403       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00404         {
00405           _M_curToken = _S_token_ord_char;
00406           _M_curValue.assign(1, __c);
00407         }
00408       else
00409         {
00410           if (!(_M_state && _S_state_in_brace))
00411         __throw_regex_error(regex_constants::error_badbrace);
00412           _M_state &= ~_S_state_in_brace;
00413           _M_curToken = _S_token_interval_end;
00414         }
00415     }
00416       else if (__c == _M_ctype.widen('x'))
00417     {
00418       ++_M_current;
00419       if (_M_current == _M_end)
00420         {
00421           _M_curToken = _S_token_eof;
00422           return;
00423         }
00424       if (_M_ctype.is(_CtypeT::digit, *_M_current))
00425         {
00426           _M_curValue.assign(1, *_M_current);
00427           ++_M_current;
00428           if (_M_current == _M_end)
00429         {
00430           _M_curToken = _S_token_eof;
00431           return;
00432         }
00433           if (_M_ctype.is(_CtypeT::digit, *_M_current))
00434         {
00435           _M_curValue += *_M_current;
00436           ++_M_current;
00437           return;
00438         }
00439         }
00440     }
00441       else if (__c == _M_ctype.widen('^')
00442            || __c == _M_ctype.widen('.')
00443            || __c == _M_ctype.widen('*')
00444            || __c == _M_ctype.widen('$')
00445            || __c == _M_ctype.widen('\\'))
00446     {
00447       _M_curToken = _S_token_ord_char;
00448       _M_curValue.assign(1, __c);
00449     }
00450       else if (_M_ctype.is(_CtypeT::digit, __c))
00451     {
00452       _M_curToken = _S_token_backref;
00453       _M_curValue.assign(1, __c);
00454     }
00455       else
00456     __throw_regex_error(regex_constants::error_escape);
00457     }
00458 
00459 
00460   // Eats a character class or throwns an exception.
00461   // current point to ':' delimiter on entry, char after ']' on return
00462   template<typename _InputIterator>
00463     void
00464     _Scanner<_InputIterator>::
00465     _M_eat_charclass()
00466     {
00467       ++_M_current; // skip ':'
00468       if (_M_current == _M_end)
00469     __throw_regex_error(regex_constants::error_ctype);
00470       for (_M_curValue.clear();
00471        _M_current != _M_end && *_M_current != _M_ctype.widen(':');
00472        ++_M_current)
00473     _M_curValue += *_M_current;
00474       if (_M_current == _M_end)
00475     __throw_regex_error(regex_constants::error_ctype);
00476       ++_M_current; // skip ':'
00477       if (*_M_current != _M_ctype.widen(']'))
00478     __throw_regex_error(regex_constants::error_ctype);
00479       ++_M_current; // skip ']'
00480     }
00481 
00482 
00483   template<typename _InputIterator>
00484     void
00485     _Scanner<_InputIterator>::
00486     _M_eat_equivclass()
00487     {
00488       ++_M_current; // skip '='
00489       if (_M_current == _M_end)
00490     __throw_regex_error(regex_constants::error_collate);
00491       for (_M_curValue.clear();
00492        _M_current != _M_end && *_M_current != _M_ctype.widen('=');
00493        ++_M_current)
00494     _M_curValue += *_M_current;
00495       if (_M_current == _M_end)
00496     __throw_regex_error(regex_constants::error_collate);
00497       ++_M_current; // skip '='
00498       if (*_M_current != _M_ctype.widen(']'))
00499     __throw_regex_error(regex_constants::error_collate);
00500       ++_M_current; // skip ']'
00501     }
00502 
00503 
00504   template<typename _InputIterator>
00505     void
00506     _Scanner<_InputIterator>::
00507     _M_eat_collsymbol()
00508     {
00509       ++_M_current; // skip '.'
00510       if (_M_current == _M_end)
00511     __throw_regex_error(regex_constants::error_collate);
00512       for (_M_curValue.clear();
00513        _M_current != _M_end && *_M_current != _M_ctype.widen('.');
00514        ++_M_current)
00515     _M_curValue += *_M_current;
00516       if (_M_current == _M_end)
00517     __throw_regex_error(regex_constants::error_collate);
00518       ++_M_current; // skip '.'
00519       if (*_M_current != _M_ctype.widen(']'))
00520     __throw_regex_error(regex_constants::error_collate);
00521       ++_M_current; // skip ']'
00522     }
00523 
#ifdef _GLIBCXX_DEBUG
  // Debug aid: writes a one-line description of the current token to ostr
  // and returns ostr.  Tokens that carry text also print the stored value.
  // Every enumerator of _TokenT is handled.
  template<typename _InputIterator>
    std::ostream&
    _Scanner<_InputIterator>::
    _M_print(std::ostream& ostr)
    {
      switch (_M_curToken)
      {
        case _S_token_anychar:
          ostr << "any-character\n";
          break;
        case _S_token_backref:
          ostr << "backref\n";
          break;
        case _S_token_bracket_begin:
          ostr << "bracket-begin\n";
          break;
        case _S_token_bracket_end:
          ostr << "bracket-end\n";
          break;
        case _S_token_inverse_class:
          // Previously missing, so this token printed nothing.
          ostr << "inverse-class\n";
          break;
        case _S_token_char_class_name:
          ostr << "char-class-name \"" << _M_curValue << "\"\n";
          break;
        case _S_token_closure0:
          ostr << "closure0\n";
          break;
        case _S_token_closure1:
          ostr << "closure1\n";
          break;
        case _S_token_collelem_multi:
          ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
          break;
        case _S_token_collelem_single:
          ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
          break;
        case _S_token_collsymbol:
          ostr << "collsymbol \"" << _M_curValue << "\"\n";
          break;
        case _S_token_comma:
          ostr << "comma\n";
          break;
        case _S_token_dash:
          ostr << "dash\n";
          break;
        case _S_token_dup_count:
          ostr << "dup count: " << _M_curValue << "\n";
          break;
        case _S_token_eof:
          ostr << "EOF\n";
          break;
        case _S_token_equiv_class_name:
          ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
          break;
        case _S_token_interval_begin:
          ostr << "interval begin\n";
          break;
        case _S_token_interval_end:
          ostr << "interval end\n";
          break;
        case _S_token_line_begin:
          ostr << "line begin\n";
          break;
        case _S_token_line_end:
          ostr << "line end\n";
          break;
        case _S_token_opt:
          ostr << "opt\n";
          break;
        case _S_token_or:
          ostr << "or\n";
          break;
        case _S_token_ord_char:
          ostr << "ordinary character: \"" << _M_value() << "\"\n";
          break;
        case _S_token_quoted_char:
          ostr << "quoted char\n";
          break;
        case _S_token_subexpr_begin:
          ostr << "subexpr begin\n";
          break;
        case _S_token_subexpr_end:
          ostr << "subexpr end\n";
          break;
        case _S_token_word_begin:
          ostr << "word begin\n";
          break;
        case _S_token_word_end:
          ostr << "word end\n";
          break;
        case _S_token_unknown:
          ostr << "-- unknown token --\n";
          break;
      }
      return ostr;
    }
#endif
00620 
  // Builds an NFA from an input iterator interval.
  //
  // The constructor scans and parses the whole pattern; the resulting
  // automaton is then available through _M_nfa().  The private _M_*
  // members are the parsing routines; each returns true when it
  // recognised (and consumed) its construct.
  template<typename _InIter, typename _TraitsT>
    class _Compiler
    {
    public:
      typedef _InIter                                            _IterT;
      typedef typename std::iterator_traits<_InIter>::value_type _CharT;
      typedef std::basic_string<_CharT>                          _StringT;
      typedef regex_constants::syntax_option_type                _FlagT;

    public:
      // Compiles the pattern in [__b, __e) using the given traits object
      // and syntax flags.
      _Compiler(const _InIter& __b, const _InIter& __e,
                _TraitsT& __traits, _FlagT __flags);

      // Returns the automaton built by the constructor.
      const _Nfa&
      _M_nfa() const
      { return _M_state_store; }

    private:
      typedef _Scanner<_InIter>                              _ScannerT;
      typedef typename _ScannerT::_TokenT                    _TokenT;
      typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
      typedef _RangeMatcher<_InIter, _TraitsT>               _RMatcherT;

      // accepts a specific token or returns false.
      bool
      _M_match_token(_TokenT __token);

      // Parses alternatives separated by the "or" token.
      void
      _M_disjunction();

      // Parses a sequence of terms.
      bool
      _M_alternative();

      // Parses an assertion, or an atom with an optional quantifier.
      bool
      _M_term();

      // Parses a line/word boundary assertion.
      bool
      _M_assertion();

      // Parses a repetition operator applied to the sequence on top of
      // the stack.
      bool
      _M_quantifier();

      // Parses a single matching unit (character, group, bracket
      // expression, ...).
      bool
      _M_atom();

      // Parses a complete "[...]" expression.
      bool
      _M_bracket_expression();

      // Helpers for the contents of a bracket expression; they record
      // what they recognise in __matcher.
      bool
      _M_bracket_list(_RMatcherT& __matcher);

      bool
      _M_follow_list(_RMatcherT& __matcher);

      bool
      _M_follow_list2(_RMatcherT& __matcher);

      bool
      _M_expression_term(_RMatcherT& __matcher);

      bool
      _M_range_expression(_RMatcherT& __matcher);

      bool
      _M_start_range(_RMatcherT& __matcher);

      bool
      _M_collating_symbol(_RMatcherT& __matcher);

      bool
      _M_equivalence_class(_RMatcherT& __matcher);

      bool
      _M_character_class(_RMatcherT& __matcher);

      // NOTE(review): presumably converts the text of the last matched
      // token to an integer in the given radix -- definition not in view.
      int
      _M_cur_int_value(int __radix);

    private:
      _TraitsT&      _M_traits;       // regex traits supplied by the caller
      _ScannerT      _M_scanner;      // token source for the pattern
      _StringT       _M_cur_value;    // text of the last matched token
      _Nfa           _M_state_store;  // automaton under construction
      _StackT        _M_stack;        // state sequences awaiting combination
    };
00707 
00708   template<typename _InIter, typename _TraitsT>
00709     _Compiler<_InIter, _TraitsT>::
00710     _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
00711           _Compiler<_InIter, _TraitsT>::_FlagT __flags)
00712     : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
00713       _M_state_store(__flags)
00714     {
00715       using std::bind;
00716       using std::placeholders::_1;
00717       using std::placeholders::_2;
00718       typedef _StartTagger<_InIter, _TraitsT> _Start;
00719       typedef _EndTagger<_InIter, _TraitsT> _End;
00720 
00721       _StateSeq __r(_M_state_store,
00722                 _M_state_store._M_insert_subexpr_begin(
00723                         bind(_Start(0), _1, _2)));
00724       _M_disjunction();
00725       if (!_M_stack.empty())
00726     {
00727       __r._M_append(_M_stack.top());
00728       _M_stack.pop();
00729     }
00730       __r._M_append(_M_state_store.
00731             _M_insert_subexpr_end(0, bind(_End(0), _1, _2)));
00732       __r._M_append(_M_state_store._M_insert_accept());
00733     }
00734 
00735   template<typename _InIter, typename _TraitsT>
00736     bool
00737     _Compiler<_InIter, _TraitsT>::
00738     _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
00739     { 
00740       if (token == _M_scanner._M_token())
00741     {
00742       _M_cur_value = _M_scanner._M_value();
00743       _M_scanner._M_advance();
00744       return true;
00745     }
00746       return false;
00747     }
00748 
00749   template<typename _InIter, typename _TraitsT>
00750     void
00751     _Compiler<_InIter, _TraitsT>::
00752     _M_disjunction()
00753     {
00754       this->_M_alternative();
00755       if (_M_match_token(_ScannerT::_S_token_or))
00756     {
00757       _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
00758       this->_M_disjunction();
00759       _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
00760       _M_stack.push(_StateSeq(__alt1, __alt2));
00761     }
00762     }
00763 
00764   template<typename _InIter, typename _TraitsT>
00765     bool
00766     _Compiler<_InIter, _TraitsT>::
00767     _M_alternative()
00768     {
00769       if (this->_M_term())
00770     {
00771       _StateSeq __re = _M_stack.top(); _M_stack.pop();
00772       this->_M_alternative();
00773       if (!_M_stack.empty())
00774         {
00775           __re._M_append(_M_stack.top());
00776           _M_stack.pop();
00777         }
00778       _M_stack.push(__re);
00779       return true;
00780     }
00781       return false;
00782     }
00783 
00784   template<typename _InIter, typename _TraitsT>
00785     bool
00786     _Compiler<_InIter, _TraitsT>::
00787     _M_term()
00788     {
00789       if (this->_M_assertion())
00790     return true;
00791       if (this->_M_atom())
00792     {
00793       this->_M_quantifier();
00794       return true;
00795     }
00796       return false;
00797     }
00798 
00799   template<typename _InIter, typename _TraitsT>
00800     bool
00801     _Compiler<_InIter, _TraitsT>::
00802     _M_assertion()
00803     {
00804       if (_M_match_token(_ScannerT::_S_token_line_begin))
00805     {
00806       // __m.push(_Matcher::_S_opcode_line_begin);
00807       return true;
00808     }
00809       if (_M_match_token(_ScannerT::_S_token_line_end))
00810     {
00811       // __m.push(_Matcher::_S_opcode_line_end);
00812       return true;
00813     }
00814       if (_M_match_token(_ScannerT::_S_token_word_begin))
00815     {
00816       // __m.push(_Matcher::_S_opcode_word_begin);
00817       return true;
00818     }
00819       if (_M_match_token(_ScannerT::_S_token_word_end))
00820     {
00821       // __m.push(_Matcher::_S_opcode_word_end);
00822       return true;
00823     }
00824       return false;
00825     }
00826 
  // Parses an optional quantifier and applies it to the state sequence on
  // top of the stack.  Handles the closure0, closure1, opt and interval
  // tokens.  Returns true if a quantifier was consumed, false otherwise.
  //
  // Throws regex_error(error_badrepeat) if a quantifier appears while the
  // stack is empty, error_badbrace for a malformed interval, and
  // error_brace if the interval is not closed.
  template<typename _InIter, typename _TraitsT>
    bool
    _Compiler<_InIter, _TraitsT>::
    _M_quantifier()
    {
      if (_M_match_token(_ScannerT::_S_token_closure0))
        {
          if (_M_stack.empty())
            __throw_regex_error(regex_constants::error_badrepeat);
          // NOTE(review): the (sequence, -1) constructor presumably wraps
          // the sequence in an alternative that can be skipped -- confirm
          // against _StateSeq.
          _StateSeq __r(_M_stack.top(), -1);
          // Appending the sequence's own front closes the loop.
          __r._M_append(__r._M_front());
          _M_stack.pop();
          _M_stack.push(__r);
          return true;
        }
      if (_M_match_token(_ScannerT::_S_token_closure1))
        {
          if (_M_stack.empty())
            __throw_regex_error(regex_constants::error_badrepeat);
          // A new alternative state referring back to the front of the
          // sequence is appended after it.
          _StateSeq __r(_M_state_store,
                        _M_state_store.
                        _M_insert_alt(_S_invalid_state_id,
                                      _M_stack.top()._M_front()));
          _M_stack.top()._M_append(__r);
          return true;
        }
      if (_M_match_token(_ScannerT::_S_token_opt))
        {
          if (_M_stack.empty())
          __throw_regex_error(regex_constants::error_badrepeat);
          // Same wrapping as for closure0, but without the loop back.
          _StateSeq __r(_M_stack.top(), -1);
          _M_stack.pop();
          _M_stack.push(__r);
          return true;
        }
      if (_M_match_token(_ScannerT::_S_token_interval_begin))
        {
          if (_M_stack.empty())
            __throw_regex_error(regex_constants::error_badrepeat);
          // The lower bound is mandatory.
          if (!_M_match_token(_ScannerT::_S_token_dup_count))
            __throw_regex_error(regex_constants::error_badbrace);
          // Keep a copy of the unrepeated sequence to clone from.
          _StateSeq __r(_M_stack.top());
          int __min_rep = _M_cur_int_value(10);
          // One occurrence is already on the stack; add the remaining
          // mandatory ones.
          for (int __i = 1; __i < __min_rep; ++__i)
            _M_stack.top()._M_append(__r._M_clone()); 
          // Note: the else below pairs with the inner if, i.e. it handles
          // a comma that is not followed by an upper bound.
          if (_M_match_token(_ScannerT::_S_token_comma))
            if (_M_match_token(_ScannerT::_S_token_dup_count))
              {
                // Number of repetitions allowed beyond the minimum.
                int __n = _M_cur_int_value(10) - __min_rep;
                if (__n < 0)
                  __throw_regex_error(regex_constants::error_badbrace);
                for (int __i = 0; __i < __n; ++__i)
                  {
                    // This __r shadows the outer copy declared above.
                    _StateSeq __r(_M_state_store,
                                  _M_state_store.
                                  _M_insert_alt(_S_invalid_state_id,
                                                _M_stack.top()._M_front()));
                    _M_stack.top()._M_append(__r);
                  }
              }
            else
              {
                // No upper bound given.  This __r also shadows the outer
                // one.
                _StateSeq __r(_M_stack.top(), -1);
                __r._M_push_back(__r._M_front());
                _M_stack.pop();
                _M_stack.push(__r);
              }
          if (!_M_match_token(_ScannerT::_S_token_interval_end))
            __throw_regex_error(regex_constants::error_brace);
          return true;
        }
      return false;
    }
00900 
  // Parses a single atom and, for most kinds, pushes its state sequence
  // onto the stack.  Recognised atoms are: the any-character token, an
  // ordinary character, a quoted character, a back-reference, a
  // parenthesised sub-expression and (as the last resort) a bracket
  // expression.  Returns true if an atom was consumed.
  //
  // Throws regex_error(error_paren) if a sub-expression is not closed.
  template<typename _InIter, typename _TraitsT>
    bool
    _Compiler<_InIter, _TraitsT>::
    _M_atom()
    {
      using std::bind;
      using std::placeholders::_1;
      using std::placeholders::_2;
      typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
      typedef _StartTagger<_InIter, _TraitsT> _Start;
      typedef _EndTagger<_InIter, _TraitsT> _End;

      if (_M_match_token(_ScannerT::_S_token_anychar))
        {
          _M_stack.push(_StateSeq(_M_state_store,
                                  _M_state_store.
                                  _M_insert_matcher(bind(_AnyMatcher, _1))));
          return true;
        }
      if (_M_match_token(_ScannerT::_S_token_ord_char))
        {
          // The token's value holds the character to match.
          _M_stack.push(_StateSeq
                        (_M_state_store, _M_state_store. 
                         _M_insert_matcher
                         (bind(_CMatcher(_M_cur_value[0], _M_traits), _1))));
          return true;
        }
      if (_M_match_token(_ScannerT::_S_token_quoted_char))
        {
          // note that in the ECMA grammar, this case covers backrefs.
          _M_stack.push(_StateSeq(_M_state_store,
                                  _M_state_store.
                                  _M_insert_matcher
                                  (bind(_CMatcher(_M_cur_value[0], _M_traits),
                                        _1))));
          return true;
        }
      if (_M_match_token(_ScannerT::_S_token_backref))
        {
          // The token is consumed but nothing is pushed onto the stack.
          // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
          return true;
        }
      if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
        {
          // Number this group with the sub-expression count at the point
          // the group is opened.
          int __mark = _M_state_store._M_sub_count();
          _StateSeq __r(_M_state_store,
                        _M_state_store.
                        _M_insert_subexpr_begin(bind(_Start(__mark), _1, _2)));
          this->_M_disjunction();
          if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
            __throw_regex_error(regex_constants::error_paren);
          // The group body may have produced no sequence.
          if (!_M_stack.empty())
            {
              __r._M_append(_M_stack.top());
              _M_stack.pop();
            }
          __r._M_append(_M_state_store._M_insert_subexpr_end
                        (__mark, bind(_End(__mark), _1, _2)));
          _M_stack.push(__r);
          return true;
        }
      return _M_bracket_expression();
    }
00964 
00965   template<typename _InIter, typename _TraitsT>
00966     bool
00967     _Compiler<_InIter, _TraitsT>::
00968     _M_bracket_expression()
00969     {
00970       using std::bind;
00971       using std::placeholders::_1;
00972       if (_M_match_token(_ScannerT::_S_token_bracket_begin))
00973     {
00974       _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
00975                    _M_traits);
00976       if (!_M_bracket_list(__matcher)
00977           || !_M_match_token(_ScannerT::_S_token_bracket_end))
00978         __throw_regex_error(regex_constants::error_brack);
00979       _M_stack.push(_StateSeq(_M_state_store,
00980                   _M_state_store._M_insert_matcher
00981                   (bind(__matcher, _1))));
00982       return true;
00983     }
00984       return false;
00985     }
00986 
00987   // If the dash is the last character in the bracket expression, it is not
00988   // special.
00989   template<typename _InIter, typename _TraitsT>
00990     bool
00991     _Compiler<_InIter, _TraitsT>::
00992     _M_bracket_list(_RMatcherT& __matcher)
00993     {
00994       if (_M_follow_list(__matcher))
00995     {
00996       if (_M_match_token(_ScannerT::_S_token_dash))
00997         __matcher._M_add_char(_M_cur_value[0]);
00998       return true;
00999     }
01000       return false;
01001     }
01002 
01003   template<typename _InIter, typename _TraitsT>
01004     bool
01005     _Compiler<_InIter, _TraitsT>::
01006     _M_follow_list(_RMatcherT& __matcher)
01007     { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
01008 
01009   template<typename _InIter, typename _TraitsT>
01010     bool
01011     _Compiler<_InIter, _TraitsT>::
01012     _M_follow_list2(_RMatcherT& __matcher)
01013     {
01014       if (_M_expression_term(__matcher))
01015     return _M_follow_list2(__matcher);
01016       return true;
01017     }
01018 
01019   template<typename _InIter, typename _TraitsT>
01020     bool
01021     _Compiler<_InIter, _TraitsT>::
01022     _M_expression_term(_RMatcherT& __matcher)
01023     {
01024       return (_M_collating_symbol(__matcher)
01025           || _M_character_class(__matcher)
01026           || _M_equivalence_class(__matcher)
01027           || (_M_start_range(__matcher)
01028           && _M_range_expression(__matcher)));
01029     }
01030 
01031   template<typename _InIter, typename _TraitsT>
01032     bool
01033     _Compiler<_InIter, _TraitsT>::
01034     _M_range_expression(_RMatcherT& __matcher)
01035     {
01036       if (!_M_collating_symbol(__matcher))
01037     if (!_M_match_token(_ScannerT::_S_token_dash))
01038       __throw_regex_error(regex_constants::error_range);
01039       __matcher._M_make_range();
01040       return true;
01041     }
01042 
01043   template<typename _InIter, typename _TraitsT>
01044     bool
01045     _Compiler<_InIter, _TraitsT>::
01046     _M_start_range(_RMatcherT& __matcher)
01047     { return _M_match_token(_ScannerT::_S_token_dash); }
01048 
01049   template<typename _InIter, typename _TraitsT>
01050     bool
01051     _Compiler<_InIter, _TraitsT>::
01052     _M_collating_symbol(_RMatcherT& __matcher)
01053     {
01054       if (_M_match_token(_ScannerT::_S_token_collelem_single))
01055     {
01056       __matcher._M_add_char(_M_cur_value[0]);
01057       return true;
01058     }
01059       if (_M_match_token(_ScannerT::_S_token_collsymbol))
01060     {
01061       __matcher._M_add_collating_element(_M_cur_value);
01062       return true;
01063     }
01064       return false;
01065     }
01066 
01067   template<typename _InIter, typename _TraitsT>
01068     bool
01069     _Compiler<_InIter, _TraitsT>::
01070     _M_equivalence_class(_RMatcherT& __matcher)
01071     {
01072       if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
01073     {
01074       __matcher._M_add_equivalence_class(_M_cur_value);
01075       return true;
01076     }
01077       return false;
01078     }
01079 
01080   template<typename _InIter, typename _TraitsT>
01081     bool
01082     _Compiler<_InIter, _TraitsT>::
01083     _M_character_class(_RMatcherT& __matcher)
01084     {
01085       if (_M_match_token(_ScannerT::_S_token_char_class_name))
01086     {
01087       __matcher._M_add_character_class(_M_cur_value);
01088       return true;
01089     }
01090       return false;
01091     }
01092 
01093   template<typename _InIter, typename _TraitsT>
01094     int
01095     _Compiler<_InIter, _TraitsT>::
01096     _M_cur_int_value(int __radix)
01097     {
01098       int __v = 0;
01099       for (typename _StringT::size_type __i = 0;
01100        __i < _M_cur_value.length(); ++__i)
01101     __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
01102       return __v;
01103     }
01104 
01105   template<typename _InIter, typename _TraitsT>
01106     _AutomatonPtr
01107     __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
01108           regex_constants::syntax_option_type __f)
01109     { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
01110                                         __f)._M_nfa())); }
01111 
01112 } // namespace __regex
01113 } // namespace std
01114 
01115 /* vim: set ts=8 sw=2 sts=2: */