0ea2aa7ca2
The macro `BOOST_NO_AUTO_PTR` is defined on systems where both `auto_ptr` and `unique_ptr` are available (and `auto_ptr` usage produces a deprecation warning). By using `BOOST_NO_CXX11_SMART_PTR` we will favor `unique_ptr` over `auto_ptr`. ``` slex/lexer.hpp:2423:13: error: 'template<class> class std::auto_ptr' is deprecated [-Werror=deprecated-declarations] ```
2933 lines
76 KiB
C++
2933 lines
76 KiB
C++
/*=============================================================================
|
|
Boost.Wave: A Standard compliant C++ preprocessor library
|
|
|
|
Spirit based lexer
|
|
|
|
http://www.boost.org/
|
|
|
|
Copyright (c) 2001, Daniel C. Nuffer.
|
|
Copyright (c) 2001-2012 Hartmut Kaiser.
|
|
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
|
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
|
|
TODO List:
|
|
X callback objects (called when a match is made.)
|
|
X callback passed first & last iterator, and
|
|
a reference to a lexercontrol object that supports some
|
|
operations on the lexer.
|
|
set state
|
|
terminate
|
|
state stack (push, pop, top)
|
|
set new token return value
|
|
ignore the current token
|
|
yymore
|
|
get length of matched token
|
|
get current lexer state
|
|
X DFA serialization to save recomputing the DFA.
|
|
|
|
lexer states.
|
|
organize the file into hpp and ipp. arrange the classes in a logical order.
|
|
use buffering - iterator only needs be an input iterator,
|
|
lexer & callback are not templatized on iterator type, but char type.
|
|
next_token is templatized on iterator type.
|
|
beginning/ending contexts.
|
|
^ and $
|
|
DFA minimization.
|
|
DFA table compression.
|
|
|
|
=============================================================================*/
|
|
#ifndef BOOST_SPIRIT_LEXER_HPP
|
|
#define BOOST_SPIRIT_LEXER_HPP
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
#include <boost/config.hpp>
|
|
#include <boost/throw_exception.hpp>
|
|
|
|
#include <boost/spirit/include/classic_core.hpp>
|
|
#include <boost/spirit/include/classic_symbols.hpp>
|
|
#include <boost/spirit/include/classic_chset.hpp>
|
|
#include <boost/spirit/include/classic_escape_char.hpp>
|
|
|
|
#include <set>
|
|
#include <map>
|
|
#include <memory> // for auto_ptr/unique_ptr
|
|
#include <vector>
|
|
#include <stack>
|
|
#include <utility> // for pair
|
|
#include <iostream>
|
|
#include <fstream>
|
|
#include <boost/assert.hpp>
|
|
#include <boost/limits.hpp>
|
|
|
|
#if defined(BOOST_NO_STD_ITERATOR_TRAITS)
|
|
#define BOOST_SPIRIT_IT_NS impl
|
|
#else
|
|
#define BOOST_SPIRIT_IT_NS std
|
|
#endif
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
namespace boost {
|
|
namespace spirit {
|
|
namespace classic {
|
|
|
|
typedef unsigned char uchar;
|
|
typedef unsigned int node_id_t;
|
|
const node_id_t invalid_node = node_id_t(-1);
|
|
typedef std::set<node_id_t> node_set;
|
|
typedef std::vector<uchar> uchar_vector;
|
|
typedef std::map<node_id_t, node_set> followpos_t;
|
|
typedef std::vector<uchar_vector> state_match_t;
|
|
|
|
template <typename TokenT>
|
|
class lexer_control;
|
|
|
|
class bad_regex : public std::exception
|
|
{
|
|
};
|
|
|
|
namespace lexerimpl
|
|
{
|
|
|
|
class node
|
|
{
|
|
public:
|
|
|
|
virtual ~node() {}
|
|
|
|
virtual node* clone() const = 0;
|
|
virtual bool nullable() const = 0;
|
|
virtual node_set firstpos() const = 0;
|
|
virtual node_set lastpos() const = 0;
|
|
virtual void compute_followpos(followpos_t& followpos) const = 0;
|
|
virtual void compute_state_match(state_match_t& state_match) const = 0;
|
|
virtual void get_eof_ids(node_set& eof_set) const = 0;
|
|
virtual void assign_node_ids(node_id_t& node_count) = 0;
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
virtual void dump(std::ostream& out) const = 0;
|
|
#endif
|
|
|
|
};
|
|
|
|
class char_node : public node
|
|
{
|
|
|
|
public:
|
|
|
|
char_node(const uchar c);
|
|
char_node(const char_node& x);
|
|
virtual ~char_node(){}
|
|
|
|
virtual node* clone() const;
|
|
virtual bool nullable() const;
|
|
virtual node_set firstpos() const;
|
|
virtual node_set lastpos() const;
|
|
virtual void compute_followpos(followpos_t& followpos) const;
|
|
virtual void compute_state_match(state_match_t& state_match ) const;
|
|
virtual void get_eof_ids(node_set& eof_set) const;
|
|
virtual void assign_node_ids(node_id_t& node_count);
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
virtual void dump(std::ostream& out) const;
|
|
#endif
|
|
|
|
private:
|
|
|
|
uchar m_char;
|
|
node_id_t m_node_num;
|
|
};
|
|
|
|
inline
|
|
char_node::char_node(const uchar c)
|
|
: node()
|
|
, m_char(c)
|
|
, m_node_num(0)
|
|
{
|
|
}
|
|
|
|
inline
|
|
char_node::char_node(const char_node& x)
|
|
: node(x)
|
|
, m_char(x.m_char)
|
|
, m_node_num(x.m_node_num)
|
|
{
|
|
}
|
|
|
|
inline node *
|
|
char_node::clone() const
|
|
{
|
|
return new char_node(*this);
|
|
}
|
|
|
|
inline bool
|
|
char_node::nullable() const
|
|
{
|
|
return false;
|
|
}
|
|
|
|
inline node_set
|
|
char_node::firstpos() const
|
|
{
|
|
node_set rval;
|
|
rval.insert(m_node_num);
|
|
return rval;
|
|
}
|
|
|
|
inline node_set
|
|
char_node::lastpos() const
|
|
{
|
|
return firstpos();
|
|
}
|
|
|
|
inline void
|
|
char_node::compute_followpos(followpos_t&) const
|
|
{
|
|
return;
|
|
}
|
|
|
|
inline void
|
|
char_node::compute_state_match(state_match_t& state_match) const
|
|
{
|
|
if (state_match.size() < m_node_num + 1)
|
|
state_match.resize(m_node_num + 1);
|
|
state_match[m_node_num].resize(256);
|
|
state_match[m_node_num][m_char] = 1;
|
|
}
|
|
|
|
inline void
|
|
char_node::get_eof_ids(node_set&) const
|
|
{
|
|
return;
|
|
}
|
|
|
|
inline void
|
|
char_node::assign_node_ids(node_id_t& node_count)
|
|
{
|
|
m_node_num = node_count++;
|
|
}
|
|
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
inline void
|
|
char_node::dump(std::ostream& out) const
|
|
{
|
|
out << "\nchar_node m_char = " << m_char;
|
|
out << " m_node_num = " << m_node_num;
|
|
out << " nullable() = " << (nullable() ? "true" : "false");
|
|
out << " firstpos() = ";
|
|
node_set fp = firstpos();
|
|
std::copy(fp.begin(), fp.end(),
|
|
std::ostream_iterator<node_id_t>(out, ","));
|
|
|
|
out << " lastpos() = ";
|
|
node_set lp = lastpos();
|
|
std::copy(lp.begin(), lp.end(),
|
|
std::ostream_iterator<node_id_t>(out, ","));
|
|
}
|
|
#endif
|
|
|
|
|
|
class epsilon_node : public node
|
|
{
|
|
|
|
public:
|
|
|
|
epsilon_node();
|
|
epsilon_node(const epsilon_node& x);
|
|
virtual ~epsilon_node(){}
|
|
|
|
virtual node* clone() const;
|
|
virtual bool nullable() const;
|
|
virtual node_set firstpos() const;
|
|
virtual node_set lastpos() const;
|
|
virtual void compute_followpos(followpos_t& followpos) const;
|
|
virtual void compute_state_match(state_match_t& state_match ) const;
|
|
virtual void get_eof_ids(node_set& eof_set) const;
|
|
virtual void assign_node_ids(node_id_t& node_count);
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
virtual void dump(std::ostream& out) const;
|
|
#endif
|
|
|
|
private:
|
|
|
|
node_id_t m_node_num;
|
|
};
|
|
|
|
inline
|
|
epsilon_node::epsilon_node()
|
|
: node()
|
|
, m_node_num(0)
|
|
{
|
|
}
|
|
|
|
inline
|
|
epsilon_node::epsilon_node(const epsilon_node& x)
|
|
: node(x)
|
|
, m_node_num(x.m_node_num)
|
|
{
|
|
}
|
|
|
|
inline node *
|
|
epsilon_node::clone() const
|
|
{
|
|
return new epsilon_node(*this);
|
|
}
|
|
|
|
inline bool
|
|
epsilon_node::nullable() const
|
|
{
|
|
return true;
|
|
}
|
|
|
|
inline node_set
|
|
epsilon_node::firstpos() const
|
|
{
|
|
return node_set();
|
|
}
|
|
|
|
inline node_set
|
|
epsilon_node::lastpos() const
|
|
{
|
|
return node_set();
|
|
}
|
|
|
|
inline void
|
|
epsilon_node::compute_followpos(followpos_t&) const
|
|
{
|
|
return;
|
|
}
|
|
|
|
inline void
|
|
epsilon_node::compute_state_match(state_match_t& state_match) const
|
|
{
|
|
if (state_match.size() < m_node_num + 1)
|
|
state_match.resize(m_node_num + 1);
|
|
state_match[m_node_num].resize(256, 1);
|
|
}
|
|
|
|
inline void
|
|
epsilon_node::get_eof_ids(node_set&) const
|
|
{
|
|
return;
|
|
}
|
|
|
|
inline void
|
|
epsilon_node::assign_node_ids(node_id_t& node_count)
|
|
{
|
|
m_node_num = node_count++;
|
|
}
|
|
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
inline void
|
|
epsilon_node::dump(std::ostream& out) const
|
|
{
|
|
out << "\nepsilon_node";
|
|
out << " m_node_num = " << m_node_num;
|
|
out << " nullable() = " << (nullable() ? "true" : "false");
|
|
out << " firstpos() = ";
|
|
node_set fp = firstpos();
|
|
std::copy(fp.begin(), fp.end(),
|
|
std::ostream_iterator<node_id_t>(out, ","));
|
|
|
|
out << " lastpos() = ";
|
|
node_set lp = lastpos();
|
|
std::copy(lp.begin(), lp.end(),
|
|
std::ostream_iterator<node_id_t>(out, ","));
|
|
}
|
|
#endif
|
|
|
|
|
|
class or_node : public node
|
|
{
|
|
|
|
public:
|
|
|
|
or_node(node* left, node* right);
|
|
or_node(const or_node& x);
|
|
virtual ~or_node(){}
|
|
|
|
virtual node* clone() const;
|
|
virtual bool nullable() const;
|
|
virtual node_set firstpos() const;
|
|
virtual node_set lastpos() const;
|
|
virtual void compute_followpos(followpos_t& followpos) const;
|
|
virtual void compute_state_match(state_match_t& state_match ) const;
|
|
virtual void get_eof_ids(node_set& eof_set) const;
|
|
virtual void assign_node_ids(node_id_t& node_count);
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
virtual void dump(std::ostream& out) const;
|
|
#endif
|
|
|
|
private:
|
|
|
|
#ifndef BOOST_NO_CXX11_SMART_PTR
|
|
std::unique_ptr<node> m_left;
|
|
std::unique_ptr<node> m_right;
|
|
#else
|
|
std::auto_ptr<node> m_left;
|
|
std::auto_ptr<node> m_right;
|
|
#endif
|
|
};
|
|
|
|
inline
|
|
or_node::or_node(node* left, node* right)
|
|
: node()
|
|
, m_left(left)
|
|
, m_right(right)
|
|
{
|
|
}
|
|
|
|
inline
|
|
or_node::or_node(const or_node& x)
|
|
: node(x)
|
|
, m_left(x.m_left->clone())
|
|
, m_right(x.m_right->clone())
|
|
{
|
|
}
|
|
|
|
inline node *
|
|
or_node::clone() const
|
|
{
|
|
return new or_node(m_left->clone(), m_right->clone());
|
|
}
|
|
|
|
inline bool
|
|
or_node::nullable() const
|
|
{
|
|
return m_left->nullable() || m_right->nullable();
|
|
}
|
|
|
|
inline node_set
|
|
or_node::firstpos() const
|
|
{
|
|
node_set rval;
|
|
node_set l = m_left->firstpos();
|
|
node_set r = m_right->firstpos();
|
|
std::set_union(l.begin(), l.end(), r.begin(), r.end(),
|
|
std::inserter(rval, rval.begin()));
|
|
return rval;
|
|
}
|
|
|
|
inline node_set
|
|
or_node::lastpos() const
|
|
{
|
|
node_set rval;
|
|
node_set l = m_left->lastpos();
|
|
node_set r = m_right->lastpos();
|
|
std::set_union(l.begin(), l.end(), r.begin(), r.end(),
|
|
std::inserter(rval, rval.begin()));
|
|
return rval;
|
|
}
|
|
|
|
inline void
|
|
or_node::compute_followpos(followpos_t& followpos) const
|
|
{
|
|
m_left->compute_followpos(followpos);
|
|
m_right->compute_followpos(followpos);
|
|
}
|
|
|
|
inline void
|
|
or_node::compute_state_match(state_match_t& state_match) const
|
|
{
|
|
m_left->compute_state_match(state_match);
|
|
m_right->compute_state_match(state_match);
|
|
}
|
|
|
|
inline void
|
|
or_node::get_eof_ids(node_set& eof_nodes) const
|
|
{
|
|
m_left->get_eof_ids(eof_nodes);
|
|
m_right->get_eof_ids(eof_nodes);
|
|
}
|
|
|
|
inline void
|
|
or_node::assign_node_ids(node_id_t& node_count)
|
|
{
|
|
m_left->assign_node_ids(node_count);
|
|
m_right->assign_node_ids(node_count);
|
|
}
|
|
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
inline void
|
|
or_node::dump(std::ostream& out) const
|
|
{
|
|
m_left->dump(out);
|
|
|
|
out << "\nor_node";
|
|
out << " nullable() = " << (nullable() ? "true" : "false");
|
|
out << " firstpos() = ";
|
|
node_set fp = firstpos();
|
|
std::copy(fp.begin(), fp.end(),
|
|
std::ostream_iterator<node_id_t>(out, ","));
|
|
|
|
out << " lastpos() = ";
|
|
node_set lp = lastpos();
|
|
std::copy(lp.begin(), lp.end(),
|
|
std::ostream_iterator<node_id_t>(out, ","));
|
|
|
|
m_right->dump(out);
|
|
}
|
|
#endif
|
|
|
|
|
|
class cat_node : public node
|
|
{
|
|
|
|
public:
|
|
|
|
cat_node(node* left, node* right);
|
|
cat_node(const cat_node& x);
|
|
virtual ~cat_node(){}
|
|
|
|
virtual node* clone() const;
|
|
virtual bool nullable() const;
|
|
virtual node_set firstpos() const;
|
|
virtual node_set lastpos() const;
|
|
virtual void compute_followpos(followpos_t& followpos) const;
|
|
virtual void compute_state_match(state_match_t& state_match ) const;
|
|
virtual void get_eof_ids(node_set& eof_set) const;
|
|
virtual void assign_node_ids(node_id_t& node_count);
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
virtual void dump(std::ostream& out) const;
|
|
#endif
|
|
|
|
private:
|
|
|
|
#ifndef BOOST_NO_CXX11_SMART_PTR
|
|
std::unique_ptr<node> m_left;
|
|
std::unique_ptr<node> m_right;
|
|
#else
|
|
std::auto_ptr<node> m_left;
|
|
std::auto_ptr<node> m_right;
|
|
#endif
|
|
};
|
|
|
|
inline
|
|
cat_node::cat_node(node* left, node* right)
|
|
: node()
|
|
, m_left(left)
|
|
, m_right(right)
|
|
{
|
|
}
|
|
|
|
inline
|
|
cat_node::cat_node(const cat_node& x)
|
|
: node(x)
|
|
, m_left(x.m_left->clone())
|
|
, m_right(x.m_right->clone())
|
|
{
|
|
}
|
|
|
|
inline node *
|
|
cat_node::clone() const
|
|
{
|
|
return new cat_node(m_left->clone(), m_right->clone());
|
|
}
|
|
|
|
inline bool
|
|
cat_node::nullable() const
|
|
{
|
|
return m_left->nullable() && m_right->nullable();
|
|
}
|
|
|
|
inline node_set
|
|
cat_node::firstpos() const
|
|
{
|
|
if (m_left->nullable())
|
|
{
|
|
node_set rval;
|
|
node_set l = m_left->firstpos();
|
|
node_set r = m_right->firstpos();
|
|
std::set_union(l.begin(), l.end(), r.begin(), r.end(),
|
|
std::inserter(rval, rval.begin()));
|
|
return rval;
|
|
}
|
|
else
|
|
{
|
|
return m_left->firstpos();
|
|
}
|
|
}
|
|
|
|
inline node_set
|
|
cat_node::lastpos() const
|
|
{
|
|
if (m_right->nullable())
|
|
{
|
|
node_set rval;
|
|
node_set l = m_left->lastpos();
|
|
node_set r = m_right->lastpos();
|
|
std::set_union(l.begin(), l.end(), r.begin(), r.end(),
|
|
std::inserter(rval, rval.begin()));
|
|
return rval;
|
|
}
|
|
else
|
|
{
|
|
return m_right->lastpos();
|
|
}
|
|
}
|
|
|
|
inline void
|
|
cat_node::compute_followpos(followpos_t& followpos) const
|
|
{
|
|
node_set l = m_left->lastpos();
|
|
for (node_set::iterator i = l.begin();
|
|
i != l.end();
|
|
++i)
|
|
{
|
|
node_set rf = m_right->firstpos();
|
|
followpos[*i].insert(rf.begin(), rf.end());
|
|
}
|
|
|
|
m_left->compute_followpos(followpos);
|
|
m_right->compute_followpos(followpos);
|
|
}
|
|
|
|
inline void
|
|
cat_node::compute_state_match(state_match_t& state_match) const
|
|
{
|
|
m_left->compute_state_match(state_match);
|
|
m_right->compute_state_match(state_match);
|
|
}
|
|
|
|
inline void
|
|
cat_node::get_eof_ids(node_set& eof_nodes) const
|
|
{
|
|
m_left->get_eof_ids(eof_nodes);
|
|
m_right->get_eof_ids(eof_nodes);
|
|
}
|
|
|
|
inline void
|
|
cat_node::assign_node_ids(node_id_t& node_count)
|
|
{
|
|
m_left->assign_node_ids(node_count);
|
|
m_right->assign_node_ids(node_count);
|
|
}
|
|
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
inline void
|
|
cat_node::dump(std::ostream& out) const
|
|
{
|
|
m_left->dump(out);
|
|
|
|
out << "\ncat_node";
|
|
out << " nullable() = " << (nullable() ? "true" : "false");
|
|
out << " firstpos() = ";
|
|
node_set fp = firstpos();
|
|
std::copy(fp.begin(), fp.end(),
|
|
std::ostream_iterator<node_id_t>(out, ","));
|
|
|
|
out << " lastpos() = ";
|
|
node_set lp = lastpos();
|
|
std::copy(lp.begin(), lp.end(),
|
|
std::ostream_iterator<node_id_t>(out, ","));
|
|
|
|
m_right->dump(out);
|
|
}
|
|
#endif
|
|
|
|
|
|
class star_node : public node
|
|
{
|
|
|
|
public:
|
|
|
|
star_node(node* left);
|
|
star_node(const star_node& x);
|
|
virtual ~star_node(){}
|
|
|
|
virtual node* clone() const;
|
|
virtual bool nullable() const;
|
|
virtual node_set firstpos() const;
|
|
virtual node_set lastpos() const;
|
|
virtual void compute_followpos(followpos_t& followpos) const;
|
|
virtual void compute_state_match(state_match_t& state_match ) const;
|
|
virtual void get_eof_ids(node_set& eof_set) const;
|
|
virtual void assign_node_ids(node_id_t& node_count);
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
virtual void dump(std::ostream& out) const;
|
|
#endif
|
|
|
|
private:
|
|
|
|
#ifndef BOOST_NO_CXX11_SMART_PTR
|
|
std::unique_ptr<node> m_left;
|
|
#else
|
|
std::auto_ptr<node> m_left;
|
|
#endif
|
|
};
|
|
|
|
inline
|
|
star_node::star_node(node* left)
|
|
: node()
|
|
, m_left(left)
|
|
{
|
|
}
|
|
|
|
inline
|
|
star_node::star_node(const star_node& x)
|
|
: node(x)
|
|
, m_left(x.m_left->clone())
|
|
{
|
|
}
|
|
|
|
inline node *
|
|
star_node::clone() const
|
|
{
|
|
return new star_node(m_left->clone());
|
|
}
|
|
|
|
inline bool
|
|
star_node::nullable() const
|
|
{
|
|
return true;
|
|
}
|
|
|
|
inline node_set
|
|
star_node::firstpos() const
|
|
{
|
|
return m_left->firstpos();
|
|
}
|
|
|
|
inline node_set
|
|
star_node::lastpos() const
|
|
{
|
|
return m_left->lastpos();
|
|
}
|
|
|
|
inline void
|
|
star_node::compute_followpos(followpos_t& followpos) const
|
|
{
|
|
node_set lp = this->lastpos();
|
|
for (node_set::iterator i = lp.begin();
|
|
i != lp.end();
|
|
++i)
|
|
{
|
|
node_set fp = this->firstpos();
|
|
followpos[*i].insert(fp.begin(), fp.end());
|
|
}
|
|
|
|
m_left->compute_followpos(followpos);
|
|
}
|
|
|
|
inline void
|
|
star_node::compute_state_match(state_match_t& state_match) const
|
|
{
|
|
m_left->compute_state_match(state_match);
|
|
}
|
|
|
|
inline void
|
|
star_node::get_eof_ids(node_set& eof_nodes) const
|
|
{
|
|
m_left->get_eof_ids(eof_nodes);
|
|
}
|
|
|
|
inline void
|
|
star_node::assign_node_ids(node_id_t& node_count)
|
|
{
|
|
m_left->assign_node_ids(node_count);
|
|
}
|
|
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
inline void
|
|
star_node::dump(std::ostream& out) const
|
|
{
|
|
m_left->dump(out);
|
|
|
|
out << "\nstar_node";
|
|
out << " nullable() = " << (nullable() ? "true" : "false");
|
|
out << " firstpos() = ";
|
|
node_set fp = firstpos();
|
|
std::copy(fp.begin(), fp.end(),
|
|
std::ostream_iterator<node_id_t>(out, ","));
|
|
|
|
out << " lastpos() = ";
|
|
node_set lp = lastpos();
|
|
std::copy(lp.begin(), lp.end(),
|
|
std::ostream_iterator<node_id_t>(out, ","));
|
|
|
|
}
|
|
#endif
|
|
|
|
|
|
class eof_node : public node
|
|
{
|
|
|
|
public:
|
|
|
|
eof_node();
|
|
eof_node(const eof_node& x);
|
|
virtual ~eof_node(){}
|
|
|
|
virtual node* clone() const;
|
|
virtual bool nullable() const;
|
|
virtual node_set firstpos() const;
|
|
virtual node_set lastpos() const;
|
|
virtual void compute_followpos(followpos_t& followpos) const;
|
|
virtual void compute_state_match(state_match_t& state_match ) const;
|
|
virtual void get_eof_ids(node_set& eof_set) const;
|
|
virtual void assign_node_ids(node_id_t& node_count);
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
virtual void dump(std::ostream& out) const;
|
|
#endif
|
|
|
|
private:
|
|
|
|
node_id_t m_node_num;
|
|
};
|
|
|
|
inline
|
|
eof_node::eof_node()
|
|
: node()
|
|
, m_node_num(0)
|
|
{
|
|
}
|
|
|
|
inline
|
|
eof_node::eof_node(const eof_node& x)
|
|
: node(x)
|
|
, m_node_num(x.m_node_num)
|
|
{
|
|
}
|
|
|
|
inline node *
|
|
eof_node::clone() const
|
|
{
|
|
return new eof_node(*this);
|
|
}
|
|
|
|
inline bool
|
|
eof_node::nullable() const
|
|
{
|
|
return false;
|
|
}
|
|
|
|
inline node_set
|
|
eof_node::firstpos() const
|
|
{
|
|
node_set rval;
|
|
rval.insert(m_node_num);
|
|
return rval;
|
|
}
|
|
|
|
inline node_set
|
|
eof_node::lastpos() const
|
|
{
|
|
node_set rval;
|
|
rval.insert(m_node_num);
|
|
return rval;
|
|
}
|
|
|
|
inline void
|
|
eof_node::compute_followpos(followpos_t&) const
|
|
{
|
|
return;
|
|
}
|
|
|
|
inline void
|
|
eof_node::compute_state_match(state_match_t& state_match) const
|
|
{
|
|
if (state_match.size() < m_node_num + 1)
|
|
state_match.resize(m_node_num + 1);
|
|
state_match[m_node_num].resize(256, 0);
|
|
}
|
|
|
|
inline void
|
|
eof_node::get_eof_ids(node_set& eof_nodes) const
|
|
{
|
|
eof_nodes.insert(m_node_num);
|
|
}
|
|
|
|
inline void
|
|
eof_node::assign_node_ids(node_id_t& node_count)
|
|
{
|
|
m_node_num = node_count++;
|
|
}
|
|
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
inline void
|
|
eof_node::dump(std::ostream& out) const
|
|
{
|
|
out << "\neof_node";
|
|
out << " m_node_num = " << m_node_num;
|
|
out << " nullable() = " << (nullable() ? "true" : "false");
|
|
out << " firstpos() = ";
|
|
node_set fp = firstpos();
|
|
std::copy(fp.begin(), fp.end(),
|
|
std::ostream_iterator<node_id_t>(out, ","));
|
|
|
|
out << " lastpos() = ";
|
|
node_set lp = lastpos();
|
|
std::copy(lp.begin(), lp.end(),
|
|
std::ostream_iterator<node_id_t>(out, ","));
|
|
}
|
|
#endif
|
|
|
|
class ccl_node : public node
|
|
{
|
|
|
|
public:
|
|
|
|
ccl_node(const std::vector<uchar>& v);
|
|
ccl_node(const uchar c1, const uchar c2);
|
|
ccl_node(const ccl_node& x);
|
|
virtual ~ccl_node(){}
|
|
|
|
virtual node* clone() const;
|
|
virtual bool nullable() const;
|
|
virtual node_set firstpos() const;
|
|
virtual node_set lastpos() const;
|
|
virtual void compute_followpos(followpos_t& followpos) const;
|
|
virtual void compute_state_match(state_match_t& state_match ) const;
|
|
virtual void get_eof_ids(node_set& eof_set) const;
|
|
virtual void assign_node_ids(node_id_t& node_count);
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
virtual void dump(std::ostream& out) const;
|
|
#endif
|
|
|
|
private:
|
|
|
|
std::vector<uchar> m_match;
|
|
node_id_t m_node_num;
|
|
};
|
|
|
|
inline
|
|
ccl_node::ccl_node(const std::vector<uchar>& v)
|
|
: node()
|
|
, m_match(v)
|
|
, m_node_num(0)
|
|
{
|
|
m_match.resize(256); // make sure it's the right size
|
|
}
|
|
|
|
inline
|
|
ccl_node::ccl_node(const uchar c1, const uchar c2)
|
|
: node()
|
|
, m_match(256, uchar(0))
|
|
, m_node_num(0)
|
|
{
|
|
BOOST_ASSERT(c1 < c2);
|
|
for (std::size_t i = c1; i <= std::size_t(c2); ++i)
|
|
{
|
|
m_match[i] = 1;
|
|
}
|
|
}
|
|
|
|
inline
|
|
ccl_node::ccl_node(const ccl_node& x)
|
|
: node(x)
|
|
, m_match(x.m_match)
|
|
, m_node_num(x.m_node_num)
|
|
{
|
|
}
|
|
|
|
inline node *
|
|
ccl_node::clone() const
|
|
{
|
|
return new ccl_node(*this);
|
|
}
|
|
|
|
inline bool
|
|
ccl_node::nullable() const
|
|
{
|
|
return false;
|
|
}
|
|
|
|
inline node_set
|
|
ccl_node::firstpos() const
|
|
{
|
|
node_set rval;
|
|
rval.insert(m_node_num);
|
|
return rval;
|
|
}
|
|
|
|
inline node_set
|
|
ccl_node::lastpos() const
|
|
{
|
|
return firstpos();
|
|
}
|
|
|
|
inline void
|
|
ccl_node::compute_followpos(followpos_t&) const
|
|
{
|
|
return;
|
|
}
|
|
|
|
inline void
|
|
ccl_node::compute_state_match(state_match_t& state_match) const
|
|
{
|
|
if (state_match.size() < m_node_num + 1)
|
|
state_match.resize(m_node_num + 1);
|
|
state_match[m_node_num] = m_match;
|
|
}
|
|
|
|
inline void
|
|
ccl_node::get_eof_ids(node_set&) const
|
|
{
|
|
return;
|
|
}
|
|
|
|
inline void
|
|
ccl_node::assign_node_ids(node_id_t& node_count)
|
|
{
|
|
m_node_num = node_count++;
|
|
}
|
|
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
inline void
|
|
ccl_node::dump(std::ostream& out) const
|
|
{
|
|
out << "\nccl_node m_match = ";
|
|
for (std::size_t i = 0; i < m_match.size(); ++i)
|
|
{
|
|
if (m_match[i])
|
|
out << i << ", ";
|
|
}
|
|
out << " m_node_num = " << m_node_num;
|
|
out << " nullable() = " << (nullable() ? "true" : "false");
|
|
out << " firstpos() = ";
|
|
node_set fp = firstpos();
|
|
std::copy(fp.begin(), fp.end(),
|
|
std::ostream_iterator<node_id_t>(out, ","));
|
|
|
|
out << " lastpos() = ";
|
|
node_set lp = lastpos();
|
|
std::copy(lp.begin(), lp.end(),
|
|
std::ostream_iterator<node_id_t>(out, ","));
|
|
}
|
|
#endif
|
|
|
|
template <typename ScannerT>
|
|
class make_concat
|
|
{
|
|
typedef typename ScannerT::iterator_t iterator_type;
|
|
|
|
public:
|
|
|
|
make_concat(std::stack<node*>& the_stack)
|
|
: m_stack(the_stack)
|
|
{}
|
|
|
|
void operator()(iterator_type const &, iterator_type const &) const
|
|
{
|
|
node* right = m_stack.top();
|
|
m_stack.pop();
|
|
node* left = m_stack.top();
|
|
m_stack.pop();
|
|
node* newnode = new cat_node(left, right);
|
|
m_stack.push(newnode);
|
|
}
|
|
|
|
std::stack<node*>& m_stack;
|
|
};
|
|
|
|
template <int CharTSize>
|
|
struct get_byte_aux;
|
|
|
|
template<>
|
|
struct get_byte_aux<1>
|
|
{
|
|
template <typename CharT>
|
|
unsigned char operator()(CharT c, unsigned int byte)
|
|
{
|
|
BOOST_ASSERT(byte == 0);
|
|
return c;
|
|
}
|
|
};
|
|
|
|
template<>
|
|
struct get_byte_aux<2>
|
|
{
|
|
template <typename CharT>
|
|
unsigned char operator()(CharT c, unsigned int byte)
|
|
{
|
|
static unsigned long mask[] =
|
|
{
|
|
0xFF00,
|
|
0x00FF
|
|
};
|
|
|
|
BOOST_ASSERT(byte < 2);
|
|
return (c & mask[byte]) >> ((sizeof(c) - 1 - byte) * 8);
|
|
}
|
|
};
|
|
|
|
template<>
|
|
struct get_byte_aux<4>
|
|
{
|
|
template <typename CharT>
|
|
unsigned char operator()(CharT c, unsigned int byte)
|
|
{
|
|
static unsigned long mask[] =
|
|
{
|
|
0xFF000000,
|
|
0x00FF0000,
|
|
0x0000FF00,
|
|
0x000000FF
|
|
};
|
|
|
|
BOOST_ASSERT(byte < 4);
|
|
return (c & mask[byte]) >> ((sizeof(c) - 1 - byte) * 8);
|
|
}
|
|
};
|
|
|
|
template <typename CharT>
|
|
inline unsigned char
|
|
get_byte(CharT c, unsigned int byte)
|
|
{
|
|
return get_byte_aux<sizeof(c)>()(c, byte);
|
|
}
|
|
|
|
template <typename ScannerT>
|
|
class make_star
|
|
{
|
|
typedef typename ScannerT::iterator_t iterator_type;
|
|
|
|
public:
|
|
typedef
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
|
|
char_t;
|
|
|
|
make_star(std::stack<node*>& the_stack)
|
|
: m_stack(the_stack)
|
|
{}
|
|
|
|
void operator()(const char_t) const
|
|
{
|
|
node* left = m_stack.top();
|
|
m_stack.pop();
|
|
node* newnode = new star_node(left);
|
|
m_stack.push(newnode);
|
|
}
|
|
|
|
std::stack<node*>& m_stack;
|
|
};
|
|
|
|
template <typename ScannerT>
|
|
class make_or
|
|
{
|
|
typedef typename ScannerT::iterator_t iterator_type;
|
|
|
|
public:
|
|
|
|
make_or(std::stack<node*>& the_stack)
|
|
: m_stack(the_stack)
|
|
{}
|
|
|
|
void operator()(iterator_type const&, iterator_type const&) const
|
|
{
|
|
node* right = m_stack.top();
|
|
m_stack.pop();
|
|
node* left = m_stack.top();
|
|
m_stack.pop();
|
|
node* newnode = new or_node(left, right);
|
|
m_stack.push(newnode);
|
|
}
|
|
|
|
std::stack<node*>& m_stack;
|
|
};
|
|
|
|
template <typename ScannerT>
|
|
class make_plus
|
|
{
|
|
typedef typename ScannerT::iterator_t iterator_type;
|
|
|
|
public:
|
|
typedef
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
|
|
char_t;
|
|
|
|
make_plus(std::stack<node*>& the_stack)
|
|
: m_stack(the_stack)
|
|
{}
|
|
|
|
void operator()(const char_t) const
|
|
{
|
|
node* left = m_stack.top();
|
|
m_stack.pop();
|
|
|
|
node* copy = left->clone();
|
|
|
|
node* new_star = new star_node(copy);
|
|
node* new_cat = new cat_node(left, new_star);
|
|
m_stack.push(new_cat);
|
|
}
|
|
|
|
std::stack<node*>& m_stack;
|
|
};
|
|
|
|
template <typename ScannerT>
|
|
class make_optional
|
|
{
|
|
typedef typename ScannerT::iterator_t iterator_type;
|
|
|
|
public:
|
|
typedef
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
|
|
char_t;
|
|
|
|
make_optional(std::stack<node*>& the_stack)
|
|
: m_stack(the_stack)
|
|
{}
|
|
|
|
void operator()(const char_t) const
|
|
{
|
|
node* left = m_stack.top();
|
|
m_stack.pop();
|
|
|
|
node* new_or = new or_node(left, new epsilon_node());
|
|
m_stack.push(new_or);
|
|
}
|
|
|
|
std::stack<node*>& m_stack;
|
|
};
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// utility function
|
|
template <typename CharT>
|
|
inline utility::impl::range<CharT> const&
|
|
full_range()
|
|
{
|
|
static utility::impl::range<CharT> full((std::numeric_limits<CharT>::min)(),
|
|
(std::numeric_limits<CharT>::max)());
|
|
return full;
|
|
}
|
|
|
|
namespace ccl_utils
|
|
{
|
|
template <typename char_t>
|
|
inline utility::impl::range_run<char_t>
|
|
negate_range_run(
|
|
const utility::impl::range_run<char_t>& rr)
|
|
{
|
|
utility::impl::range_run<char_t> newrr;
|
|
newrr.set(full_range<char_t>());
|
|
for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
|
|
iter != rr.end(); ++iter)
|
|
newrr.clear(*iter);
|
|
return newrr;
|
|
}
|
|
|
|
template <typename char_t>
|
|
inline node*
|
|
create_mb_node_seq(char_t c)
|
|
{
|
|
node* newnode = new char_node(get_byte(c, 0));
|
|
for (unsigned int i = 1; i < sizeof(c); ++i)
|
|
{
|
|
node* cnode = new char_node(get_byte(c, i));
|
|
node* top_node = new cat_node(newnode, cnode);
|
|
newnode = top_node;
|
|
}
|
|
return newnode;
|
|
}
|
|
|
|
template <typename char_t>
|
|
inline void
|
|
handle_mb_char(char_t c, bool first_time,
|
|
std::stack<node*>& stack)
|
|
{
|
|
node* newnode = create_mb_node_seq(c);
|
|
|
|
if (first_time)
|
|
{
|
|
stack.push(newnode);
|
|
}
|
|
else
|
|
{
|
|
node* top = stack.top();
|
|
stack.pop();
|
|
|
|
node* newtop = new or_node(top, newnode);
|
|
stack.push(newtop);
|
|
}
|
|
}
|
|
|
|
// forward decl only
|
|
template <typename char_t>
|
|
inline void
|
|
handle_mb_range(char_t c1, char_t c2, bool first_time,
|
|
std::stack<node*>& stack);
|
|
|
|
template <typename char_t>
|
|
inline void
|
|
create_nodes(const utility::impl::range_run<char_t>& rr,
|
|
std::stack<node*>& stack)
|
|
{
|
|
|
|
if (sizeof(char_t) == 1)
|
|
{
|
|
std::vector<uchar> ccl;
|
|
ccl.resize(256);
|
|
for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
|
|
iter != rr.end(); ++iter)
|
|
{
|
|
for (int i = iter->first; i <= iter->last; ++i)
|
|
{
|
|
// this is always true because of the limited datatype
|
|
// BOOST_ASSERT(uchar(i) < 256 && ccl.size() == 256);
|
|
ccl[uchar(i)] = 1;
|
|
}
|
|
}
|
|
|
|
node* new_ccl = new ccl_node(ccl);
|
|
stack.push(new_ccl);
|
|
}
|
|
else
|
|
{
|
|
bool mb_first_time = true;
|
|
for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
|
|
iter != rr.end(); ++iter)
|
|
{
|
|
if (iter->first == iter->last)
|
|
{
|
|
handle_mb_char(iter->first, mb_first_time, stack);
|
|
}
|
|
else
|
|
{
|
|
handle_mb_range(iter->first, iter->last, mb_first_time, stack);
|
|
}
|
|
mb_first_time = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
template <typename char_t>
|
|
inline std::size_t
|
|
compute_differing_byte(char_t c1, char_t c2)
|
|
{
|
|
std::size_t rval = 0;
|
|
while (rval < sizeof(c1) &&
|
|
get_byte(c1, (unsigned int)rval) == get_byte(c2, (unsigned int)rval))
|
|
{
|
|
++rval;
|
|
}
|
|
return rval;
|
|
}
|
|
|
|
template <typename char_t>
|
|
inline node*
|
|
create_mb_node_type1(std::size_t j, char_t c1, char_t c2)
|
|
{
|
|
std::size_t diff = get_byte(c2, (unsigned int)j) -
|
|
get_byte(c1, (unsigned int)j);
|
|
if (diff == 1) {
|
|
return 0;
|
|
}
|
|
else if (diff == 2) {
|
|
return new char_node(get_byte(c1, (unsigned int)j)+1);
|
|
}
|
|
else {
|
|
return new ccl_node(get_byte(c1, (unsigned int)j)+1,
|
|
get_byte(c2, (unsigned int)j)-1);
|
|
}
|
|
}
|
|
|
|
template <typename char_t>
|
|
inline node *
|
|
create_mb_node_for_byte(std::size_t i, std::size_t j, std::size_t sizem1,
|
|
std::size_t differing_byte, char_t c1, char_t c2, node* newnode)
|
|
{
|
|
node* cnode;
|
|
if (i == sizem1 && j == differing_byte && j != sizem1)
|
|
{
|
|
node* tmp = create_mb_node_type1(j, c1, c2);
|
|
if (tmp == 0)
|
|
{
|
|
delete newnode;
|
|
return 0;
|
|
}
|
|
else
|
|
cnode = tmp;
|
|
}
|
|
else if (i == differing_byte && j == sizem1)
|
|
{
|
|
if (i != sizem1) {
|
|
cnode = new ccl_node(get_byte(c1, (unsigned int)j), 0xFF);
|
|
}
|
|
else {
|
|
cnode = new ccl_node(get_byte(c1, (unsigned int)j),
|
|
get_byte(c2, (unsigned int)j));
|
|
}
|
|
}
|
|
else if (i != differing_byte && i != sizem1 &&
|
|
j == (sizem1 - i + differing_byte))
|
|
{
|
|
cnode = new ccl_node(get_byte(c1, (unsigned int)j)+1, 0xFF);
|
|
}
|
|
else if (i + j - differing_byte > sizem1) {
|
|
cnode = new ccl_node(0, 0xFF);
|
|
}
|
|
else {//if (is plain)
|
|
cnode = new char_node(get_byte(c1, (unsigned int)j));
|
|
}
|
|
|
|
node* top_node = new cat_node(newnode, cnode);
|
|
return top_node;
|
|
}
|
|
|
|
// On platforms, where wchar_t is a typedef for unsigned short, the
|
|
// comparision for a negative value is pointless
|
|
template <bool is_signed>
|
|
struct correct_char_aux {
|
|
};
|
|
|
|
template <>
|
|
struct correct_char_aux<true> {
|
|
|
|
template <typename char_t>
|
|
static char_t correct(char_t c) { if (c < 0) c = 0; return c; }
|
|
};
|
|
|
|
template <>
|
|
struct correct_char_aux<false> {
|
|
|
|
template <typename char_t>
|
|
static char_t correct(char_t c) { return c; }
|
|
};
|
|
|
|
template <typename char_t>
|
|
struct correct_char
|
|
{
|
|
static char_t correct(char_t c)
|
|
{
|
|
return correct_char_aux<std::numeric_limits<char_t>::is_signed >::
|
|
correct(c);
|
|
}
|
|
};
|
|
|
|
template <typename char_t>
|
|
inline void
|
|
handle_mb_range(char_t c1, char_t c2, bool first_time,
|
|
std::stack<node*>& stack)
|
|
{
|
|
// The algorithm can't handle negative value chars, which don't make
|
|
// much sense anyway. This comparision is pointless for wchar_t's on
|
|
// platforms, where wchar_t is a typedef for unsigned short
|
|
|
|
c1 = correct_char<char_t>::correct(c1);
|
|
//if (c1 < 0)
|
|
// c1 = 0;
|
|
|
|
BOOST_ASSERT(c1 < c2);
|
|
node* newnode = 0;
|
|
node* savednode = 0;
|
|
const std::size_t differing_byte = compute_differing_byte(c1, c2);
|
|
const std::size_t sizem1 = sizeof(c1) - 1;
|
|
const std::size_t ndb = sizem1 - differing_byte;
|
|
for (std::size_t i = differing_byte; i < sizeof(c1); ++i)
|
|
{
|
|
// generate node for the first byte
|
|
if (differing_byte == 0 && i == ndb)
|
|
{
|
|
node* tmp = create_mb_node_type1(0, c1, c2);
|
|
if (tmp == 0)
|
|
continue;
|
|
else
|
|
newnode = tmp;
|
|
}
|
|
else
|
|
{
|
|
newnode = new char_node(get_byte(c1, 0));
|
|
}
|
|
for (std::size_t j = 1; j < sizeof(c1); ++j)
|
|
{
|
|
newnode = create_mb_node_for_byte(i, j, sizem1, differing_byte,
|
|
c1, c2, newnode);
|
|
if (newnode == 0)
|
|
goto end_outer_for;
|
|
}
|
|
|
|
// or together the various parts
|
|
if (savednode)
|
|
{
|
|
node* top_node = new or_node(savednode, newnode);
|
|
savednode = top_node;
|
|
}
|
|
else
|
|
{
|
|
savednode = newnode;
|
|
}
|
|
end_outer_for:
|
|
continue;
|
|
}
|
|
|
|
for (std::size_t k = 0; k < ndb; ++k)
|
|
{
|
|
newnode = new char_node(get_byte(c2, 0));
|
|
for (std::size_t j = 1; j < sizeof(c2); ++j)
|
|
{
|
|
node* cnode;
|
|
if (k == differing_byte && j == sizem1 && k != sizem1)
|
|
cnode = new ccl_node(0, get_byte(c2, (unsigned int)j));
|
|
|
|
else if (k != differing_byte && k != sizem1 &&
|
|
j == (sizem1 - k + differing_byte))
|
|
cnode = new ccl_node(0, get_byte(c2, (unsigned int)j)-1);
|
|
|
|
else if (k + j - differing_byte > sizem1)
|
|
cnode = new ccl_node(0, 0xFF);
|
|
|
|
else //if (is plain)
|
|
cnode = new char_node(get_byte(c2, (unsigned int)j));
|
|
|
|
|
|
node* top_node = new cat_node(newnode, cnode);
|
|
newnode = top_node;
|
|
}
|
|
|
|
// or together the various parts
|
|
if (savednode)
|
|
{
|
|
node* top_node = new or_node(savednode, newnode);
|
|
savednode = top_node;
|
|
}
|
|
else
|
|
{
|
|
savednode = newnode;
|
|
}
|
|
}
|
|
|
|
|
|
if (first_time)
|
|
{
|
|
stack.push(savednode);
|
|
}
|
|
else
|
|
{
|
|
node* top = stack.top();
|
|
stack.pop();
|
|
|
|
node* newtop = new or_node(top, savednode);
|
|
stack.push(newtop);
|
|
}
|
|
}
|
|
} // namespace ccl_utils
|
|
|
|
template <typename ScannerT>
|
|
class make_char
|
|
{
|
|
typedef typename ScannerT::iterator_t iterator_type;
|
|
|
|
public:
|
|
typedef
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
|
|
char_t;
|
|
|
|
make_char(std::stack<node*>& the_stack)
|
|
: m_stack(the_stack)
|
|
{}
|
|
|
|
void operator()(iterator_type const& first, iterator_type const& last) const
|
|
{
|
|
const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
|
|
escape_char_parser<lex_escapes, char_t>();
|
|
char_t the_char;
|
|
iterator_type first_ = first;
|
|
ScannerT scan(first_, last);
|
|
lex_escape_ch[assign(the_char)].parse(scan);
|
|
node* newnode = ccl_utils::create_mb_node_seq(the_char);
|
|
m_stack.push(newnode);
|
|
}
|
|
|
|
std::stack<node*>& m_stack;
|
|
};
|
|
|
|
|
|
template <typename ScannerT>
|
|
class make_ccl
|
|
{
|
|
typedef typename ScannerT::iterator_t iterator_type;
|
|
|
|
public:
|
|
typedef
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
|
|
char_t;
|
|
|
|
make_ccl(std::stack<node*>& the_stack)
|
|
: m_stack(the_stack)
|
|
{}
|
|
|
|
static bool is_equal_to_string(iterator_type first,
|
|
iterator_type const & last, const char* str)
|
|
{
|
|
while (first != last &&*str &&*first ==*str)
|
|
{
|
|
++first;
|
|
++str;
|
|
}
|
|
return*str == 0;
|
|
}
|
|
|
|
template <typename ParserT>
|
|
static void fill_ccl(utility::impl::range_run<char_t>& rr, const ParserT& parser)
|
|
{
|
|
for (int i = 0; i < 256; ++i)
|
|
{
|
|
if (parser.test(static_cast<char_t>(uchar(i))))
|
|
rr.set(utility::impl::range<char_t>(char_t(i), char_t(i)));
|
|
}
|
|
}
|
|
|
|
void operator()(iterator_type const& first_, iterator_type const& last) const
|
|
{
|
|
BOOST_ASSERT(*first_ == '[');
|
|
|
|
iterator_type first = first_;
|
|
++first; // skip over '['
|
|
bool negated_ccl = false;
|
|
if (*first == '^')
|
|
{
|
|
negated_ccl = true;
|
|
++first;
|
|
}
|
|
|
|
utility::impl::range_run<char_t> rr;
|
|
while (first != last &&*first != ']')
|
|
{
|
|
if (*first == '[') // it's a ccl_expr like [:space:]
|
|
{
|
|
// check for [:space:], etc.
|
|
if (is_equal_to_string(first, last, "[:alnum:]"))
|
|
{
|
|
fill_ccl(rr, alnum_p);
|
|
}
|
|
else if (is_equal_to_string(first, last, "[:alpha:]"))
|
|
{
|
|
fill_ccl(rr, alpha_p);
|
|
}
|
|
else if (is_equal_to_string(first, last, "[:blank:]"))
|
|
{
|
|
fill_ccl(rr, blank_p);
|
|
}
|
|
else if (is_equal_to_string(first, last, "[:cntrl:]"))
|
|
{
|
|
fill_ccl(rr, cntrl_p);
|
|
}
|
|
else if (is_equal_to_string(first, last, "[:digit:]"))
|
|
{
|
|
fill_ccl(rr, digit_p);
|
|
}
|
|
else if (is_equal_to_string(first, last, "[:graph:]"))
|
|
{
|
|
fill_ccl(rr, graph_p);
|
|
}
|
|
else if (is_equal_to_string(first, last, "[:lower:]"))
|
|
{
|
|
fill_ccl(rr, lower_p);
|
|
}
|
|
else if (is_equal_to_string(first, last, "[:print:]"))
|
|
{
|
|
fill_ccl(rr, print_p);
|
|
}
|
|
else if (is_equal_to_string(first, last, "[:punct:]"))
|
|
{
|
|
fill_ccl(rr, punct_p);
|
|
}
|
|
else if (is_equal_to_string(first, last, "[:space:]"))
|
|
{
|
|
fill_ccl(rr, space_p);
|
|
}
|
|
else if (is_equal_to_string(first, last, "[:upper:]"))
|
|
{
|
|
fill_ccl(rr, upper_p);
|
|
}
|
|
else if (is_equal_to_string(first, last, "[:xdigit:]"))
|
|
{
|
|
fill_ccl(rr, xdigit_p);
|
|
}
|
|
// this can't happen, because it's parsed before we get here.
|
|
//else
|
|
// throw bad_regex();
|
|
|
|
// Advance past the character class expression
|
|
while (first != last &&*first != ']')
|
|
++first;
|
|
BOOST_ASSERT(*first == ']');
|
|
++first;
|
|
}
|
|
else {
|
|
const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
|
|
escape_char_parser<lex_escapes, char_t>();
|
|
|
|
char_t c1;
|
|
ScannerT scan(first, last);
|
|
lex_escape_ch[assign(c1)].parse(scan);
|
|
if (*scan.first == '-') // insert a range
|
|
{
|
|
++scan.first;
|
|
char_t c2;
|
|
lex_escape_ch[assign(c2)].parse(scan);
|
|
BOOST_ASSERT(c1 < c2); // Throw exception?
|
|
rr.set(utility::impl::range<char_t>(c1, c2));
|
|
}
|
|
else // insert 1 char
|
|
{
|
|
rr.set(utility::impl::range<char_t>(c1, c1));
|
|
}
|
|
}
|
|
}
|
|
|
|
if (negated_ccl)
|
|
{
|
|
rr = ccl_utils::negate_range_run(rr);
|
|
}
|
|
|
|
ccl_utils::create_nodes(rr, m_stack);
|
|
}
|
|
|
|
std::stack<node*>& m_stack;
|
|
};
|
|
|
|
template <typename ScannerT>
|
|
class make_any_char
|
|
{
|
|
typedef typename ScannerT::iterator_t iterator_type;
|
|
|
|
public:
|
|
typedef
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
|
|
char_t;
|
|
|
|
std::stack<node*>& m_stack;
|
|
|
|
make_any_char(std::stack<node*>& the_stack)
|
|
: m_stack(the_stack)
|
|
{}
|
|
|
|
void operator()(const char_t c) const
|
|
{
|
|
BOOST_ASSERT(c == '.');
|
|
do_any_char();
|
|
}
|
|
|
|
void do_any_char() const
|
|
{
|
|
static utility::impl::range_run<char_t> rr;
|
|
rr.set(full_range<char_t>());
|
|
char_t newline = '\n';
|
|
rr.clear(utility::impl::range<char_t>(newline, newline));
|
|
|
|
ccl_utils::create_nodes(rr, m_stack);
|
|
}
|
|
};
|
|
|
|
template <typename ScannerT>
|
|
class make_string
|
|
{
|
|
typedef typename ScannerT::iterator_t iterator_type;
|
|
|
|
public:
|
|
typedef
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
|
|
char_t;
|
|
|
|
std::stack<node*>& m_stack;
|
|
|
|
make_string(std::stack<node*>& the_stack)
|
|
: m_stack(the_stack)
|
|
{}
|
|
|
|
void operator()(iterator_type const& first, iterator_type const& last) const
|
|
{
|
|
BOOST_ASSERT(*first == '"');
|
|
|
|
iterator_type first_ = first;
|
|
ScannerT scan(first_, last);
|
|
++scan.first; // skip over '"'
|
|
|
|
// empty string not allowed
|
|
if (*scan.first == '"')
|
|
{
|
|
boost::throw_exception(bad_regex());
|
|
}
|
|
|
|
const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
|
|
escape_char_parser<lex_escapes, char_t>();
|
|
|
|
char_t c;
|
|
lex_escape_ch[assign(c)].parse(scan);
|
|
node* top_node = ccl_utils::create_mb_node_seq(c);
|
|
|
|
while (*scan.first != '"' && scan.first != scan.last)
|
|
{
|
|
lex_escape_ch[assign(c)].parse(scan);
|
|
node* cur_node = ccl_utils::create_mb_node_seq(c);
|
|
top_node = new cat_node(top_node, cur_node);
|
|
}
|
|
m_stack.push(top_node);
|
|
}
|
|
};
|
|
|
|
inline
|
|
node* repeat_node(node* n, int num)
|
|
{
|
|
node* list_of_nodes = n;
|
|
for (int i = 1; i < num; ++i)
|
|
{
|
|
list_of_nodes = new cat_node(list_of_nodes, n->clone());
|
|
}
|
|
return list_of_nodes;
|
|
}
|
|
|
|
inline
|
|
node* optional_node(node* n)
|
|
{
|
|
return new or_node(n, new epsilon_node());
|
|
}
|
|
|
|
template <typename ScannerT>
|
|
class make_rep1
|
|
{
|
|
typedef typename ScannerT::iterator_t iterator_type;
|
|
|
|
public:
|
|
typedef
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
|
|
char_t;
|
|
|
|
std::stack<node*>& m_stack;
|
|
|
|
make_rep1(std::stack<node*>& the_stack)
|
|
: m_stack(the_stack)
|
|
{}
|
|
|
|
void operator()(iterator_type const& first, iterator_type const& last) const
|
|
{
|
|
BOOST_ASSERT(*first == '{');
|
|
|
|
iterator_type first_ = first;
|
|
ScannerT scan(first_, last);
|
|
++scan.first; // skip over '{'
|
|
|
|
unsigned int count;
|
|
uint_p[assign(count)].parse(scan);
|
|
if (count == 0)
|
|
boost::throw_exception(bad_regex());
|
|
|
|
node* top_node = m_stack.top();
|
|
m_stack.pop();
|
|
top_node = repeat_node(top_node, count);
|
|
m_stack.push(top_node);
|
|
}
|
|
};
|
|
|
|
template <typename ScannerT>
|
|
class make_rep2
|
|
{
|
|
typedef typename ScannerT::iterator_t iterator_type;
|
|
|
|
public:
|
|
typedef
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
|
|
char_t;
|
|
|
|
std::stack<node*>& m_stack;
|
|
|
|
make_rep2(std::stack<node*>& the_stack)
|
|
: m_stack(the_stack)
|
|
{}
|
|
|
|
void operator()(iterator_type const& first, iterator_type const& last) const
|
|
{
|
|
BOOST_ASSERT(*first == '{');
|
|
|
|
iterator_type first_ = first;
|
|
ScannerT scan (first_, last);
|
|
++scan.first; // skip over '{'
|
|
|
|
unsigned int count;
|
|
uint_p[assign(count)].parse(scan);
|
|
if (count == 0)
|
|
boost::throw_exception(bad_regex());
|
|
|
|
node* top_node = m_stack.top();
|
|
m_stack.pop();
|
|
top_node = new cat_node(repeat_node(top_node, count),
|
|
new star_node(top_node->clone()));
|
|
m_stack.push(top_node);
|
|
|
|
}
|
|
};
|
|
|
|
template <typename ScannerT>
|
|
class make_rep3
|
|
{
|
|
typedef typename ScannerT::iterator_t iterator_type;
|
|
|
|
public:
|
|
typedef
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
|
|
char_t;
|
|
|
|
std::stack<node*>& m_stack;
|
|
|
|
make_rep3(std::stack<node*>& the_stack)
|
|
: m_stack(the_stack)
|
|
{}
|
|
|
|
void operator()(iterator_type const& first, iterator_type const& last) const
|
|
{
|
|
BOOST_ASSERT(*first == '{');
|
|
|
|
iterator_type first_ = first;
|
|
ScannerT scan(first_, last);
|
|
++scan.first; // skip over '{'
|
|
|
|
unsigned int count1, count2;
|
|
uint_p[assign(count1)].parse(scan);
|
|
if (count1 == 0)
|
|
boost::throw_exception(bad_regex());
|
|
|
|
++scan.first; // skip over ','
|
|
|
|
uint_p[assign(count2)].parse(scan);
|
|
if (count2 <= count1)
|
|
boost::throw_exception(bad_regex());
|
|
|
|
node* top_node = m_stack.top();
|
|
m_stack.pop();
|
|
node* repeats = repeat_node(top_node, count1);
|
|
top_node = new cat_node(repeats,
|
|
repeat_node(optional_node(top_node->clone()),
|
|
count2 - count1));
|
|
|
|
m_stack.push(top_node);
|
|
}
|
|
};
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// Lexer grammar
|
|
//
|
|
// Defines the grammar, which mandates the syntax of the understood
|
|
// lexeme definitions passed to lexer::register_regex.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
class lexer_grammar : public boost::spirit::classic::grammar<lexer_grammar>
|
|
{
|
|
public:
|
|
lexer_grammar(std::stack<node*> &node_stack_)
|
|
: node_stack(node_stack_) {}
|
|
|
|
template <typename ScannerT>
|
|
struct definition
|
|
{
|
|
typedef rule<ScannerT> rule_t;
|
|
typedef typename ScannerT::iterator_t iterator_type;
|
|
typedef
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
|
|
char_t;
|
|
|
|
rule_t regex, re, series, singleton, singleton2, fullccl, ccl, string,
|
|
escseq, ccl_char;
|
|
symbols<> ccl_expr;
|
|
|
|
definition(lexer_grammar const &self)
|
|
{
|
|
regex =
|
|
re >> !('/' >> re) >> !ch_p('$')
|
|
;
|
|
|
|
re =
|
|
series
|
|
>>*( ('|' >> series)[make_or<ScannerT>(self.node_stack)] )
|
|
;
|
|
|
|
series =
|
|
singleton
|
|
>>*( singleton[make_concat<ScannerT>(self.node_stack)] )
|
|
;
|
|
|
|
singleton =
|
|
ch_p('.')[make_any_char<ScannerT>(self.node_stack)]
|
|
>> singleton2
|
|
| fullccl
|
|
>> singleton2
|
|
| ('"' >> string >> '"')
|
|
[
|
|
make_string<ScannerT>(self.node_stack)
|
|
]
|
|
>> singleton2
|
|
| '(' >> re >> ')'
|
|
>> singleton2
|
|
| ((anychar_p - chset<>("/|*+?.(){}\\")) | escseq)
|
|
[
|
|
make_char<ScannerT>(self.node_stack)
|
|
]
|
|
>> singleton2
|
|
;
|
|
|
|
singleton2 =
|
|
ch_p('*')[make_star<ScannerT>(self.node_stack)]
|
|
>> singleton2
|
|
| ch_p('+')[make_plus<ScannerT>(self.node_stack)]
|
|
>> singleton2
|
|
| ch_p('?')[make_optional<ScannerT>(self.node_stack)]
|
|
>> singleton2
|
|
| ('{' >> uint_p >> '}')
|
|
[
|
|
make_rep1<ScannerT>(self.node_stack)
|
|
]
|
|
>> singleton2
|
|
| ('{' >> uint_p >> ',' >> '}')
|
|
[
|
|
make_rep2<ScannerT>(self.node_stack)
|
|
]
|
|
>> singleton2
|
|
| ('{' >> uint_p >> ',' >> uint_p >> '}')
|
|
[
|
|
make_rep3<ScannerT>(self.node_stack)
|
|
]
|
|
>> singleton2
|
|
| epsilon_p
|
|
;
|
|
|
|
fullccl =
|
|
('[' >> !ch_p('^') >> ccl >> ']')
|
|
[
|
|
make_ccl<ScannerT>(self.node_stack)
|
|
]
|
|
;
|
|
|
|
ccl =
|
|
*(ccl_expr | (ccl_char >> !('-' >> ccl_char)))
|
|
;
|
|
|
|
ccl_char =
|
|
( (anychar_p - chset<>("\\\n]")) | escseq )
|
|
;
|
|
|
|
ccl_expr =
|
|
"[:alnum:]",
|
|
"[:alpha:]",
|
|
"[:blank:]",
|
|
"[:cntrl:]",
|
|
"[:digit:]",
|
|
"[:graph:]",
|
|
"[:lower:]",
|
|
"[:print:]",
|
|
"[:punct:]",
|
|
"[:space:]",
|
|
"[:upper:]",
|
|
"[:xdigit:]"
|
|
;
|
|
|
|
string =
|
|
+( (anychar_p - chset<>("\"\\")) | escseq )
|
|
;
|
|
|
|
typedef
|
|
uint_parser<char_t, 8, 1,
|
|
std::numeric_limits<char_t>::digits / 3 + 1
|
|
> oct_parser_t;
|
|
typedef
|
|
uint_parser<char_t, 16, 1,
|
|
std::numeric_limits<char_t>::digits / 4 + 1
|
|
> hex_parser_t;
|
|
|
|
escseq =
|
|
ch_p('\\')
|
|
>> (
|
|
oct_parser_t()
|
|
| as_lower_d['x'] >> hex_parser_t()
|
|
| (anychar_p - chset<>('\n'))
|
|
)
|
|
;
|
|
|
|
#define BOOST_SLEX_DEBUG (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
|
|
BOOST_SPIRIT_DEBUG_TRACE_RULE(regex, BOOST_SLEX_DEBUG);
|
|
BOOST_SPIRIT_DEBUG_TRACE_RULE(re, BOOST_SLEX_DEBUG);
|
|
BOOST_SPIRIT_DEBUG_TRACE_RULE(series, BOOST_SLEX_DEBUG);
|
|
BOOST_SPIRIT_DEBUG_TRACE_RULE(singleton, BOOST_SLEX_DEBUG);
|
|
BOOST_SPIRIT_DEBUG_TRACE_RULE(singleton2, BOOST_SLEX_DEBUG);
|
|
BOOST_SPIRIT_DEBUG_TRACE_RULE(fullccl, BOOST_SLEX_DEBUG);
|
|
BOOST_SPIRIT_DEBUG_TRACE_RULE(ccl, BOOST_SLEX_DEBUG);
|
|
BOOST_SPIRIT_DEBUG_TRACE_RULE(string, BOOST_SLEX_DEBUG);
|
|
BOOST_SPIRIT_DEBUG_TRACE_RULE(escseq, BOOST_SLEX_DEBUG);
|
|
BOOST_SPIRIT_DEBUG_TRACE_RULE(ccl_char, BOOST_SLEX_DEBUG);
|
|
|
|
#undef BOOST_SLEX_DEBUG
|
|
}
|
|
|
|
rule<ScannerT> const&
|
|
start() const { return regex; }
|
|
};
|
|
|
|
std::stack<node*> &node_stack;
|
|
|
|
}; // class lexer_grammar
|
|
|
|
template <typename StringT>
|
|
inline node *
|
|
parse(lexer_grammar& g, StringT const& str)
|
|
{
|
|
typedef
|
|
scanner<typename StringT::const_iterator, scanner_policies<> >
|
|
scanner_t;
|
|
typedef typename rule<scanner_t>::template result<scanner_t>::type
|
|
result_t;
|
|
|
|
typename StringT::const_iterator first = str.begin();
|
|
typename StringT::const_iterator last = str.end();
|
|
|
|
scanner_t scan(first, last);
|
|
// typename rule<scanner_t>::result_t hit = g.parse(scan);
|
|
result_t hit = g.parse(scan);
|
|
if (!hit || !scan.at_end())
|
|
{
|
|
while (g.node_stack.size())
|
|
{
|
|
delete g.node_stack.top();
|
|
g.node_stack.pop();
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
BOOST_ASSERT(g.node_stack.size() == 1);
|
|
node* rval = g.node_stack.top();
|
|
g.node_stack.pop();
|
|
node* an_eof_node = new eof_node();
|
|
rval = new cat_node(rval, an_eof_node);
|
|
return rval;
|
|
}
|
|
|
|
inline
|
|
void make_case_insensitive(state_match_t& state_match)
|
|
{
|
|
// TODO: Fix this.
|
|
// This doesn't take into account foreign languages, figure out how to
|
|
// do that. Also this approach is broken for this implementation of
|
|
// wide chars.
|
|
for (state_match_t::iterator iter = state_match.begin();
|
|
iter != state_match.end(); ++iter)
|
|
{
|
|
int i, j;
|
|
for (i = 'A', j = 'a'; i <= 'Z'; ++i, ++j)
|
|
{
|
|
// if either is set, turn them both on
|
|
(*iter)[i] = (*iter)[j] = uchar((*iter)[i] | (*iter)[j]);
|
|
}
|
|
}
|
|
}
|
|
|
|
template<bool wide_char>
|
|
struct regex_match_helper;
|
|
|
|
template<>
|
|
struct regex_match_helper<false> // single byte char
|
|
{
|
|
template <typename DfaT, typename IteratorT>
|
|
static bool
|
|
do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
|
|
int& regex_index,
|
|
std::basic_string<
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
|
|
> *token)
|
|
{
|
|
typedef std::basic_string<
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
|
|
> string_type;
|
|
typedef typename string_type::size_type size_type;
|
|
|
|
node_id_t s = 0;
|
|
node_id_t last_accepting_index = invalid_node;
|
|
IteratorT p = first;
|
|
IteratorT last_accepting_cpos = first;
|
|
while (p != last)
|
|
{
|
|
s = dfa.transition_table[s][(uchar)*p];
|
|
if (s == invalid_node)
|
|
break;
|
|
if (token) token->append((size_type)1, *p);
|
|
++p;
|
|
if (dfa.acceptance_index[s] != invalid_node)
|
|
{
|
|
last_accepting_index = s;
|
|
last_accepting_cpos = p;
|
|
}
|
|
}
|
|
if (last_accepting_index != invalid_node)
|
|
{
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
std::cout << "dfa.acceptance_index[" << last_accepting_index << "] = " <<
|
|
dfa.acceptance_index[last_accepting_index] << '\n';
|
|
#endif
|
|
|
|
first = last_accepting_cpos;
|
|
regex_index = dfa.acceptance_index[last_accepting_index];
|
|
return true;
|
|
}
|
|
else
|
|
return false;
|
|
}
|
|
};
|
|
|
|
template<>
|
|
struct regex_match_helper<true> // wide char
|
|
{
|
|
template <typename DfaT, typename IteratorT>
|
|
static bool
|
|
do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
|
|
int& regex_index,
|
|
std::basic_string<
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
|
|
> *token)
|
|
{
|
|
typedef
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
|
|
char_t;
|
|
typedef std::basic_string<char_t> string_type;
|
|
typedef typename string_type::size_type size_type;
|
|
|
|
node_id_t s = 0;
|
|
node_id_t last_accepting_index = invalid_node;
|
|
IteratorT wp = first;
|
|
IteratorT last_accepting_cpos = first;
|
|
|
|
while (wp != last)
|
|
{
|
|
for (unsigned int i = 0; i < sizeof(char_t); ++i)
|
|
{
|
|
s = dfa.transition_table[s][get_byte(*wp,i)];
|
|
if (s == invalid_node)
|
|
{
|
|
goto break_while;
|
|
}
|
|
}
|
|
if (token) token->append((size_type)1, *wp);
|
|
++wp;
|
|
if (dfa.acceptance_index[s] != invalid_node)
|
|
{
|
|
last_accepting_index = s;
|
|
last_accepting_cpos = wp;
|
|
}
|
|
|
|
}
|
|
|
|
break_while:
|
|
if (last_accepting_index != invalid_node)
|
|
{
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
std::cout << "dfa.acceptance_index[" << last_accepting_index << "] = " <<
|
|
dfa.acceptance_index[last_accepting_index] << '\n';
|
|
#endif
|
|
first = last_accepting_cpos;
|
|
regex_index = dfa.acceptance_index[last_accepting_index];
|
|
|
|
return true;
|
|
}
|
|
else
|
|
return false;
|
|
}
|
|
};
|
|
|
|
template <typename DfaT, typename IteratorT, bool wide_char>
|
|
struct regex_match
|
|
{
|
|
static bool
|
|
do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
|
|
int& regex_index,
|
|
std::basic_string<
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
|
|
> *token)
|
|
{
|
|
return regex_match_helper<wide_char>::do_match(
|
|
dfa, first, last, regex_index, token);
|
|
}
|
|
};
|
|
|
|
} // namespace lexerimpl
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
template <typename IteratorT = char const*, typename TokenT = int,
|
|
typename CallbackT = void(*)(IteratorT const &,
|
|
IteratorT &,
|
|
IteratorT const&,
|
|
TokenT const&,
|
|
lexer_control<TokenT> &)>
|
|
class lexer
|
|
{
|
|
public:
|
|
typedef CallbackT callback_t;
|
|
typedef
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
|
|
char_t;
|
|
|
|
struct regex_info
|
|
{
|
|
std::basic_string<char_t> str;
|
|
TokenT token;
|
|
CallbackT callback;
|
|
|
|
regex_info(const std::basic_string<char_t>& _str,
|
|
const TokenT& _token,
|
|
const CallbackT& _callback)
|
|
: str(_str)
|
|
, token(_token)
|
|
, callback(_callback)
|
|
{}
|
|
|
|
};
|
|
|
|
struct dfa_table
|
|
{
|
|
std::vector<std::vector<node_id_t> > transition_table;
|
|
std::vector<node_id_t> acceptance_index;
|
|
};
|
|
typedef std::vector<node_id_t> node_table_t;
|
|
typedef std::vector<node_table_t> transition_table_t;
|
|
typedef std::vector<dfa_table> dfa_t;
|
|
|
|
|
|
lexer(unsigned int states = 1);
|
|
|
|
void register_regex(const std::basic_string<char_t>& regex,
|
|
const TokenT& id, const CallbackT& cb = CallbackT(),
|
|
unsigned int state = 0);
|
|
|
|
TokenT next_token(IteratorT &first, IteratorT const &last,
|
|
std::basic_string<char_t> *token = 0);
|
|
|
|
void create_dfa();
|
|
bool has_compiled_dfa() { return m_compiled_dfa; }
|
|
|
|
void set_case_insensitive(bool insensitive);
|
|
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
void dump(std::ostream& out);
|
|
#endif
|
|
typedef std::vector<std::vector<regex_info> > regex_list_t;
|
|
|
|
bool load (std::ifstream &in, long unique_id = 0);
|
|
bool save (std::ofstream &out, long unique_id = 0);
|
|
enum {
|
|
SLEX_SIGNATURE = 0x58454C53, // "SLEX"
|
|
SLEX_VERSION_100 = 0x0100, // persistance version
|
|
SLEX_LAST_KNOWN_VERSION = SLEX_VERSION_100,
|
|
SLEX_MINOR_VERSION_MASK = 0xFF
|
|
};
|
|
|
|
private:
|
|
|
|
void create_dfa_for_state(int state);
|
|
|
|
static bool regex_match(const dfa_t& dfa, IteratorT& first,
|
|
IteratorT& last, int& regex_index);
|
|
|
|
mutable std::stack<lexerimpl::node*> node_stack;
|
|
lexerimpl::lexer_grammar g;
|
|
|
|
mutable bool m_compiled_dfa;
|
|
mutable dfa_t m_dfa;
|
|
|
|
regex_list_t m_regex_list;
|
|
bool m_case_insensitive;
|
|
|
|
unsigned int m_state;
|
|
std::stack<unsigned int> m_state_stack;
|
|
unsigned int m_num_states;
|
|
};
|
|
|
|
|
|
template <typename IteratorT, typename TokenT, typename CallbackT>
|
|
inline
|
|
lexer<IteratorT, TokenT, CallbackT>::lexer(unsigned int states)
|
|
: g(node_stack)
|
|
, m_compiled_dfa(false)
|
|
, m_regex_list(states)
|
|
, m_case_insensitive(false)
|
|
, m_state(0)
|
|
, m_state_stack()
|
|
, m_num_states(states)
|
|
{
|
|
BOOST_SPIRIT_DEBUG_TRACE_NODE_NAME(g, "slex::lexer",
|
|
BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX);
|
|
}
|
|
|
|
template <typename IteratorT, typename TokenT, typename CallbackT>
|
|
inline void
|
|
lexer<IteratorT, TokenT, CallbackT>::register_regex(
|
|
const std::basic_string<char_t>& regex, const TokenT& id,
|
|
const CallbackT& callback, unsigned int state)
|
|
{
|
|
if (state > m_num_states) {
|
|
m_regex_list.resize(state);
|
|
m_num_states = state;
|
|
}
|
|
m_regex_list[state].push_back(regex_info(regex, id, callback));
|
|
}
|
|
|
|
template <typename IteratorT, typename TokenT, typename CallbackT>
|
|
inline TokenT
|
|
lexer<IteratorT, TokenT, CallbackT>::next_token(
|
|
IteratorT &first, IteratorT const& last,
|
|
std::basic_string<
|
|
typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
|
|
> *token)
|
|
{
|
|
if (!m_compiled_dfa)
|
|
{
|
|
create_dfa();
|
|
}
|
|
|
|
IteratorT saved = first;
|
|
int regex_index;
|
|
if (!lexerimpl::regex_match<dfa_table, IteratorT, (sizeof(char_t) > 1)>::
|
|
do_match(m_dfa[m_state], first, last, regex_index, token))
|
|
return -1; // TODO: can't return -1, need to return some invalid token.
|
|
// how to figure this out? We can use traits I guess.
|
|
else
|
|
{
|
|
regex_info regex = m_regex_list[m_state][regex_index];
|
|
TokenT rval = regex.token;
|
|
if (regex.callback)
|
|
{
|
|
// execute corresponding callback
|
|
lexer_control<TokenT> controller(rval, m_state, m_state_stack);
|
|
regex.callback(saved, first, last, regex.token, controller);
|
|
if (controller.ignore_current_token_set()) {
|
|
if (token)
|
|
token->erase();
|
|
return next_token(first, last, token);
|
|
}
|
|
}
|
|
return rval;
|
|
}
|
|
}
|
|
|
|
namespace lexerimpl
|
|
{
|
|
|
|
inline
|
|
bool find_acceptance_state(const node_set& eof_node_ids,
|
|
const node_set& current_set,
|
|
node_id_t& acceptance_node_id)
|
|
{
|
|
for(node_set::const_iterator nsi = eof_node_ids.begin();
|
|
nsi != eof_node_ids.end(); ++nsi)
|
|
{
|
|
node_id_t eof_node_id =*nsi;
|
|
if (current_set.end() != current_set.find(eof_node_id))
|
|
{
|
|
// store the first one we come to as the
|
|
// matched pattern
|
|
acceptance_node_id = eof_node_id;
|
|
// don't bother searching for more
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
template <typename RegexListT, typename GrammarT>
|
|
#ifndef BOOST_NO_CXX11_SMART_PTR
|
|
inline std::unique_ptr<node>
|
|
#else
|
|
inline std::auto_ptr<node>
|
|
#endif
|
|
parse_regexes(const RegexListT& regex_list, GrammarT& g)
|
|
{
|
|
// parse the expressions into a tree
|
|
if (regex_list.begin() == regex_list.end())
|
|
boost::throw_exception(bad_regex());
|
|
|
|
typename RegexListT::const_iterator ri = regex_list.begin();
|
|
#ifndef BOOST_NO_CXX11_SMART_PTR
|
|
std::unique_ptr<node> tree(lexerimpl::parse(g, (*ri).str));
|
|
#else
|
|
std::auto_ptr<node> tree(lexerimpl::parse(g, (*ri).str));
|
|
#endif
|
|
if (tree.get() == 0)
|
|
boost::throw_exception(bad_regex());
|
|
|
|
++ri;
|
|
for (/**/; ri != regex_list.end(); ++ri)
|
|
{
|
|
#ifndef BOOST_NO_CXX11_SMART_PTR
|
|
std::unique_ptr<node> next_tree(lexerimpl::parse(g, (*ri).str));
|
|
#else
|
|
std::auto_ptr<node> next_tree(lexerimpl::parse(g, (*ri).str));
|
|
#endif
|
|
if (next_tree.get() == 0)
|
|
boost::throw_exception(bad_regex());
|
|
#ifndef BOOST_NO_CXX11_SMART_PTR
|
|
tree = std::unique_ptr<node>(new or_node(tree.release(), next_tree.release()));
|
|
#else
|
|
tree = std::auto_ptr<node>(new or_node(tree.release(), next_tree.release()));
|
|
#endif
|
|
}
|
|
return tree;
|
|
}
|
|
|
|
} //namespace lexerimpl
|
|
|
|
template <typename IteratorT, typename TokenT, typename CallbackT>
|
|
inline void
|
|
lexer<IteratorT, TokenT, CallbackT>::create_dfa()
|
|
{
|
|
m_dfa.resize(m_num_states);
|
|
for (unsigned int i = 0; i < m_num_states; ++i)
|
|
create_dfa_for_state(i);
|
|
}
|
|
|
|
// Algorithm from Compilers: Principles, Techniques, and Tools p. 141
|
|
template <typename IteratorT, typename TokenT, typename CallbackT>
|
|
inline void
|
|
lexer<IteratorT, TokenT, CallbackT>::create_dfa_for_state(int state)
|
|
{
|
|
using lexerimpl::node;
|
|
#ifndef BOOST_NO_CXX11_SMART_PTR
|
|
std::unique_ptr<node> tree = lexerimpl::parse_regexes(m_regex_list[state], g);
|
|
#else
|
|
std::auto_ptr<node> tree = lexerimpl::parse_regexes(m_regex_list[state], g);
|
|
#endif
|
|
node_id_t dummy = 0;
|
|
tree->assign_node_ids(dummy);
|
|
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
tree->dump(std::cout);
|
|
#endif
|
|
|
|
// compute followpos(root)
|
|
followpos_t followpos;
|
|
tree->compute_followpos(followpos);
|
|
|
|
// the dfa states <-> nfa state groups
|
|
std::map<node_set, node_id_t> dstates1;
|
|
std::map<node_id_t, node_set> dstates2;
|
|
|
|
// the dfa transitions
|
|
m_dfa[state].transition_table.push_back(
|
|
std::vector<node_id_t>(256, invalid_node));
|
|
m_dfa[state].acceptance_index.push_back(invalid_node);
|
|
|
|
// whether the dfa state has been processed yet
|
|
std::vector<node_id_t> marked;
|
|
|
|
// used to give a unique id to each dfa state
|
|
node_id_t num_states = 0;
|
|
|
|
// initially, the only unmarked state in Dstates is firstpos(root).
|
|
marked.push_back(0);
|
|
node_set fpr = tree->firstpos();
|
|
dstates1[fpr] = 0;
|
|
dstates2[0] = fpr;
|
|
state_match_t state_match;
|
|
tree->compute_state_match(state_match);
|
|
|
|
if (m_case_insensitive)
|
|
lexerimpl::make_case_insensitive(state_match);
|
|
|
|
node_set eof_node_ids;
|
|
tree->get_eof_ids(eof_node_ids);
|
|
// translate the eof_node_ids into a 0-based index
|
|
std::map<node_id_t, node_id_t> eof_node_id_map;
|
|
unsigned int x = 0;
|
|
for (node_set::iterator node_id_it = eof_node_ids.begin();
|
|
node_id_it != eof_node_ids.end();
|
|
++node_id_it)
|
|
{
|
|
eof_node_id_map[*node_id_it] = x++;
|
|
}
|
|
|
|
// figure out if this is an acceptance state
|
|
node_id_t eof_node_id;
|
|
if (lexerimpl::find_acceptance_state(eof_node_ids, fpr, eof_node_id))
|
|
{
|
|
m_dfa[state].acceptance_index[0] = eof_node_id_map[eof_node_id];
|
|
}
|
|
|
|
std::vector<node_id_t>::iterator i = std::find(marked.begin(), marked.end(),
|
|
node_id_t(0));
|
|
while (marked.end() != i)
|
|
{
|
|
*i = 1;
|
|
node_id_t T = node_id_t(std::distance(marked.begin(), i));
|
|
BOOST_ASSERT(T < dstates2.size());
|
|
node_set Tstates = dstates2[T];
|
|
for (node_id_t j = 0; j < 256; ++j)
|
|
{
|
|
node_set U;
|
|
for (node_set::iterator k = Tstates.begin();
|
|
k != Tstates.end(); ++k)
|
|
{
|
|
node_id_t p =*k;
|
|
BOOST_ASSERT(p < state_match.size());
|
|
BOOST_ASSERT(j < state_match[p].size());
|
|
if (state_match[p][j])
|
|
{
|
|
node_set fpp = followpos[p];
|
|
U.insert(fpp.begin(), fpp.end());
|
|
}
|
|
}
|
|
if (U.size() > 0)
|
|
{
|
|
std::map<node_set, node_id_t>::iterator l = dstates1.find(U);
|
|
node_id_t target_state;
|
|
if (l == dstates1.end()) // not in the states yet
|
|
{
|
|
++num_states;
|
|
dstates1[U] = target_state = num_states;
|
|
dstates2[target_state] = U;
|
|
marked.push_back(0);
|
|
m_dfa[state].transition_table.push_back(
|
|
std::vector<node_id_t>(256, invalid_node));
|
|
m_dfa[state].acceptance_index.push_back(invalid_node);
|
|
// figure out if this is an acceptance state
|
|
node_id_t eof_node_id;
|
|
if (lexerimpl::find_acceptance_state(eof_node_ids, U, eof_node_id))
|
|
{
|
|
m_dfa[state].acceptance_index[target_state] =
|
|
eof_node_id_map[eof_node_id];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
target_state = dstates1[U];
|
|
}
|
|
|
|
BOOST_ASSERT(T < m_dfa[state].transition_table.size());
|
|
BOOST_ASSERT(j < m_dfa[state].transition_table[T].size());
|
|
m_dfa[state].transition_table[T][j] = target_state;
|
|
}
|
|
|
|
}
|
|
|
|
i = std::find(marked.begin(), marked.end(), node_id_t(0));
|
|
}
|
|
m_compiled_dfa = true;
|
|
}
|
|
|
|
template <typename IteratorT, typename TokenT, typename CallbackT>
|
|
inline void
|
|
lexer<IteratorT, TokenT, CallbackT>::set_case_insensitive(bool insensitive)
|
|
{
|
|
m_case_insensitive = insensitive;
|
|
}
|
|
|
|
|
|
#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
|
|
template <typename IteratorT, typename TokenT, typename CallbackT>
|
|
inline void
|
|
lexer<IteratorT, TokenT, CallbackT>::dump(std::ostream& out)
|
|
{
|
|
for (unsigned x = 0; x < m_dfa.size(); ++x)
|
|
{
|
|
out << "\nm_dfa[" << x << "] has " << m_dfa[x].transition_table.size() << " states\n";
|
|
for (node_id_t i = 0; i < m_dfa[x].transition_table.size(); ++i)
|
|
{
|
|
out << "state " << i << ":";
|
|
for (node_id_t j = 0; j < m_dfa[x].transition_table[i].size(); ++j)
|
|
{
|
|
if (m_dfa[x].transition_table[i][j] != invalid_node)
|
|
out << j << "->" << m_dfa[x].transition_table[i][j] << " ";
|
|
}
|
|
out << "\n";
|
|
}
|
|
out << "acceptance states: ";
|
|
for(unsigned int k = 0; k < m_dfa[x].acceptance_index.size(); ++k)
|
|
{
|
|
if (m_dfa[x].acceptance_index[k] != invalid_node)
|
|
out << '<' << k << ',' << m_dfa[x].acceptance_index[k] << "> ";
|
|
}
|
|
out << endl;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// load the lexer tables
|
|
#define slex_in(strm, val) \
|
|
strm.read((char*)&val, sizeof(val)); \
|
|
if (std::ios::goodbit != strm.rdstate()) return false
|
|
|
|
template <typename IteratorT, typename TokenT, typename CallbackT>
|
|
inline bool
|
|
lexer<IteratorT, TokenT, CallbackT>::load (std::ifstream &in, long unique_id)
|
|
{
|
|
// ensure correct signature and version
|
|
long signature = 0;
|
|
|
|
slex_in (in, signature);
|
|
if (signature != SLEX_SIGNATURE)
|
|
return false; // not for us
|
|
|
|
long version = 0;
|
|
|
|
slex_in (in, version);
|
|
if ((version & ~SLEX_MINOR_VERSION_MASK) > SLEX_LAST_KNOWN_VERSION)
|
|
return false; // to new for us
|
|
|
|
long uid = 0;
|
|
|
|
slex_in (in, uid);
|
|
if (uid != unique_id)
|
|
return false; // not saved by us
|
|
|
|
// load auxiliary members
|
|
int num_states = 0;
|
|
|
|
slex_in (in, num_states);
|
|
|
|
// load the dfa tables
|
|
dfa_t in_dfa;
|
|
std::size_t dfa_size = 0;
|
|
|
|
slex_in (in, dfa_size);
|
|
in_dfa.resize(dfa_size);
|
|
for (std::size_t dfa = 0; dfa < dfa_size; ++dfa)
|
|
{
|
|
// load the transition tables
|
|
std::size_t tt_size = 0;
|
|
transition_table_t &tt_table = in_dfa[dfa].transition_table;
|
|
|
|
slex_in (in, tt_size);
|
|
tt_table.resize(tt_size);
|
|
for (std::size_t tt = 0; tt < tt_size; ++tt)
|
|
{
|
|
std::size_t nt_size = 0;
|
|
node_table_t &nt_table = tt_table[tt];
|
|
|
|
slex_in (in, nt_size);
|
|
nt_table.resize(nt_size);
|
|
for (std::size_t nt = 0; nt < nt_size; ++nt)
|
|
{
|
|
slex_in (in, nt_table[nt]);
|
|
}
|
|
}
|
|
|
|
// load the acceptance index table
|
|
std::size_t ai_size = 0;
|
|
node_table_t &ai_table = in_dfa[dfa].acceptance_index;
|
|
|
|
slex_in (in, ai_size);
|
|
ai_table.resize(ai_size);
|
|
for (std::size_t ai = 0; ai < ai_size; ++ai)
|
|
{
|
|
slex_in (in, ai_table[ai]);
|
|
}
|
|
}
|
|
|
|
m_dfa.swap(in_dfa); // success, swap in the read values
|
|
m_num_states = num_states;
|
|
|
|
m_compiled_dfa = true;
|
|
return true;
|
|
}
|
|
|
|
#undef slex_in
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// save the lexer tables
|
|
#define slex_out(strm, val) \
|
|
strm.write((char*)&val, sizeof(val)); \
|
|
if (std::ios::goodbit != strm.rdstate()) return false
|
|
|
|
template <typename IteratorT, typename TokenT, typename CallbackT>
|
|
inline bool
|
|
lexer<IteratorT, TokenT, CallbackT>::save (std::ofstream &out, long unique_id)
|
|
{
|
|
// save signature and version information
|
|
long out_long = SLEX_SIGNATURE;
|
|
|
|
slex_out(out, out_long);
|
|
out_long = SLEX_VERSION_100;
|
|
slex_out(out, out_long);
|
|
slex_out(out, unique_id);
|
|
|
|
// save auxiliary members
|
|
slex_out(out, m_num_states);
|
|
|
|
// save the dfa tables
|
|
typedef typename dfa_t::const_iterator dfa_iter_t;
|
|
typedef transition_table_t::const_iterator transition_table_iter_t;
|
|
typedef node_table_t::const_iterator node_table_iter_t;
|
|
|
|
std::size_t out_size_t = m_dfa.size();
|
|
slex_out(out, out_size_t);
|
|
|
|
dfa_iter_t end = m_dfa.end();
|
|
for (dfa_iter_t it = m_dfa.begin(); it != end; ++it)
|
|
{
|
|
// save the transition table
|
|
out_size_t = (*it).transition_table.size();
|
|
slex_out(out, out_size_t);
|
|
|
|
transition_table_iter_t tt_end = (*it).transition_table.end();
|
|
for (transition_table_iter_t tt_it = (*it).transition_table.begin();
|
|
tt_it != tt_end;
|
|
++tt_it)
|
|
{
|
|
out_size_t = (*tt_it).size();
|
|
slex_out(out, out_size_t);
|
|
|
|
node_table_iter_t nt_end = (*tt_it).end();
|
|
for (node_table_iter_t nt_it = (*tt_it).begin();
|
|
nt_it != nt_end;
|
|
++nt_it)
|
|
{
|
|
slex_out(out, (*nt_it));
|
|
}
|
|
}
|
|
|
|
// save the acceptance index table
|
|
out_size_t = (*it).acceptance_index.size();
|
|
slex_out(out, out_size_t);
|
|
|
|
node_table_iter_t nt_end = (*it).acceptance_index.end();
|
|
for (node_table_iter_t nt_it = (*it).acceptance_index.begin();
|
|
nt_it != nt_end;
|
|
++nt_it)
|
|
{
|
|
slex_out(out, (*nt_it));
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
#undef slex_out
|
|
|
|
/*
|
|
a lexer_control object supports some operations on the lexer.
|
|
get current lexer state
|
|
set state
|
|
terminate
|
|
state stack (push, pop, top)
|
|
set new token return value
|
|
ignore the current token
|
|
yymore
|
|
get length of matched token
|
|
*/
|
|
template <typename TokenT>
|
|
class lexer_control
|
|
{
|
|
public:
|
|
|
|
lexer_control(TokenT& token, unsigned int& current_state,
|
|
std::stack<unsigned int>& state_stack);
|
|
// functions dealing with the lexer state
|
|
|
|
// set the state to state
|
|
void set_state(unsigned int state);
|
|
|
|
// get the current state
|
|
unsigned int get_state();
|
|
|
|
// pushes the current state onto the top of the state stack and
|
|
// switches to new_state
|
|
void push_state(unsigned int new_state);
|
|
|
|
// pops the top of the state stack and switches to it.
|
|
void pop_state();
|
|
|
|
// returns the top of the stack without altering the stack's contents.
|
|
unsigned int top_state();
|
|
|
|
// functions dealing with the token returned.
|
|
|
|
// set a new TokenT return value, overriding that one that was
|
|
// registered via. register_regex()
|
|
void set_token(const TokenT& token);
|
|
|
|
// tell the lexer to return an invalid token, signifying termination.
|
|
void terminate();
|
|
|
|
// ignore the current token, and move on to the next one. The current
|
|
// token will NOT be returned from next_token()
|
|
void ignore_current_token();
|
|
|
|
// returns true if ignore_current_token() has been called,
|
|
// false otherwise.
|
|
bool ignore_current_token_set();
|
|
|
|
private:
|
|
TokenT& m_token;
|
|
bool m_ignore_current_token;
|
|
unsigned int& m_current_state;
|
|
std::stack<unsigned int>& m_state_stack;
|
|
};
|
|
|
|
template <typename TokenT>
|
|
inline
|
|
lexer_control<TokenT>::lexer_control(TokenT& token, unsigned int& current_state,
|
|
std::stack<unsigned int>& state_stack)
|
|
: m_token(token)
|
|
, m_ignore_current_token(false)
|
|
, m_current_state(current_state)
|
|
, m_state_stack(state_stack)
|
|
{
|
|
}
|
|
|
|
template <typename TokenT>
|
|
inline void
|
|
lexer_control<TokenT>::set_state(unsigned int state)
|
|
{
|
|
m_current_state = state;
|
|
}
|
|
|
|
template <typename TokenT>
|
|
inline unsigned int
|
|
lexer_control<TokenT>::get_state()
|
|
{
|
|
return m_current_state;
|
|
}
|
|
|
|
template <typename TokenT>
|
|
inline void
|
|
lexer_control<TokenT>::push_state(unsigned int new_state)
|
|
{
|
|
m_state_stack.push(m_current_state);
|
|
m_current_state = new_state;
|
|
}
|
|
|
|
template <typename TokenT>
|
|
inline void
|
|
lexer_control<TokenT>::pop_state()
|
|
{
|
|
m_current_state = m_state_stack.top();
|
|
m_state_stack.pop();
|
|
}
|
|
|
|
template <typename TokenT>
|
|
inline unsigned int
|
|
lexer_control<TokenT>::top_state()
|
|
{
|
|
return m_state_stack.top();
|
|
}
|
|
|
|
template <typename TokenT>
|
|
inline void
|
|
lexer_control<TokenT>::set_token(const TokenT& token)
|
|
{
|
|
m_token = token;
|
|
}
|
|
|
|
template <typename TokenT>
|
|
inline void
|
|
lexer_control<TokenT>::terminate()
|
|
{
|
|
m_token = -1; // TOOD: fix this, need to be determined by traits
|
|
}
|
|
|
|
template <typename TokenT>
|
|
inline void
|
|
lexer_control<TokenT>::ignore_current_token()
|
|
{
|
|
m_ignore_current_token = true;
|
|
}
|
|
|
|
template <typename TokenT>
|
|
inline bool
|
|
lexer_control<TokenT>::ignore_current_token_set()
|
|
{
|
|
return m_ignore_current_token;
|
|
}
|
|
|
|
} // namespace classic
|
|
} // namespace spirit
|
|
} // namespace boost
|
|
|
|
#undef BOOST_SPIRIT_IT_NS
|
|
|
|
#endif
|
|
|