/*=============================================================================
|
|
Boost.Wave: A Standard compliant C++ preprocessor library
|
|
http://www.boost.org/
|
|
|
|
Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
|
|
Software License, Version 1.0. (See accompanying file
|
|
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
=============================================================================*/
|
|
|
|
#if !defined(BOOST_WAVE_LEXERTL_LEXER_HPP_INCLUDED)
|
|
#define BOOST_WAVE_LEXERTL_LEXER_HPP_INCLUDED
|
|
|
|
#include <fstream>
|
|
|
|
#include <boost/iterator/iterator_traits.hpp>
|
|
|
|
#include <boost/wave/wave_config.hpp>
|
|
#include <boost/wave/language_support.hpp>
|
|
#include <boost/wave/token_ids.hpp>
|
|
#include <boost/wave/util/time_conversion_helper.hpp>
|
|
|
|
#include <boost/wave/cpplexer/validate_universal_char.hpp>
|
|
#include <boost/wave/cpplexer/convert_trigraphs.hpp>
|
|
#include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
|
|
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
|
|
#include <boost/wave/cpplexer/detect_include_guards.hpp>
|
|
#endif
|
|
|
|
#include "wave_lexertl_config.hpp"
|
|
#include "../lexertl_iterator.hpp"
|
|
|
|
#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES != 0
|
|
#include "wave_lexertl_tables.hpp"
|
|
#else
|
|
#include <boost/spirit/home/support/detail/lexer/generator.hpp>
|
|
#include <boost/spirit/home/support/detail/lexer/rules.hpp>
|
|
#include <boost/spirit/home/support/detail/lexer/state_machine.hpp>
|
|
#include <boost/spirit/home/support/detail/lexer/consts.hpp>
|
|
//#include "lexertl/examples/serialise.hpp>
|
|
// #if BOOST_WAVE_LEXERTL_GENERATE_CPP_CODE != 0
|
|
// #include "lexertl/examples/cpp_code.hpp"
|
|
// #endif
|
|
#endif
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
namespace boost { namespace wave { namespace cpplexer { namespace lexertl
|
|
{
|
|
|
|
#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// The following numbers are the array sizes of the token regex's which we
|
|
// need to specify to make the CW compiler happy (at least up to V9.5).
|
|
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
|
|
#define INIT_DATA_SIZE 176
|
|
#else
|
|
#define INIT_DATA_SIZE 159
|
|
#endif
|
|
#define INIT_DATA_CPP_SIZE 15
|
|
#define INIT_DATA_PP_NUMBER_SIZE 2
|
|
#define INIT_MACRO_DATA_SIZE 27
|
|
#endif // #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
|
|
|
|
// this is just a hack to have a unique token id not otherwise used by Wave
|
|
#define T_ANYCTRL T_LAST_TOKEN_ID
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
namespace lexer
|
|
{
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// this is the wrapper for the lexertl lexer library
|
|
template <typename Iterator, typename Position>
|
|
class lexertl
|
|
{
|
|
private:
|
|
typedef BOOST_WAVE_STRINGTYPE string_type;
|
|
typedef typename boost::detail::iterator_traits<Iterator>::value_type
|
|
char_type;
|
|
|
|
public:
|
|
wave::token_id next_token(Iterator &first, Iterator const &last,
|
|
string_type& token_value);
|
|
|
|
#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES != 0
|
|
lexertl() {}
|
|
void init_dfa(wave::language_support lang, Position const& pos,
|
|
bool force_reinit = false) {}
|
|
bool is_initialized() const { return true; }
|
|
#else
|
|
lexertl() : has_compiled_dfa_(false) {}
|
|
bool init_dfa(wave::language_support lang, Position const& pos,
|
|
bool force_reinit = false);
|
|
bool is_initialized() const { return has_compiled_dfa_; }
|
|
|
|
// get time of last compilation
|
|
static std::time_t get_compilation_time()
|
|
{ return compilation_time.get_time(); }
|
|
|
|
bool load (std::istream& instrm);
|
|
bool save (std::ostream& outstrm);
|
|
|
|
private:
|
|
boost::lexer::state_machine state_machine_;
|
|
bool has_compiled_dfa_;
|
|
|
|
// initialization data (regular expressions for the token definitions)
|
|
struct lexer_macro_data {
|
|
char_type const *name; // macro name
|
|
char_type const *macro; // associated macro definition
|
|
};
|
|
static lexer_macro_data const init_macro_data[INIT_MACRO_DATA_SIZE]; // macro patterns
|
|
|
|
struct lexer_data {
|
|
token_id tokenid; // token data
|
|
char_type const *tokenregex; // associated token to match
|
|
};
|
|
static lexer_data const init_data[INIT_DATA_SIZE]; // common patterns
|
|
static lexer_data const init_data_cpp[INIT_DATA_CPP_SIZE]; // C++ only patterns
|
|
static lexer_data const init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE]; // pp-number only patterns
|
|
|
|
// helper for calculation of the time of last compilation
|
|
static boost::wave::util::time_conversion_helper compilation_time;
|
|
#endif // #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
|
|
};
|
|
|
|
#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// get time of last compilation of this file
|
|
//  Static data member definition: records the build timestamp of this header
//  so cached DFA files can be invalidated when the lexer is recompiled.
template <typename IteratorT, typename PositionT>
boost::wave::util::time_conversion_helper
    lexertl<IteratorT, PositionT>::compilation_time(__DATE__ " " __TIME__);
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// token regex definitions
|
|
|
|
// helper for initializing token data and macro definitions
|
|
#define Q(c) "\\" c
|
|
#define TRI(c) "{TRI}" c
|
|
#define OR "|"
|
|
#define MACRO_DATA(name, macro) { name, macro }
|
|
#define TOKEN_DATA(id, regex) { id, regex }
|
|
|
|
// lexertl macro definitions
|
|
//  Named sub-expressions (referenced as {NAME}) used by the token regex
//  tables below. The NULL/NULL pair terminates the table; iteration in
//  init_dfa() stops on a NULL name.
template <typename Iterator, typename Position>
typename lexertl<Iterator, Position>::lexer_macro_data const
lexertl<Iterator, Position>::init_macro_data[INIT_MACRO_DATA_SIZE] =
{
    // character classes
    MACRO_DATA("ANY", "[\t\v\f\r\n\\040-\\377]"),
    MACRO_DATA("ANYCTRL", "[\\000-\\037]"),
    MACRO_DATA("TRI", "\\?\\?"),                    // trigraph introducer '??'
    MACRO_DATA("BLANK", "[ \t\v\f]"),
    MACRO_DATA("CCOMMENT", "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"),
    MACRO_DATA("PPSPACE", "(" "{BLANK}" OR "{CCOMMENT}" ")*"),
    // numeric building blocks
    MACRO_DATA("OCTALDIGIT", "[0-7]"),
    MACRO_DATA("DIGIT", "[0-9]"),
    MACRO_DATA("HEXDIGIT", "[0-9a-fA-F]"),
    MACRO_DATA("OPTSIGN", "[-+]?"),
    MACRO_DATA("EXPSTART", "[eE][-+]"),
    MACRO_DATA("EXPONENT", "([eE]{OPTSIGN}{DIGIT}+)"),
    MACRO_DATA("NONDIGIT", "[a-zA-Z_]"),
    MACRO_DATA("INTEGER", "(" "(0x|0X){HEXDIGIT}+" OR "0{OCTALDIGIT}*" OR "[1-9]{DIGIT}*" ")"),
    MACRO_DATA("INTEGER_SUFFIX", "(" "[uU][lL]?" OR "[lL][uU]?" ")"),
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
    MACRO_DATA("LONGINTEGER_SUFFIX", "([uU]([lL][lL])|([lL][lL])[uU]?|i64)"),   // includes MS 'i64'
#else
    MACRO_DATA("LONGINTEGER_SUFFIX", "([uU]([lL][lL])|([lL][lL])[uU]?)"),
#endif
    MACRO_DATA("FLOAT_SUFFIX", "(" "[fF][lL]?" OR "[lL][fF]?" ")"),
    MACRO_DATA("CHAR_SPEC", "L?"),                  // optional wide-literal prefix
    // escape sequences and universal character names
    MACRO_DATA("BACKSLASH", "(" Q("\\") OR TRI(Q("/")) ")"),
    MACRO_DATA("ESCAPESEQ", "{BACKSLASH}([abfnrtv?'\"]|{BACKSLASH}|x{HEXDIGIT}+|{OCTALDIGIT}{1,3})"),
    MACRO_DATA("HEXQUAD", "{HEXDIGIT}{4}"),
    MACRO_DATA("UNIVERSALCHAR", "{BACKSLASH}(u{HEXQUAD}|U{HEXQUAD}{2})"),
    // preprocessor-directive building blocks ('#', its digraph and trigraph)
    MACRO_DATA("POUNDDEF", "(" "#" OR TRI("=") OR Q("%:") ")"),
    MACRO_DATA("NEWLINEDEF", "(" "\\n" OR "\\r" OR "\\r\\n" ")"),
#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
    MACRO_DATA("INCLUDEDEF", "(include|include_next)"),
#else
    MACRO_DATA("INCLUDEDEF", "include"),
#endif
    MACRO_DATA("PP_NUMBERDEF", "\\.?{DIGIT}({DIGIT}|{NONDIGIT}|{EXPSTART}|\\.)*"),
    MACRO_DATA(NULL, NULL)      // should be the last entry
};
|
|
|
|
// common C++/C99 token definitions
|
|
//  Token definitions shared by C++ and C99 mode. NOTE: entry order matters —
//  init_dfa() adds these rules in array order, and T_ANY/T_ANYCTRL must
//  remain last so they only match when nothing more specific does. The
//  { token_id(0) } entry terminates the table.
template <typename Iterator, typename Position>
typename lexertl<Iterator, Position>::lexer_data const
lexertl<Iterator, Position>::init_data[INIT_DATA_SIZE] =
{
    // operators and punctuation (incl. digraph "_ALT" and trigraph forms)
    TOKEN_DATA(T_AND, "&"),
    TOKEN_DATA(T_ANDAND, "&&"),
    TOKEN_DATA(T_ASSIGN, "="),
    TOKEN_DATA(T_ANDASSIGN, "&="),
    TOKEN_DATA(T_OR, Q("|")),
    TOKEN_DATA(T_OR_TRIGRAPH, "{TRI}!"),
    TOKEN_DATA(T_ORASSIGN, Q("|=")),
    TOKEN_DATA(T_ORASSIGN_TRIGRAPH, "{TRI}!="),
    TOKEN_DATA(T_XOR, Q("^")),
    TOKEN_DATA(T_XOR_TRIGRAPH, "{TRI}'"),
    TOKEN_DATA(T_XORASSIGN, Q("^=")),
    TOKEN_DATA(T_XORASSIGN_TRIGRAPH, "{TRI}'="),
    TOKEN_DATA(T_COMMA, ","),
    TOKEN_DATA(T_COLON, ":"),
    TOKEN_DATA(T_DIVIDEASSIGN, Q("/=")),
    TOKEN_DATA(T_DIVIDE, Q("/")),
    TOKEN_DATA(T_DOT, Q(".")),
    TOKEN_DATA(T_ELLIPSIS, Q(".") "{3}"),
    TOKEN_DATA(T_EQUAL, "=="),
    TOKEN_DATA(T_GREATER, ">"),
    TOKEN_DATA(T_GREATEREQUAL, ">="),
    TOKEN_DATA(T_LEFTBRACE, Q("{")),
    TOKEN_DATA(T_LEFTBRACE_ALT, "<" Q("%")),
    TOKEN_DATA(T_LEFTBRACE_TRIGRAPH, "{TRI}<"),
    TOKEN_DATA(T_LESS, "<"),
    TOKEN_DATA(T_LESSEQUAL, "<="),
    TOKEN_DATA(T_LEFTPAREN, Q("(")),
    TOKEN_DATA(T_LEFTBRACKET, Q("[")),
    TOKEN_DATA(T_LEFTBRACKET_ALT, "<:"),
    TOKEN_DATA(T_LEFTBRACKET_TRIGRAPH, "{TRI}" Q("(")),
    TOKEN_DATA(T_MINUS, Q("-")),
    TOKEN_DATA(T_MINUSASSIGN, Q("-=")),
    TOKEN_DATA(T_MINUSMINUS, Q("-") "{2}"),
    TOKEN_DATA(T_PERCENT, Q("%")),
    TOKEN_DATA(T_PERCENTASSIGN, Q("%=")),
    TOKEN_DATA(T_NOT, "!"),
    TOKEN_DATA(T_NOTEQUAL, "!="),
    TOKEN_DATA(T_OROR, Q("|") "{2}"),
    // '||' with either or both bars written as the '??!' trigraph
    TOKEN_DATA(T_OROR_TRIGRAPH, "{TRI}!\\||\\|{TRI}!|{TRI}!{TRI}!"),
    TOKEN_DATA(T_PLUS, Q("+")),
    TOKEN_DATA(T_PLUSASSIGN, Q("+=")),
    TOKEN_DATA(T_PLUSPLUS, Q("+") "{2}"),
    TOKEN_DATA(T_ARROW, Q("->")),
    TOKEN_DATA(T_QUESTION_MARK, Q("?")),
    TOKEN_DATA(T_RIGHTBRACE, Q("}")),
    TOKEN_DATA(T_RIGHTBRACE_ALT, Q("%>")),
    TOKEN_DATA(T_RIGHTBRACE_TRIGRAPH, "{TRI}>"),
    TOKEN_DATA(T_RIGHTPAREN, Q(")")),
    TOKEN_DATA(T_RIGHTBRACKET, Q("]")),
    TOKEN_DATA(T_RIGHTBRACKET_ALT, ":>"),
    TOKEN_DATA(T_RIGHTBRACKET_TRIGRAPH, "{TRI}" Q(")")),
    TOKEN_DATA(T_SEMICOLON, ";"),
    TOKEN_DATA(T_SHIFTLEFT, "<<"),
    TOKEN_DATA(T_SHIFTLEFTASSIGN, "<<="),
    TOKEN_DATA(T_SHIFTRIGHT, ">>"),
    TOKEN_DATA(T_SHIFTRIGHTASSIGN, ">>="),
    TOKEN_DATA(T_STAR, Q("*")),
    TOKEN_DATA(T_COMPL, Q("~")),
    TOKEN_DATA(T_COMPL_TRIGRAPH, "{TRI}-"),
    TOKEN_DATA(T_STARASSIGN, Q("*=")),
    // keywords (C++ and C99 common set)
    TOKEN_DATA(T_ASM, "asm"),
    TOKEN_DATA(T_AUTO, "auto"),
    TOKEN_DATA(T_BOOL, "bool"),
    TOKEN_DATA(T_FALSE, "false"),
    TOKEN_DATA(T_TRUE, "true"),
    TOKEN_DATA(T_BREAK, "break"),
    TOKEN_DATA(T_CASE, "case"),
    TOKEN_DATA(T_CATCH, "catch"),
    TOKEN_DATA(T_CHAR, "char"),
    TOKEN_DATA(T_CLASS, "class"),
    TOKEN_DATA(T_CONST, "const"),
    TOKEN_DATA(T_CONSTCAST, "const_cast"),
    TOKEN_DATA(T_CONTINUE, "continue"),
    TOKEN_DATA(T_DEFAULT, "default"),
    TOKEN_DATA(T_DELETE, "delete"),
    TOKEN_DATA(T_DO, "do"),
    TOKEN_DATA(T_DOUBLE, "double"),
    TOKEN_DATA(T_DYNAMICCAST, "dynamic_cast"),
    TOKEN_DATA(T_ELSE, "else"),
    TOKEN_DATA(T_ENUM, "enum"),
    TOKEN_DATA(T_EXPLICIT, "explicit"),
    TOKEN_DATA(T_EXPORT, "export"),
    TOKEN_DATA(T_EXTERN, "extern"),
    TOKEN_DATA(T_FLOAT, "float"),
    TOKEN_DATA(T_FOR, "for"),
    TOKEN_DATA(T_FRIEND, "friend"),
    TOKEN_DATA(T_GOTO, "goto"),
    TOKEN_DATA(T_IF, "if"),
    TOKEN_DATA(T_INLINE, "inline"),
    TOKEN_DATA(T_INT, "int"),
    TOKEN_DATA(T_LONG, "long"),
    TOKEN_DATA(T_MUTABLE, "mutable"),
    TOKEN_DATA(T_NAMESPACE, "namespace"),
    TOKEN_DATA(T_NEW, "new"),
    TOKEN_DATA(T_OPERATOR, "operator"),
    TOKEN_DATA(T_PRIVATE, "private"),
    TOKEN_DATA(T_PROTECTED, "protected"),
    TOKEN_DATA(T_PUBLIC, "public"),
    TOKEN_DATA(T_REGISTER, "register"),
    TOKEN_DATA(T_REINTERPRETCAST, "reinterpret_cast"),
    TOKEN_DATA(T_RETURN, "return"),
    TOKEN_DATA(T_SHORT, "short"),
    TOKEN_DATA(T_SIGNED, "signed"),
    TOKEN_DATA(T_SIZEOF, "sizeof"),
    TOKEN_DATA(T_STATIC, "static"),
    TOKEN_DATA(T_STATICCAST, "static_cast"),
    TOKEN_DATA(T_STRUCT, "struct"),
    TOKEN_DATA(T_SWITCH, "switch"),
    TOKEN_DATA(T_TEMPLATE, "template"),
    TOKEN_DATA(T_THIS, "this"),
    TOKEN_DATA(T_THROW, "throw"),
    TOKEN_DATA(T_TRY, "try"),
    TOKEN_DATA(T_TYPEDEF, "typedef"),
    TOKEN_DATA(T_TYPEID, "typeid"),
    TOKEN_DATA(T_TYPENAME, "typename"),
    TOKEN_DATA(T_UNION, "union"),
    TOKEN_DATA(T_UNSIGNED, "unsigned"),
    TOKEN_DATA(T_USING, "using"),
    TOKEN_DATA(T_VIRTUAL, "virtual"),
    TOKEN_DATA(T_VOID, "void"),
    TOKEN_DATA(T_VOLATILE, "volatile"),
    TOKEN_DATA(T_WCHART, "wchar_t"),
    TOKEN_DATA(T_WHILE, "while"),
    // preprocessor directives ('#' optionally as digraph/trigraph)
    TOKEN_DATA(T_PP_DEFINE, "{POUNDDEF}{PPSPACE}define"),
    TOKEN_DATA(T_PP_IF, "{POUNDDEF}{PPSPACE}if"),
    TOKEN_DATA(T_PP_IFDEF, "{POUNDDEF}{PPSPACE}ifdef"),
    TOKEN_DATA(T_PP_IFNDEF, "{POUNDDEF}{PPSPACE}ifndef"),
    TOKEN_DATA(T_PP_ELSE, "{POUNDDEF}{PPSPACE}else"),
    TOKEN_DATA(T_PP_ELIF, "{POUNDDEF}{PPSPACE}elif"),
    TOKEN_DATA(T_PP_ENDIF, "{POUNDDEF}{PPSPACE}endif"),
    TOKEN_DATA(T_PP_ERROR, "{POUNDDEF}{PPSPACE}error"),
    TOKEN_DATA(T_PP_QHEADER, "{POUNDDEF}{PPSPACE}{INCLUDEDEF}{PPSPACE}" Q("\"") "[^\\n\\r\"]+" Q("\"")),
    TOKEN_DATA(T_PP_HHEADER, "{POUNDDEF}{PPSPACE}{INCLUDEDEF}{PPSPACE}" "<" "[^\\n\\r>]+" ">"),
    TOKEN_DATA(T_PP_INCLUDE, "{POUNDDEF}{PPSPACE}{INCLUDEDEF}{PPSPACE}"),
    TOKEN_DATA(T_PP_LINE, "{POUNDDEF}{PPSPACE}line"),
    TOKEN_DATA(T_PP_PRAGMA, "{POUNDDEF}{PPSPACE}pragma"),
    TOKEN_DATA(T_PP_UNDEF, "{POUNDDEF}{PPSPACE}undef"),
    TOKEN_DATA(T_PP_WARNING, "{POUNDDEF}{PPSPACE}warning"),
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
    // Microsoft extension keywords ("_?" "_x" matches both _x and __x forms)
    TOKEN_DATA(T_MSEXT_INT8, "__int8"),
    TOKEN_DATA(T_MSEXT_INT16, "__int16"),
    TOKEN_DATA(T_MSEXT_INT32, "__int32"),
    TOKEN_DATA(T_MSEXT_INT64, "__int64"),
    TOKEN_DATA(T_MSEXT_BASED, "_?" "_based"),
    TOKEN_DATA(T_MSEXT_DECLSPEC, "_?" "_declspec"),
    TOKEN_DATA(T_MSEXT_CDECL, "_?" "_cdecl"),
    TOKEN_DATA(T_MSEXT_FASTCALL, "_?" "_fastcall"),
    TOKEN_DATA(T_MSEXT_STDCALL, "_?" "_stdcall"),
    TOKEN_DATA(T_MSEXT_TRY , "__try"),
    TOKEN_DATA(T_MSEXT_EXCEPT, "__except"),
    TOKEN_DATA(T_MSEXT_FINALLY, "__finally"),
    TOKEN_DATA(T_MSEXT_LEAVE, "__leave"),
    TOKEN_DATA(T_MSEXT_INLINE, "_?" "_inline"),
    TOKEN_DATA(T_MSEXT_ASM, "_?" "_asm"),
    TOKEN_DATA(T_MSEXT_PP_REGION, "{POUNDDEF}{PPSPACE}region"),
    TOKEN_DATA(T_MSEXT_PP_ENDREGION, "{POUNDDEF}{PPSPACE}endregion"),
#endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
    // literals
    TOKEN_DATA(T_LONGINTLIT, "{INTEGER}{LONGINTEGER_SUFFIX}"),
    TOKEN_DATA(T_INTLIT, "{INTEGER}{INTEGER_SUFFIX}?"),
    TOKEN_DATA(T_FLOATLIT,
        "(" "{DIGIT}*" Q(".") "{DIGIT}+" OR "{DIGIT}+" Q(".") "){EXPONENT}?{FLOAT_SUFFIX}?" OR
        "{DIGIT}+{EXPONENT}{FLOAT_SUFFIX}?"),
#if BOOST_WAVE_USE_STRICT_LEXER != 0
    TOKEN_DATA(T_IDENTIFIER,
        "(" "{NONDIGIT}" OR "{UNIVERSALCHAR}" ")"
        "(" "{NONDIGIT}" OR "{DIGIT}" OR "{UNIVERSALCHAR}" ")*"),
#else
    // non-strict mode additionally allows '$' in identifiers
    TOKEN_DATA(T_IDENTIFIER,
        "(" "{NONDIGIT}" OR Q("$") OR "{UNIVERSALCHAR}" ")"
        "(" "{NONDIGIT}" OR Q("$") OR "{DIGIT}" OR "{UNIVERSALCHAR}" ")*"),
#endif
    TOKEN_DATA(T_CCOMMENT, "{CCOMMENT}"),
    TOKEN_DATA(T_CPPCOMMENT, Q("/") Q("/[^\\n\\r]*") "{NEWLINEDEF}" ),
    TOKEN_DATA(T_CHARLIT,
        "{CHAR_SPEC}" "'" "({ESCAPESEQ}|[^\\n\\r']|{UNIVERSALCHAR})+" "'"),
    TOKEN_DATA(T_STRINGLIT,
        "{CHAR_SPEC}" Q("\"") "({ESCAPESEQ}|[^\\n\\r\"]|{UNIVERSALCHAR})*" Q("\"")),
    // whitespace, line continuation and '#'/'##' variants
    TOKEN_DATA(T_SPACE, "{BLANK}+"),
    TOKEN_DATA(T_CONTLINE, Q("\\") "\\n"),
    TOKEN_DATA(T_NEWLINE, "{NEWLINEDEF}"),
    TOKEN_DATA(T_POUND_POUND, "##"),
    TOKEN_DATA(T_POUND_POUND_ALT, Q("%:") Q("%:")),
    TOKEN_DATA(T_POUND_POUND_TRIGRAPH, "({TRI}=){2}"),
    TOKEN_DATA(T_POUND, "#"),
    TOKEN_DATA(T_POUND_ALT, Q("%:")),
    TOKEN_DATA(T_POUND_TRIGRAPH, "{TRI}="),
    // catch-all rules (keep last so specific rules win)
    TOKEN_DATA(T_ANY_TRIGRAPH, "{TRI}\\/"),
    TOKEN_DATA(T_ANY, "{ANY}"),
    TOKEN_DATA(T_ANYCTRL, "{ANYCTRL}"),     // this should be the last recognized token
    { token_id(0) }       // this should be the last entry
};
|
|
|
|
// C++ only token definitions
|
|
//  Token definitions valid in C++ mode only (alternative operator spellings,
//  pointer-to-member operators, '::'). init_dfa() registers this table only
//  when NOT in C99 mode. Terminated by the { token_id(0) } entry.
template <typename Iterator, typename Position>
typename lexertl<Iterator, Position>::lexer_data const
lexertl<Iterator, Position>::init_data_cpp[INIT_DATA_CPP_SIZE] =
{
    // alternative (iso646-style) operator spellings
    TOKEN_DATA(T_AND_ALT, "bitand"),
    TOKEN_DATA(T_ANDASSIGN_ALT, "and_eq"),
    TOKEN_DATA(T_ANDAND_ALT, "and"),
    TOKEN_DATA(T_OR_ALT, "bitor"),
    TOKEN_DATA(T_ORASSIGN_ALT, "or_eq"),
    TOKEN_DATA(T_OROR_ALT, "or"),
    TOKEN_DATA(T_XORASSIGN_ALT, "xor_eq"),
    TOKEN_DATA(T_XOR_ALT, "xor"),
    TOKEN_DATA(T_NOTEQUAL_ALT, "not_eq"),
    TOKEN_DATA(T_NOT_ALT, "not"),
    TOKEN_DATA(T_COMPL_ALT, "compl"),
#if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
    TOKEN_DATA(T_IMPORT, "import"),
#endif
    // member access and scope operators
    TOKEN_DATA(T_ARROWSTAR, Q("->") Q("*")),
    TOKEN_DATA(T_DOTSTAR, Q(".") Q("*")),
    TOKEN_DATA(T_COLON_COLON, "::"),
    { token_id(0) }       // this should be the last entry
};
|
|
|
|
// pp-number specific token definitions
|
|
//  pp-number token definition, registered first by init_dfa() when
//  need_prefer_pp_numbers(lang) holds, so that preprocessing numbers take
//  precedence over the separate integer/float literal rules.
template <typename Iterator, typename Position>
typename lexertl<Iterator, Position>::lexer_data const
lexertl<Iterator, Position>::init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE] =
{
    TOKEN_DATA(T_PP_NUMBER, "{PP_NUMBERDEF}"),
    { token_id(0) }       // this should be the last entry
};
|
|
|
|
#undef MACRO_DATA
|
|
#undef TOKEN_DATA
|
|
#undef OR
|
|
#undef TRI
|
|
#undef Q
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// initialize lexertl lexer from C++ token regex's
|
|
//  Build the DFA for the given language options (no-op if already built).
//  First tries to load a cached DFA from "wave_lexertl_lexer.dfa" in the
//  current directory; otherwise registers all macro and token rules and
//  generates a minimized state machine, then attempts to cache it to disk.
//  Throws lexing_exception (unexpected_error) if DFA generation fails;
//  'pos' is only used to report the error position in that case.
template <typename Iterator, typename Position>
inline bool
lexertl<Iterator, Position>::init_dfa(wave::language_support lang,
    Position const& pos, bool force_reinit)
{
    if (has_compiled_dfa_)
        return true;      // already initialized, nothing to do

    std::ifstream dfa_in("wave_lexertl_lexer.dfa", std::ios::in|std::ios::binary);

    // regenerate if forced, no cache file exists, or loading it failed
    // (note: load() currently always returns false, so this branch is
    // effectively always taken)
    if (force_reinit || !dfa_in.is_open() || !load (dfa_in))
    {
        dfa_in.close();

        state_machine_.clear();

        // register macro definitions (the {NAME} sub-expressions)
        boost::lexer::rules rules;
        for (int k = 0; NULL != init_macro_data[k].name; ++k) {
            rules.add_macro(init_macro_data[k].name, init_macro_data[k].macro);
        }

        // if pp-numbers should be preferred, insert the corresponding rule first
        if (wave::need_prefer_pp_numbers(lang)) {
            for (int j = 0; 0 != init_data_pp_number[j].tokenid; ++j) {
                rules.add(init_data_pp_number[j].tokenregex,
                    init_data_pp_number[j].tokenid);
            }
        }

        // if in C99 mode, some of the keywords are not valid
        if (!wave::need_c99(lang)) {
            for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
                rules.add(init_data_cpp[j].tokenregex,
                    init_data_cpp[j].tokenid);
            }
        }

        // common C++/C99 token rules (catch-all rules are last in the table)
        for (int i = 0; 0 != init_data[i].tokenid; ++i) {
            rules.add(init_data[i].tokenregex, init_data[i].tokenid);
        }

        // generate minimized DFA
        try {
            boost::lexer::generator::build (rules, state_machine_);
            boost::lexer::generator::minimise (state_machine_);
        }
        catch (std::runtime_error const& e) {
            // wrap the lexer-generator error into a Wave lexing_exception
            string_type msg("lexertl initialization error: ");
            msg += e.what();
            BOOST_WAVE_LEXER_THROW(wave::cpplexer::lexing_exception,
                unexpected_error, msg.c_str(),
                pos.get_line(), pos.get_column(), pos.get_file().c_str());
            // not reached if BOOST_WAVE_LEXER_THROW throws; kept as a
            // fallback for configurations where it does not
            return false;
        }

        // best-effort: cache the generated DFA for the next run
        std::ofstream dfa_out ("wave_lexertl_lexer.dfa",
            std::ios::out|std::ios::binary|std::ios::trunc);

        if (dfa_out.is_open())
            save (dfa_out);
    }

    has_compiled_dfa_ = true;
    return true;
}
|
|
#endif // BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// return next token from the input stream
|
|
//  Return the id of the next (longest-matching) token in [first, last).
//  Walks the DFA one input character at a time, remembering the last
//  accepting state seen; on success stores the matched text in token_value
//  and advances 'first' past the match. Returns T_EOF when no accepting
//  state was reached.
template <typename Iterator, typename Position>
inline wave::token_id
lexertl<Iterator, Position>::next_token(Iterator &first, Iterator const &last,
    string_type& token_value)
{
#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
    // dynamic tables: fetch lookup/dfa arrays from the generated state machine
    size_t const* const lookup = &state_machine_.data()._lookup[0]->front ();
    size_t const dfa_alphabet = state_machine_.data()._dfa_alphabet[0];

    size_t const* dfa = &state_machine_.data()._dfa[0]->front();
    size_t const* ptr = dfa + dfa_alphabet + boost::lexer::dfa_offset;
#else
    // static tables: 'dfa', 'dfa_offset' (and presumably 'lookup') come from
    // wave_lexertl_tables.hpp — not visible in this file
    const std::size_t *ptr = dfa + dfa_offset;
#endif // BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0

    Iterator curr = first;
    Iterator end_token = first;           // end of the longest match so far
    bool end_state = (*ptr != 0);         // slot 0 of a state row: accepting flag
    size_t id = *(ptr + 1);               // slot 1: token id of that state

    while (curr != last) {
        // NOTE(review): int(*curr) yields a negative index for characters
        // >= 0x80 on platforms where char is signed — verify the lookup
        // table/iterator value_type makes this safe
        size_t const state = ptr[lookup[int(*curr)]];
        if (0 == state)
            break;                        // no transition: stop scanning
        ++curr;

#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
        ptr = &dfa[state * (dfa_alphabet + boost::lexer::dfa_offset)];
#else
        ptr = &dfa[state * dfa_offset];
#endif // BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0

        if (0 != *ptr) {
            // reached an accepting state: remember it (longest match wins)
            end_state = true;
            id = *(ptr + 1);
            end_token = curr;
        }
    }

    if (end_state) {
        if (T_ANY == id) {
            // catch-all match: encode the actual character into the token id
            id = TOKEN_FROM_ID(*first, UnknownTokenType);
        }

        // return longest match
        string_type str(first, end_token);
        token_value.swap(str);
        first = end_token;
        return wave::token_id(id);
    }
    return T_EOF;
}
|
|
|
|
#if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// load the DFA tables to/from a stream
|
|
template <typename Iterator, typename Position>
|
|
inline bool
|
|
lexertl<Iterator, Position>::load (std::istream& instrm)
|
|
{
|
|
// #if !defined(BOOST_WAVE_LEXERTL_GENERATE_CPP_CODE)
|
|
// std::size_t version = 0;
|
|
// boost::lexer::serialise::load_as_binary(instrm, state_machine_, version);
|
|
// if (version != (std::size_t)get_compilation_time())
|
|
// return false; // too new for us
|
|
// return instrm.good();
|
|
// #else
|
|
return false; // always create the dfa when generating the C++ code
|
|
// #endif
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// save the DFA tables to/from a stream
|
|
template <typename Iterator, typename Position>
|
|
inline bool
|
|
lexertl<Iterator, Position>::save (std::ostream& outstrm)
|
|
{
|
|
// #if defined(BOOST_WAVE_LEXERTL_GENERATE_CPP_CODE)
|
|
// cpp_code::generate(state_machine_, outstrm);
|
|
// #else
|
|
// boost::lexer::serialise::save_as_binary(state_machine_, outstrm,
|
|
// (std::size_t)get_compilation_time());
|
|
// #endif
|
|
return outstrm.good();
|
|
}
|
|
#endif // #if BOOST_WAVE_LEXERTL_USE_STATIC_TABLES == 0
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
} // namespace lexer
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//  Lexer functor: adapts the lexertl wrapper above to Wave's
//  lex_input_interface, producing lex_token<Position> objects with source
//  positions, character validation, trigraph conversion and (optionally)
//  include-guard detection.
template <typename Iterator, typename Position = wave::util::file_position_type>
class lexertl_functor
:   public lexertl_input_interface<wave::cpplexer::lex_token<Position> >
{
public:
    typedef wave::util::position_iterator<Iterator, Position> iterator_type;
    typedef typename boost::detail::iterator_traits<Iterator>::value_type
        char_type;
    typedef BOOST_WAVE_STRINGTYPE string_type;
    typedef wave::cpplexer::lex_token<Position> token_type;

    lexertl_functor(Iterator const &first_, Iterator const &last_,
            Position const &pos_, wave::language_support language)
    :   first(first_, last_, pos_), language(language), at_eof(false)
    {
        // ensure the shared static lexer has a DFA for this language mode
        lexer_.init_dfa(language, pos_);
    }
    ~lexertl_functor() {}

    // get the next token from the input stream; returns a default-constructed
    // token (T_EOI) once the input is exhausted
    token_type& get(token_type& result)
    {
        if (lexer_.is_initialized() && !at_eof) {
            do {
                // generate and return the next token
                string_type token_val;
                Position pos = first.get_position();   // begin of token position
                wave::token_id id = lexer_.next_token(first, last, token_val);

                // T_CONTLINE (backslash-newline) is silently skipped by
                // looping again; everything else is post-processed below
                if (T_CONTLINE != id) {
                    // switch on the token id to apply per-token validation
                    // and conversion (only the listed ids need handling)
                    switch (id) {
                    case T_IDENTIFIER:
                        // test identifier characters for validity (throws if
                        // invalid chars found)
                        if (!wave::need_no_character_validation(language)) {
                            using wave::cpplexer::impl::validate_identifier_name;
                            validate_identifier_name(token_val,
                                pos.get_line(), pos.get_column(), pos.get_file());
                        }
                        break;

                    case T_STRINGLIT:
                    case T_CHARLIT:
                        // test literal characters for validity (throws if invalid
                        // chars found)
                        if (wave::need_convert_trigraphs(language)) {
                            using wave::cpplexer::impl::convert_trigraphs;
                            token_val = convert_trigraphs(token_val);
                        }
                        if (!wave::need_no_character_validation(language)) {
                            using wave::cpplexer::impl::validate_literal;
                            validate_literal(token_val,
                                pos.get_line(), pos.get_column(), pos.get_file());
                        }
                        break;

                    case T_LONGINTLIT:  // supported in C99 and long_long mode
                        if (!wave::need_long_long(language)) {
                            // syntax error: not allowed in C++ mode
                            BOOST_WAVE_LEXER_THROW(
                                wave::cpplexer::lexing_exception,
                                invalid_long_long_literal, token_val.c_str(),
                                pos.get_line(), pos.get_column(),
                                pos.get_file().c_str());
                        }
                        break;

#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
                    case T_PP_HHEADER:
                    case T_PP_QHEADER:
                    case T_PP_INCLUDE:
                        // convert to the corresponding ..._next token, if appropriate
                        {
                            // Skip '#' and whitespace and see whether we find an
                            // 'include_next' here.
                            // NOTE(review): 'start' can only be npos if the
                            // matched text lacks "include", which the token
                            // regexes should rule out — confirm
                            typename string_type::size_type start = token_val.find("include");
                            if (0 == token_val.compare(start, 12, "include_next", 12))
                                id = token_id(id | AltTokenType);
                        }
                        break;
#endif // BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0

                    case T_EOF:
                        // T_EOF is returned as a valid token, the next call will
                        // return T_EOI, i.e. the actual end of input
                        at_eof = true;
                        token_val.clear();
                        break;

                    case T_OR_TRIGRAPH:
                    case T_XOR_TRIGRAPH:
                    case T_LEFTBRACE_TRIGRAPH:
                    case T_RIGHTBRACE_TRIGRAPH:
                    case T_LEFTBRACKET_TRIGRAPH:
                    case T_RIGHTBRACKET_TRIGRAPH:
                    case T_COMPL_TRIGRAPH:
                    case T_POUND_TRIGRAPH:
                    case T_ANY_TRIGRAPH:
                        // replace the trigraph spelling by the character it names
                        if (wave::need_convert_trigraphs(language))
                        {
                            using wave::cpplexer::impl::convert_trigraph;
                            token_val = convert_trigraph(token_val);
                        }
                        break;

                    case T_ANYCTRL:
                        // matched some unexpected character
                        {
                            // 21 is the max required size for a 64 bit integer
                            // represented as a string
                            char buffer[22];
                            string_type msg("invalid character in input stream: '0x");

                            // for some systems sprintf is in namespace std
                            using namespace std;
                            sprintf(buffer, "%02x'", token_val[0]);
                            msg += buffer;
                            BOOST_WAVE_LEXER_THROW(
                                wave::cpplexer::lexing_exception,
                                generic_lexing_error,
                                msg.c_str(), pos.get_line(), pos.get_column(),
                                pos.get_file().c_str());
                        }
                        break;
                    }

                    result = token_type(id, token_val, pos);
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
                    // feed the token through the include-guard detector
                    return guards.detect_guard(result);
#else
                    return result;
#endif
                }
            } while (true);     // skip the T_CONTLINE token
        }
        return result = token_type();   // return T_EOI
    }

    void set_position(Position const &pos)
    {
        // set position has to change the file name and line number only
        first.get_position().set_file(pos.get_file());
        first.get_position().set_line(pos.get_line());
    }

#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
    // report whether an include guard was detected; fills guard_name if so
    bool has_include_guards(std::string& guard_name) const
    { return guards.detected(guard_name); }
#endif

private:
    iterator_type first;
    // NOTE(review): 'last' is never assigned — the constructor leaves it
    // default-constructed, so a default-constructed position_iterator is
    // presumably the end-of-input sentinel here; confirm against
    // wave::util::position_iterator's semantics
    iterator_type last;

    wave::language_support language;
    bool at_eof;                  // true once T_EOF has been returned
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
    include_guards<token_type> guards;
#endif

    // single lexer instance shared by all functors of this instantiation
    static lexer::lexertl<iterator_type, Position> lexer_;
};
|
|
|
|
//  Definition of the static lexer instance shared by all lexertl_functor<>
//  objects of the same instantiation (so the DFA is built only once).
//  NOTE(review): shared mutable static — presumably initialized before any
//  concurrent use; confirm if this lexer is used from multiple threads.
template <typename Iterator, typename Position>
lexer::lexertl<
        typename lexertl_functor<Iterator, Position>::iterator_type, Position>
    lexertl_functor<Iterator, Position>::lexer_;
|
|
|
|
#undef INIT_DATA_SIZE
|
|
#undef INIT_DATA_CPP_SIZE
|
|
#undef INIT_DATA_PP_NUMBER_SIZE
|
|
#undef INIT_MACRO_DATA_SIZE
|
|
#undef T_ANYCTRL
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// The new_lexer_gen<>::new_lexer function (declared in lexertl_interface.hpp)
|
|
// should be defined inline, if the lex_functor shouldn't be instantiated
|
|
// separately from the lex_iterator.
|
|
//
|
|
// Separate (explicit) instantiation helps to reduce compilation time.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
|
|
#define BOOST_WAVE_FLEX_NEW_LEXER_INLINE
|
|
#else
|
|
#define BOOST_WAVE_FLEX_NEW_LEXER_INLINE inline
|
|
#endif
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// The 'new_lexer' function allows the opaque generation of a new lexer object.
|
|
// It is coupled to the iterator type to allow to decouple the lexer/iterator
|
|
// configurations at compile time.
|
|
//
|
|
// This function is declared inside the xlex_interface.hpp file, which is
|
|
// referenced by the source file calling the lexer and the source file, which
|
|
// instantiates the lex_functor. But it is defined here, so it will be
|
|
// instantiated only while compiling the source file, which instantiates the
|
|
// lex_functor. While the xlex_interface.hpp file may be included everywhere,
|
|
// this file (xlex_lexer.hpp) should be included only once. This allows
|
|
// to decouple the lexer interface from the lexer implementation and reduces
|
|
// compilation time.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
template <typename Iterator, typename Position>
|
|
BOOST_WAVE_FLEX_NEW_LEXER_INLINE
|
|
wave::cpplexer::lex_input_interface<wave::cpplexer::lex_token<Position> > *
|
|
new_lexer_gen<Iterator, Position>::new_lexer(Iterator const &first,
|
|
Iterator const &last, Position const &pos, wave::language_support language)
|
|
{
|
|
return new lexertl_functor<Iterator, Position>(first, last, pos, language);
|
|
}
|
|
|
|
#undef BOOST_WAVE_FLEX_NEW_LEXER_INLINE
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
}}}} // namespace boost::wave::cpplexer::lexertl
|
|
|
|
#endif // !defined(BOOST_WAVE_LEXERTL_LEXER_HPP_INCLUDED)
|
|
|