efbf08fab4
[SVN r85641]
589 lines
21 KiB
C++
589 lines
21 KiB
C++
/*=============================================================================
|
|
Boost.Wave: A Standard compliant C++ preprocessor library
|
|
|
|
Xpressive based C++ lexer
|
|
|
|
http://www.boost.org/
|
|
|
|
Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
|
|
Software License, Version 1.0. (See accompanying file
|
|
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
|
=============================================================================*/
|
|
|
|
#if !defined(XLEX_LEXER_HPP)
|
|
#define XLEX_LEXER_HPP
|
|
|
|
#include <string>
|
|
#include <cstdio>
|
|
#include <cstdarg>
|
|
#if defined(BOOST_SPIRIT_DEBUG)
|
|
#include <iostream>
|
|
#endif // defined(BOOST_SPIRIT_DEBUG)
|
|
|
|
#include <boost/concept_check.hpp>
|
|
#include <boost/assert.hpp>
|
|
#include <boost/spirit/include/classic_core.hpp>
|
|
|
|
#include <boost/wave/token_ids.hpp>
|
|
#include <boost/wave/language_support.hpp>
|
|
#include <boost/wave/util/file_position.hpp>
|
|
#include <boost/wave/cpplexer/validate_universal_char.hpp>
|
|
#include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
|
|
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
|
|
#include <boost/wave/cpplexer/detect_include_guards.hpp>
|
|
#endif
|
|
#include <boost/wave/cpplexer/cpp_lex_interface.hpp>
|
|
|
|
// reuse the default token type
|
|
#include "../xlex_iterator.hpp"
|
|
|
|
// include the xpressive headers
|
|
#include "xpressive_lexer.hpp"
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
namespace boost {
|
|
namespace wave {
|
|
namespace cpplexer {
|
|
namespace xlex {
|
|
namespace lexer {
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// encapsulation of the xpressive based C++ lexer
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
template <
|
|
typename Iterator,
|
|
typename Position = boost::wave::util::file_position_type
|
|
>
|
|
class lexer
|
|
{
|
|
public:
|
|
typedef char char_type;
|
|
typedef boost::wave::cpplexer::lex_token<Position> token_type;
|
|
typedef typename token_type::string_type string_type;
|
|
|
|
lexer(Iterator const &first, Iterator const &last,
|
|
Position const &pos, boost::wave::language_support language);
|
|
~lexer() {}
|
|
|
|
token_type& get(token_type& t);
|
|
void set_position(Position const &pos)
|
|
{
|
|
// set position has to change the file name and line number only
|
|
filename = pos.get_file();
|
|
line = pos.get_line();
|
|
}
|
|
|
|
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
|
|
bool has_include_guards(std::string& guard_name) const
|
|
{ return guards.detected(guard_name); }
|
|
#endif
|
|
|
|
private:
|
|
typedef xpressive_lexer<Iterator, token_id> lexer_type;
|
|
typedef typename lexer_type::callback_type callback_type;
|
|
|
|
lexer_type xlexer;
|
|
Iterator first;
|
|
Iterator last;
|
|
|
|
string_type filename;
|
|
int line;
|
|
bool at_eof;
|
|
boost::wave::language_support language;
|
|
|
|
// initialization data (regular expressions for the token definitions)
|
|
struct lexer_data {
|
|
token_id tokenid; // token data
|
|
char_type const *tokenregex; // associated token to match
|
|
callback_type tokencb; // associated callback function
|
|
};
|
|
|
|
static lexer_data const init_data[]; // common patterns
|
|
static lexer_data const init_data_cpp[]; // C++ only patterns
|
|
|
|
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
|
|
boost::wave::cpplexer::include_guards<token_type> guards;
|
|
#endif
|
|
};
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// helper for initializing token data
|
|
// TOKEN_DATA builds a table entry without a callback (the callback slot is
// set to 0), TOKEN_DATA_EX builds a table entry with an explicit callback.
#define TOKEN_DATA(id, regex)               { id, regex, 0 }
#define TOKEN_DATA_EX(id, regex, callback)  { id, regex, callback }
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// data required for initialization of the lexer (token definitions)
|
|
// Basic combinators: OR is the regex alternation, Q(c) puts a backslash in
// front of 'c' (regex level quoting), TRI(c) spells the trigraph "??c" with
// both question marks quoted.
#define OR                  "|"
#define Q(c)                "\\" c
#define TRI(c)              Q("?") Q("?") c

// Sub-expressions shared by several token definitions. All of these expand
// to plain string literals, which are glued together by the compiler.
#define BLANK               "[ \t]"
#define CCOMMENT            Q("/") Q("*") ".*?" Q("*") Q("/")

#define PPSPACE             "(" BLANK OR CCOMMENT ")*"

#define OCTALDIGIT          "[0-7]"
#define DIGIT               "[0-9]"
#define HEXDIGIT            "[0-9a-fA-F]"
#define SIGN                "[-+]?"
#define EXPONENT            "([eE]" SIGN DIGIT "+)"

// hexadecimal, octal or decimal integer (without any suffix)
#define INTEGER                                                               \
    "((0x|0X)" HEXDIGIT "+" OR                                                \
    "0" OCTALDIGIT "*" OR                                                     \
    "[1-9]" DIGIT "*)"

#define INTEGER_SUFFIX      "([uU][lL]?|[lL][uU]?)"
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
// with MS extensions enabled the suffix 'i64' is accepted additionally
#define LONGINTEGER_SUFFIX                                                    \
    "([uU]([lL][lL])" OR                                                      \
    "([lL][lL])[uU]?" OR                                                      \
    "i64)"
#else
#define LONGINTEGER_SUFFIX                                                    \
    "([uU]([lL][lL])" OR                                                      \
    "([lL][lL])[uU]?)"
#endif
#define FLOAT_SUFFIX        "([fF][lL]?|[lL][fF]?)"
#define CHAR_SPEC           "L?"

// a backslash, either written directly or as the trigraph "??/"
#define BACKSLASH           "(" Q("\\") OR TRI(Q("/")) ")"
#define ESCAPESEQ                                                             \
    BACKSLASH "("                                                             \
        "[abfnrtv?'\"]" OR                                                    \
        BACKSLASH OR                                                          \
        "x" HEXDIGIT "+" OR                                                   \
        OCTALDIGIT OCTALDIGIT "?" OCTALDIGIT "?"                              \
    ")"
#define HEXQUAD             HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT
#define UNIVERSALCHAR                                                         \
    BACKSLASH "("                                                             \
        "u" HEXQUAD OR                                                        \
        "U" HEXQUAD HEXQUAD                                                   \
    ")"

// '#' in all of its spellings and the supported line endings
#define POUNDDEF            "(" "#" OR TRI("=") OR Q("%:") ")"
#define NEWLINEDEF          "(" "\n" OR "\r\n" OR "\r" ")"

#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
#define INCLUDEDEF          "(include_next|include)"
#else
#define INCLUDEDEF          "include"
#endif
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// common C++/C99 token definitions
|
|
template <typename Iterator, typename Position>
|
|
typename lexer<Iterator, Position>::lexer_data const
|
|
lexer<Iterator, Position>::init_data[] =
|
|
{
|
|
TOKEN_DATA(T_CCOMMENT, CCOMMENT),
|
|
TOKEN_DATA(T_CPPCOMMENT, Q("/") Q("/.*?") NEWLINEDEF ),
|
|
TOKEN_DATA(T_CHARLIT, CHAR_SPEC "'"
|
|
"(" ESCAPESEQ OR "[^\n\r']" OR UNIVERSALCHAR ")+" "'"),
|
|
TOKEN_DATA(T_STRINGLIT, CHAR_SPEC Q("\"")
|
|
"(" ESCAPESEQ OR "[^\n\r\"]" OR UNIVERSALCHAR ")*" Q("\"")),
|
|
TOKEN_DATA(T_ANDAND, "&&"),
|
|
TOKEN_DATA(T_ANDASSIGN, "&="),
|
|
TOKEN_DATA(T_AND, "&"),
|
|
TOKEN_DATA(T_EQUAL, "=="),
|
|
TOKEN_DATA(T_ASSIGN, "="),
|
|
TOKEN_DATA(T_ORASSIGN, Q("|=")),
|
|
TOKEN_DATA(T_ORASSIGN_TRIGRAPH, TRI("!=")),
|
|
TOKEN_DATA(T_OROR, Q("|") Q("|")),
|
|
TOKEN_DATA(T_OROR_TRIGRAPH, TRI("!") Q("|") OR Q("|") TRI("!") OR TRI("!") TRI("!")),
|
|
TOKEN_DATA(T_OR, Q("|")),
|
|
TOKEN_DATA(T_OR_TRIGRAPH, TRI("!")),
|
|
TOKEN_DATA(T_XORASSIGN, Q("^=")),
|
|
TOKEN_DATA(T_XORASSIGN_TRIGRAPH, TRI("'=")),
|
|
TOKEN_DATA(T_XOR, Q("^")),
|
|
TOKEN_DATA(T_XOR_TRIGRAPH, TRI("'")),
|
|
TOKEN_DATA(T_COMMA, ","),
|
|
TOKEN_DATA(T_RIGHTBRACKET_ALT, ":>"),
|
|
TOKEN_DATA(T_COLON, ":"),
|
|
TOKEN_DATA(T_DIVIDEASSIGN, Q("/=")),
|
|
TOKEN_DATA(T_DIVIDE, Q("/")),
|
|
TOKEN_DATA(T_ELLIPSIS, Q(".") Q(".") Q(".")),
|
|
TOKEN_DATA(T_SHIFTRIGHTASSIGN, ">>="),
|
|
TOKEN_DATA(T_SHIFTRIGHT, ">>"),
|
|
TOKEN_DATA(T_GREATEREQUAL, ">="),
|
|
TOKEN_DATA(T_GREATER, ">"),
|
|
TOKEN_DATA(T_LEFTBRACE, Q("{")),
|
|
TOKEN_DATA(T_SHIFTLEFTASSIGN, "<<="),
|
|
TOKEN_DATA(T_SHIFTLEFT, "<<"),
|
|
TOKEN_DATA(T_LEFTBRACE_ALT, "<" Q("%")),
|
|
TOKEN_DATA(T_LESSEQUAL, "<="),
|
|
TOKEN_DATA(T_LEFTBRACKET_ALT, "<:"),
|
|
TOKEN_DATA(T_LESS, "<"),
|
|
TOKEN_DATA(T_LEFTBRACE_TRIGRAPH, TRI("<")),
|
|
TOKEN_DATA(T_LEFTPAREN, Q("(")),
|
|
TOKEN_DATA(T_LEFTBRACKET, Q("[")),
|
|
TOKEN_DATA(T_LEFTBRACKET_TRIGRAPH, TRI(Q("("))),
|
|
TOKEN_DATA(T_MINUSMINUS, Q("-") Q("-")),
|
|
TOKEN_DATA(T_MINUSASSIGN, Q("-=")),
|
|
TOKEN_DATA(T_ARROW, Q("->")),
|
|
TOKEN_DATA(T_MINUS, Q("-")),
|
|
TOKEN_DATA(T_POUND_POUND_ALT, Q("%:") Q("%:")),
|
|
TOKEN_DATA(T_PERCENTASSIGN, Q("%=")),
|
|
TOKEN_DATA(T_RIGHTBRACE_ALT, Q("%>")),
|
|
TOKEN_DATA(T_POUND_ALT, Q("%:")),
|
|
TOKEN_DATA(T_PERCENT, Q("%")),
|
|
TOKEN_DATA(T_NOTEQUAL, "!="),
|
|
TOKEN_DATA(T_NOT, "!"),
|
|
TOKEN_DATA(T_PLUSASSIGN, Q("+=")),
|
|
TOKEN_DATA(T_PLUSPLUS, Q("+") Q("+")),
|
|
TOKEN_DATA(T_PLUS, Q("+")),
|
|
TOKEN_DATA(T_RIGHTBRACE, Q("}")),
|
|
TOKEN_DATA(T_RIGHTBRACE_TRIGRAPH, TRI(">")),
|
|
TOKEN_DATA(T_RIGHTPAREN, Q(")")),
|
|
TOKEN_DATA(T_RIGHTBRACKET, Q("]")),
|
|
TOKEN_DATA(T_RIGHTBRACKET_TRIGRAPH, TRI(Q(")"))),
|
|
TOKEN_DATA(T_SEMICOLON, ";"),
|
|
TOKEN_DATA(T_STARASSIGN, Q("*=")),
|
|
TOKEN_DATA(T_STAR, Q("*")),
|
|
TOKEN_DATA(T_COMPL, Q("~")),
|
|
TOKEN_DATA(T_COMPL_TRIGRAPH, TRI("-")),
|
|
TOKEN_DATA(T_ASM, "asm"),
|
|
TOKEN_DATA(T_AUTO, "auto"),
|
|
TOKEN_DATA(T_BOOL, "bool"),
|
|
TOKEN_DATA(T_FALSE, "false"),
|
|
TOKEN_DATA(T_TRUE, "true"),
|
|
TOKEN_DATA(T_BREAK, "break"),
|
|
TOKEN_DATA(T_CASE, "case"),
|
|
TOKEN_DATA(T_CATCH, "catch"),
|
|
TOKEN_DATA(T_CHAR, "char"),
|
|
TOKEN_DATA(T_CLASS, "class"),
|
|
TOKEN_DATA(T_CONSTCAST, "const_cast"),
|
|
TOKEN_DATA(T_CONST, "const"),
|
|
TOKEN_DATA(T_CONTINUE, "continue"),
|
|
TOKEN_DATA(T_DEFAULT, "default"),
|
|
TOKEN_DATA(T_DELETE, "delete"),
|
|
TOKEN_DATA(T_DOUBLE, "double"),
|
|
TOKEN_DATA(T_DO, "do"),
|
|
TOKEN_DATA(T_DYNAMICCAST, "dynamic_cast"),
|
|
TOKEN_DATA(T_ELSE, "else"),
|
|
TOKEN_DATA(T_ENUM, "enum"),
|
|
TOKEN_DATA(T_EXPLICIT, "explicit"),
|
|
TOKEN_DATA(T_EXPORT, "export"),
|
|
TOKEN_DATA(T_EXTERN, "extern"),
|
|
TOKEN_DATA(T_FLOAT, "float"),
|
|
TOKEN_DATA(T_FOR, "for"),
|
|
TOKEN_DATA(T_FRIEND, "friend"),
|
|
TOKEN_DATA(T_GOTO, "goto"),
|
|
TOKEN_DATA(T_IF, "if"),
|
|
TOKEN_DATA(T_INLINE, "inline"),
|
|
TOKEN_DATA(T_INT, "int"),
|
|
TOKEN_DATA(T_LONG, "long"),
|
|
TOKEN_DATA(T_MUTABLE, "mutable"),
|
|
TOKEN_DATA(T_NAMESPACE, "namespace"),
|
|
TOKEN_DATA(T_NEW, "new"),
|
|
TOKEN_DATA(T_OPERATOR, "operator"),
|
|
TOKEN_DATA(T_PRIVATE, "private"),
|
|
TOKEN_DATA(T_PROTECTED, "protected"),
|
|
TOKEN_DATA(T_PUBLIC, "public"),
|
|
TOKEN_DATA(T_REGISTER, "register"),
|
|
TOKEN_DATA(T_REINTERPRETCAST, "reinterpret_cast"),
|
|
TOKEN_DATA(T_RETURN, "return"),
|
|
TOKEN_DATA(T_SHORT, "short"),
|
|
TOKEN_DATA(T_SIGNED, "signed"),
|
|
TOKEN_DATA(T_SIZEOF, "sizeof"),
|
|
TOKEN_DATA(T_STATICCAST, "static_cast"),
|
|
TOKEN_DATA(T_STATIC, "static"),
|
|
TOKEN_DATA(T_STRUCT, "struct"),
|
|
TOKEN_DATA(T_SWITCH, "switch"),
|
|
TOKEN_DATA(T_TEMPLATE, "template"),
|
|
TOKEN_DATA(T_THIS, "this"),
|
|
TOKEN_DATA(T_THROW, "throw"),
|
|
TOKEN_DATA(T_TRY, "try"),
|
|
TOKEN_DATA(T_TYPEDEF, "typedef"),
|
|
TOKEN_DATA(T_TYPEID, "typeid"),
|
|
TOKEN_DATA(T_TYPENAME, "typename"),
|
|
TOKEN_DATA(T_UNION, "union"),
|
|
TOKEN_DATA(T_UNSIGNED, "unsigned"),
|
|
TOKEN_DATA(T_USING, "using"),
|
|
TOKEN_DATA(T_VIRTUAL, "virtual"),
|
|
TOKEN_DATA(T_VOID, "void"),
|
|
TOKEN_DATA(T_VOLATILE, "volatile"),
|
|
TOKEN_DATA(T_WCHART, "wchar_t"),
|
|
TOKEN_DATA(T_WHILE, "while"),
|
|
#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
|
|
TOKEN_DATA(T_MSEXT_INT8, "__int8"),
|
|
TOKEN_DATA(T_MSEXT_INT16, "__int16"),
|
|
TOKEN_DATA(T_MSEXT_INT32, "__int32"),
|
|
TOKEN_DATA(T_MSEXT_INT64, "__int64"),
|
|
TOKEN_DATA(T_MSEXT_BASED, "_?" "_based"),
|
|
TOKEN_DATA(T_MSEXT_DECLSPEC, "_?" "_declspec"),
|
|
TOKEN_DATA(T_MSEXT_CDECL, "_?" "_cdecl"),
|
|
TOKEN_DATA(T_MSEXT_FASTCALL, "_?" "_fastcall"),
|
|
TOKEN_DATA(T_MSEXT_STDCALL, "_?" "_stdcall"),
|
|
TOKEN_DATA(T_MSEXT_TRY , "__try"),
|
|
TOKEN_DATA(T_MSEXT_EXCEPT, "__except"),
|
|
TOKEN_DATA(T_MSEXT_FINALLY, "__finally"),
|
|
TOKEN_DATA(T_MSEXT_LEAVE, "__leave"),
|
|
TOKEN_DATA(T_MSEXT_INLINE, "_?" "_inline"),
|
|
TOKEN_DATA(T_MSEXT_ASM, "_?" "_asm"),
|
|
TOKEN_DATA(T_MSEXT_PP_REGION, POUNDDEF PPSPACE "region"),
|
|
TOKEN_DATA(T_MSEXT_PP_ENDREGION, POUNDDEF PPSPACE "endregion"),
|
|
#endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
|
|
TOKEN_DATA(T_PP_DEFINE, POUNDDEF PPSPACE "define"),
|
|
TOKEN_DATA(T_PP_IFDEF, POUNDDEF PPSPACE "ifdef"),
|
|
TOKEN_DATA(T_PP_IFNDEF, POUNDDEF PPSPACE "ifndef"),
|
|
TOKEN_DATA(T_PP_IF, POUNDDEF PPSPACE "if"),
|
|
TOKEN_DATA(T_PP_ELSE, POUNDDEF PPSPACE "else"),
|
|
TOKEN_DATA(T_PP_ELIF, POUNDDEF PPSPACE "elif"),
|
|
TOKEN_DATA(T_PP_ENDIF, POUNDDEF PPSPACE "endif"),
|
|
TOKEN_DATA(T_PP_ERROR, POUNDDEF PPSPACE "error"),
|
|
TOKEN_DATA(T_PP_QHEADER, POUNDDEF PPSPACE \
|
|
INCLUDEDEF PPSPACE Q("\"") "[^\n\r\"]+" Q("\"")),
|
|
TOKEN_DATA(T_PP_HHEADER, POUNDDEF PPSPACE \
|
|
INCLUDEDEF PPSPACE "<" "[^\n\r>]+" ">"),
|
|
TOKEN_DATA(T_PP_INCLUDE, POUNDDEF PPSPACE \
|
|
INCLUDEDEF PPSPACE),
|
|
TOKEN_DATA(T_PP_LINE, POUNDDEF PPSPACE "line"),
|
|
TOKEN_DATA(T_PP_PRAGMA, POUNDDEF PPSPACE "pragma"),
|
|
TOKEN_DATA(T_PP_UNDEF, POUNDDEF PPSPACE "undef"),
|
|
TOKEN_DATA(T_PP_WARNING, POUNDDEF PPSPACE "warning"),
|
|
TOKEN_DATA(T_FLOATLIT,
|
|
"(" DIGIT "*" Q(".") DIGIT "+" OR DIGIT "+" Q(".") ")"
|
|
EXPONENT "?" FLOAT_SUFFIX "?" OR
|
|
DIGIT "+" EXPONENT FLOAT_SUFFIX "?"),
|
|
TOKEN_DATA(T_LONGINTLIT, INTEGER LONGINTEGER_SUFFIX),
|
|
TOKEN_DATA(T_INTLIT, INTEGER INTEGER_SUFFIX "?"),
|
|
#if BOOST_WAVE_USE_STRICT_LEXER != 0
|
|
TOKEN_DATA(T_IDENTIFIER, "([a-zA-Z_]" OR UNIVERSALCHAR ")([a-zA-Z0-9_]" OR UNIVERSALCHAR ")*"),
|
|
#else
|
|
TOKEN_DATA(T_IDENTIFIER, "([a-zA-Z_$]" OR UNIVERSALCHAR ")([a-zA-Z0-9_$]" OR UNIVERSALCHAR ")*"),
|
|
#endif
|
|
TOKEN_DATA(T_SPACE, BLANK "+"),
|
|
TOKEN_DATA(T_SPACE2, "[\v\f]+"),
|
|
TOKEN_DATA(T_CONTLINE, Q("\\") "\n"),
|
|
TOKEN_DATA(T_NEWLINE, NEWLINEDEF),
|
|
TOKEN_DATA(T_POUND_POUND, "##"),
|
|
TOKEN_DATA(T_POUND_POUND_TRIGRAPH, TRI("=") TRI("=")),
|
|
TOKEN_DATA(T_POUND, "#"),
|
|
TOKEN_DATA(T_POUND_TRIGRAPH, TRI("=")),
|
|
TOKEN_DATA(T_ANY_TRIGRAPH, TRI(Q("/"))),
|
|
TOKEN_DATA(T_QUESTION_MARK, Q("?")),
|
|
TOKEN_DATA(T_DOT, Q(".")),
|
|
TOKEN_DATA(T_ANY, "."),
|
|
{ token_id(0) } // this should be the last entry
|
|
};
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// C++ only token definitions
|
|
template <typename Iterator, typename Position>
|
|
typename lexer<Iterator, Position>::lexer_data const
|
|
lexer<Iterator, Position>::init_data_cpp[] =
|
|
{
|
|
TOKEN_DATA(T_AND_ALT, "bitand"),
|
|
TOKEN_DATA(T_ANDASSIGN_ALT, "and_eq"),
|
|
TOKEN_DATA(T_ANDAND_ALT, "and"),
|
|
TOKEN_DATA(T_OR_ALT, "bitor"),
|
|
TOKEN_DATA(T_ORASSIGN_ALT, "or_eq"),
|
|
TOKEN_DATA(T_OROR_ALT, "or"),
|
|
TOKEN_DATA(T_XORASSIGN_ALT, "xor_eq"),
|
|
TOKEN_DATA(T_XOR_ALT, "xor"),
|
|
TOKEN_DATA(T_NOTEQUAL_ALT, "not_eq"),
|
|
TOKEN_DATA(T_NOT_ALT, "not"),
|
|
TOKEN_DATA(T_COMPL_ALT, "compl"),
|
|
TOKEN_DATA(T_ARROWSTAR, Q("->") Q("*")),
|
|
TOKEN_DATA(T_DOTSTAR, Q(".") Q("*")),
|
|
TOKEN_DATA(T_COLON_COLON, "::"),
|
|
{ token_id(0) } // this should be the last entry
|
|
};
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// undefine macros, required for regular expression definitions
|
|
// Every helper macro defined above for building the token tables is removed
// again, so that none of these (very generic) names leaks into the code
// including this header. This has to cover BLANK and NEWLINEDEF as well.
#undef INCLUDEDEF
#undef NEWLINEDEF
#undef POUNDDEF
#undef BLANK
#undef CCOMMENT
#undef PPSPACE
#undef DIGIT
#undef OCTALDIGIT
#undef HEXDIGIT
#undef SIGN
#undef EXPONENT
#undef LONGINTEGER_SUFFIX
#undef INTEGER_SUFFIX
#undef INTEGER
#undef FLOAT_SUFFIX
#undef CHAR_SPEC
#undef BACKSLASH
#undef ESCAPESEQ
#undef HEXQUAD
#undef UNIVERSALCHAR

#undef Q
#undef TRI
#undef OR

#undef TOKEN_DATA
#undef TOKEN_DATA_EX
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// initialize cpp lexer
|
|
template <typename Iterator, typename Position>
|
|
inline
|
|
lexer<Iterator, Position>::lexer(Iterator const &first,
|
|
Iterator const &last, Position const &pos,
|
|
boost::wave::language_support language)
|
|
: first(first), last(last),
|
|
filename(pos.get_file()), line(0), at_eof(false), language(language)
|
|
{
|
|
// if in C99 mode, some of the keywords/operators are not valid
|
|
if (!boost::wave::need_c99(language)) {
|
|
for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
|
|
xlexer.register_regex(init_data_cpp[j].tokenregex,
|
|
init_data_cpp[j].tokenid, init_data_cpp[j].tokencb);
|
|
}
|
|
}
|
|
|
|
// tokens valid for C++ and C99
|
|
for (int i = 0; 0 != init_data[i].tokenid; ++i) {
|
|
xlexer.register_regex(init_data[i].tokenregex, init_data[i].tokenid,
|
|
init_data[i].tokencb);
|
|
}
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// get the next token from the input stream
|
|
template <typename Iterator, typename Position>
|
|
inline boost::wave::cpplexer::lex_token<Position>&
|
|
lexer<Iterator, Position>::get(boost::wave::cpplexer::lex_token<Position>& t)
|
|
{
|
|
using namespace boost::wave; // to import token ids to this scope
|
|
|
|
if (at_eof)
|
|
return t = cpplexer::lex_token<Position>(); // return T_EOI
|
|
|
|
std::string tokval;
|
|
token_id id = xlexer.next_token(first, last, tokval);
|
|
string_type value = tokval.c_str();
|
|
|
|
if ((token_id)(-1) == id)
|
|
id = T_EOF; // end of input reached
|
|
|
|
if (T_IDENTIFIER == id) {
|
|
// test identifier characters for validity (throws if invalid chars found)
|
|
if (!boost::wave::need_no_character_validation(language)) {
|
|
cpplexer::impl::validate_identifier_name(value, line, -1, filename);
|
|
}
|
|
}
|
|
else if (T_STRINGLIT == id || T_CHARLIT == id) {
|
|
// test literal characters for validity (throws if invalid chars found)
|
|
if (!boost::wave::need_no_character_validation(language)) {
|
|
cpplexer::impl::validate_literal(value, line, -1, filename);
|
|
}
|
|
}
|
|
else if (T_EOF == id) {
|
|
// T_EOF is returned as a valid token, the next call will return T_EOI,
|
|
// i.e. the actual end of input
|
|
at_eof = true;
|
|
value.clear();
|
|
}
|
|
|
|
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
|
|
cpplexer::lex_token<Position> tok(id, value, Position(filename, line, -1));
|
|
return t = guards.detect_guard(tok);
|
|
#else
|
|
return t = cpplexer::lex_token<Position>(id, value,
|
|
Position(filename, line, -1));
|
|
#endif
|
|
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// xlex_functor
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
template <
|
|
typename Iterator,
|
|
typename Position = boost::wave::util::file_position_type
|
|
>
|
|
class xlex_functor
|
|
: public xlex_input_interface<typename lexer<Iterator, Position>::token_type>
|
|
{
|
|
public:
|
|
|
|
typedef typename lexer<Iterator, Position>::token_type token_type;
|
|
|
|
xlex_functor(Iterator const &first, Iterator const &last,
|
|
Position const &pos, boost::wave::language_support language)
|
|
: lexer_(first, last, pos, language)
|
|
{}
|
|
virtual ~xlex_functor() {}
|
|
|
|
// get the next token from the input stream
|
|
token_type& get(token_type& t) { return lexer_.get(t); }
|
|
void set_position(Position const &pos) { lexer_.set_position(pos); }
|
|
|
|
#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
|
|
bool has_include_guards(std::string& guard_name) const
|
|
{ return lexer_.has_include_guards(guard_name); }
|
|
#endif
|
|
|
|
private:
|
|
lexer<Iterator, Position> lexer_;
|
|
};
|
|
|
|
} // namespace lexer
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// The new_lexer_gen<>::new_lexer function (declared in xlex_interface.hpp)
|
|
// should be defined inline, if the lex_functor shouldn't be instantiated
|
|
// separately from the lex_iterator.
|
|
//
|
|
// Separate (explicit) instantiation helps to reduce compilation time.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
// new_lexer() is defined inline, except if the lexer is instantiated
// separately (BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0)
#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION == 0
#define BOOST_WAVE_XLEX_NEW_LEXER_INLINE inline
#else
#define BOOST_WAVE_XLEX_NEW_LEXER_INLINE
#endif
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// The 'new_lexer' function allows the opaque generation of a new lexer object.
|
|
// It is coupled to the iterator type to allow to decouple the lexer/iterator
|
|
// configurations at compile time.
|
|
//
|
|
// This function is declared inside the xlex_interface.hpp file, which is
|
|
// referenced by the source file calling the lexer and the source file, which
|
|
// instantiates the lex_functor. But it is defined here, so it will be
|
|
// instantiated only while compiling the source file, which instantiates the
|
|
// lex_functor. While the xlex_interface.hpp file may be included everywhere,
|
|
// this file (xlex_lexer.hpp) should be included only once. This allows
|
|
// to decouple the lexer interface from the lexer implementation and reduces
|
|
// compilation time.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
template <typename Iterator, typename Position>
|
|
BOOST_WAVE_XLEX_NEW_LEXER_INLINE
|
|
lex_input_interface<boost::wave::cpplexer::lex_token<Position> > *
|
|
new_lexer_gen<Iterator, Position>::new_lexer(Iterator const &first,
|
|
Iterator const &last, Position const &pos,
|
|
wave::language_support language)
|
|
{
|
|
return new lexer::xlex_functor<Iterator, Position>(
|
|
first, last, pos, language);
|
|
}
|
|
|
|
#undef BOOST_WAVE_XLEX_NEW_LEXER_INLINE
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
} // namespace xlex
|
|
} // namespace cpplexer
|
|
} // namespace wave
|
|
} // namespace boost
|
|
|
|
#endif // !defined(XLEX_LEXER_HPP)
|