wave/samples/waveidl/idllexer/idl.re
Hartmut Kaiser 9fd423ca58 Wave: merging from trunk
[SVN r58724]
2010-01-05 18:46:09 +00:00

589 lines
18 KiB
C++

/*=============================================================================
Boost.Wave: A Standard compliant C++ preprocessor library
Sample: IDL lexer
http://www.boost.org/
Copyright (c) 2001-2010 Hartmut Kaiser. Distributed under the Boost
Software License, Version 1.0. (See accompanying file
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
#include <ctime>
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <boost/config.hpp>
#if defined(BOOST_HAS_UNISTD_H)
#include <unistd.h>
#else
#include <io.h>
#endif
#include <boost/assert.hpp>
#include <boost/detail/workaround.hpp>
// reuse the token ids and re2c helper functions from the default C++ lexer
#include <boost/wave/token_ids.hpp>
#include <boost/wave/cpplexer/re2clex/aq.hpp>
#include <boost/wave/cpplexer/re2clex/scanner.hpp>
#include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
#include "idl_re.hpp"
#if defined(_MSC_VER) && !defined(__COMO__)
#pragma warning (disable: 4101) // 'foo' : unreferenced local variable
#pragma warning (disable: 4102) // 'foo' : unreferenced label
#endif
/* Chunk size used by fill(): the maximum number of input characters copied
   into the scan buffer per call, and the minimum amount of free space fill()
   keeps available behind s->lim. */
#define BOOST_WAVE_BSIZE 196608
/* Scanner interface macros. YYCURSOR is used directly by the actions in
   scan(); the others are presumably consumed by the code re2c generates from
   the specification blocks there (see the re2c manual). They map onto the
   local `cursor` variable of scan() and onto fields of the Scanner `s`. */
#define YYCTYPE uchar
#define YYCURSOR cursor
#define YYLIMIT s->lim
#define YYMARKER s->ptr
/* Refill request: lets fill() top up the buffer and adopts the (possibly
   relocated) cursor it returns. The argument n is not used. */
#define YYFILL(n) {cursor = fill(s, cursor);}
/* Older variant of BOOST_WAVE_RET without the line count correction. */
//#define BOOST_WAVE_RET(i) {s->cur = cursor; return (i);}
/* Leave scan() with token id i: first add the number of backslash-newline
   sequences that were erased in front of the cursor to s->line, then store
   the cursor in s->cur so the next scan() call resumes from there. */
#define BOOST_WAVE_RET(i) \
{ \
s->line += count_backslash_newlines(s, cursor); \
s->cur = cursor; \
return (i); \
} \
/**/
///////////////////////////////////////////////////////////////////////////////
namespace boost {
namespace wave {
namespace idllexer {
namespace re2clex {
/* Assertion macro used by the helper functions in this namespace; it simply
   forwards to BOOST_ASSERT and is #undef'ed again after scan(). */
#define RE2C_ASSERT BOOST_ASSERT
/* Read the next character from the input range [s->first, s->last).
   Returns the character at s->act and advances s->act by one, or returns -1
   when no input is attached (s->act == 0) or the input is exhausted. */
int
get_one_char(boost::wave::cpplexer::re2clex::Scanner *s)
{
    if (0 == s->act)
        return -1;          /* no input attached */

    RE2C_ASSERT(s->first != 0 && s->last != 0);
    RE2C_ASSERT(s->first <= s->act && s->act <= s->last);

    if (s->act >= s->last)
        return -1;          /* nothing left to read */

    int const ch = *s->act;
    ++s->act;
    return ch;
}
/* Move the read position s->act by cnt characters (callers in this file pass
   a negative cnt to step backwards).
   Returns the new position as an offset from s->first, or 0 when no input is
   attached (s->act == 0), in which case nothing is modified. */
std::ptrdiff_t
rewind_stream (boost::wave::cpplexer::re2clex::Scanner *s, int cnt)
{
    if (0 == s->act)
        return 0;

    RE2C_ASSERT(s->first != 0 && s->last != 0);
    s->act += cnt;
    /* the adjusted position has to stay inside the input range */
    RE2C_ASSERT(s->first <= s->act && s->act <= s->last);

    return s->act - s->first;
}
/* Peek at the oldest entry of the s->eol_offsets queue without removing it.
   Returns that entry, or the sentinel (unsigned int)-1 when the queue is
   empty. */
std::size_t
get_first_eol_offset(boost::wave::cpplexer::re2clex::Scanner* s)
{
    if (AQ_EMPTY(s->eol_offsets))
        return static_cast<unsigned int>(-1);   /* sentinel: queue is empty */

    return s->eol_offsets->queue[s->eol_offsets->head];
}
/* Subtract `adjustment` from every entry of the s->eol_offsets queue,
   clamping entries smaller than `adjustment` to 0. fill() calls this after
   it has moved the buffer contents towards the buffer start.
   The queue is created first if the scanner does not have one yet. */
void
adjust_eol_offsets(boost::wave::cpplexer::re2clex::Scanner* s,
    std::size_t adjustment)
{
    if (!s->eol_offsets)
        s->eol_offsets = boost::wave::cpplexer::re2clex::aq_create();

    boost::wave::cpplexer::re2clex::aq_queue const offsets = s->eol_offsets;
    if (AQ_EMPTY(offsets))
        return;

    /* walk the ring buffer from head up to and including tail */
    std::size_t pos = offsets->head;
    for (;;)
    {
        if (adjustment > offsets->queue[pos])
            offsets->queue[pos] = 0;
        else
            offsets->queue[pos] -= adjustment;

        if (pos == offsets->tail)
            break;

        ++pos;
        if (pos == offsets->max_size)
            pos = 0;            /* wrap around */
    }
}
/* Count the recorded backslash-newline positions that lie at or before
   `cursor` and remove them from the s->eol_offsets queue.
   Returns the number of entries removed; BOOST_WAVE_RET and the comment
   scanner add this value to s->line. */
int
count_backslash_newlines(boost::wave::cpplexer::re2clex::Scanner *s,
    boost::wave::cpplexer::re2clex::uchar *cursor)
{
    using namespace boost::wave::cpplexer::re2clex;

    /* value get_first_eol_offset() reports for an empty queue */
    std::size_t const no_offset = (unsigned int)-1;
    /* distance of the cursor from the start of the scan buffer */
    std::size_t const consumed = cursor - s->bot;

    int erased = 0;
    for (std::size_t next = get_first_eol_offset(s);
         next != no_offset && next <= consumed;
         next = get_first_eol_offset(s))
    {
        ++erased;
        boost::wave::cpplexer::re2clex::aq_pop(s->eol_offsets);
    }
    return erased;
}
/* Test whether a backslash starts at p, either as the plain character or
   spelled as the trigraph "??/".
   p    - position to test
   end  - one past the last readable character
   len  - receives the length of the match (1 or 3); left untouched when
          there is no match
   Returns true if a backslash was recognized.
   No character at or beyond `end` is read: an empty range yields false and
   the trigraph is only examined when three characters are available. */
bool is_backslash(
    boost::wave::cpplexer::re2clex::uchar *p,
    boost::wave::cpplexer::re2clex::uchar *end, int &len)
{
    if (p >= end)
        return false;           /* nothing to look at */

    if (*p == '\\') {
        len = 1;
        return true;
    }

    /* check the available length before touching p[1] and p[2] */
    if (end - p > 2 && p[0] == '?' && p[1] == '?' && p[2] == '/') {
        len = 3;
        return true;
    }
    return false;
}
/* Refill the scan buffer of `s` (invoked through YYFILL).
   Steps, skipped entirely once s->eof has been set:
     1. move the still needed data (from s->tok on) to the buffer start and
        shift ptr/cursor/lim and the recorded eol offsets accordingly;
     2. grow the buffer if fewer than BOOST_WAVE_BSIZE characters are free
        behind s->lim;
     3. copy up to BOOST_WAVE_BSIZE characters from the input range
        [s->act, s->last) behind s->lim; a short read marks the end of input
        by appending '\0' and setting s->eof;
     4. erase backslash-newline sequences from the new data, recording their
        positions in s->eol_offsets so the line count can be corrected later;
     5. handle a backslash-newline that straddles the end of the new data.
   Returns the cursor, relocated if the buffer content was moved. On an
   allocation failure the error is reported, '\0' is stored at the cursor to
   stop the scanner and the cursor is returned unchanged. */
boost::wave::cpplexer::re2clex::uchar *
fill(boost::wave::cpplexer::re2clex::Scanner *s,
    boost::wave::cpplexer::re2clex::uchar *cursor)
{
    using namespace std;    // some systems have memcpy etc. in namespace std
    using namespace boost::wave::cpplexer::re2clex;

    if(!s->eof)
    {
        uchar* p;
        std::ptrdiff_t cnt = s->tok - s->bot;
        if(cnt)
        {
            /* source and destination are part of the same buffer and may
               overlap, so memmove is required here (memcpy would be
               undefined behaviour) */
            memmove(s->bot, s->tok, s->lim - s->tok);
            s->tok = s->bot;
            s->ptr -= cnt;
            cursor -= cnt;
            s->lim -= cnt;
            adjust_eol_offsets(s, cnt);
        }

        if((s->top - s->lim) < BOOST_WAVE_BSIZE)
        {
            uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BOOST_WAVE_BSIZE)*sizeof(uchar));
            if (buf == 0)
            {
                using namespace std;    // some systems have printf in std
                if (0 != s->error_proc) {
                    (*s->error_proc)(s,
                        cpplexer::lexing_exception::unexpected_error,
                        "Out of memory!");
                }
                else
                    printf("Out of memory!\n");

                /* get the scanner to stop */
                *cursor = 0;
                return cursor;
            }

            /* the pointers below are rebased relative to the old s->bot,
               which therefore has to be replaced last */
            memcpy(buf, s->tok, s->lim - s->tok);
            s->tok = buf;
            s->ptr = &buf[s->ptr - s->bot];
            cursor = &buf[cursor - s->bot];
            s->lim = &buf[s->lim - s->bot];
            s->top = &s->lim[BOOST_WAVE_BSIZE];
            free(s->bot);
            s->bot = buf;
        }

        if (s->act != 0) {
            cnt = s->last - s->act;
            if (cnt > BOOST_WAVE_BSIZE)
                cnt = BOOST_WAVE_BSIZE;
            memcpy(s->lim, s->act, cnt);
            s->act += cnt;
            if (cnt != BOOST_WAVE_BSIZE)
            {
                s->eof = &s->lim[cnt]; *(s->eof)++ = '\0';
            }
        }
        else {
            /* no input attached: nothing was read. Do not reuse the shift
               distance still stored in cnt as a character count; treat this
               as the end of the input instead. */
            cnt = 0;
            s->eof = s->lim; *(s->eof)++ = '\0';
        }

        /* backslash-newline erasing time */

        /* first scan for backslash-newline and erase them */
        for (p = s->lim; p < s->lim + cnt - 2; ++p)
        {
            int len = 0;
            if (is_backslash(p, s->lim + cnt, len))
            {
                if (*(p+len) == '\n')
                {
                    int offset = len + 1;
                    memmove(p, p + offset, s->lim + cnt - p - offset);
                    cnt -= offset;
                    --p;    /* re-examine this position in the next round */
                    aq_enqueue(s->eol_offsets, p - s->bot + 1);
                }
                else if (*(p+len) == '\r')
                {
                    if (*(p+len+1) == '\n')
                    {
                        int offset = len + 2;
                        memmove(p, p + offset, s->lim + cnt - p - offset);
                        cnt -= offset;
                        --p;
                    }
                    else
                    {
                        int offset = len + 1;
                        memmove(p, p + offset, s->lim + cnt - p - offset);
                        cnt -= offset;
                        --p;
                    }
                    aq_enqueue(s->eol_offsets, p - s->bot + 1);
                }
            }
        }

        /* FIXME: the following code should be fixed to recognize correctly the
                  trigraph backslash token */

        /* check to see if what we just read ends in a backslash */
        if (cnt >= 2)
        {
            uchar last = s->lim[cnt-1];
            uchar last2 = s->lim[cnt-2];
            /* check \ EOB */
            if (last == '\\')
            {
                int next = get_one_char(s);
                /* check for \ \n or \ \r or \ \r \n straddling the border */
                if (next == '\n')
                {
                    --cnt; /* chop the final \, we've already read the \n. */
                    boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
                        cnt + (s->lim - s->bot));
                }
                else if (next == '\r')
                {
                    int next2 = get_one_char(s);
                    if (next2 != '\n' && next2 != -1)
                    {
                        /* next2 belongs to the following data: put it back.
                           At the end of the input (-1) nothing was consumed,
                           so there is nothing to put back. */
                        rewind_stream(s, -1);
                    }
                    --cnt;  /* skip the backslash */
                    boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
                        cnt + (s->lim - s->bot));
                }
                else if (next != -1) /* -1 means end of file */
                {
                    /* next was something else, so rewind the stream */
                    rewind_stream(s, -1);
                }
            }
            /* check \ \r EOB */
            else if (last == '\r' && last2 == '\\')
            {
                int next = get_one_char(s);
                if (next != '\n' && next != -1)
                {
                    /* not part of the line ending: put it back (nothing was
                       consumed if the end of the input was reached) */
                    rewind_stream(s, -1);
                }
                cnt -= 2;   /* skip the \ \r */
                boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
                    cnt + (s->lim - s->bot));
            }
            /* check \ \n EOB */
            else if (last == '\n' && last2 == '\\')
            {
                cnt -= 2;
                boost::wave::cpplexer::re2clex::aq_enqueue(s->eol_offsets,
                    cnt + (s->lim - s->bot));
            }
        }

        s->lim += cnt;
        if (s->eof) /* eof needs adjusting if we erased backslash-newlines */
        {
            s->eof = s->lim;
            *(s->eof)++ = '\0';
        }
    }
    return cursor;
}
boost::wave::token_id
scan(boost::wave::cpplexer::re2clex::Scanner *s)
{
using namespace boost::wave::cpplexer::re2clex;
uchar *cursor = s->tok = s->cur;
/*!re2c
re2c:indent:string = " ";
any = [\t\v\f\r\n\040-\377];
anyctrl = [\000-\377];
OctalDigit = [0-7];
Digit = [0-9];
HexDigit = [a-fA-F0-9];
ExponentPart = [Ee] [+-]? Digit+;
FractionalConstant = (Digit* "." Digit+) | (Digit+ ".");
FloatingSuffix = [fF][lL]?|[lL][fF]?;
IntegerSuffix = [uU][lL]?|[lL][uU]?;
FixedPointSuffix = [dD];
Backslash = [\\]|"??/";
EscapeSequence = Backslash ([abfnrtv?'"] | Backslash | "x" HexDigit+ | OctalDigit OctalDigit? OctalDigit?);
HexQuad = HexDigit HexDigit HexDigit HexDigit;
UniversalChar = Backslash ("u" HexQuad | "U" HexQuad HexQuad);
Newline = "\r\n" | "\n" | "\r";
PPSpace = ([ \t]|("/*"(any\[*]|Newline|("*"+(any\[*/]|Newline)))*"*"+"/"))*;
Pound = "#" | "??=" | "%:";
*/
/*!re2c
"/*" { goto ccomment; }
"//" { goto cppcomment; }
"TRUE" { BOOST_WAVE_RET(T_TRUE); }
"FALSE" { BOOST_WAVE_RET(T_FALSE); }
"{" { BOOST_WAVE_RET(T_LEFTBRACE); }
"}" { BOOST_WAVE_RET(T_RIGHTBRACE); }
"[" { BOOST_WAVE_RET(T_LEFTBRACKET); }
"]" { BOOST_WAVE_RET(T_RIGHTBRACKET); }
"#" { BOOST_WAVE_RET(T_POUND); }
"##" { BOOST_WAVE_RET(T_POUND_POUND); }
"(" { BOOST_WAVE_RET(T_LEFTPAREN); }
")" { BOOST_WAVE_RET(T_RIGHTPAREN); }
";" { BOOST_WAVE_RET(T_SEMICOLON); }
":" { BOOST_WAVE_RET(T_COLON); }
"?" { BOOST_WAVE_RET(T_QUESTION_MARK); }
"." { BOOST_WAVE_RET(T_DOT); }
"+" { BOOST_WAVE_RET(T_PLUS); }
"-" { BOOST_WAVE_RET(T_MINUS); }
"*" { BOOST_WAVE_RET(T_STAR); }
"/" { BOOST_WAVE_RET(T_DIVIDE); }
"%" { BOOST_WAVE_RET(T_PERCENT); }
"^" { BOOST_WAVE_RET(T_XOR); }
"&" { BOOST_WAVE_RET(T_AND); }
"|" { BOOST_WAVE_RET(T_OR); }
"~" { BOOST_WAVE_RET(T_COMPL); }
"!" { BOOST_WAVE_RET(T_NOT); }
"=" { BOOST_WAVE_RET(T_ASSIGN); }
"<" { BOOST_WAVE_RET(T_LESS); }
">" { BOOST_WAVE_RET(T_GREATER); }
"<<" { BOOST_WAVE_RET(T_SHIFTLEFT); }
">>" { BOOST_WAVE_RET(T_SHIFTRIGHT); }
"==" { BOOST_WAVE_RET(T_EQUAL); }
"!=" { BOOST_WAVE_RET(T_NOTEQUAL); }
"<=" { BOOST_WAVE_RET(T_LESSEQUAL); }
">=" { BOOST_WAVE_RET(T_GREATEREQUAL); }
"&&" { BOOST_WAVE_RET(T_ANDAND); }
"||" { BOOST_WAVE_RET(T_OROR); }
"++" { BOOST_WAVE_RET(T_PLUSPLUS); }
"--" { BOOST_WAVE_RET(T_MINUSMINUS); }
"," { BOOST_WAVE_RET(T_COMMA); }
([a-zA-Z_] | UniversalChar) ([a-zA-Z_0-9] | UniversalChar)*
{ BOOST_WAVE_RET(T_IDENTIFIER); }
(("0" [xX] HexDigit+) | ("0" OctalDigit*) | ([1-9] Digit*)) IntegerSuffix?
{ BOOST_WAVE_RET(T_INTLIT); }
((FractionalConstant ExponentPart?) | (Digit+ ExponentPart)) FloatingSuffix?
{ BOOST_WAVE_RET(T_FLOATLIT); }
(FractionalConstant | Digit+) FixedPointSuffix
{ BOOST_WAVE_RET(T_FIXEDPOINTLIT); }
"L"? (['] (EscapeSequence|any\[\n\r\\']|UniversalChar)+ ['])
{ BOOST_WAVE_RET(T_CHARLIT); }
"L"? (["] (EscapeSequence|any\[\n\r\\"]|UniversalChar)* ["])
{ BOOST_WAVE_RET(T_STRINGLIT); }
Pound PPSpace "include" PPSpace "<" (any\[\n\r>])+ ">"
{ BOOST_WAVE_RET(T_PP_HHEADER); }
Pound PPSpace "include" PPSpace "\"" (any\[\n\r"])+ "\""
{ BOOST_WAVE_RET(T_PP_QHEADER); }
Pound PPSpace "include" PPSpace
{ BOOST_WAVE_RET(T_PP_INCLUDE); }
Pound PPSpace "if" { BOOST_WAVE_RET(T_PP_IF); }
Pound PPSpace "ifdef" { BOOST_WAVE_RET(T_PP_IFDEF); }
Pound PPSpace "ifndef" { BOOST_WAVE_RET(T_PP_IFNDEF); }
Pound PPSpace "else" { BOOST_WAVE_RET(T_PP_ELSE); }
Pound PPSpace "elif" { BOOST_WAVE_RET(T_PP_ELIF); }
Pound PPSpace "endif" { BOOST_WAVE_RET(T_PP_ENDIF); }
Pound PPSpace "define" { BOOST_WAVE_RET(T_PP_DEFINE); }
Pound PPSpace "undef" { BOOST_WAVE_RET(T_PP_UNDEF); }
Pound PPSpace "line" { BOOST_WAVE_RET(T_PP_LINE); }
Pound PPSpace "error" { BOOST_WAVE_RET(T_PP_ERROR); }
Pound PPSpace "pragma" { BOOST_WAVE_RET(T_PP_PRAGMA); }
Pound PPSpace "warning" { BOOST_WAVE_RET(T_PP_WARNING); }
[ \t\v\f]+
{ BOOST_WAVE_RET(T_SPACE); }
Newline
{
s->line++;
BOOST_WAVE_RET(T_NEWLINE);
}
"\000"
{
if(cursor != s->eof)
{
using namespace std; // some systems have printf in std
if (0 != s->error_proc) {
(*s->error_proc)(s,
cpplexer::lexing_exception::generic_lexing_error,
"'\\000' in input stream");
}
else
printf("Error: 0 in file\n");
}
BOOST_WAVE_RET(T_EOF);
}
anyctrl
{
BOOST_WAVE_RET(TOKEN_FROM_ID(*s->tok, UnknownTokenType));
}
*/
ccomment:
/*!re2c
"*/" { BOOST_WAVE_RET(T_CCOMMENT); }
Newline
{
/*if(cursor == s->eof) BOOST_WAVE_RET(T_EOF);*/
/*s->tok = cursor; */
s->line += count_backslash_newlines(s, cursor) +1;
goto ccomment;
}
any { goto ccomment; }
"\000"
{
using namespace std; // some systems have printf in std
if(cursor == s->eof)
{
if (s->error_proc)
(*s->error_proc)(s,
cpplexer::lexing_exception::generic_lexing_warning,
"Unterminated comment");
else
printf("Error: Unterminated comment\n");
}
else
{
if (s->error_proc)
(*s->error_proc)(s,
cpplexer::lexing_exception::generic_lexing_error,
"'\\000' in input stream");
else
printf("Error: 0 in file");
}
/* adjust cursor such next call returns T_EOF */
--YYCURSOR;
/* the comment is unterminated, but nevertheless its a comment */
BOOST_WAVE_RET(T_CCOMMENT);
}
anyctrl
{
if (s->error_proc)
(*s->error_proc)(s,
cpplexer::lexing_exception::generic_lexing_error,
"invalid character in input stream");
else
printf("Error: 0 in file");
}
*/
cppcomment:
/*!re2c
Newline
{
/*if(cursor == s->eof) BOOST_WAVE_RET(T_EOF); */
/*s->tok = cursor; */
s->line++;
BOOST_WAVE_RET(T_CPPCOMMENT);
}
any { goto cppcomment; }
"\000"
{
using namespace std; // some systems have printf in std
if(cursor != s->eof)
{
if (s->error_proc)
(*s->error_proc)(s,
cpplexer::lexing_exception::generic_lexing_error,
"'\\000' in input stream");
else
printf("Error: 0 in file");
}
/* adjust cursor such next call returns T_EOF */
--YYCURSOR;
/* the comment is unterminated, but nevertheless its a comment */
BOOST_WAVE_RET(T_CPPCOMMENT);
}
*/
} /* end of scan */
#undef RE2C_ASSERT
///////////////////////////////////////////////////////////////////////////////
} // namespace re2clex
} // namespace idllexer
} // namespace wave
} // namespace boost