// Copyright (c) 2001-2010 Hartmut Kaiser
//
//  Distributed under the Boost Software License, Version 1.0. (See accompanying
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example is equivalent to the following lex program:
//
//      %{
//      /* INITIAL is the default start state. COMMENT is our new */
//      /* state where we remove comments.                        */
//      %}
//
//      %s COMMENT
//      %%
//      <INITIAL>"//".*    ;
//      <INITIAL>"/*"      BEGIN COMMENT;
//      <INITIAL>.         ECHO;
//      <INITIAL>[\n]      ECHO;
//      <COMMENT>"*/"      BEGIN INITIAL;
//      <COMMENT>.         ;
//      <COMMENT>[\n]      ;
//      %%
//
//      main()
//      {
//          yylex();
//      }
//
//  Its purpose is to strip comments out of C code.
//
//  Additionally, this example demonstrates the use of lexer states to
//  structure the lexer definition.

// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_core.hpp>

#include <iostream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;

///////////////////////////////////////////////////////////////////////////////
//  Token definition: We use the lexertl based lexer engine as the underlying
//  lexer type.
///////////////////////////////////////////////////////////////////////////////
enum tokenids
{
    IDANY = lex::min_token_id + 10,
    IDEOL = lex::min_token_id + 11
};

///////////////////////////////////////////////////////////////////////////////
//  Simple custom semantic action function object used to print the matched
//  input sequence for a particular token
template <typename Char, typename Traits>
struct echo_input_functor
{
    echo_input_functor (std::basic_ostream<Char, Traits>& os_)
      : os(os_) {}

    // This is called by the semantic action handling code during lexing
    template <typename Iterator, typename Context>
    void operator()(Iterator const& b, Iterator const& e
      , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&
      , std::size_t&, Context&) const
    {
        os << std::string(b, e);
    }

    std::basic_ostream<Char, Traits>& os;
};

//  Helper creating an echo_input_functor bound to the given output stream
template <typename Char, typename Traits>
inline echo_input_functor<Char, Traits>
echo_input(std::basic_ostream<Char, Traits>& os)
{
    return echo_input_functor<Char, Traits>(os);
}

///////////////////////////////////////////////////////////////////////////////
//  Another simple custom semantic action function object used to switch the
//  state of the lexer
struct set_lexer_state
{
    set_lexer_state(char const* state_)
      : state(state_) {}

    // This is called by the semantic action handling code during lexing
    template <typename Iterator, typename Context>
    void operator()(Iterator const&, Iterator const&
      , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)&
      , std::size_t&, Context& ctx) const
    {
        ctx.set_state_name(state.c_str());
    }

    std::string state;
};

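// Note, added only as a sketch: with the Phoenix headers included above, the
// same state switch could presumably be written inline via the Spirit.Lex
// placeholder lex::_state instead of a hand-written functor, e.g. in the
// lexer definition below
//
//     ccomment [ lex::_state = "COMMENT" ]
//
// The set_lexer_state functor is kept here to show how a plain function
// object plugs into the lexer semantic action interface.
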
///////////////////////////////////////////////////////////////////////////////
//  The lexer definition: strips C and C++ comments, echoing everything else
template <typename Lexer>
struct strip_comments_tokens : lex::lexer<Lexer>
{
    strip_comments_tokens()
      : strip_comments_tokens::base_type(lex::match_flags::match_default)
    {
        // define tokens and associate them with the lexer
        cppcomment = "\"//\"[^\n]*";    // '//[^\n]*'
        ccomment = "\"/*\"";            // '/*'
        endcomment = "\"*/\"";          // '*/'
        any = std::string(".");
        eol = "\n";

        // The following tokens are associated with the default lexer state
        // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is
        // strictly optional.
        this->self
            =   cppcomment                                  // no action: comment is dropped
            |   ccomment    [ set_lexer_state("COMMENT") ]  // switch to COMMENT state
            |   eol         [ echo_input(std::cout) ]
            |   any         [ echo_input(std::cout) ]
            ;

        // The following tokens are associated with the lexer state 'COMMENT'.
        this->self("COMMENT")
            =   endcomment  [ set_lexer_state("INITIAL") ]  // switch back to INITIAL
            |   "\n"                                        // no action: dropped
            |   std::string(".")                            // no action: dropped
            ;
    }

    lex::token_def<> cppcomment, ccomment, endcomment, any, eol;
};

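// As an illustration, given an input such as
//
//     int i = 0;   // trailing comment
//     /* block
//        comment */ int j = i;
//
// the lexer defined above should echo only the non-comment text, roughly
//
//     int i = 0;
//      int j = i;
//
// because the comment tokens (and the newlines inside a C comment) carry no
// echo_input action and are therefore silently consumed.
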
///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // lexer type
    typedef
        lex::lexertl::actor_lexer<lex::lexertl::token<base_iterator_type> >
    lexer_type;

    // now we use the types defined above to create the lexer object instance
    // needed to invoke the lexical analysis
    strip_comments_tokens<lexer_type> strip_comments;             // Our lexer

    // No parsing is done at all; everything happens in the lexer semantic
    // actions.
    std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1]));
    base_iterator_type first = str.begin();
    bool r = lex::tokenize(first, str.end(), strip_comments);

    if (!r) {
        std::string rest(first, str.end());
        std::cerr << "Lexical analysis failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
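
// Usage sketch (the binary name is an assumption, not taken from the build
// files): run without arguments the program reads "strip_comments.input",
// otherwise it reads the file named on the command line, and writes the
// stripped text to stdout via the echo_input semantic actions.
//
//     ./strip_comments_lexer                # strip comments from strip_comments.input
//     ./strip_comments_lexer some_file.c    # strip comments from some_file.c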