spirit/example/lex/static_lexer/word_count_static.cpp
2014-04-11 16:27:54 +04:00

121 lines
4.4 KiB
C++

// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
// The purpose of this example is to show, how it is possible to use a lexer
// token definition for two purposes:
//
// . To generate C++ code implementing a static lexical analyzer allowing
// to recognize all defined tokens
// . To integrate the generated C++ lexer into the /Spirit/ framework.
//
// #define BOOST_SPIRIT_LEXERTL_DEBUG
#define BOOST_VARIANT_MINIMIZE_SIZE
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
//[wc_static_include
#include <boost/spirit/include/lex_static_lexertl.hpp>
//]
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <string>
#include "../example.hpp"
#include "word_count_tokens.hpp" // token definition
#include "word_count_static.hpp" // generated tokenizer
using namespace boost::spirit;
using namespace boost::spirit::ascii;
///////////////////////////////////////////////////////////////////////////////
// Grammar definition
///////////////////////////////////////////////////////////////////////////////
//[wc_static_grammar
// This is an ordinary grammar definition following the rules defined by
// Spirit.Qi. There is nothing specific about it, except it gets the token
// definition class instance passed to the constructor to allow accessing the
// embedded token_def<> instances.
template <typename Iterator>
struct word_count_grammar : qi::grammar<Iterator>
{
template <typename TokenDef>
word_count_grammar(TokenDef const& tok)
: word_count_grammar::base_type(start)
, c(0), w(0), l(0)
{
using boost::phoenix::ref;
using boost::phoenix::size;
// associate the defined tokens with the lexer, at the same time
// defining the actions to be executed
start = *( tok.word [ ++ref(w), ref(c) += size(_1) ]
| lit('\n') [ ++ref(l), ++ref(c) ]
| qi::token(IDANY) [ ++ref(c) ]
)
;
}
std::size_t c, w, l; // counter for characters, words, and lines
qi::rule<Iterator> start;
};
//]
///////////////////////////////////////////////////////////////////////////////
//[wc_static_main
int main(int argc, char* argv[])
{
// Define the token type to be used: 'std::string' is available as the type
// of the token value.
typedef lex::lexertl::token<
char const*, boost::mpl::vector<std::string>
> token_type;
// Define the lexer type to be used as the base class for our token
// definition.
//
// This is the only place where the code is different from an equivalent
// dynamic lexical analyzer. We use the `lexertl::static_lexer<>` instead of
// the `lexertl::lexer<>` as the base class for our token defintion type.
//
// As we specified the suffix "wc" while generating the static tables we
// need to pass the type lexertl::static_::lexer_wc as the second template
// parameter below (see word_count_generate.cpp).
typedef lex::lexertl::static_lexer<
token_type, lex::lexertl::static_::lexer_wc
> lexer_type;
// Define the iterator type exposed by the lexer.
typedef word_count_tokens<lexer_type>::iterator_type iterator_type;
// Now we use the types defined above to create the lexer and grammar
// object instances needed to invoke the parsing process.
word_count_tokens<lexer_type> word_count; // Our lexer
word_count_grammar<iterator_type> g (word_count); // Our parser
// Read in the file into memory.
std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
char const* first = str.c_str();
char const* last = &first[str.size()];
// Parsing is done based on the token stream, not the character stream.
bool r = lex::tokenize_and_parse(first, last, word_count, g);
if (r) { // success
std::cout << "lines: " << g.l << ", words: " << g.w
<< ", characters: " << g.c << "\n";
}
else {
std::string rest(first, last);
std::cerr << "Parsing failed\n" << "stopped at: \""
<< rest << "\"\n";
}
return 0;
}
//]