// Copyright (c) 2001-2010 Hartmut Kaiser
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

// This example is the equivalent of the following lex program:
/*
//[wcl_flex_version
%{
    int c = 0, w = 0, l = 0;
%}
%%
[^ \t\n]+   { ++w; c += yyleng; }
\n          { ++c; ++l; }
.           { ++c; }
%%
main()
{
    yylex();
    printf("%d %d %d\n", l, w, c);
}
//]
*/
// Its purpose is to do the word count function of the wc command in UNIX. It
// prints the number of lines, words, and characters in a file.
//
// This example shows how to use semantic actions associated with token
// definitions to directly attach actions to tokens. These actions get executed
// whenever the corresponding token is matched in the input sequence. Note how
// this example implements all functionality directly in the lexer definition,
// without any need for a parser.

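// A minimal sketch of the pattern used below (illustrative only, not part of
// the original example): a token definition is paired with a Phoenix
// expression acting as its semantic action, and that action is executed every
// time the token is matched. The names 'identifier' and 'count' are made up
// for this sketch; inside a lexer definition it would read roughly:
//
//     lex::token_def<> identifier("[a-zA-Z_]+");
//     std::size_t count = 0;
//     this->self = identifier [++boost::phoenix::ref(count)];
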
// #define BOOST_SPIRIT_LEXERTL_DEBUG

#include <boost/config/warning_disable.hpp>
//[wcl_includes
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_statement.hpp>
#include <boost/spirit/include/phoenix_algorithm.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
//]

#include <iostream>
#include <string>

#include "example.hpp"    // provides read_from_file(), used in main() below

//[wcl_namespaces
namespace lex = boost::spirit::lex;
//]

///////////////////////////////////////////////////////////////////////////////
// Token definition: We use the lexertl based lexer engine as the underlying
// lexer type.
//
// Note that the lexer type used with this token definition must be the
// 'lexertl::actor_lexer' (see main() below), which is necessary to be able to
// use lexer semantic actions.
///////////////////////////////////////////////////////////////////////////////
// A Phoenix function object computing the distance between two iterators.
// It is used in the semantic action of the 'word' token below to add the
// length of the matched character sequence to the character counter.
struct distance_func
{
    template <typename Iterator1, typename Iterator2>
    struct result : boost::iterator_difference<Iterator1> {};

    template <typename Iterator1, typename Iterator2>
    typename result<Iterator1, Iterator2>::type
    operator()(Iterator1 const& begin, Iterator2 const& end) const
    {
        return std::distance(begin, end);
    }
};
boost::phoenix::function<distance_func> const distance = distance_func();

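// Illustrative only (not part of the original example): 'distance' is a lazy
// Phoenix function, so applying it to arguments yields a deferred expression;
// invoking that expression evaluates it. For instance:
//
//     char const* p = "hello";
//     std::ptrdiff_t n = distance(p, p + 5)();    // n == 5
//
// In the semantic action below the same mechanism computes the length of the
// matched token from the _start and _end placeholders.
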
//[wcl_token_definition
template <typename Lexer>
struct word_count_tokens : lex::lexer<Lexer>
{
    word_count_tokens()
      : c(0), w(0), l(0)
      , word("[^ \t\n]+")     // define tokens
      , eol("\n")
      , any(".")
    {
        using boost::spirit::lex::_start;
        using boost::spirit::lex::_end;
        using boost::phoenix::ref;

        // associate tokens with the lexer
        this->self
            =   word  [++ref(w), ref(c) += distance(_start, _end)]
            |   eol   [++ref(c), ++ref(l)]
            |   any   [++ref(c)]
            ;
    }

    std::size_t c, w, l;                // character, word, and line counters
    lex::token_def<> word, eol, any;
};
//]

///////////////////////////////////////////////////////////////////////////////
//[wcl_main
int main(int argc, char* argv[])
{

/*< Specifying `omit` as the token attribute type generates a token class
     not holding any token attribute at all (not even the iterator range of the
     matched input sequence), therefore optimizing the token, the lexer, and
     possibly the parser implementation as much as possible. Specifying
     `mpl::false_` as the 3rd template parameter generates a token
     type and an iterator, both holding no lexer state, allowing for even more
     aggressive optimizations. As a result the token instances contain the
     token id as their only data member.
>*/ typedef
        lex::lexertl::token<char const*, lex::omit, boost::mpl::false_>
    token_type;

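    /*  For comparison (illustrative only, not part of the original example):
        the default lexertl token type keeps the matched iterator range as its
        token attribute and would be declared roughly as

            typedef lex::lexertl::token<char const*> default_token_type;

        The `omit`/`mpl::false_` combination above trades that flexibility for
        the smallest possible token representation. */
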
/*< This defines the lexer type to use
>*/ typedef lex::lexertl::actor_lexer<token_type> lexer_type;

/*< Create the lexer object instance needed to invoke the lexical analysis
>*/ word_count_tokens<lexer_type> word_count_lexer;

/*< Read input from the given file, tokenize all the input, while discarding
     all generated tokens
>*/ std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1]));
    char const* first = str.c_str();
    char const* last = &first[str.size()];

/*< Create a pair of iterators returning the sequence of generated tokens
>*/ lexer_type::iterator_type iter = word_count_lexer.begin(first, last);
    lexer_type::iterator_type end = word_count_lexer.end();

/*< Here we simply iterate over all tokens, making sure to break the loop
     if an invalid token gets returned from the lexer
>*/ while (iter != end && token_is_valid(*iter))
        ++iter;

    if (iter == end) {
        std::cout << "lines: " << word_count_lexer.l
                  << ", words: " << word_count_lexer.w
                  << ", characters: " << word_count_lexer.c
                  << "\n";
    }
    else {
        std::string rest(first, last);
        std::cout << "Lexical analysis failed\n" << "stopped at: \""
                  << rest << "\"\n";
    }
    return 0;
}
//]
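
// Usage note (derived from the code above): when invoked without command line
// arguments the program reads "word_count.input", otherwise it reads the file
// named by its first argument, and prints the line, word, and character counts
// of that file.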