123 lines
4.5 KiB
Plaintext
123 lines
4.5 KiB
Plaintext
[/
|
|
Copyright 2006-2007 John Maddock.
|
|
Distributed under the Boost Software License, Version 1.0.
|
|
(See accompanying file LICENSE_1_0.txt or copy at
|
|
http://www.boost.org/LICENSE_1_0.txt).
|
|
]
|
|
|
|
|
|
[section:regex_split regex_split (deprecated)]
|
|
|
|
The algorithm [regex_split] has been deprecated in favor of the iterator
|
|
[regex_token_iterator] which has a more flexible and powerful interface,
|
|
as well as following the more usual standard library "pull" rather than
|
|
"push" semantics.
|
|
|
|
Code which uses [regex_split] will continue to compile; the following
|
|
documentation is taken from a previous Boost.Regex version:
|
|
|
|
#include <boost/regex.hpp>
|
|
|
|
Algorithm [regex_split] performs a similar operation to the Perl split operation,
|
|
and comes in three overloaded forms:
|
|
|
|
template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2>
|
|
std::size_t regex_split(OutputIterator out,
|
|
std::basic_string<charT, Traits1, Alloc1>& s,
|
|
const basic_regex<charT, Traits2>& e,
|
|
boost::match_flag_type flags,
|
|
std::size_t max_split);
|
|
|
|
template <class OutputIterator, class charT, class Traits1, class Alloc1, class Traits2>
|
|
std::size_t regex_split(OutputIterator out,
|
|
std::basic_string<charT, Traits1, Alloc1>& s,
|
|
const basic_regex<charT, Traits2>& e,
|
|
boost::match_flag_type flags = match_default);
|
|
|
|
template <class OutputIterator, class charT, class Traits1, class Alloc1>
|
|
std::size_t regex_split(OutputIterator out,
|
|
std::basic_string<charT, Traits1, Alloc1>& s);
|
|
|
|
[*Effects]: Each version of the algorithm takes an output-iterator for
|
|
output, and a string for input. If the expression contains no marked
|
|
sub-expressions, then the algorithm writes one string onto the output-iterator
|
|
for each section of input that does not match the expression.
|
|
If the expression does contain marked sub-expressions, then each
|
|
time a match is found, one string for each marked sub-expression will be
|
|
written to the output-iterator. No more than /max_split/ strings will be written
|
|
to the output-iterator. Before returning, all the input processed will be
|
|
deleted from the string /s/ (if /max_split/ is not reached then all of /s/
|
|
will be deleted). Returns the number of strings written to the output-iterator.
|
|
If the parameter /max_split/ is not specified then it defaults to `UINT_MAX`.
|
|
If no expression is specified, then it defaults to "\\s+", and splitting occurs
|
|
on whitespace.
|
|
|
|
[*Throws]: `std::runtime_error` if the complexity of matching the expression
|
|
against an N character string begins to exceed O(N[super 2]), or if the
|
|
program runs out of stack space while matching the expression (if Boost.Regex is
|
|
configured in recursive mode), or if the matcher exhausts its permitted
|
|
memory allocation (if Boost.Regex is configured in non-recursive mode).
|
|
|
|
[*Example]: the following function will split the input string into a
|
|
series of tokens, and remove each token from the string /s/:
|
|
|
|
unsigned tokenise(std::list<std::string>& l, std::string& s)
|
|
{
|
|
return boost::regex_split(std::back_inserter(l), s);
|
|
}
|
|
|
|
[*Example]: the following short program will extract all of the URLs
|
|
from an HTML file, and print them out to `std::cout`:
|
|
|
|
#include <list>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <boost/regex.hpp>
|
|
|
|
boost::regex e("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"",
|
|
boost::regbase::normal | boost::regbase::icase);
|
|
|
|
void load_file(std::string& s, std::istream& is)
|
|
{
|
|
s.erase();
|
|
//
|
|
// attempt to grow string buffer to match file size,
|
|
// this doesn't always work...
|
|
s.reserve(is.rdbuf()->in_avail());
|
|
char c;
|
|
while(is.get(c))
|
|
{
|
|
// use logarithmic growth strategy, in case
|
|
// in_avail (above) returned zero:
|
|
if(s.capacity() == s.size())
|
|
s.reserve(s.capacity() * 3);
|
|
s.append(1, c);
|
|
}
|
|
}
|
|
|
|
|
|
int main(int argc, char** argv)
|
|
{
|
|
std::string s;
|
|
std::list<std::string> l;
|
|
|
|
for(int i = 1; i < argc; ++i)
|
|
{
|
|
std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
|
|
s.erase();
|
|
std::ifstream is(argv[i]);
|
|
load_file(s, is);
|
|
boost::regex_split(std::back_inserter(l), s, e);
|
|
while(l.size())
|
|
{
|
|
s = *(l.begin());
|
|
l.pop_front();
|
|
std::cout << s << std::endl;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
[endsect]
|
|
|