90 lines
2.2 KiB
C++
90 lines
2.2 KiB
C++
/*
 *
 * Copyright (c) 2003
 * John Maddock
 *
 * Use, modification and distribution are subject to the
 * Boost Software License, Version 1.0. (See accompanying file
 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
 *
 */
|
|
|
|
/*
 * LOCATION: see http://www.boost.org for most recent version.
 * FILE regex_token_iterator_example_2.cpp
 * VERSION see <boost/version.hpp>
 * DESCRIPTION: regex_token_iterator example: spit out linked URLs.
 */
|
|
|
|
|
|
#include <boost/regex.hpp>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <iterator>
|
|
|
|
//
// Expression used to locate hyperlinks: matches the start of an <a ...> tag
// that carries a double-quoted href attribute. Sub-expression 1 captures the
// text between the quotes; sub-expression 0 is the whole match.
// The icase flag makes the match case-insensitive.
//
boost::regex e(
   "<\\s*A\\s+"          // '<', optional whitespace, 'A', then whitespace
   "[^>]*"               // anything up to the attribute, staying inside the tag
   "href\\s*=\\s*"       // 'href', '=', with optional whitespace around '='
   "\"([^\"]*)\"",       // $1: the double-quoted attribute value
   boost::regex::normal | boost::regbase::icase);
|
|
|
|
//
// load_file:
// Reads every remaining character of the stream `is` into `s`.
//
//   s  - destination string; always cleared first, so it is empty on
//        return when nothing could be read.
//   is - source stream; consumed one character at a time until get() fails.
//
// A stream that is already in a failed state (failbit or badbit) is not
// read from at all.
//
void load_file(std::string& s, std::istream& is)
{
   s.erase();
   // fail() covers both failbit and badbit; get() could not succeed on
   // such a stream anyway, so there is nothing to load.
   if(is.fail()) return;
   //
   // attempt to grow string buffer to match file size,
   // this doesn't always work...
   // in_avail() is only a hint: it may be zero, and it is -1 when the
   // stream buffer reports that no characters are available. Casting -1
   // to size_type would request an enormous reservation and make
   // reserve() throw std::length_error, so only a positive hint is used.
   //
   const std::streamsize avail = is.rdbuf()->in_avail();
   if(avail > 0)
      s.reserve(static_cast<std::string::size_type>(avail));
   char c;
   while(is.get(c))
   {
      // use logarithmic growth strategy, in case
      // in_avail (above) gave no usable size:
      if(s.capacity() == s.size())
         s.reserve(s.capacity() * 3);
      s.append(1, c);
   }
}
|
|
|
|
int main(int argc, char** argv)
|
|
{
|
|
std::string s;
|
|
int i;
|
|
for(i = 1; i < argc; ++i)
|
|
{
|
|
std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
|
|
s.erase();
|
|
std::ifstream is(argv[i]);
|
|
load_file(s, is);
|
|
is.close();
|
|
boost::sregex_token_iterator i(s.begin(), s.end(), e, 1);
|
|
boost::sregex_token_iterator j;
|
|
while(i != j)
|
|
{
|
|
std::cout << *i++ << std::endl;
|
|
}
|
|
}
|
|
//
|
|
// alternative method:
|
|
// test the array-literal constructor, and split out the whole
|
|
// match as well as $1....
|
|
//
|
|
for(i = 1; i < argc; ++i)
|
|
{
|
|
std::cout << "Findings URL's in " << argv[i] << ":" << std::endl;
|
|
s.erase();
|
|
std::ifstream is(argv[i]);
|
|
load_file(s, is);
|
|
is.close();
|
|
const int subs[] = {1, 0,};
|
|
boost::sregex_token_iterator i(s.begin(), s.end(), e, subs);
|
|
boost::sregex_token_iterator j;
|
|
while(i != j)
|
|
{
|
|
std::cout << *i++ << std::endl;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|