265 lines
10 KiB
Plaintext
265 lines
10 KiB
Plaintext
[/
|
|
Copyright 2006-2007 John Maddock.
|
|
Distributed under the Boost Software License, Version 1.0.
|
|
(See accompanying file LICENSE_1_0.txt or copy at
|
|
http://www.boost.org/LICENSE_1_0.txt).
|
|
]
|
|
|
|
|
|
[section:regex_replace regex_replace]
|
|
|
|
#include <boost/regex.hpp>
|
|
|
|
The algorithm [regex_replace] searches through a string finding all the
|
|
matches to the regular expression: for each match it then calls
|
|
[match_results_format] to format the string and sends the result to the
|
|
output iterator. Sections of text that do not match are copied to the
|
|
output unchanged only if the /flags/ parameter does not have the
|
|
flag `format_no_copy` set. If the flag `format_first_only` is set then
|
|
only the first occurrence is replaced rather than all occurrences.
|
|
|
|
template <class OutputIterator, class BidirectionalIterator, class traits, class charT, class Formatter>
|
|
OutputIterator regex_replace(OutputIterator out,
|
|
BidirectionalIterator first,
|
|
BidirectionalIterator last,
|
|
const basic_regex<charT, traits>& e,
|
|
Formatter fmt,
|
|
match_flag_type flags = match_default);
|
|
|
|
template <class traits, class charT, class Formatter>
|
|
basic_string<charT> regex_replace(const basic_string<charT>& s,
|
|
const basic_regex<charT, traits>& e,
|
|
Formatter fmt,
|
|
match_flag_type flags = match_default);
|
|
|
|
|
|
[h4 Description]
|
|
|
|
template <class OutputIterator, class BidirectionalIterator, class traits, class charT, class Formatter>
|
|
OutputIterator regex_replace(OutputIterator out,
|
|
BidirectionalIterator first,
|
|
BidirectionalIterator last,
|
|
const basic_regex<charT, traits>& e,
|
|
Formatter fmt,
|
|
match_flag_type flags = match_default);
|
|
|
|
Enumerates all the occurrences of expression /e/ in the sequence \[first, last),
|
|
replacing each occurrence with the string that results by merging the
|
|
match found with the format string /fmt/, and copies the resulting string to /out/.
|
|
In the case that /fmt/ is a unary, binary or ternary function object, then the
|
|
character sequence generated by that object is copied unchanged to the output when performing
|
|
a substitution.
|
|
|
|
If the flag `format_no_copy` is set in /flags/ then unmatched sections of
|
|
text are not copied to output.
|
|
|
|
If the flag `format_first_only` is set in /flags/ then only the first
|
|
occurrence of /e/ is replaced.
|
|
|
|
The manner in which the format string /fmt/ is interpreted, along with the
|
|
rules used for finding matches, are determined by the flags set in /flags/:
|
|
see [match_flag_type].
|
|
|
|
[*Requires]
|
|
The type `Formatter` must be either a pointer to a null-terminated string
|
|
of type `char_type[]`, or be a container of `char_type`'s (for example
|
|
`std::basic_string<char_type>`) or be a unary, binary or ternary functor
|
|
that computes the replacement string from a function call: either
|
|
`fmt(what)` which must return a container of `char_type`'s to be used as the
|
|
replacement text, or either `fmt(what, out)` or `fmt(what, out, flags)`, both of
|
|
which write the replacement text to `*out`, and then return the new
|
|
OutputIterator position. In each case `what` is the [match_results] object
|
|
that represents the match found. Note that if the formatter is a functor, then it is
|
|
['passed by value]: users that want to pass function objects with internal state
|
|
might want to use [@../../../../doc/html/ref.html Boost.Ref] to wrap the object so
|
|
that it's passed by reference.
|
|
|
|
[*Effects]: Constructs a [regex_iterator] object:
|
|
|
|
regex_iterator<BidirectionalIterator, charT, traits>
|
|
i(first, last, e, flags),
|
|
|
|
and uses /i/ to enumerate through all of the matches /m/ of type
|
|
[match_results] `<BidirectionalIterator>` that occur within the sequence
|
|
\[first, last).
|
|
|
|
If no such matches are found and
|
|
|
|
!(flags & format_no_copy)
|
|
|
|
then calls
|
|
|
|
std::copy(first, last, out).
|
|
|
|
Otherwise, for each match found, if
|
|
|
|
!(flags & format_no_copy)
|
|
|
|
calls
|
|
|
|
std::copy(m.prefix().first, m.prefix().last, out),
|
|
|
|
and then calls
|
|
|
|
m.format(out, fmt, flags).
|
|
|
|
Finally if
|
|
|
|
!(flags & format_no_copy)
|
|
|
|
calls
|
|
|
|
std::copy(last_m.suffix().first, last_m.suffix().last, out)
|
|
|
|
where /last_m/ is a copy of the last match found.
|
|
|
|
If `flags & format_first_only` is non-zero then only the first match found
|
|
is replaced.
|
|
|
|
[*Throws]: `std::runtime_error` if the complexity of matching the expression
|
|
against an N character string begins to exceed O(N[super 2]), or if the
|
|
program runs out of stack space while matching the expression (if Boost.Regex is
|
|
configured in recursive mode), or if the matcher exhausts its permitted
|
|
memory allocation (if Boost.Regex is configured in non-recursive mode).
|
|
|
|
[*Returns]: out.
|
|
|
|
template <class traits, class charT, class Formatter>
|
|
basic_string<charT> regex_replace(const basic_string<charT>& s,
|
|
const basic_regex<charT, traits>& e,
|
|
Formatter fmt,
|
|
match_flag_type flags = match_default);
|
|
|
|
[*Requires]
|
|
The type `Formatter` must be either a pointer to a null-terminated string
|
|
of type `char_type[]`, or be a container of `char_type`'s (for example
|
|
`std::basic_string<char_type>`) or be a unary, binary or ternary functor
|
|
that computes the replacement string from a function call: either
|
|
`fmt(what)` which must return a container of `char_type`'s to be used as the
|
|
replacement text, or either `fmt(what, out)` or `fmt(what, out, flags)`, both of
|
|
which write the replacement text to `*out`, and then return the new
|
|
OutputIterator position. In each case `what` is the [match_results] object
|
|
that represents the match found.
|
|
|
|
[*Effects]: Constructs an object `basic_string<charT> result`, calls
|
|
`regex_replace(back_inserter(result), s.begin(), s.end(), e, fmt, flags)`,
|
|
and then returns `result`.
|
|
|
|
[h4 Examples]
|
|
|
|
The following example takes C/C++ source code as input, and outputs
|
|
syntax highlighted HTML code.
|
|
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <iterator>
|
|
#include <boost/regex.hpp>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
|
|
// purpose:
|
|
// takes the contents of a file and transforms it into
|
|
// syntax highlighted code in html format
|
|
|
|
// Compiled regular expressions used by main(): e1 is assigned from
// expression_text and e2 from pre_expression.
boost::regex e1, e2;
// Pattern and format strings used by main(); only declared here, the
// definitions follow main().
extern const char* expression_text;
extern const char* format_string;
extern const char* pre_expression;
extern const char* pre_format;
// Text written before and after the converted source in every output file.
extern const char* header_text;
extern const char* footer_text;
|
|
|
|
void load_file(std::string& s, std::istream& is)
|
|
{
|
|
s.erase();
|
|
s.reserve(is.rdbuf()->in_avail());
|
|
char c;
|
|
while(is.get(c))
|
|
{
|
|
if(s.capacity() == s.size())
|
|
s.reserve(s.capacity() * 3);
|
|
s.append(1, c);
|
|
}
|
|
}
|
|
|
|
int main(int argc, const char** argv)
|
|
{
|
|
try{
|
|
e1.assign(expression_text);
|
|
e2.assign(pre_expression);
|
|
for(int i = 1; i < argc; ++i)
|
|
{
|
|
std::cout << "Processing file " << argv[i] << std::endl;
|
|
std::ifstream fs(argv[i]);
|
|
std::string in;
|
|
load_file(in, fs);
|
|
std::string out_name(std::string(argv[i]) + std::string(".htm"));
|
|
std::ofstream os(out_name.c_str());
|
|
os << header_text;
|
|
// strip '<' and '>' first by outputting to a
|
|
// temporary string stream
|
|
std::ostringstream t(std::ios::out | std::ios::binary);
|
|
std::ostream_iterator<char, char> oi(t);
|
|
boost::regex_replace(oi, in.begin(), in.end(),
|
|
e2, pre_format, boost::match_default | boost::format_all);
|
|
// then output to final output stream
|
|
// adding syntax highlighting:
|
|
std::string s(t.str());
|
|
std::ostream_iterator<char, char> out(os);
|
|
boost::regex_replace(out, s.begin(), s.end(),
|
|
e1, format_string, boost::match_default | boost::format_all);
|
|
os << footer_text;
|
|
}
|
|
}
|
|
catch(...)
|
|
{ return -1; }
|
|
return 0;
|
|
}
|
|
|
|
// First-pass pattern: matches the characters that are special in HTML, plus
// carriage returns. Sub-expression 1 is '<', 2 is '>' and 3 is '&'; a lone
// "\r" matches without setting any sub-expression.
const char* pre_expression = "(<)|(>)|(&)|\\r";
// First-pass format (used with format_all): each (?N...) group emits its
// text only when sub-expression N matched, so '<', '>' and '&' become the
// corresponding HTML entities and a matched "\r" is replaced by nothing.
const char* pre_format = "(?1&lt;)(?2&gt;)(?3&amp;)";
|
|
|
|
|
|
// Second-pass pattern: one alternative per token class. The index of the
// marked sub-expression that matched tells format_string which markup to use.
const char* expression_text =
   // preprocessor directives: index 1
   "(^[[:blank:]]*#(?:[^\\\\\\n]|\\\\[^\\n[:punct:][:word:]]*[\\n[:punct:][:word:]])*)|"
   // comment: index 2
   "(//[^\\n]*|/\\*.*?\\*/)|"
   // literals: index 3
   "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+"
   "(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|"
   // string literals: index 4
   "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|"
   // keywords: index 5
   "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import"
   "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall"
   "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool"
   "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete"
   "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto"
   "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected"
   "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast"
   "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned"
   "|using|virtual|void|volatile|wchar_t|while)\\>"
   ;
|
|
|
|
// Second-pass format: one (?N...) group per sub-expression index of
// expression_text; "$&" stands for the matched text, which is wrapped in
// the markup for its token class.
const char* format_string = "(?1<font color=\"#008040\">$&</font>)"
                            "(?2<I><font color=\"#000080\">$&</font></I>)"
                            "(?3<font color=\"#0000A0\">$&</font>)"
                            "(?4<font color=\"#0000FF\">$&</font>)"
                            "(?5<B>$&</B>)";
|
|
|
|
// Text written at the start of every output file: the HTML head, the opening
// <BODY> tag and the opening <PRE> that encloses the converted source.
const char* header_text =
   "<HTML>\n<HEAD>\n"
   "<TITLE>Auto-generated html formatted source</TITLE>\n"
   "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=windows-1252\">\n"
   "</HEAD>\n"
   "<BODY LINK=\"#0000ff\" VLINK=\"#800080\" BGCOLOR=\"#ffffff\">\n"
   // The spacer paragraph uses the &nbsp; entity so that it does not depend
   // on how this source file is encoded.
   "<P>&nbsp;</P>\n<PRE>";
|
|
|
|
// Text written at the end of every output file: closes the <PRE> and <BODY>
// elements. NOTE(review): no closing </HTML> tag is emitted here.
const char* footer_text = "</PRE>\n</BODY>\n\n";
|
|
|
|
|
|
[endsect]
|
|
|