regex/example/snippets/icu_example.cpp
John Maddock 05636b5c89 Fix numerous VC-10 compiler warnings.
Rewrite ICU configuration to use the new Boost.Build configuration logic.

[SVN r61893]
2010-05-10 12:13:49 +00:00

189 lines
5.4 KiB
C++

/*
*
* Copyright (c) 2004
* John Maddock
*
* Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*
*/
/*
* LOCATION: see http://www.boost.org for most recent version.
* FILE mfc_example.cpp
* VERSION see <boost/version.hpp>
* DESCRIPTION: examples of using Boost.Regex with MFC and ATL string types.
*/
#include <boost/regex/config.hpp>
#ifdef BOOST_HAS_ICU
#include <boost/regex/icu.hpp>
#include <iostream>
#include <assert.h>
//
// Find out if *password* meets our password requirements,
// as defined by the regular expression *requirements*.
//
bool is_valid_password(const U_NAMESPACE_QUALIFIER UnicodeString& password, const U_NAMESPACE_QUALIFIER UnicodeString& requirements)
{
return boost::u32regex_match(password, boost::make_u32regex(requirements));
}
//
// Extract filename part of a path from a UTF-8 encoded std::string and return the result
// as another std::string:
//
std::string get_filename(const std::string& path)
{
boost::u32regex r = boost::make_u32regex("(?:\\A|.*\\\\)([^\\\\]+)");
boost::smatch what;
if(boost::u32regex_match(path, what, r))
{
// extract $1 as a std::string:
return what.str(1);
}
else
{
throw std::runtime_error("Invalid pathname");
}
}
U_NAMESPACE_QUALIFIER UnicodeString extract_greek(const U_NAMESPACE_QUALIFIER UnicodeString& text)
{
// searches through some UTF-16 encoded text for a block encoded in Greek,
// this expression is imperfect, but the best we can do for now - searching
// for specific scripts is actually pretty hard to do right.
boost::u32regex r = boost::make_u32regex(L"[\\x{370}-\\x{3FF}](?:[^[:L*:]]|[\\x{370}-\\x{3FF}])*");
boost::u16match what;
if(boost::u32regex_search(text, what, r))
{
// extract $0 as a UnicodeString:
return U_NAMESPACE_QUALIFIER UnicodeString(what[0].first, what.length(0));
}
else
{
throw std::runtime_error("No Greek found!");
}
}
void enumerate_currencies(const std::string& text)
{
// enumerate and print all the currency symbols, along
// with any associated numeric values:
const char* re =
"([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
"([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
"(?(1)"
"|(?(2)"
"[[:Cf:][:Cc:][:Z*:]]*"
")"
"[[:Sc:]]"
")";
boost::u32regex r = boost::make_u32regex(re);
boost::u32regex_iterator<std::string::const_iterator> i(boost::make_u32regex_iterator(text, r)), j;
while(i != j)
{
std::cout << (*i)[0] << std::endl;
++i;
}
}
void enumerate_currencies2(const std::string& text)
{
// enumerate and print all the currency symbols, along
// with any associated numeric values:
const char* re =
"([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
"([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
"(?(1)"
"|(?(2)"
"[[:Cf:][:Cc:][:Z*:]]*"
")"
"[[:Sc:]]"
")";
boost::u32regex r = boost::make_u32regex(re);
boost::u32regex_token_iterator<std::string::const_iterator>
i(boost::make_u32regex_token_iterator(text, r, 1)), j;
while(i != j)
{
std::cout << *i << std::endl;
++i;
}
}
//
// Take a credit card number as a string of digits,
// and reformat it as a human readable string with "-"
// separating each group of four digit;,
// note that we're mixing a UTF-32 regex, with a UTF-16
// string and a UTF-8 format specifier, and it still all
// just works:
//
const boost::u32regex e = boost::make_u32regex("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
const char* human_format = "$1-$2-$3-$4";
U_NAMESPACE_QUALIFIER UnicodeString human_readable_card_number(const U_NAMESPACE_QUALIFIER UnicodeString& s)
{
return boost::u32regex_replace(s, e, human_format);
}
int main()
{
// password checks using u32regex_match:
U_NAMESPACE_QUALIFIER UnicodeString pwd = "abcDEF---";
U_NAMESPACE_QUALIFIER UnicodeString pwd_check = "(?=.*[[:lower:]])(?=.*[[:upper:]])(?=.*[[:punct:]]).{6,}";
bool b = is_valid_password(pwd, pwd_check);
assert(b);
pwd = "abcD-";
b = is_valid_password(pwd, pwd_check);
assert(!b);
// filename extraction with u32regex_match:
std::string file = "abc.hpp";
file = get_filename(file);
assert(file == "abc.hpp");
file = "c:\\a\\b\\c\\d.h";
file = get_filename(file);
assert(file == "d.h");
// Greek text extraction with u32regex_search:
const UChar t[] = {
'S', 'o', 'm', 'e', ' ', 'w', 'h', 'e', 'r', 'e', ' ', 'i', 'n', 0x0391, 0x039D, 0x0395, 0x0398, 0x0391, 0
};
const UChar g[] = {
0x0391, 0x039D, 0x0395, 0x0398, 0x0391, 0
};
U_NAMESPACE_QUALIFIER UnicodeString text = t;
U_NAMESPACE_QUALIFIER UnicodeString greek = extract_greek(text);
assert(greek == g);
// extract currency symbols with associated value, use iterator interface:
std::string text2 = " $100.23 or \xC2\xA3""198.12 "; // \xC2\xA3 is the pound sign encoded in UTF-8
enumerate_currencies(text2);
enumerate_currencies2(text2);
U_NAMESPACE_QUALIFIER UnicodeString credit_card_number = "1234567887654321";
credit_card_number = human_readable_card_number(credit_card_number);
assert(credit_card_number == "1234-5678-8765-4321");
return 0;
}
#else
#include <iostream>
int main()
{
std::cout << "<NOTE>ICU support not enabled, feature unavailable</NOTE>";
return 0;
}
#endif