4c30e01d23
[SVN r73017]
317 lines
9.9 KiB
C++
317 lines
9.9 KiB
C++
//
|
|
// Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
|
|
//
|
|
// Distributed under the Boost Software License, Version 1.0. (See
|
|
// accompanying file LICENSE_1_0.txt or copy at
|
|
// http://www.boost.org/LICENSE_1_0.txt)
|
|
//
|
|
#ifndef BOOST_SRC_LOCALE_ICU_UCONV_HPP
|
|
#define BOOST_SRC_LOCALE_ICU_UCONV_HPP
|
|
#include <unicode/unistr.h>
|
|
#include <unicode/ucnv.h>
|
|
#include <unicode/ustring.h>
|
|
#include <unicode/utf.h>
|
|
#include <unicode/utf16.h>
|
|
|
|
#include <boost/locale/encoding.hpp>
|
|
|
|
#include <string>
|
|
#include <memory>
|
|
#include "icu_util.hpp"
|
|
|
|
namespace boost {
|
|
namespace locale {
|
|
namespace impl_icu {
|
|
|
|
// Policy selecting what a converter does with input it cannot convert.
enum cpcvt_type {
    cvt_skip,   // leave the offending input out and keep converting
    cvt_stop    // stop and report the failure
};
|
|
|
|
|
|
template<typename CharType,int char_size = sizeof(CharType) >
|
|
class icu_std_converter {
|
|
public:
|
|
typedef CharType char_type;
|
|
typedef std::basic_string<char_type> string_type;
|
|
|
|
icu_std_converter(std::string charset,cpcvt_type cv=cvt_skip);
|
|
icu::UnicodeString icu(char_type const *begin,char_type const *end) const;
|
|
string_type std(icu::UnicodeString const &str) const;
|
|
size_t cut(icu::UnicodeString const &str,char_type const *begin,char_type const *end,size_t n,size_t from_u=0,size_t from_c=0) const;
|
|
};
|
|
|
|
template<typename CharType>
|
|
class icu_std_converter<CharType,1> {
|
|
public:
|
|
typedef CharType char_type;
|
|
typedef std::basic_string<char_type> string_type;
|
|
|
|
|
|
icu::UnicodeString icu_checked(char_type const *vb,char_type const *ve) const
|
|
{
|
|
return icu(vb,ve); // Already done
|
|
}
|
|
icu::UnicodeString icu(char_type const *vb,char_type const *ve) const
|
|
{
|
|
char const *begin=reinterpret_cast<char const *>(vb);
|
|
char const *end=reinterpret_cast<char const *>(ve);
|
|
uconv cvt(charset_,cvt_type_);
|
|
UErrorCode err=U_ZERO_ERROR;
|
|
icu::UnicodeString tmp(begin,end-begin,cvt.cvt(),err);
|
|
check_and_throw_icu_error(err);
|
|
return tmp;
|
|
}
|
|
|
|
string_type std(icu::UnicodeString const &str) const
|
|
{
|
|
uconv cvt(charset_,cvt_type_);
|
|
return cvt.go(str.getBuffer(),str.length(),max_len_);
|
|
}
|
|
|
|
icu_std_converter(std::string charset,cpcvt_type cvt_type = cvt_skip) :
|
|
charset_(charset),
|
|
cvt_type_(cvt_type)
|
|
{
|
|
uconv cvt(charset_,cvt_type);
|
|
max_len_=cvt.max_char_size();
|
|
}
|
|
|
|
size_t cut(icu::UnicodeString const &str,char_type const *begin,char_type const *end,
|
|
size_t n,size_t from_u=0,size_t from_char=0) const
|
|
{
|
|
size_t code_points = str.countChar32(from_u,n);
|
|
uconv cvt(charset_,cvt_type_);
|
|
return cvt.cut(code_points,begin+from_char,end);
|
|
}
|
|
|
|
struct uconv {
|
|
uconv(uconv const &other);
|
|
void operator=(uconv const &other);
|
|
public:
|
|
uconv(std::string const &charset,cpcvt_type cvt_type=cvt_skip)
|
|
{
|
|
UErrorCode err=U_ZERO_ERROR;
|
|
cvt_ = ucnv_open(charset.c_str(),&err);
|
|
if(!cvt_ || U_FAILURE(err)) {
|
|
if(cvt_)
|
|
ucnv_close(cvt_);
|
|
throw conv::invalid_charset_error(charset);
|
|
}
|
|
|
|
try {
|
|
if(cvt_type==cvt_skip) {
|
|
ucnv_setFromUCallBack(cvt_,UCNV_FROM_U_CALLBACK_SKIP,0,0,0,&err);
|
|
check_and_throw_icu_error(err);
|
|
|
|
err=U_ZERO_ERROR;
|
|
ucnv_setToUCallBack(cvt_,UCNV_TO_U_CALLBACK_SKIP,0,0,0,&err);
|
|
check_and_throw_icu_error(err);
|
|
}
|
|
else {
|
|
ucnv_setFromUCallBack(cvt_,UCNV_FROM_U_CALLBACK_STOP,0,0,0,&err);
|
|
check_and_throw_icu_error(err);
|
|
|
|
err=U_ZERO_ERROR;
|
|
ucnv_setToUCallBack(cvt_,UCNV_TO_U_CALLBACK_STOP,0,0,0,&err);
|
|
check_and_throw_icu_error(err);
|
|
}
|
|
}
|
|
catch(...) { ucnv_close(cvt_) ; throw; }
|
|
}
|
|
|
|
int max_char_size()
|
|
{
|
|
return ucnv_getMaxCharSize(cvt_);
|
|
}
|
|
|
|
string_type go(UChar const *buf,int length,int max_size)
|
|
{
|
|
string_type res;
|
|
res.resize(UCNV_GET_MAX_BYTES_FOR_STRING(length,max_size));
|
|
char *ptr=reinterpret_cast<char *>(&res[0]);
|
|
UErrorCode err=U_ZERO_ERROR;
|
|
int n = ucnv_fromUChars(cvt_,ptr,res.size(),buf,length,&err);
|
|
check_and_throw_icu_error(err);
|
|
res.resize(n);
|
|
return res;
|
|
}
|
|
|
|
size_t cut(size_t n,char_type const *begin,char_type const *end)
|
|
{
|
|
char_type const *saved = begin;
|
|
while(n > 0 && begin < end) {
|
|
UErrorCode err=U_ZERO_ERROR;
|
|
ucnv_getNextUChar(cvt_,&begin,end,&err);
|
|
if(U_FAILURE(err))
|
|
return 0;
|
|
n--;
|
|
}
|
|
return begin - saved;
|
|
}
|
|
|
|
UConverter *cvt() { return cvt_; }
|
|
|
|
~uconv()
|
|
{
|
|
ucnv_close(cvt_);
|
|
}
|
|
|
|
private:
|
|
UConverter *cvt_;
|
|
};
|
|
|
|
private:
|
|
int max_len_;
|
|
std::string charset_;
|
|
cpcvt_type cvt_type_;
|
|
};
|
|
|
|
template<typename CharType>
|
|
class icu_std_converter<CharType,2> {
|
|
public:
|
|
typedef CharType char_type;
|
|
typedef std::basic_string<char_type> string_type;
|
|
|
|
|
|
icu::UnicodeString icu_checked(char_type const *begin,char_type const *end) const
|
|
{
|
|
icu::UnicodeString tmp(end-begin,0,0); // make inital capacity
|
|
while(begin!=end) {
|
|
UChar cl = *begin++;
|
|
if(U16_IS_SINGLE(cl))
|
|
tmp.append(static_cast<UChar32>(cl));
|
|
else if(U16_IS_LEAD(cl)) {
|
|
if(begin==end) {
|
|
throw_if_needed();
|
|
}
|
|
else {
|
|
UChar ct=*begin++;
|
|
if(!U16_IS_TRAIL(ct))
|
|
throw_if_needed();
|
|
else {
|
|
UChar32 c=U16_GET_SUPPLEMENTARY(cl,ct);
|
|
tmp.append(c);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
throw_if_needed();
|
|
}
|
|
return tmp;
|
|
}
|
|
void throw_if_needed() const
|
|
{
|
|
if(mode_ == cvt_stop)
|
|
throw conv::conversion_error();
|
|
}
|
|
icu::UnicodeString icu(char_type const *vb,char_type const *ve) const
|
|
{
|
|
UChar const *begin=reinterpret_cast<UChar const *>(vb);
|
|
UChar const *end=reinterpret_cast<UChar const *>(ve);
|
|
icu::UnicodeString tmp(begin,end-begin);
|
|
return tmp;
|
|
|
|
}
|
|
|
|
string_type std(icu::UnicodeString const &str) const
|
|
{
|
|
char_type const *ptr=reinterpret_cast<char_type const *>(str.getBuffer());
|
|
return string_type(ptr,str.length());
|
|
}
|
|
size_t cut(icu::UnicodeString const &/*str*/,char_type const * /*begin*/,char_type const * /*end*/,size_t n,
|
|
size_t /*from_u*/=0,size_t /*from_c*/=0) const
|
|
{
|
|
return n;
|
|
}
|
|
|
|
icu_std_converter(std::string /*charset*/,cpcvt_type mode=cvt_skip) :
|
|
mode_(mode)
|
|
{
|
|
}
|
|
private:
|
|
cpcvt_type mode_;
|
|
|
|
};
|
|
|
|
template<typename CharType>
|
|
class icu_std_converter<CharType,4> {
|
|
public:
|
|
|
|
typedef CharType char_type;
|
|
typedef std::basic_string<char_type> string_type;
|
|
|
|
icu::UnicodeString icu_checked(char_type const *begin,char_type const *end) const
|
|
{
|
|
icu::UnicodeString tmp(end-begin,0,0); // make inital capacity
|
|
while(begin!=end) {
|
|
UChar32 c = static_cast<UChar32>(*begin++);
|
|
if(U_IS_UNICODE_CHAR(c))
|
|
tmp.append(c);
|
|
else
|
|
throw_if_needed();
|
|
}
|
|
return tmp;
|
|
}
|
|
void throw_if_needed() const
|
|
{
|
|
if(mode_ == cvt_stop)
|
|
throw conv::conversion_error();
|
|
}
|
|
|
|
icu::UnicodeString icu(char_type const *begin,char_type const *end) const
|
|
{
|
|
icu::UnicodeString tmp(end-begin,0,0); // make inital capacity
|
|
while(begin!=end) {
|
|
UChar32 c=static_cast<UChar32>(*begin++);
|
|
tmp.append(c);
|
|
}
|
|
return tmp;
|
|
|
|
}
|
|
|
|
string_type std(icu::UnicodeString const &str) const
|
|
{
|
|
string_type tmp;
|
|
tmp.resize(str.length());
|
|
UChar32 *ptr=reinterpret_cast<UChar32 *>(&tmp[0]);
|
|
|
|
#ifdef __SUNPRO_CC
|
|
int len=0;
|
|
#else
|
|
::int32_t len=0;
|
|
#endif
|
|
|
|
UErrorCode code=U_ZERO_ERROR;
|
|
u_strToUTF32(ptr,tmp.size(),&len,str.getBuffer(),str.length(),&code);
|
|
|
|
check_and_throw_icu_error(code);
|
|
|
|
tmp.resize(len);
|
|
|
|
return tmp;
|
|
}
|
|
|
|
size_t cut(icu::UnicodeString const &str,char_type const * /*begin*/,char_type const * /*end*/,size_t n,
|
|
size_t from_u=0,size_t /*from_c*/=0) const
|
|
{
|
|
return str.countChar32(from_u,n);
|
|
}
|
|
|
|
icu_std_converter(std::string /*charset*/,cpcvt_type mode=cvt_skip) :
|
|
mode_(mode)
|
|
{
|
|
}
|
|
private:
|
|
cpcvt_type mode_;
|
|
|
|
};
|
|
} // impl_icu
|
|
} // locale
|
|
} // boost
|
|
|
|
#endif
|
|
|
|
|
|
// vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
|