How do I pass a locale to the normalizer?
Created by: BrannonKing
I just attempted to translate the C++ code below into Python 3 + PyICU. The code below uses the Boost.Locale wrapper around ICU. Notice, though, that it passes the generated locale (and therefore its facets) to both the normalization and the lower-case transform. I get different results (in C++) when I don't do that. How do I represent this code in PyICU?
std::string CClaimTrieCacheNormalizationFork::normalizeClaimName(const std::string& name, bool force) const {
if (!force && !shouldNormalize())
return name;
static std::locale utf8;
static bool initialized = false;
if (!initialized) {
static boost::locale::localization_backend_manager manager =
boost::locale::localization_backend_manager::global();
manager.select("icu");
static boost::locale::generator curLocale(manager);
utf8 = curLocale("en_US.UTF8");
initialized = true;
}
std::string normalized;
try {
// Check if it is a valid utf-8 string. If not, it will throw a
// boost::locale::conv::conversion_error exception which we catch later
normalized = boost::locale::conv::to_utf<char>(name, "UTF-8", boost::locale::conv::stop);
if (normalized.empty())
return name;
normalized = boost::locale::normalize(normalized, boost::locale::norm_nfd, utf8);
// Locale aware lowercase (the non-locale-aware version seemed to struggle with some international chars):
normalized = boost::locale::to_lower(normalized, utf8);
}
catch (const boost::locale::conv::conversion_error& e){
return name;
}
catch (const std::bad_cast& e) {
LogPrintf("%s() is invalid or dependencies are missing: %s\n", __func__, e.what());
throw;
}
catch (const std::exception& e) { // TODO: change to use ... with current_exception() in c++11
LogPrintf("%s() had an unexpected exception: %s\n", __func__, e.what());
return name;
}
return normalized;
}