Backed out changeset 3877f1fa62f5 (bug 1927706) Backed out changeset 233c6ebf84a2 (bug 1927706) Backed out changeset 07e5871d5fa3 (bug 1927706) Backed out changeset 84ef75087931 (bug 1927706) Backed out changeset f89b916619e1 (bug 1927706) Backed out changeset b82d9d622315 (bug 1927706) Backed out changeset b0d2c5711865 (bug 1927706) Backed out changeset 9529dda25bd9 (bug 1927706) Backed out changeset 40b7907d7fc8 (bug 1927706) Backed out changeset c549655dbd73 (bug 1927706) Backed out changeset c5cc289771b3 (bug 1927706) Backed out changeset 8ef66f7822c4 (bug 1927706) Backed out changeset dff6d37fb2fe (bug 1927706) Backed out changeset 083a0b3da643 (bug 1927706) Backed out changeset 06649ac72a19 (bug 1927706) Backed out changeset 019f7533abbc (bug 1927706) Backed out changeset f1539604c459 (bug 1927706) Backed out changeset 578667f1f0d4 (bug 1927706) Backed out changeset 8ed1e7e7d4ab (bug 1927706)
419 lines
13 KiB
C++
419 lines
13 KiB
C++
// © 2016 and later: Unicode, Inc. and others.
|
|
// License & terms of use: http://www.unicode.org/copyright.html
|
|
/*
|
|
*******************************************************************************
|
|
* Copyright (C) 2014, International Business Machines
|
|
* Corporation and others. All Rights Reserved.
|
|
*******************************************************************************
|
|
* loadednormalizer2impl.cpp
|
|
*
|
|
* created on: 2014sep03
|
|
* created by: Markus W. Scherer
|
|
*/
|
|
|
|
#include "unicode/utypes.h"
|
|
|
|
#if !UCONFIG_NO_NORMALIZATION
|
|
|
|
#include "unicode/udata.h"
|
|
#include "unicode/localpointer.h"
|
|
#include "unicode/normalizer2.h"
|
|
#include "unicode/ucptrie.h"
|
|
#include "unicode/unistr.h"
|
|
#include "unicode/unorm.h"
|
|
#include "cstring.h"
|
|
#include "mutex.h"
|
|
#include "norm2allmodes.h"
|
|
#include "normalizer2impl.h"
|
|
#include "uassert.h"
|
|
#include "ucln_cmn.h"
|
|
#include "uhash.h"
|
|
|
|
U_NAMESPACE_BEGIN
|
|
|
|
class LoadedNormalizer2Impl : public Normalizer2Impl {
|
|
public:
|
|
LoadedNormalizer2Impl() : memory(nullptr), ownedTrie(nullptr) {}
|
|
virtual ~LoadedNormalizer2Impl();
|
|
|
|
void load(const char *packageName, const char *name, UErrorCode &errorCode);
|
|
|
|
private:
|
|
static UBool U_CALLCONV
|
|
isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
|
|
|
|
UDataMemory *memory;
|
|
UCPTrie *ownedTrie;
|
|
};
|
|
|
|
LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
|
|
udata_close(memory);
|
|
ucptrie_close(ownedTrie);
|
|
}
|
|
|
|
UBool U_CALLCONV
|
|
LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
|
|
const char * /* type */, const char * /*name*/,
|
|
const UDataInfo *pInfo) {
|
|
if(
|
|
pInfo->size>=20 &&
|
|
pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
|
|
pInfo->charsetFamily==U_CHARSET_FAMILY &&
|
|
pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */
|
|
pInfo->dataFormat[1]==0x72 &&
|
|
pInfo->dataFormat[2]==0x6d &&
|
|
pInfo->dataFormat[3]==0x32 &&
|
|
pInfo->formatVersion[0]==4
|
|
) {
|
|
// Normalizer2Impl *me=(Normalizer2Impl *)context;
|
|
// uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
|
|
return true;
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
void
|
|
LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
|
|
if(U_FAILURE(errorCode)) {
|
|
return;
|
|
}
|
|
memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
|
|
if(U_FAILURE(errorCode)) {
|
|
return;
|
|
}
|
|
const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
|
|
const int32_t *inIndexes=(const int32_t *)inBytes;
|
|
int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
|
|
if(indexesLength<=IX_MIN_LCCC_CP) {
|
|
errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
|
|
return;
|
|
}
|
|
|
|
int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
|
|
int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
|
|
ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
|
|
inBytes+offset, nextOffset-offset, nullptr,
|
|
&errorCode);
|
|
if(U_FAILURE(errorCode)) {
|
|
return;
|
|
}
|
|
|
|
offset=nextOffset;
|
|
nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
|
|
const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
|
|
|
|
// smallFCD: new in formatVersion 2
|
|
offset=nextOffset;
|
|
const uint8_t *inSmallFCD=inBytes+offset;
|
|
|
|
init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
|
|
}
|
|
|
|
// instance cache ---------------------------------------------------------- ***
|
|
|
|
Norm2AllModes *
|
|
Norm2AllModes::createInstance(const char *packageName,
|
|
const char *name,
|
|
UErrorCode &errorCode) {
|
|
if(U_FAILURE(errorCode)) {
|
|
return nullptr;
|
|
}
|
|
LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
|
|
if(impl==nullptr) {
|
|
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
|
return nullptr;
|
|
}
|
|
impl->load(packageName, name, errorCode);
|
|
return createInstance(impl, errorCode);
|
|
}
|
|
|
|
U_CDECL_BEGIN
|
|
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
|
|
U_CDECL_END
|
|
|
|
#if !NORM2_HARDCODE_NFC_DATA
|
|
static Norm2AllModes *nfcSingleton;
|
|
static icu::UInitOnce nfcInitOnce {};
|
|
#endif
|
|
|
|
static Norm2AllModes *nfkcSingleton;
|
|
static icu::UInitOnce nfkcInitOnce {};
|
|
|
|
static Norm2AllModes *nfkc_cfSingleton;
|
|
static icu::UInitOnce nfkc_cfInitOnce {};
|
|
|
|
static UHashtable *cache=nullptr;
|
|
|
|
// UInitOnce singleton initialization function
|
|
static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
|
|
#if !NORM2_HARDCODE_NFC_DATA
|
|
if (uprv_strcmp(what, "nfc") == 0) {
|
|
nfcSingleton = Norm2AllModes::createInstance(nullptr, "nfc", errorCode);
|
|
} else
|
|
#endif
|
|
if (uprv_strcmp(what, "nfkc") == 0) {
|
|
nfkcSingleton = Norm2AllModes::createInstance(nullptr, "nfkc", errorCode);
|
|
} else if (uprv_strcmp(what, "nfkc_cf") == 0) {
|
|
nfkc_cfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_cf", errorCode);
|
|
} else {
|
|
UPRV_UNREACHABLE_EXIT; // Unknown singleton
|
|
}
|
|
ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
|
|
}
|
|
|
|
U_CDECL_BEGIN
|
|
|
|
static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
|
|
delete (Norm2AllModes *)allModes;
|
|
}
|
|
|
|
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
|
|
#if !NORM2_HARDCODE_NFC_DATA
|
|
delete nfcSingleton;
|
|
nfcSingleton = nullptr;
|
|
nfcInitOnce.reset();
|
|
#endif
|
|
|
|
delete nfkcSingleton;
|
|
nfkcSingleton = nullptr;
|
|
nfkcInitOnce.reset();
|
|
|
|
delete nfkc_cfSingleton;
|
|
nfkc_cfSingleton = nullptr;
|
|
nfkc_cfInitOnce.reset();
|
|
|
|
uhash_close(cache);
|
|
cache=nullptr;
|
|
return true;
|
|
}
|
|
|
|
U_CDECL_END
|
|
|
|
#if !NORM2_HARDCODE_NFC_DATA
|
|
const Norm2AllModes *
|
|
Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
|
|
if(U_FAILURE(errorCode)) { return nullptr; }
|
|
umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
|
|
return nfcSingleton;
|
|
}
|
|
#endif
|
|
|
|
const Norm2AllModes *
|
|
Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
|
|
if(U_FAILURE(errorCode)) { return nullptr; }
|
|
umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
|
|
return nfkcSingleton;
|
|
}
|
|
|
|
const Norm2AllModes *
|
|
Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
|
|
if(U_FAILURE(errorCode)) { return nullptr; }
|
|
umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
|
|
return nfkc_cfSingleton;
|
|
}
|
|
|
|
#if !NORM2_HARDCODE_NFC_DATA
|
|
const Normalizer2 *
|
|
Normalizer2::getNFCInstance(UErrorCode &errorCode) {
|
|
const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
|
|
return allModes!=nullptr ? &allModes->comp : nullptr;
|
|
}
|
|
|
|
const Normalizer2 *
|
|
Normalizer2::getNFDInstance(UErrorCode &errorCode) {
|
|
const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
|
|
return allModes!=nullptr ? &allModes->decomp : nullptr;
|
|
}
|
|
|
|
const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
|
|
const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
|
|
return allModes!=nullptr ? &allModes->fcd : nullptr;
|
|
}
|
|
|
|
const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
|
|
const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
|
|
return allModes!=nullptr ? &allModes->fcc : nullptr;
|
|
}
|
|
|
|
const Normalizer2Impl *
|
|
Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
|
|
const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
|
|
return allModes!=nullptr ? allModes->impl : nullptr;
|
|
}
|
|
#endif
|
|
|
|
const Normalizer2 *
|
|
Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
|
|
const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
|
|
return allModes!=nullptr ? &allModes->comp : nullptr;
|
|
}
|
|
|
|
const Normalizer2 *
|
|
Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
|
|
const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
|
|
return allModes!=nullptr ? &allModes->decomp : nullptr;
|
|
}
|
|
|
|
const Normalizer2 *
|
|
Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
|
|
const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
|
|
return allModes!=nullptr ? &allModes->comp : nullptr;
|
|
}
|
|
|
|
const Normalizer2 *
|
|
Normalizer2::getInstance(const char *packageName,
|
|
const char *name,
|
|
UNormalization2Mode mode,
|
|
UErrorCode &errorCode) {
|
|
if(U_FAILURE(errorCode)) {
|
|
return nullptr;
|
|
}
|
|
if(name==nullptr || *name==0) {
|
|
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
|
|
return nullptr;
|
|
}
|
|
const Norm2AllModes *allModes=nullptr;
|
|
if(packageName==nullptr) {
|
|
if(0==uprv_strcmp(name, "nfc")) {
|
|
allModes=Norm2AllModes::getNFCInstance(errorCode);
|
|
} else if(0==uprv_strcmp(name, "nfkc")) {
|
|
allModes=Norm2AllModes::getNFKCInstance(errorCode);
|
|
} else if(0==uprv_strcmp(name, "nfkc_cf")) {
|
|
allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
|
|
}
|
|
}
|
|
if(allModes==nullptr && U_SUCCESS(errorCode)) {
|
|
{
|
|
Mutex lock;
|
|
if(cache!=nullptr) {
|
|
allModes=(Norm2AllModes *)uhash_get(cache, name);
|
|
}
|
|
}
|
|
if(allModes==nullptr) {
|
|
ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
|
|
LocalPointer<Norm2AllModes> localAllModes(
|
|
Norm2AllModes::createInstance(packageName, name, errorCode));
|
|
if(U_SUCCESS(errorCode)) {
|
|
Mutex lock;
|
|
if(cache==nullptr) {
|
|
cache=uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &errorCode);
|
|
if(U_FAILURE(errorCode)) {
|
|
return nullptr;
|
|
}
|
|
uhash_setKeyDeleter(cache, uprv_free);
|
|
uhash_setValueDeleter(cache, deleteNorm2AllModes);
|
|
}
|
|
void *temp=uhash_get(cache, name);
|
|
if(temp==nullptr) {
|
|
int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
|
|
char *nameCopy=(char *)uprv_malloc(keyLength);
|
|
if(nameCopy==nullptr) {
|
|
errorCode=U_MEMORY_ALLOCATION_ERROR;
|
|
return nullptr;
|
|
}
|
|
uprv_memcpy(nameCopy, name, keyLength);
|
|
allModes=localAllModes.getAlias();
|
|
uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
|
|
} else {
|
|
// race condition
|
|
allModes=(Norm2AllModes *)temp;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if(allModes!=nullptr && U_SUCCESS(errorCode)) {
|
|
switch(mode) {
|
|
case UNORM2_COMPOSE:
|
|
return &allModes->comp;
|
|
case UNORM2_DECOMPOSE:
|
|
return &allModes->decomp;
|
|
case UNORM2_FCD:
|
|
return &allModes->fcd;
|
|
case UNORM2_COMPOSE_CONTIGUOUS:
|
|
return &allModes->fcc;
|
|
default:
|
|
break; // do nothing
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
const Normalizer2 *
|
|
Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
|
|
if(U_FAILURE(errorCode)) {
|
|
return nullptr;
|
|
}
|
|
switch(mode) {
|
|
case UNORM_NFD:
|
|
return Normalizer2::getNFDInstance(errorCode);
|
|
case UNORM_NFKD:
|
|
return Normalizer2::getNFKDInstance(errorCode);
|
|
case UNORM_NFC:
|
|
return Normalizer2::getNFCInstance(errorCode);
|
|
case UNORM_NFKC:
|
|
return Normalizer2::getNFKCInstance(errorCode);
|
|
case UNORM_FCD:
|
|
return getFCDInstance(errorCode);
|
|
default: // UNORM_NONE
|
|
return getNoopInstance(errorCode);
|
|
}
|
|
}
|
|
|
|
const Normalizer2Impl *
|
|
Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
|
|
const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
|
|
return allModes!=nullptr ? allModes->impl : nullptr;
|
|
}
|
|
|
|
const Normalizer2Impl *
|
|
Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
|
|
const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
|
|
return allModes!=nullptr ? allModes->impl : nullptr;
|
|
}
|
|
|
|
U_NAMESPACE_END
|
|
|
|
// C API ------------------------------------------------------------------- ***
|
|
|
|
U_NAMESPACE_USE
|
|
|
|
U_CAPI const UNormalizer2 * U_EXPORT2
|
|
unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
|
|
return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
|
|
}
|
|
|
|
U_CAPI const UNormalizer2 * U_EXPORT2
|
|
unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
|
|
return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
|
|
}
|
|
|
|
U_CAPI const UNormalizer2 * U_EXPORT2
|
|
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
|
|
return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
|
|
}
|
|
|
|
U_CAPI const UNormalizer2 * U_EXPORT2
|
|
unorm2_getInstance(const char *packageName,
|
|
const char *name,
|
|
UNormalization2Mode mode,
|
|
UErrorCode *pErrorCode) {
|
|
return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
|
|
}
|
|
|
|
U_CFUNC UNormalizationCheckResult
|
|
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
|
|
if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
|
|
return UNORM_YES;
|
|
}
|
|
UErrorCode errorCode=U_ZERO_ERROR;
|
|
const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
|
|
if(U_SUCCESS(errorCode)) {
|
|
return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
|
|
} else {
|
|
return UNORM_MAYBE;
|
|
}
|
|
}
|
|
|
|
#endif // !UCONFIG_NO_NORMALIZATION
|