Merge "Use icu4c to implement <wctype.h>."

This commit is contained in:
Elliott Hughes 2016-11-29 23:05:31 +00:00 committed by Gerrit Code Review
commit bd734f0716
8 changed files with 297 additions and 37 deletions

View File

@ -1624,6 +1624,7 @@ cc_library {
static: {
srcs: [
"bionic/dl_iterate_phdr_static.cpp",
"bionic/icu_static.cpp",
"bionic/malloc_common.cpp",
"bionic/libc_init_static.cpp",
],
@ -1634,6 +1635,7 @@ cc_library {
srcs: [
"arch-common/bionic/crtbegin_so.c",
"arch-common/bionic/crtbrand.S",
"bionic/icu.cpp",
"bionic/malloc_common.cpp",
"bionic/libc_init_dynamic.cpp",
"bionic/NetdClient.cpp",

99
libc/bionic/icu.cpp Normal file
View File

@ -0,0 +1,99 @@
/*
* Copyright (C) 2016 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "private/icu.h"
#include <dirent.h>
#include <dlfcn.h>
#include <pthread.h>
#include <stdlib.h>
#include "private/libc_logging.h"
// Allowed icu4c version numbers are in the range [44, 999].
// Gingerbread's icu4c 4.4 is the minimum supported ICU version.
static constexpr auto ICUDATA_VERSION_MIN_LENGTH = 2;
static constexpr auto ICUDATA_VERSION_MAX_LENGTH = 3;
static constexpr auto ICUDATA_VERSION_MIN = 44;

// Versioned-symbol suffix of the form "_NNN". The buffer must hold the leading
// '_', up to ICUDATA_VERSION_MAX_LENGTH digits, and the terminating NUL;
// anything smaller would truncate three-digit versions.
static char g_icudata_version[1 + ICUDATA_VERSION_MAX_LENGTH + 1];

// dlopen() handle for libicuuc.so; stays nullptr until the library is opened.
static void* g_libicuuc_handle = nullptr;
// scandir() filter for ICU data files named like 'icudt(\d\d\d)l.dat'.
// Only the overall length, the "icudt" prefix and the "l.dat" suffix are
// checked here; the characters in between are not validated.
// Returns non-zero to keep the entry and zero to drop it.
static int __icu_dat_file_filter(const dirent* dirp) {
  const char* entry = dirp->d_name;
  const size_t entry_len = strlen(entry);

  // 10 == strlen("icudt") + strlen("l.dat"); the rest is the version field.
  const size_t shortest = 10 + ICUDATA_VERSION_MIN_LENGTH;
  const size_t longest = 10 + ICUDATA_VERSION_MAX_LENGTH;
  if (entry_len < shortest || entry_len > longest) return 0;

  if (strncmp(entry, "icudt", 5) != 0) return 0;
  return strncmp(entry + entry_len - 5, "l.dat", 5) == 0;
}
// Finds the newest ICU data file in /system/usr/icu, stores its version as the
// "_<version>" symbol suffix in g_icudata_version, and opens libicuuc.so.
// Returns true on success. Logs and returns false when no usable .dat file is
// found, when the version does not fit the suffix buffer, or when libicuuc.so
// cannot be opened.
static bool __find_icu() {
  dirent** namelist = nullptr;
  int n = scandir("/system/usr/icu", &namelist, &__icu_dat_file_filter, alphasort);
  int max_version = -1;
  // scandir() returns -1 on failure (for example when the directory is
  // missing) and leaves namelist null. Only walk the list for a positive
  // count; counting down from -1 would index off a null pointer.
  while (n > 0) {
    --n;
    // We prefer the latest version available.
    int version = atoi(&namelist[n]->d_name[strlen("icudt")]);
    if (version != 0 && version > max_version) max_version = version;
    free(namelist[n]);
  }
  free(namelist);  // free(nullptr) is a no-op.

  if (max_version == -1 || max_version < ICUDATA_VERSION_MIN) {
    __libc_write_log(ANDROID_LOG_ERROR, "bionic-icu", "couldn't find an ICU .dat file");
    return false;
  }

  // A truncated suffix would make every later symbol lookup target the wrong
  // ICU version, so treat truncation as "ICU not available".
  int suffix_len = snprintf(g_icudata_version, sizeof(g_icudata_version), "_%d", max_version);
  if (suffix_len < 0 || static_cast<size_t>(suffix_len) >= sizeof(g_icudata_version)) {
    __libc_format_log(ANDROID_LOG_ERROR, "bionic-icu",
                      "ICU version %d doesn't fit the symbol suffix buffer", max_version);
    return false;
  }

  g_libicuuc_handle = dlopen("libicuuc.so", RTLD_LOCAL);
  if (g_libicuuc_handle == nullptr) {
    __libc_format_log(ANDROID_LOG_ERROR, "bionic-icu", "couldn't open libicuuc.so: %s", dlerror());
    return false;
  }
  return true;
}
void* __find_icu_symbol(const char* symbol_name) {
static bool found_icu = __find_icu();
if (!found_icu) return nullptr;
char versioned_symbol_name[strlen(symbol_name) + sizeof(g_icudata_version)];
snprintf(versioned_symbol_name, sizeof(versioned_symbol_name), "%s%s",
symbol_name, g_icudata_version);
void* symbol = dlsym(g_libicuuc_handle, versioned_symbol_name);
if (symbol == nullptr) {
__libc_format_log(ANDROID_LOG_ERROR, "bionic-icu", "couldn't find %s", versioned_symbol_name);
}
return symbol;
}

View File

@ -0,0 +1,34 @@
/*
* Copyright (C) 2016 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "private/icu.h"
// Static binaries have no dlopen/dlsym yet, so ICU can never be loaded:
// every lookup reports "not found" and callers use their fallbacks.
void* __find_icu_symbol(const char* /* symbol_name */) {
  return nullptr;
}

View File

@ -34,26 +34,53 @@
#include <string.h>
#include <wchar.h>
// These functions are either defined to be the same as their ASCII cousins,
// or defined in terms of other functions.
// NOTE(review): the same names are defined again further down with ICU-backed
// bodies; these look like the pre-change side of the diff -- confirm.
// Alphanumeric means "digit or alphabetic".
int iswalnum(wint_t wc) { return iswdigit(wc) || iswalpha(wc); }
// Blank and digit defer directly to the <ctype.h> functions.
int iswblank(wint_t wc) { return isblank(wc); }
int iswdigit(wint_t wc) { return isdigit(wc); }
// Graphic means "printable and not whitespace".
int iswgraph(wint_t wc) { return !iswspace(wc) && iswprint(wc); }
// Lower case: unchanged by towlower() and not a control, digit, punctuation
// or whitespace character.
int iswlower(wint_t wc) {
return towlower(wc) == wc && !(iswcntrl(wc) || iswdigit(wc) || iswpunct(wc) || iswspace(wc));
}
// Upper case: unchanged by towupper() and not a control, digit, punctuation
// or whitespace character.
int iswupper(wint_t wc) {
return towupper(wc) == wc && !(iswcntrl(wc) || iswdigit(wc) || iswpunct(wc) || iswspace(wc));
}
// Hex digit defers directly to isxdigit().
int iswxdigit(wint_t wc) { return isxdigit(wc); }
#include "private/icu.h"
// TODO: need proper implementations of these.
// Each one forwards the wide character straight to the matching <ctype.h>
// function.
// NOTE(review): these names are defined again below with ICU-backed bodies;
// presumably the pre-change side of the diff -- confirm.
int iswalpha(wint_t wc) { return isalpha(wc); }
int iswcntrl(wint_t wc) { return iscntrl(wc); }
int iswprint(wint_t wc) { return isprint(wc); }
int iswpunct(wint_t wc) { return ispunct(wc); }
int iswspace(wint_t wc) { return isspace(wc); }
// ICU constants, copied here by value because the ICU headers are not
// included. Listed in increasing numeric order.

// Binary property selectors passed to u_hasBinaryProperty().
static constexpr int UCHAR_ALPHABETIC = 0;
static constexpr int UCHAR_LOWERCASE = 22;
static constexpr int UCHAR_UPPERCASE = 30;
static constexpr int UCHAR_WHITE_SPACE = 31;
static constexpr int UCHAR_POSIX_ALNUM = 44;
static constexpr int UCHAR_POSIX_BLANK = 45;
static constexpr int UCHAR_POSIX_GRAPH = 46;
static constexpr int UCHAR_POSIX_PRINT = 47;
static constexpr int UCHAR_POSIX_XDIGIT = 48;

// General category value compared against the result of u_charType().
static constexpr int U_CONTROL_CHAR = 15;
// Reports whether `wc` has the ICU binary property `property`.
// If ICU's u_hasBinaryProperty cannot be resolved, the answer comes from
// `fallback`, a <ctype.h>-style classifier, instead.
static bool __icu_hasBinaryProperty(wint_t wc, int property, int (*fallback)(int)) {
  // ICU declares u_hasBinaryProperty as returning UBool, an 8-bit integer.
  // The pointer type must say so: if it claimed an int return, the upper bits
  // of the result would be whatever the callee left in the return register on
  // ABIs where the caller widens narrow return values.
  typedef signed char (*FnT)(wint_t, int);
  static auto u_hasBinaryProperty = reinterpret_cast<FnT>(__find_icu_symbol("u_hasBinaryProperty"));
  if (u_hasBinaryProperty == nullptr) return fallback(wc);
  return u_hasBinaryProperty(wc, property) != 0;
}
// Wide-character classification. Each function names the ICU binary property
// to query and the <ctype.h> function to use when ICU is unavailable.
int iswalnum(wint_t wc) {
  return __icu_hasBinaryProperty(wc, UCHAR_POSIX_ALNUM, isalnum);
}

int iswalpha(wint_t wc) {
  return __icu_hasBinaryProperty(wc, UCHAR_ALPHABETIC, isalpha);
}

int iswblank(wint_t wc) {
  return __icu_hasBinaryProperty(wc, UCHAR_POSIX_BLANK, isblank);
}

int iswgraph(wint_t wc) {
  return __icu_hasBinaryProperty(wc, UCHAR_POSIX_GRAPH, isgraph);
}

int iswlower(wint_t wc) {
  return __icu_hasBinaryProperty(wc, UCHAR_LOWERCASE, islower);
}

int iswprint(wint_t wc) {
  return __icu_hasBinaryProperty(wc, UCHAR_POSIX_PRINT, isprint);
}

int iswspace(wint_t wc) {
  return __icu_hasBinaryProperty(wc, UCHAR_WHITE_SPACE, isspace);
}

int iswupper(wint_t wc) {
  return __icu_hasBinaryProperty(wc, UCHAR_UPPERCASE, isupper);
}

int iswxdigit(wint_t wc) {
  return __icu_hasBinaryProperty(wc, UCHAR_POSIX_XDIGIT, isxdigit);
}
// Returns non-zero if `wc` is a control character: ICU general category
// U_CONTROL_CHAR when ICU is available, otherwise whatever iscntrl() says.
int iswcntrl(wint_t wc) {
  // ICU declares u_charType as returning int8_t. Typing the pointer with an
  // int return would compare unspecified upper register bits against
  // U_CONTROL_CHAR on ABIs where the caller widens narrow return values.
  typedef signed char (*FnT)(wint_t);
  static auto u_charType = reinterpret_cast<FnT>(__find_icu_symbol("u_charType"));
  if (u_charType == nullptr) return iscntrl(wc);
  return u_charType(wc) == U_CONTROL_CHAR;
}
// Returns non-zero if `wc` is a digit according to ICU's u_isdigit, or
// according to isdigit() when ICU is unavailable.
int iswdigit(wint_t wc) {
  // ICU declares u_isdigit as returning UBool, an 8-bit integer; the pointer
  // type has to match so that only the meaningful low byte is examined.
  typedef signed char (*FnT)(wint_t);
  static auto u_isdigit = reinterpret_cast<FnT>(__find_icu_symbol("u_isdigit"));
  if (u_isdigit == nullptr) return isdigit(wc);
  return u_isdigit(wc) != 0;
}
// Returns non-zero if `wc` is punctuation according to ICU's u_ispunct, or
// according to ispunct() when ICU is unavailable.
int iswpunct(wint_t wc) {
  // ICU declares u_ispunct as returning UBool, an 8-bit integer; the pointer
  // type has to match so that only the meaningful low byte is examined.
  typedef signed char (*FnT)(wint_t);
  static auto u_ispunct = reinterpret_cast<FnT>(__find_icu_symbol("u_ispunct"));
  if (u_ispunct == nullptr) return ispunct(wc);
  return u_ispunct(wc) != 0;
}
// Locale-taking variants: the locale argument is ignored and the call is
// forwarded to the locale-less function.
int iswalnum_l(wint_t c, locale_t) {
  return iswalnum(c);
}

int iswalpha_l(wint_t c, locale_t) {
  return iswalpha(c);
}
@ -90,12 +117,20 @@ int iswctype_l(wint_t wc, wctype_t char_class, locale_t) {
return iswctype(wc, char_class);
}
// TODO: need proper implementations of these.
// Both forward straight to the <ctype.h> case-mapping functions.
// NOTE(review): towlower/towupper are defined again below with ICU-backed
// bodies; presumably the pre-change side of the diff -- confirm.
wint_t towlower(wint_t wc) { return tolower(wc); }
wint_t towupper(wint_t wc) { return toupper(wc); }
// Maps `wc` to lower case using ICU's u_tolower, or tolower() when ICU is
// unavailable.
wint_t towlower(wint_t wc) {
  typedef wchar_t (*ToLowerFn)(wchar_t);
  // Resolved once, on the first call; nullptr if ICU has no such symbol.
  static auto icu_tolower = reinterpret_cast<ToLowerFn>(__find_icu_symbol("u_tolower"));
  if (icu_tolower != nullptr) return icu_tolower(wc);
  return tolower(wc);
}
// Locale-taking variants with an int parameter; the locale is ignored.
// NOTE(review): redefined below with a wint_t parameter; presumably the
// pre-change side of the diff -- confirm.
wint_t towupper_l(int c, locale_t) { return towupper(c); }
wint_t towlower_l(int c, locale_t) { return towlower(c); }
// Maps `wc` to upper case using ICU's u_toupper, or toupper() when ICU is
// unavailable.
wint_t towupper(wint_t wc) {
  typedef wchar_t (*ToUpperFn)(wchar_t);
  // Resolved once, on the first call; nullptr if ICU has no such symbol.
  static auto icu_toupper = reinterpret_cast<ToUpperFn>(__find_icu_symbol("u_toupper"));
  if (icu_toupper != nullptr) return icu_toupper(wc);
  return toupper(wc);
}
// Locale-taking case mappings: the locale argument is ignored and the call is
// forwarded to the locale-less function.
wint_t towupper_l(wint_t c, locale_t) {
  return towupper(c);
}

wint_t towlower_l(wint_t c, locale_t) {
  return towlower(c);
}
wctype_t wctype(const char* property) {
static const char* const properties[WC_TYPE_MAX] = {

View File

@ -49,8 +49,8 @@ int iswspace_l(wint_t, locale_t) __INTRODUCED_IN(21);
int iswupper_l(wint_t, locale_t) __INTRODUCED_IN(21);
int iswxdigit_l(wint_t, locale_t) __INTRODUCED_IN(21);
wint_t towlower_l(int, locale_t) __INTRODUCED_IN(21);
wint_t towupper_l(int, locale_t) __INTRODUCED_IN(21);
wint_t towlower_l(wint_t, locale_t) __INTRODUCED_IN(21);
wint_t towupper_l(wint_t, locale_t) __INTRODUCED_IN(21);
#else
// Implemented as static inlines before 21.
#endif

34
libc/private/icu.h Normal file
View File

@ -0,0 +1,34 @@
/*
* Copyright (C) 2016 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef _PRIVATE_ICU_H
#define _PRIVATE_ICU_H
// Looks up the ICU function called `symbol_name` (unversioned, e.g.
// "u_tolower") and returns its address, or nullptr if it can't be resolved.
// The dynamic libc implementation appends the detected ICU version suffix and
// uses dlsym() on libicuuc.so; the static libc implementation always returns
// nullptr. Callers must be prepared to fall back when nullptr is returned.
void* __find_icu_symbol(const char* symbol_name);
#endif // _PRIVATE_ICU_H

View File

@ -386,6 +386,12 @@ cc_test {
name: "bionic-unit-tests",
defaults: ["bionic_unit_tests_defaults", "bionic_tests_defaults"],
clang: true,
target: {
android: {
shared_libs: ["libicuuc"],
},
},
}
cc_test {

View File

@ -16,6 +16,8 @@
#include <wctype.h>
#include <dlfcn.h>
#include <gtest/gtest.h>
class UtfLocale {
@ -25,63 +27,75 @@ class UtfLocale {
locale_t l;
};
// bionic's dlsym doesn't work in static binaries, so we can't access icu,
// so any unicode test case will fail.
// have_dl is true when dlopen() of libc.so succeeds; the tests below use it
// to decide whether non-ASCII expectations should be checked or skipped.
static bool have_dl = (dlopen("libc.so", 0) != nullptr);
// Checks a wide-character predicate and its _l variant against two
// NUL-terminated strings: every character in `trues` must satisfy both
// functions, and every character in `falses` must satisfy neither.
// Characters above 0x7f are skipped (and logged) when have_dl is false.
static void TestIsWideFn(int fn(wint_t),
                         int fn_l(wint_t, locale_t),
                         const wchar_t* trues,
                         const wchar_t* falses) {
  UtfLocale l;

  // Returns true, after logging, for characters that can't be tested here.
  auto skip_unicode = [](const wchar_t* p) {
    if (have_dl || *p <= 0x7f) return false;
    GTEST_LOG_(INFO) << "skipping unicode test " << *p;
    return true;
  };

  const wchar_t* p = trues;
  while (*p) {
    if (!skip_unicode(p)) {
      EXPECT_TRUE(fn(*p)) << *p;
      EXPECT_TRUE(fn_l(*p, l.l)) << *p;
    }
    ++p;
  }

  p = falses;
  while (*p) {
    if (!skip_unicode(p)) {
      EXPECT_FALSE(fn(*p)) << *p;
      EXPECT_FALSE(fn_l(*p, l.l)) << *p;
    }
    ++p;
  }
}
// One test per isw* predicate. Each TestIsWideFn call passes the predicate,
// its _l variant, a string of characters expected to match, and a string of
// characters expected not to match.
// NOTE(review): every test holds an ASCII-only call followed by a call that
// also covers Ç/ç/Δ/δ; the first looks like the pre-change line of the diff
// and is subsumed by the second -- confirm before relying on both.
TEST(wctype, iswalnum) {
TestIsWideFn(iswalnum, iswalnum_l, L"1aA", L"! \b");
TestIsWideFn(iswalnum, iswalnum_l, L"1aAÇçΔδ", L"! \b");
}
TEST(wctype, iswalpha) {
TestIsWideFn(iswalpha, iswalpha_l, L"aA", L"1! \b");
TestIsWideFn(iswalpha, iswalpha_l, L"aAÇçΔδ", L"1! \b");
}
TEST(wctype, iswblank) {
TestIsWideFn(iswblank, iswblank_l, L" \t", L"1aA!\b");
TestIsWideFn(iswblank, iswblank_l, L" \t", L"1aA!\bÇçΔδ");
}
// The extended call also expects U+009F to be a control character.
TEST(wctype, iswcntrl) {
TestIsWideFn(iswcntrl, iswcntrl_l, L"\b", L"1aA! ");
TestIsWideFn(iswcntrl, iswcntrl_l, L"\b\u009f", L"1aA! ÇçΔδ");
}
TEST(wctype, iswdigit) {
TestIsWideFn(iswdigit, iswdigit_l, L"1", L"aA! \b");
TestIsWideFn(iswdigit, iswdigit_l, L"1", L"aA! \bÇçΔδ");
}
TEST(wctype, iswgraph) {
TestIsWideFn(iswgraph, iswgraph_l, L"1aA!", L" \b");
TestIsWideFn(iswgraph, iswgraph_l, L"1aA!ÇçΔδ", L" \b");
}
// Lower-case letters match; the upper-case Ç and Δ must not.
TEST(wctype, iswlower) {
TestIsWideFn(iswlower, iswlower_l, L"a", L"1A! \b");
TestIsWideFn(iswlower, iswlower_l, L"açδ", L"1A! \bÇΔ");
}
TEST(wctype, iswprint) {
TestIsWideFn(iswprint, iswprint_l, L"1aA! ", L"\b");
TestIsWideFn(iswprint, iswprint_l, L"1aA! ÇçΔδ", L"\b");
}
TEST(wctype, iswpunct) {
TestIsWideFn(iswpunct, iswpunct_l, L"!", L"1aA \b");
TestIsWideFn(iswpunct, iswpunct_l, L"!", L"1aA \bÇçΔδ");
}
TEST(wctype, iswspace) {
TestIsWideFn(iswspace, iswspace_l, L" \f\t", L"1aA!\b");
TestIsWideFn(iswspace, iswspace_l, L" \f\t", L"1aA!\bÇçΔδ");
}
// Upper-case letters match; the lower-case ç and δ must not.
TEST(wctype, iswupper) {
TestIsWideFn(iswupper, iswupper_l, L"A", L"1a! \b");
TestIsWideFn(iswupper, iswupper_l, L"AÇΔ", L"1a! \bçδ");
}
TEST(wctype, iswxdigit) {
@ -89,29 +103,65 @@ TEST(wctype, iswxdigit) {
}
TEST(wctype, towlower) {
  // WEOF and ASCII are checked unconditionally.
  EXPECT_EQ(WEOF, towlower(WEOF));
  EXPECT_EQ(wint_t('!'), towlower(L'!'));
  EXPECT_EQ(wint_t('a'), towlower(L'a'));
  EXPECT_EQ(wint_t('a'), towlower(L'A'));

  // The non-ASCII cases are only checked when have_dl is set.
  if (!have_dl) {
    GTEST_LOG_(INFO) << "skipping unicode towlower tests";
    return;
  }
  EXPECT_EQ(wint_t(L'ç'), towlower(L'ç'));
  EXPECT_EQ(wint_t(L'ç'), towlower(L'Ç'));
  EXPECT_EQ(wint_t(L'δ'), towlower(L'δ'));
  EXPECT_EQ(wint_t(L'δ'), towlower(L'Δ'));
}
TEST(wctype, towlower_l) {
  UtfLocale l;
  // Exercise towlower_l itself for WEOF (previously this line called
  // towlower, so the _l variant's WEOF handling went untested).
  EXPECT_EQ(WEOF, towlower_l(WEOF, l.l));
  EXPECT_EQ(wint_t('!'), towlower_l(L'!', l.l));
  EXPECT_EQ(wint_t('a'), towlower_l(L'a', l.l));
  EXPECT_EQ(wint_t('a'), towlower_l(L'A', l.l));
  // The non-ASCII cases are only checked when have_dl is set.
  if (have_dl) {
    EXPECT_EQ(wint_t(L'ç'), towlower_l(L'ç', l.l));
    EXPECT_EQ(wint_t(L'ç'), towlower_l(L'Ç', l.l));
    EXPECT_EQ(wint_t(L'δ'), towlower_l(L'δ', l.l));
    EXPECT_EQ(wint_t(L'δ'), towlower_l(L'Δ', l.l));
  } else {
    GTEST_LOG_(INFO) << "skipping unicode towlower_l tests";
  }
}
TEST(wctype, towupper) {
  // WEOF and ASCII are checked unconditionally.
  EXPECT_EQ(WEOF, towupper(WEOF));
  EXPECT_EQ(wint_t('!'), towupper(L'!'));
  EXPECT_EQ(wint_t('A'), towupper(L'a'));
  EXPECT_EQ(wint_t('A'), towupper(L'A'));

  // The non-ASCII cases are only checked when have_dl is set.
  if (!have_dl) {
    GTEST_LOG_(INFO) << "skipping unicode towupper tests";
    return;
  }
  EXPECT_EQ(wint_t(L'Ç'), towupper(L'ç'));
  EXPECT_EQ(wint_t(L'Ç'), towupper(L'Ç'));
  EXPECT_EQ(wint_t(L'Δ'), towupper(L'δ'));
  EXPECT_EQ(wint_t(L'Δ'), towupper(L'Δ'));
}
TEST(wctype, towupper_l) {
  UtfLocale l;
  // WEOF and ASCII are checked unconditionally.
  EXPECT_EQ(WEOF, towupper_l(WEOF, l.l));
  EXPECT_EQ(wint_t('!'), towupper_l(L'!', l.l));
  EXPECT_EQ(wint_t('A'), towupper_l(L'a', l.l));
  EXPECT_EQ(wint_t('A'), towupper_l(L'A', l.l));

  // The non-ASCII cases are only checked when have_dl is set.
  if (!have_dl) {
    GTEST_LOG_(INFO) << "skipping unicode towupper_l tests";
    return;
  }
  EXPECT_EQ(wint_t(L'Ç'), towupper_l(L'ç', l.l));
  EXPECT_EQ(wint_t(L'Ç'), towupper_l(L'Ç', l.l));
  EXPECT_EQ(wint_t(L'Δ'), towupper_l(L'δ', l.l));
  EXPECT_EQ(wint_t(L'Δ'), towupper_l(L'Δ', l.l));
}
TEST(wctype, wctype) {