Fix mbsnrtowcs when `dst` is null.

POSIX is its usual unintelligible self
(http://pubs.opengroup.org/onlinepubs/9699919799/functions/mbsrtowcs.html),
but the ISO C11 standard (7.29.6.4.1 paragraph 2) is pretty clear: *src is
updated only when dst is non-null, and must be left untouched when dst is null.

Bug: https://code.google.com/p/android/issues/detail?id=166381
Test: bionic tests
Change-Id: Ibc631cfa5b1bf4a6f56963feba9f0eea27b07984
This commit is contained in:
Elliott Hughes 2016-09-29 17:21:43 -07:00
parent cc9e9933e1
commit 89e29ee485
2 changed files with 30 additions and 26 deletions

View File

@ -70,21 +70,19 @@ size_t mbsnrtowcs(wchar_t* dst, const char** src, size_t nmc, size_t len, mbstat
mbstate_t* state = (ps == NULL) ? &__private_state : ps;
size_t i, o, r;
// The fast paths in the loops below are not safe if an ASCII
// character appears as anything but the first byte of a
// multibyte sequence. Check now to avoid doing it in the loops.
if (nmc > 0 && mbstate_bytes_so_far(state) > 0 && static_cast<uint8_t>((*src)[0]) < 0x80) {
return reset_and_return_illegal(EILSEQ, state);
}
// Measure only?
if (dst == NULL) {
/*
* The fast path in the loop below is not safe if an ASCII
* character appears as anything but the first byte of a
* multibyte sequence. Check now to avoid doing it in the loop.
*/
if ((nmc > 0) && (mbstate_bytes_so_far(state) > 0)
&& (static_cast<uint8_t>((*src)[0]) < 0x80)) {
return reset_and_return_illegal(EILSEQ, state);
}
for (i = o = 0; i < nmc; i += r, o++) {
if (static_cast<uint8_t>((*src)[i]) < 0x80) {
// Fast path for plain ASCII characters.
if ((*src)[i] == '\0') {
*src = nullptr;
return reset_and_return(o, state);
}
r = 1;
@ -97,7 +95,6 @@ size_t mbsnrtowcs(wchar_t* dst, const char** src, size_t nmc, size_t len, mbstat
return reset_and_return_illegal(EILSEQ, state);
}
if (r == 0) {
*src = nullptr;
return reset_and_return(o, state);
}
}
@ -105,15 +102,7 @@ size_t mbsnrtowcs(wchar_t* dst, const char** src, size_t nmc, size_t len, mbstat
return reset_and_return(o, state);
}
/*
* The fast path in the loop below is not safe if an ASCII
* character appears as anything but the first byte of a
* multibyte sequence. Check now to avoid doing it in the loop.
*/
if ((nmc > 0) && (mbstate_bytes_so_far(state) > 0)
&& (static_cast<uint8_t>((*src)[0]) < 0x80)) {
return reset_and_return_illegal(EILSEQ, state);
}
// Actually convert, updating `dst` and `src`.
for (i = o = 0; i < nmc && o < len; i += r, o++) {
if (static_cast<uint8_t>((*src)[i]) < 0x80) {
// Fast path for plain ASCII characters.
@ -151,7 +140,7 @@ size_t wcrtomb(char* s, wchar_t wc, mbstate_t* ps) {
static mbstate_t __private_state;
mbstate_t* state = (ps == NULL) ? &__private_state : ps;
// Our wchar_t is UTF-32
// Our wchar_t is UTF-32.
return c32rtomb(s, static_cast<char32_t>(wc), state);
}

View File

@ -303,7 +303,7 @@ TEST(wchar, mbrtowc) {
ASSERT_EQ(EILSEQ, errno);
}
void test_mbrtowc_incomplete(mbstate_t* ps) {
static void test_mbrtowc_incomplete(mbstate_t* ps) {
ASSERT_STREQ("C.UTF-8", setlocale(LC_CTYPE, "C.UTF-8"));
uselocale(LC_GLOBAL_LOCALE);
@ -340,10 +340,13 @@ TEST(wchar, mbrtowc_incomplete) {
test_mbrtowc_incomplete(NULL);
}
void test_mbsrtowcs(mbstate_t* ps) {
static void test_mbsrtowcs(mbstate_t* ps) {
constexpr const char* VALID = "A" "\xc2\xa2" "\xe2\x82\xac" "\xf0\xa4\xad\xa2" "ef";
constexpr const char* INVALID = "A" "\xc2\x20" "ef";
constexpr const char* INCOMPLETE = "A" "\xc2";
wchar_t out[4];
const char* valid = "A" "\xc2\xa2" "\xe2\x82\xac" "\xf0\xa4\xad\xa2" "ef";
const char* valid = VALID;
ASSERT_EQ(4U, mbsrtowcs(out, &valid, 4, ps));
ASSERT_EQ(L'A', out[0]);
ASSERT_EQ(static_cast<wchar_t>(0x00a2), out[1]);
@ -362,15 +365,27 @@ void test_mbsrtowcs(mbstate_t* ps) {
// Check that valid has advanced to the end of the string.
ASSERT_EQ(nullptr, valid);
const char* invalid = "A" "\xc2\x20" "ef";
const char* invalid = INVALID;
ASSERT_EQ(static_cast<size_t>(-1), mbsrtowcs(out, &invalid, 4, ps));
EXPECT_EQ(EILSEQ, errno);
ASSERT_EQ('\xc2', *invalid);
const char* incomplete = "A" "\xc2";
const char* incomplete = INCOMPLETE;
ASSERT_EQ(static_cast<size_t>(-1), mbsrtowcs(out, &incomplete, 2, ps));
EXPECT_EQ(EILSEQ, errno);
ASSERT_EQ('\xc2', *incomplete);
// If dst is null, *src shouldn't be updated.
// https://code.google.com/p/android/issues/detail?id=166381
const char* mbs = VALID;
EXPECT_EQ(6U, mbsrtowcs(nullptr, &mbs, 0, ps));
EXPECT_EQ(VALID, mbs);
mbs = INVALID;
EXPECT_EQ(static_cast<size_t>(-1), mbsrtowcs(nullptr, &mbs, 0, ps));
EXPECT_EQ(INVALID, mbs);
mbs = INCOMPLETE;
EXPECT_EQ(static_cast<size_t>(-1), mbsrtowcs(nullptr, &mbs, 0, ps));
EXPECT_EQ(INCOMPLETE, mbs);
}
TEST(wchar, mbsrtowcs) {