<ctype.h>: inline the ASCII functions.

We've optimized the ctype functions to the point where they're pretty
much all down to one instruction. This change takes the obvious next
step of just inlining them.

On Android these functions have only ever been for ASCII. You need the
<wctype.h> functions for non-ASCII.

libc++ currently has its own inlines for the _l variants, so if we want
to just inline them in bionic directly, we'll need to coordinate that.

Bug: http://b/144165498
Test: treehugger plus benchmarks
Change-Id: I4cc8aa96f7994ae710a562cfc9d4f220ab7babd6
This commit is contained in:
Elliott Hughes 2020-02-05 08:07:10 -08:00
parent 2968695188
commit 026d3a8fa5
3 changed files with 231 additions and 233 deletions

View File

@ -26,148 +26,63 @@
* SUCH DAMAGE.
*/
#define __BIONIC_CTYPE_INLINE /* Out of line. */
#include <bits/ctype_inlines.h>
#include <ctype.h>
// Reports whether `c` lies in the inclusive range [lo, hi]: 1 if so, 0 if not.
static inline int __in_range(int c, char lo, char hi) {
  // Reject anything below the lower bound first, then test the upper bound.
  if (c < lo) return 0;
  return c <= hi;
}
// Returns non-zero if the `_ctype_` table flags `c` as upper-case, lower-case,
// or numeric (`_CTYPE_U`, `_CTYPE_L`, `_CTYPE_N`).
int isalnum(int c) {
  // `isalnum(c)` could be written `isalpha(c) || isdigit(c)`, but there's no
  // obvious way to simplify that, and the table lookup is slightly faster.
  // The lookup is unsafe for inputs less than -1 (EOF) or greater than 0xff.
  // This is true of other C libraries too.
  const int alnum_mask = _CTYPE_U | _CTYPE_L | _CTYPE_N;
  const int flags = _ctype_[c + 1];
  return flags & alnum_mask;
}
// isalnum() with an extra locale_t argument, which is accepted but ignored.
int isalnum_l(int c, locale_t /* unused */) {
  const int result = isalnum(c);
  return result;
}
// Returns 1 if `c` is in `[A-Za-z]`, and 0 otherwise.
int isalpha(int c) {
  // Upper-case letters first, then fall back to the lower-case range.
  if (__in_range(c, 'A', 'Z')) return 1;
  return __in_range(c, 'a', 'z');
}
// isalpha() with an extra locale_t argument, which is accepted but ignored.
int isalpha_l(int c, locale_t /* unused */) {
  const int result = isalpha(c);
  return result;
}
// Returns 1 if `c` is in the range [0, 0x7f], and 0 otherwise.
int isascii(int c) {
  // Converting to unsigned turns negative inputs into large values, so a
  // single comparison rejects them along with everything above 0x7f.
  const unsigned value = static_cast<unsigned>(c);
  return value <= 0x7f;
}
// Returns 1 if `c` is a space or a horizontal tab, and 0 otherwise.
int isblank(int c) {
  switch (c) {
    case ' ':
    case '\t':
      return 1;
    default:
      return 0;
  }
}
// isblank() with an extra locale_t argument, which is accepted but ignored.
int isblank_l(int c, locale_t /* unused */) {
  const int result = isblank(c);
  return result;
}
// Returns 1 if `c` is a control character: anything in [0, ' ') or DEL (0x7f).
int iscntrl(int c) {
  // DEL is the one control character outside the low range.
  if (c == 0x7f) return 1;
  // The unsigned conversion makes negative inputs compare as large values.
  return static_cast<unsigned>(c) < ' ';
}
// iscntrl() with an extra locale_t argument, which is accepted but ignored.
int iscntrl_l(int c, locale_t /* unused */) {
  const int result = iscntrl(c);
  return result;
}
// Returns 1 if `c` is in `[0-9]`, and 0 otherwise.
int isdigit(int c) {
  const char first_digit = '0';
  const char last_digit = '9';
  return __in_range(c, first_digit, last_digit);
}
// isdigit() with an extra locale_t argument, which is accepted but ignored.
int isdigit_l(int c, locale_t /* unused */) {
  const int result = isdigit(c);
  return result;
}
// Returns 1 if `c` is in the range ['!', '~'], and 0 otherwise.
int isgraph(int c) {
  const char first_graphic = '!';
  const char last_graphic = '~';
  return __in_range(c, first_graphic, last_graphic);
}
// isgraph() with an extra locale_t argument, which is accepted but ignored.
int isgraph_l(int c, locale_t /* unused */) {
  const int result = isgraph(c);
  return result;
}
// Returns 1 if `c` is in `[a-z]`, and 0 otherwise.
int islower(int c) {
  const char first_lower = 'a';
  const char last_lower = 'z';
  return __in_range(c, first_lower, last_lower);
}
// islower() with an extra locale_t argument, which is accepted but ignored.
int islower_l(int c, locale_t /* unused */) {
  const int result = islower(c);
  return result;
}
// Returns 1 if `c` is in the range [' ', '~'], and 0 otherwise.
int isprint(int c) {
  const char first_printable = ' ';
  const char last_printable = '~';
  return __in_range(c, first_printable, last_printable);
}
// isprint() with an extra locale_t argument, which is accepted but ignored.
int isprint_l(int c, locale_t /* unused */) {
  const int result = isprint(c);
  return result;
}
// Returns non-zero if the `_ctype_` table flags `c` with `_CTYPE_P`.
int ispunct(int c) {
  // `ispunct(c)` could be written `isgraph(c) && !isalnum(c)`, but there's no
  // obvious way to simplify that, and the table lookup is slightly faster.
  // The lookup is unsafe for inputs less than -1 (EOF) or greater than 0xff.
  // This is true of other C libraries too.
  const int flags = _ctype_[c + 1];
  return flags & _CTYPE_P;
}
// ispunct() with an extra locale_t argument, which is accepted but ignored.
int ispunct_l(int c, locale_t /* unused */) {
  const int result = ispunct(c);
  return result;
}
// Returns 1 if `c` is a space or lies in the range ['\t', '\r'], else 0.
int isspace(int c) {
  if (c == ' ') return 1;
  // The remaining whitespace characters form one contiguous range.
  return __in_range(c, '\t', '\r');
}
// isspace() with an extra locale_t argument, which is accepted but ignored.
int isspace_l(int c, locale_t /* unused */) {
  const int result = isspace(c);
  return result;
}
// Returns 1 if `c` is in `[A-Z]`, and 0 otherwise.
int isupper(int c) {
  const char first_upper = 'A';
  const char last_upper = 'Z';
  return __in_range(c, first_upper, last_upper);
}
// isupper() with an extra locale_t argument, which is accepted but ignored.
int isupper_l(int c, locale_t /* unused */) {
  const int result = isupper(c);
  return result;
}
// Returns 1 if `c` is in `[0-9a-fA-F]`, and 0 otherwise.
int isxdigit(int c) {
  // Checked in the same order as before: digits, lower-case, upper-case.
  if (__in_range(c, '0', '9')) return 1;
  if (__in_range(c, 'a', 'f')) return 1;
  return __in_range(c, 'A', 'F');
}
// isxdigit() with an extra locale_t argument, which is accepted but ignored.
int isxdigit_l(int c, locale_t /* unused */) {
  const int result = isxdigit(c);
  return result;
}
// Returns `c` with everything but its low seven bits cleared (`c & 0x7f`).
int toascii(int c) {
  const int low_seven_bits = 0x7f;
  return c & low_seven_bits;
}
// Flips bit 0x20 of `c`, which maps a lower-case letter to its upper-case
// counterpart. No range check is done; callers such as toupper() do that.
int _toupper(int c) {
  // EOR is used rather than AND: it makes no difference on arm, but saves an
  // instruction on arm64.
  const int case_bit = 0x20;
  return c ^ case_bit;
}
// Maps `[a-z]` to upper case via _toupper(); any other value is returned
// unchanged.
int toupper(int c) {
  const int is_lower = (c >= 'a') && (c <= 'z');
  return is_lower ? _toupper(c) : c;
}
// toupper() with an extra locale_t argument, which is accepted but ignored.
int toupper_l(int c, locale_t /* unused */) {
  const int result = toupper(c);
  return result;
}
// Sets bit 0x20 of `c`, which maps an upper-case letter to its lower-case
// counterpart. No range check is done; callers such as tolower() do that.
int _tolower(int c) {
  const int case_bit = 0x20;
  return c | case_bit;
}
// Maps `[A-Z]` to lower case via _tolower(); any other value is returned
// unchanged.
int tolower(int c) {
  const int is_upper = (c >= 'A') && (c <= 'Z');
  return is_upper ? _tolower(c) : c;
}
// tolower() with an extra locale_t argument, which is accepted but ignored.
int tolower_l(int c, locale_t /* unused */) {
  const int result = tolower(c);
  return result;
}

View File

@ -0,0 +1,209 @@
/*
* Copyright (C) 2014 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#pragma once
#include <sys/cdefs.h>
#include <xlocale.h>
#if !defined(__BIONIC_CTYPE_INLINE)
#define __BIONIC_CTYPE_INLINE static __inline
#endif
/** Internal implementation detail. Do not use. */
#define _CTYPE_U 0x01
/** Internal implementation detail. Do not use. */
#define _CTYPE_L 0x02
/** Internal implementation detail. Do not use. */
#define _CTYPE_D 0x04
/** Internal implementation detail. Do not use. */
#define _CTYPE_S 0x08
/** Internal implementation detail. Do not use. */
#define _CTYPE_P 0x10
/** Internal implementation detail. Do not use. */
#define _CTYPE_C 0x20
/** Internal implementation detail. Do not use. */
#define _CTYPE_X 0x40
/** Internal implementation detail. Do not use. */
#define _CTYPE_B 0x80
/** Internal implementation detail. Do not use. */
#define _CTYPE_R (_CTYPE_P|_CTYPE_U|_CTYPE_L|_CTYPE_D|_CTYPE_B)
/** Internal implementation detail. Do not use. */
#define _CTYPE_A (_CTYPE_L|_CTYPE_U)
/** Internal implementation detail. Do not use. */
#define _CTYPE_N _CTYPE_D
__BEGIN_DECLS
/** Internal implementation detail. Do not use. */
extern const char* _ctype_;
/** Returns non-zero if `ch` is in `[A-Za-z0-9]`. */
__BIONIC_CTYPE_INLINE int isalnum(int __ch) {
  // `isalnum(c)` could be written `isalpha(c) || isdigit(c)`, but there's no
  // obvious way to simplify that, and the table lookup is slightly faster.
  // The lookup is unsafe for inputs less than -1 (EOF) or greater than 0xff.
  // This is true of other C libraries too.
  const int __flags = _ctype_[__ch + 1];
  return __flags & (_CTYPE_U|_CTYPE_L|_CTYPE_N);
}
/** Returns 1 if `ch` is in `[A-Za-z]`, and 0 otherwise. */
__BIONIC_CTYPE_INLINE int isalpha(int __ch) {
  // Upper-case range first, then the lower-case range.
  if (__ch >= 'A' && __ch <= 'Z') return 1;
  return (__ch >= 'a' && __ch <= 'z');
}
/** Returns 1 if `ch` is a space or a horizontal tab, and 0 otherwise. */
__BIONIC_CTYPE_INLINE int isblank(int __ch) {
  switch (__ch) {
    case ' ':
    case '\t':
      return 1;
    default:
      return 0;
  }
}
/** Returns 1 if `ch` is a control character (anything before space, or DEL), and 0 otherwise. */
__BIONIC_CTYPE_INLINE int iscntrl(int __ch) {
  // DEL is the one control character outside the low range.
  if (__ch == 0x7f) return 1;
  // The unsigned conversion makes negative inputs compare as large values.
  return __BIONIC_CAST(static_cast, unsigned, __ch) < ' ';
}
/** Returns 1 if `ch` is in `[0-9]`, and 0 otherwise. */
__BIONIC_CTYPE_INLINE int isdigit(int __ch) {
  if (__ch < '0') return 0;
  return __ch <= '9';
}
/** Returns 1 if `ch` is in the range `'!'` to `'~'` inclusive (letters, digits, and punctuation), and 0 otherwise. */
__BIONIC_CTYPE_INLINE int isgraph(int __ch) {
  if (__ch < '!') return 0;
  return __ch <= '~';
}
/** Returns 1 if `ch` is in `[a-z]`, and 0 otherwise. */
__BIONIC_CTYPE_INLINE int islower(int __ch) {
  if (__ch < 'a') return 0;
  return __ch <= 'z';
}
/** Returns 1 if `ch` is in the range `' '` to `'~'` inclusive (space, letters, digits, and punctuation), and 0 otherwise. */
__BIONIC_CTYPE_INLINE int isprint(int __ch) {
  if (__ch < ' ') return 0;
  return __ch <= '~';
}
/** Returns non-zero if `ch` is punctuation. */
__BIONIC_CTYPE_INLINE int ispunct(int __ch) {
  // `ispunct(c)` could be written `isgraph(c) && !isalnum(c)`, but there's no
  // obvious way to simplify that, and the table lookup is slightly faster.
  // The lookup is unsafe for inputs less than -1 (EOF) or greater than 0xff.
  // This is true of other C libraries too.
  const int __flags = _ctype_[__ch + 1];
  return __flags & _CTYPE_P;
}
/** Returns 1 if `ch` is in `[ \f\n\r\t\v]`, and 0 otherwise. */
__BIONIC_CTYPE_INLINE int isspace(int __ch) {
  if (__ch == ' ') return 1;
  // The remaining whitespace characters are the contiguous range '\t'..'\r'.
  return (__ch >= '\t' && __ch <= '\r');
}
/** Returns 1 if `ch` is in `[A-Z]`, and 0 otherwise. */
__BIONIC_CTYPE_INLINE int isupper(int __ch) {
  if (__ch < 'A') return 0;
  return __ch <= 'Z';
}
/** Returns 1 if `ch` is in `[0-9A-Fa-f]`, and 0 otherwise. */
__BIONIC_CTYPE_INLINE int isxdigit(int __ch) {
  // Checked in the same order as before: digits, lower-case, upper-case.
  if (__ch >= '0' && __ch <= '9') return 1;
  if (__ch >= 'a' && __ch <= 'f') return 1;
  return (__ch >= 'A' && __ch <= 'F');
}
/**
 * Returns `ch | 0x20`, which is the corresponding lower-case character if `ch`
 * is upper-case. No range check is done, so the result for any other input is
 * not meaningful.
 *
 * Prefer tolower() instead.
 */
__BIONIC_CTYPE_INLINE int _tolower(int __ch) {
  const int __case_bit = 0x20;
  return __ch | __case_bit;
}
/** Returns the lower-case counterpart of `ch` if `ch` is in `[A-Z]`, or `ch` itself otherwise. */
__BIONIC_CTYPE_INLINE int tolower(int __ch) {
  // Anything outside [A-Z] is passed through untouched.
  if (__ch < 'A' || __ch > 'Z') return __ch;
  return _tolower(__ch);
}
/**
 * Returns `ch ^ 0x20`, which is the corresponding upper-case character if `ch`
 * is lower-case. No range check is done, so the result for any other input is
 * not meaningful.
 *
 * Prefer toupper() instead.
 */
__BIONIC_CTYPE_INLINE int _toupper(int __ch) {
  // EOR is used rather than AND: it makes no difference on arm, but saves an
  // instruction on arm64.
  const int __case_bit = 0x20;
  return __ch ^ __case_bit;
}
/** Returns the upper-case counterpart of `ch` if `ch` is in `[a-z]`, or `ch` itself otherwise. */
__BIONIC_CTYPE_INLINE int toupper(int __ch) {
  // Anything outside [a-z] is passed through untouched.
  if (__ch < 'a' || __ch > 'z') return __ch;
  return _toupper(__ch);
}
/** Returns 1 if `ch` is in the range 0 to 0x7f inclusive, and 0 otherwise (including for negative values). */
__BIONIC_CTYPE_INLINE int isascii(int __ch) {
  // Converting to unsigned turns negative inputs into large values, so a
  // single comparison rejects them along with everything above 0x7f.
  const unsigned __value = __BIONIC_CAST(static_cast, unsigned, __ch);
  return __value <= 0x7f;
}
/** Returns `ch` with everything but its low seven bits cleared (`ch & 0x7f`). */
__BIONIC_CTYPE_INLINE int toascii(int __ch) {
  const int __low_seven_bits = 0x7f;
  return __ch & __low_seven_bits;
}
#if __ANDROID_API__ >= 21
/** Like isalnum but with an ignored `locale_t`. */
int isalnum_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like isalpha but with an ignored `locale_t`. */
int isalpha_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like isblank but with an ignored `locale_t`. */
int isblank_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like iscntrl but with an ignored `locale_t`. */
int iscntrl_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like isdigit but with an ignored `locale_t`. */
int isdigit_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like isgraph but with an ignored `locale_t`. */
int isgraph_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like islower but with an ignored `locale_t`. */
int islower_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like isprint but with an ignored `locale_t`. */
int isprint_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like ispunct but with an ignored `locale_t`. */
int ispunct_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like isspace but with an ignored `locale_t`. */
int isspace_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like isupper but with an ignored `locale_t`. */
int isupper_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like isxdigit but with an ignored `locale_t`. */
int isxdigit_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like tolower but with an ignored `locale_t`. */
int tolower_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like toupper but with an ignored `locale_t`. */
int toupper_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
#else
// Implemented as static inlines in libc++ before 21.
#endif
__END_DECLS

View File

@ -1,40 +1,29 @@
/* $OpenBSD: ctype.h,v 1.19 2005/12/13 00:35:22 millert Exp $ */
/* $NetBSD: ctype.h,v 1.14 1994/10/26 00:55:47 cgd Exp $ */
/*
* Copyright (c) 1989 The Regents of the University of California.
* Copyright (C) 2014 The Android Open Source Project
* All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ctype.h 5.3 (Berkeley) 4/3/91
*/
#pragma once
@ -45,121 +34,6 @@
*/
#include <sys/cdefs.h>
#include <xlocale.h>
/** Internal implementation detail. Do not use. */
#define _CTYPE_U 0x01
/** Internal implementation detail. Do not use. */
#define _CTYPE_L 0x02
/** Internal implementation detail. Do not use. */
#define _CTYPE_D 0x04
/** Internal implementation detail. Do not use. */
#define _CTYPE_S 0x08
/** Internal implementation detail. Do not use. */
#define _CTYPE_P 0x10
/** Internal implementation detail. Do not use. */
#define _CTYPE_C 0x20
/** Internal implementation detail. Do not use. */
#define _CTYPE_X 0x40
/** Internal implementation detail. Do not use. */
#define _CTYPE_B 0x80
/** Internal implementation detail. Do not use. */
#define _CTYPE_R (_CTYPE_P|_CTYPE_U|_CTYPE_L|_CTYPE_D|_CTYPE_B)
/** Internal implementation detail. Do not use. */
#define _CTYPE_A (_CTYPE_L|_CTYPE_U)
/** Internal implementation detail. Do not use. */
#define _CTYPE_N _CTYPE_D
__BEGIN_DECLS
/** Internal implementation detail. Do not use. */
extern const char* _ctype_;
/** Returns true if `ch` is in `[A-Za-z0-9]`. */
int isalnum(int __ch);
/** Returns true if `ch` is in `[A-Za-z]`. */
int isalpha(int __ch);
/** Returns true if `ch` is a space or tab. */
int isblank(int __ch);
/** Returns true if `ch` is a control character (any character before space, plus DEL). */
int iscntrl(int __ch);
/** Returns true if `ch` is in `[0-9]`. */
int isdigit(int __ch);
/** Returns true if `ch` is `[A-Za-z0-9]` or punctuation. */
int isgraph(int __ch);
/** Returns true if `ch` is in `[a-z]`. */
int islower(int __ch);
/** Returns true if `ch` is `[A-Za-z0-9]` or punctuation or space. */
int isprint(int __ch);
/** Returns true if `ch` is punctuation. */
int ispunct(int __ch);
/** Returns true if `ch` is in `[ \f\n\r\t\v]`. */
int isspace(int __ch);
/** Returns true if `ch` is in `[A-Z]`. */
int isupper(int __ch);
/** Returns true if `ch` is in `[0-9A-Fa-f]`. */
int isxdigit(int __ch);
/** Returns the corresponding lower-case character if `ch` is upper-case, or `ch` otherwise. */
int tolower(int __ch);
/**
* Returns the corresponding lower-case character if `ch` is upper-case, or undefined otherwise.
*
* Available since API level 21.
*
* Prefer tolower() instead.
*/
int _tolower(int __ch) __INTRODUCED_IN(21);
/** Returns the corresponding upper-case character if `ch` is lower-case, or `ch` otherwise. */
int toupper(int __ch);
/**
* Returns the corresponding upper-case character if `ch` is lower-case, or undefined otherwise.
*
* Available since API level 21.
*
* Prefer toupper() instead.
*/
int _toupper(int __ch) __INTRODUCED_IN(21);
#if __ANDROID_API__ >= 21
/** Like isalnum but with an ignored `locale_t`. */
int isalnum_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like isalpha but with an ignored `locale_t`. */
int isalpha_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like isblank but with an ignored `locale_t`. */
int isblank_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like iscntrl but with an ignored `locale_t`. */
int iscntrl_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like isdigit but with an ignored `locale_t`. */
int isdigit_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like isgraph but with an ignored `locale_t`. */
int isgraph_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like islower but with an ignored `locale_t`. */
int islower_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like isprint but with an ignored `locale_t`. */
int isprint_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like ispunct but with an ignored `locale_t`. */
int ispunct_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like isspace but with an ignored `locale_t`. */
int isspace_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like isupper but with an ignored `locale_t`. */
int isupper_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like isxdigit but with an ignored `locale_t`. */
int isxdigit_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like tolower but with an ignored `locale_t`. */
int tolower_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
/** Like toupper but with an ignored `locale_t`. */
int toupper_l(int __ch, locale_t __l) __INTRODUCED_IN(21);
#else
// Implemented as static inlines before 21.
#endif
/** Returns true if `ch` is less than 0x80. */
int isascii(int __ch);
/** Returns `ch & 0x7f`. */
int toascii(int __ch);
__END_DECLS
#define __BIONIC_CTYPE_INLINE static __inline
#include <bits/ctype_inlines.h>