Merge "Add memory ordering constraint, convert to C11 atomics"

This commit is contained in:
Hans Boehm 2014-08-14 01:03:09 +00:00 committed by Gerrit Code Review
commit 4f85c6ffd3
1 changed files with 63 additions and 21 deletions

View File

@ -26,6 +26,7 @@
* SUCH DAMAGE. * SUCH DAMAGE.
*/ */
#include <new> #include <new>
#include <stdatomic.h>
#include <stdio.h> #include <stdio.h>
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
@ -45,7 +46,6 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/types.h> #include <sys/types.h>
#include <netinet/in.h> #include <netinet/in.h>
#include <unistd.h>
#define _REALLY_INCLUDE_SYS__SYSTEM_PROPERTIES_H_ #define _REALLY_INCLUDE_SYS__SYSTEM_PROPERTIES_H_
#include <sys/_system_properties.h> #include <sys/_system_properties.h>
@ -80,6 +80,16 @@ struct prop_bt {
uint8_t namelen; uint8_t namelen;
uint8_t reserved[3]; uint8_t reserved[3];
// TODO: The following fields should be declared as atomic_uint32_t.
// They should be assigned to with release semantics, instead of using
// explicit fences. Unfortunately, the read accesses are generally
// followed by more dependent read accesses, and the dependence
// is assumed to enforce memory ordering. Which it does on supported
// hardware. This technically should use memory_order_consume, if
// that worked as intended.
// We should also avoid rereading these fields redundantly, since not
// all processor implementations ensure that multiple loads from the
// same field are carried out in the right order.
volatile uint32_t prop; volatile uint32_t prop;
volatile uint32_t left; volatile uint32_t left;
@ -93,7 +103,8 @@ struct prop_bt {
this->namelen = name_length; this->namelen = name_length;
memcpy(this->name, name, name_length); memcpy(this->name, name, name_length);
this->name[name_length] = '\0'; this->name[name_length] = '\0';
ANDROID_MEMBAR_FULL(); ANDROID_MEMBAR_FULL(); // TODO: Instead use a release store
// for subsequent pointer assignment.
} }
private: private:
@ -102,14 +113,15 @@ private:
struct prop_area { struct prop_area {
uint32_t bytes_used; uint32_t bytes_used;
volatile uint32_t serial; atomic_uint_least32_t serial;
uint32_t magic; uint32_t magic;
uint32_t version; uint32_t version;
uint32_t reserved[28]; uint32_t reserved[28];
char data[0]; char data[0];
prop_area(const uint32_t magic, const uint32_t version) : prop_area(const uint32_t magic, const uint32_t version) :
serial(0), magic(magic), version(version) { magic(magic), version(version) {
atomic_init(&serial, 0);
memset(reserved, 0, sizeof(reserved)); memset(reserved, 0, sizeof(reserved));
// Allocate enough space for the root node. // Allocate enough space for the root node.
bytes_used = sizeof(prop_bt); bytes_used = sizeof(prop_bt);
@ -120,7 +132,7 @@ private:
}; };
struct prop_info { struct prop_info {
volatile uint32_t serial; atomic_uint_least32_t serial;
char value[PROP_VALUE_MAX]; char value[PROP_VALUE_MAX];
char name[0]; char name[0];
@ -128,10 +140,11 @@ struct prop_info {
const uint8_t valuelen) { const uint8_t valuelen) {
memcpy(this->name, name, namelen); memcpy(this->name, name, namelen);
this->name[namelen] = '\0'; this->name[namelen] = '\0';
this->serial = (valuelen << 24); atomic_init(&this->serial, valuelen << 24);
memcpy(this->value, value, valuelen); memcpy(this->value, value, valuelen);
this->value[valuelen] = '\0'; this->value[valuelen] = '\0';
ANDROID_MEMBAR_FULL(); ANDROID_MEMBAR_FULL(); // TODO: Instead use a release store
// for subsequent point assignment.
} }
private: private:
DISALLOW_COPY_AND_ASSIGN(prop_info); DISALLOW_COPY_AND_ASSIGN(prop_info);
@ -605,11 +618,20 @@ int __system_property_read(const prop_info *pi, char *name, char *value)
} }
while (true) { while (true) {
uint32_t serial = __system_property_serial(pi); uint32_t serial = __system_property_serial(pi); // acquire semantics
size_t len = SERIAL_VALUE_LEN(serial); size_t len = SERIAL_VALUE_LEN(serial);
memcpy(value, pi->value, len + 1); memcpy(value, pi->value, len + 1);
ANDROID_MEMBAR_FULL(); // TODO: Fix the synchronization scheme here.
if (serial == pi->serial) { // There is no fully supported way to implement this kind
// of synchronization in C++11, since the memcpy races with
// updates to pi, and the data being accessed is not atomic.
// The following fence is unintuitive, but would be the
// correct one if memcpy used memory_order_relaxed atomic accesses.
// In practice it seems unlikely that the generated code would
// would be any different, so this should be OK.
atomic_thread_fence(memory_order_acquire);
if (serial ==
atomic_load_explicit(&(pi->serial), memory_order_relaxed)) {
if (name != 0) { if (name != 0) {
strcpy(name, pi->name); strcpy(name, pi->name);
} }
@ -658,14 +680,24 @@ int __system_property_update(prop_info *pi, const char *value, unsigned int len)
if (len >= PROP_VALUE_MAX) if (len >= PROP_VALUE_MAX)
return -1; return -1;
pi->serial = pi->serial | 1; uint32_t serial = atomic_load_explicit(&pi->serial, memory_order_relaxed);
ANDROID_MEMBAR_FULL(); serial |= 1;
atomic_store_explicit(&pi->serial, serial, memory_order_relaxed);
// The memcpy call here also races. Again pretend it
// used memory_order_relaxed atomics, and use the analogous
// counterintuitive fence.
atomic_thread_fence(memory_order_release);
memcpy(pi->value, value, len + 1); memcpy(pi->value, value, len + 1);
ANDROID_MEMBAR_FULL(); atomic_store_explicit(
pi->serial = (len << 24) | ((pi->serial + 1) & 0xffffff); &pi->serial,
(len << 24) | ((serial + 1) & 0xffffff),
memory_order_release);
__futex_wake(&pi->serial, INT32_MAX); __futex_wake(&pi->serial, INT32_MAX);
pa->serial++; atomic_store_explicit(
&pa->serial,
atomic_load_explicit(&pa->serial, memory_order_relaxed) + 1,
memory_order_release);
__futex_wake(&pa->serial, INT32_MAX); __futex_wake(&pa->serial, INT32_MAX);
return 0; return 0;
@ -688,17 +720,25 @@ int __system_property_add(const char *name, unsigned int namelen,
if (!pi) if (!pi)
return -1; return -1;
pa->serial++; // There is only a single mutator, but we want to make sure that
// updates are visible to a reader waiting for the update.
atomic_store_explicit(
&pa->serial,
atomic_load_explicit(&pa->serial, memory_order_relaxed) + 1,
memory_order_release);
__futex_wake(&pa->serial, INT32_MAX); __futex_wake(&pa->serial, INT32_MAX);
return 0; return 0;
} }
// Wait for non-locked serial, and retrieve it with acquire semantics.
unsigned int __system_property_serial(const prop_info *pi) unsigned int __system_property_serial(const prop_info *pi)
{ {
uint32_t serial = pi->serial; uint32_t serial = atomic_load_explicit(&pi->serial, memory_order_acquire);
while (SERIAL_DIRTY(serial)) { while (SERIAL_DIRTY(serial)) {
__futex_wait(const_cast<volatile uint32_t*>(&pi->serial), serial, NULL); __futex_wait(const_cast<volatile void *>(
serial = pi->serial; reinterpret_cast<const void *>(&pi->serial)),
serial, NULL);
serial = atomic_load_explicit(&pi->serial, memory_order_acquire);
} }
return serial; return serial;
} }
@ -706,12 +746,14 @@ unsigned int __system_property_serial(const prop_info *pi)
unsigned int __system_property_wait_any(unsigned int serial) unsigned int __system_property_wait_any(unsigned int serial)
{ {
prop_area *pa = __system_property_area__; prop_area *pa = __system_property_area__;
uint32_t my_serial;
do { do {
__futex_wait(&pa->serial, serial, NULL); __futex_wait(&pa->serial, serial, NULL);
} while (pa->serial == serial); my_serial = atomic_load_explicit(&pa->serial, memory_order_acquire);
} while (my_serial == serial);
return pa->serial; return my_serial;
} }
const prop_info *__system_property_find_nth(unsigned n) const prop_info *__system_property_find_nth(unsigned n)