android_system_core/adb/fdevent.cpp

554 lines
16 KiB
C++
Raw Normal View History

/* http://frotznet.googlecode.com/svn/trunk/utils/fdevent.c
**
** Copyright 2006, Brian Swetland <swetland@frotz.net>
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
** http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/
#define TRACE_TAG FDEVENT
#include "sysdeps.h"
#include "fdevent.h"
#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <atomic>
#include <deque>
#include <functional>
#include <list>
#include <mutex>
#include <optional>
#include <unordered_map>
#include <utility>
#include <variant>
#include <vector>
#include <android-base/chrono_utils.h>
#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/stringprintf.h>
#include <android-base/thread_annotations.h>
#include <android-base/threads.h>
#include "adb_io.h"
#include "adb_trace.h"
#include "adb_unique_fd.h"
#include "adb_utils.h"
#include "sysdeps/chrono.h"
#define FDE_EVENTMASK 0x00ff
#define FDE_STATEMASK 0xff00
#define FDE_ACTIVE 0x0100
#define FDE_PENDING 0x0200
#define FDE_CREATED 0x0400
struct PollNode {
fdevent* fde;
adb_pollfd pollfd;
explicit PollNode(fdevent* fde) : fde(fde) {
memset(&pollfd, 0, sizeof(pollfd));
pollfd.fd = fde->fd.get();
#if defined(__linux__)
// Always enable POLLRDHUP, so the host server can take action when some clients disconnect.
// Then we can avoid leaving many sockets in CLOSE_WAIT state. See http://b/23314034.
pollfd.events = POLLRDHUP;
#endif
}
};
// All operations to fdevent should happen only in the main thread.
// That's why we don't need a lock for fdevent.
static auto& g_poll_node_map = *new std::unordered_map<int, PollNode>();
static auto& g_pending_list = *new std::list<fdevent*>();
static std::atomic<bool> terminate_loop(false);
static bool main_thread_valid;
static uint64_t main_thread_id;
static uint64_t fdevent_id;
static bool run_needs_flush = false;
static auto& run_queue_notify_fd = *new unique_fd();
static auto& run_queue_mutex = *new std::mutex();
static auto& run_queue GUARDED_BY(run_queue_mutex) = *new std::deque<std::function<void()>>();
adb: fix two device offline problems. When device goes offline, user usually has to manually replug the usb device. This patch tries to solve two offline situations, all because when adb on host is killed, the adbd on device is not notified. 1. When adb server is killed while pushing a large file to device, the device is still reading the unfinished large message. So the device thinks of the CNXN message as part of the previous unfinished message, so it doesn't reply and the device is in offline state. The solution is to add a write_msg_lock in atransport struct. And it kicks the transport only after sending a whole message. By kicking all transports before exit, we ensure that we don't write part of a message to any device. So next time we start adb server, the device should be waiting for a new message. 2. When adb server is killed while pulling a large file from device, the device is still trying to send the unfinished large message. So adb on host usually reads data with EOVERFLOW error. This is because adb on host is reading less than one packet sent from device. The solution is to use buffered read on host. The max packet size of bulk transactions in USB 3.0 is 1024 bytes. By preparing an at least 1024 bytes buffer when reading, EOVERFLOW no longer occurs. And teach adb host to ignore wrong messages. To be safe, this patch doesn't change any logic on device. Bug: http://b/32952319 Test: run python -m unittest -q test_device.DeviceOfflineTest Test: on linux/mac/windows with bullhead, ryu. Change-Id: Ib149d30028a62a6f03857b8a95ab5a1d6e9b9c4e
2017-03-11 00:01:01 +00:00
void check_main_thread() {
if (main_thread_valid) {
CHECK_EQ(main_thread_id, android::base::GetThreadId());
}
}
adb: fix two device offline problems. When device goes offline, user usually has to manually replug the usb device. This patch tries to solve two offline situations, all because when adb on host is killed, the adbd on device is not notified. 1. When adb server is killed while pushing a large file to device, the device is still reading the unfinished large message. So the device thinks of the CNXN message as part of the previous unfinished message, so it doesn't reply and the device is in offline state. The solution is to add a write_msg_lock in atransport struct. And it kicks the transport only after sending a whole message. By kicking all transports before exit, we ensure that we don't write part of a message to any device. So next time we start adb server, the device should be waiting for a new message. 2. When adb server is killed while pulling a large file from device, the device is still trying to send the unfinished large message. So adb on host usually reads data with EOVERFLOW error. This is because adb on host is reading less than one packet sent from device. The solution is to use buffered read on host. The max packet size of bulk transactions in USB 3.0 is 1024 bytes. By preparing an at least 1024 bytes buffer when reading, EOVERFLOW no longer occurs. And teach adb host to ignore wrong messages. To be safe, this patch doesn't change any logic on device. Bug: http://b/32952319 Test: run python -m unittest -q test_device.DeviceOfflineTest Test: on linux/mac/windows with bullhead, ryu. Change-Id: Ib149d30028a62a6f03857b8a95ab5a1d6e9b9c4e
2017-03-11 00:01:01 +00:00
void set_main_thread() {
main_thread_valid = true;
main_thread_id = android::base::GetThreadId();
}
static std::string dump_fde(const fdevent* fde) {
std::string state;
if (fde->state & FDE_ACTIVE) {
state += "A";
}
if (fde->state & FDE_PENDING) {
state += "P";
}
if (fde->state & FDE_CREATED) {
state += "C";
}
if (fde->state & FDE_READ) {
state += "R";
}
if (fde->state & FDE_WRITE) {
state += "W";
}
if (fde->state & FDE_ERROR) {
state += "E";
}
return android::base::StringPrintf("(fdevent %" PRIu64 ": fd %d %s)", fde->id, fde->fd.get(),
state.c_str());
}
template <typename F>
static fdevent* fdevent_create_impl(int fd, F func, void* arg) {
check_main_thread();
CHECK_GE(fd, 0);
fdevent* fde = new fdevent();
fde->id = fdevent_id++;
fde->state = FDE_ACTIVE;
fde->fd.reset(fd);
fde->func = func;
fde->arg = arg;
if (!set_file_block_mode(fd, false)) {
// Here is not proper to handle the error. If it fails here, some error is
// likely to be detected by poll(), then we can let the callback function
// to handle it.
LOG(ERROR) << "failed to set non-blocking mode for fd " << fd;
}
auto pair = g_poll_node_map.emplace(fde->fd.get(), PollNode(fde));
CHECK(pair.second) << "install existing fd " << fd;
fde->state |= FDE_CREATED;
return fde;
}
fdevent* fdevent_create(int fd, fd_func func, void* arg) {
return fdevent_create_impl(fd, func, arg);
}
fdevent* fdevent_create(int fd, fd_func2 func, void* arg) {
return fdevent_create_impl(fd, func, arg);
}
unique_fd fdevent_release(fdevent* fde) {
check_main_thread();
if (!fde) {
return {};
}
if (!(fde->state & FDE_CREATED)) {
LOG(FATAL) << "destroying fde not created by fdevent_create(): " << dump_fde(fde);
}
unique_fd result = std::move(fde->fd);
if (fde->state & FDE_ACTIVE) {
g_poll_node_map.erase(result.get());
if (fde->state & FDE_PENDING) {
g_pending_list.remove(fde);
}
fde->state = 0;
fde->events = 0;
}
delete fde;
return result;
}
void fdevent_destroy(fdevent* fde) {
// Release, and then let unique_fd's destructor cleanup.
fdevent_release(fde);
}
static void fdevent_update(fdevent* fde, unsigned events) {
auto it = g_poll_node_map.find(fde->fd.get());
CHECK(it != g_poll_node_map.end());
PollNode& node = it->second;
if (events & FDE_READ) {
node.pollfd.events |= POLLIN;
} else {
node.pollfd.events &= ~POLLIN;
}
if (events & FDE_WRITE) {
node.pollfd.events |= POLLOUT;
} else {
node.pollfd.events &= ~POLLOUT;
}
fde->state = (fde->state & FDE_STATEMASK) | events;
}
void fdevent_set(fdevent* fde, unsigned events) {
check_main_thread();
events &= FDE_EVENTMASK;
if ((fde->state & FDE_EVENTMASK) == events) {
return;
}
CHECK(fde->state & FDE_ACTIVE);
fdevent_update(fde, events);
D("fdevent_set: %s, events = %u", dump_fde(fde).c_str(), events);
if (fde->state & FDE_PENDING) {
// If we are pending, make sure we don't signal an event that is no longer wanted.
fde->events &= events;
if (fde->events == 0) {
g_pending_list.remove(fde);
fde->state &= ~FDE_PENDING;
}
}
}
void fdevent_add(fdevent* fde, unsigned events) {
check_main_thread();
CHECK(!(events & FDE_TIMEOUT));
fdevent_set(fde, (fde->state & FDE_EVENTMASK) | events);
}
void fdevent_del(fdevent* fde, unsigned events) {
check_main_thread();
CHECK(!(events & FDE_TIMEOUT));
fdevent_set(fde, (fde->state & FDE_EVENTMASK) & ~events);
}
void fdevent_set_timeout(fdevent* fde, std::optional<std::chrono::milliseconds> timeout) {
check_main_thread();
fde->timeout = timeout;
fde->last_active = std::chrono::steady_clock::now();
}
static std::string dump_pollfds(const std::vector<adb_pollfd>& pollfds) {
std::string result;
for (const auto& pollfd : pollfds) {
std::string op;
if (pollfd.events & POLLIN) {
op += "R";
}
if (pollfd.events & POLLOUT) {
op += "W";
}
android::base::StringAppendF(&result, " %d(%s)", pollfd.fd, op.c_str());
}
return result;
}
static std::optional<std::chrono::milliseconds> calculate_timeout() {
std::optional<std::chrono::milliseconds> result = std::nullopt;
auto now = std::chrono::steady_clock::now();
check_main_thread();
for (const auto& [fd, pollnode] : g_poll_node_map) {
UNUSED(fd);
auto timeout_opt = pollnode.fde->timeout;
if (timeout_opt) {
auto deadline = pollnode.fde->last_active + *timeout_opt;
auto time_left = std::chrono::duration_cast<std::chrono::milliseconds>(deadline - now);
if (time_left < std::chrono::milliseconds::zero()) {
time_left = std::chrono::milliseconds::zero();
}
if (!result) {
result = time_left;
} else {
result = std::min(*result, time_left);
}
}
}
return result;
}
static void fdevent_process() {
std::vector<adb_pollfd> pollfds;
for (const auto& pair : g_poll_node_map) {
pollfds.push_back(pair.second.pollfd);
}
CHECK_GT(pollfds.size(), 0u);
D("poll(), pollfds = %s", dump_pollfds(pollfds).c_str());
auto timeout = calculate_timeout();
int timeout_ms;
if (!timeout) {
timeout_ms = -1;
} else {
timeout_ms = timeout->count();
}
int ret = adb_poll(&pollfds[0], pollfds.size(), timeout_ms);
if (ret == -1) {
PLOG(ERROR) << "poll(), ret = " << ret;
return;
}
auto post_poll = std::chrono::steady_clock::now();
for (const auto& pollfd : pollfds) {
if (pollfd.revents != 0) {
D("for fd %d, revents = %x", pollfd.fd, pollfd.revents);
}
unsigned events = 0;
if (pollfd.revents & POLLIN) {
events |= FDE_READ;
}
if (pollfd.revents & POLLOUT) {
events |= FDE_WRITE;
}
if (pollfd.revents & (POLLERR | POLLHUP | POLLNVAL)) {
// We fake a read, as the rest of the code assumes that errors will
// be detected at that point.
events |= FDE_READ | FDE_ERROR;
}
#if defined(__linux__)
if (pollfd.revents & POLLRDHUP) {
events |= FDE_READ | FDE_ERROR;
}
#endif
auto it = g_poll_node_map.find(pollfd.fd);
CHECK(it != g_poll_node_map.end());
fdevent* fde = it->second.fde;
if (events == 0) {
// Check for timeout.
if (fde->timeout) {
auto deadline = fde->last_active + *fde->timeout;
if (deadline < post_poll) {
events |= FDE_TIMEOUT;
}
}
}
if (events != 0) {
CHECK_EQ(fde->fd.get(), pollfd.fd);
fde->events |= events;
fde->last_active = post_poll;
D("%s got events %x", dump_fde(fde).c_str(), events);
fde->state |= FDE_PENDING;
g_pending_list.push_back(fde);
}
}
}
template <class T>
struct always_false : std::false_type {};
static void fdevent_call_fdfunc(fdevent* fde) {
unsigned events = fde->events;
fde->events = 0;
CHECK(fde->state & FDE_PENDING);
fde->state &= (~FDE_PENDING);
D("fdevent_call_fdfunc %s", dump_fde(fde).c_str());
std::visit(
[&](auto&& f) {
using F = std::decay_t<decltype(f)>;
if constexpr (std::is_same_v<fd_func, F>) {
f(fde->fd.get(), events, fde->arg);
} else if constexpr (std::is_same_v<fd_func2, F>) {
f(fde, events, fde->arg);
} else {
static_assert(always_false<F>::value, "non-exhaustive visitor");
}
},
fde->func);
}
static void fdevent_run_flush() EXCLUDES(run_queue_mutex) {
// We need to be careful around reentrancy here, since a function we call can queue up another
// function.
while (true) {
std::function<void()> fn;
{
std::lock_guard<std::mutex> lock(run_queue_mutex);
if (run_queue.empty()) {
break;
}
fn = run_queue.front();
run_queue.pop_front();
}
fn();
}
}
static void fdevent_run_func(int fd, unsigned ev, void* /* userdata */) {
CHECK_GE(fd, 0);
CHECK(ev & FDE_READ);
char buf[1024];
// Empty the fd.
if (adb_read(fd, buf, sizeof(buf)) == -1) {
PLOG(FATAL) << "failed to empty run queue notify fd";
}
// Mark that we need to flush, and then run it at the end of fdevent_loop.
run_needs_flush = true;
}
static void fdevent_run_setup() {
{
std::lock_guard<std::mutex> lock(run_queue_mutex);
CHECK(run_queue_notify_fd.get() == -1);
int s[2];
if (adb_socketpair(s) != 0) {
PLOG(FATAL) << "failed to create run queue notify socketpair";
}
if (!set_file_block_mode(s[0], false) || !set_file_block_mode(s[1], false)) {
PLOG(FATAL) << "failed to make run queue notify socket nonblocking";
}
run_queue_notify_fd.reset(s[0]);
fdevent* fde = fdevent_create(s[1], fdevent_run_func, nullptr);
CHECK(fde != nullptr);
fdevent_add(fde, FDE_READ);
}
fdevent_run_flush();
}
void fdevent_run_on_main_thread(std::function<void()> fn) {
std::lock_guard<std::mutex> lock(run_queue_mutex);
run_queue.push_back(std::move(fn));
// run_queue_notify_fd could still be -1 if we're called before fdevent has finished setting up.
// In that case, rely on the setup code to flush the queue without a notification being needed.
if (run_queue_notify_fd != -1) {
int rc = adb_write(run_queue_notify_fd.get(), "", 1);
// It's possible that we get EAGAIN here, if lots of notifications came in while handling.
if (rc == 0) {
PLOG(FATAL) << "run queue notify fd was closed?";
} else if (rc == -1 && errno != EAGAIN) {
PLOG(FATAL) << "failed to write to run queue notify fd";
}
}
}
static void fdevent_check_spin(uint64_t cycle) {
// Check to see if we're spinning because we forgot about an fdevent
// by keeping track of how long fdevents have been continuously pending.
struct SpinCheck {
fdevent* fde;
android::base::boot_clock::time_point timestamp;
uint64_t cycle;
};
static auto& g_continuously_pending = *new std::unordered_map<uint64_t, SpinCheck>();
static auto last_cycle = android::base::boot_clock::now();
auto now = android::base::boot_clock::now();
if (now - last_cycle > 10ms) {
// We're not spinning.
g_continuously_pending.clear();
last_cycle = now;
return;
}
last_cycle = now;
for (auto* fde : g_pending_list) {
auto it = g_continuously_pending.find(fde->id);
if (it == g_continuously_pending.end()) {
g_continuously_pending[fde->id] =
SpinCheck{.fde = fde, .timestamp = now, .cycle = cycle};
} else {
it->second.cycle = cycle;
}
}
for (auto it = g_continuously_pending.begin(); it != g_continuously_pending.end();) {
if (it->second.cycle != cycle) {
it = g_continuously_pending.erase(it);
} else {
// Use an absurdly long window, since all we really care about is
// getting a bugreport eventually.
if (now - it->second.timestamp > 300s) {
LOG(FATAL_WITHOUT_ABORT)
<< "detected spin in fdevent: " << dump_fde(it->second.fde);
#if defined(__linux__)
int fd = it->second.fde->fd.get();
std::string fd_path = android::base::StringPrintf("/proc/self/fd/%d", fd);
std::string path;
if (!android::base::Readlink(fd_path, &path)) {
PLOG(FATAL_WITHOUT_ABORT) << "readlink of fd " << fd << " failed";
}
LOG(FATAL_WITHOUT_ABORT) << "fd " << fd << " = " << path;
#endif
abort();
}
++it;
}
}
}
void fdevent_loop() {
set_main_thread();
fdevent_run_setup();
uint64_t cycle = 0;
while (true) {
if (terminate_loop) {
return;
}
D("--- --- waiting for events");
fdevent_process();
fdevent_check_spin(cycle++);
while (!g_pending_list.empty()) {
fdevent* fde = g_pending_list.front();
g_pending_list.pop_front();
fdevent_call_fdfunc(fde);
}
if (run_needs_flush) {
fdevent_run_flush();
run_needs_flush = false;
}
}
}
void fdevent_terminate_loop() {
terminate_loop = true;
}
size_t fdevent_installed_count() {
return g_poll_node_map.size();
}
void fdevent_reset() {
g_poll_node_map.clear();
g_pending_list.clear();
std::lock_guard<std::mutex> lock(run_queue_mutex);
run_queue_notify_fd.reset();
run_queue.clear();
main_thread_valid = false;
terminate_loop = false;
}