libsnapshot: Use a two-phase merge for VABC.
If a partition shrinks in an update, it must be merged before any other partitions. Otherwise, a copy operation may source from the tail of the shrunk partition, which could be overwritten by a merge operation in another partition. This patch adds a "MergePhase" indicator to the update status that is valid only when the state is MERGING. Partitions that shrink are merged first, and the phase will be FIRST_PHASE. Once ProcessUpdateState() has determined that all first-phase snapshots are merged, it will switch to SECOND_PHASE and remaining snapshots can start merging. Otherwise, there is no change to the merge algorithm. The phase split is an implementation detail and not exposed to update_engine. Bug: 177935716 Test: vts_libsnapshot_test Change-Id: I06043f8e3b81bdecefb6a4b5944a97b7086eeb49
This commit is contained in:
parent
91c9f56d30
commit
86ba14be16
|
@ -34,7 +34,19 @@ enum SnapshotState {
|
|||
MERGE_COMPLETED = 3;
|
||||
}
|
||||
|
||||
// Next: 10
|
||||
// Next: 3
|
||||
// Phase of a two-phase snapshot merge. Stored in
// SnapshotUpdateStatus.merge_phase and only meaningful while the update
// state is MERGING. Partitions that shrank in the update are merged before
// all other partitions so that a merge elsewhere cannot overwrite blocks
// that a copy operation still needs to read.
enum MergePhase {
|
||||
// No merge is in progress.
|
||||
NO_MERGE = 0;
|
||||
|
||||
// Shrunk partitions can merge. This phase runs first; snapshots that do
// not belong to it stay unmerged until it completes.
|
||||
FIRST_PHASE = 1;
|
||||
|
||||
// Grown partitions can merge. In practice this covers every snapshot that
// is not assigned to FIRST_PHASE.
|
||||
SECOND_PHASE = 2;
|
||||
}
|
||||
|
||||
// Next: 11
|
||||
message SnapshotStatus {
|
||||
// Name of the snapshot. This is usually the name of the snapshotted
|
||||
// logical partition; for example, "system_b".
|
||||
|
@ -87,6 +99,9 @@ message SnapshotStatus {
|
|||
|
||||
// True if compression is enabled, false otherwise.
|
||||
bool compression_enabled = 9;
|
||||
|
||||
// The old partition size (if none existed, this will be zero).
|
||||
uint64 old_partition_size = 10;
|
||||
}
|
||||
|
||||
// Next: 8
|
||||
|
@ -118,7 +133,7 @@ enum UpdateState {
|
|||
Cancelled = 7;
|
||||
};
|
||||
|
||||
// Next: 6
|
||||
// Next: 7
|
||||
message SnapshotUpdateStatus {
|
||||
UpdateState state = 1;
|
||||
|
||||
|
@ -136,6 +151,9 @@ message SnapshotUpdateStatus {
|
|||
|
||||
// Whether compression/dm-user was used for any snapshots.
|
||||
bool compression_enabled = 5;
|
||||
|
||||
// Merge phase (if state == MERGING).
|
||||
MergePhase merge_phase = 6;
|
||||
}
|
||||
|
||||
// Next: 4
|
||||
|
|
|
@ -525,11 +525,13 @@ class SnapshotManager final : public ISnapshotManager {
|
|||
std::string GetMergeStateFilePath() const;
|
||||
|
||||
// Helpers for merging.
|
||||
bool MergeSecondPhaseSnapshots(LockedFile* lock);
|
||||
bool SwitchSnapshotToMerge(LockedFile* lock, const std::string& name);
|
||||
bool RewriteSnapshotDeviceTable(const std::string& dm_name);
|
||||
bool MarkSnapshotMergeCompleted(LockedFile* snapshot_lock, const std::string& snapshot_name);
|
||||
void AcknowledgeMergeSuccess(LockedFile* lock);
|
||||
void AcknowledgeMergeFailure();
|
||||
MergePhase DecideMergePhase(const SnapshotStatus& status);
|
||||
std::unique_ptr<LpMetadata> ReadCurrentMetadata();
|
||||
|
||||
enum class MetadataPartitionState {
|
||||
|
@ -562,7 +564,8 @@ class SnapshotManager final : public ISnapshotManager {
|
|||
// UpdateState::MergeNeedsReboot
|
||||
UpdateState CheckMergeState(const std::function<bool()>& before_cancel);
|
||||
UpdateState CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel);
|
||||
UpdateState CheckTargetMergeState(LockedFile* lock, const std::string& name);
|
||||
UpdateState CheckTargetMergeState(LockedFile* lock, const std::string& name,
|
||||
const SnapshotUpdateStatus& update_status);
|
||||
|
||||
// Interact with status files under /metadata/ota/snapshots.
|
||||
bool WriteSnapshotStatus(LockedFile* lock, const SnapshotStatus& status);
|
||||
|
|
|
@ -157,6 +157,9 @@ bool SnapshotManager::BeginUpdate() {
|
|||
images_->RemoveAllImages();
|
||||
}
|
||||
|
||||
// Clear any cached metadata (this allows re-using one manager across tests).
|
||||
old_partition_metadata_ = nullptr;
|
||||
|
||||
auto state = ReadUpdateState(file.get());
|
||||
if (state != UpdateState::None) {
|
||||
LOG(ERROR) << "An update is already in progress, cannot begin a new update";
|
||||
|
@ -480,7 +483,8 @@ bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name,
|
|||
// have completed merging, but the start of the merge process is considered
|
||||
// atomic.
|
||||
SnapshotStorageMode mode;
|
||||
switch (ReadUpdateState(lock)) {
|
||||
SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
|
||||
switch (update_status.state()) {
|
||||
case UpdateState::MergeCompleted:
|
||||
case UpdateState::MergeNeedsReboot:
|
||||
LOG(ERROR) << "Should not create a snapshot device for " << name
|
||||
|
@ -490,7 +494,11 @@ bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name,
|
|||
case UpdateState::MergeFailed:
|
||||
// Note: MergeFailed indicates that a merge is in progress, but
|
||||
// is possibly stalled. We still have to honor the merge.
|
||||
mode = SnapshotStorageMode::Merge;
|
||||
if (DecideMergePhase(status) == update_status.merge_phase()) {
|
||||
mode = SnapshotStorageMode::Merge;
|
||||
} else {
|
||||
mode = SnapshotStorageMode::Persistent;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
mode = SnapshotStorageMode::Persistent;
|
||||
|
@ -675,6 +683,8 @@ bool SnapshotManager::InitiateMerge(uint64_t* cow_file_size) {
|
|||
|
||||
bool compression_enabled = false;
|
||||
|
||||
std::vector<std::string> first_merge_group;
|
||||
|
||||
uint64_t total_cow_file_size = 0;
|
||||
DmTargetSnapshot::Status initial_target_values = {};
|
||||
for (const auto& snapshot : snapshots) {
|
||||
|
@ -693,6 +703,9 @@ bool SnapshotManager::InitiateMerge(uint64_t* cow_file_size) {
|
|||
total_cow_file_size += snapshot_status.cow_file_size();
|
||||
|
||||
compression_enabled |= snapshot_status.compression_enabled();
|
||||
if (DecideMergePhase(snapshot_status) == MergePhase::FIRST_PHASE) {
|
||||
first_merge_group.emplace_back(snapshot);
|
||||
}
|
||||
}
|
||||
|
||||
if (cow_file_size) {
|
||||
|
@ -706,14 +719,26 @@ bool SnapshotManager::InitiateMerge(uint64_t* cow_file_size) {
|
|||
initial_status.set_metadata_sectors(initial_target_values.metadata_sectors);
|
||||
initial_status.set_compression_enabled(compression_enabled);
|
||||
|
||||
// If any partitions shrunk, we need to merge them before we merge any other
|
||||
// partitions (see b/177935716). Otherwise, a merge from another partition
|
||||
// may overwrite the source block of a copy operation.
|
||||
const std::vector<std::string>* merge_group;
|
||||
if (first_merge_group.empty()) {
|
||||
merge_group = &snapshots;
|
||||
initial_status.set_merge_phase(MergePhase::SECOND_PHASE);
|
||||
} else {
|
||||
merge_group = &first_merge_group;
|
||||
initial_status.set_merge_phase(MergePhase::FIRST_PHASE);
|
||||
}
|
||||
|
||||
// Point of no return - mark that we're starting a merge. From now on every
|
||||
// snapshot must be a merge target.
|
||||
// eligible snapshot must be a merge target.
|
||||
if (!WriteSnapshotUpdateStatus(lock.get(), initial_status)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool rewrote_all = true;
|
||||
for (const auto& snapshot : snapshots) {
|
||||
for (const auto& snapshot : *merge_group) {
|
||||
// If this fails, we have no choice but to continue. Everything must
|
||||
// be merged. This is not an ideal state to be in, but it is safe,
|
||||
// because the next boot will try again.
|
||||
|
@ -904,13 +929,13 @@ UpdateState SnapshotManager::CheckMergeState(const std::function<bool()>& before
|
|||
|
||||
UpdateState SnapshotManager::CheckMergeState(LockedFile* lock,
|
||||
const std::function<bool()>& before_cancel) {
|
||||
UpdateState state = ReadUpdateState(lock);
|
||||
switch (state) {
|
||||
SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
|
||||
switch (update_status.state()) {
|
||||
case UpdateState::None:
|
||||
case UpdateState::MergeCompleted:
|
||||
// Harmless races are allowed between two callers of WaitForMerge,
|
||||
// so in both of these cases we just propagate the state.
|
||||
return state;
|
||||
return update_status.state();
|
||||
|
||||
case UpdateState::Merging:
|
||||
case UpdateState::MergeNeedsReboot:
|
||||
|
@ -927,10 +952,10 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock,
|
|||
if (HandleCancelledUpdate(lock, before_cancel)) {
|
||||
return UpdateState::Cancelled;
|
||||
}
|
||||
return state;
|
||||
return update_status.state();
|
||||
|
||||
default:
|
||||
return state;
|
||||
return update_status.state();
|
||||
}
|
||||
|
||||
std::vector<std::string> snapshots;
|
||||
|
@ -942,8 +967,9 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock,
|
|||
bool failed = false;
|
||||
bool merging = false;
|
||||
bool needs_reboot = false;
|
||||
bool wrong_phase = false;
|
||||
for (const auto& snapshot : snapshots) {
|
||||
UpdateState snapshot_state = CheckTargetMergeState(lock, snapshot);
|
||||
UpdateState snapshot_state = CheckTargetMergeState(lock, snapshot, update_status);
|
||||
switch (snapshot_state) {
|
||||
case UpdateState::MergeFailed:
|
||||
failed = true;
|
||||
|
@ -959,6 +985,9 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock,
|
|||
case UpdateState::Cancelled:
|
||||
cancelled = true;
|
||||
break;
|
||||
case UpdateState::None:
|
||||
wrong_phase = true;
|
||||
break;
|
||||
default:
|
||||
LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": "
|
||||
<< "\"" << snapshot_state << "\"";
|
||||
|
@ -978,6 +1007,14 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock,
|
|||
// it in WaitForMerge rather than here and elsewhere.
|
||||
return UpdateState::MergeFailed;
|
||||
}
|
||||
if (wrong_phase) {
|
||||
// If we got here, no other partitions are being merged, and nothing
|
||||
// failed to merge. It's safe to move to the next merge phase.
|
||||
if (!MergeSecondPhaseSnapshots(lock)) {
|
||||
return UpdateState::MergeFailed;
|
||||
}
|
||||
return UpdateState::Merging;
|
||||
}
|
||||
if (needs_reboot) {
|
||||
WriteUpdateState(lock, UpdateState::MergeNeedsReboot);
|
||||
return UpdateState::MergeNeedsReboot;
|
||||
|
@ -993,7 +1030,8 @@ UpdateState SnapshotManager::CheckMergeState(LockedFile* lock,
|
|||
return UpdateState::MergeCompleted;
|
||||
}
|
||||
|
||||
UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name) {
|
||||
UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name,
|
||||
const SnapshotUpdateStatus& update_status) {
|
||||
SnapshotStatus snapshot_status;
|
||||
if (!ReadSnapshotStatus(lock, name, &snapshot_status)) {
|
||||
return UpdateState::MergeFailed;
|
||||
|
@ -1015,7 +1053,7 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::
|
|||
// During a check, we decided the merge was complete, but we were unable to
|
||||
// collapse the device-mapper stack and perform COW cleanup. If we haven't
|
||||
// rebooted after this check, the device will still be a snapshot-merge
|
||||
// target. If the have rebooted, the device will now be a linear target,
|
||||
// target. If we have rebooted, the device will now be a linear target,
|
||||
// and we can try cleanup again.
|
||||
if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
|
||||
// NB: It's okay if this fails now, we gave cleanup our best effort.
|
||||
|
@ -1036,6 +1074,12 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::
|
|||
if (!QuerySnapshotStatus(name, &target_type, &status)) {
|
||||
return UpdateState::MergeFailed;
|
||||
}
|
||||
if (target_type == "snapshot" &&
|
||||
DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
|
||||
update_status.merge_phase() == MergePhase::FIRST_PHASE) {
|
||||
// The snapshot is not being merged because it's in the wrong phase.
|
||||
return UpdateState::None;
|
||||
}
|
||||
if (target_type != "snapshot-merge") {
|
||||
// We can get here if we failed to rewrite the target type in
|
||||
// InitiateMerge(). If we failed to create the target in first-stage
|
||||
|
@ -1071,6 +1115,38 @@ UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::
|
|||
return UpdateState::MergeCompleted;
|
||||
}
|
||||
|
||||
// Advances an in-progress merge from FIRST_PHASE to SECOND_PHASE.
//
// The update status is rewritten with merge_phase = SECOND_PHASE first, and
// then every snapshot that DecideMergePhase() assigns to the second phase is
// switched over to a merge target.
//
// Preconditions (enforced with CHECK): the stored update state is Merging and
// the stored merge phase is FIRST_PHASE.
//
// Returns true only if the snapshot list and every snapshot status could be
// read, the new phase was persisted, and every second-phase snapshot was
// switched. Returns false otherwise.
bool SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
    std::vector<std::string> snapshots;
    if (!ListSnapshots(lock, &snapshots)) {
        // This function returns bool. Returning UpdateState::MergeFailed here
        // would convert a non-zero enumerator to `true` and report success.
        return false;
    }

    SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
    CHECK(update_status.state() == UpdateState::Merging);
    CHECK(update_status.merge_phase() == MergePhase::FIRST_PHASE);

    // Persist the phase change before touching any device so that the stored
    // phase never lags behind the set of snapshots being merged.
    update_status.set_merge_phase(MergePhase::SECOND_PHASE);
    if (!WriteSnapshotUpdateStatus(lock, update_status)) {
        return false;
    }

    bool rewrote_all = true;
    for (const auto& snapshot : snapshots) {
        SnapshotStatus snapshot_status;
        if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) {
            // Same bool-vs-enum hazard as above: report the failure as false.
            return false;
        }
        if (DecideMergePhase(snapshot_status) != MergePhase::SECOND_PHASE) {
            // First-phase snapshots are not switched here.
            continue;
        }
        if (!SwitchSnapshotToMerge(lock, snapshot)) {
            // Keep going so the remaining snapshots still get switched, but
            // remember that at least one failed.
            LOG(ERROR) << "Failed to switch snapshot to a second-phase merge target: " << snapshot;
            rewrote_all = false;
        }
    }
    return rewrote_all;
}
|
||||
|
||||
std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
|
||||
return metadata_dir_ + "/" + android::base::Basename(kBootIndicatorPath);
|
||||
}
|
||||
|
@ -1211,6 +1287,10 @@ bool SnapshotManager::CollapseSnapshotDevice(const std::string& name,
|
|||
if (!dm.DeleteDeviceIfExists(base_name)) {
|
||||
LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
|
||||
}
|
||||
auto source_name = GetSourceDeviceName(name);
|
||||
if (!dm.DeleteDeviceIfExists(source_name)) {
|
||||
LOG(ERROR) << "Unable to delete source device for snapshot: " << source_name;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -2654,6 +2734,15 @@ Return SnapshotManager::CreateUpdateSnapshotsInternal(
|
|||
continue;
|
||||
}
|
||||
|
||||
// Find the original partition size.
|
||||
auto name = target_partition->name();
|
||||
auto old_partition_name =
|
||||
name.substr(0, name.size() - target_suffix.size()) + cow_creator->current_suffix;
|
||||
auto old_partition = cow_creator->current_metadata->FindPartition(old_partition_name);
|
||||
if (old_partition) {
|
||||
cow_creator_ret->snapshot_status.set_old_partition_size(old_partition->size());
|
||||
}
|
||||
|
||||
// Store these device sizes to snapshot status file.
|
||||
if (!CreateSnapshot(lock, &cow_creator_ret->snapshot_status)) {
|
||||
return Return::Error();
|
||||
|
@ -3355,5 +3444,12 @@ const LpMetadata* SnapshotManager::ReadOldPartitionMetadata(LockedFile* lock) {
|
|||
return old_partition_metadata_.get();
|
||||
}
|
||||
|
||||
// Chooses the merge phase for a single snapshot.
//
// A snapshot goes to FIRST_PHASE only when compression is enabled for it and
// its device size is smaller than the recorded old partition size (i.e. the
// partition shrank). Every other snapshot goes to SECOND_PHASE.
MergePhase SnapshotManager::DecideMergePhase(const SnapshotStatus& status) {
    if (!status.compression_enabled()) {
        return MergePhase::SECOND_PHASE;
    }
    if (status.device_size() >= status.old_partition_size()) {
        // Same size or grown: nothing forces this snapshot to merge early.
        return MergePhase::SECOND_PHASE;
    }
    return MergePhase::FIRST_PHASE;
}
|
||||
|
||||
} // namespace snapshot
|
||||
} // namespace android
|
||||
|
|
Loading…
Reference in New Issue