From c779cf93bb30a909219ab0c4aadaae1a996637d6 Mon Sep 17 00:00:00 2001
From: Todd Eisenberger <teisenbe@google.com>
Date: Mon, 22 Apr 2019 22:07:42 +0000
Subject: [PATCH] [devcoordinator][devhost] Drive suspend from the coordinator

The coordinator now handles coordinating suspend on a device-by-device
basis, rather than just handling telling which devhosts to begin
suspending in which order.

Change-Id: I2dbd0370c93158f5a33db5a1f51f64200f13e6f5
---
 .../core/devmgr/devcoordinator/BUILD.gn       |   1 +
 .../devcoordinator/coordinator-test.cpp       | 151 ++++++++++++++-
 .../devmgr/devcoordinator/coordinator.cpp     | 173 +++++-------------
 .../core/devmgr/devcoordinator/coordinator.h  |  38 +---
 .../core/devmgr/devcoordinator/device.cpp     |  35 +++-
 .../core/devmgr/devcoordinator/device.h       |  26 +++
 .../devmgr/devcoordinator/suspend-task.cpp    |  67 +++++++
 .../core/devmgr/devcoordinator/suspend-task.h |  30 +++
 zircon/system/core/devmgr/devhost/core.cpp    |  45 +----
 zircon/system/core/devmgr/devhost/devhost.cpp |  11 +-
 .../fuchsia-device-manager/coordinator.fidl   |   4 +-
 11 files changed, 366 insertions(+), 215 deletions(-)
 create mode 100644 zircon/system/core/devmgr/devcoordinator/suspend-task.cpp
 create mode 100644 zircon/system/core/devmgr/devcoordinator/suspend-task.h

diff --git a/zircon/system/core/devmgr/devcoordinator/BUILD.gn b/zircon/system/core/devmgr/devcoordinator/BUILD.gn
index 6da6d667350..86cc6ea4bc0 100644
--- a/zircon/system/core/devmgr/devcoordinator/BUILD.gn
+++ b/zircon/system/core/devmgr/devcoordinator/BUILD.gn
@@ -52,6 +52,7 @@ source_set("common") {
     "driver.cpp",
     "driver.h",
     "fidl.cpp",
+    "suspend-task.cpp",
     "task.cpp",
     "vmo-writer.cpp",
     "vmo-writer.h",
diff --git a/zircon/system/core/devmgr/devcoordinator/coordinator-test.cpp b/zircon/system/core/devmgr/devcoordinator/coordinator-test.cpp
index 4ab4f85d1dd..bbd02d8bee6 100644
--- a/zircon/system/core/devmgr/devcoordinator/coordinator-test.cpp
+++ b/zircon/system/core/devmgr/devcoordinator/coordinator-test.cpp
@@ -257,6 +257,43 @@ void CheckCreateDeviceReceived(const zx::channel& remote, const char* expected_d
                     "");
 }
 
+// Reads a Suspend request from remote, checks that it is for the expected
+// flags, and then sends the given response.
+void CheckSuspendReceived(const zx::channel& remote, uint32_t expected_flags,
+                          zx_status_t return_status) {
+    // Read the Suspend request.
+    FIDL_ALIGNDECL uint8_t bytes[ZX_CHANNEL_MAX_MSG_BYTES];
+    zx_handle_t handles[ZX_CHANNEL_MAX_MSG_HANDLES];
+    uint32_t actual_bytes;
+    uint32_t actual_handles;
+    zx_status_t status = remote.rea2(0, bytes, handles, sizeof(bytes), fbl::count_of(handles),
+                                     &actual_bytes, &actual_handles);
+    ASSERT_OK(status);
+    ASSERT_LT(0, actual_bytes);
+    ASSERT_EQ(0, actual_handles);
+
+    // Validate the Suspend request.
+    auto hdr = reinterpret_cast<fidl_message_header_t*>(bytes);
+    ASSERT_EQ(fuchsia_device_manager_DeviceControllerSuspendOrdinal, hdr->ordinal);
+    status = fidl_decode(&fuchsia_device_manager_DeviceControllerSuspendRequestTable, bytes,
+                         actual_bytes, handles, actual_handles, nullptr);
+    ASSERT_OK(status);
+    auto req = reinterpret_cast<fuchsia_device_manager_DeviceControllerSuspendRequest*>(bytes);
+    ASSERT_EQ(req->flags, expected_flags);
+
+    // Write the Suspend response.
+    memset(bytes, 0, sizeof(bytes));
+    auto resp = reinterpret_cast<fuchsia_device_manager_DeviceControllerSuspendResponse*>(bytes);
+    resp->hdr.ordinal = fuchsia_device_manager_DeviceControllerSuspendOrdinal;
+    resp->status = return_status;
+    status = fidl_encode(&fuchsia_device_manager_DeviceControllerSuspendResponseTable, bytes,
+                         sizeof(*resp), handles, fbl::count_of(handles), &actual_handles, nullptr);
+    ASSERT_OK(status);
+    ASSERT_EQ(0, actual_handles);
+    status = remote.write(0, bytes, sizeof(*resp), nullptr, 0);
+    ASSERT_OK(status);
+}
+
 // Reads a CreateCompositeDevice from remote, checks expectations, and sends
 // a ZX_OK response.
 void CheckCreateCompositeDeviceReceived(const zx::channel& remote, const char* expected_name,
@@ -342,9 +379,9 @@ struct DeviceState {
     zx::channel remote;
 };
 
-class CompositeTestCase : public zxtest::Test {
+class MultipleDeviceTestCase : public zxtest::Test {
 public:
-    ~CompositeTestCase() override = default;
+    ~MultipleDeviceTestCase() override = default;
 
     async::Loop* loop() { return &loop_; }
     devmgr::Coordinator* coordinator() { return &coordinator_; }
@@ -353,15 +390,18 @@ public:
     const zx::channel& devhost_remote() { return devhost_remote_; }
 
     const fbl::RefPtr<devmgr::Device>& platform_bus() const { return platform_bus_.device; }
+    const zx::channel& platform_bus_remote() const { return platform_bus_.remote; }
     DeviceState* device(size_t index) const { return &devices_[index]; }
 
     void AddDevice(const fbl::RefPtr<devmgr::Device>& parent, const char* name,
                    uint32_t protocol_id, fbl::String driver, size_t* device_index);
     void RemoveDevice(size_t device_index);
+
+    bool DeviceHasPendingMessages(size_t device_index);
+    bool DeviceHasPendingMessages(const zx::channel& remote);
 protected:
     void SetUp() override {
         ASSERT_NO_FATAL_FAILURES(InitializeCoordinator(&coordinator_));
-        ASSERT_NOT_NULL(coordinator_.component_driver());
 
         // refcount starts at zero, so bump it up to keep us from being cleaned up
         devhost_.AddRef();
@@ -379,8 +419,7 @@ protected:
                                                            &sys_proxy_remote_));
         loop_.RunUntilIdle();
 
-        // Create a child of the sys_device, since only directly children of it can
-        // issue AddComposite
+        // Create a child of the sys_device (an equivalent of the platform bus)
         {
             zx::channel local;
             zx_status_t status = zx::channel::create(0, &local, &platform_bus_.remote);
@@ -408,7 +447,7 @@ protected:
 
         devhost_.devices().clear();
     }
-private:
+
     async::Loop loop_{&kAsyncLoopConfigNoAttachToThread};
     devmgr::Coordinator coordinator_{DefaultConfig(loop_.dispatcher())};
 
@@ -431,8 +470,8 @@ private:
     fbl::Vector<DeviceState> devices_;
 };
 
-void CompositeTestCase::AddDevice(const fbl::RefPtr<devmgr::Device>& parent, const char* name,
-                                     uint32_t protocol_id, fbl::String driver, size_t* index) {
+void MultipleDeviceTestCase::AddDevice(const fbl::RefPtr<devmgr::Device>& parent, const char* name,
+                                       uint32_t protocol_id, fbl::String driver, size_t* index) {
     DeviceState state;
 
     zx::channel local;
@@ -450,7 +489,7 @@ void CompositeTestCase::AddDevice(const fbl::RefPtr<devmgr::Device>& parent, con
     *index = devices_.size() - 1;
 }
 
-void CompositeTestCase::RemoveDevice(size_t device_index) {
+void MultipleDeviceTestCase::RemoveDevice(size_t device_index) {
     auto& state = devices_[device_index];
     ASSERT_OK(coordinator_.RemoveDevice(state.device, false));
     state.device.reset();
@@ -458,6 +497,100 @@ void CompositeTestCase::RemoveDevice(size_t device_index) {
     loop_.RunUntilIdle();
 }
 
+bool MultipleDeviceTestCase::DeviceHasPendingMessages(const zx::channel& remote) {
+    return remote.wait_one(ZX_CHANNEL_READABLE, zx::time(0), nullptr) == ZX_OK;
+}
+bool MultipleDeviceTestCase::DeviceHasPendingMessages(size_t device_index) {
+    return DeviceHasPendingMessages(devices_[device_index].remote);
+}
+
+// Verify the suspend order is correct
+TEST_F(MultipleDeviceTestCase, Suspend) {
+    struct DeviceDesc {
+        // Index into the device desc array below.  UINT32_MAX = platform_bus()
+        const size_t parent_desc_index;
+        const char* const name;
+        // index for use with device()
+        size_t index = 0;
+        bool suspended = false;
+    };
+    DeviceDesc devices[] = {
+        { UINT32_MAX, "root_child1" },
+        { UINT32_MAX, "root_child2" },
+        { 0, "root_child1_1" },
+        { 0, "root_child1_2" },
+        { 2, "root_child1_1_1" },
+        { 1, "root_child2_1" },
+    };
+    for (auto& desc : devices) {
+        fbl::RefPtr<devmgr::Device> parent;
+        if (desc.parent_desc_index == UINT32_MAX) {
+            parent = platform_bus();
+        } else {
+            size_t index = devices[desc.parent_desc_index].index;
+            parent = device(index)->device;
+        }
+        ASSERT_NO_FATAL_FAILURES(AddDevice(parent, desc.name, 0 /* protocol id */, "",
+                                           &desc.index));
+    }
+
+    coordinator()->Suspend(DEVICE_SUSPEND_FLAG_POWEROFF);
+    loop()->RunUntilIdle();
+
+    size_t num_to_suspend = fbl::count_of(devices);
+    while (num_to_suspend > 0) {
+        // Check that platform bus is not suspended yet.
+        ASSERT_FALSE(DeviceHasPendingMessages(platform_bus_remote()));
+
+        bool made_progress = false;
+        // Since the table of devices above is topologically sorted (i.e.
+        // any child is below its parent), this loop should always be able
+        // to catch a parent receiving a suspend message before its child.
+        for (size_t i = 0; i < fbl::count_of(devices); ++i) {
+            auto& desc = devices[i];
+            if (desc.suspended) {
+                continue;
+            }
+
+            if (!DeviceHasPendingMessages(desc.index)) {
+                continue;
+            }
+
+            ASSERT_NO_FATAL_FAILURES(CheckSuspendReceived(
+                    device(desc.index)->remote, DEVICE_SUSPEND_FLAG_POWEROFF, ZX_OK));
+
+            // Make sure all descendants of this device are already suspended.
+            // We just need to check immediate children since this will
+            // recursively enforce that property.
+            for (auto& other_desc : devices) {
+                if (other_desc.parent_desc_index == i) {
+                    ASSERT_TRUE(other_desc.suspended);
+                }
+            }
+
+            desc.suspended = true;
+            --num_to_suspend;
+            made_progress = true;
+        }
+
+        // Make sure we're not stuck waiting
+        ASSERT_TRUE(made_progress);
+        loop()->RunUntilIdle();
+    }
+    ASSERT_NO_FATAL_FAILURES(CheckSuspendReceived(
+            platform_bus_remote(), DEVICE_SUSPEND_FLAG_POWEROFF, ZX_OK));
+}
+
+class CompositeTestCase : public MultipleDeviceTestCase {
+public:
+    ~CompositeTestCase() override = default;
+protected:
+    void SetUp() override {
+        MultipleDeviceTestCase::SetUp();
+        ASSERT_NOT_NULL(coordinator_.component_driver());
+    }
+};
+
 class CompositeAddOrderTestCase : public CompositeTestCase {
 public:
     enum class AddLocation {
diff --git a/zircon/system/core/devmgr/devcoordinator/coordinator.cpp b/zircon/system/core/devmgr/devcoordinator/coordinator.cpp
index c3e829fafe3..7ac355cc3a2 100644
--- a/zircon/system/core/devmgr/devcoordinator/coordinator.cpp
+++ b/zircon/system/core/devmgr/devcoordinator/coordinator.cpp
@@ -78,56 +78,6 @@ void vfs_exit(const zx::event& fshost_event) {
     printf("devcoordinator: Successfully waited for VFS exit completion\n");
 }
 
-zx_status_t suspend_devhost(devmgr::Devhost* dh, devmgr::SuspendContext* ctx) {
-    if (dh->devices().is_empty()) {
-        return ZX_OK;
-    }
-    devmgr::Device* dev = &dh->devices().front();
-
-    if (!(dev->flags & DEV_CTX_PROXY)) {
-        log(INFO, "devcoordinator: devhost root '%s' (%p) is not a proxy\n", dev->name.data(), dev);
-        return ZX_ERR_BAD_STATE;
-    }
-    log(DEVLC, "devcoordinator: suspend devhost %p device '%s' (%p)\n", dh, dev->name.data(), dev);
-
-    zx_status_t status = dh_send_suspend(dev, ctx->sflags());
-    if (status != ZX_OK) {
-        return status;
-    }
-
-    dh->flags() |= devmgr::Devhost::Flags::kSuspend;
-    // TODO(teisenbe/kulakowski) Make SuspendContext automatically refcounted.
-    ctx->AddRef();
-    return ZX_OK;
-}
-
-void process_suspend_list(devmgr::SuspendContext* ctx) {
-    auto dh = ctx->devhosts().make_iterator(*ctx->dh());
-    devmgr::Devhost* parent = nullptr;
-    do {
-        if (!parent || (dh->parent() == parent)) {
-            // send Message::Op::kSuspend each set of children of a devhost at a time,
-            // since they can run in parallel
-            suspend_devhost(dh.CopyPointer(), &ctx->coordinator()->suspend_context());
-            parent = dh->parent();
-        } else {
-            // if the parent is different than the previous devhost's
-            // parent, either this devhost is the parent, a child of
-            // its parent's sibling, or the parent's sibling, so stop
-            // processing until all the outstanding suspends are done
-            parent = nullptr;
-            break;
-        }
-    } while (++dh != ctx->devhosts().end());
-    // next devhost to process once all the outstanding suspends are done
-    if (dh.IsValid()) {
-        ctx->set_dh(dh.CopyPointer());
-    } else {
-        ctx->set_dh(nullptr);
-        ctx->devhosts().clear();
-    }
-}
-
 void suspend_fallback(const zx::resource& root_resource, uint32_t flags) {
     log(INFO, "devcoordinator: suspend fallback with flags 0x%08x\n", flags);
     if (flags == DEVICE_SUSPEND_FLAG_REBOOT) {
@@ -749,6 +699,11 @@ zx_status_t Coordinator::RemoveDevice(const fbl::RefPtr<Device>& dev, bool force
     // remove from devfs, preventing further OPEN attempts
     devfs_unpublish(dev.get());
 
+    // Mark any suspend that's in-flight as completed, since if the device is
+    // removed it should be in its lowest state.
+    // TODO(teisenbe): Should we mark it as failed if this is a forced removal?
+    dev->CompleteSuspend(ZX_OK);
+
     if (dev->proxy) {
         zx_status_t r = dh_send_remove_device(dev->proxy.get());
         if (r != ZX_OK) {
@@ -1334,42 +1289,12 @@ void Coordinator::HandleNewDevice(const fbl::RefPtr<Device>& dev) {
     BindDevice(dev, fbl::StringPiece("") /* autobind */, true /* new device */);
 }
 
-static void append_suspend_list(SuspendContext* ctx, Devhost* dh) {
-    // suspend order is children first
-    for (auto& child : dh->children()) {
-        ctx->devhosts().push_front(&child);
-    }
-    for (auto& child : dh->children()) {
-        append_suspend_list(ctx, &child);
-    }
-}
-
-// Returns the devhost at the front of the queue.
-void Coordinator::BuildSuspendList() {
-    auto& devhosts = suspend_context().devhosts();
-
-    // sys_device must suspend last as on x86 it invokes
-    // ACPI S-state transition
-    devhosts.push_front(sys_device_->proxy->host());
-    append_suspend_list(&suspend_context(), sys_device_->proxy->host());
-
-    devhosts.push_front(root_device_->proxy->host());
-    append_suspend_list(&suspend_context(), root_device_->proxy->host());
-
-    devhosts.push_front(misc_device_->proxy->host());
-    append_suspend_list(&suspend_context(), misc_device_->proxy->host());
-
-    // test devices do not (yet) participate in suspend
-
-    suspend_context().set_dh(&devhosts.front());
-}
-
 static int suspend_timeout_thread(void* arg) {
     // 10 seconds
     zx_nanosleep(zx_deadline_after(ZX_SEC(10)));
 
-    auto ctx = static_cast<SuspendContext*>(arg);
-    auto coordinator = ctx->coordinator();
+    auto coordinator = static_cast<Coordinator*>(arg);
+    auto ctx = &coordinator->suspend_context();
     if (coordinator->suspend_debug()) {
         if (ctx->flags() == SuspendContext::Flags::kRunning) {
             return 0; // success
@@ -1384,10 +1309,9 @@ static int suspend_timeout_thread(void* arg) {
 }
 
 void Coordinator::Suspend(SuspendContext ctx) {
-    // these top level devices should all have proxies. if not,
-    // the system hasn't fully initialized yet and cannot go to
-    // suspend.
-    if (!sys_device_->proxy || !root_device_->proxy || !misc_device_->proxy) {
+    // The sys device should have a proxy. If not, the system hasn't fully initialized yet and
+    // cannot go to suspend.
+    if (!sys_device_->proxy) {
         return;
     }
     if (suspend_context().flags() == SuspendContext::Flags::kSuspend) {
@@ -1395,11 +1319,45 @@ void Coordinator::Suspend(SuspendContext ctx) {
     }
     // Move the socket in to prevent the rpc handler from closing the handle.
     suspend_context() = std::move(ctx);
-    BuildSuspendList();
+
+    auto completion = [this](zx_status_t status) {
+        auto& ctx = suspend_context();
+        if (status != ZX_OK) {
+            // TODO: unroll suspend
+            // do not continue to suspend as this indicates a driver suspend
+            // problem and should show as a bug
+            log(ERROR, "devcoordinator: failed to suspend: %s\n", zx_status_get_string(status));
+            // notify dmctl
+            ctx.CloseSocket();
+            if (ctx.sflags() == DEVICE_SUSPEND_FLAG_MEXEC) {
+                ctx.kernel().signal(0, ZX_USER_SIGNAL_0);
+            }
+            ctx.set_flags(devmgr::SuspendContext::Flags::kRunning);
+            return;
+        }
+
+        if (ctx.sflags() == DEVICE_SUSPEND_FLAG_MEXEC) {
+            zx_system_mexec(root_resource().get(), ctx.kernel().get(), ctx.bootdata().get());
+        } else {
+            // should never get here on x86
+            // on arm, if the platform driver does not implement
+            // suspend go to the kernel fallback
+            ::suspend_fallback(root_resource(), ctx.sflags());
+            // this handle is leaked on the shutdown path for x86
+            ctx.CloseSocket();
+            // if we get here the system did not suspend successfully
+            ctx.set_flags(devmgr::SuspendContext::Flags::kRunning);
+        }
+    };
+
+    // We don't need to suspend anything except sys_device and it's children,
+    // since we do not run suspend hooks for children of test or misc
+    auto task = SuspendTask::Create(sys_device(), ctx.sflags(), std::move(completion));
+    suspend_context().set_task(std::move(task));
 
     if (suspend_fallback() || suspend_debug()) {
         thrd_t t;
-        int ret = thrd_create_with_name(&t, suspend_timeout_thread, &suspend_context(),
+        int ret = thrd_create_with_name(&t, suspend_timeout_thread, this,
                                         "devcoord-suspend-timeout");
         if (ret != thrd_success) {
             log(ERROR, "devcoordinator: failed to create suspend timeout thread\n");
@@ -1407,8 +1365,6 @@ void Coordinator::Suspend(SuspendContext ctx) {
             thrd_detach(t);
         }
     }
-
-    process_suspend_list(&suspend_context());
 }
 
 void Coordinator::Suspend(uint32_t flags) {
@@ -1416,11 +1372,11 @@ void Coordinator::Suspend(uint32_t flags) {
         vfs_exit(fshost_event());
     }
 
-    Suspend(SuspendContext(this, SuspendContext::Flags::kSuspend, flags, std::move(dmctl_socket_)));
+    Suspend(SuspendContext(SuspendContext::Flags::kSuspend, flags, std::move(dmctl_socket_)));
 }
 
 void Coordinator::DmMexec(zx::vmo kernel, zx::vmo bootdata) {
-    Suspend(SuspendContext(this, SuspendContext::Flags::kSuspend, DEVICE_SUSPEND_FLAG_MEXEC,
+    Suspend(SuspendContext(SuspendContext::Flags::kSuspend, DEVICE_SUSPEND_FLAG_MEXEC,
                            zx::socket(), std::move(kernel), std::move(bootdata)));
 }
 
@@ -1758,37 +1714,4 @@ zx_status_t Coordinator::BindOutgoingServices(zx::channel listen_on) {
     return outgoing_services_.Serve(std::move(listen_on));
 }
 
-void SuspendContext::ContinueSuspend(const zx::resource& root_resource) {
-    if (status() != ZX_OK) {
-        // TODO: unroll suspend
-        // do not continue to suspend as this indicates a driver suspend
-        // problem and should show as a bug
-        log(ERROR, "devcoordinator: failed to suspend\n");
-        // notify dmctl
-        CloseSocket();
-        if (sflags() == DEVICE_SUSPEND_FLAG_MEXEC) {
-            kernel().signal(0, ZX_USER_SIGNAL_0);
-        }
-        set_flags(Flags::kRunning);
-        return;
-    }
-
-    if (Release()) {
-        if (dh() != nullptr) {
-            process_suspend_list(this);
-        } else if (sflags() == DEVICE_SUSPEND_FLAG_MEXEC) {
-            zx_system_mexec(root_resource.get(), kernel().get(), bootdata().get());
-        } else {
-            // should never get here on x86
-            // on arm, if the platform driver does not implement
-            // suspend go to the kernel fallback
-            suspend_fallback(root_resource, sflags());
-            // this handle is leaked on the shutdown path for x86
-            CloseSocket();
-            // if we get here the system did not suspend successfully
-            set_flags(Flags::kRunning);
-        }
-    }
-}
-
 } // namespace devmgr
diff --git a/zircon/system/core/devmgr/devcoordinator/coordinator.h b/zircon/system/core/devmgr/devcoordinator/coordinator.h
index 07e60eb1797..680fa2b5aa5 100644
--- a/zircon/system/core/devmgr/devcoordinator/coordinator.h
+++ b/zircon/system/core/devmgr/devcoordinator/coordinator.h
@@ -28,6 +28,7 @@
 #include "device.h"
 #include "driver.h"
 #include "metadata.h"
+#include "suspend-task.h"
 #include "vmo-writer.h"
 
 namespace devmgr {
@@ -43,62 +44,35 @@ public:
 
     SuspendContext() = default;
 
-    SuspendContext(Coordinator* coordinator, Flags flags, uint32_t sflags, zx::socket socket,
+    SuspendContext(Flags flags, uint32_t sflags, zx::socket socket,
                    zx::vmo kernel = zx::vmo(), zx::vmo bootdata = zx::vmo())
-        : coordinator_(coordinator), flags_(flags), sflags_(sflags), socket_(std::move(socket)),
+        : flags_(flags), sflags_(sflags), socket_(std::move(socket)),
           kernel_(std::move(kernel)), bootdata_(std::move(bootdata)) {}
 
-    ~SuspendContext() { devhosts_.clear(); }
+    ~SuspendContext() {}
 
     SuspendContext(SuspendContext&&) = default;
     SuspendContext& operator=(SuspendContext&&) = default;
 
-    void ContinueSuspend(const zx::resource& root_resource);
+    void set_task(fbl::RefPtr<SuspendTask> task) { task_ = std::move(task); }
 
-    Coordinator* coordinator() { return coordinator_; }
-
-    zx_status_t status() const { return status_; }
-    void set_status(zx_status_t status) { status_ = status; }
     Flags flags() const { return flags_; }
     void set_flags(Flags flags) { flags_ = flags; }
     uint32_t sflags() const { return sflags_; }
 
-    Devhost* dh() const { return dh_; }
-    void set_dh(Devhost* dh) { dh_ = dh; }
-
-    using DevhostList = fbl::DoublyLinkedList<Devhost*, Devhost::SuspendNode>;
-    DevhostList& devhosts() { return devhosts_; }
-    const DevhostList& devhosts() const { return devhosts_; }
-
     const zx::vmo& kernel() const { return kernel_; }
     const zx::vmo& bootdata() const { return bootdata_; }
 
     // Close the socket whose ownership was handed to this SuspendContext.
     void CloseSocket() { socket_.reset(); }
 
-    // The AddRef and Release functions follow the contract for fbl::RefPtr.
-    void AddRef() const { ++count_; }
-
-    // Returns true when the last message reference has been released.
-    bool Release() const {
-        const int32_t rc = count_;
-        --count_;
-        return rc == 1;
-    }
-
 private:
-    Coordinator* coordinator_ = nullptr;
+    fbl::RefPtr<SuspendTask> task_;
 
-    zx_status_t status_ = ZX_OK;
     Flags flags_ = Flags::kRunning;
 
     // suspend flags
     uint32_t sflags_ = 0u;
-    // outstanding msgs
-    mutable uint32_t count_ = 0u;
-    // next devhost to process
-    Devhost* dh_ = nullptr;
-    fbl::DoublyLinkedList<Devhost*, Devhost::SuspendNode> devhosts_;
 
     // socket to notify on for 'dm reboot' and 'dm poweroff'
     zx::socket socket_;
diff --git a/zircon/system/core/devmgr/devcoordinator/device.cpp b/zircon/system/core/devmgr/devcoordinator/device.cpp
index 4aea688f2d2..028b743c464 100644
--- a/zircon/system/core/devmgr/devcoordinator/device.cpp
+++ b/zircon/system/core/devmgr/devcoordinator/device.cpp
@@ -190,6 +190,31 @@ zx_status_t Device::SignalReadyForBind(zx::duration delay) {
     return publish_task_.PostDelayed(this->coordinator->dispatcher(), delay);
 }
 
+zx_status_t Device::SendSuspend(uint32_t flags, SuspendCompletion completion) {
+    if (suspend_completion_) {
+        // We already have a pending suspend
+        return ZX_ERR_UNAVAILABLE;
+    }
+    log(DEVLC, "devcoordinator: suspend dev %p name='%s'\n", this, this->name.data());
+    zx_status_t status = dh_send_suspend(this, flags);
+    if (status != ZX_OK) {
+        return status;
+    }
+    suspend_completion_ = std::move(completion);
+    return ZX_OK;
+}
+
+void Device::CompleteSuspend(zx_status_t status) {
+    if (status == ZX_OK) {
+        state_ = Device::State::kSuspended;
+    }
+
+    SuspendCompletion completion(std::move(suspend_completion_));
+    if (completion) {
+        completion(status);
+    }
+}
+
 // Handle inbound messages from devhost to devices
 void Device::HandleRpc(fbl::RefPtr<Device>&& dev, async_dispatcher_t* dispatcher,
                        async::WaitBase* wait, zx_status_t status,
@@ -357,8 +382,14 @@ zx_status_t Device::HandleRead() {
             log(ERROR, "devcoordinator: rpc: suspend '%s' status %d\n", this->name.data(),
                 resp->status);
         }
-        coordinator->suspend_context().set_status(resp->status);
-        coordinator->suspend_context().ContinueSuspend(coordinator->root_resource());
+
+        if (!suspend_completion_) {
+            log(ERROR, "devcoordinator: rpc: unexpected suspend reply for '%s' status %d\n",
+                this->name.data(), resp->status);
+            return ZX_ERR_IO;
+        }
+        log(DEVLC, "devcoordinator: suspended dev %p name='%s'\n", this, this->name.data());
+        CompleteSuspend(resp->status);
     } else {
         log(ERROR, "devcoordinator: rpc: dev '%s' received wrong unexpected reply %08x\n",
             this->name.data(), hdr->ordinal);
diff --git a/zircon/system/core/devmgr/devcoordinator/device.h b/zircon/system/core/devmgr/devcoordinator/device.h
index 0322b5eff6c..6c6007def84 100644
--- a/zircon/system/core/devmgr/devcoordinator/device.h
+++ b/zircon/system/core/devmgr/devcoordinator/device.h
@@ -82,6 +82,16 @@ struct Device : public fbl::RefCounted<Device>, public AsyncLoopRefCountedRpcHan
     // or after it becomes visible.
     zx_status_t SignalReadyForBind(zx::duration delay = zx::sec(0));
 
+    using SuspendCompletion = fit::function<void(zx_status_t)>;
+    // Issue a Suspend request to this device.  When the response comes in, the
+    // given completion will be invoked.
+    zx_status_t SendSuspend(uint32_t flags, SuspendCompletion completion);
+
+    // Run the completion for the outstanding suspend, if any.  This method is
+    // only exposed currently because RemoveDevice is on Coordinator instead of
+    // Device.
+    void CompleteSuspend(zx_status_t status);
+
     Coordinator* coordinator;
     uint32_t flags = 0;
 
@@ -181,6 +191,15 @@ struct Device : public fbl::RefCounted<Device>, public AsyncLoopRefCountedRpcHan
     Devhost* host() const { return host_; }
     uint64_t local_id() const { return local_id_; }
 
+    // TODO(teisenbe): We probably want more states.  For example, the DEAD flag
+    // should probably move in to here.
+    enum class State {
+        kActive,
+        kSuspended,
+    };
+
+    State state() { return state_; }
+
 private:
     zx_status_t HandleRead();
 
@@ -208,6 +227,13 @@ private:
     // The id of this device from the perspective of the devhost.  This can be
     // used to communicate with the devhost about this device.
     uint64_t local_id_ = 0;
+
+    // The current state of the device
+    State state_ = State::kActive;
+
+    // If a suspend is in-progress, this completion will be invoked when it is
+    // completed.
+    SuspendCompletion suspend_completion_;
 };
 
 } // namespace devmgr
diff --git a/zircon/system/core/devmgr/devcoordinator/suspend-task.cpp b/zircon/system/core/devmgr/devcoordinator/suspend-task.cpp
new file mode 100644
index 00000000000..2e911312269
--- /dev/null
+++ b/zircon/system/core/devmgr/devcoordinator/suspend-task.cpp
@@ -0,0 +1,67 @@
+// Copyright 2019 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "suspend-task.h"
+
+#include "coordinator.h"
+
+namespace devmgr {
+
+SuspendTask::SuspendTask(fbl::RefPtr<Device> device, uint32_t flags, Completion completion)
+    : Task(device->coordinator->dispatcher(), std::move(completion)), device_(std::move(device)),
+      flags_(flags) {}
+
+SuspendTask::~SuspendTask() = default;
+
+fbl::RefPtr<SuspendTask> SuspendTask::Create(fbl::RefPtr<Device> device, uint32_t flags,
+                                             Completion completion) {
+    return fbl::MakeRefCounted<SuspendTask>(std::move(device), flags, std::move(completion));
+}
+
+void SuspendTask::Run() {
+    bool found_more_dependencies = false;
+    for (auto& child : device_->children) {
+        // Use a switch statement here so that this gets reconsidered if we add
+        // more states.
+        switch (child.state()) {
+        case Device::State::kSuspended: continue;
+        case Device::State::kActive: break;
+        }
+        auto task = SuspendTask::Create(fbl::WrapRefPtr(&child), flags_);
+        AddDependency(std::move(task));
+        found_more_dependencies = true;
+    }
+    if (found_more_dependencies) {
+        return;
+    }
+
+    // Handle the device proxy, if it exists, after children since they might
+    // depend on it.
+    if (device_->proxy != nullptr) {
+        switch (device_->proxy->state()) {
+        case Device::State::kSuspended: break;
+        case Device::State::kActive: {
+            auto task = SuspendTask::Create(device_->proxy, flags_);
+            AddDependency(std::move(task));
+            return;
+        }
+        }
+    }
+
+    // Check if this device is not in a devhost.  This happens for the
+    // top-level devices like /sys provided by devcoordinator
+    if (device_->host() == nullptr) {
+        return Complete(ZX_OK);
+    }
+
+    auto completion = [this](zx_status_t status) {
+        Complete(status);
+    };
+    zx_status_t status = device_->SendSuspend(flags_, std::move(completion));
+    if (status != ZX_OK) {
+        Complete(status);
+    }
+}
+
+} // namespace devmgr
diff --git a/zircon/system/core/devmgr/devcoordinator/suspend-task.h b/zircon/system/core/devmgr/devcoordinator/suspend-task.h
new file mode 100644
index 00000000000..96f2b676dbf
--- /dev/null
+++ b/zircon/system/core/devmgr/devcoordinator/suspend-task.h
@@ -0,0 +1,30 @@
+// Copyright 2019 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#pragma once
+
+#include "device.h"
+#include "task.h"
+
+namespace devmgr {
+
+class SuspendTask final : public Task {
+public:
+    static fbl::RefPtr<SuspendTask> Create(fbl::RefPtr<Device> device, uint32_t flags,
+                                           Completion completion = nullptr);
+
+    // Don/t invoke this, use Create
+    SuspendTask(fbl::RefPtr<Device> device, uint32_t flags, Completion completion);
+
+    ~SuspendTask() final;
+private:
+    void Run() final;
+
+    // The device being suspended
+    fbl::RefPtr<Device> device_;
+    // The target suspend flags
+    uint32_t flags_;
+};
+
+} // namespace devmgr
diff --git a/zircon/system/core/devmgr/devhost/core.cpp b/zircon/system/core/devmgr/devhost/core.cpp
index cef93de3b14..f9a587c3538 100644
--- a/zircon/system/core/devmgr/devhost/core.cpp
+++ b/zircon/system/core/devmgr/devhost/core.cpp
@@ -568,49 +568,24 @@ zx_status_t devhost_device_close(fbl::RefPtr<zx_device_t> dev, uint32_t flags) R
     return dev->CloseOp(flags);
 }
 
-static zx_status_t devhost_device_suspend_locked(const fbl::RefPtr<zx_device>& dev,
-                                                 uint32_t flags) REQ_DM_LOCK {
-    // first suspend children (so we suspend from leaf up)
-    zx_status_t st;
-    for (auto& child : dev->children) {
-        if (!(child.flags & DEV_FLAG_DEAD)) {
-            // Try to get a reference to the child.   This will fail if the last
-            // reference to it went away and fbl_recycle() is going to blocked
-            // waiting for the DM lock
-            auto child_ref =
-                fbl::MakeRefPtrUpgradeFromRaw(&child, &::devmgr::internal::devhost_api_lock);
-            if (child_ref) {
-                st = devhost_device_suspend(std::move(child_ref), flags);
-                if (st != ZX_OK) {
-                    return st;
-                }
-            }
-        }
-    }
-
+zx_status_t devhost_device_suspend(const fbl::RefPtr<zx_device>& dev, uint32_t flags) REQ_DM_LOCK {
+    // TODO this should eventually be two-pass using SUSPENDING/SUSPENDED flags
+    enum_lock_acquire();
 
+    zx_status_t status = ZX_ERR_NOT_SUPPORTED;
     // then invoke our suspend hook
     if (dev->ops->suspend) {
         ApiAutoRelock relock;
-        st = dev->ops->suspend(dev->ctx, flags);
-    } else {
-        st = ZX_ERR_NOT_SUPPORTED;
+        status = dev->ops->suspend(dev->ctx, flags);
     }
 
+    enum_lock_release();
+
     // default_suspend() returns ZX_ERR_NOT_SUPPORTED
-    if ((st != ZX_OK) && (st != ZX_ERR_NOT_SUPPORTED)) {
-        return st;
-    } else {
-        return ZX_OK;
+    if ((status != ZX_OK) && (status != ZX_ERR_NOT_SUPPORTED)) {
+        return status;
     }
-}
-
-zx_status_t devhost_device_suspend(const fbl::RefPtr<zx_device>& dev, uint32_t flags) REQ_DM_LOCK {
-    // TODO this should eventually be two-pass using SUSPENDING/SUSPENDED flags
-    enum_lock_acquire();
-    zx_status_t r = devhost_device_suspend_locked(dev, flags);
-    enum_lock_release();
-    return r;
+    return ZX_OK;
 }
 
 } // namespace devmgr
diff --git a/zircon/system/core/devmgr/devhost/devhost.cpp b/zircon/system/core/devmgr/devhost/devhost.cpp
index ba39f26b07a..e3c97d5fa02 100644
--- a/zircon/system/core/devmgr/devhost/devhost.cpp
+++ b/zircon/system/core/devmgr/devhost/devhost.cpp
@@ -603,19 +603,12 @@ static zx_status_t fidl_ConnectProxy(void* raw_ctx, zx_handle_t raw_shadow) {
 
 static zx_status_t fidl_Suspend(void* raw_ctx, uint32_t flags, fidl_txn_t* txn) {
     auto ctx = static_cast<DevhostRpcReadContext*>(raw_ctx);
-    // call suspend on the device this devhost is rooted on
-    fbl::RefPtr<zx_device_t> device = ctx->conn->dev;
-    while (device->parent != nullptr) {
-        device = device->parent;
-    }
     zx_status_t r;
     {
         ApiAutoLock lock;
-        r = devhost_device_suspend(device, flags);
+        r = devhost_device_suspend(ctx->conn->dev, flags);
     }
-    // TODO(teisenbe): We should probably check this return...
-    fuchsia_device_manager_DeviceControllerSuspend_reply(txn, r);
-    return ZX_OK;
+    return fuchsia_device_manager_DeviceControllerSuspend_reply(txn, r);
 }
 
 static zx_status_t fidl_RemoveDevice(void* raw_ctx) {
diff --git a/zircon/system/fidl/fuchsia-device-manager/coordinator.fidl b/zircon/system/fidl/fuchsia-device-manager/coordinator.fidl
index fb320726dce..dd7b9dc6dad 100644
--- a/zircon/system/fidl/fuchsia-device-manager/coordinator.fidl
+++ b/zircon/system/fidl/fuchsia-device-manager/coordinator.fidl
@@ -83,9 +83,7 @@ protocol DeviceController {
     /// this interface channel will close instead of returning a result.
     RemoveDevice();
 
-    /// Ask devhost to suspend all of its devices, using the target state indicated by |flags|.
-    /// This should logically be in DevhostController, but currently this is instead sent to the
-    /// first device in the devhost
+    /// Ask devhost to suspend this device, using the target state indicated by |flags|.
     Suspend(uint32 flags) -> (zx.status status);
 };
 
-- 
GitLab