From 8a76c5dab05922d7328d3fd25453f2594627167e Mon Sep 17 00:00:00 2001
From: Todd Eisenberger <teisenbe@google.com>
Date: Fri, 10 May 2019 17:27:41 -0700
Subject: [PATCH] [devhost] Fix double-free of ProxyIostate

The double free would occur if
1) some thread invoked Cancel() to queue destruction of a ProxyIostate,
   and
2) before that packet was processed, the ProxyIostate's handler ran and
   decided to destroy itself.

Fix this by having cancellation clear dev->proxy_ios while holding
proxy_ios_lock (Cancel() is now CancelLocked() and requires the lock),
and by making the RPC handler's teardown path check that field: if a
cancellation has already been queued, the handler releases ownership to
the queued destruction instead of freeing the ProxyIostate a second
time.

Bug: ZX-4060
Change-Id: I3b9c7275c4e0cd2dc3a6c6cd2d2376c947eb627a
---
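Notes (not part of the commit message): the sketch below is a minimal,
Zircon-free illustration of the ownership handoff this patch adopts,
assuming only the C++ standard library.  Device, ProxyConn, HandleTeardown,
and destroy_queue are hypothetical stand-ins for zx_device_t, ProxyIostate,
the teardown path in ProxyIostate::HandleRpc, and ConnectionDestroyer; it
sketches the idea, not the real devhost API.

#include <cassert>
#include <memory>
#include <mutex>
#include <vector>

struct ProxyConn;

struct Device {
    std::mutex proxy_lock;       // stands in for proxy_ios_lock
    ProxyConn* proxy = nullptr;  // stands in for proxy_ios; guarded by proxy_lock
};

struct ProxyConn {
    explicit ProxyConn(Device* device) : dev(device) {}

    ~ProxyConn() {
        // Whoever destroys the connection must already have detached it from
        // the device, so a surviving back-pointer would be a bug.
        std::lock_guard<std::mutex> guard(dev->proxy_lock);
        assert(dev->proxy != this);
    }

    // Mirrors CancelLocked(): the caller holds |proxy_lock|.
    void CancelLocked();

    // Mirrors the teardown path in HandleRpc(): the handler owns |conn| and
    // has decided the connection is dead (wait error or peer closed).
    static void HandleTeardown(std::unique_ptr<ProxyConn> conn) {
        std::lock_guard<std::mutex> guard(conn->dev->proxy_lock);
        if (conn->dev->proxy == conn.get()) {
            // No cancellation queued yet: detach, then let |conn| die when it
            // goes out of scope (after |guard| has been released).
            conn->dev->proxy = nullptr;
        } else {
            // A cancellation already queued destruction; hand ownership to
            // the queue instead of freeing the connection a second time.
            [[maybe_unused]] ProxyConn* handed_off = conn.release();
        }
    }

    Device* const dev;
};

// Stand-in for ConnectionDestroyer: destruction requests drained later.
std::vector<std::unique_ptr<ProxyConn>> destroy_queue;

// Detach from the device *before* queueing destruction, so that no other
// path will try to free the connection again.
void ProxyConn::CancelLocked() {
    assert(dev->proxy == this);
    dev->proxy = nullptr;
    destroy_queue.push_back(std::unique_ptr<ProxyConn>(this));
}

int main() {
    Device dev;
    auto* conn = new ProxyConn(&dev);
    dev.proxy = conn;

    // The interleaving from the bug report, in the fixed world: the handler
    // tears the connection down first...
    ProxyConn::HandleTeardown(std::unique_ptr<ProxyConn>(conn));

    // ...and a later cancel finds |proxy| already cleared, so it queues nothing.
    {
        std::lock_guard<std::mutex> guard(dev.proxy_lock);
        if (dev.proxy != nullptr) {
            dev.proxy->CancelLocked();
        }
    }
    destroy_queue.clear();  // drain any queued destructions exactly once
    return 0;
}

The invariant the sketch demonstrates is the one the patch relies on: exactly
one path clears the device's back-pointer, under the lock, before destruction
is queued or performed; the other path then sees the mismatch and hands off
(or skips) instead of freeing again.
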
 zircon/system/core/devmgr/devhost/devhost.cpp |  4 +-
 .../devmgr/devhost/proxy-iostate-test.cpp     | 35 ++++++++++++++++
 .../core/devmgr/devhost/proxy-iostate.cpp     | 40 +++++++++++--------
 .../core/devmgr/devhost/proxy-iostate.h       | 11 +++--
 4 files changed, 69 insertions(+), 21 deletions(-)

diff --git a/zircon/system/core/devmgr/devhost/devhost.cpp b/zircon/system/core/devmgr/devhost/devhost.cpp
index bb0fb46d96a..2994d7aecec 100644
--- a/zircon/system/core/devmgr/devhost/devhost.cpp
+++ b/zircon/system/core/devmgr/devhost/devhost.cpp
@@ -735,13 +735,13 @@ void DevfsConnection::HandleRpc(fbl::unique_ptr<DevfsConnection> conn,
     log(TRACE, "devhost: destroying devfs conn %p\n", conn.get());
 }
 
+
 static void proxy_ios_destroy(const fbl::RefPtr<zx_device_t>& dev) {
     fbl::AutoLock guard(&dev->proxy_ios_lock);
 
     if (dev->proxy_ios) {
-        dev->proxy_ios->Cancel(DevhostAsyncLoop()->dispatcher());
+        dev->proxy_ios->CancelLocked(DevhostAsyncLoop()->dispatcher());
     }
-    dev->proxy_ios = nullptr;
 }
 
 #define LOGBUF_MAX (ZX_LOG_RECORD_MAX - sizeof(zx_log_record_t))
diff --git a/zircon/system/core/devmgr/devhost/proxy-iostate-test.cpp b/zircon/system/core/devmgr/devhost/proxy-iostate-test.cpp
index 752ab43f5ad..a6bc51f81a1 100644
--- a/zircon/system/core/devmgr/devhost/proxy-iostate-test.cpp
+++ b/zircon/system/core/devmgr/devhost/proxy-iostate-test.cpp
@@ -2,6 +2,7 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+#include <fbl/auto_lock.h>
 #include <lib/async-loop/cpp/loop.h>
 #include <zxtest/zxtest.h>
 #include "proxy-iostate.h"
@@ -16,11 +17,45 @@ TEST(ProxyIostateTestCase, Creation) {
     zx::channel proxy_local, proxy_remote;
     ASSERT_OK(zx::channel::create(0, &proxy_local, &proxy_remote));
 
+    {
+        fbl::AutoLock guard(&dev->proxy_ios_lock);
+        ASSERT_NULL(dev->proxy_ios);
+    }
     ASSERT_OK(devmgr::ProxyIostate::Create(dev, std::move(proxy_remote), loop.dispatcher()));
+    {
+        fbl::AutoLock guard(&dev->proxy_ios_lock);
+        ASSERT_NOT_NULL(dev->proxy_ios);
+    }
 
     ASSERT_OK(loop.RunUntilIdle());
 }
 
+// This test reproduces the bug from ZX-4060, in which we would double-free the
+// ProxyIostate due to a cancellation being queued after a channel close event
+// gets queued, but before the channel close is processed.  If the bug is
+// present, and we're running with ASAN, this will crash 100% of the time.
+TEST(ProxyIostateTestCase, ChannelCloseThenCancel) {
+    async::Loop loop(&kAsyncLoopConfigNoAttachToThread);
+
+    fbl::RefPtr<zx_device> dev;
+    ASSERT_OK(zx_device::Create(&dev));
+
+    zx::channel proxy_local, proxy_remote;
+    ASSERT_OK(zx::channel::create(0, &proxy_local, &proxy_remote));
+
+    ASSERT_OK(devmgr::ProxyIostate::Create(dev, std::move(proxy_remote), loop.dispatcher()));
+    ASSERT_OK(loop.RunUntilIdle());
+
+    proxy_local.reset();
+
+    {
+        fbl::AutoLock guard(&dev->proxy_ios_lock);
+        dev->proxy_ios->CancelLocked(loop.dispatcher());
+        ASSERT_NULL(dev->proxy_ios);
+    }
+
+    ASSERT_OK(loop.RunUntilIdle());
+}
 
 int main(int argc, char** argv) {
     return RUN_ALL_TESTS(argc, argv);
diff --git a/zircon/system/core/devmgr/devhost/proxy-iostate.cpp b/zircon/system/core/devmgr/devhost/proxy-iostate.cpp
index 689c4b00ab2..332c034acbc 100644
--- a/zircon/system/core/devmgr/devhost/proxy-iostate.cpp
+++ b/zircon/system/core/devmgr/devhost/proxy-iostate.cpp
@@ -13,23 +13,35 @@ namespace devmgr {
 
 ProxyIostate::~ProxyIostate() {
     fbl::AutoLock guard(&dev->proxy_ios_lock);
-    if (dev->proxy_ios == this) {
-        dev->proxy_ios = nullptr;
-    }
+    ZX_ASSERT(dev->proxy_ios != this);
 }
 
 // Handling RPC From Proxy Devices to BusDevs
 void ProxyIostate::HandleRpc(fbl::unique_ptr<ProxyIostate> conn, async_dispatcher_t* dispatcher,
                              async::WaitBase* wait, zx_status_t status,
                              const zx_packet_signal_t* signal) {
+    auto handle_destroy = [&conn]() {
+        fbl::AutoLock guard(&conn->dev->proxy_ios_lock);
+        // If proxy_ios is not |conn|, then a packet to destroy |conn| has
+        // already been queued, so let that queued destruction handle it.
+        // Otherwise, destroy it here.
+        if (conn->dev->proxy_ios == conn.get()) {
+            // Mark proxy_ios as disconnected so that CancelLocked() doesn't
+            // also try to destroy it.
+            conn->dev->proxy_ios = nullptr;
+            // The actual destruction will happen when |conn| goes out of scope.
+        } else {
+            __UNUSED auto ptr = conn.release();
+        }
+    };
     if (status != ZX_OK) {
-        return;
+        return handle_destroy();
     }
 
     if (conn->dev == nullptr) {
         log(RPC_SDW, "proxy-rpc: stale rpc? (ios=%p)\n", conn.get());
         // Do not re-issue the wait here
-        return;
+        return handle_destroy();
     }
     if (signal->observed & ZX_CHANNEL_READABLE) {
         log(RPC_SDW, "proxy-rpc: rpc readable (ios=%p,dev=%p)\n", conn.get(), conn->dev.get());
@@ -37,16 +49,14 @@ void ProxyIostate::HandleRpc(fbl::unique_ptr<ProxyIostate> conn, async_dispatche
         if (r != ZX_OK) {
             log(RPC_SDW, "proxy-rpc: rpc cb error %d (ios=%p,dev=%p)\n", r, conn.get(),
                 conn->dev.get());
-            // Let |conn| be destroyed
-            return;
+            return handle_destroy();
         }
         BeginWait(std::move(conn), dispatcher);
         return;
     }
     if (signal->observed & ZX_CHANNEL_PEER_CLOSED) {
         log(RPC_SDW, "proxy-rpc: peer closed (ios=%p,dev=%p)\n", conn.get(), conn->dev.get());
-        // Let |conn| be destroyed
-        return;
+        return handle_destroy();
     }
     log(ERROR, "devhost: no work? %08x\n", signal->observed);
     BeginWait(std::move(conn), dispatcher);
@@ -59,16 +69,14 @@ zx_status_t ProxyIostate::Create(const fbl::RefPtr<zx_device_t>& dev, zx::channe
     fbl::AutoLock guard(&dev->proxy_ios_lock);
 
     if (dev->proxy_ios) {
-        dev->proxy_ios->Cancel(dispatcher);
-        dev->proxy_ios = nullptr;
+        dev->proxy_ios->CancelLocked(dispatcher);
     }
 
-    auto ios = std::make_unique<ProxyIostate>();
+    auto ios = std::make_unique<ProxyIostate>(dev);
     if (ios == nullptr) {
         return ZX_ERR_NO_MEMORY;
     }
 
-    ios->dev = dev;
     ios->set_channel(std::move(rpc));
 
     // |ios| is will be owned by the async loop.  |dev| holds a reference that will be
@@ -84,9 +92,9 @@ zx_status_t ProxyIostate::Create(const fbl::RefPtr<zx_device_t>& dev, zx::channe
     return ZX_OK;
 }
 
-// The device for which ProxyIostate is currently attached to should have
-// its proxy_ios_lock held across Cancel().
-void ProxyIostate::Cancel(async_dispatcher_t* dispatcher) {
+void ProxyIostate::CancelLocked(async_dispatcher_t* dispatcher) {
+    ZX_ASSERT(this->dev->proxy_ios == this);
+    this->dev->proxy_ios = nullptr;
     // TODO(teisenbe): We should probably check the return code in case the
     // queue was full
     ConnectionDestroyer::Get()->QueueProxyConnection(dispatcher, this);
diff --git a/zircon/system/core/devmgr/devhost/proxy-iostate.h b/zircon/system/core/devmgr/devhost/proxy-iostate.h
index 83615b576c3..05d0d557791 100644
--- a/zircon/system/core/devmgr/devhost/proxy-iostate.h
+++ b/zircon/system/core/devmgr/devhost/proxy-iostate.h
@@ -8,6 +8,7 @@
 #include <fbl/unique_ptr.h>
 #include <lib/async/cpp/wait.h>
 #include <lib/zx/channel.h>
+#include <zircon/thread_annotations.h>
 #include "../shared/async-loop-owned-rpc-handler.h"
 
 struct zx_device;
@@ -15,7 +16,7 @@ struct zx_device;
 namespace devmgr {
 
 struct ProxyIostate : AsyncLoopOwnedRpcHandler<ProxyIostate> {
-    ProxyIostate() = default;
+    explicit ProxyIostate(fbl::RefPtr<zx_device> device) : dev(std::move(device)) {}
     ~ProxyIostate();
 
     // Creates a ProxyIostate and points |dev| at it.  The ProxyIostate is owned
@@ -25,13 +26,17 @@ struct ProxyIostate : AsyncLoopOwnedRpcHandler<ProxyIostate> {
                               async_dispatcher_t* dispatcher);
 
     // Request the destruction of the proxy connection
-    void Cancel(async_dispatcher_t* dispatcher);
+    // The device this ProxyIostate is attached to must have its
+    // proxy_ios_lock held across CancelLocked().
+    // We must disable thread safety analysis because the lock is in |this->dev|,
+    // and Clang cannot reason about the aliasing involved.
+    void CancelLocked(async_dispatcher_t* dispatcher) TA_NO_THREAD_SAFETY_ANALYSIS;
 
     static void HandleRpc(fbl::unique_ptr<ProxyIostate> conn, async_dispatcher_t* dispatcher,
                           async::WaitBase* wait, zx_status_t status,
                           const zx_packet_signal_t* signal);
 
-    fbl::RefPtr<zx_device> dev;
+    const fbl::RefPtr<zx_device> dev;
 };
 static void proxy_ios_destroy(const fbl::RefPtr<zx_device>& dev);
 
-- 
GitLab