From 93f42e98b1c8750bd43296a30c5562b133d83598 Mon Sep 17 00:00:00 2001
From: Mukesh Agrawal <quiche@google.com>
Date: Tue, 14 May 2019 02:46:27 +0000
Subject: [PATCH] [benchmarks] reduce setup time for RandomMemcpy benchmark

Allocating the memory buffer for RandomMemcpy can be very slow. In
some manual testing on Eve, the buffer allocation appeared to take 4-8
seconds.

This is with a debug build which uses -O0, and the std::vector
constructor will produce code that does a function call for every byte
in the 128MB buffer.  (In release builds, the compiler will optimise
that to a memset().  When unoptimised, it's very slow.)

Optimize the buffer allocation, by using a primitive array, rather
than a std::vector. We fill the array before reading from it, to
ensure that we do not cause any reads of uninitialized memory.

The net result of the optimization is that the time to run the
benchmarks in "unit-test-mode", on my Eve, drops from ~2 minutes to
~30 seconds.

Bug: FLK-124 #done
Test: /pkgfs/packages/zircon_benchmarks/0/test/zircon_benchmarks (on eve)
Test: /pkgfs/packages/zircon_benchmarks/0/test/zircon_benchmarks -p (on eve)
Change-Id: I449a304a60ccee6a137e9371ca4455b8840fedde
---
 garnet/bin/zircon_benchmarks/random_memcpy.cc | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/garnet/bin/zircon_benchmarks/random_memcpy.cc b/garnet/bin/zircon_benchmarks/random_memcpy.cc
index 389f443dd75..14f72747334 100644
--- a/garnet/bin/zircon_benchmarks/random_memcpy.cc
+++ b/garnet/bin/zircon_benchmarks/random_memcpy.cc
@@ -2,6 +2,7 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+#include <cstring>
 #include <iostream>
 #include <random>
 #include <vector>
@@ -45,8 +46,9 @@ bool RandomMemcpy(perftest::RepeatState* state, size_t block_size_bytes,
     return false;
   }
 
-  // Prepare the buffer. The std::vector constructor will zero the bytes.
-  std::vector<char> buf(buffer_size_bytes);
+  // Prepare the buffer.
+  auto buf = std::make_unique<char[]>(buffer_size_bytes);
+  memset(buf.get(), 0, buffer_size_bytes);
 
   // Prepare the random source and destination addresses.
   const size_t cache_size_bytes = kCacheSizeMB * 1024 * 1024;
@@ -62,9 +64,9 @@ bool RandomMemcpy(perftest::RepeatState* state, size_t block_size_bytes,
   std::vector<char*> src_addrs(access_sequence_len);
   std::vector<char*> dst_addrs(access_sequence_len);
   std::generate(src_addrs.begin(), src_addrs.end(),
-                [&] { return &buf.front() + rand_offset_gen(rand_dev); });
+                [&] { return buf.get() + rand_offset_gen(rand_dev); });
   std::generate(dst_addrs.begin(), dst_addrs.end(),
-                [&] { return &buf.front() + rand_offset_gen(rand_dev); });
+                [&] { return buf.get() + rand_offset_gen(rand_dev); });
 
   // Run the benchmark task.
   state->SetBytesProcessedPerRun(block_size_bytes);
-- 
GitLab