From 93f42e98b1c8750bd43296a30c5562b133d83598 Mon Sep 17 00:00:00 2001 From: Mukesh Agrawal <quiche@google.com> Date: Tue, 14 May 2019 02:46:27 +0000 Subject: [PATCH] [benchmarks] reduce setup time for RandomMemcpy benchmark Allocating the memory buffer for RandomMemcpy can be very slow. In some manual testing on Eve, the buffer allocation appeared to take 4-8 seconds. This is with a debug build which uses -O0, and the std::vector constructor will produce code that does a function call for every byte in the 128MB buffer. (In release builds, the compiler will optimise that to a memcpy(). When unoptimised, it's very slow.) Optimize the buffer allocation, by using a primitive array, rather than a std::vector. We fill the array before reading from it, to ensure that we do not cause any reads of uninitialized memory. The net result of the optimization is that the time to run the benchmarks in "unit-test-mode", on my Eve, drops from ~2 minutes to ~30 seconds. Bug: FLK-124 #done Test: /pkgfs/packages/zircon_benchmarks/0/test/zircon_benchmarks (on eve) Test: /pkgfs/packages/zircon_benchmarks/0/test/zircon_benchmarks -p (on eve) Change-Id: I449a304a60ccee6a137e9371ca4455b8840fedde --- garnet/bin/zircon_benchmarks/random_memcpy.cc | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/garnet/bin/zircon_benchmarks/random_memcpy.cc b/garnet/bin/zircon_benchmarks/random_memcpy.cc index 389f443dd75..14f72747334 100644 --- a/garnet/bin/zircon_benchmarks/random_memcpy.cc +++ b/garnet/bin/zircon_benchmarks/random_memcpy.cc @@ -2,6 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +#include <cstring> #include <iostream> #include <random> #include <vector> @@ -45,8 +46,9 @@ bool RandomMemcpy(perftest::RepeatState* state, size_t block_size_bytes, return false; } - // Prepare the buffer. The std::vector constructor will zero the bytes. - std::vector<char> buf(buffer_size_bytes); + // Prepare the buffer. 
+ auto buf = std::make_unique<char[]>(buffer_size_bytes); + memset(buf.get(), 0, buffer_size_bytes); // Prepare the random source and destination addresses. const size_t cache_size_bytes = kCacheSizeMB * 1024 * 1024; @@ -62,9 +64,9 @@ bool RandomMemcpy(perftest::RepeatState* state, size_t block_size_bytes, std::vector<char*> src_addrs(access_sequence_len); std::vector<char*> dst_addrs(access_sequence_len); std::generate(src_addrs.begin(), src_addrs.end(), - [&] { return &buf.front() + rand_offset_gen(rand_dev); }); + [&] { return buf.get() + rand_offset_gen(rand_dev); }); std::generate(dst_addrs.begin(), dst_addrs.end(), - [&] { return &buf.front() + rand_offset_gen(rand_dev); }); + [&] { return buf.get() + rand_offset_gen(rand_dev); }); // Run the benchmark task. state->SetBytesProcessedPerRun(block_size_bytes); -- GitLab