diff --git a/src/graphics/lib/compute/hotsort/BUILD.gn b/src/graphics/lib/compute/hotsort/BUILD.gn index 307947ea383e90260f2c9a1934f69f12e1bcd6e9..f6fe8d87068dc5631d8f39ddfbcc01ebb2d24c05 100644 --- a/src/graphics/lib/compute/hotsort/BUILD.gn +++ b/src/graphics/lib/compute/hotsort/BUILD.gn @@ -4,6 +4,13 @@ group("hotsort") { deps = [ - "hotsort_gen($host_toolchain)", + "platforms/vk", + ] +} + +group("tests") { + testonly = true + public_deps = [ + "platforms/vk/tests", ] } diff --git a/src/graphics/lib/compute/hotsort/hotsort_gen/gen.h b/src/graphics/lib/compute/hotsort/hotsort_gen/gen.h index 814dd008e22a50e5a88fc8844f3107dc67f5e11a..6a4a44f92ad4c017f5e0b11a2e317ebef3cbd005 100644 --- a/src/graphics/lib/compute/hotsort/hotsort_gen/gen.h +++ b/src/graphics/lib/compute/hotsort/hotsort_gen/gen.h @@ -70,6 +70,17 @@ struct hsg_config struct { uint32_t dwords; // number of dwords in a key -- .type will be extended to support key-vals } type; + + struct { // This provides GLSL-specific support for specifying the set and binding + struct { // locations for the input and output buffers. Note that this will be + uint32_t set; // removed once supported Vulkan targets support GLSL_EXT_buffer_reference. 
+ uint32_t binding; + } in; + struct { + uint32_t set; + uint32_t binding; + } out; + } glsl; }; // diff --git a/src/graphics/lib/compute/hotsort/hotsort_gen/main.c b/src/graphics/lib/compute/hotsort/hotsort_gen/main.c index 73c3ad1e8978e3a05e9a384b57098a7715221d15..9a39238920778ceb35d9260679084189e7315f0d 100644 --- a/src/graphics/lib/compute/hotsort/hotsort_gen/main.c +++ b/src/graphics/lib/compute/hotsort/hotsort_gen/main.c @@ -6,12 +6,13 @@ // // -#include <stdlib.h> -#include <stdbool.h> -#include <string.h> -#include <getopt.h> #include <ctype.h> +#include <getopt.h> #include <inttypes.h> +#include <limits.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> #include <unistd.h> // @@ -143,14 +144,14 @@ struct hsg_config hsg_config = .lo = 1, .hi = 1 }, - .half = { + .half = { .warps = 1, .lo = 1, .hi = 1 }, }, - .block = { + .block = { .warps_min = 1, .warps_max = UINT32_MAX, .warps_mod = 2, @@ -162,7 +163,7 @@ struct hsg_config hsg_config = .smem_bc = UINT32_MAX }, - .warp = { + .warp = { .lanes = 32, .lanes_log2 = 5, }, @@ -172,8 +173,19 @@ struct hsg_config hsg_config = .xtra = 0 }, - .type = { + .type = { .dwords = 2 + }, + + .glsl = { + .in = { + .set = 0, + .binding = 1 + }, + .out = { + .set = 0, + .binding = 0 + } } }; @@ -1464,7 +1476,7 @@ main(int argc, char * argv[]) struct hsg_target target; - while ((opt = getopt(argc,argv,"hvzo:a:g:G:s:S:w:b:B:m:M:k:r:x:t:f:F:c:C:p:P:D:")) != EOF) + while ((opt = getopt(argc,argv,"hvzo:a:g:G:s:S:w:b:B:m:M:k:r:x:t:f:F:c:C:p:P:D:L:")) != EOF) { switch (opt) { @@ -1613,6 +1625,31 @@ main(int argc, char * argv[]) hsg_optarg(optarg), HSG_CONFIG_DEFINE_LEN_SIZE-1); break; + + case 'L': + { + // expects exactly 4 comma-separated unsigned integers: <in.set>,<in.binding>,<out.set>,<out.binding> + uint32_t * const locations[4] = { &hsg_config.glsl.in.set, + &hsg_config.glsl.in.binding, + &hsg_config.glsl.out.set, + &hsg_config.glsl.out.binding }; + char const * str = optarg; + + for (uint32_t ii=0; ii<4; ii++) + { + char * end; + unsigned long const value = strtoul(str,&end,10); + 
+ // reject a missing number, a value >= UINT32_MAX (also strtoul's overflow result), + // or anything but ',' between fields and end-of-string after the last field + if ((end == str) || (value >= UINT32_MAX) || (*end != ((ii < 3) ? ',' : '\0'))) + return EXIT_FAILURE; + + *locations[ii] = (uint32_t)value; + str = end + 1; + } + } + break; } } diff --git a/src/graphics/lib/compute/hotsort/hotsort_gen/target_glsl.c b/src/graphics/lib/compute/hotsort/hotsort_gen/target_glsl.c index eb58d89fb863a51b1bc1ed0e41e4fd27732475ec..3bc61a382d15185633ce2f81ae17cf99cb0b043f 100644 --- a/src/graphics/lib/compute/hotsort/hotsort_gen/target_glsl.c +++ b/src/graphics/lib/compute/hotsort/hotsort_gen/target_glsl.c @@ -93,8 +93,8 @@ hsg_macros(struct hsg_config const * const config, FILE * file) "// target-specific config \n" "#include \"hs_config.h\" \n" " \n" - "// arch/target-specific macros \n" - "#include \"hs_glsl_macros.h\" \n" + "// vendor<arch<target-specific macros \n" + "#include \"hs_glsl_macros_vendor.h\" \n" " \n" "// \n" "// \n" @@ -187,7 +187,7 @@ hsg_target_header_and_module(struct hsg_config const * const config) " } \n" "}; \n" " \n" - "#include \"hs_target_modules_init.inl\" \n" + "#include \"hs_target_modules_dump.inl\" \n" " \n" "// \n" "// \n" @@ -288,9 +288,14 @@ hsg_target_glsl(struct hsg_target * const target, "#define HS_HM_BLOCK_HEIGHT %u \n" "#define HS_HM_SCALE_MIN %u \n" "#define HS_HM_SCALE_MAX %u \n" - "#define HS_EMPTY \n\n", - config->warp.lanes_log2, // FIXME -- this matters for SIMD - config->warp.lanes_log2, + "#define HS_EMPTY \n" + "#define HS_KV_IN_SET %u \n" + "#define HS_KV_IN_BINDING %u \n" + "#define HS_KV_OUT_SET %u \n" + "#define HS_KV_OUT_BINDING %u \n" + "#define HS_IS_IN_PLACE ((HS_KV_IN_SET == HS_KV_OUT_SET) && (HS_KV_IN_BINDING == HS_KV_OUT_BINDING))\n\n", + config->warp.lanes_log2, // NOTE: the number of slab threads might + config->warp.lanes_log2, // NOTE: not always equal slab width 
config->thread.regs, config->thread.regs, config->type.dwords, @@ -302,7 +307,11 @@ hsg_target_glsl(struct hsg_target * const target, config->merge.flip.hi, config->merge.half.warps, config->merge.half.lo, - config->merge.half.hi); + config->merge.half.hi, + config->glsl.in.set, + config->glsl.in.binding, + config->glsl.out.set, + config->glsl.out.binding); fprintf(target->state->header, "#define HS_SLAB_ROWS() \\\n"); diff --git a/src/graphics/lib/compute/hotsort/platforms/vk/BUILD.gn b/src/graphics/lib/compute/hotsort/platforms/vk/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..417567f1278a1bdea1d377e8a28040038abe998c --- /dev/null +++ b/src/graphics/lib/compute/hotsort/platforms/vk/BUILD.gn @@ -0,0 +1,22 @@ +# Copyright 2019 The Fuchsia Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +source_set("vk") { + public = [ + "hotsort_vk.h", + ] + sources = [ + "hotsort_vk.h", + "hotsort_vk.c", + "hotsort_vk_target.h", + ] + include_dirs = [ + "//src/graphics/lib/compute" + ] + deps = [ + "//src/graphics/lib/compute/common", + "//src/graphics/lib/compute/common/vk", + "//third_party/vulkan_loader_and_validation_layers:vulkan", + ] +} diff --git a/src/graphics/lib/compute/hotsort/vk/hs_vk.c b/src/graphics/lib/compute/hotsort/platforms/vk/hotsort_vk.c similarity index 80% rename from src/graphics/lib/compute/hotsort/vk/hs_vk.c rename to src/graphics/lib/compute/hotsort/platforms/vk/hotsort_vk.c index 7f2be4fd212a8c4a5a4a0c0e7ff9abde0681050d..7da1f17fe576b158542f311dcc1b210a91aa7370 100644 --- a/src/graphics/lib/compute/hotsort/vk/hs_vk.c +++ b/src/graphics/lib/compute/hotsort/platforms/vk/hotsort_vk.c @@ -12,12 +12,12 @@ #include "common/vk/vk_assert.h" #include "common/vk/vk_barrier.h" -#if defined(HS_VK_SHADER_INFO_AMD_STATISTICS) || defined(HS_VK_SHADER_INFO_AMD_DISASSEMBLY) +#if defined(HOTSORT_VK_SHADER_INFO_AMD_STATISTICS) || 
defined(HOTSORT_VK_SHADER_INFO_AMD_DISASSEMBLY) #include "common/vk/vk_shader_info_amd.h" #endif -#include "hs_vk.h" -#include "hs_vk_target.h" +#include "hotsort_vk.h" +#include "hotsort_vk_target.h" // // We want concurrent kernel execution to occur in a few places. @@ -60,7 +60,7 @@ // |/ YES // + | // | v -// | END_WITH_EVENTS(bs_full,bs_frac) +// | END_WITH(bs_full,bs_frac) // | // | // WAITFOR(pad_out,bs_full,bs_frac) >>> first iteration of loop <<< @@ -95,32 +95,30 @@ // YES // | // v -// END_WITH_EVENTS(bc) +// END_WITH(bc) // -struct hs_vk +struct hotsort_vk { - struct hs_vk_target_config config; + struct hotsort_vk_target_config config; - bool is_in_place; + uint32_t slab_keys; + uint32_t key_val_size; + uint32_t bs_slabs_log2_ru; + uint32_t bc_slabs_log2_max; - uint32_t slab_keys; - uint32_t key_val_size; - uint32_t bs_slabs_log2_ru; - uint32_t bc_slabs_log2_max; - - VkPipelineLayout pl; + VkPipelineLayout pl; struct { - uint32_t count; - VkPipeline * bs; - VkPipeline * bc; - VkPipeline * fm[3]; - VkPipeline * hm[3]; - VkPipeline * fill_in; - VkPipeline * fill_out; - VkPipeline * transpose; - VkPipeline all[]; + uint32_t count; + VkPipeline * bs; + VkPipeline * bc; + VkPipeline * fm[3]; + VkPipeline * hm[3]; + VkPipeline * fill_in; + VkPipeline * fill_out; + VkPipeline * transpose; + VkPipeline all[]; } pipelines; }; @@ -128,13 +126,12 @@ struct hs_vk // // -struct hs_vk * -hs_vk_create(VkDevice device, - VkAllocationCallbacks const * allocator, - VkPipelineCache pc, - VkPipelineLayout pl, - struct hs_vk_ds_locations const * const locations, - struct hs_vk_target const * const target) +struct hotsort_vk * +hotsort_vk_create(VkDevice device, + VkAllocationCallbacks const * allocator, + VkPipelineCache pipeline_cache, + VkPipelineLayout pipeline_layout, + struct hotsort_vk_target const * const target) { // // we reference these values a lot @@ -177,14 +174,9 @@ hs_vk_create(VkDevice device, uint32_t const count_all = count_bs + 
count_bc_fm_hm_fills_transpose; // - // allocate hs_vk + // allocate hotsort_vk plus its trailing pipelines.all[] array of count_all VkPipeline handles // - struct hs_vk * const hs = malloc(sizeof(*hs) + sizeof(VkPipeline*) * count_all); - - // in-place sort? - hs->is_in_place = - (locations->in.set == locations->out.set) && - (locations->in.binding == locations->out.binding); + struct hotsort_vk * const hs = malloc(sizeof(*hs) + sizeof(VkPipeline) * count_all); // copy the config from the target -- we need these values later hs->config = target->config; @@ -196,7 +188,7 @@ hs_vk_create(VkDevice device, hs->bc_slabs_log2_max = bc_slabs_log2_max; // save pipeline layout for vkCmdPushConstants() - hs->pl = pl; + hs->pl = pipeline_layout; // save kernel count hs->pipelines.count = count_all; @@ -217,7 +209,7 @@ hs_vk_create(VkDevice device, .pName = "main", .pSpecializationInfo = NULL }, - .layout = pl, + .layout = pipeline_layout, .basePipelineHandle = VK_NULL_HANDLE, .basePipelineIndex = 0 }; @@ -226,32 +218,29 @@ hs_vk_create(VkDevice device, // Create a shader module, use it to create a pipeline... and // dispose of the shader module. 
// - // The BS compute shaders have the same layout - // The non-BS compute shaders have the same layout + // BS shaders have layout: (vout,vin) + // FILL_IN shaders have layout: (----,vin) + // FILL_OUT shaders have layout: (vout) + // otherwise shaders have layout: (vout) // VkShaderModuleCreateInfo smci = { .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .pNext = NULL, .flags = 0, .codeSize = 0, - .pCode = (uint32_t const *)target->modules // FIXME -- unfortunate typecast -- fix during generation + .pCode = NULL }; - // - // BS shaders have layout: (vout,vin) - // non-BS shaders have layout: (vout) - // + uint32_t const * modules = target->modules; + for (uint32_t ii=0; ii<count_all; ii++) { - // convert bytes to words - uint32_t const * const module = smci.pCode + smci.codeSize / sizeof(*module); + uint32_t const module_dwords = *modules++; - // - // FIXME -- patch descriptor set locations - // + smci.codeSize = module_dwords * sizeof(*modules); + smci.pCode = modules; - smci.codeSize = NTOHL_MACRO(module[0]); - smci.pCode = module + 1; + modules += module_dwords; vk(CreateShaderModule(device, &smci, @@ -259,7 +248,7 @@ hs_vk_create(VkDevice device, &cpci.stage.module)); vk(CreateComputePipelines(device, - pc, + pipeline_cache, 1, &cpci, allocator, @@ -322,10 +311,10 @@ hs_vk_create(VkDevice device, // // optionally dump pipeline stats // -#ifdef HS_VK_SHADER_INFO_AMD_STATISTICS +#ifdef HOTSORT_VK_SHADER_INFO_AMD_STATISTICS vk_shader_info_amd_statistics(device,hs->pipelines.all,NULL,hs->pipelines.count); #endif -#ifdef HS_VK_SHADER_INFO_AMD_DISASSEMBLY +#ifdef HOTSORT_VK_SHADER_INFO_AMD_DISASSEMBLY vk_shader_info_amd_disassembly(device,hs->pipelines.all,NULL,hs->pipelines.count); #endif @@ -340,9 +329,9 @@ hs_vk_create(VkDevice device, // void -hs_vk_release(VkDevice device, - VkAllocationCallbacks const * const allocator, - struct hs_vk * const hs) +hotsort_vk_release(VkDevice device, + VkAllocationCallbacks const * const allocator, + struct hotsort_vk * 
const hs) { for (uint32_t ii=0; ii<hs->pipelines.count; ii++) { @@ -359,10 +348,10 @@ hs_vk_release(VkDevice device, // void -hs_vk_pad(struct hs_vk const * const hs, - uint32_t const count, - uint32_t * const padded_in, - uint32_t * const padded_out) +hotsort_vk_pad(struct hotsort_vk const * const hs, + uint32_t const count, + uint32_t * const padded_in, + uint32_t * const padded_out) { // // round up the count to slabs @@ -411,9 +400,9 @@ hs_vk_pad(struct hs_vk const * const hs, static void -hs_cmd_transpose(VkCommandBuffer cb, - struct hs_vk const * const hs, - uint32_t const bx_ru) +hs_cmd_transpose(VkCommandBuffer cb, + struct hotsort_vk const * const hs, + uint32_t const bx_ru) { vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, @@ -428,10 +417,10 @@ hs_cmd_transpose(VkCommandBuffer cb, static void -hs_cmd_fill_in(VkCommandBuffer cb, - struct hs_vk const * const hs, - uint32_t const from_slab, - uint32_t const to_slab) +hs_cmd_fill_in(VkCommandBuffer cb, + struct hotsort_vk const * const hs, + uint32_t const from_slab, + uint32_t const to_slab) { vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, @@ -444,10 +433,10 @@ hs_cmd_fill_in(VkCommandBuffer cb, static void -hs_cmd_fill_out(VkCommandBuffer cb, - struct hs_vk const * const hs, - uint32_t const from_slab, - uint32_t const to_slab) +hs_cmd_fill_out(VkCommandBuffer cb, + struct hotsort_vk const * const hs, + uint32_t const from_slab, + uint32_t const to_slab) { vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, @@ -465,7 +454,7 @@ hs_cmd_fill_out(VkCommandBuffer cb, static void hs_cmd_bc(VkCommandBuffer cb, - struct hs_vk const * const hs, + struct hotsort_vk const * const hs, uint32_t const down_slabs, uint32_t const clean_slabs_log2) { @@ -486,10 +475,10 @@ hs_cmd_bc(VkCommandBuffer cb, static uint32_t -hs_cmd_hm(VkCommandBuffer cb, - struct hs_vk const * const hs, - uint32_t const down_slabs, - uint32_t const clean_slabs_log2) +hs_cmd_hm(VkCommandBuffer cb, + struct hotsort_vk const * const 
hs, + uint32_t const down_slabs, + uint32_t const clean_slabs_log2) { // how many scaled half-merge spans are there? uint32_t const frac_ru = (1 << clean_slabs_log2) - 1; @@ -518,11 +507,11 @@ hs_cmd_hm(VkCommandBuffer cb, static uint32_t -hs_cmd_fm(VkCommandBuffer cb, - struct hs_vk const * const hs, - uint32_t const bx_ru, - uint32_t * const down_slabs, - uint32_t const up_scale_log2) +hs_cmd_fm(VkCommandBuffer cb, + struct hotsort_vk const * const hs, + uint32_t const bx_ru, + uint32_t * const down_slabs, + uint32_t const up_scale_log2) { // // FIXME OPTIMIZATION: in previous HotSort launchers it's sometimes @@ -623,9 +612,9 @@ hs_cmd_fm(VkCommandBuffer cb, static void -hs_cmd_bs(VkCommandBuffer cb, - struct hs_vk const * const hs, - uint32_t const padded_in) +hs_cmd_bs(VkCommandBuffer cb, + struct hotsort_vk const * const hs, + uint32_t const padded_in) { uint32_t const slabs_in = padded_in / hs->slab_keys; uint32_t const full_bs = slabs_in / hs->config.block.slabs; @@ -660,20 +649,20 @@ hs_cmd_bs(VkCommandBuffer cb, // void -hs_vk_sort(VkCommandBuffer cb, - struct hs_vk const * const hs, - struct hs_vk_ds_offsets const * const offsets, - uint32_t const count, - uint32_t const padded_in, - uint32_t const padded_out, - bool const linearize) +hotsort_vk_sort(VkCommandBuffer cb, + struct hotsort_vk const * const hs, + struct hotsort_vk_ds_offsets const * const offsets, + uint32_t const count, + uint32_t const padded_in, + uint32_t const padded_out, + bool const linearize) { // // append the push constants // size_t const kv_size = (hs->config.dwords.key + hs->config.dwords.val) * sizeof(uint32_t); - struct hs_vk_push const push = + struct hotsort_vk_push const push = { .kv_offset_in = (uint32_t)(offsets->in / kv_size), .kv_offset_out = (uint32_t)(offsets->out / kv_size), @@ -682,9 +671,9 @@ hs_vk_sort(VkCommandBuffer cb, vkCmdPushConstants(cb, hs->pl, - HS_VK_PUSH_CONSTANT_RANGE_STAGE_FLAGS, - HS_VK_PUSH_CONSTANT_RANGE_OFFSET, - HS_VK_PUSH_CONSTANT_RANGE_SIZE, + 
HOTSORT_VK_PUSH_CONSTANT_RANGE_STAGE_FLAGS, + HOTSORT_VK_PUSH_CONSTANT_RANGE_OFFSET, + HOTSORT_VK_PUSH_CONSTANT_RANGE_SIZE, &push); // @@ -694,9 +683,10 @@ hs_vk_sort(VkCommandBuffer cb, // - pre-sort fill needs to happen before bs() // - pre-merge fill needs to happen before fm() // - uint32_t const padded_pre_sort = hs->is_in_place ? padded_out : padded_in; + bool const is_in_place = hs->config.is_in_place && (offsets->in == offsets->out); + uint32_t const padded_pre_sort = is_in_place ? padded_out : padded_in; bool const is_pre_sort_reqd = padded_pre_sort > count; - bool const is_pre_merge_reqd = !hs->is_in_place && (padded_out > padded_in); + bool const is_pre_merge_reqd = !is_in_place && (padded_out > padded_in); // // pre-sort fill? diff --git a/src/graphics/lib/compute/hotsort/vk/hs_vk.h b/src/graphics/lib/compute/hotsort/platforms/vk/hotsort_vk.h similarity index 57% rename from src/graphics/lib/compute/hotsort/vk/hs_vk.h rename to src/graphics/lib/compute/hotsort/platforms/vk/hotsort_vk.h index 2d52b778dcdab02ae58a386103ca0a60822d6152..81c1955d0281524f09b0d8b647c0bdceaf6fdb22 100644 --- a/src/graphics/lib/compute/hotsort/vk/hs_vk.h +++ b/src/graphics/lib/compute/hotsort/platforms/vk/hotsort_vk.h @@ -22,7 +22,7 @@ // // -#include "hs_vk_target.h" +#include "hotsort_vk_target.h" // // HotSort/VK relies on pipeline layout compatibility: @@ -48,49 +48,29 @@ // The locations of the input and output buffers are declared at // HotSort instance creation. // -// The buffer offsets can vary with each invocation of hs_vk_sort(). +// The buffer offsets can vary with each invocation of hotsort_vk_sort(). 
// // // HotSort push constants are expected at offset 0 // -struct hs_vk_push +struct hotsort_vk_push { uint32_t kv_offset_in; uint32_t kv_offset_out; uint32_t kv_count; }; -#define HS_VK_PUSH_CONSTANT_RANGE_STAGE_FLAGS VK_SHADER_STAGE_COMPUTE_BIT -#define HS_VK_PUSH_CONSTANT_RANGE_OFFSET 0 -#define HS_VK_PUSH_CONSTANT_RANGE_SIZE sizeof(struct hs_vk_push) - -// -// Declare the descriptor set layout binding locations for the -// input and output key-value storage buffers. -// -// If the input and output locations are identical then the sort will -// be performed "in place". -// - -struct hs_vk_ds_locations -{ - struct { - uint32_t set; - uint32_t binding; - } in; - struct { - uint32_t set; - uint32_t binding; - } out; -}; +#define HOTSORT_VK_PUSH_CONSTANT_RANGE_STAGE_FLAGS VK_SHADER_STAGE_COMPUTE_BIT +#define HOTSORT_VK_PUSH_CONSTANT_RANGE_OFFSET 0 +#define HOTSORT_VK_PUSH_CONSTANT_RANGE_SIZE sizeof(struct hotsort_vk_push) // // Declare the offsets of the key-value arrays before sorting. // -struct hs_vk_ds_offsets +struct hotsort_vk_ds_offsets { VkDeviceSize in; VkDeviceSize out; @@ -101,13 +81,12 @@ struct hs_vk_ds_offsets // buffers at specific descriptor set locations. 
// -struct hs_vk * -hs_vk_create(VkDevice device, - VkAllocationCallbacks const * allocator, - VkPipelineCache pipeline_cache, - VkPipelineLayout pipeline_layout, - struct hs_vk_ds_locations const * const locations, - struct hs_vk_target const * const target); +struct hotsort_vk * +hotsort_vk_create(VkDevice device, + VkAllocationCallbacks const * allocator, + VkPipelineCache pipeline_cache, + VkPipelineLayout pipeline_layout, + struct hotsort_vk_target const * const target); // // Resources will be disposed of with the same device and allocator @@ -115,9 +94,9 @@ hs_vk_create(VkDevice device, // void -hs_vk_release(VkDevice device, - VkAllocationCallbacks const * const allocator, - struct hs_vk * const hs); +hotsort_vk_release(VkDevice device, + VkAllocationCallbacks const * const allocator, + struct hotsort_vk * const hs); // // Explicitly reveal what padding of maximum valued key-vals will be @@ -138,10 +117,10 @@ hs_vk_release(VkDevice device, // void -hs_vk_pad(struct hs_vk const * const hs, - uint32_t const count, - uint32_t * const padded_in, - uint32_t * const padded_out); +hotsort_vk_pad(struct hotsort_vk const * const hs, + uint32_t const count, + uint32_t * const padded_in, + uint32_t * const padded_out); // // Append commands to the command buffer that, when enqueued, will: @@ -161,13 +140,13 @@ hs_vk_pad(struct hs_vk const * const hs, // void -hs_vk_sort(VkCommandBuffer cb, - struct hs_vk const * const hs, - struct hs_vk_ds_offsets const * const offsets, - uint32_t const count, - uint32_t const padded_in, - uint32_t const padded_out, - bool const linearize); +hotsort_vk_sort(VkCommandBuffer cb, + struct hotsort_vk const * const hs, + struct hotsort_vk_ds_offsets const * const offsets, + uint32_t const count, + uint32_t const padded_in, + uint32_t const padded_out, + bool const linearize); // // diff --git a/src/graphics/lib/compute/hotsort/vk/hs_vk_target.h b/src/graphics/lib/compute/hotsort/platforms/vk/hotsort_vk_target.h similarity index 83% rename 
from src/graphics/lib/compute/hotsort/vk/hs_vk_target.h rename to src/graphics/lib/compute/hotsort/platforms/vk/hotsort_vk_target.h index 5a2ae5b64af6bc4092b1eba754f70b3104a55128..38457d79bdc272d98479d1a3cc1ab639fe36d3a3 100644 --- a/src/graphics/lib/compute/hotsort/vk/hs_vk_target.h +++ b/src/graphics/lib/compute/hotsort/platforms/vk/hotsort_vk_target.h @@ -10,15 +10,15 @@ #include <stdint.h> -#include "common/macros.h" - // // This structure packages target-specific HotSort parameters and // SPIR-V modules. // -struct hs_vk_target_config +struct hotsort_vk_target_config { + uint8_t is_in_place; + struct { uint8_t threads_log2; uint8_t width_log2; @@ -56,10 +56,10 @@ struct hs_vk_target_config // one SPIR-V module then reevaluate this encoding. // -struct hs_vk_target +struct hotsort_vk_target { - struct hs_vk_target_config config; - ALIGN_MACRO(4) uint8_t modules[]; // modules[] must start on 32-bit boundary + struct hotsort_vk_target_config config; + uint32_t modules[]; }; // diff --git a/src/graphics/lib/compute/hotsort/platforms/vk/targets/BUILD.gn b/src/graphics/lib/compute/hotsort/platforms/vk/targets/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..48ecdf80ca11d3006c1282da29ed95cbddd01534 --- /dev/null +++ b/src/graphics/lib/compute/hotsort/platforms/vk/targets/BUILD.gn @@ -0,0 +1,15 @@ +# Copyright 2019 The Fuchsia Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ +# +# post-process SPIR-V modules and convert to C literals +# + +if (current_toolchain == host_toolchain) { + executable("hotsort_modules_to_literals") { + sources = [ + "hotsort_modules_to_literals.c" + ] + } +} diff --git a/src/graphics/lib/compute/hotsort/platforms/vk/targets/hotsort_comp_names.py b/src/graphics/lib/compute/hotsort/platforms/vk/targets/hotsort_comp_names.py new file mode 100755 index 0000000000000000000000000000000000000000..be380cc5e07d0f8df7d8b3b08f012bf1fc3030b7 --- /dev/null +++ b/src/graphics/lib/compute/hotsort/platforms/vk/targets/hotsort_comp_names.py @@ -0,0 +1,124 @@ +#!/usr/bin/python +# Copyright 2019 The Fuchsia Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +# +# The arguments passed to the 'hotsort_gen' code generator determine +# the compute shaders that are emitted. +# +# GN requires the list of compute shaders that *will* be generated by +# 'hotsort_gen'. +# +# This script accepts the exact same getopt() string as hotsort_gen +# but only uses the relevant arguments to determine which shader names +# will be produced by 'hotsort_gen'. +# + +import getopt, sys + +# +# Note that the inputs to these two functions are the number of +# warps/waves/subgroups supported by a GPU block. +# +# Currently, the largest GPU only supports 2^6 warps per block. +# + +def msb(n): + """Given an integer >= 0, return its bit length: the 1-based position of the most significant set bit, or 0 when n is 0.""" + assert n < 2 ** 32 + c = 0 + while n > 0: + n >>= 1 + c += 1 + return c + +def pow2_ru(n): + """Given an integer >= 1, return the smallest power of 2 that is >= n.""" + assert n <= 2 ** 31 + n -= 1 + n |= n >> 1 + n |= n >> 2 + n |= n >> 4 + n |= n >> 8 + n |= n >> 16 + n += 1 + return n + +# +# Duplicate the shader file generation logic of 'hotsort_gen' for a +# glsl platform target. 
+# + +bs_hi = 1 +fm_lo = 1 +fm_hi = 1 +hm_lo = 1 +hm_hi = 1 + +try: + # option list must always match hotsort_gen/main.c + opts, args = getopt.getopt(sys.argv[1:],'hvzo:a:g:G:s:S:w:b:B:m:M:k:r:x:t:f:F:c:C:p:P:D:L:') +except getopt.GetoptError as err: + print(str(err)) + sys.exit(2) + +for o, v in opts: + if o == '-b': + bs_hi = int(v) + elif o == '-f': + fm_lo = int(v) + elif o == '-F': + fm_hi = int(v) + elif o == '-c': + hm_lo = int(v) + elif o == '-C': + hm_hi = int(v) + +# +# BS +# + +bs_lo_log = 0 + +while (1<<bs_lo_log) <= bs_hi: + print('hs_bs_%d.comp' % bs_lo_log) + bs_lo_log += 1 + +# +# BC +# + +bc_lo_log = 0 + +while bc_lo_log <= msb(bs_hi) - 1: + print('hs_bc_%d.comp' % bc_lo_log) + bc_lo_log += 1 + +# +# FM +# + +for fm_scale in range(fm_lo,fm_hi+1): + span_left = (bs_hi << fm_scale) // 2 # floor division keeps the span an int on Python 2 and 3 + span_left_ru = pow2_ru(span_left) + span_right = 1 + while (span_right <= span_left_ru): + fm_log = msb(pow2_ru(min(span_left,span_right))) - 1 + print('hs_fm_%d_%d.comp' % (fm_scale, fm_log)) + span_right *= 2 + +# +# HM +# + +for hm_scale in range(hm_lo,hm_hi+1): + print('hs_hm_%d.comp' % hm_scale) + +# +# EXTRAS +# + +print('hs_fill_in.comp') +print('hs_fill_out.comp') +print('hs_transpose.comp') diff --git a/src/graphics/lib/compute/hotsort/platforms/vk/targets/hotsort_modules_to_literals.c b/src/graphics/lib/compute/hotsort/platforms/vk/targets/hotsort_modules_to_literals.c new file mode 100644 index 0000000000000000000000000000000000000000..8d4dfc57051f4fb5be63ffb2bb3cf5d69c18f380 --- /dev/null +++ b/src/graphics/lib/compute/hotsort/platforms/vk/targets/hotsort_modules_to_literals.c @@ -0,0 +1,146 @@ +// Copyright 2019 The Fuchsia Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include <inttypes.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> + +// +// hotsort_modules_to_literals: +// +// Write a concatenated array of post-processed SPIR-V modules to a +// file as an array of hex-encoded 32-bit C literals. +// +// Arguments: +// +// <output file> <spir-v module:1> ... <spir-v module M> +// +// Operation: +// +// 1. For each SPIR-V module: +// 1. Open and find length (must be a whole number of dwords) +// 2. Reallocate incrementing by length plus dword +// 3. Store length +// 4. Store module +// 5. Close module +// 2. Open and write out literals to output file +// 3. Close output file +// +// SPIR-V modules are encoded with this layout: +// +// DWORD 0 - N : number of dwords in SPIR-V module +// DWORD 1 - module[0] +// DWORD N - module[N-1] +// + +#define HS_LITERALS_PER_LINE 6 + +// +// +// + +int +main(int argc, char const * argv[]) +{ + // This tool will typically be passed ~20 files but if there isn't + // at least one file then fail. + if (argc < 3) { + return EXIT_FAILURE; + } + + // + // layout buffer is reallocated for each module + // + uint32_t * layout = NULL; + size_t layout_size = 0; + uint32_t layout_next = 0; + + // + // load and process all modules + // + uint32_t const module_count = argc - 2; + + for (uint32_t ii=0; ii<module_count; ii++) + { + FILE * module = fopen(argv[2+ii],"rb"); + + if (module == NULL) { + return EXIT_FAILURE; + } + + if (fseek(module,0L,SEEK_END) != 0) { + return EXIT_FAILURE; + } + + long const module_bytes = ftell(module); + + if ((module_bytes < 0) || ((module_bytes % (long)sizeof(uint32_t)) != 0)) { + return EXIT_FAILURE; + } + + rewind(module); + + // "length + module size" + layout_size += sizeof(uint32_t) + module_bytes; + + layout = realloc(layout,layout_size); + + if (layout == NULL) { + return EXIT_FAILURE; + } + + // store dwords + uint32_t const module_dwords = (uint32_t)(module_bytes / sizeof(uint32_t)); + + layout[layout_next++] = module_dwords; + + // load module + if (fread(layout+layout_next,1,module_bytes,module) != (size_t)module_bytes) { 
+ return EXIT_FAILURE; + } + + // close module + if (fclose(module) != 0) { + return EXIT_FAILURE; + } + + // move to next module + layout_next += module_dwords; + } + + // + // store + // + FILE * file = fopen(argv[1],"wb"); + + if (file == NULL) { + return EXIT_FAILURE; + } + + uint32_t literals = 0; + + for (uint32_t ii=0; ii<layout_next; ii++) + { + fprintf(file,"0x%08" PRIX32,layout[ii]); + + if ((++literals % HS_LITERALS_PER_LINE) != 0) { + fprintf(file,", "); + } else { + fprintf(file,",\n"); + } + } + + fprintf(file,"\n"); + + if (ferror(file) || fclose(file) != 0) { + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +// +// +// diff --git a/src/graphics/lib/compute/hotsort/platforms/vk/targets/hotsort_target.gni b/src/graphics/lib/compute/hotsort/platforms/vk/targets/hotsort_target.gni new file mode 100644 index 0000000000000000000000000000000000000000..653a7a4bd9d6a422273db84e334ad4902b310899 --- /dev/null +++ b/src/graphics/lib/compute/hotsort/platforms/vk/targets/hotsort_target.gni @@ -0,0 +1,249 @@ +# Copyright 2019 The Fuchsia Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import("//build/compiled_action.gni") + +# +# Generates a HotSort target +# +template("hotsort_target") { + # + # Expects: + # + # $hotsort_target_vendor_dir: + # + # The directory of the vendor and arch-specific includes. + # + # $hotsort_target_name: + # + # A name that approximately conveys the configuration of the + # generated HotSort algorithm. + # + # $hotsort_target_args: + # + # The arguments passed to the 'hotsort_gen' HotSort algorithm code + # generator. + # + # $hotsort_target_dump: + # + # If defined and true, the target is a group that references a target binary + # produced by the host toolchain. Otherwise, a source set is + # produced on the current toolchain. 
+ # + # The hotsort target name and args are passed to 'hotsort_gen' as + # follows: + # + # 'hotsort_gen -D $hotsort_target_name $hotsort_target_args' + # + # Validation of the args will be performed by 'hotsort_gen'. + # + # Note that $hotsort_target_name is split from the args list as it + # serves a dual role of providing the name of files as well as being + # a symbol in the GLSL source and C include files. + # + assert(defined(invoker.hotsort_target_vendor_dir),"vendor directory must be defined for hotsort_target") + assert(defined(invoker.hotsort_target_name), "name must be defined for hotsort_target") + assert(defined(invoker.hotsort_target_args), "args must be defined for hotsort_target") + + # + # Either dump a binary or produce a source set + # + _hs_target_dump = false + + if (defined(invoker.hotsort_target_dump)) { + _hs_target_dump = invoker.hotsort_target_dump + } + + # + # define sources, includes and deps + # + _hs_target_gen_includes_public = [ + "$target_gen_dir/hs_target.h" + ] + + _hs_target_gen_includes = [ + "$target_gen_dir/hs_config.h", + ] + _hs_target_gen_includes_public + + _hs_target_includes = [ + invoker.hotsort_target_vendor_dir, + "//src/graphics/lib/compute/hotsort/platforms/vk/targets" + ] + _hs_target_gen_includes + + _hs_target_gen_sources = [ + "$target_gen_dir/" + invoker.hotsort_target_name + ".c", + ] + + _hs_target_gen_inlines = [ + "$target_gen_dir/hs_modules.inl", + ] + + _hs_target_include_dirs = [ + "$target_gen_dir", + "//src/graphics/lib/compute/hotsort/platforms/vk/targets", + "//src/graphics/lib/compute/hotsort/platforms/vk", + ] + + _hs_target_sources = _hs_target_includes + _hs_target_gen_sources + _hs_target_gen_inlines + + # + # generated compute shaders + # + _hs_comp_names = exec_script("//src/graphics/lib/compute/hotsort/platforms/vk/targets/hotsort_comp_names.py", + invoker.hotsort_target_args, + "list lines") + _hs_comp_sources = 
process_file_template(_hs_comp_names,"$target_gen_dir/comp/{{source_file_part}}") + + # + # generate the .comp shaders + # + # Note that hs_modules.txt should match names returned by script + # + compiled_action("gen_comp") { + tool = "//src/graphics/lib/compute/hotsort/hotsort_gen" + outputs = _hs_comp_sources + _hs_target_gen_sources + _hs_target_gen_includes + args = [ + "-D" , invoker.hotsort_target_name + ] + invoker.hotsort_target_args + } + + # + # compile the .comp shaders to SPIR-V modules + # + compiled_action_foreach("gen_spv") { + tool = "//third_party/shaderc/third_party/glslang:glslangValidator" + sources = _hs_comp_sources + inputs = _hs_target_includes + outputs = [ + "$target_gen_dir/spv/{{source_name_part}}.spv" + ] + args = [ + "-s", # Note: '-s' silences glslangValidator printing filename + # but detailed error messages are also silenced + "--target-env", "vulkan1.1", + "-I" + rebase_path(target_gen_dir,root_build_dir), # <target> + "-I" + rebase_path(invoker.hotsort_target_vendor_dir,root_build_dir), # <vendor> + "-I" + rebase_path("//src/graphics/lib/compute/hotsort/platforms/vk/targets",root_build_dir), # <default> + "-o", + rebase_path(outputs[0],root_build_dir), + "{{source}}" + ] + public_deps = [ + ":gen_comp" + ] + } + + # + # optimize the modules + # + compiled_action_foreach("gen_opt") { + tool = "//third_party/shaderc/third_party/spirv-tools:spirv-opt" + sources = get_target_outputs(":gen_spv") + outputs = [ + "$target_gen_dir/opt/{{source_name_part}}.spv" + ] + args = [ + "-O", + "{{source}}", + "-o", + rebase_path(outputs[0],root_build_dir), + ] + public_deps = [ + ":gen_spv" + ] + } + + # + # remap the optimized modules + # + compiled_action_foreach("gen_remap") { + tool = "//third_party/shaderc/third_party/glslang:spirv-remap" + sources = get_target_outputs(":gen_opt") + outputs = [ + "$target_gen_dir/remap/{{source_name_part}}.spv" + ] + args = [ + "--do-everything", + "--input", + "{{source}}", + "--output", + 
rebase_path(target_gen_dir,root_build_dir) + "/remap" + ] + public_deps = [ + ":gen_opt" + ] + } + + # + # dump the modules as uint32_t literals + # + compiled_action("gen_modules") { + tool = "//src/graphics/lib/compute/hotsort/platforms/vk/targets:hotsort_modules_to_literals" + sources = get_target_outputs(":gen_remap") + outputs = _hs_target_gen_inlines + args = rebase_path(outputs,root_build_dir) + rebase_path(sources,root_build_dir) + public_deps = [ + ":gen_remap" + ] + } + + # + # either dump a binary or return a source set + # + if (_hs_target_dump) { + # + # executable for dumping a binary image of target + # + _hs_target_dump_name = "hotsort_dump_" + invoker.hotsort_target_name + + executable(_hs_target_dump_name) { + defines = [ "HS_DUMP" ] + sources = _hs_target_gen_sources + include_dirs = _hs_target_include_dirs + public_deps = [ + ":gen_modules" + ] + } + + # + # dump a binary image of target + # + compiled_action("gen_bin") { + tool = ":$_hs_target_dump_name" + sources = _hs_target_sources + outputs = [ + "$target_gen_dir/hs_target.bin" + ] + args = rebase_path(outputs,root_build_dir) + public_deps = [ + ":$_hs_target_dump_name" + ] + } + + # + # dummy group invokes $host_toolchain + # + group(target_name) { + public_deps = [ + ":gen_bin($host_toolchain)" + ] + } + } else { + # + # target is a source set + # + source_set(target_name) { + public = [ "$target_gen_dir/hs_target.h" ] + sources = _hs_target_sources + include_dirs = _hs_target_include_dirs + public_deps = [ + ":gen_modules", + ] + } + } +} + +# +# +# diff --git a/src/graphics/lib/compute/hotsort/vk/hs_glsl_macros.h b/src/graphics/lib/compute/hotsort/platforms/vk/targets/hs_glsl_macros.h similarity index 88% rename from src/graphics/lib/compute/hotsort/vk/hs_glsl_macros.h rename to src/graphics/lib/compute/hotsort/platforms/vk/targets/hs_glsl_macros.h index 75d6af5dd6dbcc6a38142c75772391fd7b44d630..9d1c95f8517d29e491002ff9bcf0840a8b6f6c38 100644 --- 
a/src/graphics/lib/compute/hotsort/vk/hs_glsl_macros.h +++ b/src/graphics/lib/compute/hotsort/platforms/vk/targets/hs_glsl_macros.h @@ -5,6 +5,18 @@ #ifndef HS_GLSL_MACROS_ONCE #define HS_GLSL_MACROS_ONCE +// +// require necessary extensions -- move this downward as soon as we +// target GPUs that lack support for them +// + +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_shuffle : require + +#if HS_KEY_DWORDS == 2 +#extension GL_ARB_gpu_shader_int64 : require +#endif + // // Define the type based on key and val sizes // @@ -14,7 +26,7 @@ #define HS_KEY_TYPE uint #define HS_KEY_VAL_MAX HS_KEY_TYPE(-1) #endif -#elif HS_KEY_DWORDS == 2 // FIXME -- might want to use uint2 +#elif HS_KEY_DWORDS == 2 // FIXME -- some targets will use uint2 #if HS_VAL_DWORDS == 0 #define HS_KEY_TYPE uint64_t // GL_ARB_gpu_shader_int64 #define HS_KEY_VAL_MAX HS_KEY_TYPE(-1L) @@ -32,9 +44,9 @@ // // -#define HS_GLSL_WORKGROUP_SIZE(_x,_y,_z) \ - layout( local_size_x = _x, \ - local_size_y = _y, \ +#define HS_GLSL_WORKGROUP_SIZE(_x,_y,_z) \ + layout( local_size_x = _x, \ + local_size_y = _y, \ local_size_z = _z) in #define HS_GLSL_BINDING(_m,_n) \ @@ -52,8 +64,13 @@ // These can be overidden // -#define HS_KV_IN kv_in -#define HS_KV_OUT kv_out +#ifndef HS_KV_IN +#define HS_KV_IN kv_in +#endif + +#ifndef HS_KV_OUT +#define HS_KV_OUT kv_out +#endif // // @@ -69,54 +86,54 @@ // KERNEL PROTOS // -#define HS_BS_KERNEL_PROTO(slab_count,slab_count_ru_log2) \ - HS_GLSL_SUBGROUP_SIZE() \ - HS_GLSL_WORKGROUP_SIZE(HS_SLAB_THREADS*slab_count,1,1); \ - HS_GLSL_BINDING(0,0) writeonly buffer _kv_out { HS_KEY_TYPE HS_KV_OUT[]; }; \ - HS_GLSL_BINDING(0,1) readonly buffer _kv_in { HS_KEY_TYPE HS_KV_IN[]; }; \ - HS_GLSL_PUSH(); \ +#define HS_BS_KERNEL_PROTO(slab_count,slab_count_ru_log2) \ + HS_GLSL_SUBGROUP_SIZE() \ + HS_GLSL_WORKGROUP_SIZE(HS_SLAB_THREADS*slab_count,1,1); \ + HS_GLSL_BINDING(HS_KV_OUT_SET,HS_KV_OUT_BINDING) writeonly buffer _kv_out { HS_KEY_TYPE
HS_KV_OUT[]; }; \ + HS_GLSL_BINDING(HS_KV_IN_SET,HS_KV_IN_BINDING) readonly buffer _kv_in { HS_KEY_TYPE HS_KV_IN[]; }; \ + HS_GLSL_PUSH(); \ void main() -#define HS_BC_KERNEL_PROTO(slab_count,slab_count_log2) \ - HS_GLSL_SUBGROUP_SIZE() \ - HS_GLSL_WORKGROUP_SIZE(HS_SLAB_THREADS*slab_count,1,1); \ - HS_GLSL_BINDING(0,0) buffer _kv_out { HS_KEY_TYPE HS_KV_OUT[]; }; \ - HS_GLSL_PUSH(); \ +#define HS_BC_KERNEL_PROTO(slab_count,slab_count_log2) \ + HS_GLSL_SUBGROUP_SIZE() \ + HS_GLSL_WORKGROUP_SIZE(HS_SLAB_THREADS*slab_count,1,1); \ + HS_GLSL_BINDING(HS_KV_OUT_SET,HS_KV_OUT_BINDING) buffer _kv_out { HS_KEY_TYPE HS_KV_OUT[]; }; \ + HS_GLSL_PUSH(); \ void main() -#define HS_FM_KERNEL_PROTO(s,r) \ - HS_GLSL_SUBGROUP_SIZE() \ - HS_GLSL_WORKGROUP_SIZE(HS_SLAB_THREADS,1,1); \ - HS_GLSL_BINDING(0,0) buffer _kv_out { HS_KEY_TYPE HS_KV_OUT[]; }; \ - HS_GLSL_PUSH(); \ +#define HS_FM_KERNEL_PROTO(s,r) \ + HS_GLSL_SUBGROUP_SIZE() \ + HS_GLSL_WORKGROUP_SIZE(HS_SLAB_THREADS,1,1); \ + HS_GLSL_BINDING(HS_KV_OUT_SET,HS_KV_OUT_BINDING) buffer _kv_out { HS_KEY_TYPE HS_KV_OUT[]; }; \ + HS_GLSL_PUSH(); \ void main() -#define HS_HM_KERNEL_PROTO(s) \ - HS_GLSL_SUBGROUP_SIZE() \ - HS_GLSL_WORKGROUP_SIZE(HS_SLAB_THREADS,1,1); \ - HS_GLSL_BINDING(0,0) buffer _kv_out { HS_KEY_TYPE HS_KV_OUT[]; }; \ - HS_GLSL_PUSH(); \ +#define HS_HM_KERNEL_PROTO(s) \ + HS_GLSL_SUBGROUP_SIZE() \ + HS_GLSL_WORKGROUP_SIZE(HS_SLAB_THREADS,1,1); \ + HS_GLSL_BINDING(HS_KV_OUT_SET,HS_KV_OUT_BINDING) buffer _kv_out { HS_KEY_TYPE HS_KV_OUT[]; }; \ + HS_GLSL_PUSH(); \ void main() -#define HS_FILL_IN_KERNEL_PROTO() \ - HS_GLSL_SUBGROUP_SIZE() \ - HS_GLSL_WORKGROUP_SIZE(HS_SLAB_THREADS,1,1); \ - HS_GLSL_BINDING(0,1) writeonly buffer _kv_in { HS_KEY_TYPE HS_KV_IN[]; }; \ - HS_GLSL_PUSH(); \ +#define HS_FILL_IN_KERNEL_PROTO() \ + HS_GLSL_SUBGROUP_SIZE() \ + HS_GLSL_WORKGROUP_SIZE(HS_SLAB_THREADS,1,1); \ + HS_GLSL_BINDING(HS_KV_IN_SET,HS_KV_IN_BINDING) writeonly buffer _kv_in { HS_KEY_TYPE HS_KV_IN[]; }; \ + HS_GLSL_PUSH(); \ 
void main() -#define HS_FILL_OUT_KERNEL_PROTO() \ - HS_GLSL_SUBGROUP_SIZE() \ - HS_GLSL_WORKGROUP_SIZE(HS_SLAB_THREADS,1,1); \ - HS_GLSL_BINDING(0,0) writeonly buffer _kv_out { HS_KEY_TYPE HS_KV_OUT[]; }; \ - HS_GLSL_PUSH(); \ +#define HS_FILL_OUT_KERNEL_PROTO() \ + HS_GLSL_SUBGROUP_SIZE() \ + HS_GLSL_WORKGROUP_SIZE(HS_SLAB_THREADS,1,1); \ + HS_GLSL_BINDING(HS_KV_OUT_SET,HS_KV_OUT_BINDING) writeonly buffer _kv_out { HS_KEY_TYPE HS_KV_OUT[]; }; \ + HS_GLSL_PUSH(); \ void main() -#define HS_TRANSPOSE_KERNEL_PROTO() \ - HS_GLSL_SUBGROUP_SIZE() \ - HS_GLSL_WORKGROUP_SIZE(HS_SLAB_THREADS,1,1); \ - HS_GLSL_BINDING(0,0) buffer _kv_out { HS_KEY_TYPE HS_KV_OUT[]; }; \ - HS_GLSL_PUSH(); \ +#define HS_TRANSPOSE_KERNEL_PROTO() \ + HS_GLSL_SUBGROUP_SIZE() \ + HS_GLSL_WORKGROUP_SIZE(HS_SLAB_THREADS,1,1); \ + HS_GLSL_BINDING(HS_KV_OUT_SET,HS_KV_OUT_BINDING) buffer _kv_out { HS_KEY_TYPE HS_KV_OUT[]; }; \ + HS_GLSL_PUSH(); \ void main() // diff --git a/src/graphics/lib/compute/hotsort/platforms/vk/targets/hs_target_config_init.inl b/src/graphics/lib/compute/hotsort/platforms/vk/targets/hs_target_config_init.inl new file mode 100644 index 0000000000000000000000000000000000000000..c6ae1d866f2ef4431d77a01c0e9d93e2f185045b --- /dev/null +++ b/src/graphics/lib/compute/hotsort/platforms/vk/targets/hs_target_config_init.inl @@ -0,0 +1,35 @@ +// Copyright 2019 The Fuchsia Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +.is_in_place = HS_IS_IN_PLACE, + +.slab = { + .threads_log2 = HS_SLAB_THREADS_LOG2, + .width_log2 = HS_SLAB_WIDTH_LOG2, + .height = HS_SLAB_HEIGHT + }, + +.dwords = { + .key = HS_KEY_DWORDS, + .val = HS_VAL_DWORDS + }, + +.block = { + .slabs = HS_BS_SLABS + }, + +.merge = { + .fm = { + .scale_min = HS_FM_SCALE_MIN, + .scale_max = HS_FM_SCALE_MAX + }, + .hm = { + .scale_min = HS_HM_SCALE_MIN, + .scale_max = HS_HM_SCALE_MAX, + } + } + +// +// +// diff --git a/src/graphics/lib/compute/hotsort/platforms/vk/targets/hs_target_modules_dump.inl b/src/graphics/lib/compute/hotsort/platforms/vk/targets/hs_target_modules_dump.inl new file mode 100644 index 0000000000000000000000000000000000000000..ad3b1227a26740f829c97ccbebc06b6aeef32a04 --- /dev/null +++ b/src/graphics/lib/compute/hotsort/platforms/vk/targets/hs_target_modules_dump.inl @@ -0,0 +1,58 @@ +// Copyright 2019 The Fuchsia Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifdef HS_DUMP + +#include <stdlib.h> +#include <stdio.h> +#include <inttypes.h> + +// Dump 'struct hotsort_vk_target HS_TARGET_NAME' to the file named by argv[1]: +// the .config member is written first, then each module as a dword count plus +// that many dwords; a zero count ends the list and is not written. NOTE(review): argc is not checked. + +int +main(int argc, char const * argv[]) +{ + FILE * file = fopen(argv[1],"wb"); + + if (file == NULL) { + return EXIT_FAILURE; + } + + size_t const size_config = sizeof(HS_TARGET_NAME.config); + + if (fwrite(&HS_TARGET_NAME.config,1,size_config,file) != size_config) { + return EXIT_FAILURE; + } + + uint32_t const * modules = HS_TARGET_NAME.modules; + uint32_t dwords = modules[0]; + + while (dwords > 0) + { + dwords += 1; + + size_t const size_module = dwords * sizeof(*modules); + + if (fwrite(modules,1,size_module,file) != size_module) { + return EXIT_FAILURE; + } + + modules += dwords; + dwords = modules[0]; + } + + if (fclose(file) != 0) { + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} + +#endif + +// +// +// diff --git a/src/graphics/lib/compute/hotsort/vk/targets/amd/hs_glsl_macros.h b/src/graphics/lib/compute/hotsort/platforms/vk/targets/vendors/amd/hs_glsl_macros_vendor.h similarity index 96% rename from src/graphics/lib/compute/hotsort/vk/targets/amd/hs_glsl_macros.h rename to src/graphics/lib/compute/hotsort/platforms/vk/targets/vendors/amd/hs_glsl_macros_vendor.h index 8b6129fc116afb0550d2edbe41cc9c7b6650ed55..e6a9bfb4113999dae70f146522fcc1b8c9a395f8 100644 --- a/src/graphics/lib/compute/hotsort/vk/targets/amd/hs_glsl_macros.h +++ b/src/graphics/lib/compute/hotsort/platforms/vk/targets/vendors/amd/hs_glsl_macros_vendor.h @@ -9,7 +9,7 @@ // // -#include "../../hs_glsl_macros.h" +#include "hs_glsl_macros.h" // // OVERRIDE SUBGROUP LANE ID diff --git a/src/graphics/lib/compute/hotsort/vk/targets/intel/hs_glsl_macros.h b/src/graphics/lib/compute/hotsort/platforms/vk/targets/vendors/intel/hs_glsl_macros_vendor.h similarity index 97% rename from src/graphics/lib/compute/hotsort/vk/targets/intel/hs_glsl_macros.h rename to
src/graphics/lib/compute/hotsort/platforms/vk/targets/vendors/intel/hs_glsl_macros_vendor.h index 59b74b96752de31c5de3a39299b3f1dacc63752f..ffc30b44b766fb20212632daf1661bd37a5f4cdf 100644 --- a/src/graphics/lib/compute/hotsort/vk/targets/intel/hs_glsl_macros.h +++ b/src/graphics/lib/compute/hotsort/platforms/vk/targets/vendors/intel/hs_glsl_macros_vendor.h @@ -9,7 +9,7 @@ // // -#include "../../hs_glsl_macros.h" +#include "hs_glsl_macros.h" // // Waiting for Intel's to provide an equivalent to their OpenCL diff --git a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/hs_glsl_macros.h b/src/graphics/lib/compute/hotsort/platforms/vk/targets/vendors/nvidia/hs_glsl_macros_vendor.h similarity index 96% rename from src/graphics/lib/compute/hotsort/vk/targets/nvidia/hs_glsl_macros.h rename to src/graphics/lib/compute/hotsort/platforms/vk/targets/vendors/nvidia/hs_glsl_macros_vendor.h index 2872d9381ad67cb9a482a6ca25e5052aa84f2680..91d6b257adbd1f0586ced6f86c04bb4bbfd268e2 100644 --- a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/hs_glsl_macros.h +++ b/src/graphics/lib/compute/hotsort/platforms/vk/targets/vendors/nvidia/hs_glsl_macros_vendor.h @@ -9,7 +9,7 @@ // // -#include "../../hs_glsl_macros.h" +#include "hs_glsl_macros.h" // // OVERRIDE SUBGROUP LANE ID diff --git a/src/graphics/lib/compute/hotsort/platforms/vk/tests/BUILD.gn b/src/graphics/lib/compute/hotsort/platforms/vk/tests/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..8e604d8330b08467d0cee6d01aa31dc9f59ecfe0 --- /dev/null +++ b/src/graphics/lib/compute/hotsort/platforms/vk/tests/BUILD.gn @@ -0,0 +1,9 @@ +# Copyright 2019 The Fuchsia Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ +group("tests") { + public_deps = [ + "hotsort_vk_bench" + ] +} diff --git a/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/BUILD.gn b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..5d15a50c20a0ce1556d145c45454570a5e20f57c --- /dev/null +++ b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/BUILD.gn @@ -0,0 +1,75 @@ +# Copyright 2019 The Fuchsia Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import("//build/package.gni") +import("//third_party/vulkan_loader_and_validation_layers/layers/layers.gni") + +# +# Generate and build several HotSort targets +# + +group("targets") { + public_deps = [ + "targets/amd/gcn3/u32:generate", + "targets/amd/gcn3/u64:generate", + "targets/intel/gen8/u32:generate", + "targets/intel/gen8/u64:generate", + "targets/nvidia/sm35/u32:generate", + "targets/nvidia/sm35/u64:generate", + ] +} + +# +# hotsort_vk_bench: benchmark HotSort +# + +package_name = "hotsort_vk_bench" + +executable("bin") { + output_name = package_name + sources = [ + "main.c", + "sort.cpp" + ] + include_dirs = [ + "$target_gen_dir", + "//src/graphics/lib/compute", + "//src/graphics/lib/compute/hotsort/platforms/vk" + ] + deps = [ + ":targets", + "//src/graphics/lib/compute/common", + "//src/graphics/lib/compute/common/vk", + "//src/graphics/lib/compute/hotsort/platforms/vk", + "//third_party/vulkan_loader_and_validation_layers:vulkan" + ] +} + +# +# +# + +package(package_name) { + deps = [ + ":bin", + "//third_party/vulkan_loader_and_validation_layers/layers" + ] + + binary = package_name + + meta = [ + { + path = rebase_path("meta/$package_name.cmx") + dest = "$package_name.cmx" + }, + ] + + public_deps = vulkan_validation_layers.public_deps + loadable_modules = vulkan_validation_layers.loadable_modules + resources = 
vulkan_validation_layers.resources +} + +# +# +# diff --git a/src/graphics/lib/compute/hotsort/vk/bench/main.c b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/main.c similarity index 92% rename from src/graphics/lib/compute/hotsort/vk/bench/main.c rename to src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/main.c index e0dcf3b1c0fbc0ba4c67336fca12160ebd2051fd..eee52a6862ae9c6bdf20d6f85f472675efa005b5 100644 --- a/src/graphics/lib/compute/hotsort/vk/bench/main.c +++ b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/main.c @@ -24,20 +24,20 @@ // // -#include "hs_vk.h" +#include "hotsort_vk.h" // // Compile-time images of HotSort targets // -#include "hotsort/vk/targets/intel/gen8/u32/hs_target.h" -#include "hotsort/vk/targets/intel/gen8/u64/hs_target.h" +#include "targets/intel/gen8/u32/hs_target.h" +#include "targets/intel/gen8/u64/hs_target.h" -#include "hotsort/vk/targets/nvidia/sm_35/u32/hs_target.h" -#include "hotsort/vk/targets/nvidia/sm_35/u64/hs_target.h" +#include "targets/nvidia/sm35/u32/hs_target.h" +#include "targets/nvidia/sm35/u64/hs_target.h" -#include "hotsort/vk/targets/amd/gcn3/u32/hs_target.h" -#include "hotsort/vk/targets/amd/gcn3/u64/hs_target.h" +#include "targets/amd/gcn3/u32/hs_target.h" +#include "targets/amd/gcn3/u64/hs_target.h" // // @@ -73,7 +73,7 @@ hs_transpose_slabs_u32(uint32_t const hs_words, { uint32_t const slab_keys = hs_width * hs_height; size_t const slab_size = sizeof(uint32_t) * hs_words * slab_keys; - uint32_t * const slab = ALLOCA_MACRO(slab_size); + uint32_t * const slab = malloc(slab_size); uint32_t slab_count = count / slab_keys; while (slab_count-- > 0) @@ -86,6 +86,8 @@ hs_transpose_slabs_u32(uint32_t const hs_words, vout_h += slab_keys; } + + free(slab); } static @@ -98,7 +100,7 @@ hs_transpose_slabs_u64(uint32_t const hs_words, { uint32_t const slab_keys = hs_width * hs_height; size_t const slab_size = sizeof(uint32_t) * hs_words * slab_keys; - uint64_t * 
const slab = ALLOCA_MACRO(slab_size); + uint64_t * const slab = malloc(slab_size); uint32_t slab_count = count / slab_keys; while (slab_count-- > 0) @@ -111,6 +113,8 @@ hs_transpose_slabs_u64(uint32_t const hs_words, vout_h += slab_keys; } + + free(slab); } static @@ -165,7 +169,7 @@ vk_debug_report_cb(VkDebugReportFlagsEXT flags, if (is_error) { - fprintf(stderr,"%s %s %s\n", + fprintf(stderr,"%-43s - %-12s - %s\n", flag_str, pLayerPrefix, pMessage); @@ -219,6 +223,8 @@ hs_fill_rand(uint32_t * vin_h, uint32_t const count, uint32_t const words) // // +#if !defined(NDEBUG) && defined(HS_VK_DUMP_SLABS) + static void hs_debug_u32(uint32_t const hs_width, @@ -259,16 +265,18 @@ hs_debug_u64(uint32_t const hs_width, } } +#endif + // // // bool -is_matching_device(VkPhysicalDeviceProperties const * const phy_device_props, - struct hs_vk_target const * * const hs_target, - uint32_t const vendor_id, - uint32_t const device_id, - uint32_t const key_val_words) +is_matching_device(VkPhysicalDeviceProperties const * const phy_device_props, + struct hotsort_vk_target const * * const hs_target, + uint32_t const vendor_id, + uint32_t const device_id, + uint32_t const key_val_words) { if ((phy_device_props->vendorID != vendor_id) || (phy_device_props->deviceID != device_id)) return false; @@ -305,9 +313,9 @@ is_matching_device(VkPhysicalDeviceProperties const * const phy_device_props, // AMD GCN // if (key_val_words == 1) - *hs_target = &hs_amd_gcn_u32; + *hs_target = &hs_amd_gcn3_u32; else - *hs_target = &hs_amd_gcn_u64; + *hs_target = &hs_amd_gcn3_u64; } else { @@ -353,13 +361,13 @@ main(int argc, char const * argv[]) // create a Vulkan instances // VkApplicationInfo const app_info = { - .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, - .pNext = NULL, - .pApplicationName = "HotSort Bench", - .applicationVersion = 0, - .pEngineName = "HotSort", - .engineVersion = 0, - .apiVersion = VK_API_VERSION_1_1 + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pNext = NULL, + .pApplicationName 
= "HotSort Bench", + .applicationVersion = 0, + .pEngineName = "HotSort", + .engineVersion = 0, + .apiVersion = VK_API_VERSION_1_1 }; char const * const instance_enabled_layers[] = { @@ -447,7 +455,7 @@ main(int argc, char const * argv[]) VkPhysicalDevice phy_device = VK_NULL_HANDLE; VkPhysicalDeviceProperties phy_device_props; - struct hs_vk_target const * hs_target; + struct hotsort_vk_target const * hs_target; for (uint32_t ii=0; ii<phy_device_count; ii++) { @@ -532,7 +540,7 @@ main(int argc, char const * argv[]) // clumsily enable AMD GCN shader info extension // char const * const device_enabled_extensions[] = { -#if defined( HS_VK_VERBOSE_STATISTICS_AMD ) || defined( HS_VK_VERBOSE_DISASSEMBLY_AMD ) +#if defined( HOTSORT_VK_VERBOSE_STATISTICS_AMD ) || defined( HOTSORT_VK_VERBOSE_DISASSEMBLY_AMD ) VK_AMD_SHADER_INFO_EXTENSION_NAME #else NULL @@ -541,7 +549,7 @@ main(int argc, char const * argv[]) uint32_t device_enabled_extension_count = 0; -#if defined( HS_VK_VERBOSE_STATISTICS_AMD ) || defined( HS_VK_VERBOSE_DISASSEMBLY_AMD ) +#if defined( HOTSORT_VK_VERBOSE_STATISTICS_AMD ) || defined( HOTSORT_VK_VERBOSE_DISASSEMBLY_AMD ) if (phy_device_props.vendorID == 0x1002) device_enabled_extension_count = 1; #endif @@ -658,9 +666,9 @@ main(int argc, char const * argv[]) .pushConstantRangeCount = 1, .pPushConstantRanges = (VkPushConstantRange[]){ { - .stageFlags = HS_VK_PUSH_CONSTANT_RANGE_STAGE_FLAGS, - .offset = HS_VK_PUSH_CONSTANT_RANGE_OFFSET, - .size = HS_VK_PUSH_CONSTANT_RANGE_SIZE + .stageFlags = HOTSORT_VK_PUSH_CONSTANT_RANGE_STAGE_FLAGS, + .offset = HOTSORT_VK_PUSH_CONSTANT_RANGE_OFFSET, + .size = HOTSORT_VK_PUSH_CONSTANT_RANGE_SIZE } } }; @@ -687,27 +695,11 @@ main(int argc, char const * argv[]) vk(AllocateDescriptorSets(device,&dsai,&ds)); - // - // create HotSort instance - // - struct hs_vk_ds_locations const ds_locations = - { - .in = { - .set = 0, - .binding = 1 - }, - .out = { - .set = 0, - .binding = 0 - } - }; - - struct hs_vk * const hs = 
hs_vk_create(device, - NULL, - pc, - pl, - &ds_locations, - hs_target); + struct hotsort_vk * const hs = hotsort_vk_create(device, + NULL, + pc, + pl, + hs_target); // // create a command pool for this thread // @@ -749,7 +741,7 @@ main(int argc, char const * argv[]) // uint32_t buffer_in_count, buffer_out_count; - hs_vk_pad(hs,count_hi,&buffer_in_count,&buffer_out_count); + hotsort_vk_pad(hs,count_hi,&buffer_in_count,&buffer_out_count); size_t const buffer_out_size = buffer_out_count * key_val_words * sizeof(uint32_t); @@ -770,10 +762,10 @@ main(int argc, char const * argv[]) VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, - vk(CreateBuffer(device, - &bci, - NULL, - &vin)); + vk(CreateBuffer(device, + &bci, + NULL, + &vin)); vk(CreateBuffer(device, &bci, @@ -962,7 +954,7 @@ main(int argc, char const * argv[]) // // buffer offsets // - struct hs_vk_ds_offsets const offsets = + struct hotsort_vk_ds_offsets const offsets = { .in = 0UL, .out = 0UL @@ -992,6 +984,11 @@ main(int argc, char const * argv[]) "Avg. Mkeys/s, " "Max. 
Mkeys/s\n"); + // + // accumulate verifications + // + bool all_verified = true; + // // test a range // @@ -1002,7 +999,7 @@ main(int argc, char const * argv[]) // uint32_t count_padded_in, count_padded_out; - hs_vk_pad(hs,count,&count_padded_in,&count_padded_out); + hotsort_vk_pad(hs,count,&count_padded_in,&count_padded_out); // // initialize vin with 'count' random keys @@ -1057,13 +1054,13 @@ main(int argc, char const * argv[]) // // append sorting commands // - hs_vk_sort(cb, - hs, - &offsets, - count, - count_padded_in, - count_padded_out, - linearize); + hotsort_vk_sort(cb, + hs, + &offsets, + count, + count_padded_in, + count_padded_out, + linearize); // // end timestamp @@ -1124,7 +1121,7 @@ main(int argc, char const * argv[]) // char const * cpu_algo = NULL; double cpu_ns = 0.0; - bool verified = false; + bool verified = true; if (verify) { @@ -1178,7 +1175,7 @@ main(int argc, char const * argv[]) // verify verified = memcmp(sorted_h,sorted_map,size_padded_in) == 0; -#ifndef NDEBUG +#if !defined(NDEBUG) && defined(HS_VK_DUMP_SLABS) if (!verified) { if (key_val_words == 1) @@ -1211,6 +1208,11 @@ main(int argc, char const * argv[]) vkUnmapMemory(device,mem_sorted); } + // + // any verification failures? + // + all_verified = all_verified && verified; + // // REPORT // @@ -1254,7 +1256,7 @@ main(int argc, char const * argv[]) vkDestroyPipelineLayout(device,pl,NULL); // release shared HotSort state - hs_vk_release(device,NULL,hs); + hotsort_vk_release(device,NULL,hs); // destroy the vin/vout buffers (before device memory) vkDestroyBuffer(device,vin, NULL); @@ -1287,7 +1289,7 @@ main(int argc, char const * argv[]) vkDestroyInstance(instance,NULL); - return EXIT_SUCCESS; + return all_verified ? 
EXIT_SUCCESS : EXIT_FAILURE; } // diff --git a/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/meta/hotsort_vk_bench.cmx b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/meta/hotsort_vk_bench.cmx new file mode 100644 index 0000000000000000000000000000000000000000..1c51dfe9310ab622e5b0a47e22531a541315c1fd --- /dev/null +++ b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/meta/hotsort_vk_bench.cmx @@ -0,0 +1,15 @@ +{ + "program": { + "binary": "bin/app" + }, + "sandbox": { + "features": [ + "vulkan" + ], + "services": [ + "fuchsia.sysmem.Allocator", + "fuchsia.vulkan.loader.Loader", + "fuchsia.tracelink.Registry" + ] + } +} diff --git a/src/graphics/lib/compute/hotsort/vk/bench/sort.cpp b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/sort.cpp similarity index 91% rename from src/graphics/lib/compute/hotsort/vk/bench/sort.cpp rename to src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/sort.cpp index fb438d390930051d4d5046e81e508d6926a8fd98..70b1b55865f49feb1a648b9220df5c2305527f51 100644 --- a/src/graphics/lib/compute/hotsort/vk/bench/sort.cpp +++ b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/sort.cpp @@ -30,7 +30,12 @@ // // -#if (__cplusplus >= 201703L) && !defined(HS_USE_STD_SORT) && !defined(HS_USE_QSORT) +#if defined(__cpp_lib_execution) \ + && (__cpp_lib_execution >= 201603L) \ + && defined(__cpp_lib_parallel_algorithm) \ + && (__cpp_lib_parallel_algorithm >= 201603L) \ + && !defined(HS_USE_STD_SORT) \ + && !defined(HS_USE_QSORT) #define HS_USE_PARALLEL_SORT #include <algorithm> diff --git a/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/amd/gcn3/u32/BUILD.gn b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/amd/gcn3/u32/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..3fc827d841fe6debc5324359d69fb20441eea1a3 --- /dev/null +++ 
b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/amd/gcn3/u32/BUILD.gn @@ -0,0 +1,36 @@ +# Copyright 2019 The Fuchsia Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import("//src/graphics/lib/compute/hotsort/platforms/vk/targets/hotsort_target.gni") + +# +# configuration +# +# $HS_GEN -v -a "glsl" -D HS_AMD_GCN3 -t 1 -w 64 -r 16 -s 32768 -S 32768 -b 16 -m 1 -M 1 -f 1 -F 1 -c 1 -C 1 -z +# + +hotsort_target("generate") +{ + hotsort_target_vendor_dir = "//src/graphics/lib/compute/hotsort/platforms/vk/targets/vendors/amd" + hotsort_target_name = "hs_amd_gcn3_u32" + hotsort_target_args = [ + # "-v", + "-o" , rebase_path(target_gen_dir,root_build_dir), + "-a" , "glsl", + "-t" , "1", + "-w" , "64", + "-r" , "16", + "-s" , "32768", + "-S" , "32768", + "-b" , "16", + "-m" , "1", + "-M" , "1", + "-f" , "1", + "-F" , "1", + "-c" , "1", + "-C" , "1", + "-L" , "0,1,0,0", + "-z" + ] +} diff --git a/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/amd/gcn3/u64/BUILD.gn b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/amd/gcn3/u64/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..2484994f776d7016e5d6ea64cf81d59cae2360df --- /dev/null +++ b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/amd/gcn3/u64/BUILD.gn @@ -0,0 +1,36 @@ +# Copyright 2019 The Fuchsia Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ +import("//src/graphics/lib/compute/hotsort/platforms/vk/targets/hotsort_target.gni") + +# +# configuration +# +# $HS_GEN -v -a "glsl" -D HS_AMD_GCN3 -t 2 -w 64 -r 8 -s 32768 -S 32768 -b 16 -m 1 -M 1 -f 1 -F 1 -c 1 -C 1 -z +# + +hotsort_target("generate") +{ + hotsort_target_vendor_dir = "//src/graphics/lib/compute/hotsort/platforms/vk/targets/vendors/amd" + hotsort_target_name = "hs_amd_gcn3_u64" + hotsort_target_args = [ + # "-v", + "-o" , rebase_path(target_gen_dir,root_build_dir), + "-a" , "glsl", + "-t" , "2", + "-w" , "64", + "-r" , "8", + "-s" , "32768", + "-S" , "32768", + "-b" , "16", + "-m" , "1", + "-M" , "1", + "-f" , "1", + "-F" , "1", + "-c" , "1", + "-C" , "1", + "-L" , "0,1,0,0", + "-z" + ] +} diff --git a/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/intel/gen8/u32/BUILD.gn b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/intel/gen8/u32/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..f3c179d57f7375ba2905db4bd19925c5ed1cb203 --- /dev/null +++ b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/intel/gen8/u32/BUILD.gn @@ -0,0 +1,37 @@ +# Copyright 2019 The Fuchsia Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ +import("//src/graphics/lib/compute/hotsort/platforms/vk/targets/hotsort_target.gni") + +# +# configuration +# +# $HS_GEN -v -a "glsl" -D HS_INTEL_GEN8 -t 1 -w 16 -r 8 -s 21504 -S 65536 -b 16 -B 48 -m 1 -M 1 -f 0 -F 0 -c 0 -C 0 -z +# + +hotsort_target("generate") +{ + hotsort_target_vendor_dir = "//src/graphics/lib/compute/hotsort/platforms/vk/targets/vendors/intel" + hotsort_target_name = "hs_intel_gen8_u32" + hotsort_target_args = [ + # "-v", + "-o" , rebase_path(target_gen_dir,root_build_dir), + "-a" , "glsl", + "-t" , "1", + "-w" , "16", + "-r" , "8", + "-s" , "21504", + "-S" , "65536", + "-b" , "16", + "-B" , "48", + "-m" , "1", + "-M" , "1", + "-f" , "0", + "-F" , "0", + "-c" , "0", + "-C" , "0", + "-L" , "0,1,0,0", + "-z" + ] +} diff --git a/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/intel/gen8/u64/BUILD.gn b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/intel/gen8/u64/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..47e98f188f86b4ec4f7054b60fe58e7b0c902be3 --- /dev/null +++ b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/intel/gen8/u64/BUILD.gn @@ -0,0 +1,37 @@ +# Copyright 2019 The Fuchsia Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ +import("//src/graphics/lib/compute/hotsort/platforms/vk/targets/hotsort_target.gni") + +# +# configuration +# +# $HS_GEN -v -a "glsl" -D HS_INTEL_GEN8 -t 2 -w 8 -r 16 -s 21504 -S 65536 -b 16 -B 48 -m 1 -M 1 -f 1 -F 1 -c 1 -C 1 -z +# + +hotsort_target("generate") +{ + hotsort_target_vendor_dir = "//src/graphics/lib/compute/hotsort/platforms/vk/targets/vendors/intel" + hotsort_target_name = "hs_intel_gen8_u64" + hotsort_target_args = [ + # "-v", + "-o" , rebase_path(target_gen_dir,root_build_dir), + "-a" , "glsl", + "-t" , "2", + "-w" , "8", + "-r" , "16", + "-s" , "21504", + "-S" , "65536", + "-b" , "16", + "-B" , "48", + "-m" , "1", + "-M" , "1", + "-f" , "1", + "-F" , "1", + "-c" , "1", + "-C" , "1", + "-L" , "0,1,0,0", + "-z" + ] +} diff --git a/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/nvidia/sm35/u32/BUILD.gn b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/nvidia/sm35/u32/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..9c7bab9128febf861796dcb12815d73a906483ef --- /dev/null +++ b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/nvidia/sm35/u32/BUILD.gn @@ -0,0 +1,39 @@ +# Copyright 2019 The Fuchsia Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ +import("//src/graphics/lib/compute/hotsort/platforms/vk/targets/hotsort_target.gni") + +# +# configuration +# +# OLD: $HS_GEN -v -a "glsl" -D HS_NVIDIA_SM35 -t 1 -w 32 -r 32 -s 49152 -S 65536 -b 32 -m 1 -M 1 -f 1 -F 1 -c 1 -C 1 -z +# NEW: $HS_GEN -v -a "glsl" -D HS_NVIDIA_SM35 -t 1 -w 32 -r 16 -s 32768 -S 32768 -b 16 -m 1 -M 1 -p 1 -P 1 -f 0 -F 0 -c 0 -C 0 -L 0,1,0,0 -z +# + +hotsort_target("generate") +{ + hotsort_target_vendor_dir = "//src/graphics/lib/compute/hotsort/platforms/vk/targets/vendors/nvidia" + hotsort_target_name = "hs_nvidia_sm35_u32" + hotsort_target_args = [ + # "-v", + "-o" , rebase_path(target_gen_dir,root_build_dir), + "-a" , "glsl", + "-t" , "1", + "-w" , "32", + "-r" , "16", + "-s" , "32768", + "-S" , "32768", + "-b" , "16", + "-m" , "1", + "-M" , "1", + "-p" , "1", + "-P" , "1", + "-f" , "0", + "-F" , "0", + "-c" , "0", + "-C" , "0", + "-L" , "0,1,0,0", + "-z" + ] +} diff --git a/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/nvidia/sm35/u64/BUILD.gn b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/nvidia/sm35/u64/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..40d54223a8f4b2fc137c025c03bb1d3e389743a5 --- /dev/null +++ b/src/graphics/lib/compute/hotsort/platforms/vk/tests/hotsort_vk_bench/targets/nvidia/sm35/u64/BUILD.gn @@ -0,0 +1,39 @@ +# Copyright 2019 The Fuchsia Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ +import("//src/graphics/lib/compute/hotsort/platforms/vk/targets/hotsort_target.gni") + +# +# configuration +# +# OLD: $HS_GEN -v -a "glsl" -D HS_NVIDIA_SM35 -t 2 -w 32 -r 32 -s 49152 -S 65536 -b 16 -m 1 -M 1 -f 1 -F 1 -c 1 -C 1 -z +# NEW: $HS_GEN -v -a "glsl" -D HS_NVIDIA_SM35 -t 2 -w 32 -r 8 -s 32768 -S 32768 -b 16 -m 1 -M 1 -p 1 -P 1 -f 0 -F 0 -c 0 -C 0 -L 0,1,0,0 -z +# + +hotsort_target("generate") +{ + hotsort_target_vendor_dir = "//src/graphics/lib/compute/hotsort/platforms/vk/targets/vendors/nvidia" + hotsort_target_name = "hs_nvidia_sm35_u64" + hotsort_target_args = [ + # "-v", + "-o" , rebase_path(target_gen_dir,root_build_dir), + "-a" , "glsl", + "-t" , "2", + "-w" , "32", + "-r" , "8", + "-s" , "32768", + "-S" , "32768", + "-b" , "16", + "-m" , "1", + "-M" , "1", + "-p" , "1", + "-P" , "1", + "-f" , "0", + "-F" , "0", + "-c" , "0", + "-C" , "0", + "-L" , "0,1,0,0", + "-z" + ] +} diff --git a/src/graphics/lib/compute/hotsort/vk/hs_glsl_preamble.h b/src/graphics/lib/compute/hotsort/vk/hs_glsl_preamble.h deleted file mode 100644 index fa55d76fd7309c5f7a2b1858d8cb6a2f1a0bedc1..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/hs_glsl_preamble.h +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef HS_GLSL_PREAMBLE_ONCE -#define HS_GLSL_PREAMBLE_ONCE - -// -// -// - -#define HS_EVAL(a) a -#define HS_HASH_SYMBOL # -#define HS_HASH() HS_EVAL(HS_HASH_SYMBOL) -#define HS_GLSL_EXT() HS_HASH()extension // will be indented one space -#define HS_GLSL_EXT_REQUIRE(name) HS_GLSL_EXT() name : require -#define HS_GLSL_VERSION(ver) HS_HASH()version ver // will be indented one space - -// -// -// - -HS_GLSL_VERSION(460) -HS_GLSL_EXT_REQUIRE(GL_KHR_shader_subgroup_basic) -HS_GLSL_EXT_REQUIRE(GL_KHR_shader_subgroup_shuffle) - -// -// -// - -#if HS_KEY_DWORDS == 2 -HS_GLSL_EXT_REQUIRE(GL_ARB_gpu_shader_int64) -#endif - -// -// -// - -#endif - -// -// -// diff --git a/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u32/gen.sh b/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u32/gen.sh deleted file mode 100644 index 8e5c4f957a513a328d003a2ee7e0d3ad4950a609..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u32/gen.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash - -## -## Copyright 2019 The Fuchsia Authors. All rights reserved. -## Use of this source code is governed by a BSD-style license that can be -## found in the LICENSE file. -## - -## -## exit on error -## - -## set -e - -## -## delete the previous images -## - -rm *.comp -rm *.spv -rm *.xxd - -## -## -## - -HS_GEN=../../../../../gen/hs_gen - -## --- 32-bit keys --- - -$HS_GEN -v -a "glsl" -D HS_AMD_GCN -t 1 -w 64 -r 16 -s 32768 -S 32768 -b 16 -m 1 -M 1 -f 1 -F 1 -c 1 -C 1 -z - -## -## remove trailing whitespace from generated files -## - -sed -i 's/[[:space:]]*$//' hs_config.h -sed -i 's/[[:space:]]*$//' hs_modules.h - -## -## -## - -whereis glslangValidator - -## -## FIXME -- convert this to a bash script -## -## Note that we can use xargs instead of the cmd for/do -## - -for f in *.comp -do - dos2unix $f - clang-format -style=Mozilla -i $f - cpp -P -I ../.. -I ../../../.. 
$f > ${f%%.*}.pre.comp - clang-format -style=Mozilla -i ${f%%.*}.pre.comp - glslangValidator --target-env vulkan1.1 -o ${f%%.*}.spv ${f%%.*}.pre.comp - spirv-opt -O ${f%%.*}.spv -o ${f%%.*}.spv -## spirv-remap -v --do-everything --input %%~nf.spv --output remap - xxd -i < ${f%%.*}.spv > ${f%%.*}.spv.xxd - len=$(wc -c < ${f%%.*}.spv) - echo ${f%%.*}.spv $len - printf "%.8x" $len | xxd -r -p | xxd -i > ${f%%.*}.len.xxd -done - -## -## dump a binary -## - -cc -I ../../../.. -I ../../../../../.. -D=HS_DUMP -o hs_dump *.c -hs_dump - -## -## delete temporary files -## - -rm *.pre.comp -rm *.comp -rm *.spv diff --git a/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u32/hs_config.h b/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u32/hs_config.h deleted file mode 100644 index 57ffbe2e55a0950889c9ebb67e7cd29630bd08c2..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u32/hs_config.h +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef HS_GLSL_ONCE -#define HS_GLSL_ONCE - -#define HS_SLAB_THREADS_LOG2 6 -#define HS_SLAB_THREADS (1 << HS_SLAB_THREADS_LOG2) -#define HS_SLAB_WIDTH_LOG2 6 -#define HS_SLAB_WIDTH (1 << HS_SLAB_WIDTH_LOG2) -#define HS_SLAB_HEIGHT 16 -#define HS_SLAB_KEYS (HS_SLAB_WIDTH * HS_SLAB_HEIGHT) -#define HS_REG_LAST(c) c##16 -#define HS_KEY_DWORDS 1 -#define HS_VAL_DWORDS 0 -#define HS_BS_SLABS 16 -#define HS_BS_SLABS_LOG2_RU 4 -#define HS_BC_SLABS_LOG2_MAX 4 -#define HS_FM_BLOCK_HEIGHT 1 -#define HS_FM_SCALE_MIN 1 -#define HS_FM_SCALE_MAX 1 -#define HS_HM_BLOCK_HEIGHT 1 -#define HS_HM_SCALE_MIN 1 -#define HS_HM_SCALE_MAX 1 -#define HS_EMPTY - -#define HS_AMD_GCN - -#define HS_SLAB_ROWS() \ - HS_SLAB_ROW( 1, 0 ) \ - HS_SLAB_ROW( 2, 1 ) \ - HS_SLAB_ROW( 3, 2 ) \ - HS_SLAB_ROW( 4, 3 ) \ - HS_SLAB_ROW( 5, 4 ) \ - HS_SLAB_ROW( 6, 5 ) \ - HS_SLAB_ROW( 7, 6 ) \ - HS_SLAB_ROW( 8, 7 ) \ - HS_SLAB_ROW( 9, 8 ) \ - HS_SLAB_ROW( 10, 9 ) \ - HS_SLAB_ROW( 11, 10 ) \ - HS_SLAB_ROW( 12, 11 ) \ - HS_SLAB_ROW( 13, 12 ) \ - HS_SLAB_ROW( 14, 13 ) \ - HS_SLAB_ROW( 15, 14 ) \ - HS_SLAB_ROW( 16, 15 ) \ - HS_EMPTY - -#define HS_TRANSPOSE_SLAB() \ - HS_TRANSPOSE_STAGE( 1 ) \ - HS_TRANSPOSE_STAGE( 2 ) \ - HS_TRANSPOSE_STAGE( 3 ) \ - HS_TRANSPOSE_STAGE( 4 ) \ - HS_TRANSPOSE_STAGE( 5 ) \ - HS_TRANSPOSE_STAGE( 6 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 2, 1 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 4, 3 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 6, 5 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 8, 7 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 10, 9 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 12, 11 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 14, 13 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 16, 15 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 3, 1 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 4, 2 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 7, 5 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 8, 6 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 11, 9 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 12, 10 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 15, 13 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 16, 14 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 5, 1 ) \ 
- HS_TRANSPOSE_BLEND( t, u, 3, 6, 2 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 7, 3 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 8, 4 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 13, 9 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 14, 10 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 15, 11 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 16, 12 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 9, 1 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 10, 2 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 11, 3 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 12, 4 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 13, 5 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 14, 6 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 15, 7 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 16, 8 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 2, 1 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 4, 3 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 6, 5 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 8, 7 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 10, 9 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 12, 11 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 14, 13 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 16, 15 ) \ - HS_TRANSPOSE_BLEND( w, x, 6, 3, 1 ) \ - HS_TRANSPOSE_BLEND( w, x, 6, 4, 2 ) \ - HS_TRANSPOSE_BLEND( w, x, 6, 7, 5 ) \ - HS_TRANSPOSE_BLEND( w, x, 6, 8, 6 ) \ - HS_TRANSPOSE_BLEND( w, x, 6, 11, 9 ) \ - HS_TRANSPOSE_BLEND( w, x, 6, 12, 10 ) \ - HS_TRANSPOSE_BLEND( w, x, 6, 15, 13 ) \ - HS_TRANSPOSE_BLEND( w, x, 6, 16, 14 ) \ - HS_TRANSPOSE_REMAP( x, 1, 1 ) \ - HS_TRANSPOSE_REMAP( x, 2, 5 ) \ - HS_TRANSPOSE_REMAP( x, 3, 9 ) \ - HS_TRANSPOSE_REMAP( x, 4, 13 ) \ - HS_TRANSPOSE_REMAP( x, 5, 2 ) \ - HS_TRANSPOSE_REMAP( x, 6, 6 ) \ - HS_TRANSPOSE_REMAP( x, 7, 10 ) \ - HS_TRANSPOSE_REMAP( x, 8, 14 ) \ - HS_TRANSPOSE_REMAP( x, 9, 3 ) \ - HS_TRANSPOSE_REMAP( x, 10, 7 ) \ - HS_TRANSPOSE_REMAP( x, 11, 11 ) \ - HS_TRANSPOSE_REMAP( x, 12, 15 ) \ - HS_TRANSPOSE_REMAP( x, 13, 4 ) \ - HS_TRANSPOSE_REMAP( x, 14, 8 ) \ - HS_TRANSPOSE_REMAP( x, 15, 12 ) \ - HS_TRANSPOSE_REMAP( x, 16, 16 ) \ - HS_EMPTY - -#endif - -// -// -// - diff --git a/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u32/hs_modules.h 
b/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u32/hs_modules.h deleted file mode 100644 index 5099a65f3358c5d050f53aac0f66df5d26be5047..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u32/hs_modules.h +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "hs_bs_0.len.xxd" -, -#include "hs_bs_0.spv.xxd" -, -#include "hs_bs_1.len.xxd" -, -#include "hs_bs_1.spv.xxd" -, -#include "hs_bs_2.len.xxd" -, -#include "hs_bs_2.spv.xxd" -, -#include "hs_bs_3.len.xxd" -, -#include "hs_bs_3.spv.xxd" -, -#include "hs_bs_4.len.xxd" -, -#include "hs_bs_4.spv.xxd" -, -#include "hs_bc_0.len.xxd" -, -#include "hs_bc_0.spv.xxd" -, -#include "hs_bc_1.len.xxd" -, -#include "hs_bc_1.spv.xxd" -, -#include "hs_bc_2.len.xxd" -, -#include "hs_bc_2.spv.xxd" -, -#include "hs_bc_3.len.xxd" -, -#include "hs_bc_3.spv.xxd" -, -#include "hs_bc_4.len.xxd" -, -#include "hs_bc_4.spv.xxd" -, -#include "hs_fm_1_0.len.xxd" -, -#include "hs_fm_1_0.spv.xxd" -, -#include "hs_fm_1_1.len.xxd" -, -#include "hs_fm_1_1.spv.xxd" -, -#include "hs_fm_1_2.len.xxd" -, -#include "hs_fm_1_2.spv.xxd" -, -#include "hs_fm_1_3.len.xxd" -, -#include "hs_fm_1_3.spv.xxd" -, -#include "hs_fm_1_4.len.xxd" -, -#include "hs_fm_1_4.spv.xxd" -, -#include "hs_hm_1.len.xxd" -, -#include "hs_hm_1.spv.xxd" -, -#include "hs_fill_in.len.xxd" -, -#include "hs_fill_in.spv.xxd" -, -#include "hs_fill_out.len.xxd" -, -#include "hs_fill_out.spv.xxd" -, -#include "hs_transpose.len.xxd" -, -#include "hs_transpose.spv.xxd" -, diff --git a/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u32/hs_target.h b/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u32/hs_target.h deleted file mode 100644 index 00b3c9877b1560b7a53211ba032a3b6c0638c7bb..0000000000000000000000000000000000000000 --- 
a/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u32/hs_target.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#pragma once - -// -// -// - -#include "hs_vk_target.h" - -// -// -// - -#undef HS_TARGET_NAME -#define HS_TARGET_NAME hs_amd_gcn_u32 - -// -// -// - -extern struct hs_vk_target const HS_TARGET_NAME; - -// -// -// diff --git a/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u64/gen.sh b/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u64/gen.sh deleted file mode 100644 index c5aec69482e8676d8fdc2a395dde673e5b8e7f80..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u64/gen.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash - -## -## Copyright 2019 The Fuchsia Authors. All rights reserved. -## Use of this source code is governed by a BSD-style license that can be -## found in the LICENSE file. -## - -## -## exit on error -## - -## set -e - -## -## delete the previous images -## - -rm *.comp -rm *.spv -rm *.xxd - -## -## -## - -HS_GEN=../../../../../gen/hs_gen - -## --- 64-bit keys - -$HS_GEN -v -a "glsl" -D HS_AMD_GCN -t 2 -w 64 -r 8 -s 32768 -S 32768 -b 16 -m 1 -M 1 -f 1 -F 1 -c 1 -C 1 -z - -## -## remove trailing whitespace from generated files -## - -sed -i 's/[[:space:]]*$//' hs_config.h -sed -i 's/[[:space:]]*$//' hs_modules.h - -## -## -## - -whereis glslangValidator - -## -## FIXME -- convert this to a bash script -## -## Note that we can use xargs instead of the cmd for/do -## - -for f in *.comp -do - dos2unix $f - clang-format -style=Mozilla -i $f - cpp -P -I ../.. -I ../../../.. 
$f > ${f%%.*}.pre.comp - clang-format -style=Mozilla -i ${f%%.*}.pre.comp - glslangValidator --target-env vulkan1.1 -o ${f%%.*}.spv ${f%%.*}.pre.comp - spirv-opt -O ${f%%.*}.spv -o ${f%%.*}.spv -## spirv-remap -v --do-everything --input %%~nf.spv --output remap - xxd -i < ${f%%.*}.spv > ${f%%.*}.spv.xxd - len=$(wc -c < ${f%%.*}.spv) - echo ${f%%.*}.spv $len - printf "%.8x" $len | xxd -r -p | xxd -i > ${f%%.*}.len.xxd -done - -## -## dump a binary -## - -cc -I ../../../.. -I ../../../../../.. -D=HS_DUMP -o hs_dump *.c -hs_dump - -## -## delete temporary files -## - -rm *.pre.comp -rm *.comp -rm *.spv diff --git a/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u64/hs_config.h b/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u64/hs_config.h deleted file mode 100644 index ce5ddcaee9b7c98b825b35f80056c59ffe264c26..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u64/hs_config.h +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef HS_GLSL_ONCE -#define HS_GLSL_ONCE - -#define HS_SLAB_THREADS_LOG2 6 -#define HS_SLAB_THREADS (1 << HS_SLAB_THREADS_LOG2) -#define HS_SLAB_WIDTH_LOG2 6 -#define HS_SLAB_WIDTH (1 << HS_SLAB_WIDTH_LOG2) -#define HS_SLAB_HEIGHT 8 -#define HS_SLAB_KEYS (HS_SLAB_WIDTH * HS_SLAB_HEIGHT) -#define HS_REG_LAST(c) c##8 -#define HS_KEY_DWORDS 2 -#define HS_VAL_DWORDS 0 -#define HS_BS_SLABS 16 -#define HS_BS_SLABS_LOG2_RU 4 -#define HS_BC_SLABS_LOG2_MAX 4 -#define HS_FM_BLOCK_HEIGHT 1 -#define HS_FM_SCALE_MIN 1 -#define HS_FM_SCALE_MAX 1 -#define HS_HM_BLOCK_HEIGHT 1 -#define HS_HM_SCALE_MIN 1 -#define HS_HM_SCALE_MAX 1 -#define HS_EMPTY - -#define HS_AMD_GCN - -#define HS_SLAB_ROWS() \ - HS_SLAB_ROW( 1, 0 ) \ - HS_SLAB_ROW( 2, 1 ) \ - HS_SLAB_ROW( 3, 2 ) \ - HS_SLAB_ROW( 4, 3 ) \ - HS_SLAB_ROW( 5, 4 ) \ - HS_SLAB_ROW( 6, 5 ) \ - HS_SLAB_ROW( 7, 6 ) \ - HS_SLAB_ROW( 8, 7 ) \ - HS_EMPTY - -#define HS_TRANSPOSE_SLAB() \ - HS_TRANSPOSE_STAGE( 1 ) \ - HS_TRANSPOSE_STAGE( 2 ) \ - HS_TRANSPOSE_STAGE( 3 ) \ - HS_TRANSPOSE_STAGE( 4 ) \ - HS_TRANSPOSE_STAGE( 5 ) \ - HS_TRANSPOSE_STAGE( 6 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 2, 1 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 4, 3 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 6, 5 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 8, 7 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 3, 1 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 4, 2 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 7, 5 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 8, 6 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 5, 1 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 6, 2 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 7, 3 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 8, 4 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 2, 1 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 4, 3 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 6, 5 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 8, 7 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 3, 1 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 4, 2 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 7, 5 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 8, 6 ) \ - HS_TRANSPOSE_BLEND( w, x, 6, 5, 1 ) \ - HS_TRANSPOSE_BLEND( w, x, 6, 6, 2 ) \ - 
HS_TRANSPOSE_BLEND( w, x, 6, 7, 3 ) \ - HS_TRANSPOSE_BLEND( w, x, 6, 8, 4 ) \ - HS_TRANSPOSE_REMAP( x, 1, 1 ) \ - HS_TRANSPOSE_REMAP( x, 2, 2 ) \ - HS_TRANSPOSE_REMAP( x, 3, 3 ) \ - HS_TRANSPOSE_REMAP( x, 4, 4 ) \ - HS_TRANSPOSE_REMAP( x, 5, 5 ) \ - HS_TRANSPOSE_REMAP( x, 6, 6 ) \ - HS_TRANSPOSE_REMAP( x, 7, 7 ) \ - HS_TRANSPOSE_REMAP( x, 8, 8 ) \ - HS_EMPTY - -#endif - -// -// -// - diff --git a/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u64/hs_modules.h b/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u64/hs_modules.h deleted file mode 100644 index 5099a65f3358c5d050f53aac0f66df5d26be5047..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u64/hs_modules.h +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "hs_bs_0.len.xxd" -, -#include "hs_bs_0.spv.xxd" -, -#include "hs_bs_1.len.xxd" -, -#include "hs_bs_1.spv.xxd" -, -#include "hs_bs_2.len.xxd" -, -#include "hs_bs_2.spv.xxd" -, -#include "hs_bs_3.len.xxd" -, -#include "hs_bs_3.spv.xxd" -, -#include "hs_bs_4.len.xxd" -, -#include "hs_bs_4.spv.xxd" -, -#include "hs_bc_0.len.xxd" -, -#include "hs_bc_0.spv.xxd" -, -#include "hs_bc_1.len.xxd" -, -#include "hs_bc_1.spv.xxd" -, -#include "hs_bc_2.len.xxd" -, -#include "hs_bc_2.spv.xxd" -, -#include "hs_bc_3.len.xxd" -, -#include "hs_bc_3.spv.xxd" -, -#include "hs_bc_4.len.xxd" -, -#include "hs_bc_4.spv.xxd" -, -#include "hs_fm_1_0.len.xxd" -, -#include "hs_fm_1_0.spv.xxd" -, -#include "hs_fm_1_1.len.xxd" -, -#include "hs_fm_1_1.spv.xxd" -, -#include "hs_fm_1_2.len.xxd" -, -#include "hs_fm_1_2.spv.xxd" -, -#include "hs_fm_1_3.len.xxd" -, -#include "hs_fm_1_3.spv.xxd" -, -#include "hs_fm_1_4.len.xxd" -, -#include "hs_fm_1_4.spv.xxd" -, -#include "hs_hm_1.len.xxd" -, -#include "hs_hm_1.spv.xxd" -, -#include "hs_fill_in.len.xxd" -, -#include 
"hs_fill_in.spv.xxd" -, -#include "hs_fill_out.len.xxd" -, -#include "hs_fill_out.spv.xxd" -, -#include "hs_transpose.len.xxd" -, -#include "hs_transpose.spv.xxd" -, diff --git a/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u64/hs_target.h b/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u64/hs_target.h deleted file mode 100644 index aa21e11fa1c37fcf838b05031b7f05e24341d906..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/amd/gcn3/u64/hs_target.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#pragma once - -// -// -// - -#include "hs_vk_target.h" - -// -// -// - -#undef HS_TARGET_NAME -#define HS_TARGET_NAME hs_amd_gcn_u64 - -// -// -// - -extern struct hs_vk_target const HS_TARGET_NAME; - -// -// -// diff --git a/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u32/gen.sh b/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u32/gen.sh deleted file mode 100644 index 647cf3c36192eb1db679f5aede1c43cd05d090ca..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u32/gen.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash - -## -## Copyright 2019 The Fuchsia Authors. All rights reserved. -## Use of this source code is governed by a BSD-style license that can be -## found in the LICENSE file. 
-## - -## -## exit on error -## - -## set -e - -## -## delete the previous images -## - -rm *.comp -rm *.spv -rm *.xxd - -## -## -## - -HS_GEN=../../../../../gen/hs_gen - -## --- 32-bit keys --- - -$HS_GEN -v -a "glsl" -D HS_INTEL_GEN8 -t 1 -w 16 -r 8 -s 21504 -S 65536 -b 16 -B 48 -m 1 -M 1 -f 0 -F 0 -c 0 -C 0 -z - -## -## remove trailing whitespace from generated files -## - -sed -i 's/[[:space:]]*$//' hs_config.h -sed -i 's/[[:space:]]*$//' hs_modules.h - -## -## -## - -whereis glslangValidator - -## -## FIXME -- convert this to a bash script -## -## Note that we can use xargs instead of the cmd for/do -## - -for f in *.comp -do - dos2unix $f - clang-format -style=Mozilla -i $f - cpp -P -I ../.. -I ../../../.. $f > ${f%%.*}.pre.comp - clang-format -style=Mozilla -i ${f%%.*}.pre.comp - glslangValidator --target-env vulkan1.1 -o ${f%%.*}.spv ${f%%.*}.pre.comp - spirv-opt -O ${f%%.*}.spv -o ${f%%.*}.spv -## spirv-remap -v --do-everything --input %%~nf.spv --output remap - xxd -i < ${f%%.*}.spv > ${f%%.*}.spv.xxd - len=$(wc -c < ${f%%.*}.spv) - echo ${f%%.*}.spv $len - printf "%.8x" $len | xxd -r -p | xxd -i > ${f%%.*}.len.xxd -done - -## -## dump a binary -## - -cc -I ../../../.. -I ../../../../../.. -D=HS_DUMP -o hs_dump *.c -hs_dump - -## -## delete temporary files -## - -rm *.pre.comp -rm *.comp -rm *.spv diff --git a/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u32/hs_config.h b/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u32/hs_config.h deleted file mode 100644 index fad716aae2879d4c64d71d2e96f54534b126f56d..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u32/hs_config.h +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef HS_GLSL_ONCE -#define HS_GLSL_ONCE - -#define HS_SLAB_THREADS_LOG2 4 -#define HS_SLAB_THREADS (1 << HS_SLAB_THREADS_LOG2) -#define HS_SLAB_WIDTH_LOG2 4 -#define HS_SLAB_WIDTH (1 << HS_SLAB_WIDTH_LOG2) -#define HS_SLAB_HEIGHT 8 -#define HS_SLAB_KEYS (HS_SLAB_WIDTH * HS_SLAB_HEIGHT) -#define HS_REG_LAST(c) c##8 -#define HS_KEY_DWORDS 1 -#define HS_VAL_DWORDS 0 -#define HS_BS_SLABS 16 -#define HS_BS_SLABS_LOG2_RU 4 -#define HS_BC_SLABS_LOG2_MAX 4 -#define HS_FM_BLOCK_HEIGHT 1 -#define HS_FM_SCALE_MIN 0 -#define HS_FM_SCALE_MAX 0 -#define HS_HM_BLOCK_HEIGHT 1 -#define HS_HM_SCALE_MIN 0 -#define HS_HM_SCALE_MAX 0 -#define HS_EMPTY - -#define HS_INTEL_GEN8 - -#define HS_SLAB_ROWS() \ - HS_SLAB_ROW( 1, 0 ) \ - HS_SLAB_ROW( 2, 1 ) \ - HS_SLAB_ROW( 3, 2 ) \ - HS_SLAB_ROW( 4, 3 ) \ - HS_SLAB_ROW( 5, 4 ) \ - HS_SLAB_ROW( 6, 5 ) \ - HS_SLAB_ROW( 7, 6 ) \ - HS_SLAB_ROW( 8, 7 ) \ - HS_EMPTY - -#define HS_TRANSPOSE_SLAB() \ - HS_TRANSPOSE_STAGE( 1 ) \ - HS_TRANSPOSE_STAGE( 2 ) \ - HS_TRANSPOSE_STAGE( 3 ) \ - HS_TRANSPOSE_STAGE( 4 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 2, 1 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 4, 3 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 6, 5 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 8, 7 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 3, 1 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 4, 2 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 7, 5 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 8, 6 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 5, 1 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 6, 2 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 7, 3 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 8, 4 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 2, 1 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 4, 3 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 6, 5 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 8, 7 ) \ - HS_TRANSPOSE_REMAP( v, 1, 1 ) \ - HS_TRANSPOSE_REMAP( v, 2, 5 ) \ - HS_TRANSPOSE_REMAP( v, 3, 2 ) \ - HS_TRANSPOSE_REMAP( v, 4, 6 ) \ - HS_TRANSPOSE_REMAP( v, 5, 3 ) \ - HS_TRANSPOSE_REMAP( v, 6, 7 ) \ - HS_TRANSPOSE_REMAP( v, 7, 4 ) \ - HS_TRANSPOSE_REMAP( v, 8, 8 ) \ - HS_EMPTY - -#endif - -// -// -// - diff 
--git a/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u32/hs_modules.h b/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u32/hs_modules.h deleted file mode 100644 index db653be10496975d1d485d110727057c14df7af5..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u32/hs_modules.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "hs_bs_0.len.xxd" -, -#include "hs_bs_0.spv.xxd" -, -#include "hs_bs_1.len.xxd" -, -#include "hs_bs_1.spv.xxd" -, -#include "hs_bs_2.len.xxd" -, -#include "hs_bs_2.spv.xxd" -, -#include "hs_bs_3.len.xxd" -, -#include "hs_bs_3.spv.xxd" -, -#include "hs_bs_4.len.xxd" -, -#include "hs_bs_4.spv.xxd" -, -#include "hs_bc_0.len.xxd" -, -#include "hs_bc_0.spv.xxd" -, -#include "hs_bc_1.len.xxd" -, -#include "hs_bc_1.spv.xxd" -, -#include "hs_bc_2.len.xxd" -, -#include "hs_bc_2.spv.xxd" -, -#include "hs_bc_3.len.xxd" -, -#include "hs_bc_3.spv.xxd" -, -#include "hs_bc_4.len.xxd" -, -#include "hs_bc_4.spv.xxd" -, -#include "hs_fm_0_0.len.xxd" -, -#include "hs_fm_0_0.spv.xxd" -, -#include "hs_fm_0_1.len.xxd" -, -#include "hs_fm_0_1.spv.xxd" -, -#include "hs_fm_0_2.len.xxd" -, -#include "hs_fm_0_2.spv.xxd" -, -#include "hs_fm_0_3.len.xxd" -, -#include "hs_fm_0_3.spv.xxd" -, -#include "hs_hm_0.len.xxd" -, -#include "hs_hm_0.spv.xxd" -, -#include "hs_fill_in.len.xxd" -, -#include "hs_fill_in.spv.xxd" -, -#include "hs_fill_out.len.xxd" -, -#include "hs_fill_out.spv.xxd" -, -#include "hs_transpose.len.xxd" -, -#include "hs_transpose.spv.xxd" -, diff --git a/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u32/hs_target.h b/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u32/hs_target.h deleted file mode 100644 index fb8ddd55cdff7f7d3350b531ba440c63e056b8c2..0000000000000000000000000000000000000000 --- 
a/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u32/hs_target.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#pragma once - -// -// -// - -#include "hs_vk_target.h" - -// -// -// - -#undef HS_TARGET_NAME -#define HS_TARGET_NAME hs_intel_gen8_u32 - -// -// -// - -extern struct hs_vk_target const HS_TARGET_NAME; - -// -// -// diff --git a/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u64/gen.sh b/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u64/gen.sh deleted file mode 100644 index 63d5285e3b2c8380e201cae32b9e6ea288c87f71..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u64/gen.sh +++ /dev/null @@ -1,80 +0,0 @@ -#!/bin/bash - -## -## Copyright 2019 The Fuchsia Authors. All rights reserved. -## Use of this source code is governed by a BSD-style license that can be -## found in the LICENSE file. -## - -## -## exit on error -## - -## set -e - -## -## delete the previous images -## - -rm *.comp -rm *.spv -rm *.xxd - -## -## -## - -HS_GEN=../../../../../gen/hs_gen - -## --- 64-bit keys --- - -$HS_GEN -v -a "glsl" -D HS_INTEL_GEN8 -t 2 -w 8 -r 16 -s 21504 -S 65536 -b 16 -B 48 -m 1 -M 1 -f 1 -F 1 -c 1 -C 1 -z - -## -## remove trailing whitespace from generated files -## - -sed -i 's/[[:space:]]*$//' hs_config.h -sed -i 's/[[:space:]]*$//' hs_modules.h - -## -## -## - -whereis glslangValidator - -## -## FIXME -- convert this to a bash script -## -## Note that we can use xargs instead of the cmd for/do -## - -for f in *.comp -do - dos2unix $f - clang-format -style=Mozilla -i $f - cpp -P -I ../.. -I ../../../.. 
$f > ${f%%.*}.pre.comp - clang-format -style=Mozilla -i ${f%%.*}.pre.comp - glslangValidator --target-env vulkan1.1 -o ${f%%.*}.spv ${f%%.*}.pre.comp - spirv-opt -O ${f%%.*}.spv -o ${f%%.*}.spv -## spirv-remap -v --do-everything --input %%~nf.spv --output remap - xxd -i < ${f%%.*}.spv > ${f%%.*}.spv.xxd - len=$(wc -c < ${f%%.*}.spv) - echo ${f%%.*}.spv $len - printf "%.8x" $len | xxd -r -p | xxd -i > ${f%%.*}.len.xxd -done - -## -## dump a binary -## - -cc -I ../../../.. -I ../../../../../.. -D=HS_DUMP -o hs_dump *.c -hs_dump - -## -## delete temporary files -## - -rm *.pre.comp -rm *.comp -rm *.spv diff --git a/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u64/hs_config.h b/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u64/hs_config.h deleted file mode 100644 index 6fd4c85283a9d0f19a0455b5fb8789caaa31c0b4..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u64/hs_config.h +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef HS_GLSL_ONCE -#define HS_GLSL_ONCE - -#define HS_SLAB_THREADS_LOG2 3 -#define HS_SLAB_THREADS (1 << HS_SLAB_THREADS_LOG2) -#define HS_SLAB_WIDTH_LOG2 3 -#define HS_SLAB_WIDTH (1 << HS_SLAB_WIDTH_LOG2) -#define HS_SLAB_HEIGHT 16 -#define HS_SLAB_KEYS (HS_SLAB_WIDTH * HS_SLAB_HEIGHT) -#define HS_REG_LAST(c) c##16 -#define HS_KEY_DWORDS 2 -#define HS_VAL_DWORDS 0 -#define HS_BS_SLABS 16 -#define HS_BS_SLABS_LOG2_RU 4 -#define HS_BC_SLABS_LOG2_MAX 4 -#define HS_FM_BLOCK_HEIGHT 1 -#define HS_FM_SCALE_MIN 1 -#define HS_FM_SCALE_MAX 1 -#define HS_HM_BLOCK_HEIGHT 1 -#define HS_HM_SCALE_MIN 1 -#define HS_HM_SCALE_MAX 1 -#define HS_EMPTY - -#define HS_INTEL_GEN8 - -#define HS_SLAB_ROWS() \ - HS_SLAB_ROW( 1, 0 ) \ - HS_SLAB_ROW( 2, 1 ) \ - HS_SLAB_ROW( 3, 2 ) \ - HS_SLAB_ROW( 4, 3 ) \ - HS_SLAB_ROW( 5, 4 ) \ - HS_SLAB_ROW( 6, 5 ) \ - HS_SLAB_ROW( 7, 6 ) \ - HS_SLAB_ROW( 8, 7 ) \ - HS_SLAB_ROW( 9, 8 ) \ - HS_SLAB_ROW( 10, 9 ) \ - HS_SLAB_ROW( 11, 10 ) \ - HS_SLAB_ROW( 12, 11 ) \ - HS_SLAB_ROW( 13, 12 ) \ - HS_SLAB_ROW( 14, 13 ) \ - HS_SLAB_ROW( 15, 14 ) \ - HS_SLAB_ROW( 16, 15 ) \ - HS_EMPTY - -#define HS_TRANSPOSE_SLAB() \ - HS_TRANSPOSE_STAGE( 1 ) \ - HS_TRANSPOSE_STAGE( 2 ) \ - HS_TRANSPOSE_STAGE( 3 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 2, 1 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 4, 3 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 6, 5 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 8, 7 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 10, 9 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 12, 11 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 14, 13 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 16, 15 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 3, 1 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 4, 2 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 7, 5 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 8, 6 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 11, 9 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 12, 10 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 15, 13 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 16, 14 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 5, 1 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 6, 2 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 7, 3 ) \ 
- HS_TRANSPOSE_BLEND( t, u, 3, 8, 4 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 13, 9 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 14, 10 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 15, 11 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 16, 12 ) \ - HS_TRANSPOSE_REMAP( u, 1, 1 ) \ - HS_TRANSPOSE_REMAP( u, 2, 3 ) \ - HS_TRANSPOSE_REMAP( u, 3, 5 ) \ - HS_TRANSPOSE_REMAP( u, 4, 7 ) \ - HS_TRANSPOSE_REMAP( u, 5, 9 ) \ - HS_TRANSPOSE_REMAP( u, 6, 11 ) \ - HS_TRANSPOSE_REMAP( u, 7, 13 ) \ - HS_TRANSPOSE_REMAP( u, 8, 15 ) \ - HS_TRANSPOSE_REMAP( u, 9, 2 ) \ - HS_TRANSPOSE_REMAP( u, 10, 4 ) \ - HS_TRANSPOSE_REMAP( u, 11, 6 ) \ - HS_TRANSPOSE_REMAP( u, 12, 8 ) \ - HS_TRANSPOSE_REMAP( u, 13, 10 ) \ - HS_TRANSPOSE_REMAP( u, 14, 12 ) \ - HS_TRANSPOSE_REMAP( u, 15, 14 ) \ - HS_TRANSPOSE_REMAP( u, 16, 16 ) \ - HS_EMPTY - -#endif - -// -// -// - diff --git a/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u64/hs_modules.h b/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u64/hs_modules.h deleted file mode 100644 index 5099a65f3358c5d050f53aac0f66df5d26be5047..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u64/hs_modules.h +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "hs_bs_0.len.xxd" -, -#include "hs_bs_0.spv.xxd" -, -#include "hs_bs_1.len.xxd" -, -#include "hs_bs_1.spv.xxd" -, -#include "hs_bs_2.len.xxd" -, -#include "hs_bs_2.spv.xxd" -, -#include "hs_bs_3.len.xxd" -, -#include "hs_bs_3.spv.xxd" -, -#include "hs_bs_4.len.xxd" -, -#include "hs_bs_4.spv.xxd" -, -#include "hs_bc_0.len.xxd" -, -#include "hs_bc_0.spv.xxd" -, -#include "hs_bc_1.len.xxd" -, -#include "hs_bc_1.spv.xxd" -, -#include "hs_bc_2.len.xxd" -, -#include "hs_bc_2.spv.xxd" -, -#include "hs_bc_3.len.xxd" -, -#include "hs_bc_3.spv.xxd" -, -#include "hs_bc_4.len.xxd" -, -#include "hs_bc_4.spv.xxd" -, -#include "hs_fm_1_0.len.xxd" -, -#include "hs_fm_1_0.spv.xxd" -, -#include "hs_fm_1_1.len.xxd" -, -#include "hs_fm_1_1.spv.xxd" -, -#include "hs_fm_1_2.len.xxd" -, -#include "hs_fm_1_2.spv.xxd" -, -#include "hs_fm_1_3.len.xxd" -, -#include "hs_fm_1_3.spv.xxd" -, -#include "hs_fm_1_4.len.xxd" -, -#include "hs_fm_1_4.spv.xxd" -, -#include "hs_hm_1.len.xxd" -, -#include "hs_hm_1.spv.xxd" -, -#include "hs_fill_in.len.xxd" -, -#include "hs_fill_in.spv.xxd" -, -#include "hs_fill_out.len.xxd" -, -#include "hs_fill_out.spv.xxd" -, -#include "hs_transpose.len.xxd" -, -#include "hs_transpose.spv.xxd" -, diff --git a/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u64/hs_target.h b/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u64/hs_target.h deleted file mode 100644 index c4329c342c9a84bf8d78602d4b77c10682826e0a..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/intel/gen8/u64/hs_target.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#pragma once - -// -// -// - -#include "hs_vk_target.h" - -// -// -// - -#undef HS_TARGET_NAME -#define HS_TARGET_NAME hs_intel_gen8_u64 - -// -// -// - -extern struct hs_vk_target const HS_TARGET_NAME; - -// -// -// diff --git a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u32/gen.sh b/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u32/gen.sh deleted file mode 100644 index 107d0cba6db9fed011e5dcae9789f32578e0aa0a..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u32/gen.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/bash - -## -## Copyright 2019 The Fuchsia Authors. All rights reserved. -## Use of this source code is governed by a BSD-style license that can be -## found in the LICENSE file. -## - -## -## exit on error -## - -## set -e - -## -## delete the previous images -## - -rm *.comp -rm *.spv -rm *.xxd - -## -## -## - -HS_GEN=../../../../../gen/hs_gen - -## --- 32-bit keys --- - -## $HS_GEN -v -a "glsl" -D HS_NVIDIA_SM35 -t 1 -w 32 -r 32 -s 49152 -S 65536 -b 32 -m 1 -M 1 -f 1 -F 1 -c 1 -C 1 -z -$HS_GEN -v -a "glsl" -D HS_NVIDIA_SM35 -t 1 -w 32 -r 16 -s 32768 -S 32768 -b 16 -m 1 -M 1 -p 1 -P 1 -f 0 -F 0 -c 0 -C 0 -z - -## -## remove trailing whitespace from generated files -## - -sed -i 's/[[:space:]]*$//' hs_config.h -sed -i 's/[[:space:]]*$//' hs_modules.h - -## -## -## - -whereis glslangValidator - -## -## FIXME -- convert this to a bash script -## -## Note that we can use xargs instead of the cmd for/do -## - -for f in *.comp -do - dos2unix $f - clang-format -style=Mozilla -i $f - cpp -P -I ../.. -I ../../../.. 
$f > ${f%%.*}.pre.comp - clang-format -style=Mozilla -i ${f%%.*}.pre.comp - glslangValidator --target-env vulkan1.1 -o ${f%%.*}.spv ${f%%.*}.pre.comp - spirv-opt -O ${f%%.*}.spv -o ${f%%.*}.spv -## spirv-remap -v --do-everything --input %%~nf.spv --output remap - xxd -i < ${f%%.*}.spv > ${f%%.*}.spv.xxd - len=$(wc -c < ${f%%.*}.spv) - echo ${f%%.*}.spv $len - printf "%.8x" $len | xxd -r -p | xxd -i > ${f%%.*}.len.xxd -done - -## -## dump a binary -## - -cc -I ../../../.. -I ../../../../../.. -D=HS_DUMP -o hs_dump *.c -hs_dump - -## -## delete temporary files -## - -rm *.pre.comp -rm *.comp -rm *.spv diff --git a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u32/hs_config.h b/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u32/hs_config.h deleted file mode 100644 index f94a30480270083509fdc9944f46d1e95cba24bb..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u32/hs_config.h +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef HS_GLSL_ONCE -#define HS_GLSL_ONCE - -#define HS_SLAB_THREADS_LOG2 5 -#define HS_SLAB_THREADS (1 << HS_SLAB_THREADS_LOG2) -#define HS_SLAB_WIDTH_LOG2 5 -#define HS_SLAB_WIDTH (1 << HS_SLAB_WIDTH_LOG2) -#define HS_SLAB_HEIGHT 16 -#define HS_SLAB_KEYS (HS_SLAB_WIDTH * HS_SLAB_HEIGHT) -#define HS_REG_LAST(c) c##16 -#define HS_KEY_DWORDS 1 -#define HS_VAL_DWORDS 0 -#define HS_BS_SLABS 16 -#define HS_BS_SLABS_LOG2_RU 4 -#define HS_BC_SLABS_LOG2_MAX 4 -#define HS_FM_BLOCK_HEIGHT 1 -#define HS_FM_SCALE_MIN 0 -#define HS_FM_SCALE_MAX 0 -#define HS_HM_BLOCK_HEIGHT 1 -#define HS_HM_SCALE_MIN 0 -#define HS_HM_SCALE_MAX 0 -#define HS_EMPTY - -#define HS_NVIDIA_SM35 - -#define HS_SLAB_ROWS() \ - HS_SLAB_ROW( 1, 0 ) \ - HS_SLAB_ROW( 2, 1 ) \ - HS_SLAB_ROW( 3, 2 ) \ - HS_SLAB_ROW( 4, 3 ) \ - HS_SLAB_ROW( 5, 4 ) \ - HS_SLAB_ROW( 6, 5 ) \ - HS_SLAB_ROW( 7, 6 ) \ - HS_SLAB_ROW( 8, 7 ) \ - HS_SLAB_ROW( 9, 8 ) \ - HS_SLAB_ROW( 10, 9 ) \ - HS_SLAB_ROW( 11, 10 ) \ - HS_SLAB_ROW( 12, 11 ) \ - HS_SLAB_ROW( 13, 12 ) \ - HS_SLAB_ROW( 14, 13 ) \ - HS_SLAB_ROW( 15, 14 ) \ - HS_SLAB_ROW( 16, 15 ) \ - HS_EMPTY - -#define HS_TRANSPOSE_SLAB() \ - HS_TRANSPOSE_STAGE( 1 ) \ - HS_TRANSPOSE_STAGE( 2 ) \ - HS_TRANSPOSE_STAGE( 3 ) \ - HS_TRANSPOSE_STAGE( 4 ) \ - HS_TRANSPOSE_STAGE( 5 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 2, 1 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 4, 3 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 6, 5 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 8, 7 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 10, 9 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 12, 11 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 14, 13 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 16, 15 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 3, 1 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 4, 2 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 7, 5 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 8, 6 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 11, 9 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 12, 10 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 15, 13 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 16, 14 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 5, 1 ) \ - HS_TRANSPOSE_BLEND( 
t, u, 3, 6, 2 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 7, 3 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 8, 4 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 13, 9 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 14, 10 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 15, 11 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 16, 12 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 9, 1 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 10, 2 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 11, 3 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 12, 4 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 13, 5 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 14, 6 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 15, 7 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 16, 8 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 2, 1 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 4, 3 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 6, 5 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 8, 7 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 10, 9 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 12, 11 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 14, 13 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 16, 15 ) \ - HS_TRANSPOSE_REMAP( w, 1, 1 ) \ - HS_TRANSPOSE_REMAP( w, 2, 9 ) \ - HS_TRANSPOSE_REMAP( w, 3, 2 ) \ - HS_TRANSPOSE_REMAP( w, 4, 10 ) \ - HS_TRANSPOSE_REMAP( w, 5, 3 ) \ - HS_TRANSPOSE_REMAP( w, 6, 11 ) \ - HS_TRANSPOSE_REMAP( w, 7, 4 ) \ - HS_TRANSPOSE_REMAP( w, 8, 12 ) \ - HS_TRANSPOSE_REMAP( w, 9, 5 ) \ - HS_TRANSPOSE_REMAP( w, 10, 13 ) \ - HS_TRANSPOSE_REMAP( w, 11, 6 ) \ - HS_TRANSPOSE_REMAP( w, 12, 14 ) \ - HS_TRANSPOSE_REMAP( w, 13, 7 ) \ - HS_TRANSPOSE_REMAP( w, 14, 15 ) \ - HS_TRANSPOSE_REMAP( w, 15, 8 ) \ - HS_TRANSPOSE_REMAP( w, 16, 16 ) \ - HS_EMPTY - -#endif - -// -// -// - diff --git a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u32/hs_modules.h b/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u32/hs_modules.h deleted file mode 100644 index db653be10496975d1d485d110727057c14df7af5..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u32/hs_modules.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "hs_bs_0.len.xxd" -, -#include "hs_bs_0.spv.xxd" -, -#include "hs_bs_1.len.xxd" -, -#include "hs_bs_1.spv.xxd" -, -#include "hs_bs_2.len.xxd" -, -#include "hs_bs_2.spv.xxd" -, -#include "hs_bs_3.len.xxd" -, -#include "hs_bs_3.spv.xxd" -, -#include "hs_bs_4.len.xxd" -, -#include "hs_bs_4.spv.xxd" -, -#include "hs_bc_0.len.xxd" -, -#include "hs_bc_0.spv.xxd" -, -#include "hs_bc_1.len.xxd" -, -#include "hs_bc_1.spv.xxd" -, -#include "hs_bc_2.len.xxd" -, -#include "hs_bc_2.spv.xxd" -, -#include "hs_bc_3.len.xxd" -, -#include "hs_bc_3.spv.xxd" -, -#include "hs_bc_4.len.xxd" -, -#include "hs_bc_4.spv.xxd" -, -#include "hs_fm_0_0.len.xxd" -, -#include "hs_fm_0_0.spv.xxd" -, -#include "hs_fm_0_1.len.xxd" -, -#include "hs_fm_0_1.spv.xxd" -, -#include "hs_fm_0_2.len.xxd" -, -#include "hs_fm_0_2.spv.xxd" -, -#include "hs_fm_0_3.len.xxd" -, -#include "hs_fm_0_3.spv.xxd" -, -#include "hs_hm_0.len.xxd" -, -#include "hs_hm_0.spv.xxd" -, -#include "hs_fill_in.len.xxd" -, -#include "hs_fill_in.spv.xxd" -, -#include "hs_fill_out.len.xxd" -, -#include "hs_fill_out.spv.xxd" -, -#include "hs_transpose.len.xxd" -, -#include "hs_transpose.spv.xxd" -, diff --git a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u32/hs_target.h b/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u32/hs_target.h deleted file mode 100644 index 0b0b0bd1abd9d9a5aa2c01bd6871fd2fb85c814c..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u32/hs_target.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#pragma once - -// -// -// - -#include "hs_vk_target.h" - -// -// -// - -#undef HS_TARGET_NAME -#define HS_TARGET_NAME hs_nvidia_sm35_u32 - -// -// -// - -extern struct hs_vk_target const HS_TARGET_NAME; - -// -// -// diff --git a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u64/gen.sh b/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u64/gen.sh deleted file mode 100644 index a8ac7411089c2779f9dfaec6a5ab6ac09a272e10..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u64/gen.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/bash - -## -## Copyright 2019 The Fuchsia Authors. All rights reserved. -## Use of this source code is governed by a BSD-style license that can be -## found in the LICENSE file. -## - -## -## exit on error -## - -## set -e - -## -## delete the previous images -## - -rm *.comp -rm *.spv -rm *.xxd - -## -## -## - -HS_GEN=../../../../../gen/hs_gen - -## --- 64-bit keys --- - -## $HS_GEN -v -a "glsl" -D HS_NVIDIA_SM35 -t 2 -w 32 -r 32 -s 49152 -S 65536 -b 16 -m 1 -M 1 -f 1 -F 1 -c 1 -C 1 -z -$HS_GEN -v -a "glsl" -D HS_NVIDIA_SM35 -t 2 -w 32 -r 8 -s 32768 -S 32768 -b 16 -m 1 -M 1 -p 1 -P 1 -f 0 -F 0 -c 0 -C 0 -z - -## -## remove trailing whitespace from generated files -## - -sed -i 's/[[:space:]]*$//' hs_config.h -sed -i 's/[[:space:]]*$//' hs_modules.h - -## -## -## - -whereis glslangValidator - -## -## FIXME -- convert this to a bash script -## -## Note that we can use xargs instead of the cmd for/do -## - -for f in *.comp -do - dos2unix $f - clang-format -style=Mozilla -i $f - cpp -P -I ../.. -I ../../../.. 
$f > ${f%%.*}.pre.comp - clang-format -style=Mozilla -i ${f%%.*}.pre.comp - glslangValidator --target-env vulkan1.1 -o ${f%%.*}.spv ${f%%.*}.pre.comp - spirv-opt -O ${f%%.*}.spv -o ${f%%.*}.spv -## spirv-remap -v --do-everything --input %%~nf.spv --output remap - xxd -i < ${f%%.*}.spv > ${f%%.*}.spv.xxd - len=$(wc -c < ${f%%.*}.spv) - echo ${f%%.*}.spv $len - printf "%.8x" $len | xxd -r -p | xxd -i > ${f%%.*}.len.xxd -done - -## -## dump a binary -## - -cc -I ../../../.. -I ../../../../../.. -D=HS_DUMP -o hs_dump *.c -hs_dump - -## -## delete temporary files -## - -rm *.pre.comp -rm *.comp -rm *.spv diff --git a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u64/hs_config.h b/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u64/hs_config.h deleted file mode 100644 index f711d14ea738ee359d25c0900cf71f1cf34b288b..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u64/hs_config.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef HS_GLSL_ONCE -#define HS_GLSL_ONCE - -#define HS_SLAB_THREADS_LOG2 5 -#define HS_SLAB_THREADS (1 << HS_SLAB_THREADS_LOG2) -#define HS_SLAB_WIDTH_LOG2 5 -#define HS_SLAB_WIDTH (1 << HS_SLAB_WIDTH_LOG2) -#define HS_SLAB_HEIGHT 8 -#define HS_SLAB_KEYS (HS_SLAB_WIDTH * HS_SLAB_HEIGHT) -#define HS_REG_LAST(c) c##8 -#define HS_KEY_DWORDS 2 -#define HS_VAL_DWORDS 0 -#define HS_BS_SLABS 16 -#define HS_BS_SLABS_LOG2_RU 4 -#define HS_BC_SLABS_LOG2_MAX 4 -#define HS_FM_BLOCK_HEIGHT 1 -#define HS_FM_SCALE_MIN 0 -#define HS_FM_SCALE_MAX 0 -#define HS_HM_BLOCK_HEIGHT 1 -#define HS_HM_SCALE_MIN 0 -#define HS_HM_SCALE_MAX 0 -#define HS_EMPTY - -#define HS_NVIDIA_SM35 - -#define HS_SLAB_ROWS() \ - HS_SLAB_ROW( 1, 0 ) \ - HS_SLAB_ROW( 2, 1 ) \ - HS_SLAB_ROW( 3, 2 ) \ - HS_SLAB_ROW( 4, 3 ) \ - HS_SLAB_ROW( 5, 4 ) \ - HS_SLAB_ROW( 6, 5 ) \ - HS_SLAB_ROW( 7, 6 ) \ - HS_SLAB_ROW( 8, 7 ) \ - HS_EMPTY - -#define HS_TRANSPOSE_SLAB() \ - HS_TRANSPOSE_STAGE( 1 ) \ - HS_TRANSPOSE_STAGE( 2 ) \ - HS_TRANSPOSE_STAGE( 3 ) \ - HS_TRANSPOSE_STAGE( 4 ) \ - HS_TRANSPOSE_STAGE( 5 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 2, 1 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 4, 3 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 6, 5 ) \ - HS_TRANSPOSE_BLEND( r, s, 1, 8, 7 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 3, 1 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 4, 2 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 7, 5 ) \ - HS_TRANSPOSE_BLEND( s, t, 2, 8, 6 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 5, 1 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 6, 2 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 7, 3 ) \ - HS_TRANSPOSE_BLEND( t, u, 3, 8, 4 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 2, 1 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 4, 3 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 6, 5 ) \ - HS_TRANSPOSE_BLEND( u, v, 4, 8, 7 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 3, 1 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 4, 2 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 7, 5 ) \ - HS_TRANSPOSE_BLEND( v, w, 5, 8, 6 ) \ - HS_TRANSPOSE_REMAP( w, 1, 1 ) \ - HS_TRANSPOSE_REMAP( w, 2, 3 ) \ - HS_TRANSPOSE_REMAP( w, 3, 5 ) \ - HS_TRANSPOSE_REMAP( 
w, 4, 7 ) \ - HS_TRANSPOSE_REMAP( w, 5, 2 ) \ - HS_TRANSPOSE_REMAP( w, 6, 4 ) \ - HS_TRANSPOSE_REMAP( w, 7, 6 ) \ - HS_TRANSPOSE_REMAP( w, 8, 8 ) \ - HS_EMPTY - -#endif - -// -// -// - diff --git a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u64/hs_modules.h b/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u64/hs_modules.h deleted file mode 100644 index db653be10496975d1d485d110727057c14df7af5..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u64/hs_modules.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "hs_bs_0.len.xxd" -, -#include "hs_bs_0.spv.xxd" -, -#include "hs_bs_1.len.xxd" -, -#include "hs_bs_1.spv.xxd" -, -#include "hs_bs_2.len.xxd" -, -#include "hs_bs_2.spv.xxd" -, -#include "hs_bs_3.len.xxd" -, -#include "hs_bs_3.spv.xxd" -, -#include "hs_bs_4.len.xxd" -, -#include "hs_bs_4.spv.xxd" -, -#include "hs_bc_0.len.xxd" -, -#include "hs_bc_0.spv.xxd" -, -#include "hs_bc_1.len.xxd" -, -#include "hs_bc_1.spv.xxd" -, -#include "hs_bc_2.len.xxd" -, -#include "hs_bc_2.spv.xxd" -, -#include "hs_bc_3.len.xxd" -, -#include "hs_bc_3.spv.xxd" -, -#include "hs_bc_4.len.xxd" -, -#include "hs_bc_4.spv.xxd" -, -#include "hs_fm_0_0.len.xxd" -, -#include "hs_fm_0_0.spv.xxd" -, -#include "hs_fm_0_1.len.xxd" -, -#include "hs_fm_0_1.spv.xxd" -, -#include "hs_fm_0_2.len.xxd" -, -#include "hs_fm_0_2.spv.xxd" -, -#include "hs_fm_0_3.len.xxd" -, -#include "hs_fm_0_3.spv.xxd" -, -#include "hs_hm_0.len.xxd" -, -#include "hs_hm_0.spv.xxd" -, -#include "hs_fill_in.len.xxd" -, -#include "hs_fill_in.spv.xxd" -, -#include "hs_fill_out.len.xxd" -, -#include "hs_fill_out.spv.xxd" -, -#include "hs_transpose.len.xxd" -, -#include "hs_transpose.spv.xxd" -, diff --git a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u64/hs_target.h 
b/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u64/hs_target.h deleted file mode 100644 index 6cf4dd3d05318246c832e16d6d61e849422a33e4..0000000000000000000000000000000000000000 --- a/src/graphics/lib/compute/hotsort/vk/targets/nvidia/sm_35/u64/hs_target.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2019 The Fuchsia Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#pragma once - -// -// -// - -#include "hs_vk_target.h" - -// -// -// - -#undef HS_TARGET_NAME -#define HS_TARGET_NAME hs_nvidia_sm35_u64 - -// -// -// - -extern struct hs_vk_target const HS_TARGET_NAME; - -// -// -//