diff --git a/README.md b/README.md index bec6ca4..71ae4e3 100644 --- a/README.md +++ b/README.md @@ -3,13 +3,39 @@ Vulkan Flocking: compute and shading in one pipeline! **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 6** -* (TODO) YOUR NAME HERE - Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Ruoyu Fan +* Tested on: Windows 10 x64, i7-4720HQ @ 2.60GHz, 16GB Memory, GTX 970M 3072MB (personal laptop) + * Visual Studio 2015 & LunarG Vulkan SDK 1.0.30.0 - ### (TODO: Your README) +![](screenshots/1.gif) - Include screenshots, analysis, etc. (Remember, this is public, so don't put - anything here that you don't want to share with the world.) +### Q&A + +> * Why do you think Vulkan expects explicit descriptors for things like +generating pipelines and commands? HINT: this may relate to something in the +comments about some components using pre-allocated GPU memory. + +Because command buffers in Vulkan live in a pre-allocated GPU command pool, and we cannot +update them once created, they need updatable descriptor sets to figure out which +buffers to operate on and how to correctly map data from buffers to the inputs and outputs of every stage of the pipeline. +This way we can use a single command buffer to operate on varying data. + +> * Describe a situation besides flip-flop buffers in which you may need multiple +descriptor sets to fit one descriptor layout. + +For example, in a deferred shading pipeline's debug view, instead of passing the current state and all g-buffers into the debug fragment shader, I can use the depth/color/normal maps as different descriptor sets in one descriptor layout, and use different sets according to the current configuration. + +> * What are some problems to keep in mind when using multiple Vulkan queues? 
+> * take into consideration that different queues may be backed by different hardware +> * take into consideration that the same buffer may be used across multiple queues + +* Queue operations on different queues have no implicit ordering constraints, and may execute in any order. Explicit ordering constraints between queues can be expressed with semaphores and fences. (https://www.khronos.org/registry/vulkan/specs/1.0/xhtml/vkspec.html#fundamentals-queueoperation) +* When two queues are operating on the same buffer, we need to take race conditions into consideration. + +> * What is one advantage of using compute commands that can share data with a +rendering pipeline? + +We don't need to copy the inputs and outputs of the compute and render stages around, which could be a very large amount of data to copy. ### Credits diff --git a/data/shaders/computeparticles/generate-spirv.bat b/data/shaders/computeparticles/generate-spirv.bat index be03b7c..a6b8c8c 100644 --- a/data/shaders/computeparticles/generate-spirv.bat +++ b/data/shaders/computeparticles/generate-spirv.bat @@ -1,5 +1,3 @@ -glslangvalidator -V particle.frag -o particle.frag.spv -glslangvalidator -V particle.vert -o particle.vert.spv -glslangvalidator -V particle.comp -o particle.comp.spv - - +glslangvalidator -V particle.frag.glsl -o particle.frag.spv -S frag +glslangvalidator -V particle.vert.glsl -o particle.vert.spv -S vert +glslangvalidator -V particle.comp.glsl -o particle.comp.spv -S comp diff --git a/data/shaders/computeparticles/particle.comp b/data/shaders/computeparticles/particle.comp deleted file mode 100644 index b7dc2f7..0000000 --- a/data/shaders/computeparticles/particle.comp +++ /dev/null @@ -1,77 +0,0 @@ -#version 450 - -#extension GL_ARB_separate_shader_objects : enable -#extension GL_ARB_shading_language_420pack : enable - -struct Particle -{ - vec2 pos; - vec2 vel; -}; - -// LOOK: These bindings correspond to the DescriptorSetLayouts and -// the DescriptorSets from prepareCompute()! 
- -// Binding 0 : Particle storage buffer (read) -layout(std140, binding = 0) buffer ParticlesA -{ - Particle particlesA[ ]; -}; - -// Binding 1 : Particle storage buffer (write) -layout(std140, binding = 1) buffer ParticlesB -{ - Particle particlesB[ ]; -}; - -layout (local_size_x = 16, local_size_y = 16) in; - -// LOOK: rule weights and distances, as well as particle count, based off uniforms. -// The deltaT here has to be updated every frame to account for changes in -// frame rate. -layout (binding = 2) uniform UBO -{ - float deltaT; - float rule1Distance; - float rule2Distance; - float rule3Distance; - float rule1Scale; - float rule2Scale; - float rule3Scale; - int particleCount; -} ubo; - -void main() -{ - // LOOK: This is very similar to a CUDA kernel. - // Right now, the compute shader only advects the particles with their - // velocity and handles wrap-around. - // TODO: implement flocking behavior. - - // Current SSBO index - uint index = gl_GlobalInvocationID.x; - // Don't try to write beyond particle count - if (index >= ubo.particleCount) - return; - - // Read position and velocity - vec2 vPos = particlesA[index].pos.xy; - vec2 vVel = particlesA[index].vel.xy; - - // clamp velocity for a more pleasing simulation. 
- vVel = normalize(vVel) * clamp(length(vVel), 0.0, 0.1); - - // kinematic update - vPos += vVel * ubo.deltaT; - - // Wrap around boundary - if (vPos.x < -1.0) vPos.x = 1.0; - if (vPos.x > 1.0) vPos.x = -1.0; - if (vPos.y < -1.0) vPos.y = 1.0; - if (vPos.y > 1.0) vPos.y = -1.0; - - particlesB[index].pos.xy = vPos; - - // Write back - particlesB[index].vel.xy = vVel; -} diff --git a/data/shaders/computeparticles/particle.comp.glsl b/data/shaders/computeparticles/particle.comp.glsl new file mode 100644 index 0000000..097601b --- /dev/null +++ b/data/shaders/computeparticles/particle.comp.glsl @@ -0,0 +1,137 @@ +#version 450 + +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shading_language_420pack : enable + +struct Particle +{ + vec2 pos; + vec2 vel; +}; + +// LOOK: These bindings correspond to the DescriptorSetLayouts and +// the DescriptorSets from prepareCompute()! + +// Binding 0 : Particle storage buffer (read) +layout(std140, binding = 0) buffer ParticlesA +{ + Particle particlesA[ ]; +}; + +// Binding 1 : Particle storage buffer (write) +layout(std140, binding = 1) buffer ParticlesB +{ + Particle particlesB[ ]; +}; + +layout (local_size_x = 16, local_size_y = 16) in; + +// LOOK: rule weights and distances, as well as particle count, based off uniforms. +// The deltaT here has to be updated every frame to account for changes in +// frame rate. +layout (binding = 2) uniform UBO +{ + float deltaT; + float rule1Distance; + float rule2Distance; + float rule3Distance; + float rule1Scale; + float rule2Scale; + float rule3Scale; + int particleCount; +} ubo; + +void main() +{ + // LOOK: This is very similar to a CUDA kernel. + // The compute shader applies the three boids flocking rules to the velocity, + // clamps the speed, then advects the particle and handles wrap-around. + // DONE: implement flocking behavior. 
+ + // Current SSBO index + uint index = gl_GlobalInvocationID.x; + // Don't try to write beyond particle count + if (index >= ubo.particleCount) + return; + + // Read position and velocity + vec2 vPos = particlesA[index].pos.xy; + vec2 vVel = particlesA[index].vel.xy; + + vec2 delta_vel = vec2(0.0); + vec2 rule1_neighbor_pos_sum = vec2(0.0); + float rule1_neighbor_count = 0.0; + vec2 rule2_total_offset = vec2(0.0); + vec2 rule3_neighbor_vel_sum = vec2(0.0); + float rule3_neighbor_count = 0.0; + + vec2 current_offset; + float current_distance; + for (int i = 0; i < ubo.particleCount; i++) + { + if (i == index) continue; + + current_offset = particlesA[i].pos.xy - vPos; + current_distance = length(current_offset); + + // Rule 1: Get neighbor position sum and neighbor count for rule1 + if (current_distance < ubo.rule1Distance) + { + rule1_neighbor_pos_sum += particlesA[i].pos.xy; + rule1_neighbor_count += 1.0; + } + // Rule 2: Calculate offset for rule 2 + if (current_distance < ubo.rule2Distance) + { + rule2_total_offset -= current_offset; + } + // Rule 3: Get velocity sum and neighbor count for rule 3 + if (current_distance < ubo.rule3Distance) + { + rule3_neighbor_vel_sum += particlesA[i].vel.xy; + rule3_neighbor_count += 1.0; + } + + } + + // Rule 1: boids fly towards their local perceived center of mass, which excludes themselves + if (rule1_neighbor_count > 0.0) + { + delta_vel += ubo.rule1Scale * ((rule1_neighbor_pos_sum / rule1_neighbor_count) - vPos); + } + + // Rule 2: boids try to stay a distance d away from each other + delta_vel += ubo.rule2Scale * rule2_total_offset; + + // Rule 3: boids try to match the speed of surrounding boids + if (rule3_neighbor_count > 0.0) + { + delta_vel += ubo.rule3Scale * (rule3_neighbor_vel_sum / rule3_neighbor_count); // said this looks better using the parameters + //delta_vel += ubo.rule3Scale * ((rule3_neighbor_vel_sum / rule3_neighbor_count) - vVel); + } + + vVel += delta_vel; + + // clamp velocity for a more pleasing 
simulation. + // Rescale only when the speed exceeds the cap: normalize() divides by the + // length, so a zero-length velocity would otherwise turn into NaN. + float speed = length(vVel); + if (speed > 0.1) + { + vVel *= 0.1 / speed; + } + + // kinematic update + vPos += vVel * ubo.deltaT; + + // Wrap around boundary + if (vPos.x < -1.0) vPos.x = 1.0; + if (vPos.x > 1.0) vPos.x = -1.0; + if (vPos.y < -1.0) vPos.y = 1.0; + if (vPos.y > 1.0) vPos.y = -1.0; + + particlesB[index].pos.xy = vPos; + + // Write back + particlesB[index].vel.xy = vVel; +} diff --git a/data/shaders/computeparticles/particle.comp.spv b/data/shaders/computeparticles/particle.comp.spv index 059ab59..e63787a 100644 Binary files a/data/shaders/computeparticles/particle.comp.spv and b/data/shaders/computeparticles/particle.comp.spv differ diff --git a/data/shaders/computeparticles/particle.frag b/data/shaders/computeparticles/particle.frag.glsl similarity index 100% rename from data/shaders/computeparticles/particle.frag rename to data/shaders/computeparticles/particle.frag.glsl diff --git a/data/shaders/computeparticles/particle.vert b/data/shaders/computeparticles/particle.vert.glsl similarity index 100% rename from data/shaders/computeparticles/particle.vert rename to data/shaders/computeparticles/particle.vert.glsl diff --git a/screenshots/1.gif b/screenshots/1.gif new file mode 100644 index 0000000..fec2319 Binary files /dev/null and b/screenshots/1.gif differ diff --git a/screenshots/11.12.2016_progress_1.jpg b/screenshots/11.12.2016_progress_1.jpg new file mode 100644 index 0000000..4ea9fe4 Binary files /dev/null and b/screenshots/11.12.2016_progress_1.jpg differ diff --git a/screenshots/2.gif b/screenshots/2.gif new file mode 100644 index 0000000..f6d548b Binary files /dev/null and b/screenshots/2.gif differ diff --git a/vulkanBoids/vulkanBoids.cpp b/vulkanBoids/vulkanBoids.cpp index 9b2f122..3b54d7d 100644 --- a/vulkanBoids/vulkanBoids.cpp +++ b/vulkanBoids/vulkanBoids.cpp @@ -22,12 +22,13 @@ #define GLM_FORCE_DEPTH_ZERO_TO_ONE #include #include +#include #include #include "vulkanexamplebase.h" #define VERTEX_BUFFER_BIND_ID 0 -#define ENABLE_VALIDATION 
true // LOOK: toggle Vulkan validation layers. These make debugging much easier! +#define ENABLE_VALIDATION false // LOOK: toggle Vulkan validation layers. These make debugging much easier! #define PARTICLE_COUNT 4 * 1024 // LOOK: change particle count here // LOOK: constants for the boids algorithm. These will be passed to the GPU compute part of the assignment @@ -157,7 +158,8 @@ class VulkanExample : public VulkanExampleBase for (auto& particle : particleBuffer) { particle.pos = glm::vec2(rDistribution(rGenerator), rDistribution(rGenerator)); - // TODO: add randomized velocities with a slight scale here, something like 0.1f. + // DONE: add randomized velocities with a slight scale here, something like 0.1f. + particle.vel = glm::diskRand(0.1f); } VkDeviceSize storageBufferSize = particleBuffer.size() * sizeof(Particle); @@ -244,7 +246,7 @@ class VulkanExample : public VulkanExampleBase VERTEX_BUFFER_BIND_ID, 1, VK_FORMAT_R32G32_SFLOAT, - offsetof(Particle, pos)); // TODO: change this so that we can color the particles based on velocity. + offsetof(Particle, vel)); // DONE: change this so that we can color the particles based on velocity. // vertices.inputState encapsulates everything we need for these particular buffers to // interface with the graphics pipeline. @@ -540,14 +542,37 @@ class VulkanExample : public VulkanExampleBase compute.descriptorSets[0], VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2, - &compute.uniformBuffer.descriptor) + &compute.uniformBuffer.descriptor), - // TODO: write the second descriptorSet, using the top for reference. + // DONE: write the second descriptorSet, using the top for reference. // We want the descriptorSets to be used for flip-flopping: // on one frame, we use one descriptorSet with the compute pass, // on the next frame, we use the other. // What has to be different about how the second descriptorSet is written here? 
+ + // Binding 0 : Particle position storage buffer + vkTools::initializers::writeDescriptorSet( + compute.descriptorSets[1], + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + 0, // LOOK: which binding in the descriptor set Layout? + &compute.storageBufferB.descriptor), + + // Binding 1 : Particle position storage buffer + vkTools::initializers::writeDescriptorSet( + compute.descriptorSets[1], + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + 1, + &compute.storageBufferA.descriptor), + + // Binding 2 : Uniform buffer + vkTools::initializers::writeDescriptorSet( + compute.descriptorSets[1], + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + 2, + &compute.uniformBuffer.descriptor) }; + + vkUpdateDescriptorSets(device, static_cast(computeWriteDescriptorSets.size()), computeWriteDescriptorSets.data(), 0, NULL); } @@ -568,7 +593,7 @@ class VulkanExample : public VulkanExampleBase VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE)); VulkanExampleBase::submitFrame(); - + // LOOK: wait for fence that was submitted with the compute commandBuffer to complete. // Then, reset it for the next round of compute vkWaitForFences(device, 1, &compute.fence, VK_TRUE, UINT64_MAX); @@ -583,13 +608,16 @@ class VulkanExample : public VulkanExampleBase // are done executing. VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, compute.fence)); - // TODO: handle flip-flop logic. We want the next iteration to + // DONE: handle flip-flop logic. We want the next iteration to // run the compute pipeline with flipped SSBOs, so we have to // swap the descriptorSets, which each allow access to the SSBOs // in one configuration. // We also want to flip what SSBO we draw with in the next // pass through the graphics pipeline. // Feel free to use std::swap here. You should need it twice. + std::swap(compute.descriptorSets[0], compute.descriptorSets[1]); + std::swap(compute.storageBufferA, compute.storageBufferB); + // TODO: ping-pong command buffers? 
} // Record command buffers for drawing using the graphics pipeline @@ -639,7 +667,9 @@ class VulkanExample : public VulkanExampleBase // How does this influence flip-flopping in draw()? // Try drawing with storageBufferA instead of storageBufferB. What happens? Why? VkDeviceSize offsets[1] = { 0 }; + //vkCmdBindVertexBuffers(drawCmdBuffers[i], VERTEX_BUFFER_BIND_ID, 1, &compute.storageBufferA.buffer, offsets); vkCmdBindVertexBuffers(drawCmdBuffers[i], VERTEX_BUFFER_BIND_ID, 1, &compute.storageBufferB.buffer, offsets); + vkCmdDraw(drawCmdBuffers[i], PARTICLE_COUNT, 1, 0, 0); vkCmdEndRenderPass(drawCmdBuffers[i]);