GreemDev-Ryujinx/src/Ryujinx.Graphics.Metal/Shaders/ChangeBufferStride.metal
Isaac Marovitz dae0f3cded Argument Buffers (#24)
* Stuff

* More arg buffer stuff

* Fixes

* Rebase

* Pass storage buffers to inline functions

* Fix binding

* Fix typo + Fix a couple shaders

* Enforce ids

* Dispose

* Mark used buffers as resident

* Update depth clear shader

* Fix non-contiguous struct defs

* Update ChangeBufferStride

* Fix StorageBuffer assignments

* Fix odyssey crash

* Retain buffer bindings

* Pad Std140

* Set texture data with safe buffers

* Clone buffers

* Always declare vert in

* Stop clears from breaking OpenGL games

* Fix depth clear

* Use invariant position

* Horribly inefficient texture & sampler arg buffers

* Fix missing struct access

* Minimise rebinds as much as possible

* Build arg buffers on staging buffer
2024-12-23 21:37:01 -06:00

73 lines
2.3 KiB
Metal

#include <metal_stdlib>
using namespace metal;
struct StrideArguments {
int4 data;
};
struct InData {
uint8_t data[1];
};
struct OutData {
uint8_t data[1];
};
struct ConstantBuffers {
constant StrideArguments* stride_arguments;
};
struct StorageBuffers {
ulong padding;
device InData* in_data;
device OutData* out_data;
};
kernel void kernelMain(constant ConstantBuffers &constant_buffers [[buffer(20)]],
device StorageBuffers &storage_buffers [[buffer(21)]],
uint3 thread_position_in_grid [[thread_position_in_grid]],
uint3 threads_per_threadgroup [[threads_per_threadgroup]],
uint3 threadgroups_per_grid [[threads_per_grid]])
{
// Determine what slice of the stride copies this invocation will perform.
int sourceStride = constant_buffers.stride_arguments->data.x;
int targetStride = constant_buffers.stride_arguments->data.y;
int bufferSize = constant_buffers.stride_arguments->data.z;
int sourceOffset = constant_buffers.stride_arguments->data.w;
int strideRemainder = targetStride - sourceStride;
int invocations = int(threads_per_threadgroup.x * threadgroups_per_grid.x);
int copiesRequired = bufferSize / sourceStride;
// Find the copies that this invocation should perform.
// - Copies that all invocations perform.
int allInvocationCopies = copiesRequired / invocations;
// - Extra remainder copy that this invocation performs.
int index = int(thread_position_in_grid.x);
int extra = (index < (copiesRequired % invocations)) ? 1 : 0;
int copyCount = allInvocationCopies + extra;
// Finally, get the starting offset. Make sure to count extra copies.
int startCopy = allInvocationCopies * index + min(copiesRequired % invocations, index);
int srcOffset = sourceOffset + startCopy * sourceStride;
int dstOffset = startCopy * targetStride;
// Perform the copies for this region
for (int i = 0; i < copyCount; i++) {
for (int j = 0; j < sourceStride; j++) {
storage_buffers.out_data->data[dstOffset++] = storage_buffers.in_data->data[srcOffset++];
}
for (int j = 0; j < strideRemainder; j++) {
storage_buffers.out_data->data[dstOffset++] = uint8_t(0);
}
}
}