Skip to content

Commit

Permalink
Only dirty the uniform UVSCALEOFFSET when really needed
Browse files Browse the repository at this point in the history
Broken out from #17479

With OpenGL, this greatly reduces the number of glUniform4fv calls in many games (with
similar reductions in the other backends).
  • Loading branch information
hrydgard committed May 25, 2023
1 parent 82934b9 commit 80e47b7
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 23 deletions.
4 changes: 2 additions & 2 deletions GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex
// Code checks this reg directly, not just the vtype ID.
if (!prevThrough) {
gstate.vertType |= GE_VTYPE_THROUGH;
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
}

int bytesRead;
Expand All @@ -246,7 +246,7 @@ void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex

if (!prevThrough) {
gstate.vertType &= ~GE_VTYPE_THROUGH;
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
}
}

Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
id.SetBit(FS_BIT_COLOR_WRITEMASK, colorWriteMask);

// All framebuffers are array textures in Vulkan now.
if (gstate_c.arrayTexture && g_Config.iGPUBackend == (int)GPUBackend::VULKAN) {
if (gstate_c.textureIsArray && g_Config.iGPUBackend == (int)GPUBackend::VULKAN) {
id.SetBit(FS_BIT_SAMPLE_ARRAY_TEXTURE);
}

Expand Down
4 changes: 4 additions & 0 deletions GPU/Common/TextureCacheCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ TexCacheEntry *TextureCacheCommon::SetTexture() {
Unbind();
gstate_c.SetTextureIs3D(false);
gstate_c.SetTextureIsArray(false);
gstate_c.SetTextureIsFramebuffer(false);
return nullptr;
}

Expand Down Expand Up @@ -573,6 +574,7 @@ TexCacheEntry *TextureCacheCommon::SetTexture() {
gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0);
gstate_c.SetTextureIsArray(false);
gstate_c.SetTextureIsBGRA((entry->status & TexCacheEntry::STATUS_BGRA) != 0);
gstate_c.SetTextureIsFramebuffer(false);

if (rehash) {
// Update in case any of these changed.
Expand Down Expand Up @@ -681,6 +683,7 @@ TexCacheEntry *TextureCacheCommon::SetTexture() {
gstate_c.curTextureHeight = h;
gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0);
gstate_c.SetTextureIsArray(false); // Ordinary 2D textures still aren't used by array view in VK. We probably might as well, though, at this point..
gstate_c.SetTextureIsFramebuffer(false);

failedTexture_ = false;
nextTexture_ = entry;
Expand Down Expand Up @@ -1154,6 +1157,7 @@ void TextureCacheCommon::SetTextureFramebuffer(const AttachCandidate &candidate)
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
gstate_c.SetTextureIsBGRA(false);
gstate_c.SetTextureIsFramebuffer(true);
gstate_c.curTextureXOffset = fbInfo.xOffset;
gstate_c.curTextureYOffset = fbInfo.yOffset;
u32 texW = (u32)gstate.getTextureWidth(0);
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/VertexDecoderCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1176,7 +1176,7 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,

steps_[numSteps_++] = morphcount == 1 ? colstep[col] : colstep_morph[col];

// All color formats decode to DEC_U8_4 currently.
// All color formats decode to DEC_U8_4.
// They can become floats later during transform though.
decFmt.c0fmt = DEC_U8_4;
decFmt.c0off = decOff;
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/VertexDecoderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ class VertexDecoder {

u32 VertexType() const { return fmt_; }

const DecVtxFormat &GetDecVtxFmt() { return decFmt; }
const DecVtxFormat &GetDecVtxFmt() const { return decFmt; }

void DecodeVerts(u8 *decoded, const void *verts, int indexLowerBound, int indexUpperBound) const;

Expand Down
20 changes: 7 additions & 13 deletions GPU/GPUCommonHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_TEXFORMAT, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE },
{ GE_CMD_TEXLEVEL, FLAG_EXECUTEONCHANGE, DIRTY_TEXTURE_PARAMS, &GPUCommonHW::Execute_TexLevel },
{ GE_CMD_TEXLODSLOPE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
{ GE_CMD_TEXADDR0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE | DIRTY_UVSCALEOFFSET },
{ GE_CMD_TEXADDR0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE },
{ GE_CMD_TEXADDR1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
{ GE_CMD_TEXADDR2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
{ GE_CMD_TEXADDR3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
Expand Down Expand Up @@ -827,13 +827,14 @@ void GPUCommonHW::FastRunLoop(DisplayList &list) {
}

void GPUCommonHW::Execute_VertexType(u32 op, u32 diff) {
if (diff)
if (diff) {
// TODO: We only need to dirty vshader-state here if the output format will be different.
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK)) {
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
// Switching between through and non-through, we need to invalidate a bunch of stuff.
if (diff & GE_VTYPE_THROUGH_MASK)

if (diff & GE_VTYPE_THROUGH_MASK) {
// Switching between through and non-through, we need to invalidate a bunch of stuff.
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE);
}
}
}

Expand All @@ -844,8 +845,6 @@ void GPUCommonHW::Execute_VertexTypeSkinning(u32 op, u32 diff) {
gstate.vertType ^= diff;
Flush();
gstate.vertType ^= diff;
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK))
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
// In this case, we may be doing weights and morphs.
// Update any bone matrix uniforms so it uses them correctly.
if ((op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
Expand Down Expand Up @@ -1125,8 +1124,6 @@ void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {

void GPUCommonHW::Execute_Bezier(u32 op, u32 diff) {
// We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier.
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);

gstate_c.framebufFormat = gstate.FrameBufFormat();

// This also makes skipping drawing very effective.
Expand Down Expand Up @@ -1198,8 +1195,6 @@ void GPUCommonHW::Execute_Bezier(u32 op, u32 diff) {

void GPUCommonHW::Execute_Spline(u32 op, u32 diff) {
// We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier.
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);

gstate_c.framebufFormat = gstate.FrameBufFormat();

// This also makes skipping drawing very effective.
Expand Down Expand Up @@ -1289,7 +1284,6 @@ void GPUCommonHW::Execute_TexSize0(u32 op, u32 diff) {
if (diff || gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS)) {
gstate_c.curTextureWidth = gstate.getTextureWidth(0);
gstate_c.curTextureHeight = gstate.getTextureHeight(0);
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
// We will need to reset the texture now.
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
}
Expand Down
13 changes: 10 additions & 3 deletions GPU/GPUState.h
Original file line number Diff line number Diff line change
Expand Up @@ -569,8 +569,8 @@ struct GPUStateCache {
}
}
void SetTextureIsArray(bool isArrayTexture) { // VK only
if (arrayTexture != isArrayTexture) {
arrayTexture = isArrayTexture;
if (textureIsArray != isArrayTexture) {
textureIsArray = isArrayTexture;
Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
}
Expand All @@ -580,6 +580,12 @@ struct GPUStateCache {
Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
}
// Updates the textureIsFramebuffer flag. DIRTY_UVSCALEOFFSET is raised only
// when the flag actually changes value, so repeated calls with the same
// value do not dirty the uniform again.
void SetTextureIsFramebuffer(bool isFramebuffer) {
if (textureIsFramebuffer != isFramebuffer) {
textureIsFramebuffer = isFramebuffer;
Dirty(DIRTY_UVSCALEOFFSET);
}
}
void SetUseFlags(u32 newFlags) {
if (newFlags != useFlags_) {
if (useFlags_ != 0)
Expand Down Expand Up @@ -614,7 +620,8 @@ struct GPUStateCache {

bool bgraTexture;
bool needShaderTexClamp;
bool arrayTexture;
bool textureIsArray;
bool textureIsFramebuffer;
bool useFlagsChanged;

float morphWeights[8];
Expand Down
4 changes: 2 additions & 2 deletions GPU/Vulkan/DrawEngineVulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,7 @@ void DrawEngineVulkan::DoFlush() {
textureCache_->ApplyTexture();
textureCache_->GetVulkanHandles(imageView, sampler);
if (imageView == VK_NULL_HANDLE)
imageView = (VkImageView)draw_->GetNativeObject(gstate_c.arrayTexture ? Draw::NativeObject::NULL_IMAGEVIEW_ARRAY : Draw::NativeObject::NULL_IMAGEVIEW);
imageView = (VkImageView)draw_->GetNativeObject(gstate_c.textureIsArray ? Draw::NativeObject::NULL_IMAGEVIEW_ARRAY : Draw::NativeObject::NULL_IMAGEVIEW);
if (sampler == VK_NULL_HANDLE)
sampler = nullSampler_;
}
Expand Down Expand Up @@ -910,7 +910,7 @@ void DrawEngineVulkan::DoFlush() {
textureCache_->ApplyTexture();
textureCache_->GetVulkanHandles(imageView, sampler);
if (imageView == VK_NULL_HANDLE)
imageView = (VkImageView)draw_->GetNativeObject(gstate_c.arrayTexture ? Draw::NativeObject::NULL_IMAGEVIEW_ARRAY : Draw::NativeObject::NULL_IMAGEVIEW);
imageView = (VkImageView)draw_->GetNativeObject(gstate_c.textureIsArray ? Draw::NativeObject::NULL_IMAGEVIEW_ARRAY : Draw::NativeObject::NULL_IMAGEVIEW);
if (sampler == VK_NULL_HANDLE)
sampler = nullSampler_;
}
Expand Down

0 comments on commit 80e47b7

Please sign in to comment.