From 168e0eee4ee05b271b0b0ea2cd6e962a844f6586 Mon Sep 17 00:00:00 2001 From: Alberto Mardegan Date: Sun, 8 Dec 2024 21:05:22 +0300 Subject: [PATCH 1/8] utils: fix conversion of matrices from GL to GX GX matrices are 3x4, and the old code was overflowing: we need to skip the fourth row of the matrix. Fixes one issue from https://github.com/devkitPro/opengx/issues/87 --- src/utils.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/utils.h b/src/utils.h index 1be5fd1..dfded50 100644 --- a/src/utils.h +++ b/src/utils.h @@ -275,10 +275,12 @@ static inline void gl_matrix_to_gx(const GLfloat *source, Mtx mv) float w = source[15]; if (w != 1.0 && w != 0.0) { for (int i = 0; i < 16; i++) { + if (i % 4 == 3) continue; mv[i%4][i/4] = source[i] / w; } } else { for (int i = 0; i < 16; i++) { + if (i % 4 == 3) continue; mv[i%4][i/4] = source[i]; } } From e49a642a7c72e5a0420a2492482e5c6b50bc262b Mon Sep 17 00:00:00 2001 From: Alberto Mardegan Date: Sun, 8 Dec 2024 22:14:21 +0300 Subject: [PATCH 2/8] matrices: fix glFrustum() It was using the Mtx44 matrix, which is row-major, whereas glMultMatrix expects a column-major matrix. 
--- src/gc_gl.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/gc_gl.c b/src/gc_gl.c index 0192242..0204618 100644 --- a/src/gc_gl.c +++ b/src/gc_gl.c @@ -2490,30 +2490,30 @@ static void draw_arrays_general(DrawMode gxmode, int first, int count) void glFrustum(GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble near, GLdouble far) { - Mtx44 mt; + float mt[16]; f32 tmp; tmp = 1.0f / (right - left); - mt[0][0] = (2 * near) * tmp; - mt[0][1] = 0.0f; - mt[0][2] = (right + left) * tmp; - mt[0][3] = 0.0f; + mt[0] = (2 * near) * tmp; + mt[4] = 0.0f; + mt[8] = (right + left) * tmp; + mt[12] = 0.0f; tmp = 1.0f / (top - bottom); - mt[1][0] = 0.0f; - mt[1][1] = (2 * near) * tmp; - mt[1][2] = (top + bottom) * tmp; - mt[1][3] = 0.0f; + mt[1] = 0.0f; + mt[5] = (2 * near) * tmp; + mt[9] = (top + bottom) * tmp; + mt[13] = 0.0f; tmp = 1.0f / (far - near); - mt[2][0] = 0.0f; - mt[2][1] = 0.0f; - mt[2][2] = -(far + near) * tmp; - mt[2][3] = -2.0 * (far * near) * tmp; - mt[3][0] = 0.0f; - mt[3][1] = 0.0f; - mt[3][2] = -1.0f; - mt[3][3] = 0.0f; - - glMultMatrixf((float *)mt); + mt[2] = 0.0f; + mt[6] = 0.0f; + mt[10] = -(far + near) * tmp; + mt[14] = -2.0 * (far * near) * tmp; + mt[3] = 0.0f; + mt[7] = 0.0f; + mt[11] = -1.0f; + mt[15] = 0.0f; + + glMultMatrixf(mt); } void glOrtho(GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble near_val, GLdouble far_val) From bf22be0b8338af21afe8d224d0272ffcb4a3faf3 Mon Sep 17 00:00:00 2001 From: Alberto Mardegan Date: Mon, 9 Dec 2024 20:59:43 +0300 Subject: [PATCH 3/8] lists: remove unused union member --- src/call_lists.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/call_lists.c b/src/call_lists.c index 8f6f66c..bd4a21b 100644 --- a/src/call_lists.c +++ b/src/call_lists.c @@ -53,12 +53,6 @@ typedef struct { CommandType type; union { - struct GXDisplayList { - void *list; - u32 size; - struct client_state cs; - } gxlist; - GLuint 
gllist; // glCallList GLenum cap; // glEnable, glDisable From 92d61c61fabfe4567c70caddd375ba5aee2195f3 Mon Sep 17 00:00:00 2001 From: Alberto Mardegan Date: Mon, 9 Dec 2024 21:02:47 +0300 Subject: [PATCH 4/8] arrays: do never send more than 2 coordinates per texture This causes FIFO errors, since we told GX that we are sending two coordinates, but effectively send three. So we need to ensure that we never send more than two texture coordinates. --- src/arrays.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/arrays.cpp b/src/arrays.cpp index de32918..dbad5fb 100644 --- a/src/arrays.cpp +++ b/src/arrays.cpp @@ -116,6 +116,9 @@ static TemplateSelectionInfo select_template(GLenum type, info.format.type = num_components == 1 ? GX_TEX_S : GX_TEX_ST; info.format.size = gl_type_to_gx_size(type); info.same_type = num_components <= 2; + /* The hardware does not support sending more than 2 texture + * coordinates */ + if (num_components > 2) info.format.num_components = 2; break; case GX_VA_CLR0: case GX_VA_CLR1: From 89bc099fcd2d4b3c0ca829742855a79766c93dba Mon Sep 17 00:00:00 2001 From: Alberto Mardegan Date: Mon, 9 Dec 2024 21:05:09 +0300 Subject: [PATCH 5/8] state: redefine client_state as union Add an "as_int" member that allows comparing the structure as a whole. --- src/call_lists.c | 4 ++-- src/gc_gl.c | 8 ++------ src/state.h | 17 ++++++++++------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/call_lists.c b/src/call_lists.c index bd4a21b..ceb036f 100644 --- a/src/call_lists.c +++ b/src/call_lists.c @@ -103,7 +103,7 @@ typedef struct struct DrawGeometry { GLenum mode; uint16_t count; - struct client_state cs; + union client_state cs; u32 list_size; void *gxlist; } draw_geometry; @@ -267,7 +267,7 @@ static void flat_draw_geometry(void *cb_data) static void run_draw_geometry(struct DrawGeometry *dg) { - struct client_state cs; + union client_state cs; /* Update the drawing mode on the list. This required peeping into * GX_Begin() code. 
*/ diff --git a/src/gc_gl.c b/src/gc_gl.c index 0204618..37bf6d3 100644 --- a/src/gc_gl.c +++ b/src/gc_gl.c @@ -300,11 +300,7 @@ void ogx_initialize() glparamstate.imm_mode.current_numverts = 0; glparamstate.imm_mode.in_gl_begin = 0; - glparamstate.cs.vertex_enabled = 0; // DisableClientState on everything - glparamstate.cs.normal_enabled = 0; - glparamstate.cs.texcoord_enabled = 0; - glparamstate.cs.index_enabled = 0; - glparamstate.cs.color_enabled = 0; + glparamstate.cs.as_int = 0; // DisableClientState on everything glparamstate.texture_enabled = 0; glparamstate.pack_alignment = 4; @@ -949,7 +945,7 @@ void glBegin(GLenum mode) void glEnd() { - struct client_state cs_backup = glparamstate.cs; + union client_state cs_backup = glparamstate.cs; VertexData *base = glparamstate.imm_mode.current_vertices; int stride = sizeof(VertexData); for (int i = 0; i < MAX_TEXTURE_UNITS; i++) { diff --git a/src/state.h b/src/state.h index 3db3c20..c106894 100644 --- a/src/state.h +++ b/src/state.h @@ -181,14 +181,17 @@ typedef struct glparams_ void *index_array; OgxArrayReader vertex_array, normal_array, color_array; OgxArrayReader texcoord_array[MAX_TEXTURE_UNITS]; - struct client_state + union client_state { - unsigned vertex_enabled : 1; - unsigned normal_enabled : 1; - unsigned index_enabled : 1; - unsigned color_enabled : 1; - unsigned texcoord_enabled : MAX_TEXTURE_UNITS; - char active_texture; + struct { + unsigned vertex_enabled : 1; + unsigned normal_enabled : 1; + unsigned index_enabled : 1; + unsigned color_enabled : 1; + unsigned texcoord_enabled : MAX_TEXTURE_UNITS; + char active_texture; + }; + uint32_t as_int; } cs; unsigned texture_enabled : MAX_TEXTURE_UNITS; From 4b276ba838e192c88d35ac0c69a4b93b02deaff8 Mon Sep 17 00:00:00 2001 From: Alberto Mardegan Date: Tue, 10 Dec 2024 18:23:27 +0300 Subject: [PATCH 6/8] state: setup fog only if its settings changed Move the setup_fog() function out of the _ogx_setup_render_stages() function and into _ogx_apply_state(), 
since by render stages we mean all that is related to the TEV stages. Add a dirty bit to signal when the fogging configuration has changed and needs to be set up again. --- src/gc_gl.c | 16 ++++++++++++---- src/state.h | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/gc_gl.c b/src/gc_gl.c index 37bf6d3..f3c52cc 100644 --- a/src/gc_gl.c +++ b/src/gc_gl.c @@ -586,6 +586,7 @@ void glEnable(GLenum cap) break; case GL_FOG: glparamstate.fog.enabled = 1; + glparamstate.dirty.bits.dirty_fog = 1; break; case GL_LIGHTING: glparamstate.lighting.enabled = 1; @@ -696,6 +697,7 @@ void glFogf(GLenum pname, GLfloat param) glparamstate.fog.end = param; break; } + glparamstate.dirty.bits.dirty_fog = 1; } void glFogi(GLenum pname, GLint param) @@ -708,8 +710,9 @@ void glFogi(GLenum pname, GLint param) case GL_FOG_START: case GL_FOG_END: glFogf(pname, param); - break; + return; } + glparamstate.dirty.bits.dirty_fog = 1; } void glFogfv(GLenum pname, const GLfloat *params) @@ -720,11 +723,12 @@ void glFogfv(GLenum pname, const GLfloat *params) case GL_FOG_START: case GL_FOG_END: glFogf(pname, params[0]); - break; + return; case GL_FOG_COLOR: floatcpy(glparamstate.fog.color, params, 4); break; } + glparamstate.dirty.bits.dirty_fog = 1; } void glLightf(GLenum light, GLenum pname, GLfloat param) @@ -1347,6 +1351,7 @@ void glClear(GLbitfield mask) if (glparamstate.fog.enabled) { /* Disable fog while clearing */ GX_SetFog(GX_FOG_NONE, 0.0, 0.0, 0.0, 0.0, glparamstate.clear_color); + glparamstate.dirty.bits.dirty_fog = 1; } GX_Begin(GX_QUADS, GX_VTXFMT0, 4); @@ -2226,8 +2231,6 @@ bool _ogx_setup_render_stages() * OgxGpuResources::{tevstage,texcoord}_first. 
*/ GX_SetNumTevStages(_ogx_gpu_resources->tevstage_first); GX_SetNumTexGens(_ogx_gpu_resources->texcoord_first); - - setup_fog(); return true; } @@ -2281,6 +2284,11 @@ void _ogx_apply_state() update_normal_matrix(); } + if (glparamstate.dirty.bits.dirty_fog) { + setup_fog(); + glparamstate.dirty.bits.dirty_fog = 0; + } + /* Reset the updated bits to 0. We don't unconditionally reset everything * to 0 because some states might still be dirty: for example, the stencil * checks alters the texture coordinate generation. */ diff --git a/src/state.h b/src/state.h index c106894..82b1ca3 100644 --- a/src/state.h +++ b/src/state.h @@ -245,6 +245,7 @@ typedef struct glparams_ unsigned dirty_clip_planes : 1; unsigned dirty_cull : 1; unsigned dirty_stencil : 1; + unsigned dirty_fog : 1; } bits; unsigned int all; } dirty; From 51328e6c5e6b1d5dc95d52488aefa1cca0e96cec Mon Sep 17 00:00:00 2001 From: Alberto Mardegan Date: Tue, 10 Dec 2024 19:12:20 +0300 Subject: [PATCH 7/8] state: do not rebuild TEV stages unless needed Add a dirty flag to track whether the TEV stages must be rebuilt. This replaces the dirty_lighting, dirty_clip_planes, dirty_stencil and the (de facto unused) dirty_material flags, because all these factors ultimately require the TEV to be rebuilt. This is not super-precise, and we err on the side of rendering safety, but it still brings a huge speed up in the crack-attack game, which uses call lists with several draw commands in each (so the state is guaranteed to be clean between one command and the next). 
--- src/accum.c | 1 + src/clip.c | 6 +++-- src/efb.c | 1 + src/gc_gl.c | 65 ++++++++++++++++++++++++++++++++----------------- src/raster.cpp | 1 + src/selection.c | 1 + src/state.h | 5 +--- src/stencil.c | 7 ++++-- src/texture.c | 11 +++++++-- src/vertex.cpp | 2 ++ 10 files changed, 68 insertions(+), 32 deletions(-) diff --git a/src/accum.c b/src/accum.c index c065c5a..57ce3ab 100644 --- a/src/accum.c +++ b/src/accum.c @@ -72,6 +72,7 @@ static void draw_screen(GXTexObj *texture, float value) GX_SetNumChans(1); GX_SetChanCtrl(GX_COLOR0A0, GX_DISABLE, GX_SRC_VTX, GX_SRC_VTX, 0, GX_DF_NONE, GX_AF_NONE); + glparamstate.dirty.bits.dirty_tev = 1; GX_SetCullMode(GX_CULL_NONE); glparamstate.dirty.bits.dirty_cull = 1; diff --git a/src/clip.c b/src/clip.c index 62fbae5..628e7b8 100644 --- a/src/clip.c +++ b/src/clip.c @@ -165,13 +165,13 @@ void _ogx_clip_setup_tev() void _ogx_clip_enabled(int plane) { glparamstate.clip_plane_mask |= 1 << plane; - glparamstate.dirty.bits.dirty_clip_planes = 1; + glparamstate.dirty.bits.dirty_tev = 1; } void _ogx_clip_disabled(int plane) { glparamstate.clip_plane_mask &= ~(1 << plane); - glparamstate.dirty.bits.dirty_clip_planes = 1; + glparamstate.dirty.bits.dirty_tev = 1; } void glClipPlane(GLenum plane, const GLdouble *equation) @@ -192,4 +192,6 @@ void glClipPlane(GLenum plane, const GLdouble *equation) guMtx44Inverse(mv, mv_inverse); ClipPlane p0 = { equation[0], equation[1], equation[2], equation[3] }; mtx44_multiply(p0, mv_inverse, *p); + + glparamstate.dirty.bits.dirty_tev = 1; } diff --git a/src/efb.c b/src/efb.c index 632249c..652d15f 100644 --- a/src/efb.c +++ b/src/efb.c @@ -88,6 +88,7 @@ void _ogx_efb_restore_texobj(GXTexObj *texobj) GX_SetNumChans(0); GX_SetTevOp(GX_TEVSTAGE0, GX_REPLACE); GX_SetTevOrder(GX_TEVSTAGE0, GX_TEXCOORD0, GX_TEXMAP0, GX_COLORNULL); + glparamstate.dirty.bits.dirty_tev = 1; GX_SetCullMode(GX_CULL_NONE); glparamstate.dirty.bits.dirty_cull = 1; diff --git a/src/gc_gl.c b/src/gc_gl.c index f3c52cc..ddd5f12 
100644 --- a/src/gc_gl.c +++ b/src/gc_gl.c @@ -544,6 +544,7 @@ void glEnable(GLenum cap) switch (cap) { case GL_TEXTURE_2D: glparamstate.texture_enabled |= (1 << glparamstate.active_texture); + glparamstate.dirty.bits.dirty_tev = 1; break; case GL_TEXTURE_GEN_S: case GL_TEXTURE_GEN_T: @@ -553,9 +554,11 @@ void glEnable(GLenum cap) OgxTextureUnit *tu = active_tex_unit(); tu->gen_enabled |= (1 << (cap - GL_TEXTURE_GEN_S)); } + glparamstate.dirty.bits.dirty_tev = 1; break; case GL_COLOR_MATERIAL: glparamstate.lighting.color_material_enabled = 1; + glparamstate.dirty.bits.dirty_tev = 1; break; case GL_CULL_FACE: glparamstate.cullenabled = 1; @@ -590,14 +593,14 @@ void glEnable(GLenum cap) break; case GL_LIGHTING: glparamstate.lighting.enabled = 1; - glparamstate.dirty.bits.dirty_lighting = 1; + glparamstate.dirty.bits.dirty_tev = 1; break; case GL_LIGHT0: case GL_LIGHT1: case GL_LIGHT2: case GL_LIGHT3: glparamstate.lighting.lights[cap - GL_LIGHT0].enabled = 1; - glparamstate.dirty.bits.dirty_lighting = 1; + glparamstate.dirty.bits.dirty_tev = 1; break; case GL_POINT_SPRITE: glparamstate.point_sprites_enabled = 1; @@ -618,6 +621,7 @@ void glDisable(GLenum cap) switch (cap) { case GL_TEXTURE_2D: glparamstate.texture_enabled &= ~(1 << glparamstate.active_texture); + glparamstate.dirty.bits.dirty_tev = 1; break; case GL_TEXTURE_GEN_S: case GL_TEXTURE_GEN_T: @@ -627,9 +631,11 @@ void glDisable(GLenum cap) OgxTextureUnit *tu = active_tex_unit(); tu->gen_enabled &= ~(1 << (cap - GL_TEXTURE_GEN_S)); } + glparamstate.dirty.bits.dirty_tev = 1; break; case GL_COLOR_MATERIAL: glparamstate.lighting.color_material_enabled = 0; + glparamstate.dirty.bits.dirty_tev = 1; break; case GL_CULL_FACE: glparamstate.cullenabled = 0; @@ -660,14 +666,14 @@ void glDisable(GLenum cap) break; case GL_LIGHTING: glparamstate.lighting.enabled = 0; - glparamstate.dirty.bits.dirty_lighting = 1; + glparamstate.dirty.bits.dirty_tev = 1; break; case GL_LIGHT0: case GL_LIGHT1: case GL_LIGHT2: case 
GL_LIGHT3: glparamstate.lighting.lights[cap - GL_LIGHT0].enabled = 0; - glparamstate.dirty.bits.dirty_lighting = 1; + glparamstate.dirty.bits.dirty_tev = 1; break; case GL_POINT_SPRITE: glparamstate.point_sprites_enabled = 0; @@ -756,7 +762,7 @@ void glLightf(GLenum light, GLenum pname, GLfloat param) default: break; } - glparamstate.dirty.bits.dirty_lighting = 1; + glparamstate.dirty.bits.dirty_tev = 1; } void glLightfv(GLenum light, GLenum pname, const GLfloat *params) @@ -794,7 +800,7 @@ void glLightfv(GLenum light, GLenum pname, const GLfloat *params) floatcpy(glparamstate.lighting.lights[lnum].specular_color, params, 4); break; } - glparamstate.dirty.bits.dirty_lighting = 1; + glparamstate.dirty.bits.dirty_tev = 1; } void glLightModelfv(GLenum pname, const GLfloat *params) @@ -804,7 +810,7 @@ void glLightModelfv(GLenum pname, const GLfloat *params) floatcpy(glparamstate.lighting.globalambient, params, 4); break; } - glparamstate.dirty.bits.dirty_material = 1; + glparamstate.dirty.bits.dirty_tev = 1; }; void glMaterialf(GLenum face, GLenum pname, GLfloat param) @@ -839,13 +845,14 @@ void glMaterialfv(GLenum face, GLenum pname, const GLfloat *params) default: break; } - glparamstate.dirty.bits.dirty_material = 1; + glparamstate.dirty.bits.dirty_tev = 1; }; void glColorMaterial(GLenum face, GLenum mode) { /* TODO: support the face parameter */ glparamstate.lighting.color_material_mode = mode; + glparamstate.dirty.bits.dirty_tev = 1; } void glPixelStoref(GLenum pname, GLfloat param) @@ -1023,6 +1030,7 @@ void glPopMatrix(void) } memcpy(glparamstate.projection_matrix, glparamstate.projection_stack[glparamstate.cur_proj_mat], sizeof(Mtx44)); glparamstate.cur_proj_mat--; + glparamstate.dirty.bits.dirty_matrices = 1; break; case 1: if (glparamstate.cur_modv_mat < 0) { @@ -1031,6 +1039,7 @@ void glPopMatrix(void) } memcpy(glparamstate.modelview_matrix, glparamstate.modelview_stack[glparamstate.cur_modv_mat], sizeof(Mtx)); glparamstate.cur_modv_mat--; + 
glparamstate.dirty.bits.dirty_matrices = 1; break; case 2: { @@ -1041,10 +1050,11 @@ void glPopMatrix(void) } tu->matrix_index--; } + glparamstate.dirty.bits.dirty_tev = 1; + break; default: break; } - glparamstate.dirty.bits.dirty_matrices = 1; } void glPushMatrix(void) @@ -1089,20 +1099,22 @@ void glLoadMatrixf(const GLfloat *m) switch (glparamstate.matrixmode) { case 0: gl_matrix_to_gx44(m, glparamstate.projection_matrix); + glparamstate.dirty.bits.dirty_matrices = 1; break; case 1: gl_matrix_to_gx(m, glparamstate.modelview_matrix); + glparamstate.dirty.bits.dirty_matrices = 1; break; case 2: { OgxTextureUnit *tu = active_tex_unit(); gl_matrix_to_gx(m, tu->matrix[tu->matrix_index]); } + glparamstate.dirty.bits.dirty_tev = 1; break; default: return; } - glparamstate.dirty.bits.dirty_matrices = 1; } void glMultMatrixd(const GLdouble *m) @@ -1126,12 +1138,15 @@ void glMultMatrixf(const GLfloat *m) gl_matrix_to_gx44(m, mtx44); guMtx44Concat(glparamstate.projection_matrix, mtx44, glparamstate.projection_matrix); + glparamstate.dirty.bits.dirty_matrices = 1; break; case 1: target = &glparamstate.modelview_matrix; + glparamstate.dirty.bits.dirty_matrices = 1; break; case 2: target = current_tex_matrix(); + glparamstate.dirty.bits.dirty_tev = 1; break; default: break; @@ -1141,7 +1156,6 @@ void glMultMatrixf(const GLfloat *m) gl_matrix_to_gx(m, mtx); guMtxConcat(*target, mtx, *target); } - glparamstate.dirty.bits.dirty_matrices = 1; } void glLoadIdentity() @@ -1151,21 +1165,22 @@ void glLoadIdentity() switch (glparamstate.matrixmode) { case 0: guMtx44Identity(glparamstate.projection_matrix); + glparamstate.dirty.bits.dirty_matrices = 1; break; case 1: guMtxIdentity(glparamstate.modelview_matrix); + glparamstate.dirty.bits.dirty_matrices = 1; break; case 2: { OgxTextureUnit *tu = active_tex_unit(); guMtxIdentity(tu->matrix[tu->matrix_index]); } + glparamstate.dirty.bits.dirty_tev = 1; break; default: return; } - - glparamstate.dirty.bits.dirty_matrices = 1; } void 
glScalef(GLfloat x, GLfloat y, GLfloat z) @@ -1179,12 +1194,15 @@ void glScalef(GLfloat x, GLfloat y, GLfloat z) guMtxApplyScale(glparamstate.projection_matrix, glparamstate.projection_matrix, x, y, z); + glparamstate.dirty.bits.dirty_matrices = 1; break; case 1: target = &glparamstate.modelview_matrix; + glparamstate.dirty.bits.dirty_matrices = 1; break; case 2: target = current_tex_matrix(); + glparamstate.dirty.bits.dirty_tev = 1; break; default: break; @@ -1193,7 +1211,6 @@ void glScalef(GLfloat x, GLfloat y, GLfloat z) if (target) { guMtxApplyScale(*target, *target, x, y, z); } - glparamstate.dirty.bits.dirty_matrices = 1; } void glTranslated(GLdouble x, GLdouble y, GLdouble z) @@ -1212,12 +1229,15 @@ void glTranslatef(GLfloat x, GLfloat y, GLfloat z) guMtxApplyTrans(glparamstate.projection_matrix, glparamstate.projection_matrix, x, y, z); + glparamstate.dirty.bits.dirty_matrices = 1; break; case 1: target = &glparamstate.modelview_matrix; + glparamstate.dirty.bits.dirty_matrices = 1; break; case 2: target = current_tex_matrix(); + glparamstate.dirty.bits.dirty_tev = 1; break; default: break; @@ -1226,7 +1246,6 @@ void glTranslatef(GLfloat x, GLfloat y, GLfloat z) if (target) { guMtxApplyTrans(*target, *target, x, y, z); } - glparamstate.dirty.bits.dirty_matrices = 1; } void glRotatef(GLfloat angle, GLfloat x, GLfloat y, GLfloat z) @@ -1244,12 +1263,15 @@ void glRotatef(GLfloat angle, GLfloat x, GLfloat y, GLfloat z) rot[3][0] = rot[3][1] = rot[3][2] = 0.0f; rot[3][3] = 1.0f; guMtx44Concat(glparamstate.projection_matrix, rot, glparamstate.projection_matrix); + glparamstate.dirty.bits.dirty_matrices = 1; break; case 1: target = &glparamstate.modelview_matrix; + glparamstate.dirty.bits.dirty_matrices = 1; break; case 2: target = current_tex_matrix(); + glparamstate.dirty.bits.dirty_tev = 1; break; default: break; @@ -1258,7 +1280,6 @@ void glRotatef(GLfloat angle, GLfloat x, GLfloat y, GLfloat z) if (target) { guMtxConcat(*target, rot, *target); } - 
glparamstate.dirty.bits.dirty_matrices = 1; } void glClearColor(GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha) @@ -1376,6 +1397,7 @@ void glClear(GLbitfield mask) glparamstate.dirty.bits.dirty_z = 1; glparamstate.dirty.bits.dirty_color_update = 1; glparamstate.dirty.bits.dirty_matrices = 1; + glparamstate.dirty.bits.dirty_tev = 1; glparamstate.dirty.bits.dirty_cull = 1; glparamstate.draw_count++; @@ -2068,6 +2090,8 @@ static void setup_fog() bool _ogx_setup_render_stages() { + if (!glparamstate.dirty.bits.dirty_tev) return true; + u8 raster_output, raster_reg_index; if (glparamstate.texture_enabled) { raster_reg_index = _ogx_gpu_resources->tevreg_first++; @@ -2231,6 +2255,7 @@ bool _ogx_setup_render_stages() * OgxGpuResources::{tevstage,texcoord}_first. */ GX_SetNumTevStages(_ogx_gpu_resources->tevstage_first); GX_SetNumTexGens(_ogx_gpu_resources->texcoord_first); + glparamstate.dirty.bits.dirty_tev = false; return true; } @@ -2252,8 +2277,7 @@ void _ogx_apply_state() } if (glparamstate.dirty.bits.dirty_alphatest || - glparamstate.dirty.bits.dirty_stencil || - glparamstate.dirty.bits.dirty_clip_planes) { + glparamstate.dirty.bits.dirty_tev) { u8 params[4] = { GX_ALWAYS, 0, GX_ALWAYS, 0 }; int comparisons = 0; if (glparamstate.alphatest_enabled) { @@ -2280,7 +2304,7 @@ void _ogx_apply_state() update_modelview_matrix(); update_projection_matrix(); } - if (glparamstate.dirty.bits.dirty_matrices | glparamstate.dirty.bits.dirty_lighting) { + if (glparamstate.dirty.bits.dirty_matrices | glparamstate.dirty.bits.dirty_tev) { update_normal_matrix(); } @@ -2293,12 +2317,9 @@ void _ogx_apply_state() * to 0 because some states might still be dirty: for example, the stencil * checks alters the texture coordinate generation. 
*/ glparamstate.dirty.bits.dirty_cull = 0; - glparamstate.dirty.bits.dirty_lighting = 0; glparamstate.dirty.bits.dirty_matrices = 0; - glparamstate.dirty.bits.dirty_stencil = 0; glparamstate.dirty.bits.dirty_alphatest = 0; glparamstate.dirty.bits.dirty_blend = 0; - glparamstate.dirty.bits.dirty_clip_planes = 0; glparamstate.dirty.bits.dirty_color_update = 0; glparamstate.dirty.bits.dirty_z = 0; } diff --git a/src/raster.cpp b/src/raster.cpp index a0a140e..81772cb 100644 --- a/src/raster.cpp +++ b/src/raster.cpp @@ -227,6 +227,7 @@ static void draw_raster_texture(GXTexObj *texture, int width, int height, GX_SetNumTexGens(1); GX_SetNumTevStages(1); GX_SetTevOrder(GX_TEVSTAGE0, GX_TEXCOORD0, GX_TEXMAP0, GX_COLOR0A0); + glparamstate.dirty.bits.dirty_tev = 1; GX_SetCullMode(GX_CULL_NONE); glparamstate.dirty.bits.dirty_cull = 1; diff --git a/src/selection.c b/src/selection.c index 5661200..4a18e70 100644 --- a/src/selection.c +++ b/src/selection.c @@ -103,6 +103,7 @@ static void restore_z_buffer() GX_SetNumChans(0); GX_SetTevOp(GX_TEVSTAGE0, GX_REPLACE); GX_SetTevOrder(GX_TEVSTAGE0, GX_TEXCOORD0, GX_TEXMAP0, GX_COLORNULL); + glparamstate.dirty.bits.dirty_tev = 1; GX_SetCullMode(GX_CULL_NONE); glparamstate.dirty.bits.dirty_cull = 1; diff --git a/src/state.h b/src/state.h index 82b1ca3..abebeee 100644 --- a/src/state.h +++ b/src/state.h @@ -240,11 +240,8 @@ typedef struct glparams_ unsigned dirty_clearz : 1; unsigned dirty_color_update : 1; unsigned dirty_matrices : 1; - unsigned dirty_lighting : 1; - unsigned dirty_material : 1; - unsigned dirty_clip_planes : 1; + unsigned dirty_tev : 1; unsigned dirty_cull : 1; - unsigned dirty_stencil : 1; unsigned dirty_fog : 1; } bits; unsigned int all; diff --git a/src/stencil.c b/src/stencil.c index e5e7f62..8955d10 100644 --- a/src/stencil.c +++ b/src/stencil.c @@ -545,13 +545,13 @@ void _ogx_stencil_draw(OgxStencilDrawCallback callback, void *cb_data) void _ogx_stencil_enabled() { glparamstate.stencil.enabled = 1; - 
glparamstate.dirty.bits.dirty_stencil = 1; + glparamstate.dirty.bits.dirty_tev = 1; } void _ogx_stencil_disabled() { glparamstate.stencil.enabled = 0; - glparamstate.dirty.bits.dirty_stencil = 1; + glparamstate.dirty.bits.dirty_tev = 1; } void _ogx_stencil_update() @@ -659,14 +659,17 @@ void glStencilFunc(GLenum func, GLint ref, GLuint mask) glparamstate.stencil.func = new_func; if (tev_stage_needed(new_type) && new_type != old_type) s_stencil_texture_needs_update = true; + glparamstate.dirty.bits.dirty_tev = 1; } if (new_ref != glparamstate.stencil.ref) { glparamstate.stencil.ref = new_ref; s_stencil_texture_needs_update = true; + glparamstate.dirty.bits.dirty_tev = 1; } if (new_mask != glparamstate.stencil.mask) { glparamstate.stencil.mask = new_mask; s_stencil_texture_needs_update = true; + glparamstate.dirty.bits.dirty_tev = 1; } } diff --git a/src/texture.c b/src/texture.c index 16354b4..5beef67 100644 --- a/src/texture.c +++ b/src/texture.c @@ -212,6 +212,7 @@ void glTexGeni(GLenum coord, GLenum pname, GLint param) tu->gen_mode = param; break; } + glparamstate.dirty.bits.dirty_tev = 1; } void glTexGenfv(GLenum coord, GLenum pname, const GLfloat *params) @@ -221,7 +222,7 @@ void glTexGenfv(GLenum coord, GLenum pname, const GLfloat *params) switch (pname) { case GL_TEXTURE_GEN_MODE: glTexGeni(coord, pname, params[0]); - break; + return; case GL_EYE_PLANE: if (coord == GL_S) { floatcpy(tu->texture_eye_plane_s, params, 4); @@ -237,6 +238,7 @@ void glTexGenfv(GLenum coord, GLenum pname, const GLfloat *params) } break; } + glparamstate.dirty.bits.dirty_tev = 1; } void glTexEnvf(GLenum target, GLenum pname, GLfloat param) @@ -296,6 +298,7 @@ void glTexEnvi(GLenum target, GLenum pname, GLint param) tu->mode = param; break; } + glparamstate.dirty.bits.dirty_tev = 1; } void glTexEnvfv(GLenum target, GLenum pname, const GLfloat *params) @@ -307,8 +310,9 @@ void glTexEnvfv(GLenum target, GLenum pname, const GLfloat *params) break; default: glTexEnvf(target, pname, 
params[0]); - break; + return; } + glparamstate.dirty.bits.dirty_tev = 1; } void glTexEnviv(GLenum target, GLenum pname, const GLint *params) @@ -394,6 +398,7 @@ static void update_texture(const void *data, int level, GLenum format, GLenum ty GX_InitTexObjLOD(obj, ti->min_filter, ti->mag_filter, ti->minlevel, ti->maxlevel, 0, GX_ENABLE, GX_ENABLE, GX_ANISO_1); GX_InitTexObjUserData(obj, ti->ud.ptr); + glparamstate.dirty.bits.dirty_tev = 1; } void glTexImage2D(GLenum target, GLint level, GLint internalFormat, GLsizei width, GLsizei height, @@ -519,6 +524,8 @@ void glBindTexture(GLenum target, GLuint texture) * defined yet. We do this when setting up the texturing TEV stage. */ int unit = glparamstate.active_texture; glparamstate.texture_unit[unit].glcurtex = texture; + + glparamstate.dirty.bits.dirty_tev = 1; } void glTexImage3D(GLenum target, GLint level, GLint internalFormat, diff --git a/src/vertex.cpp b/src/vertex.cpp index 7238faa..15dfc48 100644 --- a/src/vertex.cpp +++ b/src/vertex.cpp @@ -77,6 +77,7 @@ void set_current_color(T red, T green, T blue, T alpha = full_color()) } floatcpy(glparamstate.imm_mode.current_color, c, 4); + glparamstate.dirty.bits.dirty_tev = 1; } static inline void set_current_tex_unit_coords(int unit, float s, float t = 0) @@ -302,6 +303,7 @@ void glNormal3fv(const GLfloat *v) HANDLE_CALL_LIST(NORMAL, v); } floatcpy(glparamstate.imm_mode.current_normal, v, 3); + glparamstate.dirty.bits.dirty_tev = 1; } void glNormal3iv(const GLint *v) From 6c225cc479fbfc6e22428678734f99e4a68485a1 Mon Sep 17 00:00:00 2001 From: Alberto Mardegan Date: Tue, 10 Dec 2024 19:20:24 +0300 Subject: [PATCH 8/8] lists: do not resend vertex format unless needed This is not a terribly heavy operation, but since it's not hard to optimize it out, let's do it. 
--- src/call_lists.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/call_lists.c b/src/call_lists.c index ceb036f..f69b47e 100644 --- a/src/call_lists.c +++ b/src/call_lists.c @@ -132,6 +132,8 @@ static GXColor s_current_color; static float s_current_normal[3]; static bool s_last_draw_used_indexed_data = false; static uint16_t s_last_draw_sync_token = 0; +static union client_state s_last_client_state; +static bool s_last_client_state_is_valid = false; #define BUFFER_IS_VALID(buffer) (((uint32_t)buffer) > 1) #define LIST_IS_USED(index) BUFFER_IS_VALID(call_lists[index].head) @@ -194,13 +196,12 @@ static Command *new_command(CommandBuffer **head) } } -static void execute_draw_geometry_list(struct DrawGeometry *dg) +static void setup_draw_geometry(struct DrawGeometry *dg, + bool uses_indexed_data) { - static uint16_t counter = 0; u8 vtxindex = GX_VTXFMT0; GXColor current_color; - bool uses_indexed_data = !dg->cs.normal_enabled || !dg->cs.color_enabled; if (uses_indexed_data && s_last_draw_used_indexed_data) { bool data_changed = false; /* If the indexed data has changed, we need to wait until the previous @@ -248,6 +249,17 @@ static void execute_draw_geometry_list(struct DrawGeometry *dg) * so we won't add them unless they are enabled. 
*/ GX_InvVtxCache(); +} + +static void execute_draw_geometry_list(struct DrawGeometry *dg) +{ + bool uses_indexed_data = !dg->cs.normal_enabled || !dg->cs.color_enabled; + if (!s_last_client_state_is_valid || + s_last_client_state.as_int != dg->cs.as_int) { + setup_draw_geometry(dg, uses_indexed_data); + s_last_client_state = dg->cs; + s_last_client_state_is_valid = true; + } GX_CallDispList(dg->gxlist, dg->list_size); @@ -298,9 +310,11 @@ static void run_draw_geometry(struct DrawGeometry *dg) glparamstate.draw_count++; if (glparamstate.stencil.enabled) { + s_last_client_state_is_valid = false; _ogx_gpu_resources_push(); _ogx_stencil_draw(flat_draw_geometry, dg); _ogx_gpu_resources_pop(); + s_last_client_state_is_valid = false; } } @@ -765,6 +779,10 @@ void glCallList(GLuint id) } done: + /* Until we find a reliable mechanism to ensure that the client state has + * been preserved, avoid reusing it across different lists. */ + s_last_client_state_is_valid = false; + if (must_decrement) { glparamstate.current_call_list.execution_depth--; }