#include <3ds/gpu/registers.h>\r
#include <3ds/gpu/shaderProgram.h>\r
\r
-static void GPU_SetShaderOutmap(u32 outmapData[8]);\r
+static void GPU_SetShaderOutmap(const u32 outmapData[8]);\r
static void GPU_SendShaderCode(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length);\r
static void GPU_SendOperandDescriptors(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length);\r
\r
sp->geoShaderInputPermutation[0] = 0x76543210;\r
sp->geoShaderInputPermutation[1] = 0xFEDCBA98;\r
sp->geoShaderInputStride = stride;\r
- sp->geoShaderMode = GSH_NORMAL;\r
\r
return shaderInstanceInit(sp->geometryShader, dvle);\r
}\r
return 0;\r
}\r
\r
-Result shaderProgramSetGshMode(shaderProgram_s* sp, geoShaderMode mode)\r
+// Sends the code blob and the operand descriptors of the program behind `dvle`\r
+// to the shader unit selected by dvle->type.\r
+static inline void shaderProgramUploadDvle(const DVLE_s* dvle)\r
{\r
- if(!sp || !sp->geometryShader)return -1;\r
-\r
- sp->geoShaderMode = mode & 3;\r
- return 0;\r
+ const DVLP_s* program = dvle->dvlp;\r
+ // Geometry shaders are sent in full; any other shader type is capped at 512 code words\r
+ int uploadSize = program->codeSize;\r
+ if (dvle->type != GEOMETRY_SHDR && program->codeSize >= 512)\r
+ uploadSize = 512;\r
+ GPU_SendShaderCode(dvle->type, program->codeData, 0, uploadSize);\r
+ GPU_SendOperandDescriptors(dvle->type, program->opcdescData, 0, program->opdescSize);\r
}\r
\r
-Result shaderProgramConfigure(shaderProgram_s* sp, bool sendVshCode, bool sendGshCode)\r
+// Appends the entries of a vertex shader outmap and a geometry shader outmap to outmapData.\r
+// All three arrays use slot 0 as an entry count and slots 1..7 as entries; an entry equal to\r
+// 0x1F1F1F1F terminates an input map. outmapData[0] is used as the running count and is\r
+// pre-incremented on every append, so it must hold the current count on entry.\r
+// Order of the result: entries present in both maps, then geometry-only, then vertex-only.\r
+static inline void shaderProgramMergeOutmaps(u32* outmapData, const u32* vshOutmap, const u32* gshOutmap)\r
{\r
- if(!sp)return -1;\r
+ int i, j;\r
\r
- if(!sp->vertexShader)return -2;\r
+ // Find and copy attributes common to both vertex and geometry shader\r
+ // (vsh_common/gsh_common record which slots of each input map were matched)\r
+ u32 vsh_common = 0, gsh_common = 0;\r
+ for (i = 1; i < 8; i ++)\r
+ {\r
+ u32 mask = gshOutmap[i];\r
+ if (mask == 0x1F1F1F1F)\r
+ break;\r
+ for (j = 1; j < 8; j ++)\r
+ {\r
+ if (vshOutmap[j] == mask)\r
+ {\r
+ outmapData[++outmapData[0]] = mask;\r
+ vsh_common |= BIT(j);\r
+ gsh_common |= BIT(i);\r
+ break;\r
+ }\r
+ }\r
+ }\r
\r
- // configure geostage\r
- // has to be done first or else VSH registers might only reconfigure 3 of the 4 shader units !\r
- if(!sp->geometryShader)\r
+ // Find and copy attributes that are exclusive to the geometry shader\r
+ // (together with the pass above this appends at most one entry per geometry slot, i.e. at most 7)\r
+ for (i = 1; i < 8; i ++)\r
{\r
- GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000000);\r
- GPUCMD_AddMaskedWrite(GPUREG_VSH_COM_MODE, 0x1, 0x00000000);\r
- }else{\r
- GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000002);\r
- GPUCMD_AddMaskedWrite(GPUREG_VSH_COM_MODE, 0x1, 0x00000001);\r
+ u32 mask = gshOutmap[i];\r
+ if (mask == 0x1F1F1F1F)\r
+ break;\r
+ if (!(gsh_common & BIT(i)))\r
+ outmapData[++outmapData[0]] = mask;\r
}\r
\r
- // setup vertex shader stuff no matter what\r
- const DVLE_s* vshDvle = sp->vertexShader->dvle;\r
- const DVLP_s* vshDvlp = vshDvle->dvlp;\r
- if (sendVshCode)\r
+ // Find and copy attributes that are exclusive to the vertex shader\r
+ for (i = 1; i < 8; i ++)\r
{\r
- GPU_SendShaderCode(vshDvle->type, vshDvlp->codeData, 0, vshDvlp->codeSize);\r
- GPU_SendOperandDescriptors(vshDvle->type, vshDvlp->opcdescData, 0, vshDvlp->opdescSize);\r
+ u32 mask = vshOutmap[i];\r
+ if (mask == 0x1F1F1F1F)\r
+ break;\r
+ if (vsh_common & BIT(i))\r
+ continue;\r
+ // Only slots 1..7 exist after the count slot; stop instead of writing past the\r
+ // end of the destination when the two maps together hold more than 7 entries\r
+ if (outmapData[0] >= 7)\r
+ break;\r
+ outmapData[++outmapData[0]] = mask;\r
}\r
- GPUCMD_AddWrite(GPUREG_VSH_ENTRYPOINT, 0x7FFF0000|(vshDvle->mainOffset&0xFFFF));\r
- GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK, vshDvle->outmapMask);\r
+}\r
+\r
+Result shaderProgramConfigure(shaderProgram_s* sp, bool sendVshCode, bool sendGshCode)\r
+{\r
+ if (!sp || !sp->vertexShader) return -1;\r
\r
- GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_TOTAL1, vshDvle->outmapData[0]-1); // ?\r
- GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_TOTAL2, vshDvle->outmapData[0]-1); // ?\r
+ // Get pointers to relevant structures\r
+ const DVLE_s* vshDvle = sp->vertexShader->dvle;\r
+ const DVLE_s* gshDvle = sp->geometryShader ? sp->geometryShader->dvle : NULL;\r
+ const DVLE_s* mainDvle = gshDvle ? gshDvle : vshDvle;\r
+\r
+ // Variables for working with the outmap\r
+ u32 outmapData[8];\r
+ u32 outmapMode = mainDvle->outmapMode;\r
+ u32 outmapClock = mainDvle->outmapClock;\r
+\r
+ // Initialize geometry engine - do this early in order to ensure all 4 units are correctly initialized\r
+ GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x3, gshDvle ? 2 : 0);\r
+ GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG2, 0x3, 0);\r
+ GPUCMD_AddMaskedWrite(GPUREG_VSH_COM_MODE, 0x1, gshDvle ? 1 : 0);\r
+\r
+ // Set up vertex shader code blob (if necessary)\r
+ if (sendVshCode)\r
+ shaderProgramUploadDvle(vshDvle);\r
\r
- bool subdivision = sp->geoShaderMode >= GSH_SUBDIVISION_LOOP;\r
- GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x8, subdivision ? 0x80000000 : 0); // Enable or disable subdivision\r
- u32 gshMisc = 0;\r
- if (subdivision)\r
- gshMisc = 1;\r
- else if (sp->geoShaderMode == GSH_PARTICLE)\r
- gshMisc = 0x01004302;\r
- GPUCMD_AddWrite(GPUREG_GSH_MISC0, gshMisc);\r
- GPUCMD_AddWrite(GPUREG_GSH_MISC1, sp->geoShaderMode);\r
+ // Set up vertex shader entrypoint & outmap mask\r
+ GPUCMD_AddWrite(GPUREG_VSH_ENTRYPOINT, 0x7FFF0000|(vshDvle->mainOffset&0xFFFF));\r
+ GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK, vshDvle->outmapMask);\r
+ GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_TOTAL1, vshDvle->outmapData[0]-1);\r
+ GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_TOTAL2, vshDvle->outmapData[0]-1);\r
\r
- if(!sp->geometryShader)\r
+ // Set up geometry shader (if present)\r
+ if (gshDvle)\r
{\r
- // finish setting up vertex shader alone\r
- GPU_SetShaderOutmap((u32*)vshDvle->outmapData);\r
-\r
- GPUCMD_AddWrite(GPUREG_SH_OUTATTR_MODE, vshDvle->outmapMode);\r
- GPUCMD_AddWrite(GPUREG_SH_OUTATTR_CLOCK, vshDvle->outmapClock);\r
- }else{\r
- // setup both vertex and geometry shader\r
- const DVLE_s* gshDvle = sp->geometryShader->dvle;\r
- const DVLP_s* gshDvlp = gshDvle->dvlp;\r
+ // Set up geometry shader code blob (if necessary)\r
if (sendGshCode)\r
- {\r
- GPU_SendShaderCode(gshDvle->type, gshDvlp->codeData, 0, gshDvlp->codeSize);\r
- GPU_SendOperandDescriptors(gshDvle->type, gshDvlp->opcdescData, 0, gshDvlp->opdescSize);\r
- }\r
+ shaderProgramUploadDvle(gshDvle);\r
+\r
+ // Set up geometry shader entrypoint & outmap mask\r
GPUCMD_AddWrite(GPUREG_GSH_ENTRYPOINT, 0x7FFF0000|(gshDvle->mainOffset&0xFFFF));\r
GPUCMD_AddWrite(GPUREG_GSH_OUTMAP_MASK, gshDvle->outmapMask);\r
+ }\r
\r
- GPU_SetShaderOutmap((u32*)gshDvle->outmapData);\r
+ // Merge vertex shader & geometry shader outmaps if requested\r
+ if (gshDvle && gshDvle->mergeOutmaps)\r
+ {\r
+ // Clear outmap\r
+ memset(outmapData, 0x1F, sizeof(outmapData));\r
+ outmapData[0] = 0;\r
+\r
+ // Merge outmaps\r
+ shaderProgramMergeOutmaps(outmapData, vshDvle->outmapData, gshDvle->outmapData);\r
+ outmapMode |= vshDvle->outmapMode;\r
+ outmapClock |= vshDvle->outmapClock;\r
+ } else\r
+ memcpy(outmapData, mainDvle->outmapData, sizeof(outmapData));\r
+\r
+ // Upload and configure outmap\r
+ GPU_SetShaderOutmap(outmapData);\r
+ GPUCMD_AddWrite(GPUREG_SH_OUTATTR_MODE, outmapMode);\r
+ GPUCMD_AddWrite(GPUREG_SH_OUTATTR_CLOCK, outmapClock);\r
+\r
+ // Configure geostage\r
+ if (gshDvle)\r
+ {\r
+ // Input stride: use value if specified, otherwise use number of outputs in vertex shader\r
+ int stride = sp->geoShaderInputStride ? sp->geoShaderInputStride : vshDvle->outmapData[0];\r
\r
- //GSH input attributes stuff\r
- GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0x08000000|(sp->geoShaderInputStride-1)|(subdivision?0x100:0));\r
- GPUCMD_AddIncrementalWrites(GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW, sp->geoShaderInputPermutation, 2);\r
+ // Enable or disable variable-size primitive processing\r
+ GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0xA, gshDvle->gshMode == GSH_VARIABLE_PRIM ? 0x80000000 : 0);\r
+\r
+ // Set up geoshader processing mode\r
+ u32 misc = gshDvle->gshMode;\r
+ if (misc == GSH_FIXED_PRIM)\r
+ misc |= 0x01000000 | ((u32)gshDvle->gshFixedVtxStart<<16) | ((stride-1)<<12) | ((u32)(gshDvle->gshFixedVtxNum-1)<<8);\r
+ GPUCMD_AddWrite(GPUREG_GSH_MISC0, misc);\r
\r
- GPUCMD_AddWrite(GPUREG_SH_OUTATTR_MODE, gshDvle->outmapMode);\r
- GPUCMD_AddWrite(GPUREG_SH_OUTATTR_CLOCK, gshDvle->outmapClock);\r
+ // Set up variable-size primitive mode parameters\r
+ GPUCMD_AddWrite(GPUREG_GSH_MISC1, gshDvle->gshMode == GSH_VARIABLE_PRIM ? (gshDvle->gshVariableVtxNum-1) : 0);\r
+\r
+ // Set up geoshader input\r
+ GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0x08000000 | (gshDvle->gshMode ? 0x0100 : 0) | (stride-1));\r
+\r
+ // Set up geoshader permutation\r
+ GPUCMD_AddIncrementalWrites(GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW, sp->geoShaderInputPermutation, 2);\r
+ } else\r
+ {\r
+ // Defaults for when geostage is disabled\r
+ GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0xA, 0);\r
+ GPUCMD_AddWrite(GPUREG_GSH_MISC0, 0);\r
+ GPUCMD_AddWrite(GPUREG_GSH_MISC1, 0);\r
+ GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0xA0000000);\r
}\r
\r
return 0;\r
int i;\r
\r
// Set up uniforms\r
- GPUCMD_AddWrite(GPUREG_VSH_BOOLUNIFORM, 0x7FFF0000|~sp->vertexShader->boolUniforms);\r
+ GPUCMD_AddWrite(GPUREG_VSH_BOOLUNIFORM, 0x7FFF0000|sp->vertexShader->boolUniforms);\r
GPUCMD_AddIncrementalWrites(GPUREG_VSH_INTUNIFORM_I0, sp->vertexShader->intUniforms, 4);\r
for(i=0; i<sp->vertexShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_VSH_FLOATUNIFORM_CONFIG, (u32*)&sp->vertexShader->float24Uniforms[i], 4);\r
if (sp->geometryShader)\r
{\r
- GPUCMD_AddWrite(GPUREG_GSH_BOOLUNIFORM, 0x7FFF0000|~sp->geometryShader->boolUniforms);\r
+ GPUCMD_AddWrite(GPUREG_GSH_BOOLUNIFORM, 0x7FFF0000|sp->geometryShader->boolUniforms);\r
GPUCMD_AddIncrementalWrites(GPUREG_GSH_INTUNIFORM_I0, sp->geometryShader->intUniforms, 4);\r
for(i=0; i<sp->geometryShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_GSH_FLOATUNIFORM_CONFIG, (u32*)&sp->geometryShader->float24Uniforms[i], 4);\r
}\r
return 0;\r
}\r
\r
-void GPU_SetShaderOutmap(u32 outmapData[8])\r
+void GPU_SetShaderOutmap(const u32 outmapData[8])\r
{\r
GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x1, outmapData[0]-1);\r
GPUCMD_AddIncrementalWrites(GPUREG_SH_OUTMAP_TOTAL, outmapData, 8);\r
dvle->dvlp=&ret->DVLP;
dvle->type=(dvleData[1]>>16)&0xFF;
+ dvle->mergeOutmaps=(dvleData[1]>>24)&1;
dvle->mainOffset=dvleData[2];
dvle->endmainOffset=dvleData[3];
+ if(dvle->type==GEOMETRY_SHDR)
+ {
+ dvle->gshMode=dvleData[5]&0xFF;
+ dvle->gshFixedVtxStart=(dvleData[5]>>8)&0xFF;
+ dvle->gshVariableVtxNum=(dvleData[5]>>16)&0xFF;
+ dvle->gshFixedVtxNum=(dvleData[5]>>24)&0xFF;
+ }
+
dvle->constTableSize=dvleData[7];
dvle->constTableData=(DVLE_constEntry_s*)&dvleData[dvleData[6]/4];
+// Rebuilds the outmap fields of a DVLE (outmapData, outmapMask, outmapMode, outmapClock)
+// from its output table. outmapData[0] receives the number of distinct output registers
+// seen; outmapData[regID+1] receives one semantic byte per component, and bytes that are
+// never assigned keep the 0x1F fill value. Does nothing when dvle is NULL.
void DVLE_GenerateOutmap(DVLE_s* dvle)
{
- if(!dvle)return;
+ if (!dvle) return;
+ // Initialize outmap data
memset(dvle->outmapData, 0x1F, sizeof(dvle->outmapData));
+ dvle->outmapData[0] = 0;
+ dvle->outmapMask = 0;
+ dvle->outmapMode = 0;
+ dvle->outmapClock = 0;
- int i;
- u8 numAttr=0;
- u8 maxAttr=0;
- u8 attrMask=0;
- u32 attrMode=0;
- u32 attrClock=0;
-
- for(i=0;i<dvle->outTableSize;i++)
+ int i, j, k;
+ for (i = 0; i < dvle->outTableSize; i ++)
{
- u32* out=&dvle->outmapData[dvle->outTableData[i].regID+1];
- u32 mask=0x00000000;
- u8 tmpmask=dvle->outTableData[i].mask;
- mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1;
- mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1;
- mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1;
- mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1;
+ int type = dvle->outTableData[i].type;
+ int mask = dvle->outTableData[i].mask;
+ int regID = dvle->outTableData[i].regID;
+ // NOTE(review): regID is used unchecked as an array index (regID+1) and as a bit
+ // position below - confirm the table contents are bounded before reaching here
+ u32* out = &dvle->outmapData[regID+1];
- if(*out==0x1F1F1F1F)numAttr++;
-
- u32 val=0x1F1F1F1F;
- switch(dvle->outTableData[i].type)
+ // Count each output register only once, however many table entries refer to it
+ if (!(dvle->outmapMask & BIT(regID)))
{
- case RESULT_POSITION: val=0x03020100; break;
- case RESULT_NORMALQUAT: val=0x07060504; break;
- case RESULT_COLOR: val=0x0B0A0908; break;
- case RESULT_TEXCOORD0: val=0x1F1F0D0C; break;
- case RESULT_TEXCOORD0W: val=0x10101010; break;
- case RESULT_TEXCOORD1: val=0x1F1F0F0E; break;
- case RESULT_TEXCOORD2: val=0x1F1F1716; break;
- case RESULT_VIEW: val=0x1F141312; break;
+ dvle->outmapMask |= BIT(regID);
+ dvle->outmapData[0] ++;
}
- *out=((*out)&~mask)|(val&mask);
- switch(dvle->outTableData[i].type)
+ // sem: first semantic byte of this output type, num: how many consecutive semantics it spans
+ int sem = 0x1F, num = 0;
+ switch (type)
{
- case RESULT_POSITION:
- if ((*out & 0xFF0000)==0x020000)
- attrClock |= BIT(0);
- break;
- case RESULT_COLOR:
- attrClock |= BIT(1);
- break;
- case RESULT_TEXCOORD0:
- attrMode = 1;
- attrClock |= BIT(8);
- break;
- case RESULT_TEXCOORD1:
- attrMode = 1;
- attrClock |= BIT(9);
- break;
- case RESULT_TEXCOORD2:
- attrMode = 1;
- attrClock |= BIT(10);
- break;
- case RESULT_TEXCOORD0W:
- attrMode = 1;
- attrClock |= BIT(16);
- break;
- case RESULT_NORMALQUAT:
- case RESULT_VIEW:
- attrClock |= BIT(24);
- break;
+ case RESULT_POSITION: sem = 0x00; num = 4; break;
+ case RESULT_NORMALQUAT: sem = 0x04; num = 4; dvle->outmapClock |= BIT(24); break;
+ case RESULT_COLOR: sem = 0x08; num = 4; dvle->outmapClock |= BIT(1); break;
+ case RESULT_TEXCOORD0: sem = 0x0C; num = 2; dvle->outmapClock |= BIT(8); dvle->outmapMode = 1; break;
+ case RESULT_TEXCOORD0W: sem = 0x10; num = 1; dvle->outmapClock |= BIT(16); dvle->outmapMode = 1; break;
+ case RESULT_TEXCOORD1: sem = 0x0E; num = 2; dvle->outmapClock |= BIT(9); dvle->outmapMode = 1; break;
+ case RESULT_TEXCOORD2: sem = 0x16; num = 2; dvle->outmapClock |= BIT(10); dvle->outmapMode = 1; break;
+ case RESULT_VIEW: sem = 0x12; num = 3; dvle->outmapClock |= BIT(24); break;
+ default: continue;
}
- attrMask|=1<<dvle->outTableData[i].regID;
- if(dvle->outTableData[i].regID+1>maxAttr)maxAttr=dvle->outTableData[i].regID+1;
+ // Hand out consecutive semantics to the components enabled in the write mask,
+ // lowest component first, until `num` of them have been assigned
+ for (j = 0, k = 0; j < 4 && k < num; j ++)
+ {
+ if (mask & BIT(j))
+ {
+ // Shift in unsigned arithmetic: 0xFF << 24 does not fit in a signed int
+ *out &= ~(0xFFu << (j*8));
+ *out |= (u32)(sem++) << (j*8);
+ k ++;
+ // The third position component assigned additionally sets clock bit 0
+ if (type==RESULT_POSITION && k==3)
+ dvle->outmapClock |= BIT(0);
+ }
+ }
}
-
- dvle->outmapData[0]=numAttr;
- dvle->outmapMask=attrMask;
- dvle->outmapMode=attrMode;
- dvle->outmapClock=attrClock;
}