Chaos Git - corbenik/ctrulib.git/commitdiff
shaderProgram
author smea <smealum@gmail.com>
Sat, 3 Jan 2015 03:16:23 +0000 (19:16 -0800)
committer smea <smealum@gmail.com>
Sat, 3 Jan 2015 03:16:23 +0000 (19:16 -0800)
libctru/include/3ds.h
libctru/include/3ds/gpu/gpu.h
libctru/include/3ds/gpu/shaderProgram.h
libctru/include/3ds/gpu/shbin.h
libctru/source/gpu/gpu.c
libctru/source/gpu/shaderProgram.c
libctru/source/gpu/shbin.c

index 83e6d05fab3b9c354c5e0f953dfda4a65700ced5..20e6b28f6e80af9963dd657d23b4705346389e90 100644 (file)
@@ -35,6 +35,7 @@ extern "C" {
 #include <3ds/gpu/gx.h>
 #include <3ds/gpu/gpu.h>
 #include <3ds/gpu/shbin.h>
+#include <3ds/gpu/shaderProgram.h>
 
 #include <3ds/sdmc.h>
 
index dc590e08025742221c09e6751221ab0da2667b36..63732400a1bc7077ea1f3ae876404dda612f3db4 100644 (file)
@@ -203,6 +203,11 @@ typedef enum{
        GPU_UNKPRIM = 0x0300 // ?
 }GPU_Primitive_t;
 
+typedef enum{ // shader stage selector taken by GPU_SendShaderCode / GPU_SendOperandDescriptors
+       GPU_VERTEX_SHADER=0x0, // those functions use the GPUREG_VSH_* register ids unchanged
+       GPU_GEOMETRY_SHADER=0x1 // those functions add a -0x30 offset to the GPUREG_VSH_* register ids
+}GPU_SHADER_TYPE;
+
 void GPU_SetUniform(u32 startreg, u32* data, u32 numreg);
 
 void GPU_SetViewport(u32* depthBuffer, u32* colorBuffer, u32 x, u32 y, u32 w, u32 h);
@@ -232,5 +237,8 @@ void GPU_SetTexEnv(u8 id, u16 rgbSources, u16 alphaSources, u16 rgbOperands, u16
 
 void GPU_DrawArray(GPU_Primitive_t primitive, u32 n);
 void GPU_DrawElements(GPU_Primitive_t primitive, u32* indexArray, u32 n);
-
 void GPU_FinishDrawing();
+
+void GPU_SetShaderOutmap(u32 outmapData[8]); // queues the 8-word output map; outmapData[0]-1 is also written (masked) to GPUREG_PRIMITIVE_CONFIG
+void GPU_SendShaderCode(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length); // uploads length u32 words of shader code; does nothing when data is NULL
+void GPU_SendOperandDescriptors(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length); // uploads length u32 operand descriptors; does nothing when data is NULL
index e5f2cba7e7f61d83c9e4a679c5874c85db41d1d0..b6f031597122af49d0d339110dac04da06e97e8c 100644 (file)
@@ -3,11 +3,20 @@
 #include <3ds/types.h>\r
 #include <3ds/gpu/shbin.h>\r
 \r
+typedef struct /* one float24 vector constant: the 4 consecutive words shaderProgramUse writes starting at GPUREG_VSH_FLOATUNIFORM_CONFIG or GPUREG_GSH_FLOATUNIFORM_CONFIG */\r
+{\r
+       u32 id; /* copied from DVLE_constEntry_s.id by shaderInstanceInit */\r
+       u32 data[3]; /* 4 x 3 bytes packed into 12 bytes; shaderInstanceInit stores the packed words in reverse order */\r
+}float24Uniform_s;\r
+\r
 // this structure describes an instance of either a vertex or geometry shader\r
 typedef struct\r
 {\r
        DVLE_s* dvle;\r
        u16 boolUniforms;\r
+       u32 intUniforms[4]; /* zeroed by shaderInstanceInit, then overridden by DVLE_CONST_u8 constants whose id is below 4 */\r
+       float24Uniform_s* float24Uniforms; /* heap array allocated by shaderInstanceInit (NULL when the DVLE has no float24 constants); freed by shaderInstanceFree */\r
+       u8 numFloat24Uniforms; /* number of entries in float24Uniforms that shaderProgramUse sends */\r
 }shaderInstance_s;\r
 \r
 // this structure describes an instance of a full shader program\r
index 89cbde951a1327c95e1d30bd2c6ad2fb0dca1d56..ed41114c4354be741ac1cd796af46c29696a9af3 100644 (file)
@@ -1,10 +1,18 @@
 #pragma once
 
+#include <3ds/gpu/gpu.h>
+
 typedef enum{
-       VERTEX_SHDR=0x0,
-       GEOMETRY_SHDR=0x1
+       VERTEX_SHDR=GPU_VERTEX_SHADER, // same value as the GPU-level enum, so DVLP_SendCode / DVLP_SendOpDesc can forward a DVLE_type as a GPU_SHADER_TYPE
+       GEOMETRY_SHDR=GPU_GEOMETRY_SHADER // same value as the GPU-level enum
 }DVLE_type;
 
+typedef enum{ // values of DVLE_constEntry_s.type, as switched on by shaderInstanceInit
+       DVLE_CONST_BOOL=0x0, // data[0]&1 is applied through shaderInstanceSetBool
+       DVLE_CONST_u8=0x1, // data[0] is stored in intUniforms[id] when id<4
+       DVLE_CONST_FLOAT24=0x2, // data[0..3] are packed into a float24Uniform_s
+}DVLE_constantType;
+
 typedef enum{
        RESULT_POSITION = 0x0,
        RESULT_NORMALQUAT = 0x1,
@@ -24,7 +32,8 @@ typedef struct{
 }DVLP_s;
 
 typedef struct{
-       u32 header;
+       u16 type; // compared against the DVLE_constantType values
+       u16 id; // which uniform the constant targets (e.g. the intUniforms index); DVLE_SendConstants now reads it instead of (header>>16)&0xFF
        u32 data[4];
 }DVLE_constEntry_s;
 
@@ -43,6 +52,7 @@ typedef struct{
 
 typedef struct{
        DVLE_type type;
+       DVLP_s* dvlp;
        u32 mainOffset, endmainOffset;
        u32 constTableSize;
        DVLE_constEntry_s* constTableData;
@@ -51,6 +61,8 @@ typedef struct{
        u32 uniformTableSize;
        DVLE_uniformEntry_s* uniformTableData;
        char* symbolTableData;
+       u8 outmapMask;
+       u32 outmapData[8];
 }DVLE_s;
 
 typedef struct{
@@ -69,3 +81,4 @@ void DVLP_SendOpDesc(DVLP_s* dvlp, DVLE_type type);
 
 void DVLE_SendOutmap(DVLE_s* dvle);
 void DVLE_SendConstants(DVLE_s* dvle);
+void DVLE_GenerateOutmap(DVLE_s* dvle); // fills dvle->outmapData and dvle->outmapMask; SHDR_ParseSHBIN calls it for every parsed DVLE
index ba36fe1dd3eb3cb1f6c576a50bc8cdb1fcfbb3d5..b1f3dbbe7286c8f5e095e24ed9035b70db180692 100644 (file)
@@ -509,3 +509,34 @@ void GPU_FinishDrawing()
        GPUCMD_AddSingleParam(0x000F0110, 0x00000001); 
        GPUCMD_AddSingleParam(0x000F0063, 0x00000001);
 }
+
+void GPU_SetShaderOutmap(u32 outmapData[8]) // queues the shader output map; outmapData is dereferenced without a NULL check
+{
+       GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x1, outmapData[0]-1); // outmapData[0] is the attribute count stored by DVLE_GenerateOutmap; a count of 0 wraps to 0xFFFFFFFF here
+       GPUCMD_AddIncrementalWrites(GPUREG_SH_OUTMAP_TOTAL, outmapData, 8); // all 8 words, starting at GPUREG_SH_OUTMAP_TOTAL
+}
+
+void GPU_SendShaderCode(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length) // queues an upload of length u32 words of shader code taken from data
+{
+       if(!data)return; // nothing is queued for a NULL buffer
+
+       u32 regOffset=(type==GPU_GEOMETRY_SHADER)?(-0x30):(0x0); // -0x30 is stored in an unsigned, so adding it below relies on u32 wraparound; presumably the geometry registers sit 0x30 below the vertex ones - TODO confirm
+
+       GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_CONFIG+regOffset, offset); // offset is written before any data; its unit is not visible here
+
+       int i;
+       for(i=0;i<length;i+=0x80)GPUCMD_AddWrites(GPUREG_VSH_CODETRANSFER_DATA+regOffset, &data[i], ((length-i)<0x80)?(length-i):0x80); // sent in chunks of at most 0x80 words
+
+       GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_END+regOffset, 0x00000001); // written once, after the last chunk
+}
+
+void GPU_SendOperandDescriptors(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length) // queues an upload of length u32 operand descriptors taken from data
+{
+       if(!data)return; // nothing is queued for a NULL buffer
+
+       u32 regOffset=(type==GPU_GEOMETRY_SHADER)?(-0x30):(0x0); // same -0x30 register offset convention as GPU_SendShaderCode
+
+       GPUCMD_AddWrite(GPUREG_VSH_OPDESCS_CONFIG+regOffset, offset);
+
+       GPUCMD_AddWrites(GPUREG_VSH_OPDESCS_DATA+regOffset, data, length); // one write for the whole table; unlike GPU_SendShaderCode this is not split into 0x80-word chunks
+}
index 5a288028caacd4f46cdce6aab66a45ce3353de15..769ee7981aa318d1f61a9607e14dedadb4401691 100644 (file)
@@ -1,5 +1,7 @@
 #include <stdlib.h>\r
+#include <string.h>\r
 #include <3ds/types.h>\r
+#include <3ds/gpu/registers.h>\r
 #include <3ds/gpu/shaderProgram.h>\r
 \r
 Result shaderInstanceInit(shaderInstance_s* si, DVLE_s* dvle)\r
@@ -7,7 +9,64 @@ Result shaderInstanceInit(shaderInstance_s* si, DVLE_s* dvle)
        if(!si || !dvle)return -1;\r
 \r
        si->dvle = dvle;\r
+\r
        si->boolUniforms = 0xFFFF;\r
+       si->intUniforms[0] = 0x00000000;\r
+       si->intUniforms[1] = 0x00000000;\r
+       si->intUniforms[2] = 0x00000000;\r
+       si->intUniforms[3] = 0x00000000;\r
+       si->float24Uniforms = NULL;\r
+\r
+       int i;\r
+       DVLE_constEntry_s* cnst = dvle->constTableData;\r
+       if(cnst)\r
+       {\r
+               int float24cnt=0;\r
+               for(i=0; i<dvle->constTableSize; i++)\r
+               {\r
+                       switch(cnst[i].type)\r
+                       {\r
+                               case DVLE_CONST_BOOL:\r
+                                       shaderInstanceSetBool(si, cnst[i].id, cnst[i].data[0]&1);\r
+                                       break;\r
+                               case DVLE_CONST_u8:\r
+                                       if(cnst[i].id<4)si->intUniforms[cnst[i].id] = cnst[i].data[0];\r
+                                       break;\r
+                               case DVLE_CONST_FLOAT24:\r
+                                       float24cnt++;\r
+                                       break;\r
+                       }\r
+               }\r
+\r
+               if(float24cnt)\r
+               {\r
+                       si->float24Uniforms = malloc(sizeof(float24Uniform_s)*float24cnt);\r
+                       if(!si->float24Uniforms)\r
+                       {\r
+                               float24cnt = 0;\r
+                               u32 rev[3];\r
+                               u8* rev8=(u8*)rev;\r
+                               for(i=0; i<dvle->constTableSize; i++)\r
+                               {\r
+                                       if(cnst[i].type==DVLE_CONST_FLOAT24)\r
+                                       {\r
+                                               memcpy(&rev8[0], &cnst[i].data[0], 3);\r
+                                               memcpy(&rev8[3], &cnst[i].data[1], 3);\r
+                                               memcpy(&rev8[6], &cnst[i].data[2], 3);\r
+                                               memcpy(&rev8[9], &cnst[i].data[3], 3);\r
+\r
+                                               si->float24Uniforms[float24cnt].id = cnst[i].id;\r
+                                               si->float24Uniforms[float24cnt].data[0] = rev[2];\r
+                                               si->float24Uniforms[float24cnt].data[1] = rev[1];\r
+                                               si->float24Uniforms[float24cnt].data[2] = rev[0];\r
+\r
+                                               float24cnt++;\r
+                                       }\r
+                               }\r
+                       }\r
+                       si->numFloat24Uniforms = float24cnt;\r
+               }\r
+       }\r
 \r
        return 0;\r
 }\r
@@ -16,6 +75,7 @@ Result shaderInstanceFree(shaderInstance_s* si)
 {\r
        if(!si)return -1;\r
 \r
+       if(si->float24Uniforms)free(si->float24Uniforms);\r
        free(si);\r
 \r
        return 0;\r
@@ -97,11 +157,58 @@ Result shaderProgramUse(shaderProgram_s* sp)
 \r
        if(!sp->vertexShader)return -2;\r
 \r
+       int i;\r
+\r
+       // setup vertex shader stuff no matter what\r
+       const DVLE_s* vshDvle = sp->vertexShader->dvle;\r
+       const DVLP_s* vshDvlp = vshDvle->dvlp;\r
+       GPU_SendShaderCode(vshDvle->type, vshDvlp->codeData, 0, vshDvlp->codeSize);\r
+       GPU_SendOperandDescriptors(vshDvle->type, vshDvlp->opcdescData, 0, vshDvlp->opdescSize);\r
+       GPUCMD_AddWrite(GPUREG_VSH_BOOLUNIFORM, 0x7FFF0000|sp->vertexShader->boolUniforms);\r
+       GPUCMD_AddIncrementalWrites(GPUREG_VSH_INTUNIFORM_I0, sp->vertexShader->intUniforms, 4);\r
+       for(i=0; i<sp->vertexShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_VSH_FLOATUNIFORM_CONFIG, (u32*)&sp->vertexShader->float24Uniforms[i], 4);\r
+       GPUCMD_AddWrite(GPUREG_VSH_ENTRYPOINT, 0x7FFF0000|(vshDvle->mainOffset&0xFFFF));\r
+       GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK, vshDvle->outmapMask);\r
+\r
+       GPUCMD_AddWrite(GPUREG_024A, vshDvle->outmapData[0]-1); // ?\r
+       GPUCMD_AddWrite(GPUREG_0251, vshDvle->outmapData[0]-1); // ?\r
+\r
+       GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x8, 0x00000000); // ?\r
+       GPUCMD_AddWrite(GPUREG_0252, 0x00000000); // ?\r
+\r
        if(!sp->geometryShader)\r
        {\r
-               // only deal with vertex shader\r
+               // finish setting up vertex shader alone\r
+               GPU_SetShaderOutmap((u32*)vshDvle->outmapData);\r
+\r
+               GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000000);\r
+               GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, 0x00000000);\r
+\r
+               GPUCMD_AddWrite(GPUREG_0064, 0x00000001); // ?\r
+               GPUCMD_AddWrite(GPUREG_006F, 0x00000703); // ?\r
        }else{\r
                // setup both vertex and geometry shader\r
+               const DVLE_s* gshDvle = sp->geometryShader->dvle;\r
+               const DVLP_s* gshDvlp = gshDvle->dvlp;\r
+               GPU_SendShaderCode(gshDvle->type, gshDvlp->codeData, 0, gshDvlp->codeSize);\r
+               GPU_SendOperandDescriptors(gshDvle->type, gshDvlp->opcdescData, 0, gshDvlp->opdescSize);\r
+               GPUCMD_AddWrite(GPUREG_GSH_BOOLUNIFORM, 0x7FFF0000|sp->geometryShader->boolUniforms);\r
+               GPUCMD_AddIncrementalWrites(GPUREG_GSH_INTUNIFORM_I0, sp->geometryShader->intUniforms, 4);\r
+               for(i=0; i<sp->geometryShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_GSH_FLOATUNIFORM_CONFIG, (u32*)&sp->geometryShader->float24Uniforms[i], 4);\r
+               GPUCMD_AddWrite(GPUREG_GSH_ENTRYPOINT, 0x7FFF0000|(gshDvle->mainOffset&0xFFFF));\r
+               GPUCMD_AddWrite(GPUREG_GSH_OUTMAP_MASK, gshDvle->outmapMask);\r
+\r
+               GPU_SetShaderOutmap((u32*)gshDvle->outmapData);\r
+\r
+               //GSH input attributes stuff\r
+               GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0x08000003);\r
+               GPUCMD_AddIncrementalWrites(GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW, ((u32[]){0x76543210, 0xFEDCBA98}), 2);\r
+\r
+               GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000002);\r
+               GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, 0x00000001);\r
+\r
+               GPUCMD_AddWrite(GPUREG_0064, 0x00000001); // ?\r
+               GPUCMD_AddWrite(GPUREG_006F, 0x01030703); // ?\r
        }\r
 \r
        return 0;\r
index e825f85eab154913ab66050575de03105f78a9a7..9c51f9b54eb7fb1d27b0f9c2e59bcd7e4cfe0244 100644 (file)
@@ -25,15 +25,18 @@ DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize)
        ret->DVLP.codeSize=dvlpData[3];
        ret->DVLP.codeData=&dvlpData[dvlpData[2]/4];
        ret->DVLP.opdescSize=dvlpData[5];
-       ret->DVLP.opcdescData=&dvlpData[dvlpData[4]/4];
+       ret->DVLP.opcdescData=(u32*)malloc(sizeof(u32)*ret->DVLP.opdescSize);
+       if(!ret->DVLP.opcdescData)goto clean2;
+       int i; for(i=0;i<ret->DVLP.opdescSize;i++)ret->DVLP.opcdescData[i]=dvlpData[dvlpData[4]/4+i*2];
 
        //parse DVLE
-       int i;
        for(i=0;i<ret->numDVLE;i++)
        {
                DVLE_s* dvle=&ret->DVLE[i];
                u32* dvleData=&shbinData[shbinData[2+i]/4];
 
+               dvle->dvlp=&ret->DVLP;
+
                dvle->type=(dvleData[1]>>16)&0xFF;
                dvle->mainOffset=dvleData[2];
                dvle->endmainOffset=dvleData[3];
@@ -48,9 +51,13 @@ DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize)
                dvle->uniformTableData=(DVLE_uniformEntry_s*)&dvleData[dvleData[12]/4];
 
                dvle->symbolTableData=(char*)&dvleData[dvleData[14]/4];
+
+               DVLE_GenerateOutmap(dvle);
        }
 
        goto exit;
+       clean2:
+               free(ret->DVLE);
        clean1:
                free(ret);
                ret=NULL;
@@ -58,6 +65,13 @@ DVLB_s* SHDR_ParseSHBIN(u32* shbinData, u32 shbinSize)
                return ret;
 }
 
+// Releases what SHDR_ParseSHBIN allocated: the operand-descriptor copy, the DVLE array, then the DVLB itself. The shbin buffer the tables point into belongs to the caller and is left alone.
+void SHDR_FreeDVLB(DVLB_s* dvlb)
+{
+       if(!dvlb)return;
+       free(dvlb->DVLP.opcdescData); free(dvlb->DVLE); free(dvlb); // members first, container last; dvlb must not be used afterwards
+}
+
 s8 SHDR_GetUniformRegister(DVLB_s* dvlb, const char* name, u8 programID)
 {
        if(!dvlb || !name)return -1;
@@ -76,51 +90,31 @@ s8 SHDR_GetUniformRegister(DVLB_s* dvlb, const char* name, u8 programID)
 void DVLP_SendCode(DVLP_s* dvlp, DVLE_type type)
 {
        if(!dvlp)return;
-
-       u32 regOffset=(type==GEOMETRY_SHDR)?(-0x30):(0x0);
-
-       GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_CONFIG+regOffset, 0x00000000);
-
-       int i;
-       for(i=0;i<dvlp->codeSize;i+=0x80)GPUCMD_Add(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_CODETRANSFER_DATA)+regOffset, &dvlp->codeData[i], ((dvlp->codeSize-i)<0x80)?(dvlp->codeSize-i):0x80);
-
-       GPUCMD_AddWrite(GPUREG_VSH_CODETRANSFER_END+regOffset, 0x00000001);
+       
+       GPU_SendShaderCode(type, dvlp->codeData, 0, dvlp->codeSize); // now a thin wrapper: uploads the whole code blob at offset 0. NOTE(review): the length parameter is u16 - confirm codeSize always fits
 }
 
 void DVLP_SendOpDesc(DVLP_s* dvlp, DVLE_type type)
 {
        if(!dvlp)return;
 
-       u32 regOffset=(type==GEOMETRY_SHDR)?(-0x30):(0x0);
-
-       GPUCMD_AddWrite(GPUREG_VSH_OPDESCS_CONFIG+regOffset, 0x00000000);
-
-       u32 param[0x80];
-
-       int i;
-       //TODO : should probably preprocess this
-       for(i=0;i<dvlp->opdescSize;i++)param[i]=dvlp->opcdescData[i*2];
-
-       GPUCMD_Add(GPUCMD_HEADER(0, 0xF, GPUREG_VSH_OPDESCS_DATA)+regOffset, param, dvlp->opdescSize);
+       GPU_SendOperandDescriptors(type, dvlp->opcdescData, 0, dvlp->opdescSize); // opcdescData is already the compacted copy SHDR_ParseSHBIN builds (every second source word), so no per-call repacking is needed
 }
 
-void DVLE_SendOutmap(DVLE_s* dvle)
+void DVLE_GenerateOutmap(DVLE_s* dvle)
 {
        if(!dvle)return;
 
-       u32 regOffset=(dvle->type==GEOMETRY_SHDR)?(-0x30):(0x0);
-
-       u32 param[0x8]={0x00000000,0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F,
-                                       0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F,0x1F1F1F1F};
+       memset(dvle->outmapData, 0x1F, sizeof(dvle->outmapData));
 
        int i;
        u8 numAttr=0;
        u8 maxAttr=0;
        u8 attrMask=0;
-       //TODO : should probably preprocess this
+
        for(i=0;i<dvle->outTableSize;i++)
        {
-               u32* out=&param[dvle->outTableData[i].regID+1];
+               u32* out=&dvle->outmapData[dvle->outTableData[i].regID+1];
                u32 mask=0x00000000;
                u8 tmpmask=dvle->outTableData[i].mask;
                mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1;
@@ -148,17 +142,24 @@ void DVLE_SendOutmap(DVLE_s* dvle)
                if(dvle->outTableData[i].regID+1>maxAttr)maxAttr=dvle->outTableData[i].regID+1;
        }
 
-       param[0]=numAttr;
+       dvle->outmapData[0]=numAttr;
+       dvle->outmapMask=attrMask;
+}
+
+void DVLE_SendOutmap(DVLE_s* dvle) // queues the outmap previously computed by DVLE_GenerateOutmap; no longer computes it itself
+{
+       if(!dvle)return;
+
+       u32 regOffset=(dvle->type==GEOMETRY_SHDR)?(-0x30):(0x0); // applied only to GPUREG_VSH_OUTMAP_MASK below
 
        if(dvle->type==VERTEX_SHDR)
        {
-               GPUCMD_AddWrite(GPUREG_024A, numAttr-1); //?
-               GPUCMD_AddWrite(GPUREG_0251, numAttr-1); //?
+               GPUCMD_AddWrite(GPUREG_024A, dvle->outmapData[0]-1); //? (outmapData[0] holds the attribute count)
+               GPUCMD_AddWrite(GPUREG_0251, dvle->outmapData[0]-1); //?
        }
        
-       GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK+regOffset, attrMask);
-       GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x1, numAttr-1);
-       GPUCMD_AddIncrementalWrites(GPUREG_SH_OUTMAP_TOTAL, param, 8);
+       GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK+regOffset, dvle->outmapMask); // mask cached by DVLE_GenerateOutmap
+       GPU_SetShaderOutmap(dvle->outmapData); // performs the GPUREG_PRIMITIVE_CONFIG and GPUREG_SH_OUTMAP_TOTAL writes that used to be inline here
 }
 
 void DVLE_SendConstants(DVLE_s* dvle)
@@ -180,7 +181,7 @@ void DVLE_SendConstants(DVLE_s* dvle)
                memcpy(&rev8[6], &cnst->data[2], 3);
                memcpy(&rev8[9], &cnst->data[3], 3);
 
-               param[0x0]=(cnst->header>>16)&0xFF;
+               param[0x0]=(cnst->id)&0xFF;
                param[0x1]=rev[2];
                param[0x2]=rev[1];
                param[0x3]=rev[0];
@@ -196,7 +197,6 @@ void SHDR_UseProgram(DVLB_s* dvlb, u8 id)
 
        u32 regOffset=(dvlb->DVLE[id].type==GEOMETRY_SHDR)?(-0x30):(0x0);
 
-
        GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000000);
        GPUCMD_AddMaskedWrite(GPUREG_0244, 0x1, (dvlb->DVLE[id].type==GEOMETRY_SHDR)?0x1:0x0);
 
@@ -215,10 +215,3 @@ void SHDR_UseProgram(DVLB_s* dvlb, u8 id)
                GPUCMD_AddWrite(GPUREG_0064, 0x00000001);
                GPUCMD_AddWrite(GPUREG_006F, 0x00000703);
 }
-
-//TODO
-void SHDR_FreeDVLB(DVLB_s* dvlb)
-{
-       if(!dvlb)return;
-
-}