Chaos Git - corbenik/ctrulib.git/commitdiff
Enhanced and corrected shader code to fully support geometry shaders
author fincs <fincs.alt1@gmail.com>
Wed, 20 Jul 2016 15:35:26 +0000 (17:35 +0200)
committer fincs <fincs.alt1@gmail.com>
Wed, 20 Jul 2016 15:35:26 +0000 (17:35 +0200)
libctru/include/3ds/gpu/shaderProgram.h
libctru/include/3ds/gpu/shbin.h
libctru/source/gpu/shaderProgram.c
libctru/source/gpu/shbin.c

index 90f16928d4c02ae255bc0237890eabdecc9886f9..e0d5302ff75cc8168f8241c39d0e2caef1a26249 100644 (file)
@@ -33,18 +33,8 @@ typedef struct
        shaderInstance_s* geometryShader; ///< Geometry shader.\r
        u32 geoShaderInputPermutation[2]; ///< Geometry shader input permutation.\r
        u8 geoShaderInputStride;          ///< Geometry shader input stride.\r
-       u8 geoShaderMode;                 ///< Geometry shader operation mode.\r
 }shaderProgram_s;\r
 \r
-/// Geometry shader operation modes.\r
-typedef enum\r
-{\r
-       GSH_NORMAL                    = 0, ///< Normal operation.\r
-       GSH_PARTICLE                  = 1, ///< Particle system.\r
-       GSH_SUBDIVISION_LOOP          = 2, ///< Loop subdivision surface.\r
-       GSH_SUBDIVISION_CATMULL_CLARK = 3, ///< Catmull-Clark subdivision surface.\r
-} geoShaderMode;\r
-\r
 /**\r
  * @brief Initializes a shader instance.\r
  * @param si Shader instance to initialize.\r
@@ -104,7 +94,7 @@ Result shaderProgramSetVsh(shaderProgram_s* sp, DVLE_s* dvle);
  * @brief Sets the geometry shader of a shader program.\r
  * @param sp Shader program to use.\r
  * @param dvle Geometry shader to set.\r
- * @param stride Stride of the geometry shader.\r
+ * @param stride Input stride of the shader (pass 0 to match the number of outputs of the vertex shader).\r
  */\r
 Result shaderProgramSetGsh(shaderProgram_s* sp, DVLE_s* dvle, u8 stride);\r
 \r
@@ -115,13 +105,6 @@ Result shaderProgramSetGsh(shaderProgram_s* sp, DVLE_s* dvle, u8 stride);
  */\r
 Result shaderProgramSetGshInputPermutation(shaderProgram_s* sp, u64 permutation);\r
 \r
-/**\r
- * @brief Configures the operation mode of the geometry shader of a shader program.\r
- * @param sp Shader program to use.\r
- * @param mode Operation mode to use.\r
- */\r
-Result shaderProgramSetGshMode(shaderProgram_s* sp, geoShaderMode mode);\r
-\r
 /**\r
  * @brief Configures the shader units to use the specified shader program.\r
  * @param sp Shader program to use.\r
index c53b2728e7f9e44ee35d4808449279aa95188612..e331beb7dbcf4234e50742c5eecb3e4c02c2ef17 100644 (file)
@@ -28,9 +28,18 @@ typedef enum{
        RESULT_TEXCOORD0W = 0x4, ///< Texture coordinate 0 W.
        RESULT_TEXCOORD1 = 0x5,  ///< Texture coordinate 1.
        RESULT_TEXCOORD2 = 0x6,  ///< Texture coordinate 2.
-       RESULT_VIEW = 0x8        ///< View.
+       RESULT_VIEW = 0x8,       ///< View.
+       RESULT_DUMMY = 0x9,      ///< Dummy attribute (used as passthrough for geometry shader input).
 }DVLE_outputAttribute_t;
 
+/// Geometry shader operation modes.
+typedef enum
+{
+       GSH_POINT         = 0, ///< Point processing mode.
+       GSH_VARIABLE_PRIM = 1, ///< Variable-size primitive processing mode.
+       GSH_FIXED_PRIM    = 2, ///< Fixed-size primitive processing mode.
+} DVLE_geoShaderMode;
+
 /// DVLP data.
 typedef struct{
        u32 codeSize;     ///< Code size.
@@ -64,6 +73,11 @@ typedef struct{
 /// DVLE data.
 typedef struct{
        DVLE_type type;                        ///< DVLE type.
+       bool mergeOutmaps;                     ///< true = merge vertex/geometry shader outmaps ('dummy' output attribute is present).
+       DVLE_geoShaderMode gshMode;            ///< Geometry shader operation mode.
+       u8 gshFixedVtxStart;                   ///< Starting float uniform register number for storing the fixed-size primitive vertex array.
+       u8 gshVariableVtxNum;                  ///< Number of fully-defined vertices in the variable-size primitive vertex array.
+       u8 gshFixedVtxNum;                     ///< Number of vertices in the fixed-size primitive vertex array.
        DVLP_s* dvlp;                          ///< Contained DVLPs.
        u32 mainOffset;                        ///< Offset of the start of the main function.
        u32 endmainOffset;                     ///< Offset of the end of the main function.
index 8b2c34e88d546e79d42a350ceab793b82dc4d32a..edfcf988c878e083a83e69e40e942a5931ff9f07 100644 (file)
@@ -5,7 +5,7 @@
 #include <3ds/gpu/registers.h>\r
 #include <3ds/gpu/shaderProgram.h>\r
 \r
-static void GPU_SetShaderOutmap(u32 outmapData[8]);\r
+static void GPU_SetShaderOutmap(const u32 outmapData[8]);\r
 static void GPU_SendShaderCode(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length);\r
 static void GPU_SendOperandDescriptors(GPU_SHADER_TYPE type, u32* data, u16 offset, u16 length);\r
 \r
@@ -168,7 +168,6 @@ Result shaderProgramSetGsh(shaderProgram_s* sp, DVLE_s* dvle, u8 stride)
        sp->geoShaderInputPermutation[0] = 0x76543210;\r
        sp->geoShaderInputPermutation[1] = 0xFEDCBA98;\r
        sp->geoShaderInputStride = stride;\r
-       sp->geoShaderMode = GSH_NORMAL;\r
 \r
        return shaderInstanceInit(sp->geometryShader, dvle);\r
 }\r
@@ -182,82 +181,149 @@ Result shaderProgramSetGshInputPermutation(shaderProgram_s* sp, u64 permutation)
        return 0;\r
 }\r
 \r
-Result shaderProgramSetGshMode(shaderProgram_s* sp, geoShaderMode mode)\r
+static inline void shaderProgramUploadDvle(const DVLE_s* dvle)\r
 {\r
-       if(!sp || !sp->geometryShader)return -1;\r
-\r
-       sp->geoShaderMode = mode & 3;\r
-       return 0;\r
+       const DVLP_s* dvlp = dvle->dvlp;\r
+       // Limit vertex shader code size to the first 512 instructions\r
+       int codeSize = dvle->type == GEOMETRY_SHDR ? dvlp->codeSize : (dvlp->codeSize < 512 ? dvlp->codeSize : 512);\r
+       GPU_SendShaderCode(dvle->type, dvlp->codeData, 0, codeSize);\r
+       GPU_SendOperandDescriptors(dvle->type, dvlp->opcdescData, 0, dvlp->opdescSize);\r
 }\r
 \r
-Result shaderProgramConfigure(shaderProgram_s* sp, bool sendVshCode, bool sendGshCode)\r
+static inline void shaderProgramMergeOutmaps(u32* outmapData, const u32* vshOutmap, const u32* gshOutmap)\r
 {\r
-       if(!sp)return -1;\r
+       int i, j;\r
 \r
-       if(!sp->vertexShader)return -2;\r
+       // Find and copy attributes common to both vertex and geometry shader\r
+       u32 vsh_common = 0, gsh_common = 0;\r
+       for (i = 1; i < 8; i ++)\r
+       {\r
+               u32 mask = gshOutmap[i];\r
+               if (mask == 0x1F1F1F1F)\r
+                       break;\r
+               for (j = 1; j < 8; j ++)\r
+               {\r
+                       if (vshOutmap[j] == mask)\r
+                       {\r
+                               outmapData[++outmapData[0]] = mask;\r
+                               vsh_common |= BIT(j);\r
+                               gsh_common |= BIT(i);\r
+                               break;\r
+                       }\r
+               }\r
+       }\r
 \r
-       // configure geostage\r
-       // has to be done first or else VSH registers might only reconfigure 3 of the 4 shader units !\r
-       if(!sp->geometryShader)\r
+       // Find and copy attributes that are exclusive to the geometry shader\r
+       for (i = 1; i < 8; i ++)\r
        {\r
-               GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000000);\r
-               GPUCMD_AddMaskedWrite(GPUREG_VSH_COM_MODE, 0x1, 0x00000000);\r
-       }else{\r
-               GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x1, 0x00000002);\r
-               GPUCMD_AddMaskedWrite(GPUREG_VSH_COM_MODE, 0x1, 0x00000001);\r
+               u32 mask = gshOutmap[i];\r
+               if (mask == 0x1F1F1F1F)\r
+                       break;\r
+               if (!(gsh_common & BIT(i)))\r
+                       outmapData[++outmapData[0]] = mask;\r
        }\r
 \r
-       // setup vertex shader stuff no matter what\r
-       const DVLE_s* vshDvle = sp->vertexShader->dvle;\r
-       const DVLP_s* vshDvlp = vshDvle->dvlp;\r
-       if (sendVshCode)\r
+       // Find and copy attributes that are exclusive to the vertex shader\r
+       for (i = 1; i < 8; i ++)\r
        {\r
-               GPU_SendShaderCode(vshDvle->type, vshDvlp->codeData, 0, vshDvlp->codeSize);\r
-               GPU_SendOperandDescriptors(vshDvle->type, vshDvlp->opcdescData, 0, vshDvlp->opdescSize);\r
+               u32 mask = vshOutmap[i];\r
+               if (mask == 0x1F1F1F1F)\r
+                       break;\r
+               if (!(vsh_common & BIT(i)))\r
+                       outmapData[++outmapData[0]] = mask;\r
        }\r
-       GPUCMD_AddWrite(GPUREG_VSH_ENTRYPOINT, 0x7FFF0000|(vshDvle->mainOffset&0xFFFF));\r
-       GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK, vshDvle->outmapMask);\r
+}\r
+\r
+Result shaderProgramConfigure(shaderProgram_s* sp, bool sendVshCode, bool sendGshCode)\r
+{\r
+       if (!sp || !sp->vertexShader) return -1;\r
 \r
-       GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_TOTAL1, vshDvle->outmapData[0]-1); // ?\r
-       GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_TOTAL2, vshDvle->outmapData[0]-1); // ?\r
+       // Get pointers to relevant structures\r
+       const DVLE_s* vshDvle = sp->vertexShader->dvle;\r
+       const DVLE_s* gshDvle = sp->geometryShader ? sp->geometryShader->dvle : NULL;\r
+       const DVLE_s* mainDvle = gshDvle ? gshDvle : vshDvle;\r
+\r
+       // Variables for working with the outmap\r
+       u32 outmapData[8];\r
+       u32 outmapMode = mainDvle->outmapMode;\r
+       u32 outmapClock = mainDvle->outmapClock;\r
+\r
+       // Initialize geometry engine - do this early in order to ensure all 4 units are correctly initialized\r
+       GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x3, gshDvle ? 2 : 0);\r
+       GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG2, 0x3, 0);\r
+       GPUCMD_AddMaskedWrite(GPUREG_VSH_COM_MODE, 0x1, gshDvle ? 1 : 0);\r
+\r
+       // Set up vertex shader code blob (if necessary)\r
+       if (sendVshCode)\r
+               shaderProgramUploadDvle(vshDvle);\r
 \r
-       bool subdivision = sp->geoShaderMode >= GSH_SUBDIVISION_LOOP;\r
-       GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0x8, subdivision ? 0x80000000 : 0); // Enable or disable subdivision\r
-       u32 gshMisc = 0;\r
-       if (subdivision)\r
-               gshMisc = 1;\r
-       else if (sp->geoShaderMode == GSH_PARTICLE)\r
-               gshMisc = 0x01004302;\r
-       GPUCMD_AddWrite(GPUREG_GSH_MISC0, gshMisc);\r
-       GPUCMD_AddWrite(GPUREG_GSH_MISC1, sp->geoShaderMode);\r
+       // Set up vertex shader entrypoint & outmap mask\r
+       GPUCMD_AddWrite(GPUREG_VSH_ENTRYPOINT, 0x7FFF0000|(vshDvle->mainOffset&0xFFFF));\r
+       GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_MASK, vshDvle->outmapMask);\r
+       GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_TOTAL1, vshDvle->outmapData[0]-1);\r
+       GPUCMD_AddWrite(GPUREG_VSH_OUTMAP_TOTAL2, vshDvle->outmapData[0]-1);\r
 \r
-       if(!sp->geometryShader)\r
+       // Set up geometry shader (if present)\r
+       if (gshDvle)\r
        {\r
-               // finish setting up vertex shader alone\r
-               GPU_SetShaderOutmap((u32*)vshDvle->outmapData);\r
-\r
-               GPUCMD_AddWrite(GPUREG_SH_OUTATTR_MODE, vshDvle->outmapMode);\r
-               GPUCMD_AddWrite(GPUREG_SH_OUTATTR_CLOCK, vshDvle->outmapClock);\r
-       }else{\r
-               // setup both vertex and geometry shader\r
-               const DVLE_s* gshDvle = sp->geometryShader->dvle;\r
-               const DVLP_s* gshDvlp = gshDvle->dvlp;\r
+               // Set up geometry shader code blob (if necessary)\r
                if (sendGshCode)\r
-               {\r
-                       GPU_SendShaderCode(gshDvle->type, gshDvlp->codeData, 0, gshDvlp->codeSize);\r
-                       GPU_SendOperandDescriptors(gshDvle->type, gshDvlp->opcdescData, 0, gshDvlp->opdescSize);\r
-               }\r
+                       shaderProgramUploadDvle(gshDvle);\r
+\r
+               // Set up geometry shader entrypoint & outmap mask\r
                GPUCMD_AddWrite(GPUREG_GSH_ENTRYPOINT, 0x7FFF0000|(gshDvle->mainOffset&0xFFFF));\r
                GPUCMD_AddWrite(GPUREG_GSH_OUTMAP_MASK, gshDvle->outmapMask);\r
+       }\r
 \r
-               GPU_SetShaderOutmap((u32*)gshDvle->outmapData);\r
+       // Merge vertex shader & geometry shader outmaps if requested\r
+       if (gshDvle && gshDvle->mergeOutmaps)\r
+       {\r
+               // Clear outmap\r
+               memset(outmapData, 0x1F, sizeof(outmapData));\r
+               outmapData[0] = 0;\r
+\r
+               // Merge outmaps\r
+               shaderProgramMergeOutmaps(outmapData, vshDvle->outmapData, gshDvle->outmapData);\r
+               outmapMode  |= vshDvle->outmapMode;\r
+               outmapClock |= vshDvle->outmapClock;\r
+       } else\r
+               memcpy(outmapData, mainDvle->outmapData, sizeof(outmapData));\r
+\r
+       // Upload and configure outmap\r
+       GPU_SetShaderOutmap(outmapData);\r
+       GPUCMD_AddWrite(GPUREG_SH_OUTATTR_MODE, outmapMode);\r
+       GPUCMD_AddWrite(GPUREG_SH_OUTATTR_CLOCK, outmapClock);\r
+\r
+       // Configure geostage\r
+       if (gshDvle)\r
+       {\r
+               // Input stride: use value if specified, otherwise use number of outputs in vertex shader\r
+               int stride = sp->geoShaderInputStride ? sp->geoShaderInputStride : vshDvle->outmapData[0];\r
 \r
-               //GSH input attributes stuff\r
-               GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0x08000000|(sp->geoShaderInputStride-1)|(subdivision?0x100:0));\r
-               GPUCMD_AddIncrementalWrites(GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW, sp->geoShaderInputPermutation, 2);\r
+               // Enable or disable variable-size primitive processing\r
+               GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0xA, gshDvle->gshMode == GSH_VARIABLE_PRIM ? 0x80000000 : 0);\r
+\r
+               // Set up geoshader processing mode\r
+               u32 misc = gshDvle->gshMode;\r
+               if (misc == GSH_FIXED_PRIM)\r
+                       misc |= 0x01000000 | ((u32)gshDvle->gshFixedVtxStart<<16) | ((stride-1)<<12) | ((u32)(gshDvle->gshFixedVtxNum-1)<<8);\r
+               GPUCMD_AddWrite(GPUREG_GSH_MISC0, misc);\r
 \r
-               GPUCMD_AddWrite(GPUREG_SH_OUTATTR_MODE, gshDvle->outmapMode);\r
-               GPUCMD_AddWrite(GPUREG_SH_OUTATTR_CLOCK, gshDvle->outmapClock);\r
+               // Set up variable-size primitive mode parameters\r
+               GPUCMD_AddWrite(GPUREG_GSH_MISC1, gshDvle->gshMode == GSH_VARIABLE_PRIM ? (gshDvle->gshVariableVtxNum-1) : 0);\r
+\r
+               // Set up geoshader input\r
+               GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0x08000000 | (gshDvle->gshMode ? 0x0100 : 0) | (stride-1));\r
+\r
+               // Set up geoshader permutation\r
+               GPUCMD_AddIncrementalWrites(GPUREG_GSH_ATTRIBUTES_PERMUTATION_LOW, sp->geoShaderInputPermutation, 2);\r
+       } else\r
+       {\r
+               // Defaults for when geostage is disabled\r
+               GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 0xA, 0);\r
+               GPUCMD_AddWrite(GPUREG_GSH_MISC0, 0);\r
+               GPUCMD_AddWrite(GPUREG_GSH_MISC1, 0);\r
+               GPUCMD_AddWrite(GPUREG_GSH_INPUTBUFFER_CONFIG, 0xA0000000);\r
        }\r
 \r
        return 0;\r
@@ -271,12 +337,12 @@ Result shaderProgramUse(shaderProgram_s* sp)
        int i;\r
 \r
        // Set up uniforms\r
-       GPUCMD_AddWrite(GPUREG_VSH_BOOLUNIFORM, 0x7FFF0000|~sp->vertexShader->boolUniforms);\r
+       GPUCMD_AddWrite(GPUREG_VSH_BOOLUNIFORM, 0x7FFF0000|sp->vertexShader->boolUniforms);\r
        GPUCMD_AddIncrementalWrites(GPUREG_VSH_INTUNIFORM_I0, sp->vertexShader->intUniforms, 4);\r
        for(i=0; i<sp->vertexShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_VSH_FLOATUNIFORM_CONFIG, (u32*)&sp->vertexShader->float24Uniforms[i], 4);\r
        if (sp->geometryShader)\r
        {\r
-               GPUCMD_AddWrite(GPUREG_GSH_BOOLUNIFORM, 0x7FFF0000|~sp->geometryShader->boolUniforms);\r
+               GPUCMD_AddWrite(GPUREG_GSH_BOOLUNIFORM, 0x7FFF0000|sp->geometryShader->boolUniforms);\r
                GPUCMD_AddIncrementalWrites(GPUREG_GSH_INTUNIFORM_I0, sp->geometryShader->intUniforms, 4);\r
                for(i=0; i<sp->geometryShader->numFloat24Uniforms; i++) GPUCMD_AddIncrementalWrites(GPUREG_GSH_FLOATUNIFORM_CONFIG, (u32*)&sp->geometryShader->float24Uniforms[i], 4);\r
        }\r
@@ -284,7 +350,7 @@ Result shaderProgramUse(shaderProgram_s* sp)
        return 0;\r
 }\r
 \r
-void GPU_SetShaderOutmap(u32 outmapData[8])\r
+void GPU_SetShaderOutmap(const u32 outmapData[8])\r
 {\r
        GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x1, outmapData[0]-1);\r
        GPUCMD_AddIncrementalWrites(GPUREG_SH_OUTMAP_TOTAL, outmapData, 8);\r
index 5209c3c55131509c468bcf9f5b75b848bbf80bae..7f60150920b0ee81c6d764590140bc820ed42342 100644 (file)
@@ -38,9 +38,18 @@ DVLB_s* DVLB_ParseFile(u32* shbinData, u32 shbinSize)
                dvle->dvlp=&ret->DVLP;
 
                dvle->type=(dvleData[1]>>16)&0xFF;
+               dvle->mergeOutmaps=(dvleData[1]>>24)&1;
                dvle->mainOffset=dvleData[2];
                dvle->endmainOffset=dvleData[3];
 
+               if(dvle->type==GEOMETRY_SHDR)
+               {
+                       dvle->gshMode=dvleData[5]&0xFF;
+                       dvle->gshFixedVtxStart=(dvleData[5]>>8)&0xFF;
+                       dvle->gshVariableVtxNum=(dvleData[5]>>16)&0xFF;
+                       dvle->gshFixedVtxNum=(dvleData[5]>>24)&0xFF;
+               }
+
                dvle->constTableSize=dvleData[7];
                dvle->constTableData=(DVLE_constEntry_s*)&dvleData[dvleData[6]/4];
 
@@ -89,80 +98,53 @@ s8 DVLE_GetUniformRegister(DVLE_s* dvle, const char* name)
 
 void DVLE_GenerateOutmap(DVLE_s* dvle)
 {
-       if(!dvle)return;
+       if (!dvle) return;
 
+       // Initialize outmap data
        memset(dvle->outmapData, 0x1F, sizeof(dvle->outmapData));
+       dvle->outmapData[0] = 0;
+       dvle->outmapMask    = 0;
+       dvle->outmapMode    = 0;
+       dvle->outmapClock   = 0;
 
-       int i;
-       u8 numAttr=0;
-       u8 maxAttr=0;
-       u8 attrMask=0;
-       u32 attrMode=0;
-       u32 attrClock=0;
-
-       for(i=0;i<dvle->outTableSize;i++)
+       int i, j, k;
+       for (i = 0; i < dvle->outTableSize; i ++)
        {
-               u32* out=&dvle->outmapData[dvle->outTableData[i].regID+1];
-               u32 mask=0x00000000;
-               u8 tmpmask=dvle->outTableData[i].mask;
-               mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1;
-               mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1;
-               mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1;
-               mask=(mask<<8)|((tmpmask&8)?0xFF:0x00);tmpmask<<=1;
+               int type = dvle->outTableData[i].type;
+               int mask = dvle->outTableData[i].mask;
+               int regID = dvle->outTableData[i].regID;
+               u32* out = &dvle->outmapData[regID+1];
                
-               if(*out==0x1F1F1F1F)numAttr++;
-
-               u32 val=0x1F1F1F1F;
-               switch(dvle->outTableData[i].type)
+               if (!(dvle->outmapMask & BIT(regID)))
                {
-                       case RESULT_POSITION: val=0x03020100; break;
-                       case RESULT_NORMALQUAT: val=0x07060504; break;
-                       case RESULT_COLOR: val=0x0B0A0908; break;
-                       case RESULT_TEXCOORD0: val=0x1F1F0D0C; break;
-                       case RESULT_TEXCOORD0W: val=0x10101010; break;
-                       case RESULT_TEXCOORD1: val=0x1F1F0F0E; break;
-                       case RESULT_TEXCOORD2: val=0x1F1F1716; break;
-                       case RESULT_VIEW: val=0x1F141312; break;
+                       dvle->outmapMask |= BIT(regID);
+                       dvle->outmapData[0] ++;
                }
-               *out=((*out)&~mask)|(val&mask);
 
-               switch(dvle->outTableData[i].type)
+               int sem = 0x1F, num = 0;
+               switch (type)
                {
-                       case RESULT_POSITION:
-                               if ((*out & 0xFF0000)==0x020000)
-                                       attrClock |= BIT(0);
-                               break;
-                       case RESULT_COLOR:
-                               attrClock |= BIT(1);
-                               break;
-                       case RESULT_TEXCOORD0:
-                               attrMode = 1;
-                               attrClock |= BIT(8);
-                               break;
-                       case RESULT_TEXCOORD1:
-                               attrMode = 1;
-                               attrClock |= BIT(9);
-                               break;
-                       case RESULT_TEXCOORD2:
-                               attrMode = 1;
-                               attrClock |= BIT(10);
-                               break;
-                       case RESULT_TEXCOORD0W:
-                               attrMode = 1;
-                               attrClock |= BIT(16);
-                               break;
-                       case RESULT_NORMALQUAT:
-                       case RESULT_VIEW:
-                               attrClock |= BIT(24);
-                               break;
+                       case RESULT_POSITION:   sem = 0x00; num = 4;                                                     break;
+                       case RESULT_NORMALQUAT: sem = 0x04; num = 4; dvle->outmapClock |= BIT(24);                       break;
+                       case RESULT_COLOR:      sem = 0x08; num = 4; dvle->outmapClock |= BIT(1);                        break;
+                       case RESULT_TEXCOORD0:  sem = 0x0C; num = 2; dvle->outmapClock |= BIT(8);  dvle->outmapMode = 1; break;
+                       case RESULT_TEXCOORD0W: sem = 0x10; num = 1; dvle->outmapClock |= BIT(16); dvle->outmapMode = 1; break;
+                       case RESULT_TEXCOORD1:  sem = 0x0E; num = 2; dvle->outmapClock |= BIT(9);  dvle->outmapMode = 1; break;
+                       case RESULT_TEXCOORD2:  sem = 0x16; num = 2; dvle->outmapClock |= BIT(10); dvle->outmapMode = 1; break;
+                       case RESULT_VIEW:       sem = 0x12; num = 3; dvle->outmapClock |= BIT(24);                       break;
+                       default: continue;
                }
 
-               attrMask|=1<<dvle->outTableData[i].regID;
-               if(dvle->outTableData[i].regID+1>maxAttr)maxAttr=dvle->outTableData[i].regID+1;
+               for (j = 0, k = 0; j < 4 && k < num; j ++)
+               {
+                       if (mask & BIT(j))
+                       {
+                               *out &= ~(0xFF << (j*8));
+                               *out |= (sem++) << (j*8);
+                               k ++;
+                               if (type==RESULT_POSITION && k==3)
+                                       dvle->outmapClock |= BIT(0);
+                       }
+               }
        }
-
-       dvle->outmapData[0]=numAttr;
-       dvle->outmapMask=attrMask;
-       dvle->outmapMode=attrMode;
-       dvle->outmapClock=attrClock;
 }