]> Chaos Git - corbenik/ctrulib.git/commitdiff
rewrote GPU examples/gpu/data/test.vsh
authorsmea <smealum@gmail.com>
Thu, 20 Nov 2014 01:13:43 +0000 (17:13 -0800)
committersmea <smealum@gmail.com>
Thu, 20 Nov 2014 01:13:43 +0000 (17:13 -0800)
examples/gpu/data/test.vsh
examples/gpu/data/texture.bin [new file with mode: 0644]
examples/gpu/source/_gs.s [new file with mode: 0644]
examples/gpu/source/gs.c [new file with mode: 0644]
examples/gpu/source/gs.h [new file with mode: 0644]
examples/gpu/source/main.c
examples/gpu/source/math.c
examples/gpu/source/math.h

index 686b22b0947c1259b9e03126338c91b92c86c06e..0da0e64af42999a140af3f370e7c480f4c6666d1 100644 (file)
@@ -1,7 +1,7 @@
-; make sure you update aemstro_as for this (27/05/14)
+; make sure you update aemstro_as for this (15/11/14)
  
 ; setup constants
-       .const 5, 0.0, 0.0, -0.99, 1.0
+       .const 20, 1.0, 0.0, 0.5, 1.0
  
 ; setup outmap
        .out o0, result.position
@@ -9,35 +9,37 @@
        .out o2, result.texcoord0
        .out o3, result.texcoord1
        .out o4, result.texcoord2
-; setup uniform map (not required)
-       .uniform 0x10, 0x13, mdlvMtx
-       .uniform 0x14, 0x17, projMtx
+
+; setup uniform map (required to use SHDR_GetUniformRegister)
+       .uniform 0, 3, projection      ; c0-c3 = projection matrix
+       .uniform 4, 7, modelview       ; c4-c7 = modelview matrix
+       .uniform 8, 8, lightDirection  ; c8    = light direction vector
+       .uniform 9, 9, lightAmbient    ; c9    = light ambient color
  
 ;code
        main:
-               mov d1A, d00 (0x4)
-               mov d1A, d25 (0x3)
+               mov r1,  v0       (0x6)
+               mov r1, c20       (0x3)
                ; tempreg = mdlvMtx * in.pos
-               dp4 d10, d44, d1A (0x0)
-               dp4 d10, d45, d1A (0x1)
-               dp4 d10, d46, d1A (0x2)
-               mov d10, d25 (0x3)
+               dp4 r0,  c4,  r1  (0x0)
+               dp4 r0,  c5,  r1  (0x1)
+               dp4 r0,  c6,  r1  (0x2)
+               mov r0, c20       (0x3)
                ; result.pos = projMtx * tempreg
-               dp4 d00, d40, d10 (0x0)
-               dp4 d00, d41, d10 (0x1)
-               dp4 d00, d42, d10 (0x2)
-               dp4 d00, d43, d10 (0x3)
+               dp4 o0,  c0,  r0  (0x0)
+               dp4 o0,  c1,  r0  (0x1)
+               dp4 o0,  c2,  r0  (0x2)
+               dp4 o0,  c3,  r0  (0x3)
                ; result.texcoord = in.texcoord
-               mov d02, d01 (0x5)
-               mov d03, d25 (0x7)
-               mov d04, d25 (0x7)
+               mov o2,  v1       (0x5)
+               mov o3, c20       (0x7)
+               mov o4, c20       (0x7)
                ; result.color = crappy lighting
-               dp3 d1A, d44, d02 (0x0)
-               dp3 d1A, d45, d02 (0x1)
-               dp3 d1A, d46, d02 (0x2)
-               dp4 d01, d00, d1A (0x6)
-               mov d01, d25 (0x3)
+               dp3 r0,  c8,  v2  (0x6)
+               max r0, c20,  r0  (0x4)
+               mul r0,  c9,  r0  (0x8)
+               add o1,  c9,  r0  (0x6)
+               mov o1, c20       (0x3)
                flush
                end
        endmain:
@@ -47,8 +49,8 @@
        .opdesc _y__, xyzw, xyzw ; 0x1
        .opdesc __z_, xyzw, xyzw ; 0x2
        .opdesc ___w, xyzw, xyzw ; 0x3
-       .opdesc xyz_, xyzw, xyzw ; 0x4
+       .opdesc xyz_, yyyy, xyzw ; 0x4
        .opdesc xyzw, xyzw, xyzw ; 0x5
        .opdesc xyz_, xyzw, xyzw ; 0x6
        .opdesc xyzw, yyyw, xyzw ; 0x7
-       .opdesc xyzw, wwww, wwww ; 0x8
+       .opdesc xyz_, wwww, xyzw ; 0x8
diff --git a/examples/gpu/data/texture.bin b/examples/gpu/data/texture.bin
new file mode 100644 (file)
index 0000000..4a3312b
Binary files /dev/null and b/examples/gpu/data/texture.bin differ
diff --git a/examples/gpu/source/_gs.s b/examples/gpu/source/_gs.s
new file mode 100644 (file)
index 0000000..1b9c92b
--- /dev/null
@@ -0,0 +1,16 @@
+.section ".text"\r
+.arm\r
+.align 4\r
+.global _vboMemcpy50\r
+\r
+# r0 : dst\r
+# r1 : src\r
+# fixed size 0x50 (20 words : 11 through r2-r12, then 9 through r2-r10)\r
+_vboMemcpy50:\r
+       push {r4-r11}\r
+       ldmia r1!, {r2-r12}\r
+       stmia r0!, {r2-r12}\r
+       ldmia r1!, {r2-r10}\r
+       stmia r0!, {r2-r10}\r
+       pop {r4-r11}\r
+       bx lr\r
diff --git a/examples/gpu/source/gs.c b/examples/gpu/source/gs.c
new file mode 100644 (file)
index 0000000..4eabd45
--- /dev/null
@@ -0,0 +1,431 @@
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include <3ds.h>
+
+#include "gs.h"
+#include "math.h"
+
+#define BUFFERMATRIXLIST_SIZE (GS_MATRIXSTACK_SIZE*4)
+
+static void gsInitMatrixStack();
+
+Handle linearAllocMutex;
+
+static u32 gsMatrixStackRegisters[GS_MATRIXTYPES];
+
+typedef struct
+{
+       u32 offset;
+       mtx44 data;
+}bufferMatrix_s;
+
+bufferMatrix_s bufferMatrixList[BUFFERMATRIXLIST_SIZE];
+int bufferMatrixListLength;
+
+//----------------------
+//   GS SYSTEM STUFF
+//----------------------
+
+void initBufferMatrixList()
+{
+       bufferMatrixListLength=0;
+}
+
+void gsInit(DVLB_s* shader)
+{
+       gsInitMatrixStack();
+       initBufferMatrixList();
+       svcCreateMutex(&linearAllocMutex, false);
+       if(shader)
+       {
+               gsMatrixStackRegisters[0]=SHDR_GetUniformRegister(shader, "projection", 0);
+               gsMatrixStackRegisters[1]=SHDR_GetUniformRegister(shader, "modelview", 0);
+       }
+}
+
+void gsExit(void)
+{
+       svcCloseHandle(linearAllocMutex);
+}
+
+void gsStartFrame(void)
+{
+       GPUCMD_SetBufferOffset(0);
+       initBufferMatrixList();
+}
+
+void* gsLinearAlloc(size_t size)
+{
+       void* ret=NULL;
+
+       svcWaitSynchronization(linearAllocMutex, U64_MAX);
+       ret=linearAlloc(size);
+       svcReleaseMutex(linearAllocMutex);
+       
+       return ret;
+}
+
+void gsLinearFree(void* mem)
+{
+       svcWaitSynchronization(linearAllocMutex, U64_MAX);
+       linearFree(mem);
+       svcReleaseMutex(linearAllocMutex);
+}
+
+//----------------------
+//  MATRIX STACK STUFF
+//----------------------
+
+static mtx44 gsMatrixStacks[GS_MATRIXTYPES][GS_MATRIXSTACK_SIZE];
+static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]={0x00, 0x04};
+static u8 gsMatrixStackOffsets[GS_MATRIXTYPES];
+static bool gsMatrixStackUpdated[GS_MATRIXTYPES];
+static GS_MATRIX gsCurrentMatrixType;
+
+//resets every matrix stack to depth 0 with an identity matrix on top, flags each one
+//as needing upload, and selects GS_PROJECTION as the current matrix type
+static void gsInitMatrixStack()
+{
+       int type=0;
+       while(type<GS_MATRIXTYPES)
+       {
+               loadIdentity44((float*)gsMatrixStacks[type][0]);
+               gsMatrixStackUpdated[type]=true;
+               gsMatrixStackOffsets[type]=0;
+               type++;
+       }
+       gsCurrentMatrixType=GS_PROJECTION;
+}
+
+float* gsGetMatrix(GS_MATRIX m)
+{
+       if(m<0 || m>=GS_MATRIXTYPES)return NULL;
+       
+       return (float*)gsMatrixStacks[m][gsMatrixStackOffsets[m]];
+}
+
+int gsLoadMatrix(GS_MATRIX m, float* data)
+{
+       if(m<0 || m>=GS_MATRIXTYPES || !data)return -1;
+       
+       memcpy(gsGetMatrix(m), data, sizeof(mtx44));
+
+       gsMatrixStackUpdated[m]=true;
+
+       return 0;
+}
+
+//duplicates the top of the current matrix stack onto a new top entry
+//returns 0 on success, -1 if the current matrix type is invalid or the stack is full
+int gsPushMatrix()
+{
+       const GS_MATRIX type=gsCurrentMatrixType;
+       if(type<0 || type>=GS_MATRIXTYPES)return -1;
+
+       //the offset is a u8, so only the upper bound can ever be violated
+       const u8 depth=gsMatrixStackOffsets[type];
+       if(depth>=GS_MATRIXSTACK_SIZE-1)return -1;
+
+       memcpy(gsMatrixStacks[type][depth+1], gsMatrixStacks[type][depth], sizeof(mtx44));
+       gsMatrixStackOffsets[type]=depth+1;
+
+       return 0;
+}
+
+//drops the top entry of the current matrix stack and flags the stack as needing upload
+//returns 0 on success, -1 if the current matrix type is invalid, the stack is already
+//at its bottom entry, or the stored depth is out of range
+int gsPopMatrix()
+{
+       const GS_MATRIX type=gsCurrentMatrixType;
+       if(type<0 || type>=GS_MATRIXTYPES)return -1;
+
+       const u8 depth=gsMatrixStackOffsets[type];
+       if(depth==0 || depth>=GS_MATRIXSTACK_SIZE)return -1;
+
+       gsMatrixStackOffsets[type]=depth-1;
+       gsMatrixStackUpdated[type]=true;
+
+       return 0;
+}
+
+int gsMatrixMode(GS_MATRIX m)
+{
+       if(m<0 || m>=GS_MATRIXTYPES)return -1;
+
+       gsCurrentMatrixType=m;
+
+       return 0;
+}
+
+//------------------------
+// MATRIX TRANSFORM STUFF
+//------------------------
+
+int gsMultMatrix(float* data)
+{
+       if(!data)return -1;
+       
+       mtx44 tmp;
+       multMatrix44(gsGetMatrix(gsCurrentMatrixType), data, (float*)tmp);
+       memcpy(gsGetMatrix(gsCurrentMatrixType), (float*)tmp, sizeof(mtx44));
+
+       gsMatrixStackUpdated[gsCurrentMatrixType]=true;
+
+       return 0;
+}
+
+void gsLoadIdentity()
+{
+       loadIdentity44(gsGetMatrix(gsCurrentMatrixType));
+       gsMatrixStackUpdated[gsCurrentMatrixType]=true;
+}
+
+void gsProjectionMatrix(float fovy, float aspect, float near, float far)
+{
+       initProjectionMatrix(gsGetMatrix(gsCurrentMatrixType), fovy, aspect, near, far);
+       gsMatrixStackUpdated[gsCurrentMatrixType]=true;
+}
+
+void gsRotateX(float x)
+{
+       rotateMatrixX(gsGetMatrix(gsCurrentMatrixType), x, false);
+       gsMatrixStackUpdated[gsCurrentMatrixType]=true;
+}
+
+void gsRotateY(float y)
+{
+       rotateMatrixY(gsGetMatrix(gsCurrentMatrixType), y, false);
+       gsMatrixStackUpdated[gsCurrentMatrixType]=true;
+}
+
+void gsRotateZ(float z)
+{
+       rotateMatrixZ(gsGetMatrix(gsCurrentMatrixType), z, false);
+       gsMatrixStackUpdated[gsCurrentMatrixType]=true;
+}
+
+void gsScale(float x, float y, float z)
+{
+       scaleMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z);
+       gsMatrixStackUpdated[gsCurrentMatrixType]=true;
+}
+
+void gsTranslate(float x, float y, float z)
+{
+       translateMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z);
+       gsMatrixStackUpdated[gsCurrentMatrixType]=true;
+}
+
+//----------------------
+// MATRIX RENDER STUFF
+//----------------------
+
+//sends a 16-float matrix to the shader through GPU_SetUniform, starting at startreg
+//each group of four floats is written with its components reversed (x,y,z,w -> w,z,y,x)
+static void gsSetUniformMatrix(u32 startreg, float* m)
+{
+       float param[16];
+       int row, col;
+
+       for(row=0; row<4; row++)
+       {
+               for(col=0; col<4; col++)
+               {
+                       param[row*4+col]=m[row*4+(3-col)];
+               }
+       }
+
+       GPU_SetUniform(startreg, (u32*)param, 4);
+}
+
+//uploads the top of every matrix stack that is flagged as updated, then clears the flag
+//for GS_PROJECTION the current command buffer offset and a copy of the matrix are also
+//appended to bufferMatrixList while it has room (gsAdjustBufferMatrices reads that list)
+//always returns 0
+static int gsUpdateTransformation()
+{
+       GS_MATRIX type;
+       for(type=0; type<GS_MATRIXTYPES; type++)
+       {
+               if(!gsMatrixStackUpdated[type])continue;
+
+               if(type==GS_PROJECTION && bufferMatrixListLength<BUFFERMATRIXLIST_SIZE)
+               {
+                       bufferMatrix_s* entry=&bufferMatrixList[bufferMatrixListLength];
+                       GPUCMD_GetBuffer(NULL, NULL, &entry->offset);
+                       memcpy(entry->data, gsGetMatrix(type), sizeof(mtx44));
+                       bufferMatrixListLength++;
+               }
+
+               gsSetUniformMatrix(gsMatrixStackRegisters[type], gsGetMatrix(type));
+               gsMatrixStackUpdated[type]=false;
+       }
+       return 0;
+}
+
+//re-emits every projection upload recorded in bufferMatrixList, replacing the matrix with
+//multMatrix44(recorded matrix, transformation)
+//the command buffer offset is moved back to each recorded position for the rewrite and
+//restored to its original value before returning
+void gsAdjustBufferMatrices(mtx44 transformation)
+{
+       u32* buffer;
+       u32 endOffset;
+       int idx;
+
+       GPUCMD_GetBuffer(&buffer, NULL, &endOffset);
+       for(idx=0; idx<bufferMatrixListLength; idx++)
+       {
+               const u32 recorded=bufferMatrixList[idx].offset;
+               //TODO : better check, need to account for param size
+               if(recorded+2>=endOffset)continue;
+
+               mtx44 adjusted;
+               GPUCMD_SetBufferOffset(recorded);
+               multMatrix44((float*)bufferMatrixList[idx].data, (float*)transformation, (float*)adjusted);
+               gsSetUniformMatrix(gsMatrixStackRegisters[GS_PROJECTION], (float*)adjusted);
+       }
+       GPUCMD_SetBufferOffset(endOffset);
+}
+
+//----------------------
+//      VBO STUFF
+//----------------------
+
+//resets every field of a VBO descriptor to its empty state
+//nothing is freed here : use gsVboDestroy on a VBO that owns memory
+//returns 0 on success, -1 if vbo is NULL
+int gsVboInit(gsVbo_s* vbo)
+{
+       if(!vbo)return -1;
+
+       vbo->data=NULL;
+       vbo->currentSize=0;
+       vbo->maxSize=0;
+       //previously left untouched, so a destroyed or stack-allocated VBO kept a stale count
+       vbo->numVertices=0;
+       vbo->commands=NULL;
+       vbo->commandsSize=0;
+
+       return 0;
+}
+
+//allocates size bytes of vertex storage for the VBO through gsLinearAlloc
+//returns 0 on success, -1 if vbo is NULL or the allocation failed
+int gsVboCreate(gsVbo_s* vbo, u32 size)
+{
+       if(!vbo)return -1;
+
+       vbo->data=gsLinearAlloc(size);
+       vbo->numVertices=0;
+       vbo->currentSize=0;
+       //a failed allocation must not advertise capacity, or gsVboAddData would memcpy to NULL
+       vbo->maxSize=vbo->data?size:0;
+
+       return vbo->data?0:-1;
+}
+
+void* gsVboGetOffset(gsVbo_s* vbo)
+{
+       if(!vbo)return NULL;
+
+       return (void*)(&((u8*)vbo->data)[vbo->currentSize]);
+}
+
+//appends size bytes from data to the VBO storage and adds units to its vertex count
+//returns 0 on success, -1 on a NULL/empty argument, missing storage or insufficient room
+int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units)
+{
+       if(!vbo || !data || !size)return -1;
+       if(!vbo->data || vbo->currentSize>vbo->maxSize)return -1;
+       //compare in unsigned space : the old s32 difference was converted back to u32
+       //when compared with size, so a negative remainder looked like a huge free space
+       if(size > vbo->maxSize-vbo->currentSize)return -1;
+
+       memcpy(gsVboGetOffset(vbo), data, size);
+       vbo->currentSize+=size;
+       vbo->numVertices+=units;
+
+       return 0;
+}
+
+int gsVboFlushData(gsVbo_s* vbo)
+{
+       if(!vbo)return -1;
+
+       //unnecessary if we use flushAndRun
+       // GSPGPU_FlushDataCache(NULL, vbo->data, vbo->currentSize);
+
+       return 0;
+}
+
+//frees the VBO's vertex data and precomputed commands (when present), then resets the
+//descriptor with gsVboInit
+//returns 0 on success, -1 if vbo is NULL
+int gsVboDestroy(gsVbo_s* vbo)
+{
+       if(vbo==NULL)return -1;
+
+       if(vbo->data!=NULL)gsLinearFree(vbo->data);
+       if(vbo->commands!=NULL)free(vbo->commands);
+       gsVboInit(vbo);
+
+       return 0;
+}
+
+extern u32 debugValue[];
+
+void GPU_DrawArrayDirectly(GPU_Primitive_t primitive, u8* data, u32 n)
+{
+       //set attribute buffer address
+       GPUCMD_AddSingleParam(0x000F0200, (osConvertVirtToPhys((u32)data))>>3);
+       //set primitive type
+       GPUCMD_AddSingleParam(0x0002025E, primitive);
+       GPUCMD_AddSingleParam(0x0002025F, 0x00000001);
+       //index buffer not used for drawArrays but 0x000F0227 still required
+       GPUCMD_AddSingleParam(0x000F0227, 0x80000000);
+       //pass number of vertices
+       GPUCMD_AddSingleParam(0x000F0228, n);
+
+       GPUCMD_AddSingleParam(0x00010253, 0x00000001);
+
+       GPUCMD_AddSingleParam(0x00010245, 0x00000000);
+       GPUCMD_AddSingleParam(0x000F022E, 0x00000001);
+       GPUCMD_AddSingleParam(0x00010245, 0x00000001);
+       GPUCMD_AddSingleParam(0x000F0231, 0x00000001);
+
+       // GPUCMD_AddSingleParam(0x000F0111, 0x00000001); //breaks stuff
+}
+
+//records the draw commands for this VBO once, into vbo->commands, so they can be replayed
+//not thread safe : uses a static scratch buffer and temporarily redirects GPUCMD output to it
+//returns 0 on success, -1 if vbo is NULL, already has commands, or the allocation fails
+int gsVboPrecomputeCommands(gsVbo_s* vbo)
+{
+       if(!vbo || vbo->commands)return -1;
+
+       static u32 tmpBuffer[128];
+
+       u32* savedAdr; u32 savedSize, savedOffset;
+       GPUCMD_GetBuffer(&savedAdr, &savedSize, &savedOffset);
+       GPUCMD_SetBuffer(tmpBuffer, 128, 0);
+
+       GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
+
+       GPUCMD_GetBuffer(NULL, NULL, &vbo->commandsSize);
+       //restore the caller's command buffer before anything that can fail :
+       //the early return below used to leave GPUCMD pointed at tmpBuffer
+       GPUCMD_SetBuffer(savedAdr, savedSize, savedOffset);
+
+       vbo->commands=memalign(0x4, vbo->commandsSize*4);
+       if(!vbo->commands)return -1;
+       memcpy(vbo->commands, tmpBuffer, vbo->commandsSize*4);
+
+       return 0;
+}
+
+extern u32* gpuCmdBuf;
+extern u32 gpuCmdBufSize;
+extern u32 gpuCmdBufOffset;
+
+void _vboMemcpy50(u32* dst, u32* src);
+
+//appends size words of prebuilt commands at the current GPU command buffer offset
+//does nothing if cmd is NULL, size is 0, no buffer is set, or the commands would not fit
+void _GPUCMD_AddRawCommands(u32* cmd, u32 size)
+{
+       if(!cmd || !size || !gpuCmdBuf)return;
+       //offset and size are counted in words; never write past the end of the buffer
+       if(gpuCmdBufOffset>gpuCmdBufSize || size>gpuCmdBufSize-gpuCmdBufOffset)return;
+
+       //0x50 bytes is the size of one precomputed draw, which has a dedicated copy routine
+       if(size*4==0x50)_vboMemcpy50(&gpuCmdBuf[gpuCmdBufOffset], cmd);
+       else memcpy(&gpuCmdBuf[gpuCmdBufOffset], cmd, size*4);
+       gpuCmdBufOffset+=size;
+}
+
+//uploads any pending matrices, then emits the draw commands for the VBO
+//the commands are precomputed on first use and replayed afterwards; when no precomputed
+//commands are available the draw is emitted directly
+//returns 0 on success, -1 if vbo is NULL or holds no data
+int gsVboDraw(gsVbo_s* vbo)
+{
+       if(!vbo)return -1;
+       if(!vbo->data || !vbo->currentSize || !vbo->maxSize)return -1;
+
+       gsUpdateTransformation();
+
+       //returns immediately when the commands already exist
+       gsVboPrecomputeCommands(vbo);
+
+       if(!vbo->commands)
+       {
+               GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
+               return 0;
+       }
+
+       _GPUCMD_AddRawCommands(vbo->commands, vbo->commandsSize);
+       return 0;
+}
diff --git a/examples/gpu/source/gs.h b/examples/gpu/source/gs.h
new file mode 100644 (file)
index 0000000..6976fca
--- /dev/null
@@ -0,0 +1,59 @@
+#ifndef GS_H
+#define GS_H
+
+#include <3ds.h>
+#include "math.h"
+
+#define GS_MATRIXSTACK_SIZE (8)
+
+typedef enum
+{
+       GS_PROJECTION = 0,
+       GS_MODELVIEW = 1,
+       GS_MATRIXTYPES
+}GS_MATRIX;
+
+typedef struct
+{
+       u8* data;
+       u32 currentSize; // in bytes
+       u32 maxSize; // in bytes
+       u32 numVertices;
+       u32* commands;
+       u32 commandsSize;
+}gsVbo_s;
+
+
+void gsInit(DVLB_s* shader);
+void gsExit(void);
+
+void gsStartFrame(void);
+void gsAdjustBufferMatrices(mtx44 transformation);
+
+void* gsLinearAlloc(size_t size);
+void gsLinearFree(void* mem);
+
+float* gsGetMatrix(GS_MATRIX m);
+int gsLoadMatrix(GS_MATRIX m, float* data);
+int gsPushMatrix();
+int gsPopMatrix();
+int gsMatrixMode(GS_MATRIX m);
+
+void gsLoadIdentity();
+void gsProjectionMatrix(float fovy, float aspect, float near, float far);
+void gsRotateX(float x);
+void gsRotateY(float y);
+void gsRotateZ(float z);
+void gsScale(float x, float y, float z);
+void gsTranslate(float x, float y, float z);
+int gsMultMatrix(float* data);
+
+int gsVboInit(gsVbo_s* vbo);
+int gsVboCreate(gsVbo_s* vbo, u32 size);
+int gsVboFlushData(gsVbo_s* vbo);
+int gsVboDestroy(gsVbo_s* vbo);
+int gsVboDraw(gsVbo_s* vbo);
+void* gsVboGetOffset(gsVbo_s* vbo);
+int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units);
+
+#endif
index c059540a9cb9f63de3c18c64ad6ac7a97741359a..17a4bee4bb823b4ccc676f73dd5360ac783d0aaa 100644 (file)
+///////////////////////////////////////
+//            GPU example            //
+///////////////////////////////////////
+
+//this example is meant to show how to use the GPU to render a 3D object
+//it also shows how to do stereoscopic 3D
+//it uses GS which is a WIP GPU abstraction layer that's currently part of 3DScraft
+//keep in mind GPU reverse engineering is an ongoing effort and our understanding of it is still fairly limited.
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <math.h>
-#include <string.h>
 #include <3ds.h>
+
 #include "math.h"
+#include "gs.h"
+
 #include "test_vsh_shbin.h"
-#include "test_png_bin.h"
-#include "mdl.h"
+#include "texture_bin.h"
 
-DVLB_s* shader;
-float* vertArray;
-u32* texData;
+//will be moved into ctrulib at some point
+//volatile : this fixed address is polled every frame and is never written by this program,
+//so each read has to actually reload the value
+#define CONFIG_3D_SLIDERSTATE (*(volatile float*)0x1FF81080)
 
-void setUniformMatrix(u32 startreg, float* m)
-{
-       float param[16];
+//packs four 8-bit channels into a u32 (r in the top byte, a in the bottom byte)
+//channels are converted to u32 first : shifting an int in 0x80..0xFF left by 24 overflows a signed int
+#define RGBA8(r,g,b,a) ((((u32)(r)&0xFF)<<24) | (((u32)(g)&0xFF)<<16) | (((u32)(b)&0xFF)<<8) | (((u32)(a)&0xFF)<<0))
 
-       param[0x0]=m[3]; //w
-       param[0x1]=m[2]; //z
-       param[0x2]=m[1]; //y
-       param[0x3]=m[0]; //x
+//shader structure
+DVLB_s* shader;
+//texture data pointer
+u32* texData;
+//vbo structure
+gsVbo_s vbo;
 
-       param[0x4]=m[7];
-       param[0x5]=m[6];
-       param[0x6]=m[5];
-       param[0x7]=m[4];
-       
-       param[0x8]=m[11];
-       param[0x9]=m[10];
-       param[0xa]=m[9];
-       param[0xb]=m[8];
+//GPU framebuffer address
+u32* gpuOut=(u32*)0x1F119400;
+//GPU depth buffer address
+u32* gpuDOut=(u32*)0x1F370800;
 
-       param[0xc]=m[15];
-       param[0xd]=m[14];
-       param[0xe]=m[13];
-       param[0xf]=m[12];
+//angle for the vertex lighting (cf test.vsh)
+float lightAngle;
+//object position and rotation angle
+vect3Df_s position, angle;
 
-       GPU_SetUniform(startreg, (u32*)param, 4);
+//vertex structure
+typedef struct
+{
+       vect3Df_s position;
+       float texcoord[2];
+       vect3Df_s normal;
+}vertex_s;
+
+//object data (cube)
+//obviously this doesn't have to be defined manually, but we will here for the purposes of the example
+//each line is a vertex : {position.x, position.y, position.z}, {texcoord.t, texcoord.s}, {normal.x, normal.y, normal.z}
+//we're drawing triangles so three lines = one triangle
+const vertex_s modelVboData[]=
+{
+       //first face (PZ)
+               //first triangle
+               {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
+               {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
+               {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
+               //second triangle
+               {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
+               {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
+               {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
+       //second face (MZ)
+               //first triangle
+               {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
+               {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
+               {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
+               //second triangle
+               {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
+               {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
+               {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
+       //third face (PX)
+               //first triangle
+               {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
+               {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
+               {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
+               //second triangle
+               {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
+               {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
+               {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
+       //fourth face (MX)
+               //first triangle
+               {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
+               {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
+               {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
+               //second triangle
+               {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
+               {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
+               {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
+       //fifth face (PY)
+               //first triangle
+               {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
+               {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
+               {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
+               //second triangle
+               {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
+               {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
+               {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
+       //sixth face (MY)
+               //first triangle
+               {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
+               {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
+               {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
+               //second triangle
+               {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
+               {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
+               {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
+};
+
+//stolen from staplebutt
+void GPU_SetDummyTexEnv(u8 num)
+{
+       GPU_SetTexEnv(num, 
+               GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), 
+               GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0), 
+               GPU_TEVOPERANDS(0,0,0), 
+               GPU_TEVOPERANDS(0,0,0), 
+               GPU_REPLACE, 
+               GPU_REPLACE, 
+               0xFFFFFFFF);
 }
 
-float angle=0.0f;
-float angleZ=0.0f;
-float tx, ty, tz;
-
-u32* gpuOut=(u32*)0x1F119400;
-u32* gpuDOut=(u32*)0x1F370800;
-
 // topscreen
-void doFrame1()
+void renderFrame()
 {
-       //general setup
-               GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400);
-
-               GPU_DepthRange(-1.0f, 0.0f);
-
-               GPU_SetFaceCulling(GPU_CULL_BACK_CCW);
-               GPU_SetStencilTest(false, GPU_ALWAYS, 0x00);
-               GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL);
-
-       // ?
-               GPUCMD_AddSingleParam(0x00010062, 0x00000000); //param always 0x0 according to code
-               GPUCMD_AddSingleParam(0x000F0118, 0x00000000);
-
+       GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400);
+       
+       GPU_DepthRange(-1.0f, 0.0f);
+       GPU_SetFaceCulling(GPU_CULL_BACK_CCW);
+       GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00);
+       GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP);
+       GPU_SetBlendingColor(0,0,0,0);
+       GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL);
+       
+       GPUCMD_AddSingleParam(0x00010062, 0); 
+       GPUCMD_AddSingleParam(0x000F0118, 0);
+       
        //setup shader
-               SHDR_UseProgram(shader, 0);
-
-       //attribute buffers
-               GPU_SetAttributeBuffers(3, (u32*)osConvertVirtToPhys((u32)vertArray),
-                       GPU_ATTRIBFMT(0, 3, GPU_FLOAT)|GPU_ATTRIBFMT(1, 2, GPU_FLOAT)|GPU_ATTRIBFMT(2, 3, GPU_FLOAT),
-                       0xFFC, 0x210, 1, (u32[]){0x00000000}, (u64[]){0x210}, (u8[]){3});
-
-       //?
-               GPUCMD_AddSingleParam(0x000F0100, 0x00E40100);
-               GPUCMD_AddSingleParam(0x000F0101, 0x01010000);
-               GPUCMD_AddSingleParam(0x000F0104, 0x00000010);
+       SHDR_UseProgram(shader, 0);
        
-       //texturing stuff
-               GPUCMD_AddSingleParam(0x0002006F, 0x00000100);
-               GPUCMD_AddSingleParam(0x000F0080, 0x00011001); //enables/disables texturing
+       GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA);
+       GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00);
+       
+       GPU_SetTextureEnable(GPU_TEXUNIT0);
        
-       //texenv
-               GPU_SetTexEnv(3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00000000);
-               GPU_SetTexEnv(4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00000000);
-               GPU_SetTexEnv(5, GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
-                       GPU_TEVOPERANDS(0,0,0), GPU_TEVOPERANDS(0,0,0), GPU_MODULATE, GPU_MODULATE, 0xFFFFFFFF);
+       GPU_SetTexEnv(0, 
+               GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), 
+               GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
+               GPU_TEVOPERANDS(0,0,0), 
+               GPU_TEVOPERANDS(0,0,0), 
+               GPU_MODULATE, GPU_MODULATE, 
+               0xFFFFFFFF);
+       GPU_SetDummyTexEnv(1);
+       GPU_SetDummyTexEnv(2);
+       GPU_SetDummyTexEnv(3);
+       GPU_SetDummyTexEnv(4);
+       GPU_SetDummyTexEnv(5);
 
        //texturing stuff
-               GPU_SetTexture((u32*)osConvertVirtToPhys((u32)texData),256,256,0x6,GPU_RGBA8);
-
-       //setup matrices
-               float modelView[16];
-               float projection[16];
-
-               loadIdentity44(modelView);
-               loadIdentity44(projection);
-
-               translateMatrix(modelView, tx, ty, tz);
-               rotateMatrixX(modelView, angle);
-               rotateMatrixZ(modelView, angleZ);
-
-               initProjectionMatrix(projection, 1.3962634f, 240.0f/400.0f, 0.01f, 10.0f);
-
-               setUniformMatrix(0x24, modelView);
-               setUniformMatrix(0x20, projection);
-
-       //draw first model
-               GPU_DrawArray(GPU_TRIANGLES, mdlFaces*3);
-               // GPU_DrawElements(GPU_TRIANGLES, (u32*)(((u32)((void*)indArray-(void*)gspHeap))+0x20000000-base), 6);
-
-       //setup matrices
-               loadIdentity44(modelView);
-               loadIdentity44(projection);
-
-               translateMatrix(modelView, tx, -ty, tz);
-               rotateMatrixX(modelView, -angle);
-               rotateMatrixZ(modelView, -angleZ);
-
-               setUniformMatrix(0x24, modelView);
-
-       //draw second
-               GPU_DrawArray(GPU_TRIANGLES, mdlFaces*3);
-
-       //finalize stuff ?
-               GPU_FinishDrawing();
-}
-
-void demoControls(void)
-{
-       hidScanInput();
-       u32 PAD=hidKeysHeld();
-
-       if(PAD&KEY_UP)tx+=0.1f;
-       if(PAD&KEY_DOWN)tx-=0.1f;
-
-       if(PAD&KEY_LEFT)ty+=0.1f;
-       if(PAD&KEY_RIGHT)ty-=0.1f;
-
-       if(PAD&KEY_R)tz+=0.1f;
-       if(PAD&KEY_L)tz-=0.1f;
-
-       if(PAD&KEY_A)angle+=0.1f;
-       if(PAD&KEY_Y)angle-=0.1f;
+               GPU_SetTexture(GPU_TEXUNIT0, (u32*)osConvertVirtToPhys((u32)texData),128,128,GPU_TEXTURE_MAG_FILTER(GPU_NEAREST)|GPU_TEXTURE_MIN_FILTER(GPU_NEAREST),GPU_RGBA8);
+               GPU_SetAttributeBuffers(3, (u32*)osConvertVirtToPhys((u32)texData),
+                       GPU_ATTRIBFMT(0, 3, GPU_FLOAT)|GPU_ATTRIBFMT(1, 2, GPU_FLOAT)|GPU_ATTRIBFMT(2, 3, GPU_FLOAT),
+                       0xFFC, 0x210, 1, (u32[]){0x00000000}, (u64[]){0x210}, (u8[]){3});
 
-       if(PAD&KEY_X)angleZ+=0.1f;
-       if(PAD&KEY_B)angleZ-=0.1f;
+       //setup lighting (this is specific to our shader)
+               vect3Df_s lightDir=vnormf(vect3Df(cos(lightAngle), -1.0f, sin(lightAngle)));
+               GPU_SetUniform(SHDR_GetUniformRegister(shader, "lightDirection", 0), (u32*)(float[]){0.0f, -lightDir.z, -lightDir.y, -lightDir.x}, 4);
+               GPU_SetUniform(SHDR_GetUniformRegister(shader, "lightAmbient", 0), (u32*)(float[]){0.7f, 0.4f, 0.4f, 0.4f}, 4);
+
+       //initialize projection matrix to standard perspective stuff
+       gsMatrixMode(GS_PROJECTION);
+       gsProjectionMatrix(80.0f*M_PI/180.0f, 240.0f/400.0f, 0.01f, 100.0f);
+       gsRotateZ(M_PI/2); //because framebuffer is sideways...
+
+       //draw object
+               gsMatrixMode(GS_MODELVIEW);
+               gsPushMatrix();
+                       gsTranslate(position.x, position.y, position.z);
+                       gsRotateX(angle.x);
+                       gsRotateY(angle.y);
+                       gsVboDraw(&vbo);
+               gsPopMatrix();
+       GPU_FinishDrawing();
 }
 
-extern u32* gxCmdBuf;
-
-int main()
+int main(int argc, char** argv)
 {
+       //setup services
        srvInit();      
        aptInit();
        gfxInit();
        hidInit(NULL);
-       
+
+       //initialize GPU
        GPU_Init(NULL);
 
+       //let GFX know we're ok with doing stereoscopic 3D rendering
+       gfxSet3D(true);
+
+       //load our vertex shader binary
+       shader=SHDR_ParseSHBIN((u32*)test_vsh_shbin, test_vsh_shbin_size);
+
+       //initialize GS
+       gsInit(shader);
+
+       //allocate our GPU command buffers
+       //they *have* to be on the linear heap
        u32 gpuCmdSize=0x40000;
        u32* gpuCmd=(u32*)linearAlloc(gpuCmdSize*4);
+       u32* gpuCmdRight=(u32*)linearAlloc(gpuCmdSize*4);
 
-       GPU_Reset(gxCmdBuf, gpuCmd, gpuCmdSize);
+       //actually reset the GPU
+       GPU_Reset(NULL, gpuCmd, gpuCmdSize);
 
-       vertArray=(float*)linearAlloc(0x100000);
-       texData=(u32*)linearAlloc(0x100000);
+       //create texture
+       texData=(u32*)linearMemAlign(texture_bin_size, 0x80); //textures need to be 0x80-byte aligned
+       memcpy(texData, texture_bin, texture_bin_size);
 
-       memcpy(texData, test_png_bin, test_png_bin_size);
-       memcpy(vertArray, mdlData, sizeof(mdlData));
-       GSPGPU_FlushDataCache(NULL, mdlData, sizeof(mdlData));
-       GSPGPU_FlushDataCache(NULL, test_png_bin, test_png_bin_size);
+       //create VBO
+       gsVboInit(&vbo);
+       gsVboCreate(&vbo, sizeof(modelVboData));
+       gsVboAddData(&vbo, (void*)modelVboData, sizeof(modelVboData), sizeof(modelVboData)/sizeof(vertex_s));
+       gsVboFlushData(&vbo);
 
-       tx=ty=0.0f; tz=-0.1f;
-       shader=SHDR_ParseSHBIN((u32*)test_vsh_shbin,test_vsh_shbin_size);
+       //initialize object position and angle
+       position=vect3Df(0.0f, 0.0f, -2.0f);
+       angle=vect3Df(M_PI/4, M_PI/4, 0.0f);
 
-       GX_SetMemoryFill(gxCmdBuf, (u32*)gpuOut, 0x404040FF, (u32*)&gpuOut[0x2EE00], 0x201, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], 0x201);
-       gspWaitForPSC0();
-       gfxSwapBuffersGpu();
+       //background color (blue)
+       u32 backgroundColor=RGBA8(0x68, 0xB0, 0xD8, 0xFF);
 
        while(aptMainLoop())
        {
-               demoControls();
-
-               GX_SetMemoryFill(gxCmdBuf, (u32*)gpuOut, 0x404040FF, (u32*)&gpuOut[0x2EE00], 0x201, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], 0x201);
-               gspWaitForPSC0();
-
-               GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0);
-               doFrame1();
+               //get current 3D slider state
+               float slider=CONFIG_3D_SLIDERSTATE;
+
+               //controls
+               hidScanInput();
+               //START to exit to hbmenu
+               if(keysDown()&KEY_START)break;
+
+               //A/B to change vertex lighting angle
+               if(keysHeld()&KEY_A)lightAngle+=0.1f;
+               if(keysHeld()&KEY_B)lightAngle-=0.1f;
+
+               //D-PAD to rotate object
+               if(keysHeld()&KEY_RIGHT)angle.x+=0.05f;
+               if(keysHeld()&KEY_LEFT)angle.x-=0.05f;
+               if(keysHeld()&KEY_UP)angle.y+=0.05f;
+               if(keysHeld()&KEY_DOWN)angle.y-=0.05f;
+
+               //R/L to bring object closer to or move it further from the camera
+               if(keysHeld()&KEY_R)position.z+=0.1f;
+               if(keysHeld()&KEY_L)position.z-=0.1f;
+
+               //generate our GPU command buffer for this frame
+               gsStartFrame();
+               renderFrame();
                GPUCMD_Finalize();
-               GPUCMD_Run(gxCmdBuf);
-               gspWaitForP3D();
 
+               if(slider>0.0f)
+               {
+                       //new and exciting 3D !
+                       //make a copy of left gpu buffer
+                       u32 offset; GPUCMD_GetBuffer(NULL, NULL, &offset);
+                       memcpy(gpuCmdRight, gpuCmd, offset*4);
+
+                       //setup interaxial
+                       float interaxial=slider*0.12f;
+
+                       //adjust left gpu buffer for 3D !
+                       {mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, -interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);}
+
+                       //draw left framebuffer
+                       GPUCMD_FlushAndRun(NULL);
+
+                       //while GPU starts drawing the left buffer, adjust right one for 3D !
+                       GPUCMD_SetBuffer(gpuCmdRight, gpuCmdSize, offset);
+                       {mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);}
+
+                       //we wait for the left buffer to finish drawing
+                       gspWaitForP3D();
+                       GX_SetDisplayTransfer(NULL, (u32*)gpuOut, 0x019001E0, (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), 0x019001E0, 0x01001000);
+                       gspWaitForPPF();
+
+                       //we draw the right buffer, wait for it to finish and then switch back to left one
+                       //clear the screen
+                       GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], 0x201, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], 0x201);
+                       gspWaitForPSC0();
+
+                       //draw the right framebuffer
+                       GPUCMD_FlushAndRun(NULL);
+                       gspWaitForP3D();
+
+                       //transfer from GPU output buffer to actual framebuffer
+                       GX_SetDisplayTransfer(NULL, (u32*)gpuOut, 0x019001E0, (u32*)gfxGetFramebuffer(GFX_TOP, GFX_RIGHT, NULL, NULL), 0x019001E0, 0x01001000);
+                       gspWaitForPPF();
+                       GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0);
+               }else{
+                       //boring old 2D !
+
+                       //draw the frame
+                       GPUCMD_FlushAndRun(NULL);
+                       gspWaitForP3D();
+
+                       //transfer from GPU output buffer to actual framebuffer
+                       GX_SetDisplayTransfer(NULL, (u32*)gpuOut, 0x019001E0, (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), 0x019001E0, 0x01001000);
+                       gspWaitForPPF();
+               }
+
+               //clear the screen
+               GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], 0x201, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], 0x201);
+               gspWaitForPSC0();
                gfxSwapBuffersGpu();
-               GX_SetDisplayTransfer(gxCmdBuf, (u32*)gpuOut, 0x019001E0, (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), 0x019001E0, 0x01001000);
-               gspWaitForPPF();
-               gspWaitForVBlank();
+
+               gspWaitForEvent(GSPEVENT_VBlank0, true);
        }
 
+       gsExit();
        hidExit();
        gfxExit();
        aptExit();
index 9c0977a759c60684ca0c36ec52d56276ec68b21d..13ab3dd770e7216995242e867dccebddd0998034 100644 (file)
@@ -15,6 +15,7 @@ void multMatrix44(float* m1, float* m2, float* m) //4x4
 {
        int i, j;
        for(i=0;i<4;i++)for(j=0;j<4;j++)m[i+j*4]=(m1[0+j*4]*m2[i+0*4])+(m1[1+j*4]*m2[i+1*4])+(m1[2+j*4]*m2[i+2*4])+(m1[3+j*4]*m2[i+3*4]);
+
 }
 
 void translateMatrix(float* tm, float x, float y, float z)
@@ -26,11 +27,16 @@ void translateMatrix(float* tm, float x, float y, float z)
        rm[7]=y;
        rm[11]=z;
        
-       multMatrix44(rm,tm,m);
+       multMatrix44(tm,rm,m);
        memcpy(tm,m,16*sizeof(float));
 }
 
-void rotateMatrixX(float* tm, float x)
+// 00 01 02 03
+// 04 05 06 07
+// 08 09 10 11
+// 12 13 14 15
+
+void rotateMatrixX(float* tm, float x, bool r)
 {
        float rm[16], m[16];
        memset(rm, 0x00, 16*4);
@@ -40,11 +46,27 @@ void rotateMatrixX(float* tm, float x)
        rm[9]=-sin(x);
        rm[10]=cos(x);
        rm[15]=1.0f;
-       multMatrix44(tm,rm,m);
+       if(!r)multMatrix44(tm,rm,m);
+       else multMatrix44(rm,tm,m);
+       memcpy(tm,m,16*sizeof(float));
+}
+
+void rotateMatrixY(float* tm, float x, bool r)
+{
+       float rm[16], m[16];
+       memset(rm, 0x00, 16*4);
+       rm[0]=cos(x);
+       rm[2]=sin(x);
+       rm[5]=1.0f;
+       rm[8]=-sin(x);
+       rm[10]=cos(x);
+       rm[15]=1.0f;
+       if(!r)multMatrix44(tm,rm,m);
+       else multMatrix44(rm,tm,m);
        memcpy(tm,m,16*sizeof(float));
 }
 
-void rotateMatrixZ(float* tm, float x)
+void rotateMatrixZ(float* tm, float x, bool r)
 {
        float rm[16], m[16];
        memset(rm, 0x00, 16*4);
@@ -54,7 +76,8 @@ void rotateMatrixZ(float* tm, float x)
        rm[5]=cos(x);
        rm[10]=1.0f;
        rm[15]=1.0f;
-       multMatrix44(tm,rm,m);
+       if(!r)multMatrix44(tm,rm,m);
+       else multMatrix44(rm,tm,m);
        memcpy(tm,m,16*sizeof(float));
 }
 
@@ -69,27 +92,57 @@ void initProjectionMatrix(float* m, float fovy, float aspect, float near, float
 {
        float top = near*tan(fovy/2);
        float right = (top*aspect);
+
+       float mp[4*4];
        
-       *(m++) = near/right;
-       *(m++) = 0.0f;
-       *(m++) = 0.0f;
-       *(m++) = 0.0f;
-
-       *(m++) = 0.0f;
-       *(m++) = near/top;
-       *(m++) = 0.0f;
-       *(m++) = 0.0f;
-
-       *(m++) = 0.0f;
-       *(m++) = 0.0f;
-       // *(m++) = -(far+near)/(far-near);
-       *(m++) = 0.0f;
-       // *(m++) = -2.0f*(far*near)/(far-near);
-       // *(m++) = 1.0f;
-       *(m++) = -1.0f;
-
-       *(m++) = 0.0f;
-       *(m++) = 0.0f;
-       *(m++) = -1.0f;
-       *(m++) = 0.0f;
+       mp[0x0] = near/right;
+       mp[0x1] = 0.0f;
+       mp[0x2] = 0.0f;
+       mp[0x3] = 0.0f;
+
+       mp[0x4] = 0.0f;
+       mp[0x5] = near/top;
+       mp[0x6] = 0.0f;
+       mp[0x7] = 0.0f;
+
+       mp[0x8] = 0.0f;
+       mp[0x9] = 0.0f;
+       mp[0xA] = -(far+near)/(far-near);
+       mp[0xB] = -2.0f*(far*near)/(far-near);
+
+       mp[0xC] = 0.0f;
+       mp[0xD] = 0.0f;
+       mp[0xE] = -1.0f;
+       mp[0xF] = 0.0f;
+
+       float mp2[4*4];
+       loadIdentity44(mp2);
+       mp2[0xA]=0.5;
+       mp2[0xB]=-0.5;
+
+       multMatrix44(mp2, mp, m);
+}
+
+vect3Df_s getMatrixColumn(float* m, u8 i)
+{
+       if(!m || i>=4)return vect3Df(0,0,0);
+       return vect3Df(m[0+i*4],m[1+i*4],m[2+i*4]);
+}
+
+vect3Df_s getMatrixRow(float* m, u8 i)
+{
+       if(!m || i>=4)return vect3Df(0,0,0);
+       return vect3Df(m[i+0*4],m[i+1*4],m[i+2*4]);
+}
+
+vect4Df_s getMatrixColumn4(float* m, u8 i)
+{
+       if(!m || i>=4)return vect4Df(0,0,0,0);
+       return vect4Df(m[0+i*4],m[1+i*4],m[2+i*4],m[3+i*4]);
+}
+
+vect4Df_s getMatrixRow4(float* m, u8 i)
+{
+       if(!m || i>=4)return vect4Df(0,0,0,0);
+       return vect4Df(m[i+0*4],m[i+1*4],m[i+2*4],m[i+3*4]);
 }
index 5eed3602ec8eb09110372eb331e8fd06ac8f3056..8137b902932885e15041e01a7877c4748af61acf 100644 (file)
 #ifndef MATH_H
+#define MATH_H
+
+#include <3ds/types.h>
+#include <math.h>
+
+typedef float mtx44[4][4];
+typedef float mtx33[3][3];
+
+typedef struct
+{
+       s32 x, y, z;
+}vect3Di_s;
+
+static inline vect3Di_s vect3Di(s32 x, s32 y, s32 z)
+{
+       return (vect3Di_s){x,y,z};
+}
+
+static inline vect3Di_s vaddi(vect3Di_s u, vect3Di_s v)
+{
+       return (vect3Di_s){u.x+v.x,u.y+v.y,u.z+v.z};
+}
+
+static inline vect3Di_s vsubi(vect3Di_s u, vect3Di_s v)
+{
+       return (vect3Di_s){u.x-v.x,u.y-v.y,u.z-v.z};
+}
+
+static inline vect3Di_s vmuli(vect3Di_s v, s32 f)
+{
+       return (vect3Di_s){v.x*f,v.y*f,v.z*f};
+}
+
+typedef struct
+{
+       float x, y, z;
+}vect3Df_s;
+
+static inline vect3Df_s vect3Df(float x, float y, float z)
+{
+       return (vect3Df_s){x,y,z};
+}
+
+static inline vect3Df_s vaddf(vect3Df_s u, vect3Df_s v)
+{
+       return (vect3Df_s){u.x+v.x,u.y+v.y,u.z+v.z};
+}
+
+static inline vect3Df_s vsubf(vect3Df_s u, vect3Df_s v)
+{
+       return (vect3Df_s){u.x-v.x,u.y-v.y,u.z-v.z};
+}
+
+static inline vect3Df_s vmulf(vect3Df_s v, float f)
+{
+       return (vect3Df_s){v.x*f,v.y*f,v.z*f};
+}
+
+static inline vect3Df_s vscalef(vect3Df_s v1, vect3Df_s v2)
+{
+       return (vect3Df_s){v1.x*v2.x,v1.y*v2.y,v1.z*v2.z};
+}
+
+static inline float vmagf(vect3Df_s v)
+{
+       return sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
+}
+
+static inline float vdistf(vect3Df_s v1, vect3Df_s v2)
+{
+       return sqrtf((v1.x-v2.x)*(v1.x-v2.x)+(v1.y-v2.y)*(v1.y-v2.y)+(v1.z-v2.z)*(v1.z-v2.z));
+}
+
+static inline vect3Df_s vnormf(vect3Df_s v)
+{
+       const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
+       return (vect3Df_s){v.x/l,v.y/l,v.z/l};
+}
+
+typedef struct
+{
+       float x, y, z, w;
+}vect4Df_s;
+
+static inline vect4Df_s vect4Df(float x, float y, float z, float w)
+{
+       return (vect4Df_s){x,y,z,w};
+}
+
+static inline vect4Df_s vaddf4(vect4Df_s u, vect4Df_s v)
+{
+       return (vect4Df_s){u.x+v.x,u.y+v.y,u.z+v.z,u.w+v.w};
+}
+
+static inline vect4Df_s vsubf4(vect4Df_s u, vect4Df_s v)
+{
+       return (vect4Df_s){u.x-v.x,u.y-v.y,u.z-v.z,u.w-v.w};
+}
+
+static inline vect4Df_s vmulf4(vect4Df_s v, float f)
+{
+       return (vect4Df_s){v.x*f,v.y*f,v.z*f,v.w*f};
+}
+
+static inline float vdotf4(vect4Df_s v1, vect4Df_s v2)
+{
+       return v1.x*v2.x+v1.y*v2.y+v1.z*v2.z+v1.w*v2.w;
+}
+
+static inline vect4Df_s vnormf4(vect4Df_s v)
+{
+       const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z+v.w*v.w);
+       return (vect4Df_s){v.x/l,v.y/l,v.z/l,v.w/l};
+}
+
+//conversions between float and integer vectors
+static inline vect3Di_s vf2i(vect3Df_s v)
+{
+       return (vect3Di_s){floorf(v.x),floorf(v.y),floorf(v.z)};
+}
+
+static inline vect3Df_s vi2f(vect3Di_s v)
+{
+       return (vect3Df_s){(float)v.x,(float)v.y,(float)v.z};
+}
 
 void loadIdentity44(float* m);
 void multMatrix44(float* m1, float* m2, float* m);
 
 void translateMatrix(float* tm, float x, float y, float z);
-void rotateMatrixX(float* tm, float x);
-void rotateMatrixZ(float* tm, float x);
+void rotateMatrixX(float* tm, float x, bool r);
+void rotateMatrixY(float* tm, float x, bool r);
+void rotateMatrixZ(float* tm, float x, bool r);
 void scaleMatrix(float* tm, float x, float y, float z);
 
 void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far);
 
+vect3Df_s getMatrixColumn(float* m, u8 i);
+vect3Df_s getMatrixRow(float* m, u8 i);
+vect4Df_s getMatrixColumn4(float* m, u8 i);
+vect4Df_s getMatrixRow4(float* m, u8 i);
+
 #endif