+++ /dev/null
-gpu
-=======
-
-example of how to use the GPU with libctru
-
-before trying to compile, make sure to download aemstro
-( https://github.com/smealum/aemstro reflog: 51bfeef9e1a0149726dca43b50919bd45917015a )
-and update AEMSTRO environment variable with the proper path
-
-You'll also need to install Python 3 and have that in your path.
-
+++ /dev/null
-; setup constants
- .const c20, 1.0, 0.0, 0.5, 1.0
-
-; setup outmap
- .out o0, result.position, 0xF
- .out o1, result.color, 0xF
- .out o2, result.texcoord0, 0x3
- .out o3, result.texcoord1, 0x3
- .out o4, result.texcoord2, 0x3
-
-; setup uniform map (not required)
- .uniform c0, c3, projection
- .uniform c4, c7, modelview
- .uniform c8, c8, lightDirection
- .uniform c9, c9, lightAmbient
-
- .vsh vmain, end_vmain
-
-;code
- vmain:
- mov r1, v0 (0x4)
- mov r1, c20 (0x3)
- ; temp = modvMtx * in.pos
- dp4 r0, c4, r1 (0x0)
- dp4 r0, c5, r1 (0x1)
- dp4 r0, c6, r1 (0x2)
- mov r0, c20 (0x3)
- ; result.pos = projMtx * temp
- dp4 o0, c0, r0 (0x0)
- dp4 o0, c1, r0 (0x1)
- dp4 o0, c2, r0 (0x2)
- dp4 o0, c3, r0 (0x3)
- ; result.texcoord = in.texcoord
- mov o2, v1 (0x5)
- mov o3, c20 (0x7)
- mov o4, c20 (0x7)
- ; result.color = crappy lighting
- dp3 r0, c8, v2 (0x4)
- max r0, c20, r0 (0x9)
- mul r0, c9, r0 (0x4)
- add o1, c9, r0 (0x4)
- mov o1, c20 (0x3)
- nop
- end
- end_vmain:
-
-;operand descriptors
- .opdesc x___, xyzw, xyzw ; 0x0
- .opdesc _y__, xyzw, xyzw ; 0x1
- .opdesc __z_, xyzw, xyzw ; 0x2
- .opdesc ___w, xyzw, xyzw ; 0x3
- .opdesc xyz_, xyzw, xyzw ; 0x4
- .opdesc xyzw, xyzw, xyzw ; 0x5
- .opdesc x_zw, xyzw, xyzw ; 0x6
- .opdesc xyzw, yyyw, xyzw ; 0x7
- .opdesc xyz_, wwww, wwww ; 0x8
- .opdesc xyz_, yyyy, xyzw ; 0x9
+++ /dev/null
-.section ".text"\r
-.arm\r
-.align 4\r
-.global _vboMemcpy50\r
-\r
-# r0 : dst\r
-# r1 : src\r
-# fixed size 0x50\r
-_vboMemcpy50:\r
- push {r4-r11}\r
- ldmia r1!, {r2-r12}\r
- stmia r0!, {r2-r12}\r
- ldmia r1!, {r2-r12}\r
- stmia r0!, {r2-r12}\r
- pop {r4-r11}\r
- bx lr\r
+++ /dev/null
-#include <stdlib.h>
-#include <string.h>
-#include <malloc.h>
-#include <3ds.h>
-
-#include "gs.h"
-#include "math.h"
-
-#define BUFFERMATRIXLIST_SIZE (GS_MATRIXSTACK_SIZE*4)
-
-static void gsInitMatrixStack();
-
-Handle linearAllocMutex;
-
-static u32 gsMatrixStackRegisters[GS_MATRIXTYPES];
-
-typedef struct
-{
- u32 offset;
- mtx44 data;
-}bufferMatrix_s;
-
-bufferMatrix_s bufferMatrixList[BUFFERMATRIXLIST_SIZE];
-int bufferMatrixListLength;
-
-//----------------------
-// GS SYSTEM STUFF
-//----------------------
-
-void initBufferMatrixList()
-{
- bufferMatrixListLength=0;
-}
-
-void gsInit(shaderProgram_s* shader)
-{
- gsInitMatrixStack();
- initBufferMatrixList();
- svcCreateMutex(&linearAllocMutex, false);
- if(shader)
- {
- gsMatrixStackRegisters[0]=shaderInstanceGetUniformLocation(shader->vertexShader, "projection");
- gsMatrixStackRegisters[1]=shaderInstanceGetUniformLocation(shader->vertexShader, "modelview");
- shaderProgramUse(shader);
- }
-}
-
-void gsExit(void)
-{
- svcCloseHandle(linearAllocMutex);
-}
-
-void gsStartFrame(void)
-{
- GPUCMD_SetBufferOffset(0);
- initBufferMatrixList();
-}
-
-void* gsLinearAlloc(size_t size)
-{
- void* ret=NULL;
-
- svcWaitSynchronization(linearAllocMutex, U64_MAX);
- ret=linearAlloc(size);
- svcReleaseMutex(linearAllocMutex);
-
- return ret;
-}
-
-void gsLinearFree(void* mem)
-{
- svcWaitSynchronization(linearAllocMutex, U64_MAX);
- linearFree(mem);
- svcReleaseMutex(linearAllocMutex);
-}
-
-//----------------------
-// MATRIX STACK STUFF
-//----------------------
-
-static mtx44 gsMatrixStacks[GS_MATRIXTYPES][GS_MATRIXSTACK_SIZE];
-static u32 gsMatrixStackRegisters[GS_MATRIXTYPES]={0x00, 0x04};
-static u8 gsMatrixStackOffsets[GS_MATRIXTYPES];
-static bool gsMatrixStackUpdated[GS_MATRIXTYPES];
-static GS_MATRIX gsCurrentMatrixType;
-
-static void gsInitMatrixStack()
-{
- int i;
- for(i=0; i<GS_MATRIXTYPES; i++)
- {
- gsMatrixStackOffsets[i]=0;
- gsMatrixStackUpdated[i]=true;
- loadIdentity44((float*)gsMatrixStacks[i][0]);
- }
- gsCurrentMatrixType=GS_PROJECTION;
-}
-
-float* gsGetMatrix(GS_MATRIX m)
-{
- if(m<0 || m>=GS_MATRIXTYPES)return NULL;
-
- return (float*)gsMatrixStacks[m][gsMatrixStackOffsets[m]];
-}
-
-int gsLoadMatrix(GS_MATRIX m, float* data)
-{
- if(m<0 || m>=GS_MATRIXTYPES || !data)return -1;
-
- memcpy(gsGetMatrix(m), data, sizeof(mtx44));
-
- gsMatrixStackUpdated[m]=true;
-
- return 0;
-}
-
-int gsPushMatrix()
-{
- const GS_MATRIX m=gsCurrentMatrixType;
- if(m<0 || m>=GS_MATRIXTYPES)return -1;
- if(gsMatrixStackOffsets[m]<0 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE-1)return -1;
-
- float* cur=gsGetMatrix(m);
- gsMatrixStackOffsets[m]++;
- memcpy(gsGetMatrix(m), cur, sizeof(mtx44));
-
- return 0;
-}
-
-int gsPopMatrix()
-{
- const GS_MATRIX m=gsCurrentMatrixType;
- if(m<0 || m>=GS_MATRIXTYPES)return -1;
- if(gsMatrixStackOffsets[m]<1 || gsMatrixStackOffsets[m]>=GS_MATRIXSTACK_SIZE)return -1;
-
- gsMatrixStackOffsets[m]--;
-
- gsMatrixStackUpdated[m]=true;
-
- return 0;
-}
-
-int gsMatrixMode(GS_MATRIX m)
-{
- if(m<0 || m>=GS_MATRIXTYPES)return -1;
-
- gsCurrentMatrixType=m;
-
- return 0;
-}
-
-//------------------------
-// MATRIX TRANSFORM STUFF
-//------------------------
-
-int gsMultMatrix(float* data)
-{
- if(!data)return -1;
-
- mtx44 tmp;
- multMatrix44(gsGetMatrix(gsCurrentMatrixType), data, (float*)tmp);
- memcpy(gsGetMatrix(gsCurrentMatrixType), (float*)tmp, sizeof(mtx44));
-
- gsMatrixStackUpdated[gsCurrentMatrixType]=true;
-
- return 0;
-}
-
-void gsLoadIdentity()
-{
- loadIdentity44(gsGetMatrix(gsCurrentMatrixType));
- gsMatrixStackUpdated[gsCurrentMatrixType]=true;
-}
-
-void gsProjectionMatrix(float fovy, float aspect, float near, float far)
-{
- initProjectionMatrix(gsGetMatrix(gsCurrentMatrixType), fovy, aspect, near, far);
- gsMatrixStackUpdated[gsCurrentMatrixType]=true;
-}
-
-void gsRotateX(float x)
-{
- rotateMatrixX(gsGetMatrix(gsCurrentMatrixType), x, false);
- gsMatrixStackUpdated[gsCurrentMatrixType]=true;
-}
-
-void gsRotateY(float y)
-{
- rotateMatrixY(gsGetMatrix(gsCurrentMatrixType), y, false);
- gsMatrixStackUpdated[gsCurrentMatrixType]=true;
-}
-
-void gsRotateZ(float z)
-{
- rotateMatrixZ(gsGetMatrix(gsCurrentMatrixType), z, false);
- gsMatrixStackUpdated[gsCurrentMatrixType]=true;
-}
-
-void gsScale(float x, float y, float z)
-{
- scaleMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z);
- gsMatrixStackUpdated[gsCurrentMatrixType]=true;
-}
-
-void gsTranslate(float x, float y, float z)
-{
- translateMatrix(gsGetMatrix(gsCurrentMatrixType), x, y, z);
- gsMatrixStackUpdated[gsCurrentMatrixType]=true;
-}
-
-//----------------------
-// MATRIX RENDER STUFF
-//----------------------
-
-static void gsSetUniformMatrix(u32 startreg, float* m)
-{
- float param[16];
-
- param[0x0]=m[3]; //w
- param[0x1]=m[2]; //z
- param[0x2]=m[1]; //y
- param[0x3]=m[0]; //x
-
- param[0x4]=m[7];
- param[0x5]=m[6];
- param[0x6]=m[5];
- param[0x7]=m[4];
-
- param[0x8]=m[11];
- param[0x9]=m[10];
- param[0xa]=m[9];
- param[0xb]=m[8];
-
- param[0xc]=m[15];
- param[0xd]=m[14];
- param[0xe]=m[13];
- param[0xf]=m[12];
-
- GPU_SetFloatUniform(GPU_VERTEX_SHADER, startreg, (u32*)param, 4);
-}
-
-static int gsUpdateTransformation()
-{
- GS_MATRIX m;
- for(m=0; m<GS_MATRIXTYPES; m++)
- {
- if(gsMatrixStackUpdated[m])
- {
- if(m==GS_PROJECTION && bufferMatrixListLength<BUFFERMATRIXLIST_SIZE)
- {
- GPUCMD_GetBuffer(NULL, NULL, &bufferMatrixList[bufferMatrixListLength].offset);
- memcpy(bufferMatrixList[bufferMatrixListLength].data, gsGetMatrix(m), sizeof(mtx44));
- bufferMatrixListLength++;
- }
- gsSetUniformMatrix(gsMatrixStackRegisters[m], gsGetMatrix(m));
- gsMatrixStackUpdated[m]=false;
- }
- }
- return 0;
-}
-
-void gsAdjustBufferMatrices(mtx44 transformation)
-{
- int i;
- u32* buffer;
- u32 offset;
- GPUCMD_GetBuffer(&buffer, NULL, &offset);
- for(i=0; i<bufferMatrixListLength; i++)
- {
- u32 o=bufferMatrixList[i].offset;
- if(o+2<offset) //TODO : better check, need to account for param size
- {
- mtx44 newMatrix;
- GPUCMD_SetBufferOffset(o);
- multMatrix44((float*)bufferMatrixList[i].data, (float*)transformation, (float*)newMatrix);
- gsSetUniformMatrix(gsMatrixStackRegisters[GS_PROJECTION], (float*)newMatrix);
- }
- }
- GPUCMD_SetBufferOffset(offset);
-}
-
-//----------------------
-// VBO STUFF
-//----------------------
-
-int gsVboInit(gsVbo_s* vbo)
-{
- if(!vbo)return -1;
-
- vbo->data=NULL;
- vbo->currentSize=0;
- vbo->maxSize=0;
- vbo->commands=NULL;
- vbo->commandsSize=0;
-
- return 0;
-}
-
-int gsVboCreate(gsVbo_s* vbo, u32 size)
-{
- if(!vbo)return -1;
-
- vbo->data=gsLinearAlloc(size);
- vbo->numVertices=0;
- vbo->currentSize=0;
- vbo->maxSize=size;
-
- return 0;
-}
-
-void* gsVboGetOffset(gsVbo_s* vbo)
-{
- if(!vbo)return NULL;
-
- return (void*)(&((u8*)vbo->data)[vbo->currentSize]);
-}
-
-int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units)
-{
- if(!vbo || !data || !size)return -1;
- if(((s32)vbo->maxSize)-((s32)vbo->currentSize) < size)return -1;
-
- memcpy(gsVboGetOffset(vbo), data, size);
- vbo->currentSize+=size;
- vbo->numVertices+=units;
-
- return 0;
-}
-
-int gsVboFlushData(gsVbo_s* vbo)
-{
- if(!vbo)return -1;
-
- //unnecessary if we use flushAndRun
- // GSPGPU_FlushDataCache(NULL, vbo->data, vbo->currentSize);
-
- return 0;
-}
-
-int gsVboDestroy(gsVbo_s* vbo)
-{
- if(!vbo)return -1;
-
- if(vbo->commands)free(vbo->commands);
- if(vbo->data)gsLinearFree(vbo->data);
- gsVboInit(vbo);
-
- return 0;
-}
-
-extern u32 debugValue[];
-
-void GPU_DrawArrayDirectly(GPU_Primitive_t primitive, u8* data, u32 n)
-{
- //set attribute buffer address
- GPUCMD_AddSingleParam(0x000F0200, (osConvertVirtToPhys((u32)data))>>3);
- //set primitive type
- GPUCMD_AddSingleParam(0x0002025E, primitive);
- GPUCMD_AddSingleParam(0x0002025F, 0x00000001);
- //index buffer not used for drawArrays but 0x000F0227 still required
- GPUCMD_AddSingleParam(0x000F0227, 0x80000000);
- //pass number of vertices
- GPUCMD_AddSingleParam(0x000F0228, n);
-
- GPUCMD_AddSingleParam(0x00010253, 0x00000001);
-
- GPUCMD_AddSingleParam(0x00010245, 0x00000000);
- GPUCMD_AddSingleParam(0x000F022E, 0x00000001);
- GPUCMD_AddSingleParam(0x00010245, 0x00000001);
- GPUCMD_AddSingleParam(0x000F0231, 0x00000001);
-
- // GPUCMD_AddSingleParam(0x000F0111, 0x00000001); //breaks stuff
-}
-
-//not thread safe
-int gsVboPrecomputeCommands(gsVbo_s* vbo)
-{
- if(!vbo || vbo->commands)return -1;
-
- static u32 tmpBuffer[128];
-
- u32* savedAdr; u32 savedSize, savedOffset;
- GPUCMD_GetBuffer(&savedAdr, &savedSize, &savedOffset);
- GPUCMD_SetBuffer(tmpBuffer, 128, 0);
-
- GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
-
- GPUCMD_GetBuffer(NULL, NULL, &vbo->commandsSize);
- vbo->commands=memalign(0x4, vbo->commandsSize*4);
- if(!vbo->commands)return -1;
- memcpy(vbo->commands, tmpBuffer, vbo->commandsSize*4);
-
- GPUCMD_SetBuffer(savedAdr, savedSize, savedOffset);
-
- return 0;
-}
-
-extern u32* gpuCmdBuf;
-extern u32 gpuCmdBufSize;
-extern u32 gpuCmdBufOffset;
-
-void _vboMemcpy50(u32* dst, u32* src);
-
-void _GPUCMD_AddRawCommands(u32* cmd, u32 size)
-{
- if(!cmd || !size)return;
-
- if(size*4==0x50)_vboMemcpy50(&gpuCmdBuf[gpuCmdBufOffset], cmd);
- else memcpy(&gpuCmdBuf[gpuCmdBufOffset], cmd, size*4);
- gpuCmdBufOffset+=size;
-}
-
-int gsVboDraw(gsVbo_s* vbo)
-{
- if(!vbo || !vbo->data || !vbo->currentSize || !vbo->maxSize)return -1;
-
- gsUpdateTransformation();
-
- gsVboPrecomputeCommands(vbo);
-
- // u64 val=svcGetSystemTick();
- if(vbo->commands)
- {
- _GPUCMD_AddRawCommands(vbo->commands, vbo->commandsSize);
- }else{
- GPU_DrawArrayDirectly(GPU_TRIANGLES, vbo->data, vbo->numVertices);
- }
- // debugValue[5]+=(u32)(svcGetSystemTick()-val);
- // debugValue[6]++;
-
- return 0;
-}
+++ /dev/null
-#ifndef GS_H
-#define GS_H
-
-#include <3ds.h>
-#include "math.h"
-
-#define GS_MATRIXSTACK_SIZE (8)
-
-typedef enum
-{
- GS_PROJECTION = 0,
- GS_MODELVIEW = 1,
- GS_MATRIXTYPES
-}GS_MATRIX;
-
-typedef struct
-{
- u8* data;
- u32 currentSize; // in bytes
- u32 maxSize; // in bytes
- u32 numVertices;
- u32* commands;
- u32 commandsSize;
-}gsVbo_s;
-
-
-void gsInit(shaderProgram_s* shader);
-void gsExit(void);
-
-void gsStartFrame(void);
-void gsAdjustBufferMatrices(mtx44 transformation);
-
-void* gsLinearAlloc(size_t size);
-void gsLinearFree(void* mem);
-
-float* gsGetMatrix(GS_MATRIX m);
-int gsLoadMatrix(GS_MATRIX m, float* data);
-int gsPushMatrix();
-int gsPopMatrix();
-int gsMatrixMode(GS_MATRIX m);
-
-void gsLoadIdentity();
-void gsProjectionMatrix(float fovy, float aspect, float near, float far);
-void gsRotateX(float x);
-void gsRotateY(float y);
-void gsRotateZ(float z);
-void gsScale(float x, float y, float z);
-void gsTranslate(float x, float y, float z);
-int gsMultMatrix(float* data);
-
-int gsVboInit(gsVbo_s* vbo);
-int gsVboCreate(gsVbo_s* vbo, u32 size);
-int gsVboFlushData(gsVbo_s* vbo);
-int gsVboDestroy(gsVbo_s* vbo);
-int gsVboDraw(gsVbo_s* vbo);
-void* gsVboGetOffset(gsVbo_s* vbo);
-int gsVboAddData(gsVbo_s* vbo, void* data, u32 size, u32 units);
-
-#endif
+++ /dev/null
-///////////////////////////////////////
-// GPU example //
-///////////////////////////////////////
-
-//this example is meant to show how to use the GPU to render a 3D object
-//it also shows how to do stereoscopic 3D
-//it uses GS which is a WIP GPU abstraction layer that's currently part of 3DScraft
-//keep in mind GPU reverse engineering is an ongoing effort and our understanding of it is still fairly limited.
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <3ds.h>
-
-#include "math.h"
-#include "gs.h"
-
-#include "test_vsh_shbin.h"
-#include "texture_bin.h"
-
-//will be moved into ctrulib at some point
-#define CONFIG_3D_SLIDERSTATE (*(float*)0x1FF81080)
-
-#define RGBA8(r,g,b,a) ((((r)&0xFF)<<24) | (((g)&0xFF)<<16) | (((b)&0xFF)<<8) | (((a)&0xFF)<<0))
-
-//transfer from GPU output buffer to actual framebuffer flags
-#define DISPLAY_TRANSFER_FLAGS \
- (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
- GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
- GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_X))
-
-//shader structure
-DVLB_s* dvlb;
-shaderProgram_s shader;
-//texture data pointer
-u32* texData;
-//vbo structure
-gsVbo_s vbo;
-
-//GPU framebuffer address
-u32* gpuOut=(u32*)0x1F119400;
-//GPU depth buffer address
-u32* gpuDOut=(u32*)0x1F370800;
-
-//angle for the vertex lighting (cf test.vsh)
-float lightAngle;
-//object position and rotation angle
-vect3Df_s position, angle;
-
-//vertex structure
-typedef struct
-{
- vect3Df_s position;
- float texcoord[2];
- vect3Df_s normal;
-}vertex_s;
-
-//object data (cube)
-//obviously this doesn't have to be defined manually, but we will here for the purposes of the example
-//each line is a vertex : {position.x, position.y, position.z}, {texcoord.t, texcoord.s}, {normal.x, normal.y, normal.z}
-//we're drawing triangles so three lines = one triangle
-const vertex_s modelVboData[]=
-{
- //first face (PZ)
- //first triangle
- {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
- {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
- {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
- //second triangle
- {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
- {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
- {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, +1.0f}},
- //second face (MZ)
- //first triangle
- {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
- {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
- {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
- //second triangle
- {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
- {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
- {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, 0.0f, -1.0f}},
- //third face (PX)
- //first triangle
- {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
- {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
- {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
- //second triangle
- {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
- {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
- {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){+1.0f, 0.0f, 0.0f}},
- //fourth face (MX)
- //first triangle
- {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
- {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
- {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
- //second triangle
- {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
- {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
- {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){-1.0f, 0.0f, 0.0f}},
- //fifth face (PY)
- //first triangle
- {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
- {(vect3Df_s){-0.5f, +0.5f, +0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
- {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
- //second triangle
- {(vect3Df_s){+0.5f, +0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
- {(vect3Df_s){+0.5f, +0.5f, -0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
- {(vect3Df_s){-0.5f, +0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, +1.0f, 0.0f}},
- //sixth face (MY)
- //first triangle
- {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
- {(vect3Df_s){+0.5f, -0.5f, -0.5f}, (float[]){1.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
- {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
- //second triangle
- {(vect3Df_s){+0.5f, -0.5f, +0.5f}, (float[]){1.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
- {(vect3Df_s){-0.5f, -0.5f, +0.5f}, (float[]){0.0f, 0.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
- {(vect3Df_s){-0.5f, -0.5f, -0.5f}, (float[]){0.0f, 1.0f}, (vect3Df_s){0.0f, -1.0f, 0.0f}},
-};
-
-//stolen from staplebutt
-void GPU_SetDummyTexEnv(u8 num)
-{
- GPU_SetTexEnv(num,
- GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
- GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
- GPU_TEVOPERANDS(0,0,0),
- GPU_TEVOPERANDS(0,0,0),
- GPU_REPLACE,
- GPU_REPLACE,
- 0xFFFFFFFF);
-}
-
-// topscreen
-void renderFrame()
-{
- GPU_SetViewport((u32*)osConvertVirtToPhys((u32)gpuDOut),(u32*)osConvertVirtToPhys((u32)gpuOut),0,0,240*2,400);
-
- GPU_DepthMap(-1.0f, 0.0f);
- GPU_SetFaceCulling(GPU_CULL_BACK_CCW);
- GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00);
- GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP);
- GPU_SetBlendingColor(0,0,0,0);
- GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL);
-
- GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
- GPUCMD_AddWrite(GPUREG_0118, 0);
-
- GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA);
- GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00);
-
- GPU_SetTextureEnable(GPU_TEXUNIT0);
-
- GPU_SetTexEnv(0,
- GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
- GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR),
- GPU_TEVOPERANDS(0,0,0),
- GPU_TEVOPERANDS(0,0,0),
- GPU_MODULATE, GPU_MODULATE,
- 0xFFFFFFFF);
- GPU_SetDummyTexEnv(1);
- GPU_SetDummyTexEnv(2);
- GPU_SetDummyTexEnv(3);
- GPU_SetDummyTexEnv(4);
- GPU_SetDummyTexEnv(5);
-
- //texturing stuff
- GPU_SetTexture(
- GPU_TEXUNIT0, //texture unit
- (u32*)osConvertVirtToPhys((u32)texData), //data buffer
- 128, //texture width
- 128, //texture height
- GPU_TEXTURE_MAG_FILTER(GPU_NEAREST) | GPU_TEXTURE_MIN_FILTER(GPU_NEAREST), //texture params
- GPU_RGBA8 //texture pixel format
- );
-
- GPU_SetAttributeBuffers(
- 3, //3 attributes: vertices, texcoords, and normals
- (u32*)osConvertVirtToPhys((u32)texData), //mesh buffer
- GPU_ATTRIBFMT(0, 3, GPU_FLOAT) | // GPU Input attribute register 0 (v0): 3 floats (position)
- GPU_ATTRIBFMT(1, 2, GPU_FLOAT) | // GPU Input attribute register 1 (v1): 2 floats (texcoord)
- GPU_ATTRIBFMT(2, 3, GPU_FLOAT), // GPU Input attribute register 2 (v2): 3 floats (normal)
- 0xFFC,
- 0x210,
- 1,
- (u32[]){0x00000000},
- (u64[]){0x210},
- (u8[]){3}
- );
-
- //setup lighting (this is specific to our shader)
- vect3Df_s lightDir=vnormf(vect3Df(cos(lightAngle), -1.0f, sin(lightAngle)));
- GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightDirection"), (u32*)(float[]){0.0f, -lightDir.z, -lightDir.y, -lightDir.x}, 1);
- GPU_SetFloatUniform(GPU_VERTEX_SHADER, shaderInstanceGetUniformLocation(shader.vertexShader, "lightAmbient"), (u32*)(float[]){0.7f, 0.4f, 0.4f, 0.4f}, 1);
-
- //initialize projection matrix to standard perspective stuff
- gsMatrixMode(GS_PROJECTION);
- gsProjectionMatrix(80.0f*M_PI/180.0f, 240.0f/400.0f, 0.01f, 100.0f);
- gsRotateZ(M_PI/2); //because framebuffer is sideways...
-
- //draw object
- gsMatrixMode(GS_MODELVIEW);
- gsPushMatrix();
- gsTranslate(position.x, position.y, position.z);
- gsRotateX(angle.x);
- gsRotateY(angle.y);
- gsVboDraw(&vbo);
- gsPopMatrix();
- GPU_FinishDrawing();
-}
-
-int main(int argc, char** argv)
-{
-
- gfxInitDefault();
-
- //initialize GPU
- GPU_Init(NULL);
-
- //let GFX know we're ok with doing stereoscopic 3D rendering
- gfxSet3D(true);
-
- //allocate our GPU command buffers
- //they *have* to be on the linear heap
- u32 gpuCmdSize=0x40000;
- u32* gpuCmd=(u32*)linearAlloc(gpuCmdSize*4);
- u32* gpuCmdRight=(u32*)linearAlloc(gpuCmdSize*4);
-
- //actually reset the GPU
- GPU_Reset(NULL, gpuCmd, gpuCmdSize);
-
- //load our vertex shader binary
- dvlb=DVLB_ParseFile((u32*)test_vsh_shbin, test_vsh_shbin_size);
- shaderProgramInit(&shader);
- shaderProgramSetVsh(&shader, &dvlb->DVLE[0]);
-
- //initialize GS
- gsInit(&shader);
-
- // Flush the command buffer so that the shader upload gets executed
- GPUCMD_Finalize();
- GPUCMD_FlushAndRun(NULL);
- gspWaitForP3D();
-
- //create texture
- texData=(u32*)linearMemAlign(texture_bin_size, 0x80); //textures need to be 0x80-byte aligned
- memcpy(texData, texture_bin, texture_bin_size);
-
- //create VBO
- gsVboInit(&vbo);
- gsVboCreate(&vbo, sizeof(modelVboData));
- gsVboAddData(&vbo, (void*)modelVboData, sizeof(modelVboData), sizeof(modelVboData)/sizeof(vertex_s));
- gsVboFlushData(&vbo);
-
- //initialize object position and angle
- position=vect3Df(0.0f, 0.0f, -2.0f);
- angle=vect3Df(M_PI/4, M_PI/4, 0.0f);
-
- //background color (blue)
- u32 backgroundColor=RGBA8(0x68, 0xB0, 0xD8, 0xFF);
-
- while(aptMainLoop())
- {
- //get current 3D slider state
- float slider=CONFIG_3D_SLIDERSTATE;
-
- //controls
- hidScanInput();
- //START to exit to hbmenu
- if(keysDown()&KEY_START)break;
-
- //A/B to change vertex lighting angle
- if(keysHeld()&KEY_A)lightAngle+=0.1f;
- if(keysHeld()&KEY_B)lightAngle-=0.1f;
-
- //D-PAD to rotate object
- if(keysHeld()&KEY_DOWN)angle.x+=0.05f;
- if(keysHeld()&KEY_UP)angle.x-=0.05f;
- if(keysHeld()&KEY_LEFT)angle.y+=0.05f;
- if(keysHeld()&KEY_RIGHT)angle.y-=0.05f;
-
- //R/L to bring object closer to or move it further from the camera
- if(keysHeld()&KEY_R)position.z+=0.1f;
- if(keysHeld()&KEY_L)position.z-=0.1f;
-
- //generate our GPU command buffer for this frame
- gsStartFrame();
- renderFrame();
- GPUCMD_Finalize();
-
- if(slider>0.0f)
- {
- //new and exciting 3D !
- //make a copy of left gpu buffer
- u32 offset; GPUCMD_GetBuffer(NULL, NULL, &offset);
- memcpy(gpuCmdRight, gpuCmd, offset*4);
-
- //setup interaxial
- float interaxial=slider*0.12f;
-
- //adjust left gpu buffer fo 3D !
- {mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, -interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);}
-
- //draw left framebuffer
- GPUCMD_FlushAndRun(NULL);
-
- //while GPU starts drawing the left buffer, adjust right one for 3D !
- GPUCMD_SetBuffer(gpuCmdRight, gpuCmdSize, offset);
- {mtx44 m; loadIdentity44((float*)m); translateMatrix((float*)m, interaxial*0.5f, 0.0f, 0.0f); gsAdjustBufferMatrices(m);}
-
- //we wait for the left buffer to finish drawing
- gspWaitForP3D();
- GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
- gspWaitForPPF();
-
- //we draw the right buffer, wait for it to finish and then switch back to left one
- //clear the screen
- GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH , (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
- gspWaitForPSC0();
-
- //draw the right framebuffer
- GPUCMD_FlushAndRun(NULL);
- gspWaitForP3D();
-
- //transfer from GPU output buffer to actual framebuffer
- GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_RIGHT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
- gspWaitForPPF();
- GPUCMD_SetBuffer(gpuCmd, gpuCmdSize, 0);
- }else{
- //boring old 2D !
-
- //draw the frame
- GPUCMD_FlushAndRun(NULL);
- gspWaitForP3D();
-
- //clear the screen
- GX_SetDisplayTransfer(NULL, (u32*)gpuOut, GX_BUFFER_DIM(240*2, 400), (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240*2, 400), DISPLAY_TRANSFER_FLAGS);
- gspWaitForPPF();
- }
-
- //clear the screen
- GX_SetMemoryFill(NULL, (u32*)gpuOut, backgroundColor, (u32*)&gpuOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH, (u32*)gpuDOut, 0x00000000, (u32*)&gpuDOut[0x2EE00], GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
- gspWaitForPSC0();
- gfxSwapBuffersGpu();
-
- gspWaitForEvent(GSPEVENT_VBlank0, true);
- }
-
- gsExit();
- shaderProgramFree(&shader);
- DVLB_Free(dvlb);
- gfxExit();
- return 0;
-}
+++ /dev/null
-#include <math.h>
-#include <string.h>
-
-#include "math.h"
-
-void loadIdentity44(float* m)
-{
- if(!m)return;
-
- memset(m, 0x00, 16*4);
- m[0]=m[5]=m[10]=m[15]=1.0f;
-}
-
-void multMatrix44(float* m1, float* m2, float* m) //4x4
-{
- int i, j;
- for(i=0;i<4;i++)for(j=0;j<4;j++)m[i+j*4]=(m1[0+j*4]*m2[i+0*4])+(m1[1+j*4]*m2[i+1*4])+(m1[2+j*4]*m2[i+2*4])+(m1[3+j*4]*m2[i+3*4]);
-
-}
-
-void translateMatrix(float* tm, float x, float y, float z)
-{
- float rm[16], m[16];
-
- loadIdentity44(rm);
- rm[3]=x;
- rm[7]=y;
- rm[11]=z;
-
- multMatrix44(tm,rm,m);
- memcpy(tm,m,16*sizeof(float));
-}
-
-// 00 01 02 03
-// 04 05 06 07
-// 08 09 10 11
-// 12 13 14 15
-
-void rotateMatrixX(float* tm, float x, bool r)
-{
- float rm[16], m[16];
- memset(rm, 0x00, 16*4);
- rm[0]=1.0f;
- rm[5]=cos(x);
- rm[6]=sin(x);
- rm[9]=-sin(x);
- rm[10]=cos(x);
- rm[15]=1.0f;
- if(!r)multMatrix44(tm,rm,m);
- else multMatrix44(rm,tm,m);
- memcpy(tm,m,16*sizeof(float));
-}
-
-void rotateMatrixY(float* tm, float x, bool r)
-{
- float rm[16], m[16];
- memset(rm, 0x00, 16*4);
- rm[0]=cos(x);
- rm[2]=sin(x);
- rm[5]=1.0f;
- rm[8]=-sin(x);
- rm[10]=cos(x);
- rm[15]=1.0f;
- if(!r)multMatrix44(tm,rm,m);
- else multMatrix44(rm,tm,m);
- memcpy(tm,m,16*sizeof(float));
-}
-
-void rotateMatrixZ(float* tm, float x, bool r)
-{
- float rm[16], m[16];
- memset(rm, 0x00, 16*4);
- rm[0]=cos(x);
- rm[1]=sin(x);
- rm[4]=-sin(x);
- rm[5]=cos(x);
- rm[10]=1.0f;
- rm[15]=1.0f;
- if(!r)multMatrix44(tm,rm,m);
- else multMatrix44(rm,tm,m);
- memcpy(tm,m,16*sizeof(float));
-}
-
-void scaleMatrix(float* tm, float x, float y, float z)
-{
- tm[0]*=x; tm[4]*=x; tm[8]*=x; tm[12]*=x;
- tm[1]*=y; tm[5]*=y; tm[9]*=y; tm[13]*=y;
- tm[2]*=z; tm[6]*=z; tm[10]*=z; tm[14]*=z;
-}
-
-void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far)
-{
- float top = near*tan(fovy/2);
- float right = (top*aspect);
-
- float mp[4*4];
-
- mp[0x0] = near/right;
- mp[0x1] = 0.0f;
- mp[0x2] = 0.0f;
- mp[0x3] = 0.0f;
-
- mp[0x4] = 0.0f;
- mp[0x5] = near/top;
- mp[0x6] = 0.0f;
- mp[0x7] = 0.0f;
-
- mp[0x8] = 0.0f;
- mp[0x9] = 0.0f;
- mp[0xA] = -(far+near)/(far-near);
- mp[0xB] = -2.0f*(far*near)/(far-near);
-
- mp[0xC] = 0.0f;
- mp[0xD] = 0.0f;
- mp[0xE] = -1.0f;
- mp[0xF] = 0.0f;
-
- float mp2[4*4];
- loadIdentity44(mp2);
- mp2[0xA]=0.5;
- mp2[0xB]=-0.5;
-
- multMatrix44(mp2, mp, m);
-}
-
-vect3Df_s getMatrixColumn(float* m, u8 i)
-{
- if(!m || i>=4)return vect3Df(0,0,0);
- return vect3Df(m[0+i*4],m[1+i*4],m[2+i*4]);
-}
-
-vect3Df_s getMatrixRow(float* m, u8 i)
-{
- if(!m || i>=4)return vect3Df(0,0,0);
- return vect3Df(m[i+0*4],m[i+1*4],m[i+2*4]);
-}
-
-vect4Df_s getMatrixColumn4(float* m, u8 i)
-{
- if(!m || i>=4)return vect4Df(0,0,0,0);
- return vect4Df(m[0+i*4],m[1+i*4],m[2+i*4],m[3+i*4]);
-}
-
-vect4Df_s getMatrixRow4(float* m, u8 i)
-{
- if(!m || i>=4)return vect4Df(0,0,0,0);
- return vect4Df(m[i+0*4],m[i+1*4],m[i+2*4],m[i+3*4]);
-}
+++ /dev/null
-#ifndef MATH_H
-#define MATH_H
-
-#include <3ds/types.h>
-#include <math.h>
-
-typedef float mtx44[4][4];
-typedef float mtx33[3][3];
-
-typedef struct
-{
- s32 x, y, z;
-}vect3Di_s;
-
-static inline vect3Di_s vect3Di(s32 x, s32 y, s32 z)
-{
- return (vect3Di_s){x,y,z};
-}
-
-static inline vect3Di_s vaddi(vect3Di_s u, vect3Di_s v)
-{
- return (vect3Di_s){u.x+v.x,u.y+v.y,u.z+v.z};
-}
-
-static inline vect3Di_s vsubi(vect3Di_s u, vect3Di_s v)
-{
- return (vect3Di_s){u.x-v.x,u.y-v.y,u.z-v.z};
-}
-
-static inline vect3Di_s vmuli(vect3Di_s v, s32 f)
-{
- return (vect3Di_s){v.x*f,v.y*f,v.z*f};
-}
-
-typedef struct
-{
- float x, y, z;
-}vect3Df_s;
-
-static inline vect3Df_s vect3Df(float x, float y, float z)
-{
- return (vect3Df_s){x,y,z};
-}
-
-static inline vect3Df_s vaddf(vect3Df_s u, vect3Df_s v)
-{
- return (vect3Df_s){u.x+v.x,u.y+v.y,u.z+v.z};
-}
-
-static inline vect3Df_s vsubf(vect3Df_s u, vect3Df_s v)
-{
- return (vect3Df_s){u.x-v.x,u.y-v.y,u.z-v.z};
-}
-
-static inline vect3Df_s vmulf(vect3Df_s v, float f)
-{
- return (vect3Df_s){v.x*f,v.y*f,v.z*f};
-}
-
-static inline vect3Df_s vscalef(vect3Df_s v1, vect3Df_s v2)
-{
- return (vect3Df_s){v1.x*v2.x,v1.y*v2.y,v1.z*v2.z};
-}
-
-static inline float vmagf(vect3Df_s v)
-{
- return sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
-}
-
-static inline float vdistf(vect3Df_s v1, vect3Df_s v2)
-{
- return sqrtf((v1.x-v2.x)*(v1.x-v2.x)+(v1.y-v2.y)*(v1.y-v2.y)+(v1.z-v2.z)*(v1.z-v2.z));
-}
-
-static inline vect3Df_s vnormf(vect3Df_s v)
-{
- const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z);
- return (vect3Df_s){v.x/l,v.y/l,v.z/l};
-}
-
-typedef struct
-{
- float x, y, z, w;
-}vect4Df_s;
-
-static inline vect4Df_s vect4Df(float x, float y, float z, float w)
-{
- return (vect4Df_s){x,y,z,w};
-}
-
-static inline vect4Df_s vaddf4(vect4Df_s u, vect4Df_s v)
-{
- return (vect4Df_s){u.x+v.x,u.y+v.y,u.z+v.z,u.w+v.w};
-}
-
-static inline vect4Df_s vsubf4(vect4Df_s u, vect4Df_s v)
-{
- return (vect4Df_s){u.x-v.x,u.y-v.y,u.z-v.z,u.w-v.w};
-}
-
-static inline vect4Df_s vmulf4(vect4Df_s v, float f)
-{
- return (vect4Df_s){v.x*f,v.y*f,v.z*f,v.w*f};
-}
-
-static inline float vdotf4(vect4Df_s v1, vect4Df_s v2)
-{
- return v1.x*v2.x+v1.y*v2.y+v1.z*v2.z+v1.w*v2.w;
-}
-
-static inline vect4Df_s vnormf4(vect4Df_s v)
-{
- const float l=sqrtf(v.x*v.x+v.y*v.y+v.z*v.z+v.w*v.w);
- return (vect4Df_s){v.x/l,v.y/l,v.z/l,v.w/l};
-}
-
-//interstuff
-static inline vect3Di_s vf2i(vect3Df_s v)
-{
- return (vect3Di_s){floorf(v.x),floorf(v.y),floorf(v.z)};
-}
-
-static inline vect3Df_s vi2f(vect3Di_s v)
-{
- return (vect3Df_s){(float)v.x,(float)v.y,(float)v.z};
-}
-
-void loadIdentity44(float* m);
-void multMatrix44(float* m1, float* m2, float* m);
-
-void translateMatrix(float* tm, float x, float y, float z);
-void rotateMatrixX(float* tm, float x, bool r);
-void rotateMatrixY(float* tm, float x, bool r);
-void rotateMatrixZ(float* tm, float x, bool r);
-void scaleMatrix(float* tm, float x, float y, float z);
-
-void initProjectionMatrix(float* m, float fovy, float aspect, float near, float far);
-
-vect3Df_s getMatrixColumn(float* m, u8 i);
-vect3Df_s getMatrixRow(float* m, u8 i);
-vect4Df_s getMatrixColumn4(float* m, u8 i);
-vect4Df_s getMatrixRow4(float* m, u8 i);
-
-#endif
CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
+PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.pica)))
BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
#---------------------------------------------------------------------------------
endif
#---------------------------------------------------------------------------------
-export OFILES := $(addsuffix .o,$(BINFILES)) \
+export OFILES := $(addsuffix .o,$(BINFILES)) $(PICAFILES:.pica=.shbin.o) \
$(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o)
export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
@echo $(notdir $<)
@$(bin2o)
-# WARNING: This is not the right way to do this! TODO: Do it right!
#---------------------------------------------------------------------------------
-%_vsh.h %.vsh.o : %.vsh
+# rule for assembling GPU shaders
#---------------------------------------------------------------------------------
+%.shbin.o: %.pica
@echo $(notdir $<)
- @python3 $(AEMSTRO)/aemstro_as.py $< ../$(notdir $<).shbin
- @bin2s ../$(notdir $<).shbin | $(PREFIX)as -o $@
- @echo "extern const u8" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(notdir $<).shbin | tr . _)`.h
- @echo "extern const u8" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(notdir $<).shbin | tr . _)`.h
- @echo "extern const u32" `(echo $(notdir $<).shbin | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(notdir $<).shbin | tr . _)`.h
- @rm ../$(notdir $<).shbin
+ $(eval CURBIN := $(patsubst %.pica,%.shbin,$(notdir $<)))
+ $(eval CURH := $(patsubst %.pica,%.psh.h,$(notdir $<)))
+ @picasso $(CURBIN) $< $(CURH)
+ @bin2s $(CURBIN) | $(AS) -o $@
+ @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h
+ @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h
+ @echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . _)`.h
-include $(DEPENDS)
--- /dev/null
+# GPU example
+
+This is a simple GPU example that uses the `picasso` shader assembler, which ships with devkitARM r45 and later.
+Users of earlier devkitARM versions need to install the tool separately; it can be downloaded from the address below:
+
+https://github.com/fincs/picasso/releases
--- /dev/null
+#include "3dmath.h"
+
+// Overwrites *out with the 4x4 identity matrix.
+void m4x4_identity(matrix_4x4* out)
+{
+    // Clear every element, then place ones on the main diagonal
+    m4x4_zeros(out);
+    out->r[0].x = 1.0f;
+    out->r[1].y = 1.0f;
+    out->r[2].z = 1.0f;
+    out->r[3].w = 1.0f;
+}
+
+// Computes out = a * b.
+// Element (row, col) of the result is the dot product of row 'row' of a with
+// column 'col' of b, where a's x/y/z/w components pair with b's rows 0/1/2/3.
+// The product is accumulated in a temporary and copied out at the end, so
+// 'out' may safely point to the same matrix as 'a' and/or 'b'.
+void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b)
+{
+    matrix_4x4 result;
+    int row, col;
+    for (row = 0; row < 4; row ++)
+    {
+        const vector_4f* lhs = &a->r[row];
+        for (col = 0; col < 4; col ++)
+            result.r[row].c[col] = lhs->x*b->r[0].c[col] + lhs->y*b->r[1].c[col] + lhs->z*b->r[2].c[col] + lhs->w*b->r[3].c[col];
+    }
+    *out = result;
+}
+
+// Post-multiplies mtx by a translation of (x, y, z): mtx = mtx * T.
+void m4x4_translate(matrix_4x4* mtx, float x, float y, float z)
+{
+    matrix_4x4 translation, product;
+
+    // T is the identity with the offsets stored in the w component of rows 0-2
+    m4x4_identity(&translation);
+    translation.r[0].w = x;
+    translation.r[1].w = y;
+    translation.r[2].w = z;
+
+    m4x4_multiply(&product, mtx, &translation);
+    *mtx = product;
+}
+
+// Multiplies the x, y and z components of every row of mtx by the given
+// factors, in place. The w components are left untouched.
+void m4x4_scale(matrix_4x4* mtx, float x, float y, float z)
+{
+    vector_4f* row = mtx->r;
+    vector_4f* const end = mtx->r + 4;
+    for (; row != end; row ++)
+    {
+        row->x *= x;
+        row->y *= y;
+        row->z *= z;
+    }
+}
+
+// Combines mtx with a rotation of 'angle' radians about the X axis.
+// bRightSide selects mtx = mtx * R (true) or mtx = R * mtx (false).
+void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide)
+{
+    const float c = cosf(angle);
+    const float s = sinf(angle);
+    matrix_4x4 rot, result;
+
+    // Rotation matrix: row 0 and row 3 are identity rows
+    m4x4_zeros(&rot);
+    rot.r[0].x = 1.0f;
+    rot.r[1].y = c;
+    rot.r[1].z = s;
+    rot.r[2].y = -s;
+    rot.r[2].z = c;
+    rot.r[3].w = 1.0f;
+
+    // Pick the operand order, then multiply once
+    const matrix_4x4* lhs = bRightSide ? mtx : &rot;
+    const matrix_4x4* rhs = bRightSide ? &rot : mtx;
+    m4x4_multiply(&result, lhs, rhs);
+    m4x4_copy(mtx, &result);
+}
+
+// Combines mtx with a rotation of 'angle' radians about the Y axis.
+// bRightSide selects mtx = mtx * R (true) or mtx = R * mtx (false).
+void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide)
+{
+    const float c = cosf(angle);
+    const float s = sinf(angle);
+    matrix_4x4 rot, result;
+
+    // Rotation matrix: row 1 and row 3 are identity rows
+    m4x4_zeros(&rot);
+    rot.r[0].x = c;
+    rot.r[0].z = s;
+    rot.r[1].y = 1.0f;
+    rot.r[2].x = -s;
+    rot.r[2].z = c;
+    rot.r[3].w = 1.0f;
+
+    // Pick the operand order, then multiply once
+    const matrix_4x4* lhs = bRightSide ? mtx : &rot;
+    const matrix_4x4* rhs = bRightSide ? &rot : mtx;
+    m4x4_multiply(&result, lhs, rhs);
+    m4x4_copy(mtx, &result);
+}
+
+// Combines mtx with a rotation of 'angle' radians about the Z axis.
+// bRightSide selects mtx = mtx * R (true) or mtx = R * mtx (false).
+void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide)
+{
+    const float c = cosf(angle);
+    const float s = sinf(angle);
+    matrix_4x4 rot, result;
+
+    // Rotation matrix: row 2 and row 3 are identity rows
+    m4x4_zeros(&rot);
+    rot.r[0].x = c;
+    rot.r[0].y = s;
+    rot.r[1].x = -s;
+    rot.r[1].y = c;
+    rot.r[2].z = 1.0f;
+    rot.r[3].w = 1.0f;
+
+    // Pick the operand order, then multiply once
+    const matrix_4x4* lhs = bRightSide ? mtx : &rot;
+    const matrix_4x4* rhs = bRightSide ? &rot : mtx;
+    m4x4_multiply(&result, lhs, rhs);
+    m4x4_copy(mtx, &result);
+}
+
+// Builds an orthographic projection adapted to the 3DS' sideways screens and
+// stores it in *mtx (the previous contents of *mtx are ignored).
+// The result is tilt * depthFix * ortho, built in three steps below.
+void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far)
+{
+    matrix_4x4 ortho, fixup, depthFixed;
+
+    // Step 1: standard orthogonal projection matrix
+    m4x4_zeros(&ortho);
+    ortho.r[0].x = 2.0f / (right - left);
+    ortho.r[0].w = (left + right) / (left - right);
+    ortho.r[1].y = 2.0f / (top - bottom);
+    ortho.r[1].w = (bottom + top) / (bottom - top);
+    ortho.r[2].z = 2.0f / (near - far);
+    ortho.r[2].w = (far + near) / (far - near);
+    ortho.r[3].w = 1.0f;
+
+    // Step 2: fix depth range to [-1, 0] (row 2 becomes 0.5*z - 0.5*w)
+    m4x4_identity(&fixup);
+    fixup.r[2].z = 0.5f;
+    fixup.r[2].w = -0.5f;
+    m4x4_multiply(&depthFixed, &fixup, &ortho);
+
+    // Step 3: fix the 3DS screens' orientation by swapping the X and Y axis
+    // (the new Y is the negated old X)
+    m4x4_identity(&fixup);
+    fixup.r[0].x = 0.0f;
+    fixup.r[0].y = 1.0f;
+    fixup.r[1].x = -1.0f;
+    fixup.r[1].y = 0.0f;
+    m4x4_multiply(mtx, &fixup, &depthFixed);
+}
+
+// Builds a perspective projection adapted to the 3DS' sideways screens and
+// stores it in *mtx (the previous contents of *mtx are ignored).
+//
+// Callers think in terms of "fovy" and "aspect ratio", but because the screens
+// are sideways these values are really the fovx and the inverse of the aspect
+// ratio, so the usual formula is re-expressed in those terms:
+//
+//   fovx = 2 atan(tan(fovy/2)*w/h)
+//   fovy = 2 atan(tan(fovx/2)*h/w)
+//   invaspect = h/w
+//
+//   a0,0 = h / (w*tan(fovy/2))
+//        = h / (w*tan( atan(tan(fovx/2)*h/w) ))
+//        = h / (w * tan(fovx/2)*h/w)
+//        = 1 / tan(fovx/2)
+//
+//   a1,1 = 1 / tan(fovy/2) = (...) = w / (h*tan(fovx/2))
+void m4x4_persp_tilt(matrix_4x4* mtx, float fovx, float invaspect, float near, float far)
+{
+    const float tanHalfFov = tanf(fovx / 2);
+    matrix_4x4 persp, depthFix;
+
+    // Standard perspective projection matrix
+    m4x4_zeros(&persp);
+    persp.r[0].x = 1.0f / tanHalfFov;
+    persp.r[1].y = 1.0f / (tanHalfFov*invaspect);
+    persp.r[2].z = (near + far) / (near - far);
+    persp.r[2].w = (2 * near * far) / (near - far);
+    persp.r[3].z = -1.0f;
+
+    // Fix depth range to [-1, 0]
+    m4x4_identity(&depthFix);
+    depthFix.r[2].z = 0.5f;
+    depthFix.r[2].w = -0.5f;
+    m4x4_multiply(mtx, &depthFix, &persp);
+
+    // Rotate by a quarter turn about Z (applied on the right side) to fix the 3DS screens' orientation
+    m4x4_rotate_z(mtx, M_PI / 2, true);
+}
--- /dev/null
+/*
+ * Bare-bones simplistic 3D math library
+ * This library is common to all libctru GPU examples
+ */
+
+#pragma once
+#include <string.h>
+#include <stdbool.h>
+#include <math.h>
+
+// 4-component float vector. The named fields are declared in w, z, y, x order,
+// so they alias the array view as c[0]=w, c[1]=z, c[2]=y, c[3]=x.
+// NOTE(review): presumably this mirrors the component order the GPU expects
+// when uniforms are uploaded -- confirm against the GPU documentation.
+typedef union { struct { float w, z, y, x; }; float c[4]; } vector_4f;
+// 4x4 float matrix stored as four row vectors r[0]..r[3].
+typedef struct { vector_4f r[4]; } matrix_4x4;
+
+// Returns the 4-component dot product of *a and *b.
+static inline float v4f_dp4(const vector_4f* a, const vector_4f* b)
+{
+    const float xx = a->x * b->x;
+    const float yy = a->y * b->y;
+    const float zz = a->z * b->z;
+    const float ww = a->w * b->w;
+    return xx + yy + zz + ww;
+}
+
+// Returns the Euclidean length of *a over all four components.
+static inline float v4f_mod4(const vector_4f* a)
+{
+    const float lengthSquared = v4f_dp4(a, a);
+    return sqrtf(lengthSquared);
+}
+
+// Scales *vec in place to unit length. A zero-length vector is left unchanged
+// so that no division by zero takes place.
+static inline void v4f_norm4(vector_4f* vec)
+{
+    const float length = v4f_mod4(vec);
+    if (length != 0.0)
+    {
+        vec->x /= length;
+        vec->y /= length;
+        vec->z /= length;
+        vec->w /= length;
+    }
+}
+
+// Sets every byte of *out to zero, i.e. all 16 elements become 0.0f.
+static inline void m4x4_zeros(matrix_4x4* out)
+{
+    memset(out, 0, sizeof(matrix_4x4));
+}
+
+// Copies the whole matrix *in into *out.
+static inline void m4x4_copy(matrix_4x4* out, const matrix_4x4* in)
+{
+    memcpy(out, in, sizeof(matrix_4x4));
+}
+
+void m4x4_identity(matrix_4x4* out);
+void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b);
+
+void m4x4_translate(matrix_4x4* mtx, float x, float y, float z);
+void m4x4_scale(matrix_4x4* mtx, float x, float y, float z);
+
+void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide);
+void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide);
+void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide);
+
+// Special versions of the projection matrices that take the 3DS' screen orientation into account
+void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far);
+void m4x4_persp_tilt(matrix_4x4* mtx, float fovy, float aspect, float near, float far);
--- /dev/null
+#include "gpu.h"
+
+#define DISPLAY_TRANSFER_FLAGS \
+ (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
+ GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
+ GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
+
+static u32 *colorBuf, *depthBuf;
+static u32 *cmdBuf;
+
+// Allocates the color/depth render targets and the GPU command buffer,
+// then initializes and resets the GPU with that command buffer.
+void gpuInit(void)
+{
+    // Each render target is 400x240 pixels at 4 bytes per pixel
+    const u32 renderTargetBytes = 400*240*4;
+    // Command buffer capacity in u32 entries (the allocation is 4 bytes per entry)
+    const u32 cmdBufEntries = 0x40000;
+
+    colorBuf = vramAlloc(renderTargetBytes);
+    depthBuf = vramAlloc(renderTargetBytes);
+    cmdBuf = linearAlloc(cmdBufEntries*4);
+
+    GPU_Init(NULL);
+    GPU_Reset(NULL, cmdBuf, cmdBufEntries);
+}
+
+// Releases the command buffer and both render targets, in the reverse order
+// of their allocation in gpuInit.
+// NOTE(review): the static pointers are not reset to NULL afterwards -- confirm
+// that nothing uses them after gpuExit.
+void gpuExit(void)
+{
+ linearFree(cmdBuf);
+ vramFree(depthBuf);
+ vramFree(colorBuf);
+}
+
+// Fills the color buffer with clearColor and the depth buffer with 0,
+// then blocks until the fill has completed.
+void gpuClearBuffers(u32 clearColor)
+{
+    // Both buffers hold 240*400 32-bit entries; the fill takes one-past-the-end pointers
+    u32* const colorEnd = colorBuf + 240*400;
+    u32* const depthEnd = depthBuf + 240*400;
+
+    GX_SetMemoryFill(NULL,
+        colorBuf, clearColor, colorEnd, GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH,
+        depthBuf, 0, depthEnd, GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
+
+    // Wait for the fill to complete
+    gspWaitForPSC0();
+}
+
+// Queues the per-frame GPU state: viewport and depth mapping, culling, stencil,
+// blending and depth test settings, and finally resets all six texenv stages.
+// Scene code is expected to queue its own state and draw calls after this.
+void gpuFrameBegin(void)
+{
+ // Configure the viewport and the depth linear conversion function
+ GPU_SetViewport(
+ (u32*)osConvertVirtToPhys((u32)depthBuf),
+ (u32*)osConvertVirtToPhys((u32)colorBuf),
+ 0, 0, 240, 400); // The top screen is physically 240x400 pixels
+ GPU_DepthMap(-1.0f, 0.0f); // calculate the depth value from the Z coordinate in the following way: -1.0*z + 0.0
+
+ // Configure some boilerplate
+ GPU_SetFaceCulling(GPU_CULL_BACK_CCW);
+ GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00);
+ GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP);
+ GPU_SetBlendingColor(0,0,0,0);
+ GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL);
+
+ // This is unknown
+ // NOTE(review): the purpose of registers 0x0062 and 0x0118 is not documented here -- confirm before changing
+ GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
+ GPUCMD_AddWrite(GPUREG_0118, 0);
+
+ // Configure alpha blending and test
+ GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA);
+ GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00);
+
+ // Reset all six texenv stages with GPU_SetDummyTexEnv; the scene overrides the ones it needs
+ int i;
+ for (i = 0; i < 6; i ++)
+ GPU_SetDummyTexEnv(i);
+}
+
+// Finishes the current frame: submits the queued commands and waits for the
+// render to finish, copies the color buffer to the top screen's left
+// framebuffer, then rewinds the command buffer for the next frame.
+void gpuFrameEnd(void)
+{
+    // Finish rendering
+    GPU_FinishDrawing();
+    GPUCMD_Finalize();
+    GPUCMD_FlushAndRun(NULL);
+    gspWaitForP3D(); // Wait for the rendering to complete
+
+    // Transfer the GPU output to the framebuffer
+    GX_SetDisplayTransfer(NULL, colorBuf, GX_BUFFER_DIM(240, 400),
+        (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240, 400),
+        DISPLAY_TRANSFER_FLAGS);
+    gspWaitForPPF(); // Wait for the transfer to complete
+
+    // Reset the command buffer
+    GPUCMD_SetBufferOffset(0);
+}
+
+// Programs texenv stage 'id' with GPU_PREVIOUS as its first source and
+// GPU_REPLACE as the combiner, for both the RGB and the alpha channels.
+// NOTE(review): presumably this makes the stage forward the previous stage's
+// output unchanged -- confirm against the GPU_SetTexEnv documentation.
+void GPU_SetDummyTexEnv(int id)
+{
+ GPU_SetTexEnv(id,
+ GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
+ GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0),
+ GPU_TEVOPERANDS(0, 0, 0),
+ GPU_TEVOPERANDS(0, 0, 0),
+ GPU_REPLACE,
+ GPU_REPLACE,
+ 0xFFFFFFFF);
+}
--- /dev/null
+/*
+ * Bare-bones simplistic GPU wrapper
+ * This library is common to all libctru GPU examples
+ */
+
+#pragma once
+#include <string.h>
+#include <3ds.h>
+#include "3dmath.h"
+
+void gpuInit(void);
+void gpuExit(void);
+
+void gpuClearBuffers(u32 clearColor);
+
+void gpuFrameBegin(void);
+void gpuFrameEnd(void);
+
+// Configures the specified fixed-function fragment shading substage to be a no-operation
+void GPU_SetDummyTexEnv(int id);
+
+// Uploads a 4x4 matrix as a float uniform of the given shader type, starting at 'location'.
+// The matrix is passed as raw 32-bit words with a count of 4.
+// NOTE(review): the count presumably is the number of 4-component registers (one per matrix row) -- confirm.
+static inline void GPU_SetFloatUniformMatrix(GPU_SHADER_TYPE type, int location, matrix_4x4* matrix)
+{
+ GPU_SetFloatUniform(type, location, (u32*)matrix, 4);
+}
--- /dev/null
+; Example PICA200 geometry shader
+
+; Uniforms
+.fvec projection[4]
+
+; Constants
+.constf myconst(0.0, 1.0, -1.0, 0.5)
+.alias zeros myconst.xxxx ; Vector full of zeros
+.alias ones myconst.yyyy ; Vector full of ones
+.alias half myconst.wwww
+
+; Outputs - this time the type *is* used
+.out outpos position
+.out outclr color
+
+; Inputs: we will receive the following inputs:
+; v0-v1: position/color of the first vertex
+; v2-v3: position/color of the second vertex
+; v4-v5: position/color of the third vertex
+
+; Entry point: splits the input triangle (positions v0, v2, v4) into three
+; smaller triangles, each made of one original corner and the midpoints of its
+; two adjacent edges. The middle of the input triangle is not emitted.
+.proc main
+ ; Calculate the midpoints of the vertices
+ ; r4 = (v0 + v2) * 0.5
+ mov r4, v0
+ add r4, v2, r4
+ mul r4, half, r4
+ ; r5 = (v2 + v4) * 0.5
+ mov r5, v2
+ add r5, v4, r5
+ mul r5, half, r5
+ ; r6 = (v4 + v0) * 0.5
+ mov r6, v4
+ add r6, v0, r6
+ mul r6, half, r6
+
+ ; Emit the first triangle
+ ; (v0, mid(v0,v2), mid(v4,v0))
+ mov r0, v0
+ mov r1, r4
+ mov r2, r6
+ call emit_triangle
+
+ ; Emit the second triangle
+ ; (mid(v0,v2), v2, mid(v2,v4))
+ mov r0, r4
+ mov r1, v2
+ mov r2, r5
+ call emit_triangle
+
+ ; Emit the third triangle
+ ; (mid(v4,v0), mid(v2,v4), v4)
+ mov r0, r6
+ mov r1, r5
+ mov r2, v4
+ call emit_triangle
+
+ ; We're finished
+ end
+.end
+
+; Subroutine
+; Inputs:
+;   r0, r1, r2: positions of the three vertices to emit
+; The colors are always read from v1, v3 and v5 (the colors of the original
+; input vertices), regardless of which positions are in r0-r2.
+; Uses r8 and r9 to pass each vertex to process_vertex.
+.proc emit_triangle
+ ; Emit the first vertex
+ setemit 0
+ mov r8, r0
+ mov r9, v1
+ call process_vertex
+ emit
+
+ ; Emit the second vertex
+ setemit 1
+ mov r8, r1
+ mov r9, v3
+ call process_vertex
+ emit
+
+ ; Emit the third vertex and finish the primitive
+ setemit 2, prim
+ mov r8, r2
+ mov r9, v5
+ call process_vertex
+ emit
+.end
+
+; Subroutine
+; Inputs:
+; r8: vertex position
+; r9: vertex color
+; Outputs:
+; outpos: r8 transformed by the projection matrix
+; outclr: copy of r9
+; This routine only writes the output registers; the caller issues the emit.
+.proc process_vertex
+ ; outpos = projectionMatrix * r8
+ dp4 outpos.x, projection[0], r8
+ dp4 outpos.y, projection[1], r8
+ dp4 outpos.z, projection[2], r8
+ dp4 outpos.w, projection[3], r8
+
+ ; outclr = r9
+ mov outclr, r9
+.end
--- /dev/null
+/*
+ * ~~ Simple libctru GPU geometry shader example ~~
+ * This example demonstrates the basics of using the PICA200 in a 3DS homebrew
+ * application in order to render a basic scene using a geoshader.
+ * The example geoshader receives the vertices of a triangle and emits three
+ * smaller triangles, thus forming a 'triforce' shape.
+ */
+
+#include "gpu.h"
+#include "vshader_shbin.h"
+#include "gshader_shbin.h"
+
+#define CLEAR_COLOR 0x68B0D8FF
+
+typedef struct { float position[3]; float color[4]; } vertex;
+
+static const vertex vertex_list[] =
+{
+ { {200.0f, 200.0f, 0.5f}, {1.0f, 0.0f, 0.0f, 1.0f} },
+ { {100.0f, 40.0f, 0.5f}, {0.0f, 1.0f, 0.0f, 1.0f} },
+ { {300.0f, 40.0f, 0.5f}, {0.0f, 0.0f, 1.0f, 1.0f} },
+};
+
+#define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0]))
+
+static DVLB_s *vshader_dvlb, *gshader_dvlb;
+static shaderProgram_s program;
+static int uLoc_projection;
+static matrix_4x4 projection;
+
+static void* vbo_data;
+
+// Loads both shader binaries, builds the shader program, looks up the
+// projection uniform, computes the projection matrix and copies the vertex
+// list into a freshly allocated linear-memory VBO.
+// NOTE(review): the results of DVLB_ParseFile and linearAlloc are used without
+// being checked -- confirm whether failure needs to be handled here.
+static void sceneInit(void)
+{
+ // Load the shaders and create a shader program
+ // The geoshader stride is set to 6 so that it processes a triangle at a time
+ vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
+ gshader_dvlb = DVLB_ParseFile((u32*)gshader_shbin, gshader_shbin_size);
+ shaderProgramInit(&program);
+ shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
+ shaderProgramSetGsh(&program, &gshader_dvlb->DVLE[0], 6);
+
+ // Get the location of the projection matrix uniform
+ // (it is looked up in the geometry shader, not in the vertex shader)
+ uLoc_projection = shaderInstanceGetUniformLocation(program.geometryShader, "projection");
+
+ // Compute the projection matrix
+ // (left=0, right=400, bottom=0, top=240, near=0, far=1)
+ m4x4_ortho_tilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0);
+
+ // Create the VBO (vertex buffer object)
+ vbo_data = linearAlloc(sizeof(vertex_list));
+ memcpy(vbo_data, vertex_list, sizeof(vertex_list));
+}
+
+// Queues the GPU commands that draw the scene: binds the shader program,
+// configures texenv stage 0 and the vertex attribute layout, uploads the
+// projection matrix and issues the draw call.
+static void sceneRender(void)
+{
+ // Bind the shader program
+ shaderProgramUse(&program);
+
+ // Configure the first fragment shading substage to just pass through the vertex color
+ // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
+ GPU_SetTexEnv(0,
+ GPU_TEVSOURCES(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // RGB channels
+ GPU_TEVSOURCES(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // Alpha
+ GPU_TEVOPERANDS(0, 0, 0), // RGB
+ GPU_TEVOPERANDS(0, 0, 0), // Alpha
+ GPU_REPLACE, GPU_REPLACE, // RGB, Alpha
+ 0xFFFFFFFF);
+
+ // Configure the "attribute buffers" (that is, the vertex input buffers)
+ GPU_SetAttributeBuffers(
+ 2, // Number of inputs per vertex
+ (u32*)osConvertVirtToPhys((u32)vbo_data), // Location of the VBO
+ GPU_ATTRIBFMT(0, 3, GPU_FLOAT) |
+ GPU_ATTRIBFMT(1, 4, GPU_FLOAT), // Format of the inputs (input 0 is a 3-element float vector, input 1 is a 4-element float vector)
+ 0xFFC, // Unused attribute mask, in our case bits 0 and 1 are cleared since attributes 0 and 1 are used
+ 0x10, // Attribute permutations (here it is the identity: nibble 0 is 0, nibble 1 is 1)
+ 1, // Number of buffers
+ (u32[]) { 0x0 }, // Buffer offsets (placeholders)
+ (u64[]) { 0x10 }, // Attribute permutations for each buffer (identity again)
+ (u8[]) { 2 }); // Number of attributes for each buffer
+
+ // Upload the projection matrix
+ GPU_SetFloatUniformMatrix(GPU_GEOMETRY_SHADER, uLoc_projection, &projection);
+
+ // Draw the VBO - GPU_UNKPRIM allows the geoshader to control primitive emission
+ GPU_DrawArray(GPU_UNKPRIM, vertex_list_count);
+}
+
+// Releases everything sceneInit created: the VBO, the shader program and the
+// two parsed shader binaries.
+static void sceneExit(void)
+{
+ // Free the VBO
+ linearFree(vbo_data);
+
+ // Free the shader program
+ shaderProgramFree(&program);
+ DVLB_Free(vshader_dvlb);
+ DVLB_Free(gshader_dvlb);
+}
+
+// Program entry point: sets up graphics and the scene, renders one frame per
+// VBlank until START is pressed or aptMainLoop() returns false, then tears
+// everything down in reverse order of initialization.
+int main()
+{
+ // Initialize graphics
+ gfxInitDefault();
+ gpuInit();
+
+ // Initialize the scene
+ sceneInit();
+ // Clear once up front; inside the loop the buffers are cleared after each frame
+ gpuClearBuffers(CLEAR_COLOR);
+
+ // Main loop
+ while (aptMainLoop())
+ {
+ gspWaitForVBlank(); // Synchronize with the start of VBlank
+ gfxSwapBuffersGpu(); // Swap the framebuffers so that the frame that we rendered last frame is now visible
+ hidScanInput(); // Read the user input
+
+ // Respond to user input
+ u32 kDown = hidKeysDown();
+ if (kDown & KEY_START)
+ break; // break in order to return to hbmenu
+
+ // Render the scene
+ gpuFrameBegin();
+ sceneRender();
+ gpuFrameEnd();
+ // Clear the render targets so that they are ready for the next frame
+ gpuClearBuffers(CLEAR_COLOR);
+
+ // Flush the framebuffers out of the data cache (not necessary with pure GPU rendering)
+ //gfxFlushBuffers();
+ }
+
+ // Deinitialize the scene
+ sceneExit();
+
+ // Deinitialize graphics
+ gpuExit();
+ gfxExit();
+ return 0;
+}
--- /dev/null
+; Example PICA200 vertex shader
+
+; Constants
+.constf myconst(0.0, 1.0, -1.0, -0.5)
+.alias zeros myconst.xxxx ; Vector full of zeros
+.alias ones myconst.yyyy ; Vector full of ones
+
+; Outputs - since we are also using a geoshader the output type isn't really used
+.out outpos position
+.out outclr color
+
+; Inputs (defined as aliases for convenience)
+.alias inpos v0
+.alias inclr v1
+
+; Entry point: forwards the vertex position and color unchanged; no
+; transformation is applied in this shader.
+.proc main
+ ; Pass through both inputs to the geoshader
+ ; (outpos.w is forced to 1.0 from the 'ones' constant)
+ mov outpos.xyz, inpos
+ mov outpos.w, ones
+ mov outclr, inclr
+
+ ; We're finished
+ end
+.end
--- /dev/null
+#---------------------------------------------------------------------------------
+.SUFFIXES:
+#---------------------------------------------------------------------------------
+
+ifeq ($(strip $(DEVKITARM)),)
+$(error "Please set DEVKITARM in your environment. export DEVKITARM=<path to>devkitARM")
+endif
+
+TOPDIR ?= $(CURDIR)
+include $(DEVKITARM)/3ds_rules
+
+#---------------------------------------------------------------------------------
+# TARGET is the name of the output
+# BUILD is the directory where object files & intermediate files will be placed
+# SOURCES is a list of directories containing source code
+# DATA is a list of directories containing data files
+# INCLUDES is a list of directories containing header files
+#
+# NO_SMDH: if set to anything, no SMDH file is generated.
+# APP_TITLE is the name of the app stored in the SMDH file (Optional)
+# APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional)
+# APP_AUTHOR is the author of the app stored in the SMDH file (Optional)
+# ICON is the filename of the icon (.png), relative to the project folder.
+# If not set, it attempts to use one of the following (in this order):
+# - <Project name>.png
+# - icon.png
+# - <libctru folder>/default_icon.png
+#---------------------------------------------------------------------------------
+TARGET := $(notdir $(CURDIR))
+BUILD := build
+SOURCES := source
+DATA := data
+INCLUDES := include
+
+#---------------------------------------------------------------------------------
+# options for code generation
+#---------------------------------------------------------------------------------
+ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard
+
+CFLAGS := -g -Wall -O2 -mword-relocations \
+ -fomit-frame-pointer -ffast-math \
+ $(ARCH)
+
+CFLAGS += $(INCLUDE) -DARM11 -D_3DS
+
+CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11
+
+ASFLAGS := -g $(ARCH)
+LDFLAGS = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map)
+
+LIBS := -lctru -lm
+
+#---------------------------------------------------------------------------------
+# list of directories containing libraries, this must be the top level containing
+# include and lib
+#---------------------------------------------------------------------------------
+LIBDIRS := $(CTRULIB)
+
+
+#---------------------------------------------------------------------------------
+# no real need to edit anything past this point unless you need to add additional
+# rules for different file extensions
+#---------------------------------------------------------------------------------
+ifneq ($(BUILD),$(notdir $(CURDIR)))
+#---------------------------------------------------------------------------------
+
+export OUTPUT := $(CURDIR)/$(TARGET)
+export TOPDIR := $(CURDIR)
+
+export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \
+ $(foreach dir,$(DATA),$(CURDIR)/$(dir))
+
+export DEPSDIR := $(CURDIR)/$(BUILD)
+
+CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
+CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
+SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
+PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.pica)))
+BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
+
+#---------------------------------------------------------------------------------
+# use CXX for linking C++ projects, CC for standard C
+#---------------------------------------------------------------------------------
+ifeq ($(strip $(CPPFILES)),)
+#---------------------------------------------------------------------------------
+ export LD := $(CC)
+#---------------------------------------------------------------------------------
+else
+#---------------------------------------------------------------------------------
+ export LD := $(CXX)
+#---------------------------------------------------------------------------------
+endif
+#---------------------------------------------------------------------------------
+
+export OFILES := $(addsuffix .o,$(BINFILES)) $(PICAFILES:.pica=.shbin.o) \
+ $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o)
+
+export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
+ $(foreach dir,$(LIBDIRS),-I$(dir)/include) \
+ -I$(CURDIR)/$(BUILD)
+
+export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib)
+
+ifeq ($(strip $(ICON)),)
+ icons := $(wildcard *.png)
+ ifneq (,$(findstring $(TARGET).png,$(icons)))
+ export APP_ICON := $(TOPDIR)/$(TARGET).png
+ else
+ ifneq (,$(findstring icon.png,$(icons)))
+ export APP_ICON := $(TOPDIR)/icon.png
+ endif
+ endif
+else
+ export APP_ICON := $(TOPDIR)/$(ICON)
+endif
+
+ifeq ($(strip $(NO_SMDH)),)
+ export _3DSXFLAGS += --smdh=$(CURDIR)/$(TARGET).smdh
+endif
+
+.PHONY: $(BUILD) clean all
+
+#---------------------------------------------------------------------------------
+all: $(BUILD)
+
+$(BUILD):
+ @[ -d $@ ] || mkdir -p $@
+ @$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile
+
+#---------------------------------------------------------------------------------
+clean:
+ @echo clean ...
+ @rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf
+
+
+#---------------------------------------------------------------------------------
+else
+
+DEPENDS := $(OFILES:.o=.d)
+
+#---------------------------------------------------------------------------------
+# main targets
+#---------------------------------------------------------------------------------
+ifeq ($(strip $(NO_SMDH)),)
+$(OUTPUT).3dsx : $(OUTPUT).elf $(OUTPUT).smdh
+else
+$(OUTPUT).3dsx : $(OUTPUT).elf
+endif
+
+$(OUTPUT).elf : $(OFILES)
+
+#---------------------------------------------------------------------------------
+# you need a rule like this for each extension you use as binary data
+#---------------------------------------------------------------------------------
+%.bin.o : %.bin
+#---------------------------------------------------------------------------------
+ @echo $(notdir $<)
+ @$(bin2o)
+
+#---------------------------------------------------------------------------------
+# rule for assembling GPU shaders
+#   foo.pica -> foo.shbin (picasso) -> foo.shbin.o (bin2s piped into $(AS))
+#   picasso is also given foo.psh.h as its third argument.
+#   The three echo lines write foo_shbin.h, which declares foo_shbin_end[],
+#   foo_shbin[] and foo_shbin_size; the sed expression prefixes the symbol
+#   name with '_' when the file name starts with a digit.
+#---------------------------------------------------------------------------------
+%.shbin.o: %.pica
+ @echo $(notdir $<)
+ $(eval CURBIN := $(patsubst %.pica,%.shbin,$(notdir $<)))
+ $(eval CURH := $(patsubst %.pica,%.psh.h,$(notdir $<)))
+ @picasso $(CURBIN) $< $(CURH)
+ @bin2s $(CURBIN) | $(AS) -o $@
+ @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h
+ @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h
+ @echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . _)`.h
+
+-include $(DEPENDS)
+
+#---------------------------------------------------------------------------------------
+endif
+#---------------------------------------------------------------------------------------
--- /dev/null
+# GPU example
+
+This is a simple GPU example that uses the `picasso` shader assembler, which ships with devkitARM r45 and later.
+Users of earlier devkitARM versions need to install the tool separately; it can be downloaded from the address below:
+
+https://github.com/fincs/picasso/releases
--- /dev/null
+#include "3dmath.h"
+
+// Overwrites *out with the 4x4 identity matrix.
+void m4x4_identity(matrix_4x4* out)
+{
+    // Clear every element, then place ones on the main diagonal
+    m4x4_zeros(out);
+    out->r[0].x = 1.0f;
+    out->r[1].y = 1.0f;
+    out->r[2].z = 1.0f;
+    out->r[3].w = 1.0f;
+}
+
+// Computes out = a * b.
+// Element (row, col) of the result is the dot product of row 'row' of a with
+// column 'col' of b, where a's x/y/z/w components pair with b's rows 0/1/2/3.
+// The product is accumulated in a temporary and copied out at the end, so
+// 'out' may safely point to the same matrix as 'a' and/or 'b'.
+void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b)
+{
+    matrix_4x4 result;
+    int row, col;
+    for (row = 0; row < 4; row ++)
+    {
+        const vector_4f* lhs = &a->r[row];
+        for (col = 0; col < 4; col ++)
+            result.r[row].c[col] = lhs->x*b->r[0].c[col] + lhs->y*b->r[1].c[col] + lhs->z*b->r[2].c[col] + lhs->w*b->r[3].c[col];
+    }
+    *out = result;
+}
+
+// Post-multiplies mtx by a translation of (x, y, z): mtx = mtx * T.
+void m4x4_translate(matrix_4x4* mtx, float x, float y, float z)
+{
+    matrix_4x4 translation, product;
+
+    // T is the identity with the offsets stored in the w component of rows 0-2
+    m4x4_identity(&translation);
+    translation.r[0].w = x;
+    translation.r[1].w = y;
+    translation.r[2].w = z;
+
+    m4x4_multiply(&product, mtx, &translation);
+    *mtx = product;
+}
+
+// Multiplies the x, y and z components of every row of mtx by the given
+// factors, in place. The w components are left untouched.
+void m4x4_scale(matrix_4x4* mtx, float x, float y, float z)
+{
+    vector_4f* row = mtx->r;
+    vector_4f* const end = mtx->r + 4;
+    for (; row != end; row ++)
+    {
+        row->x *= x;
+        row->y *= y;
+        row->z *= z;
+    }
+}
+
+// Combines mtx with a rotation of 'angle' radians about the X axis.
+// bRightSide selects mtx = mtx * R (true) or mtx = R * mtx (false).
+void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide)
+{
+    const float c = cosf(angle);
+    const float s = sinf(angle);
+    matrix_4x4 rot, result;
+
+    // Rotation matrix: row 0 and row 3 are identity rows
+    m4x4_zeros(&rot);
+    rot.r[0].x = 1.0f;
+    rot.r[1].y = c;
+    rot.r[1].z = s;
+    rot.r[2].y = -s;
+    rot.r[2].z = c;
+    rot.r[3].w = 1.0f;
+
+    // Pick the operand order, then multiply once
+    const matrix_4x4* lhs = bRightSide ? mtx : &rot;
+    const matrix_4x4* rhs = bRightSide ? &rot : mtx;
+    m4x4_multiply(&result, lhs, rhs);
+    m4x4_copy(mtx, &result);
+}
+
+// Combines mtx with a rotation of 'angle' radians about the Y axis.
+// bRightSide selects mtx = mtx * R (true) or mtx = R * mtx (false).
+void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide)
+{
+    const float c = cosf(angle);
+    const float s = sinf(angle);
+    matrix_4x4 rot, result;
+
+    // Rotation matrix: row 1 and row 3 are identity rows
+    m4x4_zeros(&rot);
+    rot.r[0].x = c;
+    rot.r[0].z = s;
+    rot.r[1].y = 1.0f;
+    rot.r[2].x = -s;
+    rot.r[2].z = c;
+    rot.r[3].w = 1.0f;
+
+    // Pick the operand order, then multiply once
+    const matrix_4x4* lhs = bRightSide ? mtx : &rot;
+    const matrix_4x4* rhs = bRightSide ? &rot : mtx;
+    m4x4_multiply(&result, lhs, rhs);
+    m4x4_copy(mtx, &result);
+}
+
+// Combines mtx with a rotation of 'angle' radians about the Z axis.
+// bRightSide selects mtx = mtx * R (true) or mtx = R * mtx (false).
+void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide)
+{
+    const float c = cosf(angle);
+    const float s = sinf(angle);
+    matrix_4x4 rot, result;
+
+    // Rotation matrix: row 2 and row 3 are identity rows
+    m4x4_zeros(&rot);
+    rot.r[0].x = c;
+    rot.r[0].y = s;
+    rot.r[1].x = -s;
+    rot.r[1].y = c;
+    rot.r[2].z = 1.0f;
+    rot.r[3].w = 1.0f;
+
+    // Pick the operand order, then multiply once
+    const matrix_4x4* lhs = bRightSide ? mtx : &rot;
+    const matrix_4x4* rhs = bRightSide ? &rot : mtx;
+    m4x4_multiply(&result, lhs, rhs);
+    m4x4_copy(mtx, &result);
+}
+
+// Builds an orthographic projection adapted to the 3DS' sideways screens and
+// stores it in *mtx (the previous contents of *mtx are ignored).
+// The result is tilt * depthFix * ortho, built in three steps below.
+void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far)
+{
+    matrix_4x4 ortho, fixup, depthFixed;
+
+    // Step 1: standard orthogonal projection matrix
+    m4x4_zeros(&ortho);
+    ortho.r[0].x = 2.0f / (right - left);
+    ortho.r[0].w = (left + right) / (left - right);
+    ortho.r[1].y = 2.0f / (top - bottom);
+    ortho.r[1].w = (bottom + top) / (bottom - top);
+    ortho.r[2].z = 2.0f / (near - far);
+    ortho.r[2].w = (far + near) / (far - near);
+    ortho.r[3].w = 1.0f;
+
+    // Step 2: fix depth range to [-1, 0] (row 2 becomes 0.5*z - 0.5*w)
+    m4x4_identity(&fixup);
+    fixup.r[2].z = 0.5f;
+    fixup.r[2].w = -0.5f;
+    m4x4_multiply(&depthFixed, &fixup, &ortho);
+
+    // Step 3: fix the 3DS screens' orientation by swapping the X and Y axis
+    // (the new Y is the negated old X)
+    m4x4_identity(&fixup);
+    fixup.r[0].x = 0.0f;
+    fixup.r[0].y = 1.0f;
+    fixup.r[1].x = -1.0f;
+    fixup.r[1].y = 0.0f;
+    m4x4_multiply(mtx, &fixup, &depthFixed);
+}
+
+// Builds a perspective projection adapted to the 3DS' sideways screens and
+// stores it in *mtx (the previous contents of *mtx are ignored).
+//
+// Callers think in terms of "fovy" and "aspect ratio", but because the screens
+// are sideways these values are really the fovx and the inverse of the aspect
+// ratio, so the usual formula is re-expressed in those terms:
+//
+//   fovx = 2 atan(tan(fovy/2)*w/h)
+//   fovy = 2 atan(tan(fovx/2)*h/w)
+//   invaspect = h/w
+//
+//   a0,0 = h / (w*tan(fovy/2))
+//        = h / (w*tan( atan(tan(fovx/2)*h/w) ))
+//        = h / (w * tan(fovx/2)*h/w)
+//        = 1 / tan(fovx/2)
+//
+//   a1,1 = 1 / tan(fovy/2) = (...) = w / (h*tan(fovx/2))
+void m4x4_persp_tilt(matrix_4x4* mtx, float fovx, float invaspect, float near, float far)
+{
+    const float tanHalfFov = tanf(fovx / 2);
+    matrix_4x4 persp, depthFix;
+
+    // Standard perspective projection matrix
+    m4x4_zeros(&persp);
+    persp.r[0].x = 1.0f / tanHalfFov;
+    persp.r[1].y = 1.0f / (tanHalfFov*invaspect);
+    persp.r[2].z = (near + far) / (near - far);
+    persp.r[2].w = (2 * near * far) / (near - far);
+    persp.r[3].z = -1.0f;
+
+    // Fix depth range to [-1, 0]
+    m4x4_identity(&depthFix);
+    depthFix.r[2].z = 0.5f;
+    depthFix.r[2].w = -0.5f;
+    m4x4_multiply(mtx, &depthFix, &persp);
+
+    // Rotate by a quarter turn about Z (applied on the right side) to fix the 3DS screens' orientation
+    m4x4_rotate_z(mtx, M_PI / 2, true);
+}
--- /dev/null
+/*
+ * Bare-bones simplistic 3D math library
+ * This library is common to all libctru GPU examples
+ */
+
+#pragma once
+#include <string.h>
+#include <stdbool.h>
+#include <math.h>
+
+// 4-component float vector. The named fields are declared in w, z, y, x order,
+// so they alias the array view as c[0]=w, c[1]=z, c[2]=y, c[3]=x.
+// NOTE(review): presumably this mirrors the component order the GPU expects
+// when uniforms are uploaded -- confirm against the GPU documentation.
+typedef union { struct { float w, z, y, x; }; float c[4]; } vector_4f;
+// 4x4 float matrix stored as four row vectors r[0]..r[3].
+typedef struct { vector_4f r[4]; } matrix_4x4;
+
+// Returns the 4-component dot product of *a and *b.
+static inline float v4f_dp4(const vector_4f* a, const vector_4f* b)
+{
+    const float xx = a->x * b->x;
+    const float yy = a->y * b->y;
+    const float zz = a->z * b->z;
+    const float ww = a->w * b->w;
+    return xx + yy + zz + ww;
+}
+
+// Returns the Euclidean length of *a over all four components.
+static inline float v4f_mod4(const vector_4f* a)
+{
+    const float lengthSquared = v4f_dp4(a, a);
+    return sqrtf(lengthSquared);
+}
+
+// Scales *vec in place to unit length. A zero-length vector is left unchanged
+// so that no division by zero takes place.
+static inline void v4f_norm4(vector_4f* vec)
+{
+    const float length = v4f_mod4(vec);
+    if (length != 0.0)
+    {
+        vec->x /= length;
+        vec->y /= length;
+        vec->z /= length;
+        vec->w /= length;
+    }
+}
+
+// Sets every byte of *out to zero, i.e. all 16 elements become 0.0f.
+static inline void m4x4_zeros(matrix_4x4* out)
+{
+    memset(out, 0, sizeof(matrix_4x4));
+}
+
+// Copies the whole matrix *in into *out.
+static inline void m4x4_copy(matrix_4x4* out, const matrix_4x4* in)
+{
+    memcpy(out, in, sizeof(matrix_4x4));
+}
+
+void m4x4_identity(matrix_4x4* out);
+void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b);
+
+void m4x4_translate(matrix_4x4* mtx, float x, float y, float z);
+void m4x4_scale(matrix_4x4* mtx, float x, float y, float z);
+
+void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide);
+void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide);
+void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide);
+
+// Special versions of the projection matrices that take the 3DS' screen orientation into account
+void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far);
+void m4x4_persp_tilt(matrix_4x4* mtx, float fovy, float aspect, float near, float far);
--- /dev/null
+#include "gpu.h"
+
+#define DISPLAY_TRANSFER_FLAGS \
+ (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
+ GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
+ GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
+
+static u32 *colorBuf, *depthBuf;
+static u32 *cmdBuf;
+
+// Allocates the color/depth render targets and the GPU command buffer,
+// then initializes and resets the GPU with that command buffer.
+void gpuInit(void)
+{
+    // Each render target is 400x240 pixels at 4 bytes per pixel
+    const u32 renderTargetBytes = 400*240*4;
+    // Command buffer capacity in u32 entries (the allocation is 4 bytes per entry)
+    const u32 cmdBufEntries = 0x40000;
+
+    colorBuf = vramAlloc(renderTargetBytes);
+    depthBuf = vramAlloc(renderTargetBytes);
+    cmdBuf = linearAlloc(cmdBufEntries*4);
+
+    GPU_Init(NULL);
+    GPU_Reset(NULL, cmdBuf, cmdBufEntries);
+}
+
+// Releases the command buffer and both render targets, in the reverse order
+// of their allocation in gpuInit.
+// NOTE(review): the static pointers are not reset to NULL afterwards -- confirm
+// that nothing uses them after gpuExit.
+void gpuExit(void)
+{
+ linearFree(cmdBuf);
+ vramFree(depthBuf);
+ vramFree(colorBuf);
+}
+
+// Fills the color buffer with clearColor and the depth buffer with 0,
+// then blocks until the fill has completed.
+void gpuClearBuffers(u32 clearColor)
+{
+    // Both buffers hold 240*400 32-bit entries; the fill takes one-past-the-end pointers
+    u32* const colorEnd = colorBuf + 240*400;
+    u32* const depthEnd = depthBuf + 240*400;
+
+    GX_SetMemoryFill(NULL,
+        colorBuf, clearColor, colorEnd, GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH,
+        depthBuf, 0, depthEnd, GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
+
+    // Wait for the fill to complete
+    gspWaitForPSC0();
+}
+
+// Queues the per-frame GPU state setup: viewport, depth mapping, culling,
+// stencil/depth/alpha tests, blending, and six pass-through TexEnv stages.
+// Call it before issuing any scene drawing commands for the frame.
+void gpuFrameBegin(void)
+{
+    // The GPU is given the physical addresses of the render targets
+    u32* const depthPhys = (u32*)osConvertVirtToPhys((u32)depthBuf);
+    u32* const colorPhys = (u32*)osConvertVirtToPhys((u32)colorBuf);
+
+    // Viewport: the top screen is physically 240x400 pixels
+    GPU_SetViewport(depthPhys, colorPhys, 0, 0, 240, 400);
+
+    // Depth linear conversion function: depth = -1.0*z + 0.0
+    GPU_DepthMap(-1.0f, 0.0f);
+
+    // Boilerplate state
+    GPU_SetFaceCulling(GPU_CULL_BACK_CCW);
+    GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00);
+    GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP);
+    GPU_SetBlendingColor(0,0,0,0);
+    GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL);
+
+    // The purpose of these two register writes is unknown
+    GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
+    GPUCMD_AddWrite(GPUREG_0118, 0);
+
+    // Alpha blending (source alpha / one minus source alpha) and alpha test (disabled)
+    GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA);
+    GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00);
+
+    // Start with every TexEnv stage set to a no-operation
+    int stage;
+    for (stage = 0; stage < 6; stage ++)
+    {
+        GPU_SetDummyTexEnv(stage);
+    }
+}
+
+// Ends the frame: submits the queued GPU commands, waits for rendering to finish,
+// copies the color buffer into the top-left framebuffer, and rewinds the command
+// buffer so the next frame starts from offset 0.
+void gpuFrameEnd(void)
+{
+    // Finish rendering
+    GPU_FinishDrawing();
+    GPUCMD_Finalize();
+    GPUCMD_FlushAndRun(NULL);
+    gspWaitForP3D(); // Wait for the rendering to complete
+
+    // Transfer the GPU output to the framebuffer
+    GX_SetDisplayTransfer(NULL, colorBuf, GX_BUFFER_DIM(240, 400),
+        (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240, 400),
+        DISPLAY_TRANSFER_FLAGS);
+    gspWaitForPPF(); // Wait for the transfer to complete
+
+    // Reset the command buffer
+    GPUCMD_SetBufferOffset(0);
+}
+
+// Configures TexEnv stage `id` to forward the previous stage's output unchanged:
+// both the RGB and the alpha combiner use GPU_REPLACE with GPU_PREVIOUS as source.
+void GPU_SetDummyTexEnv(int id)
+{
+    const u32 passSources  = GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0);
+    const u32 passOperands = GPU_TEVOPERANDS(0, 0, 0);
+
+    GPU_SetTexEnv(id,
+        passSources, passSources,   // RGB, Alpha sources
+        passOperands, passOperands, // RGB, Alpha operands
+        GPU_REPLACE, GPU_REPLACE,   // RGB, Alpha combiner functions
+        0xFFFFFFFF);
+}
--- /dev/null
+/*
+ * Bare-bones simplistic GPU wrapper
+ * This library is common to all libctru GPU examples
+ */
+
+#pragma once
+#include <string.h>
+#include <3ds.h>
+#include "3dmath.h"
+
+// Allocates the render targets and command buffer and initializes the GPU
+void gpuInit(void);
+// Frees the buffers allocated by gpuInit
+void gpuExit(void);
+
+// Fills the color buffer with clearColor and the depth buffer with zero, and waits for completion
+void gpuClearBuffers(u32 clearColor);
+
+// Queues the common per-frame GPU state (viewport, tests, blending, dummy TexEnv stages)
+void gpuFrameBegin(void);
+// Runs the queued commands, waits for them, and transfers the result to the top screen framebuffer
+void gpuFrameEnd(void);
+
+// Configures the specified fixed-function fragment shading substage to be a no-operation
+void GPU_SetDummyTexEnv(int id);
+
+// Uploads a 4x4 matrix to the float uniforms of the given shader type, starting at `location`.
+// The matrix is passed as raw 32-bit words with a count of 4 (one per matrix row).
+static inline void GPU_SetFloatUniformMatrix(GPU_SHADER_TYPE type, int location, matrix_4x4* matrix)
+{
+    u32* const rawWords = (u32*)matrix;
+    GPU_SetFloatUniform(type, location, rawWords, 4);
+}
--- /dev/null
+/*
+ * ~~ Simple libctru GPU triangle example ~~
+ * This example demonstrates the basics of using the PICA200 in a 3DS homebrew
+ * application in order to render a basic scene consisting of a white solid triangle.
+ */
+
+#include "gpu.h"
+#include "vshader_shbin.h"
+
+// Value the color buffer is filled with by gpuClearBuffers
+// NOTE(review): presumably an RGBA8 color (0xRRGGBBAA) -- confirm against the render target format
+#define CLEAR_COLOR 0x68B0D8FF
+
+// A vertex only carries a position (three floats)
+typedef struct { float x, y, z; } vertex;
+
+// The three corners of the triangle. sceneInit sets up an orthographic projection
+// spanning 0..400 horizontally and 0..240 vertically, so x and y are in those units.
+static const vertex vertex_list[] =
+{
+ { 200.0f, 200.0f, 0.5f },
+ { 100.0f, 40.0f, 0.5f },
+ { 300.0f, 40.0f, 0.5f },
+};
+
+// Number of vertices in vertex_list
+#define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0]))
+
+// Shader state: parsed shader binary, the program built from it, and the
+// location of its "projection" uniform together with the matrix uploaded to it
+static DVLB_s* vshader_dvlb;
+static shaderProgram_s program;
+static int uLoc_projection;
+static matrix_4x4 projection;
+
+// Copy of vertex_list in linear memory (allocated in sceneInit)
+static void* vbo_data;
+
+// Sets up everything the scene needs before the first frame: the shader program,
+// the projection matrix, and a copy of the vertex data in linear memory.
+static void sceneInit(void)
+{
+    // Parse the embedded shader binary and build a program from its first DVLE
+    vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
+    shaderProgramInit(&program);
+    shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
+
+    // Look up where the shader expects the projection matrix
+    uLoc_projection = shaderInstanceGetUniformLocation(program.vertexShader, "projection");
+
+    // Orthographic projection: x in 0..400, y in 0..240, near 0, far 1
+    m4x4_ortho_tilt(&projection, 0.0, 400.0, 0.0, 240.0, 0.0, 1.0);
+
+    // Create the VBO (vertex buffer object) and fill it with the vertex list
+    const size_t vboSize = sizeof(vertex_list);
+    vbo_data = linearAlloc(vboSize);
+    memcpy(vbo_data, vertex_list, vboSize);
+}
+
+// Queues the GPU commands that draw the triangle: binds the shader, sets up
+// TexEnv stage 0 and the vertex input, uploads the projection matrix, and draws.
+static void sceneRender(void)
+{
+    // Bind the shader program
+    shaderProgramUse(&program);
+
+    // TexEnv stage 0 just passes the vertex color through
+    // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
+    GPU_SetTexEnv(0,
+        GPU_TEVSOURCES(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // RGB channels
+        GPU_TEVSOURCES(GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // Alpha
+        GPU_TEVOPERANDS(0, 0, 0), // RGB
+        GPU_TEVOPERANDS(0, 0, 0), // Alpha
+        GPU_REPLACE, GPU_REPLACE, // RGB, Alpha
+        0xFFFFFFFF);
+
+    // Per-buffer configuration for the single vertex buffer
+    u32 bufferOffsets[]      = { 0x0 }; // Buffer offsets (placeholders)
+    u64 bufferPermutations[] = { 0x0 }; // Attribute permutations for each buffer (identity)
+    u8  bufferAttribCounts[] = { 1 };   // Number of attributes for each buffer
+    u32* const vboPhys = (u32*)osConvertVirtToPhys((u32)vbo_data);
+
+    // Configure the "attribute buffers" (that is, the vertex input buffers)
+    GPU_SetAttributeBuffers(
+        1,                              // Number of inputs per vertex
+        vboPhys,                        // Location of the VBO
+        GPU_ATTRIBFMT(0, 3, GPU_FLOAT), // The only input is a 3-element float vector
+        0xFFE,                          // Unused attribute mask: bit 0 is cleared since it is used
+        0x0,                            // Attribute permutations (identity)
+        1,                              // Number of buffers
+        bufferOffsets,
+        bufferPermutations,
+        bufferAttribCounts);
+
+    // Upload the projection matrix
+    GPU_SetFloatUniformMatrix(GPU_VERTEX_SHADER, uLoc_projection, &projection);
+
+    // Draw the VBO
+    GPU_DrawArray(GPU_TRIANGLES, vertex_list_count);
+}
+
+// Releases what sceneInit() created: the VBO first, then the shader program
+// and the parsed shader binary it was built from.
+static void sceneExit(void)
+{
+    // Vertex buffer
+    linearFree(vbo_data);
+
+    // Shader program and its DVLB
+    shaderProgramFree(&program);
+    DVLB_Free(vshader_dvlb);
+}
+
+// Entry point: initializes graphics and the scene, renders one frame per VBlank
+// until START is pressed (or the applet loop ends), then tears everything down.
+int main()
+{
+    // Bring up graphics, the GPU wrapper and the scene
+    gfxInitDefault();
+    gpuInit();
+    sceneInit();
+    gpuClearBuffers(CLEAR_COLOR);
+
+    while (aptMainLoop())
+    {
+        gspWaitForVBlank();  // Synchronize with the start of VBlank
+        gfxSwapBuffersGpu(); // Show the frame that was rendered during the previous iteration
+        hidScanInput();      // Read the user input
+
+        // START returns to hbmenu
+        if (hidKeysDown() & KEY_START)
+        {
+            break;
+        }
+
+        // Render the scene, then clear the buffers for the next frame
+        gpuFrameBegin();
+        sceneRender();
+        gpuFrameEnd();
+        gpuClearBuffers(CLEAR_COLOR);
+
+        // Flushing the framebuffers out of the data cache is not necessary
+        // with pure GPU rendering, so gfxFlushBuffers() is not called here.
+    }
+
+    // Tear down in reverse order of initialization
+    sceneExit();
+    gpuExit();
+    gfxExit();
+    return 0;
+}
--- /dev/null
+; Example PICA200 vertex shader
+
+; Uniforms
+; projection: four float vectors, one per row of the projection matrix
+; (the application looks this uniform up by the name "projection")
+.fvec projection[4]
+
+; Constants
+; Only the x (0.0) and y (1.0) components of myconst are referenced by this shader
+.constf myconst(0.0, 1.0, -1.0, -0.5)
+.alias zeros myconst.xxxx ; Vector full of zeros
+.alias ones myconst.yyyy ; Vector full of ones
+
+; Outputs
+.out outpos position
+.out outclr color
+
+; Inputs (defined as aliases for convenience)
+; v0 receives the vertex position
+.alias inpos v0
+
+; Shader entry point: transforms the input position by the projection matrix
+; and outputs a constant white color for every vertex
+.proc main
+ ; Force the w component of inpos to be 1.0
+ mov r0.xyz, inpos
+ mov r0.w, ones
+
+ ; outpos = projectionMatrix * inpos
+ ; (one dot product per matrix row, each producing one output component)
+ dp4 outpos.x, projection[0], r0
+ dp4 outpos.y, projection[1], r0
+ dp4 outpos.z, projection[2], r0
+ dp4 outpos.w, projection[3], r0
+
+ ; outclr = solid white color
+ mov outclr, ones
+
+ ; We're finished
+ end
+.end
--- /dev/null
+#---------------------------------------------------------------------------------
+.SUFFIXES:
+#---------------------------------------------------------------------------------
+
+# Abort early with a helpful message when DEVKITARM is not set
+ifeq ($(strip $(DEVKITARM)),)
+$(error "Please set DEVKITARM in your environment. export DEVKITARM=<path to>devkitARM")
+endif
+
+# TOPDIR defaults to the directory make was started in;
+# the 3ds_rules file found under $(DEVKITARM) is then included
+TOPDIR ?= $(CURDIR)
+include $(DEVKITARM)/3ds_rules
+
+#---------------------------------------------------------------------------------
+# TARGET is the name of the output
+# BUILD is the directory where object files & intermediate files will be placed
+# SOURCES is a list of directories containing source code
+# DATA is a list of directories containing data files
+# INCLUDES is a list of directories containing header files
+#
+# NO_SMDH: if set to anything, no SMDH file is generated.
+# APP_TITLE is the name of the app stored in the SMDH file (Optional)
+# APP_DESCRIPTION is the description of the app stored in the SMDH file (Optional)
+# APP_AUTHOR is the author of the app stored in the SMDH file (Optional)
+# ICON is the filename of the icon (.png), relative to the project folder.
+# If not set, it attempts to use one of the following (in this order):
+# - <Project name>.png
+# - icon.png
+# - <libctru folder>/default_icon.png
+#---------------------------------------------------------------------------------
+# The output is named after the project directory
+TARGET := $(notdir $(CURDIR))
+BUILD := build
+SOURCES := source
+DATA := data
+INCLUDES := include
+
+#---------------------------------------------------------------------------------
+# options for code generation
+#---------------------------------------------------------------------------------
+ARCH := -march=armv6k -mtune=mpcore -mfloat-abi=hard
+
+CFLAGS := -g -Wall -O2 -mword-relocations \
+ -fomit-frame-pointer -ffast-math \
+ $(ARCH)
+
+# INCLUDE is built further below from INCLUDES, LIBDIRS and the build directory
+CFLAGS += $(INCLUDE) -DARM11 -D_3DS
+
+CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions -std=gnu++11
+
+ASFLAGS := -g $(ARCH)
+# Recursively expanded (=) so that $* is evaluated when the link rule runs
+LDFLAGS = -specs=3dsx.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map)
+
+LIBS := -lctru -lm
+
+#---------------------------------------------------------------------------------
+# list of directories containing libraries, this must be the top level containing
+# include and lib
+#---------------------------------------------------------------------------------
+LIBDIRS := $(CTRULIB)
+
+
+#---------------------------------------------------------------------------------
+# no real need to edit anything past this point unless you need to add additional
+# rules for different file extensions
+#---------------------------------------------------------------------------------
+ifneq ($(BUILD),$(notdir $(CURDIR)))
+#---------------------------------------------------------------------------------
+
+# The variables exported below are handed to the sub-make that runs inside $(BUILD)
+export OUTPUT := $(CURDIR)/$(TARGET)
+export TOPDIR := $(CURDIR)
+
+# Search path for prerequisites: every source and data directory
+export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \
+ $(foreach dir,$(DATA),$(CURDIR)/$(dir))
+
+export DEPSDIR := $(CURDIR)/$(BUILD)
+
+# Collect the input files by extension (names only, directories stripped)
+CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
+CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
+SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
+PICAFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.pica)))
+BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
+
+#---------------------------------------------------------------------------------
+# use CXX for linking C++ projects, CC for standard C
+#---------------------------------------------------------------------------------
+ifeq ($(strip $(CPPFILES)),)
+#---------------------------------------------------------------------------------
+ export LD := $(CC)
+#---------------------------------------------------------------------------------
+else
+#---------------------------------------------------------------------------------
+ export LD := $(CXX)
+#---------------------------------------------------------------------------------
+endif
+#---------------------------------------------------------------------------------
+
+# One object file per input file; .pica shaders become <name>.shbin.o
+export OFILES := $(addsuffix .o,$(BINFILES)) $(PICAFILES:.pica=.shbin.o) \
+ $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o)
+
+# Header search path: project include dirs, library include dirs, and the
+# build directory (where the generated headers are written)
+export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
+ $(foreach dir,$(LIBDIRS),-I$(dir)/include) \
+ -I$(CURDIR)/$(BUILD)
+
+export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib)
+
+# Icon selection: an explicit ICON wins, otherwise <TARGET>.png, then icon.png
+ifeq ($(strip $(ICON)),)
+ icons := $(wildcard *.png)
+ ifneq (,$(findstring $(TARGET).png,$(icons)))
+ export APP_ICON := $(TOPDIR)/$(TARGET).png
+ else
+ ifneq (,$(findstring icon.png,$(icons)))
+ export APP_ICON := $(TOPDIR)/icon.png
+ endif
+ endif
+else
+ export APP_ICON := $(TOPDIR)/$(ICON)
+endif
+
+# Unless NO_SMDH is set, ask for the SMDH file to be embedded
+ifeq ($(strip $(NO_SMDH)),)
+ export _3DSXFLAGS += --smdh=$(CURDIR)/$(TARGET).smdh
+endif
+
+.PHONY: $(BUILD) clean all
+
+#---------------------------------------------------------------------------------
+all: $(BUILD)
+
+# Create the build directory if needed, then re-run this Makefile from inside it
+$(BUILD):
+ @[ -d $@ ] || mkdir -p $@
+ @$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile
+
+#---------------------------------------------------------------------------------
+# Remove the build directory and the generated .3dsx, .smdh and .elf files
+clean:
+ @echo clean ...
+ @rm -fr $(BUILD) $(TARGET).3dsx $(OUTPUT).smdh $(TARGET).elf
+
+
+#---------------------------------------------------------------------------------
+else
+
+# One dependency file per object file
+DEPENDS := $(OFILES:.o=.d)
+
+#---------------------------------------------------------------------------------
+# main targets
+#---------------------------------------------------------------------------------
+ifeq ($(strip $(NO_SMDH)),)
+$(OUTPUT).3dsx : $(OUTPUT).elf $(OUTPUT).smdh
+else
+$(OUTPUT).3dsx : $(OUTPUT).elf
+endif
+
+$(OUTPUT).elf : $(OFILES)
+
+#---------------------------------------------------------------------------------
+# you need a rule like this for each extension you use as binary data
+#---------------------------------------------------------------------------------
+%.bin.o : %.bin
+#---------------------------------------------------------------------------------
+ @echo $(notdir $<)
+ @$(bin2o)
+
+#---------------------------------------------------------------------------------
+# rule for assembling GPU shaders
+#
+# picasso assembles <name>.pica into <name>.shbin (plus a <name>.psh.h header),
+# bin2s piped into $(AS) turns the binary into the object file, and the echo
+# lines write <name>_shbin.h declaring the <name>_shbin_end[], <name>_shbin[]
+# and <name>_shbin_size symbols (a leading digit in the symbol name is prefixed
+# with an underscore by the sed expression)
+#---------------------------------------------------------------------------------
+%.shbin.o: %.pica
+ @echo $(notdir $<)
+ $(eval CURBIN := $(patsubst %.pica,%.shbin,$(notdir $<)))
+ $(eval CURH := $(patsubst %.pica,%.psh.h,$(notdir $<)))
+ @picasso $(CURBIN) $< $(CURH)
+ @bin2s $(CURBIN) | $(AS) -o $@
+ @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"_end[];" > `(echo $(CURBIN) | tr . _)`.h
+ @echo "extern const u8" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`"[];" >> `(echo $(CURBIN) | tr . _)`.h
+ @echo "extern const u32" `(echo $(CURBIN) | sed -e 's/^\([0-9]\)/_\1/' | tr . _)`_size";" >> `(echo $(CURBIN) | tr . _)`.h
+
+# Pull in the dependency files if they exist (no error when they are missing)
+-include $(DEPENDS)
+
+#---------------------------------------------------------------------------------------
+endif
+#---------------------------------------------------------------------------------------
--- /dev/null
+# GPU example
+
+This is a simple GPU example using the `picasso` shader assembler, which comes with devkitARM r45 and up.
+Users of earlier versions of devkitARM need to install the tool separately; it can be found at the address below:
+
+https://github.com/fincs/picasso/releases
--- /dev/null
+#include "3dmath.h"
+
+// Sets out to the identity matrix: all zeros except for ones on the main diagonal.
+void m4x4_identity(matrix_4x4* out)
+{
+    m4x4_zeros(out);
+
+    // Main diagonal
+    out->r[0].x = 1.0f;
+    out->r[1].y = 1.0f;
+    out->r[2].z = 1.0f;
+    out->r[3].w = 1.0f;
+}
+
+// Computes the matrix product out = a * b.
+//
+// Element (row j, component i) of the result is the dot product of row j of a
+// with the corresponding column of b.
+//
+// The product is accumulated in a local temporary and only stored to out once
+// it is complete, so out may be the same matrix as a and/or b.
+void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b)
+{
+    matrix_4x4 result;
+    int i, j;
+
+    for (j = 0; j < 4; j ++)
+    {
+        for (i = 0; i < 4; i ++)
+        {
+            result.r[j].c[i] = a->r[j].x*b->r[0].c[i] + a->r[j].y*b->r[1].c[i] + a->r[j].z*b->r[2].c[i] + a->r[j].w*b->r[3].c[i];
+        }
+    }
+
+    // Writing out last is what makes aliasing with a or b safe
+    *out = result;
+}
+
+// Replaces mtx with mtx * T, where T is the translation matrix for (x, y, z).
+void m4x4_translate(matrix_4x4* mtx, float x, float y, float z)
+{
+    // Build T: identity with the offsets in the w component of the first three rows
+    matrix_4x4 translation;
+    m4x4_identity(&translation);
+    translation.r[0].w = x;
+    translation.r[1].w = y;
+    translation.r[2].w = z;
+
+    // The product goes through a temporary before being stored back into mtx
+    matrix_4x4 product;
+    m4x4_multiply(&product, mtx, &translation);
+    *mtx = product;
+}
+
+// Scales mtx in place: in every row, the x, y and z components are multiplied
+// by the respective factor. The w components are left unchanged.
+void m4x4_scale(matrix_4x4* mtx, float x, float y, float z)
+{
+    vector_4f* row = mtx->r;
+    vector_4f* const rowsEnd = mtx->r + 4;
+
+    for (; row != rowsEnd; row ++)
+    {
+        row->x *= x;
+        row->y *= y;
+        row->z *= z;
+    }
+}
+
+// Combines mtx with a rotation around the X axis by `angle` (radians, as taken by sinf/cosf).
+// bRightSide selects the multiplication order: true gives mtx * R, false gives R * mtx.
+void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide)
+{
+    const float c = cosf(angle);
+    const float s = sinf(angle);
+
+    // Rotation matrix: identity except for the y/z block
+    matrix_4x4 rotation;
+    m4x4_identity(&rotation);
+    rotation.r[1].y = c;
+    rotation.r[1].z = s;
+    rotation.r[2].y = -s;
+    rotation.r[2].z = c;
+
+    matrix_4x4 product;
+    if (bRightSide)
+    {
+        m4x4_multiply(&product, mtx, &rotation);
+    }
+    else
+    {
+        m4x4_multiply(&product, &rotation, mtx);
+    }
+    *mtx = product;
+}
+
+// Combines mtx with a rotation around the Y axis by `angle` (radians, as taken by sinf/cosf).
+// bRightSide selects the multiplication order: true gives mtx * R, false gives R * mtx.
+void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide)
+{
+    const float c = cosf(angle);
+    const float s = sinf(angle);
+
+    // Rotation matrix: identity except for the x/z block
+    matrix_4x4 rotation;
+    m4x4_identity(&rotation);
+    rotation.r[0].x = c;
+    rotation.r[0].z = s;
+    rotation.r[2].x = -s;
+    rotation.r[2].z = c;
+
+    matrix_4x4 product;
+    if (bRightSide)
+    {
+        m4x4_multiply(&product, mtx, &rotation);
+    }
+    else
+    {
+        m4x4_multiply(&product, &rotation, mtx);
+    }
+    *mtx = product;
+}
+
+// Combines mtx with a rotation around the Z axis by `angle` (radians, as taken by sinf/cosf).
+// bRightSide selects the multiplication order: true gives mtx * R, false gives R * mtx.
+void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide)
+{
+    const float c = cosf(angle);
+    const float s = sinf(angle);
+
+    // Rotation matrix: identity except for the x/y block
+    matrix_4x4 rotation;
+    m4x4_identity(&rotation);
+    rotation.r[0].x = c;
+    rotation.r[0].y = s;
+    rotation.r[1].x = -s;
+    rotation.r[1].y = c;
+
+    matrix_4x4 product;
+    if (bRightSide)
+    {
+        m4x4_multiply(&product, mtx, &rotation);
+    }
+    else
+    {
+        m4x4_multiply(&product, &rotation, mtx);
+    }
+    *mtx = product;
+}
+
+// Writes to mtx an orthographic projection for the given volume, adjusted for the 3DS:
+// the depth range is remapped to [-1, 0] and the X/Y axes are swapped (with one flipped)
+// to account for the screens' orientation. The result is tilt * depthFix * ortho.
+void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far)
+{
+    // Orthogonal projection matrix for the requested volume
+    matrix_4x4 ortho;
+    m4x4_zeros(&ortho);
+    ortho.r[0].x = 2.0f / (right - left);
+    ortho.r[0].w = (left + right) / (left - right);
+    ortho.r[1].y = 2.0f / (top - bottom);
+    ortho.r[1].w = (bottom + top) / (bottom - top);
+    ortho.r[2].z = 2.0f / (near - far);
+    ortho.r[2].w = (far + near) / (far - near);
+    ortho.r[3].w = 1.0f;
+
+    // Depth fix: z' = 0.5*z - 0.5*w, which brings the depth range to [-1, 0]
+    matrix_4x4 depthFix, depthFixedOrtho;
+    m4x4_identity(&depthFix);
+    depthFix.r[2].z = 0.5;
+    depthFix.r[2].w = -0.5;
+    m4x4_multiply(&depthFixedOrtho, &depthFix, &ortho);
+
+    // Screen orientation fix: x' = y and y' = -x (axes swapped, one of them flipped)
+    matrix_4x4 tilt;
+    m4x4_zeros(&tilt);
+    tilt.r[0].y = 1.0;
+    tilt.r[1].x = -1.0;
+    tilt.r[2].z = 1.0f;
+    tilt.r[3].w = 1.0f;
+    m4x4_multiply(mtx, &tilt, &depthFixedOrtho);
+}
+
+// Writes to mtx a perspective projection adjusted for the 3DS: the depth range is
+// remapped to [-1, 0] and the result is rotated a quarter turn around Z to account
+// for the screens' orientation.
+//
+// About the parameters: callers pass what they think of as "fovy" and the "aspect
+// ratio". Because the 3DS screens are sideways, those values are really the fovx and
+// the inverse aspect ratio (invaspect = h/w), so the usual formulas are rewritten in
+// those terms, using
+//   fovx = 2 atan(tan(fovy/2)*w/h)
+//   fovy = 2 atan(tan(fovx/2)*h/w)
+//
+//   a0,0 = h / (w*tan(fovy/2))
+//        = h / (w*tan(atan(tan(fovx/2)*h/w)))
+//        = h / (w * tan(fovx/2)*h/w)
+//        = 1 / tan(fovx/2)
+//
+//   a1,1 = 1 / tan(fovy/2) = (...) = w / (h*tan(fovx/2))
+void m4x4_persp_tilt(matrix_4x4* mtx, float fovx, float invaspect, float near, float far)
+{
+    const float halfFovTan = tanf(fovx / 2);
+
+    // Perspective projection matrix expressed in terms of fovx and invaspect
+    matrix_4x4 persp;
+    m4x4_zeros(&persp);
+    persp.r[0].x = 1.0f / halfFovTan;
+    persp.r[1].y = 1.0f / (halfFovTan*invaspect);
+    persp.r[2].z = (near + far) / (near - far);
+    persp.r[2].w = (2 * near * far) / (near - far);
+    persp.r[3].z = -1.0f;
+
+    // Depth fix: z' = 0.5*z - 0.5*w, which brings the depth range to [-1, 0]
+    matrix_4x4 depthFix;
+    m4x4_identity(&depthFix);
+    depthFix.r[2].z = 0.5;
+    depthFix.r[2].w = -0.5;
+    m4x4_multiply(mtx, &depthFix, &persp);
+
+    // Quarter-turn CCW rotation (applied on the right side) for the screen orientation
+    m4x4_rotate_z(mtx, M_PI / 2, true);
+}
--- /dev/null
+/*
+ * Bare-bones simplistic 3D math library
+ * This library is common to all libctru GPU examples
+ */
+
+#pragma once
+#include <string.h>
+#include <stdbool.h>
+#include <math.h>
+
+// Four-component float vector. Note the field order: the components are stored
+// as w, z, y, x, so c[0] is w, c[1] is z, c[2] is y and c[3] is x.
+typedef union { struct { float w, z, y, x; }; float c[4]; } vector_4f;
+// 4x4 float matrix stored as four row vectors
+typedef struct { vector_4f r[4]; } matrix_4x4;
+
+// Returns the four-component dot product of a and b.
+// The products are accumulated in x, y, z, w order.
+static inline float v4f_dp4(const vector_4f* a, const vector_4f* b)
+{
+    float sum = a->x*b->x;
+    sum += a->y*b->y;
+    sum += a->z*b->z;
+    sum += a->w*b->w;
+    return sum;
+}
+
+// Returns the length (modulus) of the four-component vector a,
+// i.e. the square root of its dot product with itself.
+static inline float v4f_mod4(const vector_4f* a)
+{
+    const float lengthSquared = v4f_dp4(a, a);
+    return sqrtf(lengthSquared);
+}
+
+// Normalizes vec in place by dividing every component by the vector's length.
+// A vector whose length is exactly zero is left untouched.
+static inline void v4f_norm4(vector_4f* vec)
+{
+    const float length = v4f_mod4(vec);
+    if (length != 0.0f)
+    {
+        vec->x /= length;
+        vec->y /= length;
+        vec->z /= length;
+        vec->w /= length;
+    }
+}
+
+// Clears every byte of the matrix, which sets all sixteen elements to zero.
+static inline void m4x4_zeros(matrix_4x4* out)
+{
+    memset(out, 0, sizeof(matrix_4x4));
+}
+
+// Copies the whole matrix `in` into `out`.
+static inline void m4x4_copy(matrix_4x4* out, const matrix_4x4* in)
+{
+    memcpy(out, in, sizeof(matrix_4x4));
+}
+
+// Sets out to the identity matrix
+void m4x4_identity(matrix_4x4* out);
+// Computes the matrix product out = a * b
+void m4x4_multiply(matrix_4x4* out, const matrix_4x4* a, const matrix_4x4* b);
+
+// Replaces mtx with mtx * T, where T is the translation by (x, y, z)
+void m4x4_translate(matrix_4x4* mtx, float x, float y, float z);
+// Multiplies the x, y and z components of every row of mtx by the given factors
+void m4x4_scale(matrix_4x4* mtx, float x, float y, float z);
+
+// Rotations around the X, Y and Z axes; angle is fed to sinf/cosf, i.e. it is in radians.
+// If bRightSide is true the result is mtx * R, otherwise it is R * mtx.
+void m4x4_rotate_x(matrix_4x4* mtx, float angle, bool bRightSide);
+void m4x4_rotate_y(matrix_4x4* mtx, float angle, bool bRightSide);
+void m4x4_rotate_z(matrix_4x4* mtx, float angle, bool bRightSide);
+
+// Special versions of the projection matrices that take the 3DS' screen orientation into account
+// Both write the resulting projection matrix to mtx (its previous contents are not read).
+void m4x4_ortho_tilt(matrix_4x4* mtx, float left, float right, float bottom, float top, float near, float far);
+// NOTE: the implementation names these parameters fovx and invaspect (h/w), because the
+// screen is sideways; see the notes in the function definition.
+void m4x4_persp_tilt(matrix_4x4* mtx, float fovy, float aspect, float near, float far);
--- /dev/null
+#include "gpu.h"
+
+// Flags passed to GX_SetDisplayTransfer in gpuFrameEnd.
+// Going by the macro names: no vertical flip, no tiled output, no raw copy,
+// RGBA8 input converted to RGB8 output, and no scaling.
+#define DISPLAY_TRANSFER_FLAGS \
+ (GX_TRANSFER_FLIP_VERT(0) | GX_TRANSFER_OUT_TILED(0) | GX_TRANSFER_RAW_COPY(0) | \
+ GX_TRANSFER_IN_FORMAT(GX_TRANSFER_FMT_RGBA8) | GX_TRANSFER_OUT_FORMAT(GX_TRANSFER_FMT_RGB8) | \
+ GX_TRANSFER_SCALING(GX_TRANSFER_SCALE_NO))
+
+// Render targets (allocated with vramAlloc in gpuInit)
+static u32 *colorBuf, *depthBuf;
+// GPU command buffer (allocated with linearAlloc in gpuInit)
+static u32 *cmdBuf;
+
+// Allocates the render targets and the command buffer, then initializes the GPU
+// and points it at the command buffer. Must be paired with gpuExit().
+void gpuInit(void)
+{
+    enum
+    {
+        TARGET_PIXELS = 400*240, // One render target covers the whole top screen
+        CMDBUF_WORDS  = 0x40000, // Command buffer size, counted in 32-bit words
+    };
+
+    // Both render targets use 4 bytes per pixel
+    colorBuf = vramAlloc(TARGET_PIXELS*4);
+    depthBuf = vramAlloc(TARGET_PIXELS*4);
+    cmdBuf = linearAlloc(CMDBUF_WORDS*4);
+
+    GPU_Init(NULL);
+    GPU_Reset(NULL, cmdBuf, CMDBUF_WORDS);
+}
+
+// Releases everything gpuInit() allocated, in reverse order of allocation.
+void gpuExit(void)
+{
+    // Command buffer (linear memory)
+    linearFree(cmdBuf);
+
+    // Render targets (VRAM)
+    vramFree(depthBuf);
+    vramFree(colorBuf);
+}
+
+// Fills the color buffer with clearColor and the depth buffer with zero,
+// then blocks until the fill has completed.
+void gpuClearBuffers(u32 clearColor)
+{
+    // One-past-the-end pointers of the two 240x400 render targets
+    u32* const colorEnd = colorBuf + 240*400;
+    u32* const depthEnd = depthBuf + 240*400;
+
+    GX_SetMemoryFill(NULL,
+        colorBuf, clearColor, colorEnd, GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH,
+        depthBuf, 0, depthEnd, GX_FILL_TRIGGER | GX_FILL_32BIT_DEPTH);
+
+    // Wait for the fill to complete
+    gspWaitForPSC0();
+}
+
+// Queues the per-frame GPU state setup: viewport, depth mapping, culling,
+// stencil/depth/alpha tests, blending, and six pass-through TexEnv stages.
+// Call it before issuing any scene drawing commands for the frame.
+void gpuFrameBegin(void)
+{
+    // The GPU is given the physical addresses of the render targets
+    u32* const depthPhys = (u32*)osConvertVirtToPhys((u32)depthBuf);
+    u32* const colorPhys = (u32*)osConvertVirtToPhys((u32)colorBuf);
+
+    // Viewport: the top screen is physically 240x400 pixels
+    GPU_SetViewport(depthPhys, colorPhys, 0, 0, 240, 400);
+
+    // Depth linear conversion function: depth = -1.0*z + 0.0
+    GPU_DepthMap(-1.0f, 0.0f);
+
+    // Boilerplate state
+    GPU_SetFaceCulling(GPU_CULL_BACK_CCW);
+    GPU_SetStencilTest(false, GPU_ALWAYS, 0x00, 0xFF, 0x00);
+    GPU_SetStencilOp(GPU_KEEP, GPU_KEEP, GPU_KEEP);
+    GPU_SetBlendingColor(0,0,0,0);
+    GPU_SetDepthTestAndWriteMask(true, GPU_GREATER, GPU_WRITE_ALL);
+
+    // The purpose of these two register writes is unknown
+    GPUCMD_AddMaskedWrite(GPUREG_0062, 0x1, 0);
+    GPUCMD_AddWrite(GPUREG_0118, 0);
+
+    // Alpha blending (source alpha / one minus source alpha) and alpha test (disabled)
+    GPU_SetAlphaBlending(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA);
+    GPU_SetAlphaTest(false, GPU_ALWAYS, 0x00);
+
+    // Start with every TexEnv stage set to a no-operation
+    int stage;
+    for (stage = 0; stage < 6; stage ++)
+    {
+        GPU_SetDummyTexEnv(stage);
+    }
+}
+
+// Ends the frame: submits the queued GPU commands, waits for rendering to finish,
+// copies the color buffer into the top-left framebuffer, and rewinds the command
+// buffer so the next frame starts from offset 0.
+void gpuFrameEnd(void)
+{
+    // Finish rendering
+    GPU_FinishDrawing();
+    GPUCMD_Finalize();
+    GPUCMD_FlushAndRun(NULL);
+    gspWaitForP3D(); // Wait for the rendering to complete
+
+    // Transfer the GPU output to the framebuffer
+    GX_SetDisplayTransfer(NULL, colorBuf, GX_BUFFER_DIM(240, 400),
+        (u32*)gfxGetFramebuffer(GFX_TOP, GFX_LEFT, NULL, NULL), GX_BUFFER_DIM(240, 400),
+        DISPLAY_TRANSFER_FLAGS);
+    gspWaitForPPF(); // Wait for the transfer to complete
+
+    // Reset the command buffer
+    GPUCMD_SetBufferOffset(0);
+}
+
+// Configures TexEnv stage `id` to forward the previous stage's output unchanged:
+// both the RGB and the alpha combiner use GPU_REPLACE with GPU_PREVIOUS as source.
+void GPU_SetDummyTexEnv(int id)
+{
+    const u32 passSources  = GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0);
+    const u32 passOperands = GPU_TEVOPERANDS(0, 0, 0);
+
+    GPU_SetTexEnv(id,
+        passSources, passSources,   // RGB, Alpha sources
+        passOperands, passOperands, // RGB, Alpha operands
+        GPU_REPLACE, GPU_REPLACE,   // RGB, Alpha combiner functions
+        0xFFFFFFFF);
+}
--- /dev/null
+/*
+ * Bare-bones simplistic GPU wrapper
+ * This library is common to all libctru GPU examples
+ */
+
+#pragma once
+#include <string.h>
+#include <3ds.h>
+#include "3dmath.h"
+
+// Allocates the render targets and command buffer and initializes the GPU
+void gpuInit(void);
+// Frees the buffers allocated by gpuInit
+void gpuExit(void);
+
+// Fills the color buffer with clearColor and the depth buffer with zero, and waits for completion
+void gpuClearBuffers(u32 clearColor);
+
+// Queues the common per-frame GPU state (viewport, tests, blending, dummy TexEnv stages)
+void gpuFrameBegin(void);
+// Runs the queued commands, waits for them, and transfers the result to the top screen framebuffer
+void gpuFrameEnd(void);
+
+// Configures the specified fixed-function fragment shading substage to be a no-operation
+void GPU_SetDummyTexEnv(int id);
+
+// Uploads a 4x4 matrix to the float uniforms of the given shader type, starting at `location`.
+// The matrix is passed as raw 32-bit words with a count of 4 (one per matrix row).
+static inline void GPU_SetFloatUniformMatrix(GPU_SHADER_TYPE type, int location, matrix_4x4* matrix)
+{
+    u32* const rawWords = (u32*)matrix;
+    GPU_SetFloatUniform(type, location, rawWords, 4);
+}
--- /dev/null
+/*
+ * ~~ Simple libctru GPU textured cube example ~~
+ * This example demonstrates the basics of using the PICA200 in a 3DS homebrew
+ * application in order to render a basic scene consisting of a rotating
+ * textured cube which is also shaded using a simple shading algorithm.
+ * The shading algorithm is explained in the vertex shader source code.
+ */
+
+#include "gpu.h"
+#include "vshader_shbin.h"
+#include "kitten_bin.h"
+
+// Value the color buffer is filled with by gpuClearBuffers
+// NOTE(review): presumably an RGBA8 color (0xRRGGBBAA) -- confirm against the render target format
+#define CLEAR_COLOR 0x68B0D8FF
+
+// Per-vertex data: position (3 floats), texture coordinate (2 floats), normal (3 floats).
+// The attribute buffer configuration in sceneRender lists the attributes in this same order.
+typedef struct { float position[3]; float texcoord[2]; float normal[3]; } vertex;
+
+// Unit cube centered on the origin: six faces, two triangles (six vertices) per face.
+// Face names: P/M = plus/minus side of the X, Y or Z axis; every vertex of a face
+// carries that face's normal.
+static const vertex vertex_list[] =
+{
+ // First face (PZ)
+ // First triangle
+ { {-0.5f, -0.5f, +0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, +1.0f} },
+ { {+0.5f, -0.5f, +0.5f}, {1.0f, 0.0f}, {0.0f, 0.0f, +1.0f} },
+ { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, +1.0f} },
+ // Second triangle
+ { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, +1.0f} },
+ { {-0.5f, +0.5f, +0.5f}, {0.0f, 1.0f}, {0.0f, 0.0f, +1.0f} },
+ { {-0.5f, -0.5f, +0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, +1.0f} },
+
+ // Second face (MZ)
+ // First triangle
+ { {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, -1.0f} },
+ { {-0.5f, +0.5f, -0.5f}, {1.0f, 0.0f}, {0.0f, 0.0f, -1.0f} },
+ { {+0.5f, +0.5f, -0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, -1.0f} },
+ // Second triangle
+ { {+0.5f, +0.5f, -0.5f}, {1.0f, 1.0f}, {0.0f, 0.0f, -1.0f} },
+ { {+0.5f, -0.5f, -0.5f}, {0.0f, 1.0f}, {0.0f, 0.0f, -1.0f} },
+ { {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, 0.0f, -1.0f} },
+
+ // Third face (PX)
+ // First triangle
+ { {+0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {+1.0f, 0.0f, 0.0f} },
+ { {+0.5f, +0.5f, -0.5f}, {1.0f, 0.0f}, {+1.0f, 0.0f, 0.0f} },
+ { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {+1.0f, 0.0f, 0.0f} },
+ // Second triangle
+ { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {+1.0f, 0.0f, 0.0f} },
+ { {+0.5f, -0.5f, +0.5f}, {0.0f, 1.0f}, {+1.0f, 0.0f, 0.0f} },
+ { {+0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {+1.0f, 0.0f, 0.0f} },
+
+ // Fourth face (MX)
+ // First triangle
+ { {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {-1.0f, 0.0f, 0.0f} },
+ { {-0.5f, -0.5f, +0.5f}, {1.0f, 0.0f}, {-1.0f, 0.0f, 0.0f} },
+ { {-0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {-1.0f, 0.0f, 0.0f} },
+ // Second triangle
+ { {-0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {-1.0f, 0.0f, 0.0f} },
+ { {-0.5f, +0.5f, -0.5f}, {0.0f, 1.0f}, {-1.0f, 0.0f, 0.0f} },
+ { {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {-1.0f, 0.0f, 0.0f} },
+
+ // Fifth face (PY)
+ // First triangle
+ { {-0.5f, +0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, +1.0f, 0.0f} },
+ { {-0.5f, +0.5f, +0.5f}, {1.0f, 0.0f}, {0.0f, +1.0f, 0.0f} },
+ { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, +1.0f, 0.0f} },
+ // Second triangle
+ { {+0.5f, +0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, +1.0f, 0.0f} },
+ { {+0.5f, +0.5f, -0.5f}, {0.0f, 1.0f}, {0.0f, +1.0f, 0.0f} },
+ { {-0.5f, +0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, +1.0f, 0.0f} },
+
+ // Sixth face (MY)
+ // First triangle
+ { {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, -1.0f, 0.0f} },
+ { {+0.5f, -0.5f, -0.5f}, {1.0f, 0.0f}, {0.0f, -1.0f, 0.0f} },
+ { {+0.5f, -0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, -1.0f, 0.0f} },
+ // Second triangle
+ { {+0.5f, -0.5f, +0.5f}, {1.0f, 1.0f}, {0.0f, -1.0f, 0.0f} },
+ { {-0.5f, -0.5f, +0.5f}, {0.0f, 1.0f}, {0.0f, -1.0f, 0.0f} },
+ { {-0.5f, -0.5f, -0.5f}, {0.0f, 0.0f}, {0.0f, -1.0f, 0.0f} },
+};
+
+// Number of vertices in vertex_list
+#define vertex_list_count (sizeof(vertex_list)/sizeof(vertex_list[0]))
+
+// Shader state: parsed shader binary and the program built from it
+static DVLB_s* vshader_dvlb;
+static shaderProgram_s program;
+// Uniform locations, looked up by name in sceneInit
+static int uLoc_projection, uLoc_modelView;
+static int uLoc_lightVec, uLoc_lightHalfVec, uLoc_lightClr, uLoc_material;
+static matrix_4x4 projection;
+// Material properties, one per matrix row. The initializers fill vector_4f's
+// fields in their declaration order (w, z, y, x), so the first value of each row is w.
+static matrix_4x4 material =
+{
+ {
+ { { 0.0f, 0.2f, 0.2f, 0.2f } }, // Ambient
+ { { 0.0f, 0.4f, 0.4f, 0.4f } }, // Diffuse
+ { { 0.0f, 0.8f, 0.8f, 0.8f } }, // Specular
+ { { 1.0f, 0.0f, 0.0f, 0.0f } }, // Emission
+ }
+};
+
+// Linear-memory copies of the vertex list and the texture (allocated in sceneInit)
+static void* vbo_data;
+static void* tex_data;
+// Current rotation angles of the cube in radians, advanced every frame by sceneRender
+static float angleX = 0.0, angleY = 0.0;
+
+// Sets up everything the scene needs before the first frame: the shader program and
+// its uniform locations, the projection matrix, and linear-memory copies of the
+// vertex data and the texture.
+static void sceneInit(void)
+{
+    // Parse the embedded shader binary and build a program from its first DVLE
+    vshader_dvlb = DVLB_ParseFile((u32*)vshader_shbin, vshader_shbin_size);
+    shaderProgramInit(&program);
+    shaderProgramSetVsh(&program, &vshader_dvlb->DVLE[0]);
+
+    // Look up the uniforms by name
+    uLoc_projection   = shaderInstanceGetUniformLocation(program.vertexShader, "projection");
+    uLoc_modelView    = shaderInstanceGetUniformLocation(program.vertexShader, "modelView");
+    uLoc_lightVec     = shaderInstanceGetUniformLocation(program.vertexShader, "lightVec");
+    uLoc_lightHalfVec = shaderInstanceGetUniformLocation(program.vertexShader, "lightHalfVec");
+    uLoc_lightClr     = shaderInstanceGetUniformLocation(program.vertexShader, "lightClr");
+    uLoc_material     = shaderInstanceGetUniformLocation(program.vertexShader, "material");
+
+    // Perspective projection: 80 degree field of view, 400/240 ratio, near 0.01, far 1000
+    m4x4_persp_tilt(&projection, 80.0f*M_PI/180.0f, 400.0f/240.0f, 0.01f, 1000.0f);
+
+    // Create the VBO (vertex buffer object) and fill it with the vertex list
+    const size_t vboSize = sizeof(vertex_list);
+    vbo_data = linearAlloc(vboSize);
+    memcpy(vbo_data, vertex_list, vboSize);
+
+    // Copy the embedded texture into linear memory
+    tex_data = linearAlloc(kitten_bin_size);
+    memcpy(tex_data, kitten_bin, kitten_bin_size);
+}
+
+// Queues the GPU commands that draw the cube for the current frame and advances
+// the rotation angles for the next one.
+static void sceneRender(void)
+{
+    // Bind the shader program
+    shaderProgramUse(&program);
+
+    // TexEnv stage 0 modulates the texture color with the vertex color
+    // (which the vertex shader calculates using a lighting algorithm)
+    // See https://www.opengl.org/sdk/docs/man2/xhtml/glTexEnv.xml for more insight
+    GPU_SetTexEnv(0,
+        GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // RGB channels
+        GPU_TEVSOURCES(GPU_TEXTURE0, GPU_PRIMARY_COLOR, GPU_PRIMARY_COLOR), // Alpha
+        GPU_TEVOPERANDS(0, 0, 0), // RGB
+        GPU_TEVOPERANDS(0, 0, 0), // Alpha
+        GPU_MODULATE, GPU_MODULATE, // RGB, Alpha
+        0xFFFFFFFF);
+
+    // Texture unit 0: 64x64 RGBA8 texture, linear magnification, repeat wrapping
+    GPU_SetTextureEnable(GPU_TEXUNIT0);
+    u32* const texPhys = (u32*)osConvertVirtToPhys((u32)tex_data);
+    GPU_SetTexture(
+        GPU_TEXUNIT0,
+        texPhys,
+        64, // Width
+        64, // Height
+        GPU_TEXTURE_MAG_FILTER(GPU_LINEAR) | GPU_TEXTURE_WRAP_S(GPU_REPEAT) | GPU_TEXTURE_WRAP_T(GPU_REPEAT), // Flags
+        GPU_RGBA8 // Pixel format
+    );
+
+    // Per-buffer configuration for the single vertex buffer
+    u32 bufferOffsets[]      = { 0x0 };   // Buffer offsets (placeholders)
+    u64 bufferPermutations[] = { 0x210 }; // Attribute permutations for each buffer (identity)
+    u8  bufferAttribCounts[] = { 3 };     // Number of attributes for each buffer
+    u32* const vboPhys = (u32*)osConvertVirtToPhys((u32)vbo_data);
+
+    // Configure the "attribute buffers" (that is, the vertex input buffers):
+    // position (3 floats), texcoord (2 floats), normal (3 floats)
+    GPU_SetAttributeBuffers(
+        3,       // Number of inputs per vertex
+        vboPhys, // Location of the VBO
+        GPU_ATTRIBFMT(0, 3, GPU_FLOAT) |
+        GPU_ATTRIBFMT(1, 2, GPU_FLOAT) |
+        GPU_ATTRIBFMT(2, 3, GPU_FLOAT),
+        0xFFC,   // Unused attribute mask, as written in the original example
+        0x210,   // Attribute permutations (identity, passing each attribute in order)
+        1,       // Number of buffers
+        bufferOffsets,
+        bufferPermutations,
+        bufferAttribCounts);
+
+    // modelView = translation * rotationX * rotationY; the cube also moves back and forth along Z
+    matrix_4x4 modelView;
+    m4x4_identity(&modelView);
+    m4x4_translate(&modelView, 0.0, 0.0, -2.0 + 0.5*sinf(angleX));
+    m4x4_rotate_x(&modelView, angleX, true);
+    m4x4_rotate_y(&modelView, angleY, true);
+
+    // Rotate the cube each frame
+    angleX += M_PI / 180;
+    angleY += M_PI / 360;
+
+    // Light parameters, passed to the shader as raw float vectors
+    // NOTE(review): presumably laid out in the same w, z, y, x order as vector_4f -- confirm
+    float lightVec[]     = {0.0f, -1.0f, 0.0f, 0.0f};
+    float lightHalfVec[] = {0.0f, -1.0f, 0.0f, 0.0f};
+    float lightClr[]     = {1.0f, 1.0f, 1.0f, 1.0f};
+
+    // Upload the uniforms
+    GPU_SetFloatUniformMatrix(GPU_VERTEX_SHADER, uLoc_projection, &projection);
+    GPU_SetFloatUniformMatrix(GPU_VERTEX_SHADER, uLoc_modelView, &modelView);
+    GPU_SetFloatUniformMatrix(GPU_VERTEX_SHADER, uLoc_material, &material);
+    GPU_SetFloatUniform(GPU_VERTEX_SHADER, uLoc_lightVec, (u32*)lightVec, 1);
+    GPU_SetFloatUniform(GPU_VERTEX_SHADER, uLoc_lightHalfVec, (u32*)lightHalfVec, 1);
+    GPU_SetFloatUniform(GPU_VERTEX_SHADER, uLoc_lightClr, (u32*)lightClr, 1);
+
+    // Draw the VBO
+    GPU_DrawArray(GPU_TRIANGLES, vertex_list_count);
+}
+
+// Releases what sceneInit() created: the texture and VBO copies first, then the
+// shader program and the parsed shader binary it was built from.
+static void sceneExit(void)
+{
+    // Texture and vertex buffer
+    linearFree(tex_data);
+    linearFree(vbo_data);
+
+    // Shader program and its DVLB
+    shaderProgramFree(&program);
+    DVLB_Free(vshader_dvlb);
+}
+
+// Program entry point.
+// Brings up the graphics and GPU helpers, builds the scene, then renders one
+// frame per VBlank until START is pressed. Teardown happens in the reverse
+// order of initialization (scene, GPU helper, graphics).
+// Always returns 0.
+int main(void)
+{
+	// Initialize graphics
+	gfxInitDefault();
+	gpuInit();
+
+	// Initialize the scene and clear the buffers ahead of the first frame
+	sceneInit();
+	gpuClearBuffers(CLEAR_COLOR);
+
+	// Main loop: keeps running for as long as aptMainLoop() returns true
+	while (aptMainLoop())
+	{
+		gspWaitForVBlank();  // Synchronize with the start of VBlank
+		gfxSwapBuffersGpu(); // Swap the framebuffers so that the frame rendered in the previous iteration is now visible
+		hidScanInput();      // Read the user input
+
+		// Respond to user input
+		u32 kDown = hidKeysDown();
+		if (kDown & KEY_START)
+		{
+			break; // break in order to return to hbmenu
+		}
+
+		// Render the scene, then clear the buffers so the next frame starts clean
+		gpuFrameBegin();
+		sceneRender();
+		gpuFrameEnd();
+		gpuClearBuffers(CLEAR_COLOR);
+
+		// gfxFlushBuffers() is intentionally not called here: flushing the
+		// framebuffers out of the data cache is not necessary with pure GPU rendering.
+	}
+
+	// Deinitialize the scene
+	sceneExit();
+
+	// Deinitialize graphics
+	gpuExit();
+	gfxExit();
+	return 0;
+}
--- /dev/null
+; Example PICA200 vertex shader
+
+; Uniforms (each [4] array is consumed row by row by the dp4 sequences in main, i.e. one 4x4 matrix per array)
+.fvec projection[4], modelView[4] ; Projection and model-view matrices
+.fvec lightVec, lightHalfVec, lightClr, material[4] ; Diffuse light vector, vector for the shininess term, light color, material colors
+.alias mat_amb material[0] ; Ambient color (row 0 of material)
+.alias mat_dif material[1] ; Diffuse color (row 1 of material)
+.alias mat_spe material[2] ; Specular color (row 2 of material)
+.alias mat_emi material[3] ; Emission color (row 3 of material)
+
+; Constants (only the .x and .y components are referenced, through the two aliases below)
+.constf myconst(0.0, 1.0, -1.0, -0.5)
+.alias zeros myconst.xxxx ; Vector full of zeros (x = 0.0 replicated)
+.alias ones myconst.yyyy ; Vector full of ones (y = 1.0 replicated)
+
+; Outputs: transformed position, texture coordinate 0 and vertex color
+.out outpos position
+.out outtc0 texcoord0
+.out outclr color
+
+; Inputs (defined as aliases for convenience); the host C code configures attributes 0, 1, 2 as 3, 2 and 3 floats
+.alias inpos v0 ; Vertex position
+.alias intex v1 ; Texture coordinate
+.alias innrm v2 ; Vertex normal
+
+.proc main ; Transforms the vertex position, passes the texcoord through and computes a lit vertex color
+ ; Force the w component of inpos to be 1.0 (r0 = (inpos.xyz, 1.0))
+ mov r0.xyz, inpos
+ mov r0.w, ones
+
+ ; r1 = modelView * r0 (one dp4 per matrix row)
+ dp4 r1.x, modelView[0], r0
+ dp4 r1.y, modelView[1], r0
+ dp4 r1.z, modelView[2], r0
+ dp4 r1.w, modelView[3], r0
+
+ ; outpos = projection * r1
+ dp4 outpos.x, projection[0], r1
+ dp4 outpos.y, projection[1], r1
+ dp4 outpos.z, projection[2], r1
+ dp4 outpos.w, projection[3], r1
+
+ ; outtc0 = intex (texture coordinate is passed through unchanged)
+ mov outtc0, intex
+
+ ; Transform the normal vector with the modelView matrix
+ ; r1 = normalize(modelView * innrm); w is forced to 0 so the 4th component of each matrix row does not contribute
+ mov r0.xyz, innrm
+ mov r0.w, zeros
+ dp4 r1.x, modelView[0], r0
+ dp4 r1.y, modelView[1], r0
+ dp4 r1.z, modelView[2], r0
+ mov r1.w, zeros
+ dp3 r2, r1, r1 ; r2 = x^2+y^2+z^2, written to every component of r2
+ rsq r2, r2 ; r2 = 1/sqrt(r2), again in every component
+ mul r1, r2, r1 ; r1 = r1*r2 (unit-length normal, w stays 0)
+
+ ; Calculate the diffuse level (r0.x) and the shininess level (r0.y)
+ ; r0.x = max(0, -(lightVec * r1))
+ ; r0.y = max(0, (-lightHalfVec) * r1) ^ 2
+ dp3 r0.x, lightVec, r1
+ add r0.x, zeros, -r0 ; r0.x = 0 - r0.x (negate the dot product)
+ dp3 r0.y, -lightHalfVec, r1
+ max r0, zeros, r0 ; Clamp at 0; all components are written but only .x and .y are read afterwards
+ mul r0.y, r0, r0 ; Square the shininess term
+
+ ; Accumulate the vertex color in r1, initializing it to the emission color
+ mov r1, mat_emi
+
+ ; r1 += specularColor * lightClr * shininessLevel
+ mul r2, lightClr, r0.yyyy
+ mul r2, mat_spe, r2
+ add r1, r2, r1
+
+ ; r1 += diffuseColor * lightClr * diffuseLevel
+ mul r2, lightClr, r0.xxxx
+ mul r2, mat_dif, r2
+ add r1, r2, r1
+
+ ; r1 += ambientColor * lightClr
+ mov r2, lightClr
+ mul r2, mat_amb, r2
+ add r1, r2, r1
+
+ ; outclr = min(1, r1): caps each channel at 1.0 (no lower clamp is applied here)
+ min outclr, ones, r1
+
+ ; We're finished
+ end
+.end