As a challenge to myself, I am working on a basic Minecraft remake in JavaScript, using the WebGL API exposed by the <canvas> tag. I have a demo video on YouTube here. To make the world easily editable, I split the world geometry into chunks (16^3 block areas), which means that I need one draw call per rendered chunk. That is where the problem comes in. This is not a performance problem with the graphics card: my Nvidia GeForce 980 does not even turn on its fans, and the GPU reports only 25% utilization at half the maximum clock speed, so a more accurate number is actually 12.5% utilization. The problem is in the CPU.

The GPU process in the Google Chrome task manager is at 15%, which is more than enough to saturate one core of my CPU. This is what the GL call logger says:
GL drawElements: [4, 7680, 5123, 0]
GL drawElements: [4, 6144, 5123, 0]
GL drawElements: [4, 7866, 5123, 0]
GL drawElements: [4, 6618, 5123, 0]
GL drawElements: [4, 6144, 5123, 0]
GL drawElements: [4, 4608, 5123, 0]
GL uniformMatrix4fv: [[object WebGLUniformLocation], false, mat4(0.9999874830245972, -0.000033332948078168556, 0.004999868106096983, 0, 0, 0.9999777674674988, 0.006666617467999458, 0, -0.0049999793991446495, -0.00666653411462903, 0.999965250492096, 0, -127.43840026855469, -129.25619506835938, -113.50281524658203, 1)]
GL uniform2fv: [[object WebGLUniformLocation], vec2(-8, -7)]
GL drawElements: [4, 7680, 5123, 0]
GL drawElements: [4, 6144, 5123, 0]
GL drawElements: [4, 6210, 5123, 0]
GL drawElements: [4, 8148, 5123, 0]
GL drawElements: [4, 6144, 5123, 0]
GL drawElements: [4, 4608, 5123, 0]
GL uniformMatrix4fv: [[object WebGLUniformLocation], false, mat4(0.9999874830245972, -0.000033332948078168556, 0.004999868106096983, 0, 0, 0.9999777674674988, 0.006666617467999458, 0, -0.0049999793991446495, -0.00666653411462903, 0.999965250492096, 0, -127.51840209960938, -129.36285400390625, -97.50337219238281, 1)]
GL uniform2fv: [[object WebGLUniformLocation], vec2(-8, -6)]
GL drawElements: [4, 7680, 5123, 0]
GL drawElements: [4, 6144, 5123, 0]
GL drawElements: [4, 7842, 5123, 0]
GL drawElements: [4, 6144, 5123, 0]
GL drawElements: [4, 4608, 5123, 0]
The reason I am able to have back-to-back drawElements calls is that I am using the WebGL extension OES_vertex_array_object; the calls made through that extension aren't recorded by the logger, so you don't see them.
I've heard stories of state changes being very expensive, but since I'm calling a lot of drawElements back-to-back, this shouldn't be an issue, should it? Also, I have heard that people with my type of hardware can easily do 4096 draw calls by taking these state changes into account. Maybe this is an issue with WebGL itself being unoptimized, because of the translation from GL to Direct3D calls that ANGLE performs in Google Chrome.
One more note: if I change the geometry construction size from 16^3 to 16x16x128, cutting the draw call count by a factor of 8, I am able to run the game at a solid 60 FPS as long as no world geometry is being created. If geometry is being created, the game is unplayable.
EDIT: Some more testing... I decided to make a minimal WebGL program, which turned out to be a pretty cool screen saver. Here it is:
<html>
<body style="margin:0px">
<canvas id="gl" style="width:100%;height:100%;">
</canvas>
</body>
<script type="vertex" id="vertex">
// Vertex shader: places one 2D quad corner in clip space and derives a
// grey level from the animation time.
attribute vec2 pos;        // quad corner in the XY plane
uniform mat4 matrix;       // matrix applied to the assembled position
uniform float time;        // animation phase fed to sin()
uniform vec2 translate;    // XY offset added to every corner
varying vec3 color;        // grey level handed to the fragment shader

void main() {
    // sin(time) drives both the depth and the brightness.
    float wave = sin(time);

    // Depth swings between -5.0 and -25.0 as the wave goes from -1 to 1.
    float depth = (wave + 1.5) * -10.0;
    vec2 shifted = pos + translate;
    gl_Position = matrix * vec4(shifted, depth, 1.0);

    // Remap the wave from [-1, 1] to [0, 1] and use it for all channels.
    color = vec3((wave + 1.0) / 2.0);
}
</script>
<script type="frag" id="frag">
// Fragment shader: writes the interpolated colour as a fully opaque pixel.
precision mediump float;
varying vec3 color;   // colour received from the vertex shader
void main (){
    // Alpha is fixed at 1.0 so every fragment is opaque.
    gl_FragColor = vec4(color, 1.0);
}
</script>
<script>
// Minimal WebGL draw-call stress test: draws QUAD_COUNT two-triangle quads,
// one drawArrays call each, with two uniform updates per quad.

// Number of quads (and therefore draw calls) issued per frame.
var QUAD_COUNT = 4096;

var canvas = document.getElementById("gl");
var gl = canvas.getContext("webgl");

// Size the drawing buffer to the element's laid-out size and cover it fully.
canvas.width = canvas.clientWidth;
canvas.height = canvas.clientHeight;
gl.viewport(0, 0, canvas.width, canvas.height);

// Compile both shader stages from inline GLSL source.
var vertShader = gl.createShader(gl.VERTEX_SHADER);
var fragShader = gl.createShader(gl.FRAGMENT_SHADER);
gl.shaderSource(vertShader, "attribute vec2 pos;uniform mat4 matrix;uniform float time;uniform vec2 translate;varying vec3 color;void main(){gl_Position=matrix*vec4(pos+translate,(sin(time)+1.5)*-10.0,1.0);color=vec3((sin(time)+1.0)/2.0);}");
gl.shaderSource(fragShader, "precision mediump float;varying vec3 color;void main(){gl_FragColor=vec4(color, 1.0);}");
gl.compileShader(vertShader);
gl.compileShader(fragShader);

// Fail loudly on a bad shader instead of silently rendering nothing.
[vertShader, fragShader].forEach(function (stage) {
    if (!gl.getShaderParameter(stage, gl.COMPILE_STATUS)) {
        throw new Error("Shader compilation failed: " + gl.getShaderInfoLog(stage));
    }
});

var shader = gl.createProgram();
gl.attachShader(shader, vertShader);
gl.attachShader(shader, fragShader);
gl.linkProgram(shader);
if (!gl.getProgramParameter(shader, gl.LINK_STATUS)) {
    throw new Error("Program link failed: " + gl.getProgramInfoLog(shader));
}
gl.useProgram(shader);

// Ask the linked program where "pos" lives rather than assuming slot 0;
// the location assigned by the linker is implementation-defined.
var a_pos = gl.getAttribLocation(shader, "pos");
gl.enableVertexAttribArray(a_pos);

var u_time = gl.getUniformLocation(shader, "time");
var u_matrix = gl.getUniformLocation(shader, "matrix");
var u_translate = gl.getUniformLocation(shader, "translate");

// Upload a perspective projection once (60 degree vertical field of view,
// column-major layout as expected by uniformMatrix4fv with transpose=false).
(function (){
    var nearView = 0.1;
    var farView = 100;
    var f = 1 / Math.tan(60 / 180 * Math.PI / 2);
    var nf = nearView - farView;
    var aspectRatio = canvas.width / canvas.height;
    gl.uniformMatrix4fv(u_matrix, false, [
        f / aspectRatio, 0, 0, 0,
        0, f, 0, 0,
        0, 0, (farView + nearView) / nf, -1,
        0, 0, (2 * farView * nearView) / nf, 0
    ]);
})();

// One shared quad: two triangles, six vertices of two floats each.
var buf = gl.createBuffer();
gl.bindBuffer(gl.ARRAY_BUFFER, buf);
gl.bufferData(gl.ARRAY_BUFFER, new Float32Array([
    -1, -1,
    1, 1,
    -1, 1,
    -1, -1,
    1, 1,
    1, -1,
]), gl.STATIC_DRAW);
gl.vertexAttribPointer(a_pos, 2, gl.FLOAT, false, 0, 0);

// Animation clock, advanced by the interval timer below.
var time = 0;

// Flat list of random (x, y) offsets in [-5, 5), two entries per quad.
var translations = [];
for (var i = 0; i < QUAD_COUNT; i++){
    translations.push(Math.random() * 10 - 5, Math.random() * 10 - 5);
}

// Per-frame callback: clears the frame, then issues one draw call per quad.
var renderLoop = function (){
    // The WebGL clear masks are COLOR_BUFFER_BIT and DEPTH_BUFFER_BIT.
    gl.clear(gl.COLOR_BUFFER_BIT | gl.DEPTH_BUFFER_BIT);
    for (var i = 0; i < QUAD_COUNT; i++){
        // Offset each quad's phase so they do not all move in lockstep.
        gl.uniform1f(u_time, time + i / 100);
        gl.uniform2f(u_translate, translations[i * 2], translations[i * 2 + 1]);
        gl.drawArrays(gl.TRIANGLES, 0, 6);
    }
    window.requestAnimationFrame(renderLoop);
};

// Advance the animation clock by 0.01 every 10 ms, independent of frame rate.
window.setInterval(function (){
    time += 0.01;
}, 10);
window.requestAnimationFrame(renderLoop);
</script>
The program draws a bunch of squares: in this case 4096 of them, with one draw call each. The performance is better than in my main project, but still not optimal. The GPU process uses ~13% CPU, and I am somehow maintaining a solid 60 FPS. Granted, the most I am doing here is making a few uniform calls. My real project uses 5 shader programs and obviously handles a lot more information. I will try to write this with the API I am using to render the main game. Perhaps there is room for improvement.


