
I can't find any documentation of different behavior, so this is just a sanity check that I'm not doing anything wrong...

I've created some helper functions in GLSL to output float/vec/mat comparisons as a color:

note: pretty sure there aren't any errors here, just including it so you know exactly what I'm doing...

//returns true or false if floats are eq (within some epsilon)
bool feq(float a, float b)
{
  float c = a-b;
  return (c > -0.05 && c < 0.05);
}

//returns true or false if vecs are eq
bool veq(vec4 a, vec4 b)
{
  return
  (
    feq(a.x, b.x) &&
    feq(a.y, b.y) &&
    feq(a.z, b.z) &&
    feq(a.w, b.w) &&
    true
  );
}

//returns color indicating where first diff lies between vecs
//white for "no diff"
vec4 cveq(vec4 a, vec4 b)
{
       if(!feq(a.x, b.x)) return vec4(1.,0.,0.,1.);
  else if(!feq(a.y, b.y)) return vec4(0.,1.,0.,1.);
  else if(!feq(a.z, b.z)) return vec4(0.,0.,1.,1.);
  else if(!feq(a.w, b.w)) return vec4(1.,1.,0.,1.);
  else                    return vec4(1.,1.,1.,1.);
}

//returns true or false if mats are eq
bool meq(mat4 a, mat4 b)
{
  return
  (
    veq(a[0],b[0]) &&
    veq(a[1],b[1]) &&
    veq(a[2],b[2]) &&
    veq(a[3],b[3]) &&
    true
  );
}

//returns color indicating where first diff lies between mats
//white means "no diff"
vec4 cmeq(mat4 a, mat4 b)
{
       if(!veq(a[0],b[0])) return vec4(1.,0.,0.,1.);
  else if(!veq(a[1],b[1])) return vec4(0.,1.,0.,1.);
  else if(!veq(a[2],b[2])) return vec4(0.,0.,1.,1.);
  else if(!veq(a[3],b[3])) return vec4(1.,1.,0.,1.);
  else return vec4(1.,1.,1.,1.);
}

So I have a model mat, a view mat, and a proj mat. I'm rendering a rectangle on screen (that is correctly projected/transformed...), and setting its color based on how well each step of the calculation matches my on-CPU-calculated equivalents.

uniform mat4 model_mat;
uniform mat4 view_mat;
uniform mat4 proj_mat;

attribute vec4 position;

varying vec4 var_color;

void main()
{
  //this code works (at least visually)- the rect is transformed as expected
  vec4 model_pos = model_mat * position;
  gl_Position = proj_mat * view_mat * model_pos;

  //this is the test code that does the same as above, but tests its results against CPU calculated equivalents
  mat4 m;

  //test proj
  //compares the passed in uniform 'proj_mat' against a hardcoded rep of 'proj_mat' as printf'd by the CPU
  m[0] = vec4(1.542351,0.000000,0.000000,0.000000);
  m[1] = vec4(0.000000,1.542351,0.000000,0.000000);
  m[2] = vec4(0.000000,0.000000,-1.020202,-1.000000);
  m[3] = vec4(0.000000,0.000000,-2.020202,0.000000);
  var_color = cmeq(proj_mat,m); //THIS PASSES (the rect is white)

  //view
  //compares the passed in uniform 'view_mat' against a hardcoded rep of 'view_mat' as printf'd by the CPU
  m[0] = vec4(1.000000,0.000000,-0.000000,0.000000);
  m[1] = vec4(-0.000000,0.894427,0.447214,0.000000);
  m[2] = vec4(0.000000,-0.447214,0.894427,0.000000);
  m[3] = vec4(-0.000000,-0.000000,-22.360680,1.000000);
  var_color = cmeq(view_mat,m); //THIS PASSES (the rect is white)

  //projview
  mat4 pv = proj_mat*view_mat;

  //proj_mat*view_mat
  //compares the result of GPU computed proj*view against a hardcoded rep of proj*view **<- NOTE ORDER** as printf'd by the CPU
  m[0] = vec4(1.542351,0.000000,0.000000,0.000000);
  m[1] = vec4(0.000000,1.379521,-0.689760,0.000000);
  m[2] = vec4(0.000000,-0.456248,-0.912496,20.792208);
  m[3] = vec4(0.000000,-0.447214,-0.894427,22.360680);
  var_color = cmeq(pv,m); //THIS FAILS (the rect is green)

  //view_mat*proj_mat
  //compares the result of GPU computed proj*view against a hardcoded rep of view*proj **<- NOTE ORDER** as printf'd by the CPU
  m[0] = vec4(1.542351,0.000000,0.000000,0.000000);
  m[1] = vec4(0.000000,1.379521,0.456248,0.903462);
  m[2] = vec4(0.000000,0.689760,21.448183,-1.806924);
  m[3] = vec4(0.000000,0.000000,-1.000000,0.000000);
  var_color = cmeq(pv,m); //THIS FAILS (the rect is green)

  //view_mat_t*proj_mat_t
  //compares the result of GPU computed proj*view against a hardcoded rep of view_t*proj_t **<- '_t' = transpose, also note order** as printf'd by the CPU
  m[0] = vec4(1.542351,0.000000,0.000000,0.000000);
  m[1] = vec4(0.000000,1.379521,-0.456248,-0.447214);
  m[2] = vec4(0.000000,-0.689760,-0.912496,-0.894427);
  m[3] = vec4(0.000000,0.000000,20.792208,22.360680);
  var_color = cmeq(pv,m); //THIS PASSES (the rect is white)
}

And here are my CPU vector/matrix calcs (matrices are column-major [m.x is the first column, not the first row]):

fv4 matmulfv4(fm4 m, fv4 v)
{
  return fv4
    { m.x[0]*v.x+m.y[0]*v.y+m.z[0]*v.z+m.w[0]*v.w,
      m.x[1]*v.x+m.y[1]*v.y+m.z[1]*v.z+m.w[1]*v.w,
      m.x[2]*v.x+m.y[2]*v.y+m.z[2]*v.z+m.w[2]*v.w,
      m.x[3]*v.x+m.y[3]*v.y+m.z[3]*v.z+m.w[3]*v.w };
}

fm4 mulfm4(fm4 a, fm4 b)
{
  return fm4
    { { a.x[0]*b.x[0]+a.y[0]*b.x[1]+a.z[0]*b.x[2]+a.w[0]*b.x[3], a.x[0]*b.y[0]+a.y[0]*b.y[1]+a.z[0]*b.y[2]+a.w[0]*b.y[3], a.x[0]*b.z[0]+a.y[0]*b.z[1]+a.z[0]*b.z[2]+a.w[0]*b.z[3], a.x[0]*b.w[0]+a.y[0]*b.w[1]+a.z[0]*b.w[2]+a.w[0]*b.w[3] },
      { a.x[1]*b.x[0]+a.y[1]*b.x[1]+a.z[1]*b.x[2]+a.w[1]*b.x[3], a.x[1]*b.y[0]+a.y[1]*b.y[1]+a.z[1]*b.y[2]+a.w[1]*b.y[3], a.x[1]*b.z[0]+a.y[1]*b.z[1]+a.z[1]*b.z[2]+a.w[1]*b.z[3], a.x[1]*b.w[0]+a.y[1]*b.w[1]+a.z[1]*b.w[2]+a.w[1]*b.w[3] },
      { a.x[2]*b.x[0]+a.y[2]*b.x[1]+a.z[2]*b.x[2]+a.w[2]*b.x[3], a.x[2]*b.y[0]+a.y[2]*b.y[1]+a.z[2]*b.y[2]+a.w[2]*b.y[3], a.x[2]*b.z[0]+a.y[2]*b.z[1]+a.z[2]*b.z[2]+a.w[2]*b.z[3], a.x[2]*b.w[0]+a.y[2]*b.w[1]+a.z[2]*b.w[2]+a.w[2]*b.w[3] },
      { a.x[3]*b.x[0]+a.y[3]*b.x[1]+a.z[3]*b.x[2]+a.w[3]*b.x[3], a.x[3]*b.y[0]+a.y[3]*b.y[1]+a.z[3]*b.y[2]+a.w[3]*b.y[3], a.x[3]*b.z[0]+a.y[3]*b.z[1]+a.z[3]*b.z[2]+a.w[3]*b.z[3], a.x[3]*b.w[0]+a.y[3]*b.w[1]+a.z[3]*b.w[2]+a.w[3]*b.w[3] } };
}

A key thing to notice is that the view_mat_t * proj_mat_t on the CPU matched the proj_mat * view_mat on the GPU. Does anyone know why? I've done tests on matrices on the CPU and compared them to results of online matrix multipliers, and they seem correct...

I know that the GPU does things between the vert shader and the frag shader (the perspective divide of gl_Position by gl_Position.w, I think?)... but is there something else going on here that I'm not taking into account, within just the vert shader? Is something being auto-transposed at some point?

Comments:

Bo Persson: HOW different are they?

Phildo (asker): Good question- difficult to tell on a GPU (the only way I've found of getting info back is to output various colors...). One big difference is that my proj*view on the GPU = my view_transpose * proj_transpose on the CPU...

3 Answers

Answer 1 (0 votes)

You may wish to consider GLM for CPU-side matrix construction and calculations. It'll help reduce possible sources of error.

Secondly, GPUs and CPUs do not necessarily perform identical calculations. The IEEE 754 floating point standard imposes fairly rigorous requirements on how these calculations have to be performed and how accurate they have to be, but:

  1. It's still possible for results to differ in the least significant bit (and by more than that, depending on the specific operation/function being used)
  2. Some GPU vendors opt out of strict IEEE compliance altogether (Nvidia, for example, has been known in the past to prioritize speed over strict IEEE compliance)

I would finally note that your CPU-side computations leave a lot of room for rounding errors, which can add up. The usual advice for these kinds of questions is therefore to build a tolerance for small deviations into your comparisons. Code that checks two floating point numbers for 'equality' usually treats abs(x-y) < 0.000001 as "x and y are essentially equal". Naturally, the specific tolerance has to be calibrated for your particular use.
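As a minimal sketch (written in C++ here; the epsilon values are illustrative, not canonical), a comparison that combines an absolute floor with a relative bound copes better with both near-zero and large values than a single fixed epsilon does:

#include <algorithm>
#include <cmath>

//rough floating-point "equality": an absolute tolerance for values near zero,
//plus a relative tolerance so the allowed error scales with magnitude
bool nearly_equal(float a, float b, float abs_eps = 1e-5f, float rel_eps = 1e-4f)
{
  float diff = std::fabs(a - b);
  if (diff <= abs_eps) return true;
  return diff <= rel_eps * std::max(std::fabs(a), std::fabs(b));
}

The same idea carries over to the feq helper in the question's shader.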

And of course, you'll want to check to make sure that all your matrices/uniforms are being passed in correctly.
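One way to make that last check concrete is to read a uniform back after uploading it and compare it on the CPU. A rough sketch; check_proj_uniform and expected are hypothetical names (not from the question), prog is your linked program object, and your GL headers/loader plus <cstdio> are assumed to be included:

//reads the mat4 uniform "proj_mat" back out of the program and compares it to the values you meant to upload
void check_proj_uniform(GLuint prog, const GLfloat expected[16])
{
  GLint loc = glGetUniformLocation(prog, "proj_mat"); //-1 means the name is wrong or the uniform was optimized out
  GLfloat readback[16] = {0};
  glGetUniformfv(prog, loc, readback); //read the uniform's current value back from the program

  for (int i = 0; i < 16; ++i)
    if (!nearly_equal(readback[i], expected[i])) //nearly_equal from the sketch above
      fprintf(stderr, "proj_mat[%d]: got %f, expected %f\n", i, readback[i], expected[i]);
}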

Answer 2 (0 votes)

Ok. I've found an answer. There is nothing special about matrix operations from within a single shader. There are, however, a couple things you should be aware of:

:1: OpenGL (GLSL) uses column-major matrices. So to construct the matrix that would be written out mathematically as this:

 1  2  3  4
 5  6  7  8
 9 10 11 12
13 14 15 16

you would, from within GLSL, use:

mat4 m = mat4(
  vec4( 1, 5, 9,13),
  vec4( 2, 6,10,14),
  vec4( 3, 7,11,15),
  vec4( 4, 8,12,16)
);

:2: If you instead use row-major matrices on the CPU, make sure to set the "transpose" flag to true when uploading the matrix uniforms to the shader, and set it to false if you're using column-major matrices (see the upload sketch below).

So long as you are aware of these two things, you should be good to go.
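Here is a rough sketch of what that upload looks like on desktop GL (prog, col_major_data and row_major_data are placeholder names). Note that OpenGL ES 2.0 / WebGL 1 require the transpose parameter to be GL_FALSE, so there you have to transpose the data yourself before uploading:

GLint loc = glGetUniformLocation(prog, "model_mat");

//CPU-side data already column-major: upload as-is
glUniformMatrix4fv(loc, 1, GL_FALSE, col_major_data);

//CPU-side data row-major: ask GL to transpose it during the upload
glUniformMatrix4fv(loc, 1, GL_TRUE, row_major_data);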

My particular problem above was that I was in the middle of switching from row-major to column-major in my CPU implementation, and hadn't been thorough in carrying that change through all of my CPU matrix operations.

Specifically, here is my now-correct mat4 multiplication implementation for column-major matrices:

fm4 mulfm4(fm4 a, fm4 b)
{
  return fm4
    { { a.x[0]*b.x[0] + a.y[0]*b.x[1] + a.z[0]*b.x[2] + a.w[0]*b.x[3], a.x[1]*b.x[0] + a.y[1]*b.x[1] + a.z[1]*b.x[2] + a.w[1]*b.x[3], a.x[2]*b.x[0] + a.y[2]*b.x[1] + a.z[2]*b.x[2] + a.w[2]*b.x[3], a.x[3]*b.x[0] + a.y[3]*b.x[1] + a.z[3]*b.x[2] + a.w[3]*b.x[3] },
      { a.x[0]*b.y[0] + a.y[0]*b.y[1] + a.z[0]*b.y[2] + a.w[0]*b.y[3], a.x[1]*b.y[0] + a.y[1]*b.y[1] + a.z[1]*b.y[2] + a.w[1]*b.y[3], a.x[2]*b.y[0] + a.y[2]*b.y[1] + a.z[2]*b.y[2] + a.w[2]*b.y[3], a.x[3]*b.y[0] + a.y[3]*b.y[1] + a.z[3]*b.y[2] + a.w[3]*b.y[3] },
      { a.x[0]*b.z[0] + a.y[0]*b.z[1] + a.z[0]*b.z[2] + a.w[0]*b.z[3], a.x[1]*b.z[0] + a.y[1]*b.z[1] + a.z[1]*b.z[2] + a.w[1]*b.z[3], a.x[2]*b.z[0] + a.y[2]*b.z[1] + a.z[2]*b.z[2] + a.w[2]*b.z[3], a.x[3]*b.z[0] + a.y[3]*b.z[1] + a.z[3]*b.z[2] + a.w[3]*b.z[3] },
      { a.x[0]*b.w[0] + a.y[0]*b.w[1] + a.z[0]*b.w[2] + a.w[0]*b.w[3], a.x[1]*b.w[0] + a.y[1]*b.w[1] + a.z[1]*b.w[2] + a.w[1]*b.w[3], a.x[2]*b.w[0] + a.y[2]*b.w[1] + a.z[2]*b.w[2] + a.w[2]*b.w[3], a.x[3]*b.w[0] + a.y[3]*b.w[1] + a.z[3]*b.w[2] + a.w[3]*b.w[3] } };
}

Again, the above implementation is for column-major matrices. That means that a.x is the first column of the matrix, not the first row.
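If you want a quick sanity check of a multiplication routine like this without reading colors off the screen, one cheap test is that transforming a vector by a*b must match transforming it by b and then by a. A standalone sketch; the fv4/fm4 definitions below are only guesses at the question's actual types, included so the check compiles:

#include <cmath>

//hypothetical stand-ins for the question's types: the fm4 members are columns
struct fv4 { float x, y, z, w; };
struct fm4 { float x[4], y[4], z[4], w[4]; };

//matmulfv4 (from the question) and the corrected mulfm4 above are assumed to be defined here

bool check_mul(fm4 a, fm4 b, fv4 v)
{
  fv4 lhs = matmulfv4(mulfm4(a, b), v);    //(a*b)*v
  fv4 rhs = matmulfv4(a, matmulfv4(b, v)); //a*(b*v)
  return std::fabs(lhs.x - rhs.x) < 1e-4f &&
         std::fabs(lhs.y - rhs.y) < 1e-4f &&
         std::fabs(lhs.z - rhs.z) < 1e-4f &&
         std::fabs(lhs.w - rhs.w) < 1e-4f; //should hold for any a, b and v
}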

Answer 3 (0 votes)

"A key thing to notice is that the view_mat_t * proj_mat_t on the CPU matched the proj_mat * view_mat on the GPU. Does anyone know why?"

The reason for this is that for two matrices A and B: A * B = (B' * A')', where ' indicates the transpose operation. As you already pointed out yourself, your math code uses a row-major representation of matrices, while OpenGL (by default) uses a column-major representation. What this means is that the matrix A,

    (a b c)
A = (d e f)
    (g h i)

in your CPU math library is stored in memory as [a, b, c, d, e, f, g, h, i], whereas the same matrix defined in a GLSL shader would be stored as [a, d, g, b, e, h, c, f, i]. So if you upload the data [a, b, c, d, e, f, g, h, i] of your CPU matrix with glUniformMatrix3fv with the transpose parameter set to GL_FALSE, then the matrix you will see in GLSL is

     (a d g)
A' = (b e h)
     (c f i)

which is the transposed original matrix. Having realized that changing the interpretation of the matrix data between row-major and column-major gives you a transposed version of the original matrix, you can now explain why the matrix multiplication suddenly works the other way around. Your view_mat_t and proj_mat_t on the CPU are interpreted as view_mat_t' and proj_mat_t' in your GLSL shader, so uploading the pre-calculated view_mat_t * proj_mat_t to the shader leads to the same result as uploading both matrices separately and then calculating proj_mat_t * view_mat_t.
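To make the reinterpretation concrete, here is a tiny standalone C++ sketch showing that reading the same nine floats with row-major indexing versus column-major indexing gives you a matrix and its transpose:

#include <cstdio>

int main()
{
  //the memory layout [a, b, c, d, e, f, g, h, i] from above, using the numbers 1..9
  float data[9] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };

  for (int row = 0; row < 3; ++row)
    for (int col = 0; col < 3; ++col)
      printf("(%d,%d): row-major %.0f, column-major %.0f\n",
             row, col,
             data[row * 3 + col],   //CPU-side (row-major) interpretation
             data[col * 3 + row]);  //GLSL-side (column-major) interpretation

  //row-major element (r,c) equals column-major element (c,r): the two views are transposes of each other
  return 0;
}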