Files
TSE/TSE_Math/src/Matrix4x4.cpp

429 lines
11 KiB
C++

#include "Matrix4x4.hpp"
#include "MathF.hpp"
#include <cstring>
#include <cmath>
#ifdef __unix__
#include <immintrin.h>
#endif
#if defined(_MSC_VER)
#if defined(__AVX2__)
#define SIMD_HAS_FMA 1
#endif
#elif defined(__FMA__)
#define SIMD_HAS_FMA 1
#endif
#if defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX2__))
#define SIMD_HAS_DPPS 1
#endif
TSE::Matrix4x4::Matrix4x4()
{
std::memset(m, 0, sizeof(m));
}
TSE::Matrix4x4::Matrix4x4(float d)
{
std::memset(m, 0, sizeof(m));
for (int i = 0; i < 4; ++i)
m[i][i] = d;
}
TSE::Matrix4x4::Matrix4x4(const Matrix4x4 &other)
{
std::memcpy(m, other.m, sizeof(m));
}
bool TSE::Matrix4x4::IsIdentityMatrix() const
{
Matrix4x4 id = Identity();
return std::memcmp(m, id.m, sizeof(m)) == 0;
}
bool TSE::Matrix4x4::IsValid() const
{
bool valid = true;
for (byte x = 0; x < 4; x++)
{
for (byte y = 0; y < 4; y++)
{
valid &= !std::isnan(m[x][y]);
}
}
return valid;
}
void TSE::Matrix4x4::Invert()
{
const float det = Determinant();
if(std::abs(det) < TSE_EPSILON)
{
*this = Matrix4x4::Identity();
return;
}
Matrix4x4 conj = Conjucate();
float invDet = 1.0f / det;
*this = conj * invDet;
return;
}
float det3(float a00, float a01, float a02, float a10, float a11, float a12, float a20, float a21, float a22)
{
return a00 * (a11 * a22 - a12 * a21)
- a01 * (a10 * a22 - a12 * a20)
+ a02 * (a10 * a21 - a11 * a20);
}
float minor3(const TSE::Matrix4x4& M, int i, int j)
{
int ri[3], ci[3];
for (int r = 0, rr = 0; r < 4; ++r) if (r != i) ri[rr++] = r;
for (int c = 0, cc = 0; c < 4; ++c) if (c != j) ci[cc++] = c;
return det3(
M.m[ri[0]][ci[0]], M.m[ri[0]][ci[1]], M.m[ri[0]][ci[2]],
M.m[ri[1]][ci[0]], M.m[ri[1]][ci[1]], M.m[ri[1]][ci[2]],
M.m[ri[2]][ci[0]], M.m[ri[2]][ci[1]], M.m[ri[2]][ci[2]]
);
}
float cofactorSign(TSE::byte x, TSE::byte y)
{
return ((x + y) & 1) ? -1.0f : 1.0f;
}
TSE::Matrix4x4 TSE::Matrix4x4::Conjucate() const
{
Matrix4x4 conj;
for (byte x = 0; x < 4; x++)
{
for (byte y = 0; y < 4; y++)
{
conj.m[x][y] = cofactorSign(x,y) * minor3(*this, x, y);
}
}
return conj;
}
float TSE::Matrix4x4::Determinant() const
{
const float c00 = cofactorSign(0,0) * minor3(*this, 0, 0);
const float c01 = cofactorSign(0,1) * minor3(*this, 0, 1);
const float c02 = cofactorSign(0,2) * minor3(*this, 0, 2);
const float c03 = cofactorSign(0,3) * minor3(*this, 0, 3);
return m[0][0] * c00 + m[0][1] * c01 + m[0][2] * c02 + m[0][3] * c03;
}
bool TSE::Matrix4x4::IsAffine() const
{
return std::abs(m[3][0]) < TSE_EPSILON &&
std::abs(m[3][1]) < TSE_EPSILON &&
std::abs(m[3][2]) < TSE_EPSILON &&
std::abs(m[3][3] - 1.0f) < TSE_EPSILON;
}
void TSE::Matrix4x4::InvertAffine()
{
const float a00 = m[0][0], a01 = m[0][1], a02 = m[0][2];
const float a10 = m[1][0], a11 = m[1][1], a12 = m[1][2];
const float a20 = m[2][0], a21 = m[2][1], a22 = m[2][2];
const float tx = m[0][3], ty = m[1][3], tz = m[2][3];
const float det =
a00*(a11*a22 - a12*a21) -
a01*(a10*a22 - a12*a20) +
a02*(a10*a21 - a11*a20);
if (std::abs(det) < TSE_EPSILON) {
*this = Matrix4x4::Identity();
return;
}
const float invDet = 1.0f / det;
m[0][0] = (a11*a22 - a12*a21) * invDet;
m[0][1] = -(a01*a22 - a02*a21) * invDet;
m[0][2] = (a01*a12 - a02*a11) * invDet;
m[1][0] = -(a10*a22 - a12*a20) * invDet;
m[1][1] = (a00*a22 - a02*a20) * invDet;
m[1][2] = -(a00*a12 - a02*a10) * invDet;
m[2][0] = (a10*a21 - a11*a20) * invDet;
m[2][1] = -(a00*a21 - a01*a20) * invDet;
m[2][2] = (a00*a11 - a01*a10) * invDet;
m[0][3] = -(m[0][0]*tx + m[0][1]*ty + m[0][2]*tz);
m[1][3] = -(m[1][0]*tx + m[1][1]*ty + m[1][2]*tz);
m[2][3] = -(m[2][0]*tx + m[2][1]*ty + m[2][2]*tz);
m[3][0] = 0.0f;
m[3][1] = 0.0f;
m[3][2] = 0.0f;
m[3][3] = 1.0f;
}
const float *TSE::Matrix4x4::ToArrayRowMajor() const
{
return &m[0][0];
}
void TSE::Matrix4x4::ToArrayColumnMajor(float out[16]) const
{
for (byte row = 0; row < 4; ++row)
for (byte col = 0; col < 4; ++col)
out[col * 4 + row] = m[row][col];
}
TSE::Matrix4x4 TSE::Matrix4x4::Identity()
{
return Matrix4x4(1.0f);
}
TSE::Matrix4x4 TSE::Matrix4x4::Orthographic(float left, float right, float bottom, float top, float near, float far)
{
Matrix4x4 result = Matrix4x4(1.0f);
result.m[0][0] = 2.0f / (right - left);
result.m[1][1] = 2.0f / (top - bottom);
result.m[2][2] = -2.0f / (far - near);
result.m[0][3] = (left + right) / (left - right);
result.m[1][3] = (bottom + top) / (bottom - top);
result.m[2][3] = -(near + far) / (near - far);
return result;
}
TSE::Matrix4x4 TSE::Matrix4x4::Perspective(float fov, float aspectRatio, float near, float far)
{
Matrix4x4 result = Matrix4x4(1.0f);
float q = 1 / tan(Deg2Rad(0.5f * fov));
float a = q / aspectRatio;
float b = (near + far) / (near - far);
float c = (2 * near * far) / (near - far);
result.m[0][0] = a;
result.m[1][1] = q;
result.m[2][2] = b;
result.m[3][2] = -1;
result.m[2][3] = c;
return result;
}
TSE::Matrix4x4 TSE::Matrix4x4::ToTranslationMatrix(const Vector3 &pos)
{
Matrix4x4 result = Identity();
result.m[0][3] = pos.x;
result.m[1][3] = pos.y;
result.m[2][3] = pos.z;
return result;
}
TSE::Matrix4x4 TSE::Matrix4x4::ToRotationMatrix(const Quaternion &rot)
{
Matrix4x4 result = Identity();
float xx = rot.x * rot.x;
float yy = rot.y * rot.y;
float zz = rot.z * rot.z;
float xy = rot.x * rot.y;
float xz = rot.x * rot.z;
float yz = rot.y * rot.z;
float wx = rot.w * rot.x;
float wy = rot.w * rot.y;
float wz = rot.w * rot.z;
result.m[0][0] = 1.0f - 2.0f * (yy + zz);
result.m[0][1] = 2.0f * (xy - wz);
result.m[0][2] = 2.0f * (xz + wy);
result.m[1][0] = 2.0f * (xy + wz);
result.m[1][1] = 1.0f - 2.0f * (xx + zz);
result.m[1][2] = 2.0f * (yz - wx);
result.m[2][0] = 2.0f * (xz - wy);
result.m[2][1] = 2.0f * (yz + wx);
result.m[2][2] = 1.0f - 2.0f * (xx + yy);
return result;
}
TSE::Matrix4x4 TSE::Matrix4x4::ToRotationMatrixFromZAxisOnly(float angle)
{
Matrix4x4 result = Identity();
float c = std::cos(angle);
float s = std::sin(angle);
result.m[0][0] = c;
result.m[0][1] = -s;
result.m[1][0] = s;
result.m[1][1] = c;
return result;
}
TSE::Matrix4x4 TSE::Matrix4x4::ToScaleMatrix(const Vector3 &scale)
{
Matrix4x4 result = Identity();
result.m[0][0] = scale.x;
result.m[1][1] = scale.y;
result.m[2][2] = scale.z;
return result;
}
TSE::Matrix4x4 TSE::Matrix4x4::Invert(const Matrix4x4 &mat)
{
Matrix4x4 invers (mat);
if(invers.IsAffine())
{
invers.InvertAffine();
}
else
{
invers.Invert();
}
return invers;
}
TSE::Matrix4x4 mulsse(const TSE::Matrix4x4& A, const TSE::Matrix4x4& B)
{
using namespace TSE;
Matrix4x4 C;
const __m128 b0 = _mm_loadu_ps(&B.m[0][0]);
const __m128 b1 = _mm_loadu_ps(&B.m[1][0]);
const __m128 b2 = _mm_loadu_ps(&B.m[2][0]);
const __m128 b3 = _mm_loadu_ps(&B.m[3][0]);
for (int r = 0; r < 4; ++r) {
__m128 row;
#if SIMD_HAS_FMA
row = _mm_mul_ps(_mm_set1_ps(A.m[r][0]), b0);
row = _mm_fmadd_ps(_mm_set1_ps(A.m[r][1]), b1, row);
row = _mm_fmadd_ps(_mm_set1_ps(A.m[r][2]), b2, row);
row = _mm_fmadd_ps(_mm_set1_ps(A.m[r][3]), b3, row);
#else
row = _mm_mul_ps(_mm_set1_ps(A.m[r][0]), b0);
row = _mm_add_ps(row, _mm_mul_ps(_mm_set1_ps(A.m[r][1]), b1));
row = _mm_add_ps(row, _mm_mul_ps(_mm_set1_ps(A.m[r][2]), b2));
row = _mm_add_ps(row, _mm_mul_ps(_mm_set1_ps(A.m[r][3]), b3));
#endif
_mm_storeu_ps(&C.m[r][0], row);
}
return C;
}
float hsum_ps(__m128 v) {
__m128 shuf = _mm_movehdup_ps(v);
__m128 sums = _mm_add_ps(v, shuf);
shuf = _mm_movehl_ps(shuf, sums);
sums = _mm_add_ss(sums, shuf);
return _mm_cvtss_f32(sums);
}
TSE::Matrix4x4 TSE::Matrix4x4::operator*(const Matrix4x4 &other) const
{
return mulsse(*this, other);
}
TSE::Matrix4x4 TSE::Matrix4x4::operator*=(const Matrix4x4 &other)
{
*this = *this * other;
return *this;
}
TSE::Matrix4x4 TSE::Matrix4x4::operator*(float scalar) const
{
Matrix4x4 R;
#if defined(__SSE__)
const __m128 s = _mm_set1_ps(scalar);
for (int r = 0; r < 4; ++r) {
__m128 row = _mm_loadu_ps(&m[r][0]);
row = _mm_mul_ps(row, s);
_mm_storeu_ps(&R.m[r][0], row);
}
#else
for (int r = 0; r < 4; ++r)
for (int c = 0; c < 4; ++c)
R.m[r][c] = m[r][c] * scalar;
#endif
return R;
}
TSE::Matrix4x4 TSE::Matrix4x4::operator*=(float scalar)
{
*this = *this * scalar;
return *this;
}
TSE::Vector3 TSE::Matrix4x4::operator*(const Vector3 &v) const
{
#if defined(__SSE__)
const __m128 vec = _mm_set_ps(1.0f, v.z, v.y, v.x);
__m128 r0 = _mm_loadu_ps(&m[0][0]);
__m128 r1 = _mm_loadu_ps(&m[1][0]);
__m128 r2 = _mm_loadu_ps(&m[2][0]);
#if SIMD_HAS_DPPS
float x = _mm_cvtss_f32(_mm_dp_ps(r0, vec, 0xF1));
float y = _mm_cvtss_f32(_mm_dp_ps(r1, vec, 0xF1));
float z = _mm_cvtss_f32(_mm_dp_ps(r2, vec, 0xF1));
#else
float x = hsum_ps(_mm_mul_ps(r0, vec));
float y = hsum_ps(_mm_mul_ps(r1, vec));
float z = hsum_ps(_mm_mul_ps(r2, vec));
#endif
return Vector3{ x, y, z };
#else
return Vector3{
m[0][0]*v.x + m[0][1]*v.y + m[0][2]*v.z + m[0][3],
m[1][0]*v.x + m[1][1]*v.y + m[1][2]*v.z + m[1][3],
m[2][0]*v.x + m[2][1]*v.y + m[2][2]*v.z + m[2][3]
};
#endif
}
TSE::Vector4 TSE::Matrix4x4::operator*(const Vector4 &v) const
{
#if defined(__SSE__)
const __m128 vec = _mm_set_ps(v.w, v.z, v.y, v.x);
__m128 r0 = _mm_loadu_ps(&m[0][0]);
__m128 r1 = _mm_loadu_ps(&m[1][0]);
__m128 r2 = _mm_loadu_ps(&m[2][0]);
__m128 r3 = _mm_loadu_ps(&m[3][0]);
#if SIMD_HAS_DPPS
float x = _mm_cvtss_f32(_mm_dp_ps(r0, vec, 0xF1));
float y = _mm_cvtss_f32(_mm_dp_ps(r1, vec, 0xF1));
float z = _mm_cvtss_f32(_mm_dp_ps(r2, vec, 0xF1));
float w = _mm_cvtss_f32(_mm_dp_ps(r3, vec, 0xF1));
#else
float x = hsum_ps(_mm_mul_ps(r0, vec));
float y = hsum_ps(_mm_mul_ps(r1, vec));
float z = hsum_ps(_mm_mul_ps(r2, vec));
float w = hsum_ps(_mm_mul_ps(r3, vec));
#endif
return Vector4{ x, y, z, w };
#else
return Vector4{
m[0][0]*v.x + m[0][1]*v.y + m[0][2]*v.z + m[0][3]*v.w,
m[1][0]*v.x + m[1][1]*v.y + m[1][2]*v.z + m[1][3]*v.w,
m[2][0]*v.x + m[2][1]*v.y + m[2][2]*v.z + m[2][3]*v.w,
m[3][0]*v.x + m[3][1]*v.y + m[3][2]*v.z + m[3][3]*v.w
};
#endif
}