first commit of some basics
This commit is contained in:
428
TSE_Math/src/Matrix4x4.cpp
Normal file
428
TSE_Math/src/Matrix4x4.cpp
Normal file
@@ -0,0 +1,428 @@
|
||||
#include "Matrix4x4.hpp"
|
||||
#include "MathF.hpp"
|
||||
#include <cstring>
|
||||
#include <cmath>
|
||||
|
||||
#ifdef __unix__
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#if defined(__AVX2__)
|
||||
#define SIMD_HAS_FMA 1
|
||||
#endif
|
||||
#elif defined(__FMA__)
|
||||
#define SIMD_HAS_FMA 1
|
||||
#endif
|
||||
|
||||
#if defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX2__))
|
||||
#define SIMD_HAS_DPPS 1
|
||||
#endif
|
||||
|
||||
TSE::Matrix4x4::Matrix4x4()
|
||||
{
|
||||
std::memset(m, 0, sizeof(m));
|
||||
}
|
||||
|
||||
TSE::Matrix4x4::Matrix4x4(float d)
|
||||
{
|
||||
std::memset(m, 0, sizeof(m));
|
||||
for (int i = 0; i < 4; ++i)
|
||||
m[i][i] = d;
|
||||
}
|
||||
|
||||
TSE::Matrix4x4::Matrix4x4(const Matrix4x4 &other)
|
||||
{
|
||||
std::memcpy(m, other.m, sizeof(m));
|
||||
}
|
||||
|
||||
bool TSE::Matrix4x4::IsIdentityMatrix() const
|
||||
{
|
||||
Matrix4x4 id = Identity();
|
||||
return std::memcmp(m, id.m, sizeof(m)) == 0;
|
||||
}
|
||||
|
||||
bool TSE::Matrix4x4::IsValid() const
|
||||
{
|
||||
bool valid = true;
|
||||
for (byte x = 0; x < 4; x++)
|
||||
{
|
||||
for (byte y = 0; y < 4; y++)
|
||||
{
|
||||
valid &= !std::isnan(m[x][y]);
|
||||
}
|
||||
}
|
||||
return valid;
|
||||
}
|
||||
|
||||
void TSE::Matrix4x4::Invert()
|
||||
{
|
||||
const float det = Determinant();
|
||||
if(std::abs(det) < TSE_EPSILON)
|
||||
{
|
||||
*this = Matrix4x4::Identity();
|
||||
return;
|
||||
}
|
||||
|
||||
Matrix4x4 conj = Conjucate();
|
||||
float invDet = 1.0f / det;
|
||||
|
||||
*this = conj * invDet;
|
||||
return;
|
||||
}
|
||||
|
||||
float det3(float a00, float a01, float a02, float a10, float a11, float a12, float a20, float a21, float a22)
|
||||
{
|
||||
return a00 * (a11 * a22 - a12 * a21)
|
||||
- a01 * (a10 * a22 - a12 * a20)
|
||||
+ a02 * (a10 * a21 - a11 * a20);
|
||||
}
|
||||
|
||||
float minor3(const TSE::Matrix4x4& M, int i, int j)
|
||||
{
|
||||
int ri[3], ci[3];
|
||||
for (int r = 0, rr = 0; r < 4; ++r) if (r != i) ri[rr++] = r;
|
||||
for (int c = 0, cc = 0; c < 4; ++c) if (c != j) ci[cc++] = c;
|
||||
|
||||
return det3(
|
||||
M.m[ri[0]][ci[0]], M.m[ri[0]][ci[1]], M.m[ri[0]][ci[2]],
|
||||
M.m[ri[1]][ci[0]], M.m[ri[1]][ci[1]], M.m[ri[1]][ci[2]],
|
||||
M.m[ri[2]][ci[0]], M.m[ri[2]][ci[1]], M.m[ri[2]][ci[2]]
|
||||
);
|
||||
}
|
||||
|
||||
float cofactorSign(TSE::byte x, TSE::byte y)
|
||||
{
|
||||
return ((x + y) & 1) ? -1.0f : 1.0f;
|
||||
}
|
||||
|
||||
TSE::Matrix4x4 TSE::Matrix4x4::Conjucate() const
|
||||
{
|
||||
Matrix4x4 conj;
|
||||
|
||||
for (byte x = 0; x < 4; x++)
|
||||
{
|
||||
for (byte y = 0; y < 4; y++)
|
||||
{
|
||||
conj.m[x][y] = cofactorSign(x,y) * minor3(*this, x, y);
|
||||
}
|
||||
}
|
||||
|
||||
return conj;
|
||||
}
|
||||
|
||||
float TSE::Matrix4x4::Determinant() const
|
||||
{
|
||||
const float c00 = cofactorSign(0,0) * minor3(*this, 0, 0);
|
||||
const float c01 = cofactorSign(0,1) * minor3(*this, 0, 1);
|
||||
const float c02 = cofactorSign(0,2) * minor3(*this, 0, 2);
|
||||
const float c03 = cofactorSign(0,3) * minor3(*this, 0, 3);
|
||||
|
||||
return m[0][0] * c00 + m[0][1] * c01 + m[0][2] * c02 + m[0][3] * c03;
|
||||
}
|
||||
|
||||
bool TSE::Matrix4x4::IsAffine() const
|
||||
{
|
||||
return std::abs(m[3][0]) < TSE_EPSILON &&
|
||||
std::abs(m[3][1]) < TSE_EPSILON &&
|
||||
std::abs(m[3][2]) < TSE_EPSILON &&
|
||||
std::abs(m[3][3] - 1.0f) < TSE_EPSILON;
|
||||
}
|
||||
|
||||
void TSE::Matrix4x4::InvertAffine()
|
||||
{
|
||||
const float a00 = m[0][0], a01 = m[0][1], a02 = m[0][2];
|
||||
const float a10 = m[1][0], a11 = m[1][1], a12 = m[1][2];
|
||||
const float a20 = m[2][0], a21 = m[2][1], a22 = m[2][2];
|
||||
|
||||
const float tx = m[0][3], ty = m[1][3], tz = m[2][3];
|
||||
|
||||
const float det =
|
||||
a00*(a11*a22 - a12*a21) -
|
||||
a01*(a10*a22 - a12*a20) +
|
||||
a02*(a10*a21 - a11*a20);
|
||||
|
||||
if (std::abs(det) < TSE_EPSILON) {
|
||||
*this = Matrix4x4::Identity();
|
||||
return;
|
||||
}
|
||||
|
||||
const float invDet = 1.0f / det;
|
||||
|
||||
m[0][0] = (a11*a22 - a12*a21) * invDet;
|
||||
m[0][1] = -(a01*a22 - a02*a21) * invDet;
|
||||
m[0][2] = (a01*a12 - a02*a11) * invDet;
|
||||
|
||||
m[1][0] = -(a10*a22 - a12*a20) * invDet;
|
||||
m[1][1] = (a00*a22 - a02*a20) * invDet;
|
||||
m[1][2] = -(a00*a12 - a02*a10) * invDet;
|
||||
|
||||
m[2][0] = (a10*a21 - a11*a20) * invDet;
|
||||
m[2][1] = -(a00*a21 - a01*a20) * invDet;
|
||||
m[2][2] = (a00*a11 - a01*a10) * invDet;
|
||||
|
||||
m[0][3] = -(m[0][0]*tx + m[0][1]*ty + m[0][2]*tz);
|
||||
m[1][3] = -(m[1][0]*tx + m[1][1]*ty + m[1][2]*tz);
|
||||
m[2][3] = -(m[2][0]*tx + m[2][1]*ty + m[2][2]*tz);
|
||||
|
||||
m[3][0] = 0.0f;
|
||||
m[3][1] = 0.0f;
|
||||
m[3][2] = 0.0f;
|
||||
m[3][3] = 1.0f;
|
||||
}
|
||||
|
||||
const float *TSE::Matrix4x4::ToArrayRowMajor() const
|
||||
{
|
||||
return &m[0][0];
|
||||
}
|
||||
|
||||
void TSE::Matrix4x4::ToArrayColumnMajor(float out[16]) const
|
||||
{
|
||||
for (byte row = 0; row < 4; ++row)
|
||||
for (byte col = 0; col < 4; ++col)
|
||||
out[col * 4 + row] = m[row][col];
|
||||
}
|
||||
|
||||
TSE::Matrix4x4 TSE::Matrix4x4::Identity()
|
||||
{
|
||||
return Matrix4x4(1.0f);
|
||||
}
|
||||
|
||||
TSE::Matrix4x4 TSE::Matrix4x4::Orthographic(float left, float right, float bottom, float top, float near, float far)
|
||||
{
|
||||
Matrix4x4 result = Matrix4x4(1.0f);
|
||||
|
||||
result.m[0][0] = 2.0f / (right - left);
|
||||
result.m[1][1] = 2.0f / (top - bottom);
|
||||
result.m[2][2] = -2.0f / (far - near);
|
||||
|
||||
result.m[0][3] = (left + right) / (left - right);
|
||||
result.m[1][3] = (bottom + top) / (bottom - top);
|
||||
result.m[2][3] = -(near + far) / (near - far);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
TSE::Matrix4x4 TSE::Matrix4x4::Perspective(float fov, float aspectRatio, float near, float far)
|
||||
{
|
||||
Matrix4x4 result = Matrix4x4(1.0f);
|
||||
|
||||
float q = 1 / tan(Deg2Rad(0.5f * fov));
|
||||
float a = q / aspectRatio;
|
||||
|
||||
float b = (near + far) / (near - far);
|
||||
float c = (2 * near * far) / (near - far);
|
||||
|
||||
result.m[0][0] = a;
|
||||
result.m[1][1] = q;
|
||||
result.m[2][2] = b;
|
||||
result.m[3][2] = -1;
|
||||
result.m[2][3] = c;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
TSE::Matrix4x4 TSE::Matrix4x4::ToTranslationMatrix(const Vector3 &pos)
|
||||
{
|
||||
Matrix4x4 result = Identity();
|
||||
result.m[0][3] = pos.x;
|
||||
result.m[1][3] = pos.y;
|
||||
result.m[2][3] = pos.z;
|
||||
return result;
|
||||
}
|
||||
|
||||
TSE::Matrix4x4 TSE::Matrix4x4::ToRotationMatrix(const Quaternion &rot)
|
||||
{
|
||||
Matrix4x4 result = Identity();
|
||||
float xx = rot.x * rot.x;
|
||||
float yy = rot.y * rot.y;
|
||||
float zz = rot.z * rot.z;
|
||||
float xy = rot.x * rot.y;
|
||||
float xz = rot.x * rot.z;
|
||||
float yz = rot.y * rot.z;
|
||||
float wx = rot.w * rot.x;
|
||||
float wy = rot.w * rot.y;
|
||||
float wz = rot.w * rot.z;
|
||||
|
||||
result.m[0][0] = 1.0f - 2.0f * (yy + zz);
|
||||
result.m[0][1] = 2.0f * (xy - wz);
|
||||
result.m[0][2] = 2.0f * (xz + wy);
|
||||
|
||||
result.m[1][0] = 2.0f * (xy + wz);
|
||||
result.m[1][1] = 1.0f - 2.0f * (xx + zz);
|
||||
result.m[1][2] = 2.0f * (yz - wx);
|
||||
|
||||
result.m[2][0] = 2.0f * (xz - wy);
|
||||
result.m[2][1] = 2.0f * (yz + wx);
|
||||
result.m[2][2] = 1.0f - 2.0f * (xx + yy);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
TSE::Matrix4x4 TSE::Matrix4x4::ToRotationMatrixFromZAxisOnly(float angle)
|
||||
{
|
||||
Matrix4x4 result = Identity();
|
||||
float c = std::cos(angle);
|
||||
float s = std::sin(angle);
|
||||
result.m[0][0] = c;
|
||||
result.m[0][1] = -s;
|
||||
result.m[1][0] = s;
|
||||
result.m[1][1] = c;
|
||||
return result;
|
||||
}
|
||||
|
||||
TSE::Matrix4x4 TSE::Matrix4x4::ToScaleMatrix(const Vector3 &scale)
|
||||
{
|
||||
Matrix4x4 result = Identity();
|
||||
result.m[0][0] = scale.x;
|
||||
result.m[1][1] = scale.y;
|
||||
result.m[2][2] = scale.z;
|
||||
return result;
|
||||
}
|
||||
|
||||
TSE::Matrix4x4 TSE::Matrix4x4::Invert(const Matrix4x4 &mat)
|
||||
{
|
||||
Matrix4x4 invers (mat);
|
||||
|
||||
if(invers.IsAffine())
|
||||
{
|
||||
invers.InvertAffine();
|
||||
}
|
||||
else
|
||||
{
|
||||
invers.Invert();
|
||||
}
|
||||
return invers;
|
||||
}
|
||||
|
||||
TSE::Matrix4x4 mulsse(const TSE::Matrix4x4& A, const TSE::Matrix4x4& B)
|
||||
{
|
||||
using namespace TSE;
|
||||
Matrix4x4 C;
|
||||
|
||||
const __m128 b0 = _mm_loadu_ps(&B.m[0][0]);
|
||||
const __m128 b1 = _mm_loadu_ps(&B.m[1][0]);
|
||||
const __m128 b2 = _mm_loadu_ps(&B.m[2][0]);
|
||||
const __m128 b3 = _mm_loadu_ps(&B.m[3][0]);
|
||||
|
||||
for (int r = 0; r < 4; ++r) {
|
||||
__m128 row;
|
||||
#if SIMD_HAS_FMA
|
||||
row = _mm_mul_ps(_mm_set1_ps(A.m[r][0]), b0);
|
||||
row = _mm_fmadd_ps(_mm_set1_ps(A.m[r][1]), b1, row);
|
||||
row = _mm_fmadd_ps(_mm_set1_ps(A.m[r][2]), b2, row);
|
||||
row = _mm_fmadd_ps(_mm_set1_ps(A.m[r][3]), b3, row);
|
||||
#else
|
||||
row = _mm_mul_ps(_mm_set1_ps(A.m[r][0]), b0);
|
||||
row = _mm_add_ps(row, _mm_mul_ps(_mm_set1_ps(A.m[r][1]), b1));
|
||||
row = _mm_add_ps(row, _mm_mul_ps(_mm_set1_ps(A.m[r][2]), b2));
|
||||
row = _mm_add_ps(row, _mm_mul_ps(_mm_set1_ps(A.m[r][3]), b3));
|
||||
#endif
|
||||
_mm_storeu_ps(&C.m[r][0], row);
|
||||
}
|
||||
return C;
|
||||
}
|
||||
|
||||
float hsum_ps(__m128 v) {
|
||||
__m128 shuf = _mm_movehdup_ps(v);
|
||||
__m128 sums = _mm_add_ps(v, shuf);
|
||||
shuf = _mm_movehl_ps(shuf, sums);
|
||||
sums = _mm_add_ss(sums, shuf);
|
||||
return _mm_cvtss_f32(sums);
|
||||
}
|
||||
|
||||
TSE::Matrix4x4 TSE::Matrix4x4::operator*(const Matrix4x4 &other) const
|
||||
{
|
||||
return mulsse(*this, other);
|
||||
}
|
||||
|
||||
TSE::Matrix4x4 TSE::Matrix4x4::operator*=(const Matrix4x4 &other)
|
||||
{
|
||||
*this = *this * other;
|
||||
return *this;
|
||||
}
|
||||
|
||||
TSE::Matrix4x4 TSE::Matrix4x4::operator*(float scalar) const
|
||||
{
|
||||
Matrix4x4 R;
|
||||
|
||||
#if defined(__SSE__)
|
||||
const __m128 s = _mm_set1_ps(scalar);
|
||||
for (int r = 0; r < 4; ++r) {
|
||||
__m128 row = _mm_loadu_ps(&m[r][0]);
|
||||
row = _mm_mul_ps(row, s);
|
||||
_mm_storeu_ps(&R.m[r][0], row);
|
||||
}
|
||||
#else
|
||||
for (int r = 0; r < 4; ++r)
|
||||
for (int c = 0; c < 4; ++c)
|
||||
R.m[r][c] = m[r][c] * scalar;
|
||||
#endif
|
||||
|
||||
return R;
|
||||
}
|
||||
|
||||
TSE::Matrix4x4 TSE::Matrix4x4::operator*=(float scalar)
|
||||
{
|
||||
*this = *this * scalar;
|
||||
return *this;
|
||||
}
|
||||
|
||||
TSE::Vector3 TSE::Matrix4x4::operator*(const Vector3 &v) const
|
||||
{
|
||||
#if defined(__SSE__)
|
||||
const __m128 vec = _mm_set_ps(1.0f, v.z, v.y, v.x);
|
||||
|
||||
__m128 r0 = _mm_loadu_ps(&m[0][0]);
|
||||
__m128 r1 = _mm_loadu_ps(&m[1][0]);
|
||||
__m128 r2 = _mm_loadu_ps(&m[2][0]);
|
||||
#if SIMD_HAS_DPPS
|
||||
float x = _mm_cvtss_f32(_mm_dp_ps(r0, vec, 0xF1));
|
||||
float y = _mm_cvtss_f32(_mm_dp_ps(r1, vec, 0xF1));
|
||||
float z = _mm_cvtss_f32(_mm_dp_ps(r2, vec, 0xF1));
|
||||
#else
|
||||
float x = hsum_ps(_mm_mul_ps(r0, vec));
|
||||
float y = hsum_ps(_mm_mul_ps(r1, vec));
|
||||
float z = hsum_ps(_mm_mul_ps(r2, vec));
|
||||
#endif
|
||||
return Vector3{ x, y, z };
|
||||
#else
|
||||
return Vector3{
|
||||
m[0][0]*v.x + m[0][1]*v.y + m[0][2]*v.z + m[0][3],
|
||||
m[1][0]*v.x + m[1][1]*v.y + m[1][2]*v.z + m[1][3],
|
||||
m[2][0]*v.x + m[2][1]*v.y + m[2][2]*v.z + m[2][3]
|
||||
};
|
||||
#endif
|
||||
}
|
||||
|
||||
TSE::Vector4 TSE::Matrix4x4::operator*(const Vector4 &v) const
|
||||
{
|
||||
#if defined(__SSE__)
|
||||
const __m128 vec = _mm_set_ps(v.w, v.z, v.y, v.x);
|
||||
|
||||
__m128 r0 = _mm_loadu_ps(&m[0][0]);
|
||||
__m128 r1 = _mm_loadu_ps(&m[1][0]);
|
||||
__m128 r2 = _mm_loadu_ps(&m[2][0]);
|
||||
__m128 r3 = _mm_loadu_ps(&m[3][0]);
|
||||
|
||||
#if SIMD_HAS_DPPS
|
||||
float x = _mm_cvtss_f32(_mm_dp_ps(r0, vec, 0xF1));
|
||||
float y = _mm_cvtss_f32(_mm_dp_ps(r1, vec, 0xF1));
|
||||
float z = _mm_cvtss_f32(_mm_dp_ps(r2, vec, 0xF1));
|
||||
float w = _mm_cvtss_f32(_mm_dp_ps(r3, vec, 0xF1));
|
||||
#else
|
||||
float x = hsum_ps(_mm_mul_ps(r0, vec));
|
||||
float y = hsum_ps(_mm_mul_ps(r1, vec));
|
||||
float z = hsum_ps(_mm_mul_ps(r2, vec));
|
||||
float w = hsum_ps(_mm_mul_ps(r3, vec));
|
||||
#endif
|
||||
return Vector4{ x, y, z, w };
|
||||
#else
|
||||
return Vector4{
|
||||
m[0][0]*v.x + m[0][1]*v.y + m[0][2]*v.z + m[0][3]*v.w,
|
||||
m[1][0]*v.x + m[1][1]*v.y + m[1][2]*v.z + m[1][3]*v.w,
|
||||
m[2][0]*v.x + m[2][1]*v.y + m[2][2]*v.z + m[2][3]*v.w,
|
||||
m[3][0]*v.x + m[3][1]*v.y + m[3][2]*v.z + m[3][3]*v.w
|
||||
};
|
||||
#endif
|
||||
}
|
||||
Reference in New Issue
Block a user