first commit of some basics

2026-01-17 09:13:59 +01:00
parent f9bc556ad9
commit c770c62400
41 changed files with 13610 additions and 0 deletions
--- a/TSE_Math/src/Matrix4x4.cpp
+++ b/TSE_Math/src/Matrix4x4.cpp
@@ -0,0 +1,428 @@
+#include "Matrix4x4.hpp"
+#include "MathF.hpp"
+#include <cstring>
+#include <cmath>
+
+#ifdef __unix__
+    #include <immintrin.h>
+#endif
+
+#if defined(_MSC_VER)
+  #if defined(__AVX2__)
+    #define SIMD_HAS_FMA 1
+  #endif
+#elif defined(__FMA__)
+  #define SIMD_HAS_FMA 1
+#endif
+
+#if defined(__SSE4_1__) || (defined(_MSC_VER) && defined(__AVX2__))
+  #define SIMD_HAS_DPPS 1
+#endif
+
+TSE::Matrix4x4::Matrix4x4()
+{
+    std::memset(m, 0, sizeof(m));
+}
+
+TSE::Matrix4x4::Matrix4x4(float d)
+{
+    std::memset(m, 0, sizeof(m));
+    for (int i = 0; i < 4; ++i)
+        m[i][i] = d;
+}
+
+TSE::Matrix4x4::Matrix4x4(const Matrix4x4 &other)
+{
+    std::memcpy(m, other.m, sizeof(m));
+}
+
+bool TSE::Matrix4x4::IsIdentityMatrix() const
+{
+    Matrix4x4 id = Identity();
+    return std::memcmp(m, id.m, sizeof(m)) == 0;
+}
+
+bool TSE::Matrix4x4::IsValid() const
+{
+    bool valid = true;
+    for (byte x = 0; x < 4; x++)
+    {
+        for (byte y = 0; y < 4; y++)
+        {
+            valid &= !std::isnan(m[x][y]);
+        }
+    }
+    return valid;
+}
+
+void TSE::Matrix4x4::Invert()
+{
+    const float det = Determinant();
+    if(std::abs(det) < TSE_EPSILON)
+    {
+        *this = Matrix4x4::Identity();
+        return;
+    }
+
+    Matrix4x4 conj = Conjucate();
+    float invDet = 1.0f / det;
+
+    *this = conj * invDet;
+    return;
+}
+
+float det3(float a00, float a01, float a02, float a10, float a11, float a12, float a20, float a21, float a22)
+{
+    return a00 * (a11 * a22 - a12 * a21)
+         - a01 * (a10 * a22 - a12 * a20)
+         + a02 * (a10 * a21 - a11 * a20);
+}
+
+float minor3(const TSE::Matrix4x4& M, int i, int j)
+{
+    int ri[3], ci[3];
+    for (int r = 0, rr = 0; r < 4; ++r) if (r != i) ri[rr++] = r;
+    for (int c = 0, cc = 0; c < 4; ++c) if (c != j) ci[cc++] = c;
+
+    return det3(
+        M.m[ri[0]][ci[0]], M.m[ri[0]][ci[1]], M.m[ri[0]][ci[2]],
+        M.m[ri[1]][ci[0]], M.m[ri[1]][ci[1]], M.m[ri[1]][ci[2]],
+        M.m[ri[2]][ci[0]], M.m[ri[2]][ci[1]], M.m[ri[2]][ci[2]]
+    );
+}
+
+float cofactorSign(TSE::byte x, TSE::byte y)
+{
+    return ((x + y) & 1) ? -1.0f : 1.0f;
+}
+
+TSE::Matrix4x4 TSE::Matrix4x4::Conjucate() const
+{
+    Matrix4x4 conj;
+
+    for (byte x = 0; x < 4; x++)
+    {
+        for (byte y = 0; y < 4; y++)
+        {
+            conj.m[x][y] = cofactorSign(x,y) * minor3(*this, x, y);
+        }
+    }
+
+    return conj;
+}
+
+float TSE::Matrix4x4::Determinant() const
+{
+    const float c00 =  cofactorSign(0,0) * minor3(*this, 0, 0);
+    const float c01 =  cofactorSign(0,1) * minor3(*this, 0, 1);
+    const float c02 =  cofactorSign(0,2) * minor3(*this, 0, 2);
+    const float c03 =  cofactorSign(0,3) * minor3(*this, 0, 3);
+
+    return m[0][0] * c00 + m[0][1] * c01 + m[0][2] * c02 + m[0][3] * c03;
+}
+
+bool TSE::Matrix4x4::IsAffine() const
+{
+    return std::abs(m[3][0]) < TSE_EPSILON &&
+           std::abs(m[3][1]) < TSE_EPSILON &&
+           std::abs(m[3][2]) < TSE_EPSILON &&
+           std::abs(m[3][3] - 1.0f) < TSE_EPSILON;
+}
+
+void TSE::Matrix4x4::InvertAffine()
+{
+    const float a00 = m[0][0], a01 = m[0][1], a02 = m[0][2];
+    const float a10 = m[1][0], a11 = m[1][1], a12 = m[1][2];
+    const float a20 = m[2][0], a21 = m[2][1], a22 = m[2][2];
+
+    const float tx = m[0][3], ty = m[1][3], tz = m[2][3];
+
+    const float det =
+        a00*(a11*a22 - a12*a21) -
+        a01*(a10*a22 - a12*a20) +
+        a02*(a10*a21 - a11*a20);
+
+    if (std::abs(det) < TSE_EPSILON) {
+        *this = Matrix4x4::Identity();
+        return;
+    }
+
+    const float invDet = 1.0f / det;
+    
+    m[0][0] =  (a11*a22 - a12*a21) * invDet;
+    m[0][1] = -(a01*a22 - a02*a21) * invDet;
+    m[0][2] =  (a01*a12 - a02*a11) * invDet;
+
+    m[1][0] = -(a10*a22 - a12*a20) * invDet;
+    m[1][1] =  (a00*a22 - a02*a20) * invDet;
+    m[1][2] = -(a00*a12 - a02*a10) * invDet;
+
+    m[2][0] =  (a10*a21 - a11*a20) * invDet;
+    m[2][1] = -(a00*a21 - a01*a20) * invDet;
+    m[2][2] =  (a00*a11 - a01*a10) * invDet;
+
+    m[0][3] = -(m[0][0]*tx + m[0][1]*ty + m[0][2]*tz);
+    m[1][3] = -(m[1][0]*tx + m[1][1]*ty + m[1][2]*tz);
+    m[2][3] = -(m[2][0]*tx + m[2][1]*ty + m[2][2]*tz);
+
+    m[3][0] = 0.0f; 
+    m[3][1] = 0.0f; 
+    m[3][2] = 0.0f; 
+    m[3][3] = 1.0f;
+}
+
+const float *TSE::Matrix4x4::ToArrayRowMajor() const
+{
+    return &m[0][0];
+}
+
+void TSE::Matrix4x4::ToArrayColumnMajor(float out[16]) const
+{
+    for (byte row = 0; row < 4; ++row)
+        for (byte col = 0; col < 4; ++col)
+            out[col * 4 + row] = m[row][col];
+}
+
+TSE::Matrix4x4 TSE::Matrix4x4::Identity()
+{
+    return Matrix4x4(1.0f);
+}
+
+TSE::Matrix4x4 TSE::Matrix4x4::Orthographic(float left, float right, float bottom, float top, float near, float far)
+{
+    Matrix4x4 result = Matrix4x4(1.0f);
+    
+    result.m[0][0] = 2.0f / (right - left);
+    result.m[1][1] = 2.0f / (top - bottom);
+    result.m[2][2] = -2.0f / (far - near);
+
+    result.m[0][3] = (left + right) / (left - right);
+    result.m[1][3] = (bottom + top) / (bottom - top);
+    result.m[2][3] = -(near + far) / (near - far);
+
+    return result;
+}
+
+TSE::Matrix4x4 TSE::Matrix4x4::Perspective(float fov, float aspectRatio, float near, float far)
+{
+    Matrix4x4 result = Matrix4x4(1.0f);
+
+    float q = 1 / tan(Deg2Rad(0.5f * fov));
+    float a = q / aspectRatio;
+
+    float b = (near + far) / (near - far);
+    float c = (2 * near * far) / (near - far);
+
+    result.m[0][0] = a;
+    result.m[1][1] = q;
+    result.m[2][2] = b;
+    result.m[3][2] = -1;
+    result.m[2][3] = c;
+    
+    return result;
+}
+
+TSE::Matrix4x4 TSE::Matrix4x4::ToTranslationMatrix(const Vector3 &pos)
+{
+    Matrix4x4 result = Identity();
+    result.m[0][3] = pos.x;
+    result.m[1][3] = pos.y;
+    result.m[2][3] = pos.z;
+    return result;
+}
+
+TSE::Matrix4x4 TSE::Matrix4x4::ToRotationMatrix(const Quaternion &rot)
+{
+    Matrix4x4 result = Identity();
+    float xx = rot.x * rot.x;
+    float yy = rot.y * rot.y;
+    float zz = rot.z * rot.z;
+    float xy = rot.x * rot.y;
+    float xz = rot.x * rot.z;
+    float yz = rot.y * rot.z;
+    float wx = rot.w * rot.x;
+    float wy = rot.w * rot.y;
+    float wz = rot.w * rot.z;
+
+    result.m[0][0] = 1.0f - 2.0f * (yy + zz);
+    result.m[0][1] = 2.0f * (xy - wz);
+    result.m[0][2] = 2.0f * (xz + wy);
+
+    result.m[1][0] = 2.0f * (xy + wz);
+    result.m[1][1] = 1.0f - 2.0f * (xx + zz);
+    result.m[1][2] = 2.0f * (yz - wx);
+
+    result.m[2][0] = 2.0f * (xz - wy);
+    result.m[2][1] = 2.0f * (yz + wx);
+    result.m[2][2] = 1.0f - 2.0f * (xx + yy);
+
+    return result;
+}
+
+TSE::Matrix4x4 TSE::Matrix4x4::ToRotationMatrixFromZAxisOnly(float angle)
+{
+    Matrix4x4 result = Identity();
+    float c = std::cos(angle);
+    float s = std::sin(angle);
+    result.m[0][0] = c;
+    result.m[0][1] = -s;
+    result.m[1][0] = s;
+    result.m[1][1] = c;
+    return result;
+}
+
+TSE::Matrix4x4 TSE::Matrix4x4::ToScaleMatrix(const Vector3 &scale)
+{
+    Matrix4x4 result = Identity();
+    result.m[0][0] = scale.x;
+    result.m[1][1] = scale.y;
+    result.m[2][2] = scale.z;
+    return result;
+}
+
+TSE::Matrix4x4 TSE::Matrix4x4::Invert(const Matrix4x4 &mat)
+{
+    Matrix4x4 invers (mat);
+
+    if(invers.IsAffine())
+    {
+        invers.InvertAffine();
+    }
+    else
+    {
+        invers.Invert();
+    }
+    return invers;
+}
+
+TSE::Matrix4x4 mulsse(const TSE::Matrix4x4& A, const TSE::Matrix4x4& B) 
+{
+    using namespace TSE;
+    Matrix4x4 C;
+
+    const __m128 b0 = _mm_loadu_ps(&B.m[0][0]);
+    const __m128 b1 = _mm_loadu_ps(&B.m[1][0]);
+    const __m128 b2 = _mm_loadu_ps(&B.m[2][0]);
+    const __m128 b3 = _mm_loadu_ps(&B.m[3][0]);
+
+    for (int r = 0; r < 4; ++r) {
+        __m128 row;
+    #if SIMD_HAS_FMA
+        row  = _mm_mul_ps(_mm_set1_ps(A.m[r][0]), b0);
+        row  = _mm_fmadd_ps(_mm_set1_ps(A.m[r][1]), b1, row);
+        row  = _mm_fmadd_ps(_mm_set1_ps(A.m[r][2]), b2, row);
+        row  = _mm_fmadd_ps(_mm_set1_ps(A.m[r][3]), b3, row);
+    #else
+        row  = _mm_mul_ps(_mm_set1_ps(A.m[r][0]), b0);
+        row  = _mm_add_ps(row, _mm_mul_ps(_mm_set1_ps(A.m[r][1]), b1));
+        row  = _mm_add_ps(row, _mm_mul_ps(_mm_set1_ps(A.m[r][2]), b2));
+        row  = _mm_add_ps(row, _mm_mul_ps(_mm_set1_ps(A.m[r][3]), b3));
+    #endif
+        _mm_storeu_ps(&C.m[r][0], row);
+    }
+    return C;
+}
+
+float hsum_ps(__m128 v) {
+        __m128 shuf = _mm_movehdup_ps(v);
+        __m128 sums = _mm_add_ps(v, shuf);
+        shuf = _mm_movehl_ps(shuf, sums);
+        sums = _mm_add_ss(sums, shuf);
+        return _mm_cvtss_f32(sums);
+    }
+
+TSE::Matrix4x4 TSE::Matrix4x4::operator*(const Matrix4x4 &other) const
+{
+    return mulsse(*this, other);
+}
+
+TSE::Matrix4x4 TSE::Matrix4x4::operator*=(const Matrix4x4 &other)
+{
+    *this = *this * other;
+    return *this;
+}
+
+TSE::Matrix4x4 TSE::Matrix4x4::operator*(float scalar) const
+{
+    Matrix4x4 R;
+
+#if defined(__SSE__)
+    const __m128 s = _mm_set1_ps(scalar);
+    for (int r = 0; r < 4; ++r) {
+        __m128 row = _mm_loadu_ps(&m[r][0]);
+        row = _mm_mul_ps(row, s);
+        _mm_storeu_ps(&R.m[r][0], row);
+    }
+#else
+    for (int r = 0; r < 4; ++r)
+        for (int c = 0; c < 4; ++c)
+            R.m[r][c] = m[r][c] * scalar;
+#endif
+
+    return R;
+}
+
+TSE::Matrix4x4 TSE::Matrix4x4::operator*=(float scalar)
+{
+    *this = *this * scalar;
+    return *this;
+}
+
+TSE::Vector3 TSE::Matrix4x4::operator*(const Vector3 &v) const
+{
+#if defined(__SSE__)
+    const __m128 vec = _mm_set_ps(1.0f, v.z, v.y, v.x);
+
+    __m128 r0 = _mm_loadu_ps(&m[0][0]);
+    __m128 r1 = _mm_loadu_ps(&m[1][0]);
+    __m128 r2 = _mm_loadu_ps(&m[2][0]);
+#if SIMD_HAS_DPPS
+    float x = _mm_cvtss_f32(_mm_dp_ps(r0, vec, 0xF1));
+    float y = _mm_cvtss_f32(_mm_dp_ps(r1, vec, 0xF1));
+    float z = _mm_cvtss_f32(_mm_dp_ps(r2, vec, 0xF1));
+#else
+    float x = hsum_ps(_mm_mul_ps(r0, vec));
+    float y = hsum_ps(_mm_mul_ps(r1, vec));
+    float z = hsum_ps(_mm_mul_ps(r2, vec));
+#endif
+    return Vector3{ x, y, z };
+#else
+    return Vector3{
+        m[0][0]*v.x + m[0][1]*v.y + m[0][2]*v.z + m[0][3],
+        m[1][0]*v.x + m[1][1]*v.y + m[1][2]*v.z + m[1][3],
+        m[2][0]*v.x + m[2][1]*v.y + m[2][2]*v.z + m[2][3]
+    };
+#endif
+}
+
+TSE::Vector4 TSE::Matrix4x4::operator*(const Vector4 &v) const
+{
+#if defined(__SSE__)
+    const __m128 vec = _mm_set_ps(v.w, v.z, v.y, v.x);
+
+    __m128 r0 = _mm_loadu_ps(&m[0][0]);
+    __m128 r1 = _mm_loadu_ps(&m[1][0]);
+    __m128 r2 = _mm_loadu_ps(&m[2][0]);
+    __m128 r3 = _mm_loadu_ps(&m[3][0]);
+
+#if SIMD_HAS_DPPS
+    float x = _mm_cvtss_f32(_mm_dp_ps(r0, vec, 0xF1));
+    float y = _mm_cvtss_f32(_mm_dp_ps(r1, vec, 0xF1));
+    float z = _mm_cvtss_f32(_mm_dp_ps(r2, vec, 0xF1));
+    float w = _mm_cvtss_f32(_mm_dp_ps(r3, vec, 0xF1));
+#else
+    float x = hsum_ps(_mm_mul_ps(r0, vec));
+    float y = hsum_ps(_mm_mul_ps(r1, vec));
+    float z = hsum_ps(_mm_mul_ps(r2, vec));
+    float w = hsum_ps(_mm_mul_ps(r3, vec));
+#endif
+    return Vector4{ x, y, z, w };
+#else
+    return Vector4{
+        m[0][0]*v.x + m[0][1]*v.y + m[0][2]*v.z + m[0][3]*v.w,
+        m[1][0]*v.x + m[1][1]*v.y + m[1][2]*v.z + m[1][3]*v.w,
+        m[2][0]*v.x + m[2][1]*v.y + m[2][2]*v.z + m[2][3]*v.w,
+        m[3][0]*v.x + m[3][1]*v.y + m[3][2]*v.z + m[3][3]*v.w
+    };
+#endif
+}