C# SIMD(single instruction multiple data)

Code Snippets SIMD

About

In this code snippet, we’ll take a look at SIMD(single instruction multiple data) in C#.

To quote Microsoft: “SIMD (Single instruction, multiple data) provides hardware support for performing an operation on multiple pieces of data, in parallel, using a single instruction. In .NET, there’s set of SIMD-accelerated types under the System.Numerics namespace. SIMD operations can be parallelized at the hardware level. That increases the throughput of the vectorized computations, which are common in mathematical, scientific, and graphics apps.”

Some good resources from Microsoft on the topic: Hardware Intrinsics in .NET Core and Use SIMD-accelerated numeric types.
Additionally, here’s a good in-depth post about SIMD:  Basics of SIMD Programming

There are two ways to use SIMD in .NET:

Using System.Numerics.Vector<T>  a hardware-agnostic API that automatically uses the widest SIMD instructions supported by the current platform.

Or hardware Intrinsics System.Runtime.Intrinsics a low-level APIs that expose CPU-specific instruction sets such as SSE, AVX, AVX2, and ARM AdvSimd, giving developers fine-grained control over generated instructions.

All the numerics types that have SIMD acceleration support:

  • Vector2
  • Vector3
  • Vector4
  • Vector<T>
  • Quaternion
  • Plane
  • Matrix3x2
  • Matrix4x4
Let’s see how to use SIMD in the code examples below.

Vector Operations Code:

        static void VectorAdditionExample()
        {
            Vector4 v1 = new Vector4(1, 2, 3, 4);
            Vector4 v2 = new Vector4(5, 6, 7, 8);

            Vector4 result = v1 + v2;

            Console.WriteLine($"v1: {v1}");
            Console.WriteLine($"v2: {v2}");
            Console.WriteLine($"Result: {result}");
        }

        static void ScalarVsVectorPerformance()
        {
            const int iterations = 10000000;
            float[] array = new float[1024];

            //Init. array.
            for (int i = 0; i < array.Length; i++)
            {
                array[i] = i * 0.5f;
            }

            #region Scalar approach.

            Console.WriteLine("Beginning scalar approach...");

            var sw = Stopwatch.StartNew();

            float scalarResult = 0;
            for (int i = 0; i < iterations; i++)
            {
                for (int j = 0; j < array.Length; j++)
                {
                    scalarResult += array[j] * 2f;
                }
            }

            sw.Stop();

            Console.WriteLine($"Scalar approach: {sw.ElapsedMilliseconds}ms (Result: {scalarResult})");

            #endregion


            #region Vector approach.

            Console.WriteLine("Beginning vector approach...");

            sw.Restart();

            Vector4 vectorResult = Vector4.Zero;
            Vector4 multiplier = new Vector4(2f);
            for (int i = 0; i < iterations; i++)
            {
                for (int j = 0; j < array.Length; j += 4)
                {
                    Vector4 v = new Vector4(array[j], array[j + 1], array[j + 2], array[j + 3]);
                    vectorResult += v * multiplier;
                }
            }

            sw.Stop();

            Console.WriteLine($"Vector approach: {sw.ElapsedMilliseconds}ms (Result: {vectorResult.X})");

            #endregion
        }

        static void DotProductExample()
        {
            float[] a = new float[256];
            float[] b = new float[256];

            //Init. arrays.
            for (int i = 0; i < a.Length; i++)
            {
                a[i] = i * 0.1f;
                b[i] = i * 0.2f;
            }

            #region Scalar dot product aproach.

            var sw = Stopwatch.StartNew();

            float scalarDot = 0;
            for (int i = 0; i < 1_000_000; i++)
            {
                float result = 0;
                for (int j = 0; j < a.Length; j++)
                {
                    result += a[j] * b[j];
                }
                scalarDot += result;
            }

            sw.Stop();

            Console.WriteLine($"Scalar dot product: {sw.ElapsedMilliseconds}ms");

            #endregion


            #region  Vector dot product approach.

            sw.Restart();

            float vectorDot = 0;
            for (int i = 0; i < 1_000_000; i++)
            {
                float result = 0;
                for (int j = 0; j < a.Length; j += 4)
                {
                    Vector4 va = new Vector4(a[j], a[j + 1], a[j + 2], a[j + 3]);
                    Vector4 vb = new Vector4(b[j], b[j + 1], b[j + 2], b[j + 3]);
                    result += Vector4.Dot(va, vb);
                }
                vectorDot += result;
            }

            sw.Stop();

            Console.WriteLine($"Vector dot product: {sw.ElapsedMilliseconds}ms");

            #endregion
        }

        static void VectorManipulationExample()
        {
            Vector3 position = new Vector3(1, 2, 3);
            Vector3 velocity = new Vector3(0.1f, 0.2f, 0.3f);

            Console.WriteLine($"Initial position: {position}");
            Console.WriteLine($"Velocity: {velocity}");

            //Update position using vector operations.
            Vector3 newPosition = position + velocity * 10;
            Console.WriteLine($"Position after 10 time steps: {newPosition}");

            //Normalize velocity.
            Vector3 direction = Vector3.Normalize(velocity);
            Console.WriteLine($"Normalized direction: {direction}");

            //Calculate distance.
            float distance = Vector3.Distance(position, newPosition);
            Console.WriteLine($"Distance traveled: {distance:F2}");
        }

Resulting output:

Matrix Multiplication Code:

static void MatrixMultiplicationExample()
{
    const int matrixSize = 256;
    const int iterations = 100;

    float[] matrixA = new float[matrixSize * matrixSize];
    float[] matrixB = new float[matrixSize * matrixSize];
    float[] resultScalar = new float[matrixSize * matrixSize];
    float[] resultVector = new float[matrixSize * matrixSize];

    #region Init. matrices.

    InitializeMatrix(matrixA);
    InitializeMatrix(matrixB);

    #endregion


    #region Scalar matrix multiplication.

    Console.WriteLine("Beginning scalar matrix multiplication...");

    var sw = Stopwatch.StartNew();

    for (int iter = 0; iter < iterations; iter++)
    {
        MultiplyMatricesScalar(matrixA, matrixB, resultScalar, matrixSize);
    }

    sw.Stop();

    Console.WriteLine($"Scalar approach: {sw.ElapsedMilliseconds}ms");

    #endregion


    #region SIMD vector matrix multiplication.

    Console.WriteLine("Beginning vector matrix multiplication...");

    sw.Restart();

    for (int iter = 0; iter < iterations; iter++)
    {
        MultiplyMatricesVector(matrixA, matrixB, resultVector, matrixSize);
    }

    sw.Stop();

    Console.WriteLine($"Vector approach: {sw.ElapsedMilliseconds}ms");

    #endregion


    #region Verify results match

    bool resultsMatch = CompareMatrices(resultScalar, resultVector, matrixSize);
    Console.WriteLine($"Results match: {resultsMatch}");

    #endregion
}

static void InitializeMatrix(float[] matrix)
{
    for (int i = 0; i < matrix.Length; i++)
    {
        matrix[i] = i * 0.01f;
    }
}

static void MultiplyMatricesScalar(float[] a, float[] b, float[] result, int size)
{
    for (int i = 0; i < size; i++)
    {
        for (int j = 0; j < size; j++)
        {
            float sum = 0f;
            for (int k = 0; k < size; k++)
            {
                sum += a[i * size + k] * b[k * size + j];
            }

            result[i * size + j] = sum;
        }
    }
}

static void MultiplyMatricesVector(float[] a, float[] b, float[] result, int size)
{
    for (int i = 0; i < size; i++)
    {
        for (int j = 0; j < size; j++)
        {
            Vector4 sum = Vector4.Zero;
            int k = 0;

            //Process 4 elements at a time using SIMD.
            for (; k <= size - 4; k += 4)
            {
                Vector4 aVec = new Vector4(
                    a[i * size + k],
                    a[i * size + k + 1],
                    a[i * size + k + 2],
                    a[i * size + k + 3]);

                Vector4 bVec = new Vector4(
                    b[k * size + j],
                    b[(k + 1) * size + j],
                    b[(k + 2) * size + j],
                    b[(k + 3) * size + j]);

                sum += aVec * bVec;
            }

            //Handle remaining elements.
            float remaining = sum.X + sum.Y + sum.Z + sum.W;
            for (; k < size; k++)
            {
                remaining += a[i * size + k] * b[k * size + j];
            }

            result[i * size + j] = remaining;
        }
    }
}

static bool CompareMatrices(float[] a, float[] b, int size)
{
    const float tolerance = 0.0001f;

    for (int i = 0; i < a.Length; i++)
    {
        if (Math.Abs(a[i] - b[i]) > tolerance)
        {
            return false;
        }
    }

    return true;
}

Resulting output:

Vector<T> Code:

        static void VectorTExample()
        {
            //The count of a Vector<T> instance is fixed, but its value Vector<T>.Count depends on the CPU of the machine running the code.
            Console.WriteLine($"Vector<T> count (elements per vector): {Vector<float>.Count}");
            Console.WriteLine();

            Console.WriteLine("--- Example 1: Float Array Operations ---");
            VectorTFloatOperations();

            Console.WriteLine("\n--- Example 2: Integer Array Operations ---");
            VectorTIntegerOperations();

            Console.WriteLine("\n--- Example 3: Dot Product using Vector<T> ---");
            DotProductWithVectorT();

            Console.WriteLine("\n--- Example 4: Performance Comparison ---");
            VectorTPerformanceComparison();
        }

        static void VectorTFloatOperations()
        {
            float[] array1 = { 1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f };
            float[] array2 = { 8f, 7f, 6f, 5f, 4f, 3f, 2f, 1f };
            float[] result = new float[array1.Length];

            //The count of a Vector<T> instance is fixed, but its value Vector<T>.Count depends on the CPU of the machine running the code.
            int vectorSize = Vector<float>.Count;

            for (int i = 0; i <= array1.Length - vectorSize; i += vectorSize)
            {
                Vector<float> v1 = new Vector<float>(array1, i);
                Vector<float> v2 = new Vector<float>(array2, i);
                Vector<float> sum = v1 + v2;

                sum.CopyTo(result, i);
            }

            Console.WriteLine("Array 1: " + string.Join(", ", array1));
            Console.WriteLine("Array 2: " + string.Join(", ", array2));
            Console.WriteLine("Result:  " + string.Join(", ", result));
        }

        static void VectorTIntegerOperations()
        {
            int[] array1 = { 10, 20, 30, 40, 50, 60, 70, 80 };
            int[] array2 = { 1, 2, 3, 4, 5, 6, 7, 8 };
            int[] result = new int[array1.Length];

            //The count of a Vector<T> instance is fixed, but its value Vector<T>.Count depends on the CPU of the machine running the code.
            int vectorSize = Vector<int>.Count;

            for (int i = 0; i <= array1.Length - vectorSize; i += vectorSize)
            {
                Vector<int> v1 = new Vector<int>(array1, i);
                Vector<int> v2 = new Vector<int>(array2, i);

                Vector<int> multiplied = v1 * v2;

                multiplied.CopyTo(result, i);
            }

            Console.WriteLine("Array 1: " + string.Join(", ", array1));
            Console.WriteLine("Array 2: " + string.Join(", ", array2));
            Console.WriteLine("Result:  " + string.Join(", ", result));
        }

        static void DotProductWithVectorT()
        {
            float[] vectorA = new float[256];
            float[] vectorB = new float[256];

            #region Init. vectors.

            for (int i = 0; i < vectorA.Length; i++)
            {
                vectorA[i] = i * 0.5f;
                vectorB[i] = i * 0.25f;
            }

            #endregion


            #region Scalar dot product.

            var sw = Stopwatch.StartNew();

            float scalarDot = 0;
            for (int i = 0; i < 100000; i++)
            {
                float result = 0;
                for (int j = 0; j < vectorA.Length; j++)
                { 
                    result += vectorA[j] * vectorB[j];
                }

                scalarDot += result;
            }

            sw.Stop();

            Console.WriteLine($"Scalar dot product (100k iterations): {sw.ElapsedMilliseconds}ms");

            #endregion


            #region Vector<T> dot product.

            sw.Restart();

            float vectorTDot = 0;
            int vectorSize = Vector<float>.Count;

            for (int i = 0; i < 100000; i++)
            {
                float result = 0;
                for (int j = 0; j <= vectorA.Length - vectorSize; j += vectorSize)
                {
                    Vector<float> va = new Vector<float>(vectorA, j);
                    Vector<float> vb = new Vector<float>(vectorB, j);

                    Vector<float> product = va * vb;

                    //Sum vector elements.
                    for (int k = 0; k < Vector<float>.Count; k++)
                    { 
                        result += product[k];
                    }
                }

                vectorTDot += result;
            }

            sw.Stop();

            Console.WriteLine($"Vector<T> dot product (100k iterations): {sw.ElapsedMilliseconds}ms");

            #endregion
        }

        static void VectorTPerformanceComparison()
        {
            const int arraySize = 8192;
            const int iterations = 100_000;

            float[] sourceArray = new float[arraySize];
            float[] resultArray = new float[arraySize];

            #region Init. source array.

            for (int i = 0; i < sourceArray.Length; i++)
            {
                sourceArray[i] = i * 0.1f;
            }

            #endregion


            #region Scalar multiplication.

            Console.WriteLine("Beginning scalar multiplication...");

            var sw = Stopwatch.StartNew();

            for (int iter = 0; iter < iterations; iter++)
            {
                for (int i = 0; i < sourceArray.Length; i++)
                {
                    resultArray[i] = sourceArray[i] * 3.14f;
                }
            }

            sw.Stop();

            Console.WriteLine($"Scalar approach: {sw.ElapsedMilliseconds}ms");

            #endregion


            #region Vector<T> multiplication.

            Console.WriteLine("Beginning Vector<T> multiplication...");

            Array.Clear(resultArray);
            sw.Restart();

            int vectorSize = Vector<float>.Count;
            Vector<float> multiplier = new Vector<float>(3.14f);

            for (int iter = 0; iter < iterations; iter++)
            {
                for (int i = 0; i <= sourceArray.Length - vectorSize; i += vectorSize)
                {
                    Vector<float> source = new Vector<float>(sourceArray, i);
                    Vector<float> product = source * multiplier;

                    product.CopyTo(resultArray, i);
                }
            }

            sw.Stop();

            Console.WriteLine($"Vector<T> approach: {sw.ElapsedMilliseconds}ms");

            #endregion
        }

Resulting output:

Matrix4x4 Code:

    static void Matrix4x4Example()
    {
        Console.WriteLine("--- Basic Matrix4x4 Operations ---");
        BasicMatrix4x4Operations();

        Console.WriteLine("\n--- Matrix4x4 Transformations ---");
        Matrix4x4Transformations();

        Console.WriteLine("\n--- Matrix4x4 Performance Comparison ---");
        Matrix4x4PerformanceComparison();
    }

    static void BasicMatrix4x4Operations()
    {
        //init. a matrix.
        var m1 = new Matrix4x4(
            1.1f, 1.2f, 1.3f, 1.4f,
            2.1f, 2.2f, 2.3f, 2.4f,
            3.1f, 3.2f, 3.3f, 3.4f,
            4.1f, 4.2f, 4.3f, 4.4f);

        Console.WriteLine("Matrix M1:");
        PrintMatrix(m1);


        //Transpose the matrix.
        var m2 = Matrix4x4.Transpose(m1);

        Console.WriteLine("\nMatrix M2 (M1 Transposed):");
        PrintMatrix(m2);


        //Multiply the matrices.
        var mResult = Matrix4x4.Multiply(m1, m2);

        Console.WriteLine("\nMatrix Result (M1 * M2):");
        PrintMatrix(mResult);


        //Check determinant.
        float determinant = m1.GetDeterminant();
        Console.WriteLine($"\nDeterminant of M1: {determinant}");


        //Try to invert the matrix
        if (Matrix4x4.Invert(m1, out Matrix4x4 invertedM1))
        {
            Console.WriteLine("\nInverted M1:");
            PrintMatrix(invertedM1);
        }
        else
        {
            Console.WriteLine("Matrix M1 cannot be inverted (singular matrix)");
        }
    }

    static void Matrix4x4Transformations()
    {
        //Create transformation matrices.
        var translationMatrix = Matrix4x4.CreateTranslation(new Vector3(5, 10, 15));
        var scaleMatrix = Matrix4x4.CreateScale(new Vector3(2, 3, 4));
        var rotationMatrix = Matrix4x4.CreateRotationZ(MathF.PI / 4); // 45 degrees

        Console.WriteLine("Translation Matrix:");
        PrintMatrix(translationMatrix);

        Console.WriteLine("\nScale Matrix:");
        PrintMatrix(scaleMatrix);

        Console.WriteLine("\nRotation Matrix (45° around Z-axis):");
        PrintMatrix(rotationMatrix);


        //Combine transformations: Rotate -> Scale -> Translate
        var combinedMatrix = rotationMatrix * scaleMatrix * translationMatrix;

        Console.WriteLine("\nCombined Transformation Matrix:");
        PrintMatrix(combinedMatrix);


        //Transform a vector.
        Vector3 point = new Vector3(1, 1, 1);
        Vector4 point4d = new Vector4(point, 1);

        Vector4 transformedPoint = Vector4.Transform(point4d, combinedMatrix);

        Console.WriteLine($"\nOriginal point: {point}");
        Console.WriteLine($"Transformed point: {new Vector3(transformedPoint.X, transformedPoint.Y, transformedPoint.Z)}");
    }

    static void Matrix4x4PerformanceComparison()
    {
        const int iterations = 100_000;

        #region Init. matrices.

        var m1 = new Matrix4x4(
            1.1f, 1.2f, 1.3f, 1.4f,
            2.1f, 2.2f, 2.3f, 2.4f,
            3.1f, 3.2f, 3.3f, 3.4f,
            4.1f, 4.2f, 4.3f, 4.4f);

        var m2 = new Matrix4x4(
            5.1f, 5.2f, 5.3f, 5.4f,
            6.1f, 6.2f, 6.3f, 6.4f,
            7.1f, 7.2f, 7.3f, 7.4f,
            8.1f, 8.2f, 8.3f, 8.4f);

        #endregion


        #region Matrix multiplication.

        Console.WriteLine("Beginning matrix multiplication...");

        var sw = Stopwatch.StartNew();

        Matrix4x4 result = Matrix4x4.Identity;
        for (int i = 0; i < iterations; i++)
        {
            result = Matrix4x4.Multiply(m1, m2);
        }

        sw.Stop();

        Console.WriteLine($"Matrix4x4.Multiply: {sw.ElapsedMilliseconds}ms");

        #endregion


        #region Matrix multiplication using operator.

        Console.WriteLine("Beginning matrix multiplication (operator)...");

        sw.Restart();

        result = Matrix4x4.Identity;
        for (int i = 0; i < iterations; i++)
        {
            result = m1 * m2;
        }

        sw.Stop();

        Console.WriteLine($"Matrix4x4 * operator: {sw.ElapsedMilliseconds}ms");

        #endregion


        #region Transpose operation.

        Console.WriteLine("Beginning transpose operation...");

        sw.Restart();

        Matrix4x4 transposed = Matrix4x4.Identity;
        for (int i = 0; i < iterations; i++)
        {
            transposed = Matrix4x4.Transpose(m1);
        }

        sw.Stop();

        Console.WriteLine($"Matrix4x4.Transpose: {sw.ElapsedMilliseconds}ms");

        #endregion
    }

    static void PrintMatrix(Matrix4x4 matrix)
    {
        Console.WriteLine($"  {matrix.M11:F2} {matrix.M12:F2} {matrix.M13:F2} {matrix.M14:F2}");
        Console.WriteLine($"  {matrix.M21:F2} {matrix.M22:F2} {matrix.M23:F2} {matrix.M24:F2}");
        Console.WriteLine($"  {matrix.M31:F2} {matrix.M32:F2} {matrix.M33:F2} {matrix.M34:F2}");
        Console.WriteLine($"  {matrix.M41:F2} {matrix.M42:F2} {matrix.M43:F2} {matrix.M44:F2}");
    }

Resulting output:

Entire Code:

using System.Diagnostics;
using System.Numerics;

namespace SIMD
{
    internal class Program
    {
        static void Main(string[] args)
        {
            Console.WriteLine("--- Example 1: Vector Addition ---");
            VectorAdditionExample();

            Console.WriteLine("\n--- Example 2: Scalar vs Vector Performance ---");
            ScalarVsVectorPerformance();

            Console.WriteLine("\n--- Example 3: Dot Product (SIMD vs Scalar) ---");
            DotProductExample();

            Console.WriteLine("\n--- Example 4: Vector Manipulation ---");
            VectorManipulationExample();

            Console.WriteLine("\n--- Example 5: Matrix Multiplication (SIMD vs Scalar) ---");
            MatrixMultiplicationExample();

            Console.WriteLine("\n--- Example 6: Vector<T> Generic SIMD Operations ---");
            VectorTExample();

            Console.WriteLine("\n--- Example 7: Matrix4x4 Operations ---");
            Matrix4x4Example();
        }



        static void VectorAdditionExample()
        {
            Vector4 v1 = new Vector4(1, 2, 3, 4);
            Vector4 v2 = new Vector4(5, 6, 7, 8);

            Vector4 result = v1 + v2;

            Console.WriteLine($"v1: {v1}");
            Console.WriteLine($"v2: {v2}");
            Console.WriteLine($"Result: {result}");
        }

        static void ScalarVsVectorPerformance()
        {
            const int iterations = 10000000;
            float[] array = new float[1024];

            //Init. array.
            for (int i = 0; i < array.Length; i++)
            {
                array[i] = i * 0.5f;
            }

            #region Scalar approach.

            Console.WriteLine("Beginning scalar approach...");

            var sw = Stopwatch.StartNew();

            float scalarResult = 0;
            for (int i = 0; i < iterations; i++)
            {
                for (int j = 0; j < array.Length; j++)
                {
                    scalarResult += array[j] * 2f;
                }
            }

            sw.Stop();

            Console.WriteLine($"Scalar approach: {sw.ElapsedMilliseconds}ms (Result: {scalarResult})");

            #endregion


            #region Vector approach.

            Console.WriteLine("Beginning vector approach...");

            sw.Restart();

            Vector4 vectorResult = Vector4.Zero;
            Vector4 multiplier = new Vector4(2f);
            for (int i = 0; i < iterations; i++)
            {
                for (int j = 0; j < array.Length; j += 4)
                {
                    Vector4 v = new Vector4(array[j], array[j + 1], array[j + 2], array[j + 3]);
                    vectorResult += v * multiplier;
                }
            }

            sw.Stop();

            Console.WriteLine($"Vector approach: {sw.ElapsedMilliseconds}ms (Result: {vectorResult.X})");

            #endregion
        }

        static void DotProductExample()
        {
            float[] a = new float[256];
            float[] b = new float[256];

            //Init. arrays.
            for (int i = 0; i < a.Length; i++)
            {
                a[i] = i * 0.1f;
                b[i] = i * 0.2f;
            }

            #region Scalar dot product aproach.

            var sw = Stopwatch.StartNew();

            float scalarDot = 0;
            for (int i = 0; i < 1_000_000; i++)
            {
                float result = 0;
                for (int j = 0; j < a.Length; j++)
                {
                    result += a[j] * b[j];
                }
                scalarDot += result;
            }

            sw.Stop();

            Console.WriteLine($"Scalar dot product: {sw.ElapsedMilliseconds}ms");

            #endregion


            #region  Vector dot product approach.

            sw.Restart();

            float vectorDot = 0;
            for (int i = 0; i < 1_000_000; i++)
            {
                float result = 0;
                for (int j = 0; j < a.Length; j += 4)
                {
                    Vector4 va = new Vector4(a[j], a[j + 1], a[j + 2], a[j + 3]);
                    Vector4 vb = new Vector4(b[j], b[j + 1], b[j + 2], b[j + 3]);
                    result += Vector4.Dot(va, vb);
                }
                vectorDot += result;
            }

            sw.Stop();

            Console.WriteLine($"Vector dot product: {sw.ElapsedMilliseconds}ms");

            #endregion
        }

        static void VectorManipulationExample()
        {
            Vector3 position = new Vector3(1, 2, 3);
            Vector3 velocity = new Vector3(0.1f, 0.2f, 0.3f);

            Console.WriteLine($"Initial position: {position}");
            Console.WriteLine($"Velocity: {velocity}");

            //Update position using vector operations.
            Vector3 newPosition = position + velocity * 10;
            Console.WriteLine($"Position after 10 time steps: {newPosition}");

            //Normalize velocity.
            Vector3 direction = Vector3.Normalize(velocity);
            Console.WriteLine($"Normalized direction: {direction}");

            //Calculate distance.
            float distance = Vector3.Distance(position, newPosition);
            Console.WriteLine($"Distance traveled: {distance:F2}");
        }



        static void MatrixMultiplicationExample()
        {
            const int matrixSize = 256;
            const int iterations = 100;

            float[] matrixA = new float[matrixSize * matrixSize];
            float[] matrixB = new float[matrixSize * matrixSize];
            float[] resultScalar = new float[matrixSize * matrixSize];
            float[] resultVector = new float[matrixSize * matrixSize];

            #region Init. matrices.

            InitializeMatrix(matrixA);
            InitializeMatrix(matrixB);

            #endregion


            #region Scalar matrix multiplication.

            Console.WriteLine("Beginning scalar matrix multiplication...");

            var sw = Stopwatch.StartNew();

            for (int iter = 0; iter < iterations; iter++)
            {
                MultiplyMatricesScalar(matrixA, matrixB, resultScalar, matrixSize);
            }

            sw.Stop();

            Console.WriteLine($"Scalar approach: {sw.ElapsedMilliseconds}ms");

            #endregion


            #region SIMD vector matrix multiplication.

            Console.WriteLine("Beginning vector matrix multiplication...");

            sw.Restart();

            for (int iter = 0; iter < iterations; iter++)
            {
                MultiplyMatricesVector(matrixA, matrixB, resultVector, matrixSize);
            }

            sw.Stop();

            Console.WriteLine($"Vector approach: {sw.ElapsedMilliseconds}ms");

            #endregion


            #region Verify results match

            bool resultsMatch = CompareMatrices(resultScalar, resultVector, matrixSize);
            Console.WriteLine($"Results match: {resultsMatch}");

            #endregion
        }

        static void InitializeMatrix(float[] matrix)
        {
            for (int i = 0; i < matrix.Length; i++)
            {
                matrix[i] = i * 0.01f;
            }
        }

        static void MultiplyMatricesScalar(float[] a, float[] b, float[] result, int size)
        {
            for (int i = 0; i < size; i++)
            {
                for (int j = 0; j < size; j++)
                {
                    float sum = 0f;
                    for (int k = 0; k < size; k++)
                    {
                        sum += a[i * size + k] * b[k * size + j];
                    }

                    result[i * size + j] = sum;
                }
            }
        }

        static void MultiplyMatricesVector(float[] a, float[] b, float[] result, int size)
        {
            for (int i = 0; i < size; i++)
            {
                for (int j = 0; j < size; j++)
                {
                    Vector4 sum = Vector4.Zero;
                    int k = 0;

                    //Process 4 elements at a time using SIMD.
                    for (; k <= size - 4; k += 4)
                    {
                        Vector4 aVec = new Vector4(
                            a[i * size + k],
                            a[i * size + k + 1],
                            a[i * size + k + 2],
                            a[i * size + k + 3]);

                        Vector4 bVec = new Vector4(
                            b[k * size + j],
                            b[(k + 1) * size + j],
                            b[(k + 2) * size + j],
                            b[(k + 3) * size + j]);

                        sum += aVec * bVec;
                    }

                    //Handle remaining elements.
                    float remaining = sum.X + sum.Y + sum.Z + sum.W;
                    for (; k < size; k++)
                    {
                        remaining += a[i * size + k] * b[k * size + j];
                    }

                    result[i * size + j] = remaining;
                }
            }
        }

        static bool CompareMatrices(float[] a, float[] b, int size)
        {
            const float tolerance = 0.0001f;

            for (int i = 0; i < a.Length; i++)
            {
                if (Math.Abs(a[i] - b[i]) > tolerance)
                {
                    return false;
                }
            }

            return true;
        }



        static void VectorTExample()
        {
            //The count of a Vector<T> instance is fixed, but its value Vector<T>.Count depends on the CPU of the machine running the code.
            Console.WriteLine($"Vector<T> count (elements per vector): {Vector<float>.Count}");
            Console.WriteLine();

            Console.WriteLine("--- Example 1: Float Array Operations ---");
            VectorTFloatOperations();

            Console.WriteLine("\n--- Example 2: Integer Array Operations ---");
            VectorTIntegerOperations();

            Console.WriteLine("\n--- Example 3: Dot Product using Vector<T> ---");
            DotProductWithVectorT();

            Console.WriteLine("\n--- Example 4: Performance Comparison ---");
            VectorTPerformanceComparison();
        }

        static void VectorTFloatOperations()
        {
            float[] array1 = { 1f, 2f, 3f, 4f, 5f, 6f, 7f, 8f };
            float[] array2 = { 8f, 7f, 6f, 5f, 4f, 3f, 2f, 1f };
            float[] result = new float[array1.Length];

            //The count of a Vector<T> instance is fixed, but its value Vector<T>.Count depends on the CPU of the machine running the code.
            int vectorSize = Vector<float>.Count;

            for (int i = 0; i <= array1.Length - vectorSize; i += vectorSize)
            {
                Vector<float> v1 = new Vector<float>(array1, i);
                Vector<float> v2 = new Vector<float>(array2, i);
                Vector<float> sum = v1 + v2;

                sum.CopyTo(result, i);
            }

            Console.WriteLine("Array 1: " + string.Join(", ", array1));
            Console.WriteLine("Array 2: " + string.Join(", ", array2));
            Console.WriteLine("Result:  " + string.Join(", ", result));
        }

        static void VectorTIntegerOperations()
        {
            int[] array1 = { 10, 20, 30, 40, 50, 60, 70, 80 };
            int[] array2 = { 1, 2, 3, 4, 5, 6, 7, 8 };
            int[] result = new int[array1.Length];

            //The count of a Vector<T> instance is fixed, but its value Vector<T>.Count depends on the CPU of the machine running the code.
            int vectorSize = Vector<int>.Count;

            for (int i = 0; i <= array1.Length - vectorSize; i += vectorSize)
            {
                Vector<int> v1 = new Vector<int>(array1, i);
                Vector<int> v2 = new Vector<int>(array2, i);

                Vector<int> multiplied = v1 * v2;

                multiplied.CopyTo(result, i);
            }

            Console.WriteLine("Array 1: " + string.Join(", ", array1));
            Console.WriteLine("Array 2: " + string.Join(", ", array2));
            Console.WriteLine("Result:  " + string.Join(", ", result));
        }

        static void DotProductWithVectorT()
        {
            float[] vectorA = new float[256];
            float[] vectorB = new float[256];

            #region Init. vectors.

            for (int i = 0; i < vectorA.Length; i++)
            {
                vectorA[i] = i * 0.5f;
                vectorB[i] = i * 0.25f;
            }

            #endregion


            #region Scalar dot product.

            var sw = Stopwatch.StartNew();

            float scalarDot = 0;
            for (int i = 0; i < 100000; i++)
            {
                float result = 0;
                for (int j = 0; j < vectorA.Length; j++)
                { 
                    result += vectorA[j] * vectorB[j];
                }

                scalarDot += result;
            }

            sw.Stop();

            Console.WriteLine($"Scalar dot product (100k iterations): {sw.ElapsedMilliseconds}ms");

            #endregion


            #region Vector<T> dot product.

            sw.Restart();

            float vectorTDot = 0;
            int vectorSize = Vector<float>.Count;

            for (int i = 0; i < 100000; i++)
            {
                float result = 0;
                for (int j = 0; j <= vectorA.Length - vectorSize; j += vectorSize)
                {
                    Vector<float> va = new Vector<float>(vectorA, j);
                    Vector<float> vb = new Vector<float>(vectorB, j);

                    Vector<float> product = va * vb;

                    //Sum vector elements.
                    for (int k = 0; k < Vector<float>.Count; k++)
                    { 
                        result += product[k];
                    }
                }

                vectorTDot += result;
            }

            sw.Stop();

            Console.WriteLine($"Vector<T> dot product (100k iterations): {sw.ElapsedMilliseconds}ms");

            #endregion
        }

        static void VectorTPerformanceComparison()
        {
            const int arraySize = 8192;
            const int iterations = 100_000;

            float[] sourceArray = new float[arraySize];
            float[] resultArray = new float[arraySize];

            #region Init. source array.

            for (int i = 0; i < sourceArray.Length; i++)
            {
                sourceArray[i] = i * 0.1f;
            }

            #endregion


            #region Scalar multiplication.

            Console.WriteLine("Beginning scalar multiplication...");

            var sw = Stopwatch.StartNew();

            for (int iter = 0; iter < iterations; iter++)
            {
                for (int i = 0; i < sourceArray.Length; i++)
                {
                    resultArray[i] = sourceArray[i] * 3.14f;
                }
            }

            sw.Stop();

            Console.WriteLine($"Scalar approach: {sw.ElapsedMilliseconds}ms");

            #endregion


            #region Vector<T> multiplication.

            Console.WriteLine("Beginning Vector<T> multiplication...");

            Array.Clear(resultArray);
            sw.Restart();

            int vectorSize = Vector<float>.Count;
            Vector<float> multiplier = new Vector<float>(3.14f);

            for (int iter = 0; iter < iterations; iter++)
            {
                for (int i = 0; i <= sourceArray.Length - vectorSize; i += vectorSize)
                {
                    Vector<float> source = new Vector<float>(sourceArray, i);
                    Vector<float> product = source * multiplier;

                    product.CopyTo(resultArray, i);
                }
            }

            sw.Stop();

            Console.WriteLine($"Vector<T> approach: {sw.ElapsedMilliseconds}ms");

            #endregion
        }



        static void Matrix4x4Example()
        {
            Console.WriteLine("--- Basic Matrix4x4 Operations ---");
            BasicMatrix4x4Operations();

            Console.WriteLine("\n--- Matrix4x4 Transformations ---");
            Matrix4x4Transformations();

            Console.WriteLine("\n--- Matrix4x4 Performance Comparison ---");
            Matrix4x4PerformanceComparison();
        }

        static void BasicMatrix4x4Operations()
        {
            //init. a matrix.
            var m1 = new Matrix4x4(
                1.1f, 1.2f, 1.3f, 1.4f,
                2.1f, 2.2f, 2.3f, 2.4f,
                3.1f, 3.2f, 3.3f, 3.4f,
                4.1f, 4.2f, 4.3f, 4.4f);

            Console.WriteLine("Matrix M1:");
            PrintMatrix(m1);


            //Transpose the matrix.
            var m2 = Matrix4x4.Transpose(m1);

            Console.WriteLine("\nMatrix M2 (M1 Transposed):");
            PrintMatrix(m2);


            //Multiply the matrices.
            var mResult = Matrix4x4.Multiply(m1, m2);

            Console.WriteLine("\nMatrix Result (M1 * M2):");
            PrintMatrix(mResult);


            //Check determinant.
            float determinant = m1.GetDeterminant();
            Console.WriteLine($"\nDeterminant of M1: {determinant}");


            //Try to invert the matrix
            if (Matrix4x4.Invert(m1, out Matrix4x4 invertedM1))
            {
                Console.WriteLine("\nInverted M1:");
                PrintMatrix(invertedM1);
            }
            else
            {
                Console.WriteLine("Matrix M1 cannot be inverted (singular matrix)");
            }
        }

        static void Matrix4x4Transformations()
        {
            //Create transformation matrices.
            var translationMatrix = Matrix4x4.CreateTranslation(new Vector3(5, 10, 15));
            var scaleMatrix = Matrix4x4.CreateScale(new Vector3(2, 3, 4));
            var rotationMatrix = Matrix4x4.CreateRotationZ(MathF.PI / 4); // 45 degrees

            Console.WriteLine("Translation Matrix:");
            PrintMatrix(translationMatrix);

            Console.WriteLine("\nScale Matrix:");
            PrintMatrix(scaleMatrix);

            Console.WriteLine("\nRotation Matrix (45° around Z-axis):");
            PrintMatrix(rotationMatrix);


            //Combine transformations: Rotate -> Scale -> Translate
            var combinedMatrix = rotationMatrix * scaleMatrix * translationMatrix;

            Console.WriteLine("\nCombined Transformation Matrix:");
            PrintMatrix(combinedMatrix);


            //Transform a vector.
            Vector3 point = new Vector3(1, 1, 1);
            Vector4 point4d = new Vector4(point, 1);

            Vector4 transformedPoint = Vector4.Transform(point4d, combinedMatrix);

            Console.WriteLine($"\nOriginal point: {point}");
            Console.WriteLine($"Transformed point: {new Vector3(transformedPoint.X, transformedPoint.Y, transformedPoint.Z)}");
        }

        static void Matrix4x4PerformanceComparison()
        {
            const int iterations = 100_000;

            #region Init. matrices.

            var m1 = new Matrix4x4(
                1.1f, 1.2f, 1.3f, 1.4f,
                2.1f, 2.2f, 2.3f, 2.4f,
                3.1f, 3.2f, 3.3f, 3.4f,
                4.1f, 4.2f, 4.3f, 4.4f);

            var m2 = new Matrix4x4(
                5.1f, 5.2f, 5.3f, 5.4f,
                6.1f, 6.2f, 6.3f, 6.4f,
                7.1f, 7.2f, 7.3f, 7.4f,
                8.1f, 8.2f, 8.3f, 8.4f);

            #endregion


            #region Matrix multiplication.

            Console.WriteLine("Beginning matrix multiplication...");

            var sw = Stopwatch.StartNew();

            Matrix4x4 result = Matrix4x4.Identity;
            for (int i = 0; i < iterations; i++)
            {
                result = Matrix4x4.Multiply(m1, m2);
            }

            sw.Stop();

            Console.WriteLine($"Matrix4x4.Multiply: {sw.ElapsedMilliseconds}ms");

            #endregion


            #region Matrix multiplication using operator.

            Console.WriteLine("Beginning matrix multiplication (operator)...");

            sw.Restart();

            result = Matrix4x4.Identity;
            for (int i = 0; i < iterations; i++)
            {
                result = m1 * m2;
            }

            sw.Stop();

            Console.WriteLine($"Matrix4x4 * operator: {sw.ElapsedMilliseconds}ms");

            #endregion


            #region Transpose operation.

            Console.WriteLine("Beginning transpose operation...");

            sw.Restart();

            Matrix4x4 transposed = Matrix4x4.Identity;
            for (int i = 0; i < iterations; i++)
            {
                transposed = Matrix4x4.Transpose(m1);
            }

            sw.Stop();

            Console.WriteLine($"Matrix4x4.Transpose: {sw.ElapsedMilliseconds}ms");

            #endregion
        }

        static void PrintMatrix(Matrix4x4 matrix)
        {
            Console.WriteLine($"  {matrix.M11:F2} {matrix.M12:F2} {matrix.M13:F2} {matrix.M14:F2}");
            Console.WriteLine($"  {matrix.M21:F2} {matrix.M22:F2} {matrix.M23:F2} {matrix.M24:F2}");
            Console.WriteLine($"  {matrix.M31:F2} {matrix.M32:F2} {matrix.M33:F2} {matrix.M34:F2}");
            Console.WriteLine($"  {matrix.M41:F2} {matrix.M42:F2} {matrix.M43:F2} {matrix.M44:F2}");
        }
    }
}

All Resulting output:

Leave a Reply

Your email address will not be published. Required fields are marked *

The following GDPR rules must be read and accepted:
This form collects your name, email and content so that we can keep track of the comments placed on the website. For more info check our privacy policy where you will get more info on where, how and why we store your data.