THE COST OF PURITY



REF/OUT  vs  READONLY



the code is here

benchmark

100 000 Float3 values
100 Float4x4 matrices

sum = 0;
foreach float3 and foreach float4x4
    sum += TransformCoordinate(float3, float4x4).X

* sum exists only to prevent the JIT compiler from optimizing away our calls

The field

/// <summary>
/// Matrix value type made of public <c>float</c> fields named M followed by two
/// digits, from M11 to M44 (presumably row then column; confirm against the full
/// source).
/// </summary>
public struct Float4x4
{
    public float M11;
    public float M12;
    public float M13;
    // The "..." on the next line is the slide's own elision of the field
    // declarations that sit between M13 and M42; it is not C# syntax.
    ...
    public float M42;
    public float M43;
    public float M44;
}
/// <summary>
/// Three single-precision components stored as public, mutable fields.
/// </summary>
public struct Float3
{
    /// <summary>First component.</summary>
    public float X;

    /// <summary>Second component.</summary>
    public float Y;

    /// <summary>Third component.</summary>
    public float Z;

    /// <summary>
    /// Initializes all three components. Required by callers that write
    /// <c>new Float3(x, y, z)</c>; the fields stay public so field-by-field
    /// assignment keeps working as well.
    /// </summary>
    /// <param name="x">Value stored in <see cref="X"/>.</param>
    /// <param name="y">Value stored in <see cref="Y"/>.</param>
    /// <param name="z">Value stored in <see cref="Z"/>.</param>
    public Float3(float x, float y, float z)
    {
        X = x;
        Y = y;
        Z = z;
    }
}
/// <summary>
/// Four single-precision components stored as public, mutable fields.
/// </summary>
public struct Float4
{
    /// <summary>First component.</summary>
    public float X;

    /// <summary>Second component.</summary>
    public float Y;

    /// <summary>Third component.</summary>
    public float Z;

    /// <summary>Fourth component.</summary>
    public float W;

    /// <summary>
    /// Initializes all four components. Required by callers that write
    /// <c>new Float4(x, y, z, w)</c>; the fields stay public so a local can still
    /// be filled in field by field without calling a constructor.
    /// </summary>
    /// <param name="x">Value stored in <see cref="X"/>.</param>
    /// <param name="y">Value stored in <see cref="Y"/>.</param>
    /// <param name="z">Value stored in <see cref="Z"/>.</param>
    /// <param name="w">Value stored in <see cref="W"/>.</param>
    public Float4(float x, float y, float z, float w)
    {
        X = x;
        Y = y;
        Z = z;
        W = w;
    }
}
/// <summary>
/// Transforms <paramref name="input"/> by <paramref name="transform"/>, treating the
/// point as (X, Y, Z, 1), and scales the result by the reciprocal of the computed W.
/// </summary>
/// <param name="input">Point to transform; passed by value.</param>
/// <param name="transform">
/// Matrix whose M1c, M2c and M3c entries weight X, Y and Z and whose M4c entry is
/// added; passed by value.
/// </param>
/// <returns>A new <c>Float3</c> holding X, Y and Z, each multiplied by 1/W.</returns>
public static Float3 TransformCoordinate(
    Float3 input, Float4x4 transform)
{
    // The intermediate result lives in a Float4 local that is filled field by field.
    Float4 homogeneous;
    homogeneous.X = input.X * transform.M11
        + input.Y * transform.M21
        + input.Z * transform.M31
        + transform.M41;
    homogeneous.Y = input.X * transform.M12
        + input.Y * transform.M22
        + input.Z * transform.M32
        + transform.M42;
    homogeneous.Z = input.X * transform.M13
        + input.Y * transform.M23
        + input.Z * transform.M33
        + transform.M43;
    // W holds the reciprocal so the return statement multiplies instead of dividing.
    homogeneous.W = 1 / (input.X * transform.M14
        + input.Y * transform.M24
        + input.Z * transform.M34
        + transform.M44);
    return new Float3(
        homogeneous.X * homogeneous.W,
        homogeneous.Y * homogeneous.W,
        homogeneous.Z * homogeneous.W);
}

The [pure] version

/// <summary>
/// By-value variant: both arguments are copied in and the result is returned as a
/// new value. The point is treated as (X, Y, Z, 1) and the result is scaled by the
/// reciprocal of the computed W.
/// </summary>
/// <param name="input">Point to transform; passed by value.</param>
/// <param name="transform">Matrix to apply; passed by value.</param>
/// <returns>A new <c>Float3</c> holding X, Y and Z, each multiplied by 1/W.</returns>
public static Float3 TransformCoordinate1(
    Float3 input, Float4x4 transform)
{
    // The intermediate result lives in a Float4 local that is filled field by field.
    Float4 homogeneous;
    homogeneous.X = input.X * transform.M11
        + input.Y * transform.M21
        + input.Z * transform.M31
        + transform.M41;
    homogeneous.Y = input.X * transform.M12
        + input.Y * transform.M22
        + input.Z * transform.M32
        + transform.M42;
    homogeneous.Z = input.X * transform.M13
        + input.Y * transform.M23
        + input.Z * transform.M33
        + transform.M43;
    // W holds the reciprocal so the return statement multiplies instead of dividing.
    homogeneous.W = 1 / (input.X * transform.M14
        + input.Y * transform.M24
        + input.Z * transform.M34
        + transform.M44);
    return new Float3(
        homogeneous.X * homogeneous.W,
        homogeneous.Y * homogeneous.W,
        homogeneous.Z * homogeneous.W);
}
395 ms

strange, but...

/// <summary>
/// By-value variant that computes the four components into scalar locals, packs
/// them into a <c>Float4</c> through its constructor, and then reads them back for
/// the final scaling by 1/W.
/// </summary>
/// <param name="input">Point to transform; passed by value.</param>
/// <param name="transform">Matrix to apply; passed by value.</param>
/// <returns>A new <c>Float3</c> holding X, Y and Z, each multiplied by 1/W.</returns>
public static Float3 TransformCoordinate2(
    Float3 input, Float4x4 transform)
{
    var x = input.X * transform.M11
        + input.Y * transform.M21
        + input.Z * transform.M31
        + transform.M41;
    var y = input.X * transform.M12
        + input.Y * transform.M22
        + input.Z * transform.M32
        + transform.M42;
    var z = input.X * transform.M13
        + input.Y * transform.M23
        + input.Z * transform.M33
        + transform.M43;
    // Reciprocal of W, so the final step multiplies instead of dividing.
    var invW = 1 / (input.X * transform.M14
        + input.Y * transform.M24
        + input.Z * transform.M34
        + transform.M44);
    // The Float4 temporary is constructed in one call rather than field by field.
    var packed = new Float4(x, y, z, invW);
    return new Float3(
        packed.X * packed.W,
        packed.Y * packed.W,
        packed.Z * packed.W);
}
370 ms

optimize locals...

/// <summary>
/// By-value variant that keeps every intermediate in a scalar local; no temporary
/// four-component struct is created. The point is treated as (X, Y, Z, 1) and the
/// result is scaled by the reciprocal of the computed W.
/// </summary>
/// <param name="input">Point to transform; passed by value.</param>
/// <param name="transform">Matrix to apply; passed by value.</param>
/// <returns>A new <c>Float3</c> holding X, Y and Z, each multiplied by 1/W.</returns>
public static Float3 TransformCoordinate3(
    Float3 input, Float4x4 transform)
{
    var x = input.X * transform.M11
        + input.Y * transform.M21
        + input.Z * transform.M31
        + transform.M41;
    var y = input.X * transform.M12
        + input.Y * transform.M22
        + input.Z * transform.M32
        + transform.M42;
    var z = input.X * transform.M13
        + input.Y * transform.M23
        + input.Z * transform.M33
        + transform.M43;
    // Reciprocal of W, so the final step multiplies instead of dividing.
    var invW = 1 / (input.X * transform.M14
        + input.Y * transform.M24
        + input.Z * transform.M34
        + transform.M44);
    return new Float3(x * invW, y * invW, z * invW);
}
343 ms

ref for transform...

/// <summary>
/// Variant that takes the matrix by reference and the point by value. The point is
/// treated as (X, Y, Z, 1) and the result is scaled by the reciprocal of the
/// computed W.
/// </summary>
/// <param name="input">Point to transform; passed by value.</param>
/// <param name="transform">
/// Matrix to apply; passed by <c>ref</c> and only read, never assigned, here.
/// </param>
/// <returns>A new <c>Float3</c> holding X, Y and Z, each multiplied by 1/W.</returns>
public static Float3 TransformCoordinate4(
    Float3 input, ref Float4x4 transform)
{
    var x = input.X * transform.M11
        + input.Y * transform.M21
        + input.Z * transform.M31
        + transform.M41;
    var y = input.X * transform.M12
        + input.Y * transform.M22
        + input.Z * transform.M32
        + transform.M42;
    var z = input.X * transform.M13
        + input.Y * transform.M23
        + input.Z * transform.M33
        + transform.M43;
    // Reciprocal of W, so the final step multiplies instead of dividing.
    var invW = 1 / (input.X * transform.M14
        + input.Y * transform.M24
        + input.Z * transform.M34
        + transform.M44);
    return new Float3(x * invW, y * invW, z * invW);
}
294 ms

ref for point...

/// <summary>
/// Variant that takes the point by reference and the matrix by value. The point is
/// treated as (X, Y, Z, 1) and the result is scaled by the reciprocal of the
/// computed W.
/// </summary>
/// <param name="input">
/// Point to transform; passed by <c>ref</c> and only read, never assigned, here.
/// </param>
/// <param name="transform">Matrix to apply; passed by value.</param>
/// <returns>A new <c>Float3</c> holding X, Y and Z, each multiplied by 1/W.</returns>
public static Float3 TransformCoordinate5(
    ref Float3 input, Float4x4 transform)
{
    var x = input.X * transform.M11
        + input.Y * transform.M21
        + input.Z * transform.M31
        + transform.M41;
    var y = input.X * transform.M12
        + input.Y * transform.M22
        + input.Z * transform.M32
        + transform.M42;
    var z = input.X * transform.M13
        + input.Y * transform.M23
        + input.Z * transform.M33
        + transform.M43;
    // Reciprocal of W, so the final step multiplies instead of dividing.
    var invW = 1 / (input.X * transform.M14
        + input.Y * transform.M24
        + input.Z * transform.M34
        + transform.M44);
    return new Float3(x * invW, y * invW, z * invW);
}
340 ms

ref for both

/// <summary>
/// Variant that takes both the point and the matrix by reference and returns the
/// result by value. The point is treated as (X, Y, Z, 1) and the result is scaled
/// by the reciprocal of the computed W.
/// </summary>
/// <param name="input">
/// Point to transform; passed by <c>ref</c> and only read, never assigned, here.
/// </param>
/// <param name="transform">
/// Matrix to apply; passed by <c>ref</c> and only read, never assigned, here.
/// </param>
/// <returns>A new <c>Float3</c> holding X, Y and Z, each multiplied by 1/W.</returns>
public static Float3 TransformCoordinate6(
    ref Float3 input, ref Float4x4 transform)
{
    var x = input.X * transform.M11
        + input.Y * transform.M21
        + input.Z * transform.M31
        + transform.M41;
    var y = input.X * transform.M12
        + input.Y * transform.M22
        + input.Z * transform.M32
        + transform.M42;
    var z = input.X * transform.M13
        + input.Y * transform.M23
        + input.Z * transform.M33
        + transform.M43;
    // Reciprocal of W, so the final step multiplies instead of dividing.
    var invW = 1 / (input.X * transform.M14
        + input.Y * transform.M24
        + input.Z * transform.M34
        + transform.M44);
    return new Float3(x * invW, y * invW, z * invW);
}
171 ms

out and ref...

/// <summary>
/// Variant that takes both inputs by reference and delivers the result through an
/// <c>out</c> parameter, assigning it from a freshly constructed <c>Float3</c>.
/// The point is treated as (X, Y, Z, 1) and the result is scaled by the reciprocal
/// of the computed W.
/// </summary>
/// <param name="input">
/// Point to transform; passed by <c>ref</c> and only read, never assigned, here.
/// </param>
/// <param name="transform">
/// Matrix to apply; passed by <c>ref</c> and only read, never assigned, here.
/// </param>
/// <param name="result">Receives X, Y and Z, each multiplied by 1/W.</param>
public static void TransformCoordinate10(
    ref Float3 input, ref Float4x4 transform, out Float3 result)
{
    var x = input.X * transform.M11
        + input.Y * transform.M21
        + input.Z * transform.M31
        + transform.M41;
    var y = input.X * transform.M12
        + input.Y * transform.M22
        + input.Z * transform.M32
        + transform.M42;
    var z = input.X * transform.M13
        + input.Y * transform.M23
        + input.Z * transform.M33
        + transform.M43;
    // Reciprocal of W, so the final step multiplies instead of dividing.
    var invW = 1 / (input.X * transform.M14
        + input.Y * transform.M24
        + input.Z * transform.M34
        + transform.M44);
    result = new Float3(x * invW, y * invW, z * invW);
}
150 ms

out and ref transform...

/// <summary>
/// Variant that takes the point by value and the matrix by reference, and delivers
/// the result through an <c>out</c> parameter assigned from a freshly constructed
/// <c>Float3</c>. The point is treated as (X, Y, Z, 1) and the result is scaled by
/// the reciprocal of the computed W.
/// </summary>
/// <param name="input">Point to transform; passed by value.</param>
/// <param name="transform">
/// Matrix to apply; passed by <c>ref</c> and only read, never assigned, here.
/// </param>
/// <param name="result">Receives X, Y and Z, each multiplied by 1/W.</param>
public static void TransformCoordinate9(
    Float3 input, ref Float4x4 transform, out Float3 result)
{
    var x = input.X * transform.M11
        + input.Y * transform.M21
        + input.Z * transform.M31
        + transform.M41;
    var y = input.X * transform.M12
        + input.Y * transform.M22
        + input.Z * transform.M32
        + transform.M42;
    var z = input.X * transform.M13
        + input.Y * transform.M23
        + input.Z * transform.M33
        + transform.M43;
    // Reciprocal of W, so the final step multiplies instead of dividing.
    var invW = 1 / (input.X * transform.M14
        + input.Y * transform.M24
        + input.Z * transform.M34
        + transform.M44);
    result = new Float3(x * invW, y * invW, z * invW);
}
250 ms

out and ref point

/// <summary>
/// Variant that takes the point by reference and the matrix by value, and delivers
/// the result through an <c>out</c> parameter assigned from a freshly constructed
/// <c>Float3</c>. The point is treated as (X, Y, Z, 1) and the result is scaled by
/// the reciprocal of the computed W.
/// </summary>
/// <param name="input">
/// Point to transform; passed by <c>ref</c> and only read, never assigned, here.
/// </param>
/// <param name="transform">Matrix to apply; passed by value.</param>
/// <param name="result">Receives X, Y and Z, each multiplied by 1/W.</param>
public static void TransformCoordinate8(
    ref Float3 input, Float4x4 transform, out Float3 result)
{
    var x = input.X * transform.M11
        + input.Y * transform.M21
        + input.Z * transform.M31
        + transform.M41;
    var y = input.X * transform.M12
        + input.Y * transform.M22
        + input.Z * transform.M32
        + transform.M42;
    var z = input.X * transform.M13
        + input.Y * transform.M23
        + input.Z * transform.M33
        + transform.M43;
    // Reciprocal of W, so the final step multiplies instead of dividing.
    var invW = 1 / (input.X * transform.M14
        + input.Y * transform.M24
        + input.Z * transform.M34
        + transform.M44);
    result = new Float3(x * invW, y * invW, z * invW);
}
295 ms

out without ref...

/// <summary>
/// Variant that takes both inputs by value and delivers the result through an
/// <c>out</c> parameter assigned from a freshly constructed <c>Float3</c>. The
/// point is treated as (X, Y, Z, 1) and the result is scaled by the reciprocal of
/// the computed W.
/// </summary>
/// <param name="input">Point to transform; passed by value.</param>
/// <param name="transform">Matrix to apply; passed by value.</param>
/// <param name="result">Receives X, Y and Z, each multiplied by 1/W.</param>
public static void TransformCoordinate7(
    Float3 input, Float4x4 transform, out Float3 result)
{
    var x = input.X * transform.M11
        + input.Y * transform.M21
        + input.Z * transform.M31
        + transform.M41;
    var y = input.X * transform.M12
        + input.Y * transform.M22
        + input.Z * transform.M32
        + transform.M42;
    var z = input.X * transform.M13
        + input.Y * transform.M23
        + input.Z * transform.M33
        + transform.M43;
    // Reciprocal of W, so the final step multiplies instead of dividing.
    var invW = 1 / (input.X * transform.M14
        + input.Y * transform.M24
        + input.Z * transform.M34
        + transform.M44);
    result = new Float3(x * invW, y * invW, z * invW);
}
300 ms

goodbye, purity

/// <summary>
/// Variant that takes both inputs by reference and writes the result straight into
/// the fields of the <c>out</c> parameter, without constructing a <c>Float3</c>.
/// The point is treated as (X, Y, Z, 1) and the result is scaled by the reciprocal
/// of the computed W.
/// </summary>
/// <param name="input">
/// Point to transform; passed by <c>ref</c> and only read, never assigned, here.
/// </param>
/// <param name="transform">
/// Matrix to apply; passed by <c>ref</c> and only read, never assigned, here.
/// </param>
/// <param name="result">
/// Receives X, Y and Z, each multiplied by 1/W; all three fields are assigned
/// individually before the method returns.
/// </param>
public static void TransformCoordinate11(
    ref Float3 input, ref Float4x4 transform, out Float3 result)
{
    var x = input.X * transform.M11
        + input.Y * transform.M21
        + input.Z * transform.M31
        + transform.M41;
    var y = input.X * transform.M12
        + input.Y * transform.M22
        + input.Z * transform.M32
        + transform.M42;
    var z = input.X * transform.M13
        + input.Y * transform.M23
        + input.Z * transform.M33
        + transform.M43;
    // Reciprocal of W, so the final step multiplies instead of dividing.
    var invW = 1 / (input.X * transform.M14
        + input.Y * transform.M24
        + input.Z * transform.M34
        + transform.M44);
    // Field-by-field stores into the caller's storage; no constructor call.
    result.X = x * invW;
    result.Y = y * invW;
    result.Z = z * invW;
}
120 ms

to [Pure] or not to [Pure]

By Timur Seitosmanov

to [Pure] or not to [Pure]

Performance benchmark results for using C# ref and out keywords.

  • 1,382