/*
matrix.c -- SIMDx86 Matrix Library
Written by Patrick Baggett, 2005 (baggett.patrick@gmail.com)
	Released under the LGPL license
Part of SIMDx86 Project
*/
#include <SIMDx86/matrix.h>
#include <string.h> /* memcpy() */
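
/*
	Each routine below is compiled in one of three mutually exclusive variants,
	selected at build time: SSE inline assembly (-DUSE_SSE), 3DNow! inline
	assembly (-DUSE_3DNOW), or a portable scalar C fallback. The 3DNow! paths
	end with 'femms' to leave MMX state unless NO_EMMS is defined. A typical
	build line might look like this (illustrative only):

		gcc -c -O2 -DUSE_SSE matrix.c
*/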
void SIMDx86Matrix_Sum(SIMDx86Matrix* Out, const SIMDx86Matrix* In)
{
#if defined(USE_SSE)
	asm(
	/* Load 'Out' */
	"movups (%0), %%xmm0\n"
	"movups 16(%0), %%xmm1\n"
	"movups 32(%0), %%xmm2\n"
	"movups 48(%0), %%xmm3\n"
	/* Load 'In' */
	"movups (%1), %%xmm4\n"
	"movups 16(%1), %%xmm5\n"
	"movups 32(%1), %%xmm6\n"
	"movups 48(%1), %%xmm7\n"
	/* Add the rows: xmmN+4 += xmmN */
	"addps %%xmm0, %%xmm4\n"
	"addps %%xmm1, %%xmm5\n"
	"addps %%xmm2, %%xmm6\n"
	"addps %%xmm3, %%xmm7\n"
	/* Write the sums back to 'Out' */
	"movups %%xmm4, (%0)\n"
	"movups %%xmm5, 16(%0)\n"
	"movups %%xmm6, 32(%0)\n"
	"movups %%xmm7, 48(%0)\n"
	:
	: "r" (Out), "r" (In)
	: "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
	);
#elif defined(USE_3DNOW)
	asm(
	/* Load 'Out', two floats per MMX register */
	"movq (%0), %%mm0\n"
	"movq 8(%0), %%mm1\n"
	"movq 16(%0), %%mm2\n"
	"movq 24(%0), %%mm3\n"
	"movq 32(%0), %%mm4\n"
	"movq 40(%0), %%mm5\n"
	"movq 48(%0), %%mm6\n"
	"movq 56(%0), %%mm7\n"
	/* Add 'In' directly from memory */
	"pfadd (%1), %%mm0\n"
	"pfadd 8(%1), %%mm1\n"
	"pfadd 16(%1), %%mm2\n"
	"pfadd 24(%1), %%mm3\n"
	"pfadd 32(%1), %%mm4\n"
	"pfadd 40(%1), %%mm5\n"
	"pfadd 48(%1), %%mm6\n"
	"pfadd 56(%1), %%mm7\n"
	/* Write the sums back to 'Out' */
	"movq %%mm0, (%0)\n"
	"movq %%mm1, 8(%0)\n"
	"movq %%mm2, 16(%0)\n"
	"movq %%mm3, 24(%0)\n"
	"movq %%mm4, 32(%0)\n"
	"movq %%mm5, 40(%0)\n"
	"movq %%mm6, 48(%0)\n"
	"movq %%mm7, 56(%0)\n"
	:
	: "r" (Out), "r" (In)
	: "memory", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
	);
/* Execute 'femms' if desired */
#ifndef NO_EMMS
asm("femms\n");
#endif
#else
Out->m[ 0] += In->m[ 0];
Out->m[ 1] += In->m[ 1];
Out->m[ 2] += In->m[ 2];
Out->m[ 3] += In->m[ 3];
Out->m[ 4] += In->m[ 4];
Out->m[ 5] += In->m[ 5];
Out->m[ 6] += In->m[ 6];
Out->m[ 7] += In->m[ 7];
Out->m[ 8] += In->m[ 8];
Out->m[ 9] += In->m[ 9];
Out->m[10] += In->m[10];
Out->m[11] += In->m[11];
Out->m[12] += In->m[12];
Out->m[13] += In->m[13];
Out->m[14] += In->m[14];
Out->m[15] += In->m[15];
#endif
}
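
/*
	Usage sketch (illustrative, not part of the library): element-wise,
	in-place sum of two 4x4 matrices. Assumes SIMDx86Matrix exposes its 16
	floats as the member 'm', as the scalar fallback above implies.

		SIMDx86Matrix a, b;
		memset(&a, 0, sizeof(a));
		memset(&b, 0, sizeof(b));
		a.m[0] = 1.0f;
		b.m[0] = 2.0f;
		SIMDx86Matrix_Sum(&a, &b);	-- a.m[0] is now 3.0f; b is untouched
*/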
void SIMDx86Matrix_SumOf(SIMDx86Matrix* Out, const SIMDx86Matrix* In1, const SIMDx86Matrix* In2)
{
#if defined(USE_SSE)
	asm(
	/* Load 'In1' */
	"movups (%0), %%xmm0\n"
	"movups 16(%0), %%xmm1\n"
	"movups 32(%0), %%xmm2\n"
	"movups 48(%0), %%xmm3\n"
	/* Load 'In2' */
	"movups (%1), %%xmm4\n"
	"movups 16(%1), %%xmm5\n"
	"movups 32(%1), %%xmm6\n"
	"movups 48(%1), %%xmm7\n"
	/* Add the rows: xmmN+4 = 'In1' row + 'In2' row */
	"addps %%xmm0, %%xmm4\n"
	"addps %%xmm1, %%xmm5\n"
	"addps %%xmm2, %%xmm6\n"
	"addps %%xmm3, %%xmm7\n"
	/* Store the sums into 'Out' */
	"movups %%xmm4, (%2)\n"
	"movups %%xmm5, 16(%2)\n"
	"movups %%xmm6, 32(%2)\n"
	"movups %%xmm7, 48(%2)\n"
	:
	: "r" (In1), "r" (In2), "r" (Out)
	: "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
	);
#elif defined(USE_3DNOW)
asm(
	/* Load 'In1' */
"movq (%0), %%mm0\n"
"movq 8(%0), %%mm1\n"
"movq 16(%0), %%mm2\n"
"movq 24(%0), %%mm3\n"
"movq 32(%0), %%mm4\n"
"movq 40(%0), %%mm5\n"
"movq 48(%0), %%mm6\n"
"movq 56(%0), %%mm7\n"
	/* Add 'In2' */
"pfadd (%1), %%mm0\n"
"pfadd 8(%1), %%mm1\n"
"pfadd 16(%1), %%mm2\n"
"pfadd 24(%1), %%mm3\n"
"pfadd 32(%1), %%mm4\n"
"pfadd 40(%1), %%mm5\n"
"pfadd 48(%1), %%mm6\n"
"pfadd 56(%1), %%mm7\n"
/* Write out results */
"movq %%mm0, (%2)\n"
"movq %%mm1, 8(%2)\n"
"movq %%mm2, 16(%2)\n"
"movq %%mm3, 24(%2)\n"
"movq %%mm4, 32(%2)\n"
"movq %%mm5, 40(%2)\n"
"movq %%mm6, 48(%2)\n"
"movq %%mm7, 56(%2)\n"
	:
	: "r" (In1), "r" (In2), "r" (Out)
	: "memory", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
	);
/* Execute 'femms' if desired */
#ifndef NO_EMMS
asm("femms\n");
#endif
#else
Out->m[ 0] = In1->m[ 0] + In2->m[ 0];
Out->m[ 1] = In1->m[ 1] + In2->m[ 1];
Out->m[ 2] = In1->m[ 2] + In2->m[ 2];
Out->m[ 3] = In1->m[ 3] + In2->m[ 3];
Out->m[ 4] = In1->m[ 4] + In2->m[ 4];
Out->m[ 5] = In1->m[ 5] + In2->m[ 5];
Out->m[ 6] = In1->m[ 6] + In2->m[ 6];
Out->m[ 7] = In1->m[ 7] + In2->m[ 7];
Out->m[ 8] = In1->m[ 8] + In2->m[ 8];
Out->m[ 9] = In1->m[ 9] + In2->m[ 9];
Out->m[10] = In1->m[10] + In2->m[10];
Out->m[11] = In1->m[11] + In2->m[11];
Out->m[12] = In1->m[12] + In2->m[12];
Out->m[13] = In1->m[13] + In2->m[13];
Out->m[14] = In1->m[14] + In2->m[14];
Out->m[15] = In1->m[15] + In2->m[15];
#endif
}
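
/*
	Usage sketch (illustrative): SumOf is the three-operand form, leaving both
	inputs intact and writing the sum into a separate destination.

		SIMDx86Matrix a, b, c;
		memset(&a, 0, sizeof(a));
		memset(&b, 0, sizeof(b));
		a.m[5] = 1.0f;
		b.m[5] = 2.0f;
		SIMDx86Matrix_SumOf(&c, &a, &b);	-- c.m[5] is now 3.0f
*/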
void SIMDx86Matrix_Diff(SIMDx86Matrix* Out, const SIMDx86Matrix* In)
{
#if defined(USE_SSE)
	asm(
	/* Load 'Out' */
	"movups (%0), %%xmm0\n"
	"movups 16(%0), %%xmm1\n"
	"movups 32(%0), %%xmm2\n"
	"movups 48(%0), %%xmm3\n"
	/* Load 'In' */
	"movups (%1), %%xmm4\n"
	"movups 16(%1), %%xmm5\n"
	"movups 32(%1), %%xmm6\n"
	"movups 48(%1), %%xmm7\n"
	/* Subtract the rows: xmmN -= xmmN+4, i.e. 'Out' row - 'In' row */
	"subps %%xmm4, %%xmm0\n"
	"subps %%xmm5, %%xmm1\n"
	"subps %%xmm6, %%xmm2\n"
	"subps %%xmm7, %%xmm3\n"
	/* Write the differences back to 'Out' */
	"movups %%xmm0, (%0)\n"
	"movups %%xmm1, 16(%0)\n"
	"movups %%xmm2, 32(%0)\n"
	"movups %%xmm3, 48(%0)\n"
	:
	: "r" (Out), "r" (In)
	: "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
	);
#elif defined(USE_3DNOW)
	asm(
	/* Load 'Out', two floats per MMX register */
	"movq (%0), %%mm0\n"
	"movq 8(%0), %%mm1\n"
	"movq 16(%0), %%mm2\n"
	"movq 24(%0), %%mm3\n"
	"movq 32(%0), %%mm4\n"
	"movq 40(%0), %%mm5\n"
	"movq 48(%0), %%mm6\n"
	"movq 56(%0), %%mm7\n"
	/* Subtract 'In' directly from memory */
	"pfsub (%1), %%mm0\n"
	"pfsub 8(%1), %%mm1\n"
	"pfsub 16(%1), %%mm2\n"
	"pfsub 24(%1), %%mm3\n"
	"pfsub 32(%1), %%mm4\n"
	"pfsub 40(%1), %%mm5\n"
	"pfsub 48(%1), %%mm6\n"
	"pfsub 56(%1), %%mm7\n"
	/* Write the differences back to 'Out' */
	"movq %%mm0, (%0)\n"
	"movq %%mm1, 8(%0)\n"
	"movq %%mm2, 16(%0)\n"
	"movq %%mm3, 24(%0)\n"
	"movq %%mm4, 32(%0)\n"
	"movq %%mm5, 40(%0)\n"
	"movq %%mm6, 48(%0)\n"
	"movq %%mm7, 56(%0)\n"
	:
	: "r" (Out), "r" (In)
	: "memory", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
	);
/* Execute 'femms' if desired */
#ifndef NO_EMMS
asm("femms\n");
#endif
#else
	Out->m[ 0] -= In->m[ 0];
	Out->m[ 1] -= In->m[ 1];
	Out->m[ 2] -= In->m[ 2];
	Out->m[ 3] -= In->m[ 3];
	Out->m[ 4] -= In->m[ 4];
	Out->m[ 5] -= In->m[ 5];
	Out->m[ 6] -= In->m[ 6];
	Out->m[ 7] -= In->m[ 7];
	Out->m[ 8] -= In->m[ 8];
	Out->m[ 9] -= In->m[ 9];
	Out->m[10] -= In->m[10];
	Out->m[11] -= In->m[11];
	Out->m[12] -= In->m[12];
	Out->m[13] -= In->m[13];
	Out->m[14] -= In->m[14];
	Out->m[15] -= In->m[15];
#endif
}
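
/*
	Usage sketch (illustrative): note the operand order -- the subtrahend is
	the second argument, i.e. Out = Out - In element-wise.

		SIMDx86Matrix a, b;
		memset(&a, 0, sizeof(a));
		memset(&b, 0, sizeof(b));
		a.m[0] = 5.0f;
		b.m[0] = 2.0f;
		SIMDx86Matrix_Diff(&a, &b);	-- a.m[0] is now 3.0f
*/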
void SIMDx86Matrix_DiffOf(SIMDx86Matrix* Out, const SIMDx86Matrix* In1, const SIMDx86Matrix* In2)
{
#if defined(USE_SSE)
	asm(
	/* Load 'In1' */
	"movups (%0), %%xmm0\n"
	"movups 16(%0), %%xmm1\n"
	"movups 32(%0), %%xmm2\n"
	"movups 48(%0), %%xmm3\n"
	/* Load 'In2' */
	"movups (%1), %%xmm4\n"
	"movups 16(%1), %%xmm5\n"
	"movups 32(%1), %%xmm6\n"
	"movups 48(%1), %%xmm7\n"
	/* Subtract the rows: xmmN = 'In1' row - 'In2' row */
	"subps %%xmm4, %%xmm0\n"
	"subps %%xmm5, %%xmm1\n"
	"subps %%xmm6, %%xmm2\n"
	"subps %%xmm7, %%xmm3\n"
	/* Store the differences into 'Out' */
	"movups %%xmm0, (%2)\n"
	"movups %%xmm1, 16(%2)\n"
	"movups %%xmm2, 32(%2)\n"
	"movups %%xmm3, 48(%2)\n"
	:
	: "r" (In1), "r" (In2), "r" (Out)
	: "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
	);
#elif defined(USE_3DNOW)
asm(
	/* Load 'In1' */
"movq (%0), %%mm0\n"
"movq 8(%0), %%mm1\n"
"movq 16(%0), %%mm2\n"
"movq 24(%0), %%mm3\n"
"movq 32(%0), %%mm4\n"
"movq 40(%0), %%mm5\n"
"movq 48(%0), %%mm6\n"
"movq 56(%0), %%mm7\n"
/* Subtract 'In2' */
"pfsub (%1), %%mm0\n"
"pfsub 8(%1), %%mm1\n"
"pfsub 16(%1), %%mm2\n"
"pfsub 24(%1), %%mm3\n"
"pfsub 32(%1), %%mm4\n"
"pfsub 40(%1), %%mm5\n"
"pfsub 48(%1), %%mm6\n"
"pfsub 56(%1), %%mm7\n"
/* Store in 'Out' */
"movq %%mm0, (%2)\n"
"movq %%mm1, 8(%2)\n"
"movq %%mm2, 16(%2)\n"
"movq %%mm3, 24(%2)\n"
"movq %%mm4, 32(%2)\n"
"movq %%mm5, 40(%2)\n"
"movq %%mm6, 48(%2)\n"
"movq %%mm7, 56(%2)\n"
	:
	: "r" (In1), "r" (In2), "r" (Out)
	: "memory", "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
	);
/* Execute 'femms' if desired */
#ifndef NO_EMMS
asm("femms\n");
#endif
#else
	Out->m[ 0] = In1->m[ 0] - In2->m[ 0];
	Out->m[ 1] = In1->m[ 1] - In2->m[ 1];
	Out->m[ 2] = In1->m[ 2] - In2->m[ 2];
	Out->m[ 3] = In1->m[ 3] - In2->m[ 3];
	Out->m[ 4] = In1->m[ 4] - In2->m[ 4];
	Out->m[ 5] = In1->m[ 5] - In2->m[ 5];
	Out->m[ 6] = In1->m[ 6] - In2->m[ 6];
	Out->m[ 7] = In1->m[ 7] - In2->m[ 7];
	Out->m[ 8] = In1->m[ 8] - In2->m[ 8];
	Out->m[ 9] = In1->m[ 9] - In2->m[ 9];
	Out->m[10] = In1->m[10] - In2->m[10];
	Out->m[11] = In1->m[11] - In2->m[11];
	Out->m[12] = In1->m[12] - In2->m[12];
	Out->m[13] = In1->m[13] - In2->m[13];
	Out->m[14] = In1->m[14] - In2->m[14];
	Out->m[15] = In1->m[15] - In2->m[15];
#endif
}
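
/*
	Usage sketch (illustrative): the three-operand difference computes
	Out = In1 - In2 element-wise, leaving both inputs intact.

		SIMDx86Matrix a, b, c;
		memset(&a, 0, sizeof(a));
		memset(&b, 0, sizeof(b));
		a.m[0] = 5.0f;
		b.m[0] = 2.0f;
		SIMDx86Matrix_DiffOf(&c, &a, &b);	-- c.m[0] is now 3.0f
*/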
void SIMDx86Matrix_Scale(SIMDx86Matrix* mtx, float scalar)
{
#if defined(USE_SSE)
asm(
/* Store scalar in xmm4.x */
"movss %1, %%xmm4\n"
/* Get the matrix into registers */
"movups (%0), %%xmm0\n"
"movups 16(%0), %%xmm1\n"
"movups 32(%0), %%xmm2\n"
"movups 48(%0), %%xmm3