///////////////////////////////////////////////////////////////////////
// planar_deform.c
#include "stdafx.h"
#include <windows.h>
#include <stdlib.h>
#include <io.h>
#include <stdio.h>
#include <math.h>
#include "Deform.h"
#include "Deform2.h"

///////////////////////////////////////////////////////////////////////
// Surface initialization
// Stores the grid dimensions and material parameters, zeroes all vertex
// state and lays the vertices out as a flat, evenly spaced grid in the
// XZ plane, centred on the local origin.
//
// Vertices are packed two per Vertex[] entry: lane 0 holds the vertex at
// an even column, lane 1 the vertex in the column immediately to its
// right. Normal[] is indexed per single vertex (entry*2 + lane).
//
//   NumRows, NumCols  grid size in vertices
//   Mass              vertex mass; the member stores its reciprocal
//   Elasticity        spring constant, stored as given
//   Damping           velocity damping factor, stored as given
//   dW, dH            vertex spacing along X and Z
void SURFACE3::Init(int NumRows, int NumCols, float Mass, float Elasticity,
	float Damping, float dW, float dH)
{
	// Grid dimensions
	this->NumRows = NumRows;
	this->NumCols = NumCols;
	NumVertex = NumRows*NumCols;
	this->dW = dW;
	this->dH = dH;

	// Material parameters (Mass is kept as 1/Mass)
	this->Mass = 1.0f/Mass;
	this->Elasticity = Elasticity;
	this->Damping = Damping;

	memset(&Center, 0, sizeof(Center));

	// Rest offsets of the east, south and south-east neighbours
	SRelaxE.x = dW;
	SRelaxE.y = 0.0f;
	SRelaxE.z = 0.0f;

	SRelaxS.x = 0.0f;
	SRelaxS.y = 0.0f;
	SRelaxS.z = dH;

	SRelaxSE.x = dW;
	SRelaxSE.y = 0.0f;
	SRelaxSE.z = dH;

	// Wipe positions, velocities, forces and normals
	memset(Vertex, 0, sizeof(Vertex));
	memset(ExtForce, 0, sizeof(ExtForce));
	memset(Normal, 0, sizeof(Normal));

	// Overall extent of the grid, used to centre it
	const float TotalW = dW*(NumCols - 1);
	const float TotalH = dH*(NumRows - 1);
	const int LastRow = NumRows - 1;
	const int LastCol = NumCols - 1;

	// Walk the grid row by row, two columns (one packed entry) at a time
	int Pair = 0;
	for ( int Row = 0; Row < NumRows; Row++ )
	{
		// Every vertex of the first and last row is pinned
		const bool EdgeRow = ( Row == 0 || Row == LastRow );
		const float RowZ = dH*Row - TotalH/2;

		for ( int Col = 0; Col < NumCols; Col += 2, Pair++ )
		{
			// Rest position (y stays 0 from the memset above)
			Vertex[Pair].S.x[0] = dW*Col - TotalW/2;
			Vertex[Pair].S.x[1] = dW*(Col + 1) - TotalW/2;
			Vertex[Pair].S.z[0] = RowZ;
			Vertex[Pair].S.z[1] = RowZ;

			// Fixed is a mask: 0 pins the vertex, -1 (all bits set)
			// leaves it free. Border vertices are pinned.
			Vertex[Pair].Fixed[0] =
				( EdgeRow || Col == 0 || Col == LastCol ) ? 0 : -1;
			Vertex[Pair].Fixed[1] =
				( EdgeRow || Col + 1 == LastCol ) ? 0 : -1;

			// Both normals initially point along +Y
			Normal[Pair*2].y = 1.0f;
			Normal[Pair*2 + 1].y = 1.0f;
		}
	}
}

/////////////////////////////////////////////////////////////////////
// Get surface vertex data (Direct3D interface)
// Copies the current position and normal of every surface vertex into the
// caller-supplied array. Output element 2*p + lane receives lane `lane` of
// packed entry p, so the array must hold at least 2*(NumVertex/2)
// elements. Only the position and normal fields are written.
void SURFACE3::GetData(D3DRMVERTEX* Vertex)
{
	const int NumPairs = NumVertex/2;

	// Out always points at the output slot for lane 0 of the current entry
	D3DRMVERTEX* Out = Vertex;
	for ( int Pair = 0; Pair < NumPairs; Pair++, Out += 2 )
	{
		for ( int Lane = 0; Lane < 2; Lane++ )
		{
			Out[Lane].position.x = this->Vertex[Pair].S.x[Lane];
			Out[Lane].position.y = this->Vertex[Pair].S.y[Lane];
			Out[Lane].position.z = this->Vertex[Pair].S.z[Lane];
			Out[Lane].normal = Normal[Pair*2 + Lane];
		}
	}
}

/////////////////////////////////////////////////////////////////////
// Detect and calculate collision forces between an effector and
// a Planar deformable surface
// For every vertex closer to the effector than Effector.R, adds a force of
// magnitude ~Effector.Strength directed along the effector-to-vertex
// offset to that vertex's ExtForce accumulator. When
// Effector.AffectedByExtForces is set, the opposite force is accumulated
// into Effector.ExternalForce.
void SURFACE3::Deform(EFFECTOR& Effector)
{
	const float RadiusSq = Effector.R*Effector.R;

	// Effector position relative to the surface centre
	D3DVECTOR Rel;
	Rel.x = Effector.Center.x - Center.x;
	Rel.y = Effector.Center.y - Center.y;
	Rel.z = Effector.Center.z - Center.z;

	const int NumPairs = NumVertex/2;
	for ( int Pair = 0; Pair < NumPairs; Pair++ )
	{
		// Each packed entry carries two vertices (lanes)
		for ( int Lane = 0; Lane < 2; Lane++ )
		{
			// Offset from effector to vertex and its squared length
			const float dx = Vertex[Pair].S.x[Lane] - Rel.x;
			const float dy = Vertex[Pair].S.y[Lane] - Rel.y;
			const float dz = Vertex[Pair].S.z[Lane] - Rel.z;
			const float DistSq = dx*dx + dy*dy + dz*dz;

			// Force on this vertex; stays zero when out of range
			float Fx = 0.0f;
			float Fy = 0.0f;
			float Fz = 0.0f;

			if ( DistSq < RadiusSq )
			{
				// The 0.01 bias keeps the division finite when the
				// vertex coincides with the effector
				const float Len = (float)sqrt(DistSq);
				Fx = dx*Effector.Strength/(Len + 0.01f);
				Fy = dy*Effector.Strength/(Len + 0.01f);
				Fz = dz*Effector.Strength/(Len + 0.01f);

				ExtForce[Pair].x[Lane] += Fx;
				ExtForce[Pair].y[Lane] += Fy;
				ExtForce[Pair].z[Lane] += Fz;
			}

			// Reaction on the effector
			if ( Effector.AffectedByExtForces )
			{
				Effector.ExternalForce.x -= Fx;
				Effector.ExternalForce.y -= Fy;
				Effector.ExternalForce.z -= Fz;
			}
		}
	}
}

///////////////////////////////////////////////////////////////////////
// Update Planar surface using internal elastic and external forces
// Advances the surface by one time step dt.
//
// Pass 1 accumulates, for every vertex, the elastic forces of the springs
// to its east (E), south (S) and south-east (SE) neighbours, applying the
// opposite force to the neighbour. The last packed entry of each row and
// the whole last row are never used as the spring origin (C).
//
// Pass 2 adds and clears the external forces, zeroes the force on pinned
// vertices, then integrates velocity and position, applies damping and
// copies the damped velocity into Normal[].
//
// Vertices are packed two per Vertex[] entry (lane 0 / lane 1 = even / odd
// column), so a row is NumCols/2 entries long.
// NOTE(review): the indexing assumes NumCols is even and at least 2 -
// confirm against callers.
void SURFACE3::Update(float dt)
{
	// Loop counters live at function scope: the original relied on the
	// pre-standard rule that a for-init variable stays visible after the
	// loop, which conforming compilers reject.
	int i, j, k;

	// Number of packed entries per row, i.e. the index distance to the
	// entry directly below
	const int RowStride = NumCols/2;

	// Clear first row
	for ( i = 0; i < RowStride; i++ )
	{
		Vertex[i].TotalForce.x[0] = 0.0f;
		Vertex[i].TotalForce.y[0] = 0.0f;
		Vertex[i].TotalForce.z[0] = 0.0f;
		Vertex[i].TotalForce.x[1] = 0.0f;
		Vertex[i].TotalForce.y[1] = 0.0f;
		Vertex[i].TotalForce.z[1] = 0.0f;
	}

	// For each row except the last; k is the packed entry being processed
	for ( i = 0, k = 0; i < NumRows - 1; i++ )
	{
		// Clear the first packed entry (two vertices) of the next row.
		// The rest of that row is reset lazily by the plain assignments
		// in the S and SE sections below.
		Vertex[k + RowStride].TotalForce.x[0] = 0.0f;
		Vertex[k + RowStride].TotalForce.y[0] = 0.0f;
		Vertex[k + RowStride].TotalForce.z[0] = 0.0f;
		Vertex[k + RowStride].TotalForce.x[1] = 0.0f;
		Vertex[k + RowStride].TotalForce.y[1] = 0.0f;
		Vertex[k + RowStride].TotalForce.z[1] = 0.0f;

		// Two vertices per iteration; the last entry of the row is skipped
		for ( j = 0; j < NumCols - 2; j += 2, k++ )
		{
			const int East = k + 1;				// entry to the right
			const int South = k + RowStride;	// entry directly below
			const int SouthEast = South + 1;	// entry below and right

			// Distance between C-E vertices:
			// lane 0 -> lane 1 of the same entry, lane 1 -> lane 0 of East
			VECTOR2 d;
			d.x[0] = Vertex[k].S.x[1] - Vertex[k].S.x[0];
			d.y[0] = Vertex[k].S.y[1] - Vertex[k].S.y[0];
			d.z[0] = Vertex[k].S.z[1] - Vertex[k].S.z[0];
			d.x[1] = Vertex[East].S.x[0] - Vertex[k].S.x[1];
			d.y[1] = Vertex[East].S.y[0] - Vertex[k].S.y[1];
			d.z[1] = Vertex[East].S.z[0] - Vertex[k].S.z[1];
			// Less relaxation length
			d.x[0] -= SRelaxE.x;
			d.y[0] -= SRelaxE.y;
			d.z[0] -= SRelaxE.z;
			d.x[1] -= SRelaxE.x;
			d.y[1] -= SRelaxE.y;
			d.z[1] -= SRelaxE.z;
			// C-E elastic force
			d.x[0] *= Elasticity;
			d.y[0] *= Elasticity;
			d.z[0] *= Elasticity;
			d.x[1] *= Elasticity;
			d.y[1] *= Elasticity;
			d.z[1] *= Elasticity;
			// Total force for C-vertex
			Vertex[k].TotalForce.x[0] += d.x[0];
			Vertex[k].TotalForce.y[0] += d.y[0];
			Vertex[k].TotalForce.z[0] += d.z[0];
			Vertex[k].TotalForce.x[1] += d.x[1];
			Vertex[k].TotalForce.y[1] += d.y[1];
			Vertex[k].TotalForce.z[1] += d.z[1];
			// Total force for E-vertex (Newton's 3rd law)
			Vertex[k].TotalForce.x[1] -= d.x[0];
			Vertex[k].TotalForce.y[1] -= d.y[0];
			Vertex[k].TotalForce.z[1] -= d.z[0];
			Vertex[East].TotalForce.x[0] -= d.x[1];
			Vertex[East].TotalForce.y[0] -= d.y[1];
			Vertex[East].TotalForce.z[0] -= d.z[1];

			// Distance between C-S vertices (same lane, one row down)
			d.x[0] = Vertex[South].S.x[0] - Vertex[k].S.x[0];
			d.y[0] = Vertex[South].S.y[0] - Vertex[k].S.y[0];
			d.z[0] = Vertex[South].S.z[0] - Vertex[k].S.z[0];
			d.x[1] = Vertex[South].S.x[1] - Vertex[k].S.x[1];
			d.y[1] = Vertex[South].S.y[1] - Vertex[k].S.y[1];
			d.z[1] = Vertex[South].S.z[1] - Vertex[k].S.z[1];
			// Less relaxation length
			d.x[0] -= SRelaxS.x;
			d.y[0] -= SRelaxS.y;
			d.z[0] -= SRelaxS.z;
			d.x[1] -= SRelaxS.x;
			d.y[1] -= SRelaxS.y;
			d.z[1] -= SRelaxS.z;
			// C-S elastic force
			d.x[0] *= Elasticity;
			d.y[0] *= Elasticity;
			d.z[0] *= Elasticity;
			d.x[1] *= Elasticity;
			d.y[1] *= Elasticity;
			d.z[1] *= Elasticity;
			// Total force for C-vertex
			Vertex[k].TotalForce.x[0] += d.x[0];
			Vertex[k].TotalForce.y[0] += d.y[0];
			Vertex[k].TotalForce.z[0] += d.z[0];
			Vertex[k].TotalForce.x[1] += d.x[1];
			Vertex[k].TotalForce.y[1] += d.y[1];
			Vertex[k].TotalForce.z[1] += d.z[1];
			// Total force for S-vertex (Newton's 3rd law).
			// Lane 1 is a plain assignment on purpose: it is the first
			// write to that slot in this pass and so replaces the value
			// left over from the previous call.
			Vertex[South].TotalForce.x[0] -= d.x[0];
			Vertex[South].TotalForce.y[0] -= d.y[0];
			Vertex[South].TotalForce.z[0] -= d.z[0];
			Vertex[South].TotalForce.x[1] = -d.x[1];
			Vertex[South].TotalForce.y[1] = -d.y[1];
			Vertex[South].TotalForce.z[1] = -d.z[1];

			// Distance between C-SE vertices:
			// lane 0 -> lane 1 of South, lane 1 -> lane 0 of SouthEast
			d.x[0] = Vertex[South].S.x[1] - Vertex[k].S.x[0];
			d.y[0] = Vertex[South].S.y[1] - Vertex[k].S.y[0];
			d.z[0] = Vertex[South].S.z[1] - Vertex[k].S.z[0];
			d.x[1] = Vertex[SouthEast].S.x[0] - Vertex[k].S.x[1];
			d.y[1] = Vertex[SouthEast].S.y[0] - Vertex[k].S.y[1];
			d.z[1] = Vertex[SouthEast].S.z[0] - Vertex[k].S.z[1];
			// Less relaxation length
			d.x[0] -= SRelaxSE.x;
			d.y[0] -= SRelaxSE.y;
			d.z[0] -= SRelaxSE.z;
			d.x[1] -= SRelaxSE.x;
			d.y[1] -= SRelaxSE.y;
			d.z[1] -= SRelaxSE.z;
			// C-SE elastic force
			d.x[0] *= Elasticity;
			d.y[0] *= Elasticity;
			d.z[0] *= Elasticity;
			d.x[1] *= Elasticity;
			d.y[1] *= Elasticity;
			d.z[1] *= Elasticity;
			// Total force for C-vertex
			Vertex[k].TotalForce.x[0] += d.x[0];
			Vertex[k].TotalForce.y[0] += d.y[0];
			Vertex[k].TotalForce.z[0] += d.z[0];
			Vertex[k].TotalForce.x[1] += d.x[1];
			Vertex[k].TotalForce.y[1] += d.y[1];
			Vertex[k].TotalForce.z[1] += d.z[1];
			// Total force for SE-vertex (Newton's 3rd law).
			// The write to lane 0 of SouthEast is again a deliberate
			// assignment: first write to that slot in this pass.
			Vertex[South].TotalForce.x[1] -= d.x[0];
			Vertex[South].TotalForce.y[1] -= d.y[0];
			Vertex[South].TotalForce.z[1] -= d.z[0];
			Vertex[SouthEast].TotalForce.x[0] = -d.x[1];
			Vertex[SouthEast].TotalForce.y[0] = -d.y[1];
			Vertex[SouthEast].TotalForce.z[0] = -d.z[1];
		}
		// Step over the last entry of the row, which the loop above skipped
		k++;
	}

	// The Mass member holds the reciprocal mass (set in Init), so this is
	// dt/m
	float Massdt = Mass*dt;
	// For each packed entry add external force and integrate
	for ( k = 0; k < NumVertex/2; k++ )
	{
		// Update total and reset external forces
		Vertex[k].TotalForce.x[0] += ExtForce[k].x[0];
		Vertex[k].TotalForce.y[0] += ExtForce[k].y[0];
		Vertex[k].TotalForce.z[0] += ExtForce[k].z[0];
		Vertex[k].TotalForce.x[1] += ExtForce[k].x[1];
		Vertex[k].TotalForce.y[1] += ExtForce[k].y[1];
		Vertex[k].TotalForce.z[1] += ExtForce[k].z[1];
		ExtForce[k].x[0] = 0.0f;
		ExtForce[k].y[0] = 0.0f;
		ExtForce[k].z[0] = 0.0f;
		ExtForce[k].x[1] = 0.0f;
		ExtForce[k].y[1] = 0.0f;
		ExtForce[k].z[1] = 0.0f;

		// For each fixed vertex reset total force (Fixed == 0 means pinned)
		if ( !Vertex[k].Fixed[0] )
			Vertex[k].TotalForce.x[0] = Vertex[k].TotalForce.y[0] =
				Vertex[k].TotalForce.z[0] = 0.0f;
		if ( !Vertex[k].Fixed[1] )
			Vertex[k].TotalForce.x[1] = Vertex[k].TotalForce.y[1] =
				Vertex[k].TotalForce.z[1] = 0.0f;
		// Calculate velocity: V = V + A*dt, A = F/m
		Vertex[k].V.x[0] += Vertex[k].TotalForce.x[0]*Massdt;
		Vertex[k].V.y[0] += Vertex[k].TotalForce.y[0]*Massdt;
		Vertex[k].V.z[0] += Vertex[k].TotalForce.z[0]*Massdt;
		Vertex[k].V.x[1] += Vertex[k].TotalForce.x[1]*Massdt;
		Vertex[k].V.y[1] += Vertex[k].TotalForce.y[1]*Massdt;
		Vertex[k].V.z[1] += Vertex[k].TotalForce.z[1]*Massdt;
		// Calculate new vertex position: S = S + V*dt
		Vertex[k].S.x[0] += Vertex[k].V.x[0]*dt;
		Vertex[k].S.y[0] += Vertex[k].V.y[0]*dt;
		Vertex[k].S.z[0] += Vertex[k].V.z[0]*dt;
		Vertex[k].S.x[1] += Vertex[k].V.x[1]*dt;
		Vertex[k].S.y[1] += Vertex[k].V.y[1]*dt;
		Vertex[k].S.z[1] += Vertex[k].V.z[1]*dt;

		// Account for damping factor: V = V*damp
		Vertex[k].V.x[0] *= Damping;
		Vertex[k].V.y[0] *= Damping;
		Vertex[k].V.z[0] *= Damping;
		Vertex[k].V.x[1] *= Damping;
		Vertex[k].V.y[1] *= Damping;
		Vertex[k].V.z[1] *= Damping;
		// Set normals to match the (damped) velocity; not normalised here
		Normal[k*2].x = Vertex[k].V.x[0];
		Normal[k*2].y = Vertex[k].V.y[0];
		Normal[k*2].z = Vertex[k].V.z[0];
		Normal[k*2 + 1].x = Vertex[k].V.x[1];
		Normal[k*2 + 1].y = Vertex[k].V.y[1];
		Normal[k*2 + 1].z = Vertex[k].V.z[1];
	}
}

///////////////////////////////////////////////////////////////////////
// Update Planar surface using internal elastic and external forces
// 3DNow! version: NumCols must be divisible by 2
// Each MMX register holds the two lanes (two vertices) of one packed
// Vertex[] entry, so every pf* instruction below works on two vertices.
//
// Pass 1 (labels M / M1) accumulates spring forces between entry k and the
// entries k + 1, k + NumCols/2 and k + NumCols/2 + 1, always lane against
// the same lane. Pass 2 (label M2) adds and clears the external forces,
// masks the force with Fixed, integrates velocity and position, applies
// damping and writes the damped velocity to Normal[].
//
// NOTE(review): Update() pairs the C-E and C-SE springs across lanes
// (lane 0 -> lane 1, lane 1 -> lane 0 of the next entry), whereas this
// routine pairs the same lane of neighbouring entries, i.e. vertices two
// columns apart. The two routines therefore do not compute the same
// forces - confirm which is intended.
// NOTE(review): the loop counters are only tested for zero after being
// decremented, so NumVertex/2 == 0, NumCols == 2, NumRows == 1 or an odd
// NumVertex would not terminate as intended - confirm callers avoid these.
void SURFACE3::Update3DNow(float dt)
{
	// 1/m * dt in both lanes; written and read back by the asm below
	float Massdt[2];
	// Relaxation offsets duplicated into both lanes so they can be used
	// directly as 64-bit memory operands
	float
		SREX[2] = {SRelaxE.x, SRelaxE.x},
		SREY[2] = {SRelaxE.y, SRelaxE.y},
		SREZ[2] = {SRelaxE.z, SRelaxE.z},
		SRSX[2] = {SRelaxS.x, SRelaxS.x},
		SRSY[2] = {SRelaxS.y, SRelaxS.y},
		SRSZ[2] = {SRelaxS.z, SRelaxS.z},
		SRSEX[2] = {SRelaxSE.x, SRelaxSE.x},
		SRSEY[2] = {SRelaxSE.y, SRelaxSE.y},
		SRSEZ[2] = {SRelaxSE.z, SRelaxSE.z};

	// Initial setup
	__asm {
		femms								// enter MMX/3DNow! state
		mov		ebx,this					// ebx -> this
		movd	mm7,[ebx]this.Elasticity
		punpckldq mm7,mm7					// mm7 = Elasticity in both lanes

		lea		edi,[ebx]this.Vertex		// [edi] -> Vertex
		mov		eax,[ebx]this.NumCols		// eax = NumCols
		mov		edx,TYPE VERTEX
		mul		edx							// eax = NumCols*sizeof(VERTEX) (clobbers edx)
		shr		eax,1						// eax = NumCols/2*sizeof(VERTEX), byte offset one row down
		mov		edx,[ebx]this.NumRows
		dec		edx							// edx = NumRows - 1
		
		// Clear TotalForce of every packed entry (NumVertex/2 of them),
		// not just the first row
		pxor	mm0,mm0
		mov		ecx,[ebx]this.NumVertex
		shr		ecx,1						// ecx = NumVertex/2
		mov		esi,edi
CLEAR:
		movq	[esi]Vertex.TotalForce.x,mm0
		movq	[esi]Vertex.TotalForce.y,mm0
		movq	[esi]Vertex.TotalForce.z,mm0
		add		esi,TYPE VERTEX
		loop	CLEAR						// ecx--, repeat while ecx != 0

		// Row loop: runs NumRows - 1 times (counter in edx)
M:
		/*pxor	mm0,mm0
		// Clear first two vertices in the next row
		movq	[edi + eax]Vertex.TotalForce.x,mm0
		movq	[edi + eax]Vertex.TotalForce.y,mm0
		movq	[edi + eax]Vertex.TotalForce.z,mm0*/
		mov		ecx,[ebx]this.NumCols				// ecx = NumCols
		sub		ecx,2								// ecx = NumCols - 2
		// Entry loop: one packed entry per pass, last entry of the row skipped
M1:		
		prefetch	[edi + 64]
		movq	mm3,[edi]Vertex.TotalForce.x		// mm3 = TotalForce[k].x
		movq	mm4,[edi]Vertex.TotalForce.y		// mm4 = TotalForce[k].y
		movq	mm5,[edi]Vertex.TotalForce.z		// mm5 = TotalForce[k].z

		// Process C-E vertices (entry k against entry k + 1, same lane)
		movq	mm0,[edi + TYPE VERTEX]Vertex.S.x	// S[k + 1].x
		movq	mm1,[edi + TYPE VERTEX]Vertex.S.y	// S[k + 1].y
		movq	mm2,[edi + TYPE VERTEX]Vertex.S.z	// S[k + 1].z
		pfsub	mm0,[edi]Vertex.S.x					// dx = S[k + 1].x - S[k].x
		pfsub	mm1,[edi]Vertex.S.y					// dy = S[k + 1].y - S[k].y
		pfsub	mm2,[edi]Vertex.S.z					// dz = S[k + 1].z - S[k].z
		pfsub	mm0,SREX							// dx -= SRelaxE.x
		pfsub	mm1,SREY							// dy -= SRelaxE.y
		pfsub	mm2,SREZ							// dz -= SRelaxE.z
		pfmul	mm0,mm7								// dx *= Elasticity
		pfmul	mm1,mm7								// dy *= Elasticity
		pfmul	mm2,mm7								// dz *= Elasticity
		pfadd	mm3,mm0								// TotalForce[k].x + dx
		pfadd	mm4,mm1								// TotalForce[k].y + dy
		pfadd	mm5,mm2								// TotalForce[k].z + dz
		pfsubr	mm0,[edi + TYPE VERTEX]Vertex.TotalForce.x	// TotalForce[k + 1].x - dx
		pfsubr	mm1,[edi + TYPE VERTEX]Vertex.TotalForce.y	// TotalForce[k + 1].y - dy
		pfsubr	mm2,[edi + TYPE VERTEX]Vertex.TotalForce.z	// TotalForce[k + 1].z - dz
		movq	[edi + TYPE VERTEX]Vertex.TotalForce.x,mm0	// TotalForce[k + 1].x -= dx
		movq	[edi + TYPE VERTEX]Vertex.TotalForce.y,mm1	// TotalForce[k + 1].y -= dy
		movq	[edi + TYPE VERTEX]Vertex.TotalForce.z,mm2	// TotalForce[k + 1].z -= dz

		// Process C-S vertices (entry k against entry k + NumCols/2)
		movq	mm0,[edi + eax]Vertex.S.x			// S[k + NumCols/2].x
		movq	mm1,[edi + eax]Vertex.S.y			// S[k + NumCols/2].y
		movq	mm2,[edi + eax]Vertex.S.z			// S[k + NumCols/2].z
		pfsub	mm0,[edi]Vertex.S.x					// dx = S[k + NumCols/2].x - S[k].x
		pfsub	mm1,[edi]Vertex.S.y					// dy = S[k + NumCols/2].y - S[k].y
		pfsub	mm2,[edi]Vertex.S.z					// dz = S[k + NumCols/2].z - S[k].z
		pfsub	mm0,SRSX							// dx -= SRelaxS.x
		pfsub	mm1,SRSY							// dy -= SRelaxS.y
		pfsub	mm2,SRSZ							// dz -= SRelaxS.z
		pfmul	mm0,mm7								// dx *= Elasticity
		pfmul	mm1,mm7								// dy *= Elasticity
		pfmul	mm2,mm7								// dz *= Elasticity
		pfadd	mm3,mm0								// TotalForce[k].x + dx
		pfadd	mm4,mm1								// TotalForce[k].y + dy
		pfadd	mm5,mm2								// TotalForce[k].z + dz
		pfsubr	mm0,[edi + eax]Vertex.TotalForce.x	// TotalForce[k + NumCols/2].x - dx
		pfsubr	mm1,[edi + eax]Vertex.TotalForce.y	// TotalForce[k + NumCols/2].y - dy
		pfsubr	mm2,[edi + eax]Vertex.TotalForce.z	// TotalForce[k + NumCols/2].z - dz
		movq	[edi + eax]Vertex.TotalForce.x,mm0	// TotalForce[k + NumCols/2].x -= dx
		movq	[edi + eax]Vertex.TotalForce.y,mm1	// TotalForce[k + NumCols/2].y -= dy
		movq	[edi + eax]Vertex.TotalForce.z,mm2	// TotalForce[k + NumCols/2].z -= dz

		// Process C-SE vertices (entry k against entry k + NumCols/2 + 1)
		movq	mm0,[edi + eax + TYPE VERTEX]Vertex.S.x	// S[k + NumCols/2 + 1].x
		movq	mm1,[edi + eax + TYPE VERTEX]Vertex.S.y	// S[k + NumCols/2 + 1].y
		movq	mm2,[edi + eax + TYPE VERTEX]Vertex.S.z	// S[k + NumCols/2 + 1].z
		pfsub	mm0,[edi]Vertex.S.x					// dx = S[k + NumCols/2 + 1].x - S[k].x
		pfsub	mm1,[edi]Vertex.S.y					// dy = S[k + NumCols/2 + 1].y - S[k].y
		pfsub	mm2,[edi]Vertex.S.z					// dz = S[k + NumCols/2 + 1].z - S[k].z
		pfsub	mm0,SRSEX							// dx -= SRelaxSE.x
		pfsub	mm1,SRSEY							// dy -= SRelaxSE.y
		pfsub	mm2,SRSEZ							// dz -= SRelaxSE.z
		pfmul	mm0,mm7								// dx *= Elasticity
		pfmul	mm1,mm7								// dy *= Elasticity
		pfmul	mm2,mm7								// dz *= Elasticity
		pfadd	mm3,mm0								// TotalForce[k].x + dx
		pfadd	mm4,mm1								// TotalForce[k].y + dy
		pfadd	mm5,mm2								// TotalForce[k].z + dz
		pfsubr	mm0,[edi + eax + TYPE VERTEX]Vertex.TotalForce.x	// TotalForce[k + NumCols/2 + 1].x - dx
		pfsubr	mm1,[edi + eax + TYPE VERTEX]Vertex.TotalForce.y	// TotalForce[k + NumCols/2 + 1].y - dy
		pfsubr	mm2,[edi + eax + TYPE VERTEX]Vertex.TotalForce.z	// TotalForce[k + NumCols/2 + 1].z - dz
		movq	[edi + eax + TYPE VERTEX]Vertex.TotalForce.x,mm0	// TotalForce[k + NumCols/2 + 1].x -= dx
		movq	[edi + eax + TYPE VERTEX]Vertex.TotalForce.y,mm1	// TotalForce[k + NumCols/2 + 1].y -= dy
		movq	[edi + eax + TYPE VERTEX]Vertex.TotalForce.z,mm2	// TotalForce[k + NumCols/2 + 1].z -= dz

		// Save TotalForce[k] (accumulated in mm3..mm5 across the three sections)
		movq	[edi]Vertex.TotalForce.x,mm3		// TotalForce[k].x += dx
		movq	[edi]Vertex.TotalForce.y,mm4		// TotalForce[k].y += dy
		movq	[edi]Vertex.TotalForce.z,mm5		// TotalForce[k].z += dz

		// Update indices
		add		edi,TYPE VERTEX						// k++
		sub		ecx,2								// two columns done
		jnz		M1

		add		edi,TYPE VERTEX						// step over the last entry of the row
		dec		edx
		jnz		M

		// Set up constants for the integration pass
		movd	mm5,[ebx]this.Mass				// Mass member (Init stores 1/Mass in it)
		punpckldq mm5,mm5
		movd	mm6,dt							// mm6 = dt
		punpckldq mm6,mm6						// mm6 = dt in both lanes
		pfmul	mm5,mm6							// mm5 = Massdt
		movq	Massdt,mm5						// spill: mm5 is reused inside the loop
		movd	mm7,[ebx]this.Damping
		punpckldq mm7,mm7						// mm7 = Damping in both lanes

		lea		edi,[ebx]this.Vertex			// [edi] -> Vertex
		lea		esi,[ebx]this.ExtForce			// [esi] -> ExtForce
		lea		ecx,[ebx]this.Normal			// [ecx] -> Normal
		mov		eax,[ebx]this.NumVertex			// eax = NumVertex (vertices left to process)
		// For each packed entry add external force and integrate
M2:
		prefetch [edi + 64]						// V[k]
		prefetch [esi + 64]						// ExtForce[k]
		prefetch [ecx + 64]						// Normal[k]
		movq	mm4,[edi]Vertex.Fixed			// Fixed[k]: per-lane mask, 0 = pinned, all ones = free
		movq	mm5,Massdt						// reload Massdt (mm5 is overwritten below)

		movq	mm0,[edi]Vertex.TotalForce.x	// TotalForce[k].x
		movq	mm1,[edi]Vertex.TotalForce.y	// TotalForce[k].y
		movq	mm2,[edi]Vertex.TotalForce.z	// TotalForce[k].z
		pfadd	mm0,[esi]ExtForce.x				// mm0 = TotalForce[k].x + ExtForce[k].x
		pfadd	mm1,[esi]ExtForce.y				// mm1 = TotalForce[k].y + ExtForce[k].y
		pfadd	mm2,[esi]ExtForce.z				// mm2 = TotalForce[k].z + ExtForce[k].z
		pxor	mm3,mm3							// mm3 = 0
		pand	mm0,mm4							// TotalForce[k].x & Fixed[k]
		pand	mm1,mm4							// TotalForce[k].y & Fixed[k]
		pand	mm2,mm4							// TotalForce[k].z & Fixed[k]
		movq	[esi]ExtForce.x,mm3				// ExtForce[k].x = 0
		movq	[esi]ExtForce.y,mm3				// ExtForce[k].y = 0
		movq	[esi]ExtForce.z,mm3				// ExtForce[k].z = 0
		movq	[edi]Vertex.TotalForce.x,mm0	// TotalForce[k].x = (TotalForce[k].x + ExtForce[k].x) & Fixed[k]
		movq	[edi]Vertex.TotalForce.y,mm1	// TotalForce[k].y = (TotalForce[k].y + ExtForce[k].y) & Fixed[k]
		movq	[edi]Vertex.TotalForce.z,mm2	// TotalForce[k].z = (TotalForce[k].z + ExtForce[k].z) & Fixed[k]
		pfmul	mm0,mm5							// mm0 = TotalForce[k].x*Massdt
		pfmul	mm1,mm5							// mm1 = TotalForce[k].y*Massdt
		pfmul	mm2,mm5							// mm2 = TotalForce[k].z*Massdt

		// Calculate velocity: V = V + TotalForce*Massdt
		pfadd	mm0,[edi]Vertex.V.x				// V[k].x + TotalForce[k].x*Massdt
		pfadd	mm1,[edi]Vertex.V.y				// V[k].y + TotalForce[k].y*Massdt
		pfadd	mm2,[edi]Vertex.V.z				// V[k].z + TotalForce[k].z*Massdt
		movq	mm3,mm0							// mm3 = V[k].x + TotalForce[k].x*Massdt
		movq	mm4,mm1							// mm4 = V[k].y + TotalForce[k].y*Massdt
		movq	mm5,mm2							// mm5 = V[k].z + TotalForce[k].z*Massdt
		pfmul	mm3,mm7							// new V[k].x = (V[k].x + TotalForce[k].x*Massdt)*Damping
		pfmul	mm4,mm7							// new V[k].y = (V[k].y + TotalForce[k].y*Massdt)*Damping
		pfmul	mm5,mm7							// new V[k].z = (V[k].z + TotalForce[k].z*Massdt)*Damping
		pfmul	mm0,mm6							// mm0 = (V[k].x + TotalForce[k].x*Massdt)*dt
		pfmul	mm1,mm6							// mm1 = (V[k].y + TotalForce[k].y*Massdt)*dt
		pfmul	mm2,mm6							// mm2 = (V[k].z + TotalForce[k].z*Massdt)*dt
		movq	[edi]Vertex.V.x,mm3				// store damped V[k].x
		movq	[edi]Vertex.V.y,mm4				// store damped V[k].y
		movq	[edi]Vertex.V.z,mm5				// store damped V[k].z
		movd	[ecx]Normal.x,mm3				// Normal[2k].x = V[k].x lane 0
		movd	[ecx]Normal.y,mm4				// Normal[2k].y = V[k].y lane 0
		movd	[ecx]Normal.z,mm5				// Normal[2k].z = V[k].z lane 0
		pswapd	mm3,mm3							// swap halves: lane 1 into the low dword
		pswapd	mm4,mm4
		pswapd	mm5,mm5
		movd	[ecx + TYPE D3DVECTOR]Normal.x,mm3// Normal[2k + 1].x = V[k].x lane 1
		movd	[ecx + TYPE D3DVECTOR]Normal.y,mm4// Normal[2k + 1].y = V[k].y lane 1
		movd	[ecx + TYPE D3DVECTOR]Normal.z,mm5// Normal[2k + 1].z = V[k].z lane 1

		// Calculate new vertex position: S = S + V*dt (V before damping)
		pfadd	mm0,[edi]Vertex.S.x			// S[k].x + V[k].x*dt
		pfadd	mm1,[edi]Vertex.S.y			// S[k].y + V[k].y*dt
		pfadd	mm2,[edi]Vertex.S.z			// S[k].z + V[k].z*dt
		movq	[edi]Vertex.S.x,mm0			// S[k].x += V[k].x*dt
		movq	[edi]Vertex.S.y,mm1			// S[k].y += V[k].y*dt
		movq	[edi]Vertex.S.z,mm2			// S[k].z += V[k].z*dt

		// Update pointers
		add		edi,TYPE VERTEX				// next packed entry
		add		ecx,(TYPE D3DVECTOR)*2		// two normals per entry
		add		esi,TYPE VECTOR2
		sub		eax,2						// two vertices (one entry) done
		jnz		M2
		femms								// leave MMX/3DNow! state
	}
}
