///////////////////////////////////////////////////////////////////////
// planar_deform.c
#include "stdafx.h"
#include <windows.h>
#include <stdlib.h>
#include <io.h>
#include <stdio.h>
#include <math.h>
#include "avector.h"
#include "Deform.h"

///////////////////////////////////////////////////////////////////////
// Effector initialization
//   Strength            - stored in Strength; SURFACE::Deform scales the
//                         force applied to each vertex in range by it
//   Radius              - stored in R; vertices closer than R are affected
//   AffectedByExtForces - when set, Deform() also accumulates the
//                         opposite force into ExternalForce
// Center and ExternalForce are zero-filled.
void EFFECTOR::Init(float Strength, float Radius,
	BOOL AffectedByExtForces)
{
	// Zero-fill the accumulated force and the position
	memset(&ExternalForce, 0, sizeof(ExternalForce));
	memset(&Center, 0, sizeof(Center));

	// Store the caller-supplied parameters
	this->AffectedByExtForces = AffectedByExtForces;
	this->R = Radius;
	this->Strength = Strength;
}

///////////////////////////////////////////////////////////////////////
// Surface initialization
// Lays out a flat NumRows x NumCols grid in the y = 0 plane, centred on
// the origin, with dW between columns (x axis) and dH between rows
// (z axis). Vertices are stored row by row. Vertices on the outer edge
// get Fixed = 0, all others Fixed = -1. Velocities and forces are
// zeroed and every normal is set to +Y.
// Note: Mass is stored as its reciprocal (1/Mass).
void SURFACE::Init(int NumRows, int NumCols, float Mass, float Elasticity,
	float Damping, float dW, float dH)
{
	// Grid dimensions (the parameters hide the members of the same name)
	this->NumCols = NumCols;
	this->NumRows = NumRows;
	NumVertex = NumRows*NumCols;

	// Physical parameters; the mass is kept inverted
	this->Mass = 1.0f/Mass;
	this->Elasticity = Elasticity;
	this->Damping = Damping;
	this->dH = dH;
	this->dW = dW;
	memset(&Center, 0, sizeof(Center));

	// Relaxed (rest) offsets to the E, S and SE neighbours
	SRelaxE.x = dW;		SRelaxE.y = 0.0f;	SRelaxE.z = 0.0f;
	SRelaxS.x = 0.0f;	SRelaxS.y = 0.0f;	SRelaxS.z = dH;
	SRelaxSE.x = dW;	SRelaxSE.y = 0.0f;	SRelaxSE.z = dH;

	// Overall extent, used to centre the grid on the origin
	const int LastCol = NumCols - 1;
	const int LastRow = NumRows - 1;
	const float Width = dW*LastCol;
	const float Height = dH*LastRow;
	const float HalfWidth = Width/2;
	const float HalfHeight = Height/2;

	// Initial surface coordinates, one row at a time
	int idx = 0;
	for ( int row = 0; row < NumRows; row++ )
	{
		const float z = dH*row - HalfHeight;
		const bool EdgeRow = row == 0 || row == LastRow;

		for ( int col = 0; col < NumCols; col++, idx++ )
		{
			// Vertex coordinates
			SX[idx] = dW*col - HalfWidth;
			SY[idx] = 0.0f;
			SZ[idx] = z;

			// Edge vertices are fixed (0); the rest are free (-1)
			if ( EdgeRow || col == 0 || col == LastCol )
				Fixed[idx] = 0;
			else
				Fixed[idx] = -1;

			// Flat surface: normal points along +Y
			Normal[idx].x = 0.0f;
			Normal[idx].y = 1.0f;
			Normal[idx].z = 0.0f;
		}
	}

	// Reset velocities and forces
	const size_t Bytes = NumVertex*sizeof(float);
	memset(VX, 0, Bytes);
	memset(VY, 0, Bytes);
	memset(VZ, 0, Bytes);
	memset(ExternalForceX, 0, Bytes);
	memset(ExternalForceY, 0, Bytes);
	memset(ExternalForceZ, 0, Bytes);
	memset(TotalForceX, 0, Bytes);
	memset(TotalForceY, 0, Bytes);
	memset(TotalForceZ, 0, Bytes);
}

/////////////////////////////////////////////////////////////////////
// Get surface vertex data (Direct3D interface)
// Copies the position and normal of each of the NumVertex vertices
// into Vertex[0 .. NumVertex - 1]; other D3DRMVERTEX fields are left
// untouched.
void SURFACE::GetData(D3DRMVERTEX* Vertex)
{
	D3DRMVERTEX* Out = Vertex;
	for ( int idx = 0; idx < NumVertex; ++idx, ++Out )
	{
		// Positions are kept as three separate coordinate arrays
		Out->position.x = SX[idx];
		Out->position.y = SY[idx];
		Out->position.z = SZ[idx];
		Out->normal = Normal[idx];
	}
}

/////////////////////////////////////////////////////////////////////
// Detect and calculate collision forces between an effector and
// a Planar deformable surface
// Every vertex closer to the effector than Effector.R receives a force
// directed from the effector towards the vertex, of magnitude
// Strength*length/(length + 0.01). The forces are accumulated into
// ExternalForceX/Y/Z; if Effector.AffectedByExtForces is set, the
// opposite force is accumulated into Effector.ExternalForce.
void SURFACE::Deform(EFFECTOR& Effector)
{
	// Effector position relative to the surface centre
	const float ex = Effector.Center.x - Center.x;
	const float ey = Effector.Center.y - Center.y;
	const float ez = Effector.Center.z - Center.z;
	// Compare squared distances so that sqrt is only needed on a hit
	const float RadiusSq = Effector.R*Effector.R;

	for ( int idx = 0; idx < NumVertex; idx++ )
	{
		// Vector from the effector to the vertex
		const float x = SX[idx] - ex;
		const float y = SY[idx] - ey;
		const float z = SZ[idx] - ez;
		const float DistSq = x*x + y*y + z*z;

		// Vertex out of range: nothing to do
		if ( !(DistSq < RadiusSq) )
			continue;

		// The 0.01 bias keeps the division finite at zero distance
		const float Denom = (float)sqrt(DistSq) + 0.01f;
		const float ForceX = x*Effector.Strength/Denom;
		const float ForceY = y*Effector.Strength/Denom;
		const float ForceZ = z*Effector.Strength/Denom;

		// Update vertex external force
		ExternalForceX[idx] += ForceX;
		ExternalForceY[idx] += ForceY;
		ExternalForceZ[idx] += ForceZ;

		// Reaction on the effector, if it asked for one
		if ( Effector.AffectedByExtForces )
		{
			Effector.ExternalForce.x -= ForceX;
			Effector.ExternalForce.y -= ForceY;
			Effector.ExternalForce.z -= ForceZ;
		}
	}
}

///////////////////////////////////////////////////////////////////////
// Update Planar surface using internal elastic and external forces
//
// Step 1: for every vertex C that has an east (E), south (S) and
//         south-east (SE) neighbour, the spring force
//         (neighbour - C - relaxed offset)*Elasticity is added to C and
//         subtracted from the neighbour.
// Step 2: the external forces gathered by Deform() are added to the
//         total and cleared; every vertex with Fixed != 0 is then
//         integrated over dt: V += F*Mass*dt (Mass holds 1/m, see Init),
//         S += V*dt, V *= Damping, Normal = V.
void SURFACE::Update(float dt)
{
	// k is shared by both loops below, so it is declared at function
	// scope: a variable declared in a for-init-statement is not visible
	// after its loop under standard C++ scoping rules
	int i, j, k;

	// Reset Total Force
	// The accumulators are cleared lazily instead of with one big memset:
	// the first row is cleared here, the first vertex of every following
	// row is cleared inside the loop, and every other vertex is first
	// touched as the SE neighbour of an earlier vertex, where it is
	// assigned (=) rather than accumulated
	memset(TotalForceX, 0, NumCols*sizeof(float));
	memset(TotalForceY, 0, NumCols*sizeof(float));
	memset(TotalForceZ, 0, NumCols*sizeof(float));

	// For each vertex
	for ( i = 0, k = 0; i < NumRows - 1; i++ )
	{
		// Clear first vertex in the next row
		TotalForceX[(i + 1)*NumCols] = 0.0f;
		TotalForceY[(i + 1)*NumCols] = 0.0f;
		TotalForceZ[(i + 1)*NumCols] = 0.0f;

		for ( j = 0; j < NumCols - 1; j++, k++ )
		{
			// Distance between C-E vertices
			float
				dx = SX[k + 1] - SX[k],
				dy = SY[k + 1] - SY[k],
				dz = SZ[k + 1] - SZ[k];
			// Less relaxation length
			dx -= SRelaxE.x;
			dy -= SRelaxE.y;
			dz -= SRelaxE.z;
			// C-E elastic force
			dx *= Elasticity;
			dy *= Elasticity;
			dz *= Elasticity;
			// Total force for C-vertex
			TotalForceX[k] += dx;
			TotalForceY[k] += dy;
			TotalForceZ[k] += dz;
			// Total force for E-vertex (3rd Newton's Law)
			TotalForceX[k + 1] -= dx;
			TotalForceY[k + 1] -= dy;
			TotalForceZ[k + 1] -= dz;

			// Distance between C-S vertices
			dx = SX[k + NumCols] - SX[k];
			dy = SY[k + NumCols] - SY[k];
			dz = SZ[k + NumCols] - SZ[k];
			// Less relaxation length
			dx -= SRelaxS.x;
			dy -= SRelaxS.y;
			dz -= SRelaxS.z;
			// C-S elastic force
			dx *= Elasticity;
			dy *= Elasticity;
			dz *= Elasticity;
			// Total force for C-vertex
			TotalForceX[k] += dx;
			TotalForceY[k] += dy;
			TotalForceZ[k] += dz;
			// Total force for S-vertex (3rd Newton's Law)
			TotalForceX[k + NumCols] -= dx;
			TotalForceY[k + NumCols] -= dy;
			TotalForceZ[k + NumCols] -= dz;

			// Distance between C-SE vertices
			dx = SX[k + NumCols + 1] - SX[k];
			dy = SY[k + NumCols + 1] - SY[k];
			dz = SZ[k + NumCols + 1] - SZ[k];
			// Less relaxation length
			dx -= SRelaxSE.x;
			dy -= SRelaxSE.y;
			dz -= SRelaxSE.z;
			// C-SE elastic force
			dx *= Elasticity;
			dy *= Elasticity;
			dz *= Elasticity;
			// Total force for C-vertex
			TotalForceX[k] += dx;
			TotalForceY[k] += dy;
			TotalForceZ[k] += dz;
			// Total force for SE-vertex (3rd Newton's Law)
			// Deliberately '=' and not '-=': this is the first time the
			// SE vertex is touched in this pass, so the assignment also
			// discards the value left over from the previous pass
			TotalForceX[k + NumCols + 1] = -dx;
			TotalForceY[k + NumCols + 1] = -dy;
			TotalForceZ[k + NumCols + 1] = -dz;
		}
		// Skip the last column: it has no E/SE neighbour
		k++;
	}

	// Mass holds 1/m, so F*Massdt = (F/m)*dt
	float Massdt = Mass*dt;
	// For each vertex add external force
	for ( k = 0; k < NumVertex; k++ )
	{
		// Consume the external force gathered since the last update
		TotalForceX[k] += ExternalForceX[k];
		ExternalForceX[k] = 0.0f;
		TotalForceY[k] += ExternalForceY[k];
		ExternalForceY[k] = 0.0f;
		TotalForceZ[k] += ExternalForceZ[k];
		ExternalForceZ[k] = 0.0f;

		// For each vertex that is not fixed in space
		// (Fixed is 0 for fixed vertices and non-zero for free ones)
		if ( Fixed[k] )
		{
			// Calculate velocity: V = V + A*dt, A = F/m
			VX[k] += TotalForceX[k]*Massdt;
			VY[k] += TotalForceY[k]*Massdt;
			VZ[k] += TotalForceZ[k]*Massdt;
			// Calculate new vertex position: S = S + V*dt
			SX[k] += VX[k]*dt;
			SY[k] += VY[k]*dt;
			SZ[k] += VZ[k]*dt;

			// Account for damping factor: V = V*damp
			VX[k] *= Damping;
			VY[k] *= Damping;
			VZ[k] *= Damping;
			// Set normals to match velocity
			Normal[k].x = VX[k];
			Normal[k].y = VY[k];
			Normal[k].z = VZ[k];
		}
	}
}

///////////////////////////////////////////////////////////////////////
// Update Planar surface using internal elastic and external forces
// Manually unrolled version: two vertices are processed per iteration,
// with a scalar tail for a left-over vertex, so any NumCols/NumVertex
// is handled.
//
// Step 1: for every vertex C that has an east (E), south (S) and
//         south-east (SE) neighbour, the spring force
//         (neighbour - C - relaxed offset)*Elasticity is added to C and
//         subtracted from the neighbour.
// Step 2: external forces are added to the total and cleared. The
//         total force of fixed vertices (Fixed == 0) is zeroed, then
//         ALL vertices are integrated without branching:
//         V += F*Mass*dt (Mass holds 1/m, see Init), S += V*dt,
//         V *= Damping, Normal = V. Fixed vertices therefore also have
//         their normal overwritten with their velocity.
void SURFACE::UpdateUnroll(float dt)
{
	// Shared by all loops below; declared at function scope because a
	// for-init variable is not visible after its loop in standard C++
	int i, j, k;

	// Reset Total Force
	// Everything is cleared up front so that every contribution below
	// is a plain accumulation and no vertex carries force over from the
	// previous update
	memset(TotalForceX, 0, NumVertex*sizeof(float));
	memset(TotalForceY, 0, NumVertex*sizeof(float));
	memset(TotalForceZ, 0, NumVertex*sizeof(float));

	// For each row that has a row below it
	for ( i = 0; i < NumRows - 1; i++ )
	{
		// Index of the first vertex in this row
		k = i*NumCols;

		// Unroll twice: C-vertices k and k + 1 are processed together.
		// The condition keeps k + 2 (E neighbour of the second lane)
		// inside the current row
		for ( j = 0; j + 2 < NumCols; j += 2, k += 2 )
		{
			// Distance between C-E vertices
			float dx[2], dy[2], dz[2];
			dx[0] = SX[k + 1] - SX[k];
			dx[1] = SX[k + 2] - SX[k + 1];
			dy[0] = SY[k + 1] - SY[k];
			dy[1] = SY[k + 2] - SY[k + 1];
			dz[0] = SZ[k + 1] - SZ[k];
			dz[1] = SZ[k + 2] - SZ[k + 1];
			// Less relaxation length
			dx[0] -= SRelaxE.x;
			dx[1] -= SRelaxE.x;
			dy[0] -= SRelaxE.y;
			dy[1] -= SRelaxE.y;
			dz[0] -= SRelaxE.z;
			dz[1] -= SRelaxE.z;
			// C-E elastic force
			dx[0] *= Elasticity;
			dx[1] *= Elasticity;
			dy[0] *= Elasticity;
			dy[1] *= Elasticity;
			dz[0] *= Elasticity;
			dz[1] *= Elasticity;
			// Total force for C-vertex
			TotalForceX[k] += dx[0];
			TotalForceX[k + 1] += dx[1];
			TotalForceY[k] += dy[0];
			TotalForceY[k + 1] += dy[1];
			TotalForceZ[k] += dz[0];
			TotalForceZ[k + 1] += dz[1];
			// Total force for E-vertex (3rd Newton's Law)
			TotalForceX[k + 1] -= dx[0];
			TotalForceX[k + 2] -= dx[1];
			TotalForceY[k + 1] -= dy[0];
			TotalForceY[k + 2] -= dy[1];
			TotalForceZ[k + 1] -= dz[0];
			TotalForceZ[k + 2] -= dz[1];

			// Distance between C-S vertices
			dx[0] = SX[k + NumCols] - SX[k];
			dx[1] = SX[k + 1 + NumCols] - SX[k + 1];
			dy[0] = SY[k + NumCols] - SY[k];
			dy[1] = SY[k + 1 + NumCols] - SY[k + 1];
			dz[0] = SZ[k + NumCols] - SZ[k];
			dz[1] = SZ[k + 1 + NumCols] - SZ[k + 1];
			// Less relaxation length
			dx[0] -= SRelaxS.x;
			dx[1] -= SRelaxS.x;
			dy[0] -= SRelaxS.y;
			dy[1] -= SRelaxS.y;
			dz[0] -= SRelaxS.z;
			dz[1] -= SRelaxS.z;
			// C-S elastic force
			dx[0] *= Elasticity;
			dx[1] *= Elasticity;
			dy[0] *= Elasticity;
			dy[1] *= Elasticity;
			dz[0] *= Elasticity;
			dz[1] *= Elasticity;
			// Total force for C-vertex
			TotalForceX[k] += dx[0];
			TotalForceX[k + 1] += dx[1];
			TotalForceY[k] += dy[0];
			TotalForceY[k + 1] += dy[1];
			TotalForceZ[k] += dz[0];
			TotalForceZ[k + 1] += dz[1];
			// Total force for S-vertex (3rd Newton's Law)
			TotalForceX[k + NumCols] -= dx[0];
			TotalForceX[k + 1 + NumCols] -= dx[1];
			TotalForceY[k + NumCols] -= dy[0];
			TotalForceY[k + 1 + NumCols] -= dy[1];
			TotalForceZ[k + NumCols] -= dz[0];
			TotalForceZ[k + 1 + NumCols] -= dz[1];

			// Distance between C-SE vertices
			dx[0] = SX[k + NumCols + 1] - SX[k];
			dx[1] = SX[k + 1 + NumCols + 1] - SX[k + 1];
			dy[0] = SY[k + NumCols + 1] - SY[k];
			dy[1] = SY[k + 1 + NumCols + 1] - SY[k + 1];
			dz[0] = SZ[k + NumCols + 1] - SZ[k];
			dz[1] = SZ[k + 1 + NumCols + 1] - SZ[k + 1];
			// Less relaxation length
			dx[0] -= SRelaxSE.x;
			dx[1] -= SRelaxSE.x;
			dy[0] -= SRelaxSE.y;
			dy[1] -= SRelaxSE.y;
			dz[0] -= SRelaxSE.z;
			dz[1] -= SRelaxSE.z;
			// C-SE elastic force
			dx[0] *= Elasticity;
			dx[1] *= Elasticity;
			dy[0] *= Elasticity;
			dy[1] *= Elasticity;
			dz[0] *= Elasticity;
			dz[1] *= Elasticity;
			// Total force for C-vertex
			TotalForceX[k] += dx[0];
			TotalForceX[k + 1] += dx[1];
			TotalForceY[k] += dy[0];
			TotalForceY[k + 1] += dy[1];
			TotalForceZ[k] += dz[0];
			TotalForceZ[k + 1] += dz[1];
			// Total force for SE-vertex (3rd Newton's Law)
			TotalForceX[k + NumCols + 1] -= dx[0];
			TotalForceX[k + 1 + NumCols + 1] -= dx[1];
			TotalForceY[k + NumCols + 1] -= dy[0];
			TotalForceY[k + 1 + NumCols + 1] -= dy[1];
			TotalForceZ[k + NumCols + 1] -= dz[0];
			TotalForceZ[k + 1 + NumCols + 1] -= dz[1];
		}

		// Tail: with an even NumCols one C-vertex (column NumCols - 2)
		// is left over after the pairs; process it on its own
		if ( j < NumCols - 1 )
		{
			float fx, fy, fz;

			// C-E elastic force
			fx = (SX[k + 1] - SX[k] - SRelaxE.x)*Elasticity;
			fy = (SY[k + 1] - SY[k] - SRelaxE.y)*Elasticity;
			fz = (SZ[k + 1] - SZ[k] - SRelaxE.z)*Elasticity;
			TotalForceX[k] += fx;
			TotalForceY[k] += fy;
			TotalForceZ[k] += fz;
			TotalForceX[k + 1] -= fx;
			TotalForceY[k + 1] -= fy;
			TotalForceZ[k + 1] -= fz;

			// C-S elastic force
			fx = (SX[k + NumCols] - SX[k] - SRelaxS.x)*Elasticity;
			fy = (SY[k + NumCols] - SY[k] - SRelaxS.y)*Elasticity;
			fz = (SZ[k + NumCols] - SZ[k] - SRelaxS.z)*Elasticity;
			TotalForceX[k] += fx;
			TotalForceY[k] += fy;
			TotalForceZ[k] += fz;
			TotalForceX[k + NumCols] -= fx;
			TotalForceY[k + NumCols] -= fy;
			TotalForceZ[k + NumCols] -= fz;

			// C-SE elastic force
			fx = (SX[k + NumCols + 1] - SX[k] - SRelaxSE.x)*Elasticity;
			fy = (SY[k + NumCols + 1] - SY[k] - SRelaxSE.y)*Elasticity;
			fz = (SZ[k + NumCols + 1] - SZ[k] - SRelaxSE.z)*Elasticity;
			TotalForceX[k] += fx;
			TotalForceY[k] += fy;
			TotalForceZ[k] += fz;
			TotalForceX[k + NumCols + 1] -= fx;
			TotalForceY[k + NumCols + 1] -= fy;
			TotalForceZ[k + NumCols + 1] -= fz;
		}
	}

	// Mass holds 1/m, so F*Massdt = (F/m)*dt
	float Massdt = Mass*dt;
	// For each pair of vertices add external force
	for ( k = 0; k + 1 < NumVertex; k += 2 )
	{
		// Update total and reset external forces
		TotalForceX[k] += ExternalForceX[k];
		TotalForceX[k + 1] += ExternalForceX[k + 1];
		TotalForceY[k] += ExternalForceY[k];
		TotalForceY[k + 1] += ExternalForceY[k + 1];
		TotalForceZ[k] += ExternalForceZ[k];
		TotalForceZ[k + 1] += ExternalForceZ[k + 1];
		ExternalForceX[k] = 0.0f;
		ExternalForceX[k + 1] = 0.0f;
		ExternalForceY[k] = 0.0f;
		ExternalForceY[k + 1] = 0.0f;
		ExternalForceZ[k] = 0.0f;
		ExternalForceZ[k + 1] = 0.0f;

		// For each fixed vertex reset total force
		if ( !Fixed[k] )
			TotalForceX[k] = TotalForceY[k] = TotalForceZ[k] = 0.0f;
		if ( !Fixed[k + 1] )
			TotalForceX[k + 1] = TotalForceY[k + 1] = TotalForceZ[k + 1] = 0.0f;
		// Calculate velocity: V = V + A*dt, A = F/m
		VX[k] += TotalForceX[k]*Massdt;
		VX[k + 1] += TotalForceX[k + 1]*Massdt;
		VY[k] += TotalForceY[k]*Massdt;
		VY[k + 1] += TotalForceY[k + 1]*Massdt;
		VZ[k] += TotalForceZ[k]*Massdt;
		VZ[k + 1] += TotalForceZ[k + 1]*Massdt;
		// Calculate new vertex position: S = S + V*dt
		SX[k] += VX[k]*dt;
		SX[k + 1] += VX[k + 1]*dt;
		SY[k] += VY[k]*dt;
		SY[k + 1] += VY[k + 1]*dt;
		SZ[k] += VZ[k]*dt;
		SZ[k + 1] += VZ[k + 1]*dt;

		// Account for damping factor: V = V*damp
		VX[k] *= Damping;
		VX[k + 1] *= Damping;
		VY[k] *= Damping;
		VY[k + 1] *= Damping;
		VZ[k] *= Damping;
		VZ[k + 1] *= Damping;
		// Set normals to match velocity
		Normal[k].x = VX[k];
		Normal[k + 1].x = VX[k + 1];
		Normal[k].y = VY[k];
		Normal[k + 1].y = VY[k + 1];
		Normal[k].z = VZ[k];
		Normal[k + 1].z = VZ[k + 1];
	}

	// Tail: with an odd NumVertex the last vertex has no partner
	if ( k < NumVertex )
	{
		// Update total and reset external forces
		TotalForceX[k] += ExternalForceX[k];
		TotalForceY[k] += ExternalForceY[k];
		TotalForceZ[k] += ExternalForceZ[k];
		ExternalForceX[k] = 0.0f;
		ExternalForceY[k] = 0.0f;
		ExternalForceZ[k] = 0.0f;

		// Fixed vertex: reset total force
		if ( !Fixed[k] )
			TotalForceX[k] = TotalForceY[k] = TotalForceZ[k] = 0.0f;
		// Calculate velocity: V = V + A*dt, A = F/m
		VX[k] += TotalForceX[k]*Massdt;
		VY[k] += TotalForceY[k]*Massdt;
		VZ[k] += TotalForceZ[k]*Massdt;
		// Calculate new vertex position: S = S + V*dt
		SX[k] += VX[k]*dt;
		SY[k] += VY[k]*dt;
		SZ[k] += VZ[k]*dt;
		// Account for damping factor: V = V*damp
		VX[k] *= Damping;
		VY[k] *= Damping;
		VZ[k] *= Damping;
		// Set normals to match velocity
		Normal[k].x = VX[k];
		Normal[k].y = VY[k];
		Normal[k].z = VZ[k];
	}
}

///////////////////////////////////////////////////////////////////////
// Update Planar surface using internal elastic and external forces
// 2-version: NumCols must be divisible by 2
/*void SURFACE::Update2(float dt)
{
	float
		E[2] = {Elasticity, Elasticity},
		D[2] = {Damping, Damping},
		SREX[2] = {SRelaxE.x, SRelaxE.x},
		SREY[2] = {SRelaxE.y, SRelaxE.y},
		SREZ[2] = {SRelaxE.z, SRelaxE.z},
		SRSX[2] = {SRelaxS.x, SRelaxS.x},
		SRSY[2] = {SRelaxS.y, SRelaxS.y},
		SRSZ[2] = {SRelaxS.z, SRelaxS.z},
		SRSEX[2] = {SRelaxSE.x, SRelaxSE.x},
		SRSEY[2] = {SRelaxSE.y, SRelaxSE.y},
		SRSEZ[2] = {SRelaxSE.z, SRelaxSE.z};

	// Reset Total Force
	memset(TotalForceX, 0, NumVertex*sizeof(float));
	memset(TotalForceY, 0, NumVertex*sizeof(float));
	memset(TotalForceZ, 0, NumVertex*sizeof(float));

	__asm femms
	// For each vertex
	for ( int i = 0, k = 0; i < NumRows - 1; i++ )
	{
		// Unroll twice
		for ( int j = 0; j < NumCols - 1; j += 2, k += 2 )
		{
			// Distance between C-E vertices
			float dx[2], dy[2], dz[2];
			_sub2(dx, &SX[k + 1], &SX[k]);
			_sub2(dy, &SY[k + 1], &SY[k]);
			_sub2(dz, &SZ[k + 1], &SZ[k]);
			// Less relaxation length
			_dec2(dx, SREX);
			_dec2(dy, SREY);
			_dec2(dz, SREZ);
			// C-E elastic force
			_scale2(dx, E);
			_scale2(dy, E);
			_scale2(dz, E);
			// Total force for C-vertex
			_inc2(&TotalForceX[k], dx);
			_inc2(&TotalForceY[k], dy);
			_inc2(&TotalForceZ[k], dz);
			// Total force for E-vertex (3rd Newton's Law)
			_dec2(&TotalForceX[k + 1], dx);
			_dec2(&TotalForceY[k + 1], dy);
			_dec2(&TotalForceZ[k + 1], dz);

			// Distance between C-S vertices
			_sub2(dx, &SX[k + NumCols], &SX[k]);
			_sub2(dy, &SY[k + NumCols], &SY[k]);
			_sub2(dz, &SZ[k + NumCols], &SZ[k]);
			// Less relaxation length
			_dec2(dx, SRSX);
			_dec2(dy, SRSY);
			_dec2(dz, SRSZ);
			// C-S elastic force
			_scale2(dx, E);
			_scale2(dy, E);
			_scale2(dz, E);
			// Total force for C-vertex
			_inc2(&TotalForceX[k], dx);
			_inc2(&TotalForceY[k], dy);
			_inc2(&TotalForceZ[k], dz);
			// Total force for S-vertex (3rd Newton's Law)
			_dec2(&TotalForceX[k + NumCols], dx);
			_dec2(&TotalForceY[k + NumCols], dy);
			_dec2(&TotalForceZ[k + NumCols], dz);
			
			// Distance between C-SE vertices
			_sub2(dx, &SX[k + NumCols + 1], &SX[k]);
			_sub2(dy, &SY[k + NumCols + 1], &SY[k]);
			_sub2(dz, &SZ[k + NumCols + 1], &SZ[k]);
			// Less relaxation length
			_dec2(dx, SRSEX);
			_dec2(dy, SRSEY);
			_dec2(dz, SRSEZ);
			// C-SE elastic force
			_scale2(dx, E);
			_scale2(dy, E);
			_scale2(dz, E);
			// Total force for C-vertex
			_inc2(&TotalForceX[k], dx);
			_inc2(&TotalForceY[k], dy);
			_inc2(&TotalForceZ[k], dz);
			// Total force for SE-vertex (3rd Newton's Law)
			_dec2(&TotalForceX[k + NumCols + 1], dx);
			_dec2(&TotalForceY[k + NumCols + 1], dy);
			_dec2(&TotalForceZ[k + NumCols + 1], dz);
		}
		k += 2;
	}
	
	float Massdt[2] = {Mass, Mass};
	float dT[2] = {dt, dt};
	_scale2(Massdt, dT);

	// For each vertex add external force
	for ( k = 0; k < NumVertex; k += 2 )
	{
		// Update total and reset external forces
		_inc2(&TotalForceX[k], &ExternalForceX[k]);		
		_zero2(&ExternalForceX[k]);
		_inc2(&TotalForceY[k], &ExternalForceY[k]);
		_zero2(&ExternalForceY[k]);
		_inc2(&TotalForceZ[k], &ExternalForceZ[k]);
		_zero2(&ExternalForceZ[k]);
		
		// Apply fixed vertex mask
		_and2(&TotalForceX[k], &Fixed[k]);

		// Calculate acceleration: A = F/m
		float dVX[2], dVY[2], dVZ[2];
		float dSX[2], dSY[2], dSZ[2];
		// Calculate velocity: V = V + A*dt
		_mul2(dVX, &TotalForceX[k], Massdt);
		_mul2(dVY, &TotalForceY[k], Massdt);
		_mul2(dVZ, &TotalForceZ[k], Massdt);
		// V = V + dV
		_inc2(&VX[k], dVX);
		_inc2(&VZ[k], dVY);
		_inc2(&VZ[k], dVZ);
		// dS = V*dt
		_mul2(dSX, &VX[k], dT);
		_mul2(dSY, &VY[k], dT);
		_mul2(dSZ, &VZ[k], dT);
		// Calculate new vertex position: S = S + dS
		_inc2(&SX[k], dSX);
		_inc2(&SZ[k], dSY);
		_inc2(&SZ[k], dSZ);
		
		// Acount for damping factor: V = V*damp
		_scale2(&VX[k], D);
		_scale2(&VY[k], D);
		_scale2(&VZ[k], D);
		// Set normals to macth velocity
		Normal[k].x = VX[k];
		Normal[k + 1].x = VX[k + 1];
		Normal[k].y = VY[k];
		Normal[k + 1].y = VY[k + 1];
		Normal[k].z = VZ[k];
		Normal[k + 1].z = VZ[k + 1];
	}
	__asm femms
}*/

///////////////////////////////////////////////////////////////////////
// Update Planar surface using internal elastic and external forces
// 3DNow! version: NumCols must be divisible by 2
//
// Same two steps as the C versions (spring forces towards the E, S and
// SE neighbours, then integration), with two vertices packed into each
// MMX register.
//
// Preconditions implied by the loop counters below (all loops test the
// counter at the bottom with jnz, so they must reach exactly zero):
//   - NumCols is even and >= 4 (j starts at NumCols*4 - 8, steps by 8)
//   - NumRows >= 2             (i starts at (NumVertex - NumCols)*4)
//   - NumVertex is even        (k starts at NumVertex*4, steps by 8)
// NOTE(review): the code steps from the X array to the Y and Z arrays
// by adding NumVertex*sizeof(float), i.e. it assumes SX/SY/SZ (and
// likewise TotalForceX/Y/Z, ExternalForceX/Y/Z, VX/VY/VZ) are laid out
// back to back with exactly NumVertex floats each - confirm against
// the declarations in Deform.h.
// NOTE(review): each row pass handles NumCols - 2 C-vertices (columns
// 0 .. NumCols - 3), one column fewer than SURFACE::Update().
// NOTE(review): pswapd is, as far as I know, part of the extended
// 3DNow! instruction set - confirm the minimum CPU requirement.
void SURFACE::Update3DNow(float dt)
{
	// ptrS, Massdt and dt2 live in memory because every general purpose
	// register is in use inside the integration loop; i, j and k are the
	// loop counters (byte counts, not element indices)
	float* ptrS, Massdt[2], dt2[2];
	int i, j, k;
	// Relaxation offsets duplicated into both lanes of a packed pair
	float
		SREX[2] = {SRelaxE.x, SRelaxE.x},
		SREY[2] = {SRelaxE.y, SRelaxE.y},
		SREZ[2] = {SRelaxE.z, SRelaxE.z},
		SRSX[2] = {SRelaxS.x, SRelaxS.x},
		SRSY[2] = {SRelaxS.y, SRelaxS.y},
		SRSZ[2] = {SRelaxS.z, SRelaxS.z},
		SRSEX[2] = {SRelaxSE.x, SRelaxSE.x},
		SRSEY[2] = {SRelaxSE.y, SRelaxSE.y},
		SRSEZ[2] = {SRelaxSE.z, SRelaxSE.z};

	// Initial setup
	__asm {
		femms
		mov		ebx,this					// ebx -> this

		// Clear the first row
		// (rep stosd stores eax = 0 into NumCols dwords at [edi])
		xor		eax,eax
		mov		ecx,[ebx]this.NumCols
		lea		edi,[ebx]this.TotalForceX
		rep		stosd
		mov		ecx,[ebx]this.NumCols
		lea		edi,[ebx]this.TotalForceY
		rep		stosd
		mov		ecx,[ebx]this.NumCols
		lea		edi,[ebx]this.TotalForceZ
		rep		stosd

		movd	mm7,[ebx]this.Elasticity
		punpckldq mm7,mm7					// mm7 = Elasticity
		mov		eax,[ebx]this.NumVertex
		shl		eax,2						// eax = NumVertex*sizeof(float)
		mov		ecx,[ebx]this.NumCols
		shl		ecx,2						// ecx = NumCols*sizeof(float)
		mov		edx,eax						
		shl		edx,1						// edx = NumVertex*sizeof(float)*2

		lea		edi,[ebx]this.SX			// [edi] -> SX/SY/SZ
		lea		esi,[ebx]this.TotalForceX	// [esi] -> TotalForceX/TotalForceY/TotalForceZ
		// i = bytes in all rows but the last; one row is subtracted per pass
		mov		i,eax
		sub		i,ecx
		// Row loop
M:
		// Clear the first two vertices of the next row in X, Y and Z
		pxor	mm0,mm0
		movq	[esi + ecx],mm0
		add		esi,eax
		movq	[esi + ecx],mm0
		add		esi,eax
		movq	[esi + ecx],mm0
		sub		esi,edx						// back to TotalForceX
		// j = bytes of C-vertices to process in this row (NumCols - 2 floats)
		mov		j,ecx
		sub		j,8
		// Column loop: two C-vertices (k, k + 1) per pass
M1:		
		prefetch [edi + 64]
		prefetch [esi + 64]
		prefetch [edi + ecx + 64]
		prefetch [esi + ecx + 64]
		// [edi] -> S
		// [esi] -> TotalForce
		// mm7 = Elasticity
		// mm0 = dx/dy/dz
		// Process all vertices X-coordinate
		movq	mm3,[edi]			// mm3 = SX[k]
		movq	mm0,[edi + 4]		// SX[k + 1]
		movq	mm1,[edi + ecx]		// SX[k + NumCols]
		movq	mm2,[edi + ecx + 4]	// SX[k + NumCols + 1]
		pfsub	mm0,mm3				// dx = SX[k + 1] - SX[k]
		pfsub	mm1,mm3				// dx = SX[k + NumCols] - SX[k]
		pfsub	mm2,mm3				// dx = SX[k + NumCols + 1] - SX[k]
		movq	mm4,SREX
		movq	mm5,SRSX
		movq	mm6,SRSEX
		pfsub	mm0,mm4				// dxE -= SRelaxE.x
		pfsub	mm1,mm5				// dxS -= SRelaxS.x
		pfsub	mm2,mm6				// dxSE -= SRelaxSE.x
		pfmul	mm0,mm7				// dxE *= Elasticity
		pfmul	mm1,mm7				// dxS *= Elasticity
		pfmul	mm2,mm7				// dxSE *= Elasticity

		movq	mm3,[esi + 4]		// TotalForceX[k + 1]
		pfsub	mm3,mm0				// TotalForceX[k + 1] - dxE
		movq	[esi + 4],mm3		// TotalForceX[k + 1] -= dxE

		// movd loads only the low lane and zeroes the high lane, while the
		// movq store writes both: the low vertex is accumulated (-=) and the
		// high vertex is assigned (= -d), which doubles as its lazy clear
		movd	mm4,[esi + ecx]		// TotalForceX[k + NumCols]
		pfsub	mm4,mm1				// TotalForceX[k + NumCols] - dxS
		movq	[esi + ecx],mm4		// TotalForceX[k + NumCols] -= dxS

		movd	mm5,[esi + ecx + 4]	// TotalForceX[k + NumCols + 1]
		pfsub	mm5,mm2				// TotalForceX[k + NumCols + 1] - dxSE
		movq	[esi + ecx + 4],mm5	// TotalForceX[k + NumCols + 1] -= dxSE

		pfadd	mm0,[esi]			// TotalForceX[k] + dxE
		pfadd	mm0,mm1				// TotalForceX[k] + dxS
		pfadd	mm0,mm2				// TotalForceX[k] + dxSE
		movq	[esi],mm0			// TotalForceX[k] += dx

		// Y-coordinate
		add		edi,eax
		add		esi,eax
		prefetch [edi + 64]
		prefetch [esi + 64]
		prefetch [edi + ecx + 64]
		prefetch [esi + ecx + 64]
		movq	mm3,[edi]			// mm3 = SY[k]
		movq	mm0,[edi + 4]		// SY[k + 1]
		movq	mm1,[edi + ecx]		// SY[k + NumCols]
		movq	mm2,[edi + ecx + 4]	// SY[k + NumCols + 1]
		pfsub	mm0,mm3				// dy = SY[k + 1] - SY[k]
		pfsub	mm1,mm3				// dy = SY[k + NumCols] - SY[k]
		pfsub	mm2,mm3				// dy = SY[k + NumCols + 1] - SY[k]
		movq	mm4,SREY
		movq	mm5,SRSY
		movq	mm6,SRSEY
		pfsub	mm0,mm4				// dyE -= SRelaxE.y
		pfsub	mm1,mm5				// dyS -= SRelaxS.y
		pfsub	mm2,mm6				// dySE -= SRelaxSE.y
		pfmul	mm0,mm7				// dyE *= Elasticity
		pfmul	mm1,mm7				// dyS *= Elasticity
		pfmul	mm2,mm7				// dySE *= Elasticity

		movq	mm3,[esi + 4]		// TotalForceY[k + 1]
		pfsub	mm3,mm0				// TotalForceY[k + 1] - dyE
		movq	[esi + 4],mm3		// TotalForceY[k + 1] -= dyE

		movd	mm4,[esi + ecx]		// TotalForceY[k + NumCols]
		pfsub	mm4,mm1				// TotalForceY[k + NumCols] - dyS
		movq	[esi + ecx],mm4		// TotalForceY[k + NumCols] -= dyS

		movd	mm5,[esi + ecx + 4]	// TotalForceY[k + NumCols + 1]
		pfsub	mm5,mm2				// TotalForceY[k + NumCols + 1] - dySE
		movq	[esi + ecx + 4],mm5	// TotalForceY[k + NumCols + 1] -= dySE

		pfadd	mm0,[esi]			// TotalForceY[k] += dyE
		pfadd	mm0,mm1				// TotalForceY[k] += dyS
		pfadd	mm0,mm2				// TotalForceY[k] += dySE
		movq	[esi],mm0			// TotalForceY[k] += dy

		// Z-coordinate
		add		edi,eax
		add		esi,eax
		prefetch [edi + 64]
		prefetch [esi + 64]
		prefetch [edi + ecx + 64]
		prefetch [esi + ecx + 64]
		movq	mm3,[edi]			// mm3 = SZ[k]
		movq	mm0,[edi + 4]		// SZ[k + 1]
		movq	mm1,[edi + ecx]		// SZ[k + NumCols]
		movq	mm2,[edi + ecx + 4]	// SZ[k + NumCols + 1]
		pfsub	mm0,mm3				// dz = SZ[k + 1] - SZ[k]
		pfsub	mm1,mm3				// dz = SZ[k + NumCols] - SZ[k]
		pfsub	mm2,mm3				// dz = SZ[k + NumCols + 1] - SZ[k]
		movq	mm4,SREZ
		movq	mm5,SRSZ
		movq	mm6,SRSEZ
		pfsub	mm0,mm4				// dzE -= SRelaxE.z
		pfsub	mm1,mm5				// dzS -= SRelaxS.z
		pfsub	mm2,mm6				// dzSE -= SRelaxSE.z
		pfmul	mm0,mm7				// dzE *= Elasticity
		pfmul	mm1,mm7				// dzS *= Elasticity
		pfmul	mm2,mm7				// dzSE *= Elasticity

		movq	mm3,[esi + 4]		// TotalForceZ[k + 1]
		pfsub	mm3,mm0				// TotalForceZ[k + 1] - dzE
		movq	[esi + 4],mm3		// TotalForceZ[k + 1] -= dzE

		movd	mm4,[esi + ecx]		// TotalForceZ[k + NumCols]
		pfsub	mm4,mm1				// TotalForceZ[k + NumCols] - dzS
		movq	[esi + ecx],mm4		// TotalForceZ[k + NumCols] -= dzS

		movd	mm5,[esi + ecx + 4]	// TotalForceZ[k + NumCols + 1]
		pfsub	mm5,mm2				// TotalForceZ[k + NumCols + 1] - dzSE
		movq	[esi + ecx + 4],mm5	// TotalForceZ[k + NumCols + 1] -= dzSE

		pfadd	mm0,[esi]			// TotalForceZ[k] += dzE
		pfadd	mm0,mm1				// TotalForceZ[k] += dzS
		pfadd	mm0,mm2				// TotalForceZ[k] += dzSE
		movq	[esi],mm0			// TotalForceZ[k] += dz

		// Update indices
		// (step back from the Z arrays to the X arrays, then on to the
		// next pair of vertices)
		sub		edi,edx
		sub		esi,edx
		add		edi,8
		add		esi,8
		sub		j,8
		jnz		M1

		// Skip the last two vertices of the row, then go to the next row
		add		edi,8
		add		esi,8
		sub		i,ecx
		jnz		M

		// Integration setup
		lea		ecx,[ebx]this.SX
		mov		ptrS,ecx					// ptrS = SX
		movd	mm5,[ebx]this.Mass
		punpckldq mm5,mm5
		movd	mm6,dt						// mm6 = dt
		punpckldq mm6,mm6
		movq	dt2,mm6
		pfmul	mm5,mm6						// mm5 = Massdt
		movq	Massdt,mm5
		movd	mm7,[ebx]this.Damping
		punpckldq mm7,mm7					// mm7 = Damping

		lea		ecx,[ebx]this.Normal		// [ecx] -> Normal
		lea		edx,[ebx]this.Fixed			// [edx] -> Fixed
		lea		edi,[ebx]this.TotalForceX	// [edi] -> TotalForce
		lea		esi,[ebx]this.ExternalForceX// [esi] -> ExternalForce
		lea		ebx,[ebx]this.VX			// [ebx] -> V
		// eax still holds NumVertex*sizeof(float); k counts the bytes left
		mov		k,eax
		// For each vertex add external force
M2:
		prefetch [edi + 64]
		prefetch [esi + 64]
		prefetch [ebx + 64]
		prefetch [ecx + 64]
		prefetch [edi + eax + 64]
		prefetch [edi + eax*2 + 64]
		// Two consecutive Fixed entries are read as one 64-bit mask, so
		// Fixed is treated as an array of 32-bit values (0 or all ones)
		movq	mm6,[edx]				// Fixed[k]

		movq	mm0,[edi]				// TotalForceX[k]
		movq	mm1,[edi + eax]			// TotalForceY[k]
		movq	mm2,[edi + eax*2]		// TotalForceZ[k]
		pfadd	mm0,[esi]				// mm0 = TotalForceX[k] + ExternalForceX[k]
		pfadd	mm1,[esi + eax]			// mm1 = TotalForceY[k] + ExternalForceY[k]
		pfadd	mm2,[esi + eax*2]		// mm2 = TotalForceZ[k] + ExternalForceZ[k]
		pand	mm0,mm6					// mm0 &= Fixed[k] (zero for fixed vertices)
		movq	[edi],mm0				// TotalForceX[k] = (TotalForceX[k] + ExternalForceX[k]) & Fixed[k]
		pand	mm1,mm6					// mm1 &= Fixed[k]
		movq	[edi + eax],mm1			// TotalForceY[k] = (TotalForceY[k] + ExternalForceY[k]) & Fixed[k]
		pand	mm2,mm6					// mm2 &= Fixed[k]
		movq	[edi + eax*2],mm2		// TotalForceZ[k] = (TotalForceZ[k] + ExternalForceZ[k]) & Fixed[k]
		movq	mm6,Massdt
		pxor	mm3,mm3
		movq	[esi],mm3				// ExternalForceX[k] = 0
		movq	[esi + eax],mm3			// ExternalForceY[k] = 0
		movq	[esi + eax*2],mm3		// ExternalForceZ[k] = 0
		pfmul	mm0,mm6					// mm0 = TotalForceX[k]*Massdt
		pfmul	mm1,mm6					// mm1 = TotalForceY[k]*Massdt
		pfmul	mm2,mm6					// mm2 = TotalForceZ[k]*Massdt

		movq	mm6,dt2
		// Calculate velocity: V = V + TotalForce*Massdt
		pfadd	mm0,[ebx]				// VX[k] + TotalForceX[k]*Massdt
		pfadd	mm1,[ebx + eax]			// VY[k] + TotalForceY[k]*Massdt
		pfadd	mm2,[ebx + eax*2]		// VZ[k] + TotalForceZ[k]*Massdt
		movq	mm3,mm0					// mm3 = VX[k] + TotalForceX[k]*Massdt
		movq	mm4,mm1					// mm4 = VY[k] + TotalForceY[k]*Massdt
		movq	mm5,mm2					// mm5 = VZ[k] + TotalForceZ[k]*Massdt
		pfmul	mm3,mm7					// VX[k]*Damping
		pfmul	mm4,mm7					// VY[k]*Damping
		pfmul	mm5,mm7					// VZ[k]*Damping
		// The displacement uses the new velocity before damping
		pfmul	mm0,mm6					// mm0 = (VX[k] + TotalForceX[k]*Massdt)*dt
		pfmul	mm1,mm6					// mm1 = (VY[k] + TotalForceY[k]*Massdt)*dt
		pfmul	mm2,mm6					// mm2 = (VZ[k] + TotalForceZ[k]*Massdt)*dt
		movq	[ebx],mm3				// VX[k] = (VX[k] + TotalForceX[k]*Massdt)*Damping
		movq	[ebx + eax],mm4			// VY[k] = (VY[k] + TotalForceY[k]*Massdt)*Damping
		movq	[ebx + eax*2],mm5		// VZ[k] = (VZ[k] + TotalForceZ[k]*Massdt)*Damping
		movd	[ecx],mm3				// Normal[k].x = VX[k]
		movd	[ecx + 4],mm4			// Normal[k].y = VY[k]
		movd	[ecx + 8],mm5			// Normal[k].z = VZ[k]
		// Swap the two lanes so that movd can store the second vertex
		pswapd	mm3,mm3
		pswapd	mm4,mm4
		pswapd	mm5,mm5
		movd	[ecx + TYPE D3DVECTOR],mm3// Normal[k + 1].x = VX[k + 1]
		movd	[ecx + TYPE D3DVECTOR + 4],mm4// Normal[k + 1].y = VY[k + 1]
		movd	[ecx + TYPE D3DVECTOR + 8],mm5// Normal[k + 1].z = VZ[k + 1]

		// Calculate new vertex position: S = S + V*dt
		// Save edi (i is free to be reused as scratch at this point)
		mov		i,edi
		// [edi] -> S
		mov		edi,ptrS
		pfadd	mm0,[edi]				// SX[k] + VX[k]*dt
		pfadd	mm1,[edi + eax]			// SY[k] + VY[k]*dt
		pfadd	mm2,[edi + eax*2]		// SZ[k] + VZ[k]*dt
		movq	[edi],mm0				// SX[k] += VX[k]*dt
		movq	[edi + eax],mm1			// SY[k] += VY[k]*dt
		movq	[edi + eax*2],mm2		// SZ[k] += VZ[k]*dt
		mov		edi,i

		// Update pointers (two vertices per pass)
		add		ptrS,8
		add		edi,8
		add		esi,8
		add		ebx,8
		add		ecx,(TYPE D3DVECTOR)*2
		add		edx,8

		sub		k,8
		jnz		M2
		femms
	}
}

///////////////////////////////////////////////////////////////////////
// D3DVECTOR Surface initialization
// Same layout as SURFACE::Init, but positions, velocities and forces
// are stored as arrays of D3DVECTOR. Builds a flat NumRows x NumCols
// grid in the y = 0 plane, centred on the origin, with dW between
// columns and dH between rows. Edge vertices get Fixed = 0, all others
// Fixed = -1. Note: Mass is stored as its reciprocal (1/Mass).
void SURFACE2::Init(int NumRows, int NumCols, float Mass, float Elasticity,
	float Damping, float dW, float dH)
{
	// Grid dimensions (the parameters hide the members of the same name)
	this->NumCols = NumCols;
	this->NumRows = NumRows;
	NumVertex = NumRows*NumCols;

	// Physical parameters; the mass is kept inverted
	this->Mass = 1.0f/Mass;
	this->Elasticity = Elasticity;
	this->Damping = Damping;
	this->dH = dH;
	this->dW = dW;
	memset(&Center, 0, sizeof(Center));

	// Relaxed (rest) offsets to the E, S and SE neighbours
	SRelaxE.x = dW;		SRelaxE.y = 0.0f;	SRelaxE.z = 0.0f;
	SRelaxS.x = 0.0f;	SRelaxS.y = 0.0f;	SRelaxS.z = dH;
	SRelaxSE.x = dW;	SRelaxSE.y = 0.0f;	SRelaxSE.z = dH;

	// Overall extent, used to centre the grid on the origin
	const int LastCol = NumCols - 1;
	const int LastRow = NumRows - 1;
	const float Width = dW*LastCol;
	const float Height = dH*LastRow;
	const float HalfWidth = Width/2;
	const float HalfHeight = Height/2;

	// Initial surface coordinates, one row at a time
	int idx = 0;
	for ( int row = 0; row < NumRows; row++ )
	{
		const float z = dH*row - HalfHeight;
		const bool EdgeRow = row == 0 || row == LastRow;

		for ( int col = 0; col < NumCols; col++, idx++ )
		{
			// Vertex coordinates
			S[idx].x = dW*col - HalfWidth;
			S[idx].y = 0.0f;
			S[idx].z = z;

			// Edge vertices are fixed (0); the rest are free (-1)
			if ( EdgeRow || col == 0 || col == LastCol )
				Fixed[idx] = 0;
			else
				Fixed[idx] = -1;

			// Flat surface: normal points along +Y
			Normal[idx].x = 0.0f;
			Normal[idx].y = 1.0f;
			Normal[idx].z = 0.0f;
		}
	}

	// Reset velocities and forces
	const size_t Bytes = NumVertex*sizeof(D3DVECTOR);
	memset(V, 0, Bytes);
	memset(ExternalForce, 0, Bytes);
	memset(TotalForce, 0, Bytes);
}

/////////////////////////////////////////////////////////////////////
// Get surface vertex data (Direct3D interface)
// Copies the position and normal of each of the NumVertex vertices
// into pVertex[0 .. NumVertex - 1]; other D3DRMVERTEX fields are left
// untouched.
void SURFACE2::GetData(D3DRMVERTEX* pVertex)
{
	D3DRMVERTEX* Out = pVertex;
	for ( int idx = 0; idx < NumVertex; ++idx, ++Out )
	{
		Out->position = S[idx];
		Out->normal = Normal[idx];
	}
}

/////////////////////////////////////////////////////////////////////
// Detect and calculate collision forces between an effector and
// a Planar deformable surface
// Every vertex whose distance to the effector (as returned by
// _mag_vect) is below Effector.R receives the effector-to-vertex vector
// scaled by Strength/(distance + 0.01). The force is accumulated into
// ExternalForce; if Effector.AffectedByExtForces is set, the opposite
// force is accumulated into Effector.ExternalForce.
void SURFACE2::Deform(EFFECTOR& Effector)
{
	// Effector position relative to the surface centre
	D3DVECTOR Local;
	_sub_vect(&Local, &Effector.Center, &Center);

	for ( int idx = 0; idx < NumVertex; idx++ )
	{
		// Vector from the effector to the vertex, and its magnitude
		D3DVECTOR Delta;
		_sub_vect(&Delta, &S[idx], &Local);
		const float Dist = _mag_vect(&Delta);

		// Vertex out of reach: nothing to do
		if ( !(Dist < Effector.R) )
			continue;

		// The 0.01 bias keeps the division finite at zero distance
		const float Scale = Effector.Strength/(Dist + 0.01f);
		D3DVECTOR Push;
		_scale_vect(&Push, &Delta, Scale);

		// Update vertex external force
		_add_vect(&ExternalForce[idx], &ExternalForce[idx], &Push);

		// Reaction on the effector, if it asked for one
		if ( Effector.AffectedByExtForces )
			_sub_vect(&Effector.ExternalForce, &Effector.ExternalForce, &Push);
	}
}

///////////////////////////////////////////////////////////////////////
// Update Planar surface using internal elastic and external forces
void SURFACE2::Update(float dt)
{
	// Reset Total Force
	memset(TotalForce, 0, NumVertex*sizeof(D3DVECTOR));

	// For each vertex
	for ( int i = 0; i < NumRows - 1; i++ )
		for ( int j = 0; j < NumCols - 1; j++ )
		{
			int k = i*NumCols + j;
			// Distance between C-E vertices
			D3DVECTOR d, dF;
			_sub_vect(&d, &S[k + 1], &S[k]);
			// Less relaxation length
			_sub_vect(&d, &d, &SRelaxE);
			// C-E elastic force
			_scale_vect(&dF, &d, Elasticity);
			// Total force for C-vertex
			_add_vect(&TotalForce[k], &TotalForce[k], &dF);
			// Total force for E-vertex (3rd Newton's Law)
			_sub_vect(&TotalForce[k + 1], &TotalForce[k + 1], &dF);

			// Distance between C-S vertices
			_sub_vect(&d, &S[k + NumCols], &S[k]);
			// Less relaxation length
			_sub_vect(&d, &d, &SRelaxS);
			// C-S elastic force
			_scale_vect(&dF, &d, Elasticity);
			// Total force for C-vertex
			_add_vect(&TotalForce[k], &TotalForce[k], &dF);
			// Total force for S-vertex (3rd Newton's Law)
			_sub_vect(&TotalForce[k + NumCols], &TotalForce[k + NumCols], &dF);
			
			// Distance between C-SE vertices
			_sub_vect(&d, &S[k + NumCols + 1], &S[k]);
			// Less relaxation length
			_sub_vect(&d, &d, &SRelaxSE);
			// C-SE elastic force
			_scale_vect(&dF, &d, Elasticity);
			// Total force for C-vertex
			_add_vect(&TotalForce[k], &TotalForce[k], &dF);
			// Total force for SE-vertex (3rd Newton's Law)
			_sub_vect(&TotalForce[k + NumCols + 1], &TotalForce[k + NumCols + 1], &dF);
		}
	
	// For each vertex add external force
	for ( int k = 0; k < NumVertex; k++ )
	{
		_add_vect(&TotalForce[k], &TotalForce[k], &ExternalForce[k]);
		
		// For each vertex that is not fixed in space
		if ( Fixed[k] )
		{
			float Massdt = Mass*dt;
			D3DVECTOR A, dS;
			// Calculate acceleration: A = F/m
			_scale_vect(&A, &TotalForce[k], Massdt);
			// Calculate velocity: V = V + A*dt
			_add_vect(&V[k], &V[k], &A);
			// Calculate new vertex position: dS = V*dt
			_scale_vect(&dS, &V[k], dt);
			// S = S + dS
			_add_vect(&S[k], &S[k], &dS);
			// Acount for damping factor: V = V*damp
			_scale_vect(&V[k], &V[k], Damping);
			// Set normals to macth velocity
			Normal[k] = V[k];
		}
	}
	// Reset external force
	memset(ExternalForce, 0, NumVertex*sizeof(D3DVECTOR));
}
