88 files changed, 719 insertions, 6694 deletions
diff --git a/btgui/OpenGLWindow/renderscene.cpp b/btgui/OpenGLWindow/renderscene.cpp
index 174488ccd..42e9cb547 100644
--- a/btgui/OpenGLWindow/renderscene.cpp
+++ b/btgui/OpenGLWindow/renderscene.cpp
@@ -18,7 +18,7 @@ bool keepStaticObjects = false;
 //#include "LinearMath/btQuickprof.h"
 #include "BulletCommon/btQuaternion.h"
 #include "BulletCommon/btMatrix3x3.h"
-//#include "../opencl/gpu_rigidbody_pipeline/btConvexUtility.h"
+//#include "../opencl/gpu_rigidbody_pipeline/b3ConvexUtility.h"
 #include "ShapeData.h"
 ///work-in-progress 
 ///This ReadBulletSample is kept as simple as possible without dependencies to the Bullet SDK.
diff --git a/build/stringify.bat b/build/stringify.bat
index b0e091b7e..d121c8ff0 100644
--- a/build/stringify.bat
+++ b/build/stringify.bat
@@ -11,11 +11,10 @@ premake4 --file=stringifyKernel.lua --kernelfile="../opencl/parallel_primitives/
 premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_broadphase/kernels/sap.cl" --headerfile="../opencl/gpu_broadphase/kernels/sapKernels.h" --stringname="sapCL" stringify
 premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_broadphase/kernels/sapFast.cl" --headerfile="../opencl/gpu_broadphase/kernels/sapFastKernels.h" --stringname="sapFastCL" stringify
 
-premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_sat/kernels/sat.cl" --headerfile="../opencl/gpu_sat/kernels/satKernels.h" --stringname="satKernelsCL" stringify
-premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_sat/kernels/satClipHullContacts.cl" --headerfile="../opencl/gpu_sat/kernels/satClipHullContacts.h" --stringname="satClipKernelsCL" stringify
-premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_sat/kernels/primitiveContacts.cl" --headerfile="../opencl/gpu_sat/kernels/primitiveContacts.h" --stringname="primitiveContactsKernelsCL" stringify
-
-premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_sat/kernels/bvhTraversal.cl" --headerfile="../opencl/gpu_sat/kernels/bvhTraversal.h" --stringname="bvhTraversalKernelCL" stringify
+premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_narrowphase/kernels/sat.cl" --headerfile="../opencl/gpu_narrowphase/kernels/satKernels.h" --stringname="satKernelsCL" stringify
+premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_narrowphase/kernels/satClipHullContacts.cl" --headerfile="../opencl/gpu_narrowphase/kernels/satClipHullContacts.h" --stringname="satClipKernelsCL" stringify
+premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_narrowphase/kernels/primitiveContacts.cl" --headerfile="../opencl/gpu_narrowphase/kernels/primitiveContacts.h" --stringname="primitiveContactsKernelsCL" stringify
+premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_narrowphase/kernels/bvhTraversal.cl" --headerfile="../opencl/gpu_narrowphase/kernels/bvhTraversal.h" --stringname="bvhTraversalKernelCL" stringify
 
 
 premake4 --file=stringifyKernel.lua --kernelfile="../opencl/gpu_rigidbody/kernels/integrateKernel.cl" --headerfile="../opencl/gpu_rigidbody/kernels/integrateKernel.h" --stringname="integrateKernelCL" stringify
diff --git a/demo/donttouch/OpenGL3CoreRenderer.cpp b/demo/donttouch/OpenGL3CoreRenderer.cpp
index ca1a55a3f..e7dc2b213 100644
--- a/demo/donttouch/OpenGL3CoreRenderer.cpp
+++ b/demo/donttouch/OpenGL3CoreRenderer.cpp
@@ -195,7 +195,7 @@ GraphicsShape* createGraphicsShapeFromConcaveMesh(const btBvhTriangleMeshShape*
 	btAlignedObjectArray<GraphicsVertex>* vertices = new btAlignedObjectArray<GraphicsVertex>;
 	btAlignedObjectArray<int>* indicesPtr = new btAlignedObjectArray<int>;
 
-	const btStridingMeshInterface* meshInterface = trimesh->getMeshInterface();
+	const b3StridingMeshInterface* meshInterface = trimesh->getMeshInterface();
 
 	btVector3 trimeshScaling(1,1,1);
 	for (int partId=0;partId<meshInterface->getNumSubParts();partId++)
diff --git a/demo/donttouch/btGpuDynamicsWorld.cpp b/demo/donttouch/btGpuDynamicsWorld.cpp
index f6b3da3dc..23c0432e8 100644
--- a/demo/donttouch/btGpuDynamicsWorld.cpp
+++ b/demo/donttouch/btGpuDynamicsWorld.cpp
@@ -127,7 +127,7 @@ int btGpuDynamicsWorld::findOrRegisterCollisionShape(const btCollisionShape* col
 				m_uniqueShapes.push_back(colShape);
 				
 				btBvhTriangleMeshShape* trimesh = (btBvhTriangleMeshShape*) colShape;
-				btStridingMeshInterface* meshInterface = trimesh->getMeshInterface();
+				b3StridingMeshInterface* meshInterface = trimesh->getMeshInterface();
 				btAlignedObjectArray<btVector3> vertices;
 				btAlignedObjectArray<int> indices;
 				
diff --git a/demo/gpudemo/GpuDemo.cpp b/demo/gpudemo/GpuDemo.cpp
index bf97de3fa..10da7cbf3 100644
--- a/demo/gpudemo/GpuDemo.cpp
+++ b/demo/gpudemo/GpuDemo.cpp
@@ -1,7 +1,7 @@
 #include "GpuDemo.h"
 #include "GpuDemoInternalData.h"
 #include "BulletCommon/btScalar.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "OpenGLWindow/ShapeData.h"
 #include "OpenGLWindow/GLInstancingRenderer.h"
 
@@ -50,27 +50,27 @@ void GpuDemo::initCL(int preferredDeviceIndex, int preferredPlatformIndex)
 	
 	//	if (useInterop)
 	//	{
-	//		m_data->m_clContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
+	//		m_data->m_clContext = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
 	//	} else
 	{
-		m_clData->m_clContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex,&platformId);
-		btOpenCLUtils::printPlatformInfo(platformId);
+		m_clData->m_clContext = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex,&platformId);
+		b3OpenCLUtils::printPlatformInfo(platformId);
 	}
 	
 	
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
 	
-	int numDev = btOpenCLUtils::getNumDevices(m_clData->m_clContext);
+	int numDev = b3OpenCLUtils::getNumDevices(m_clData->m_clContext);
 	
 	if (numDev>0)
 	{
-		m_clData->m_clDevice= btOpenCLUtils::getDevice(m_clData->m_clContext,0);
+		m_clData->m_clDevice= b3OpenCLUtils::getDevice(m_clData->m_clContext,0);
 		m_clData->m_clQueue = clCreateCommandQueue(m_clData->m_clContext, m_clData->m_clDevice, 0, &ciErrNum);
 		oclCHECKERROR(ciErrNum, CL_SUCCESS);
         
-        btOpenCLUtils::printDeviceInfo(m_clData->m_clDevice);
+        b3OpenCLUtils::printDeviceInfo(m_clData->m_clDevice);
 		btOpenCLDeviceInfo info;
-		btOpenCLUtils::getDeviceInfo(m_clData->m_clDevice,&info);
+		b3OpenCLUtils::getDeviceInfo(m_clData->m_clDevice,&info);
 		m_clData->m_clDeviceName = info.m_deviceName;
 		m_clData->m_clInitialized = true;
 		
diff --git a/demo/gpudemo/GpuDemoInternalData.h b/demo/gpudemo/GpuDemoInternalData.h
index d35aa9edf..38912db2c 100644
--- a/demo/gpudemo/GpuDemoInternalData.h
+++ b/demo/gpudemo/GpuDemoInternalData.h
@@ -1,7 +1,7 @@
 #ifndef GPU_DEMO_INTERNAL_DATA_H
 #define GPU_DEMO_INTERNAL_DATA_H
 
-#include "basic_initialize/btOpenCLInclude.h"
+#include "basic_initialize/b3OpenCLInclude.h"
 
 struct GpuDemoInternalData
 {
diff --git a/demo/gpudemo/ParticleDemo.cpp b/demo/gpudemo/ParticleDemo.cpp
index a603dcfd9..4acd71926 100644
--- a/demo/gpudemo/ParticleDemo.cpp
+++ b/demo/gpudemo/ParticleDemo.cpp
@@ -2,7 +2,7 @@
 
 #include "OpenGLWindow/GLInstancingRenderer.h"
 #include "OpenGLWindow/ShapeData.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 
 #define MSTRINGIFY(A) #A
 static char* particleKernelsString = 
@@ -15,7 +15,7 @@ static char* particleKernelsString =
 #include "parallel_primitives/host/btLauncherCL.h"
 //#include "../../opencl/primitives/AdlPrimitives/Math/Math.h"
 //#include "../../opencl/broadphase_benchmark/btGridBroadphaseCL.h"
-#include "gpu_broadphase/host/btGpuSapBroadphase.h"
+#include "gpu_broadphase/host/b3GpuSapBroadphase.h"
 #include "GpuDemoInternalData.h"
 
 
@@ -92,7 +92,7 @@ struct ParticleInternalData
 
 	cl_kernel m_collideParticlesKernel;
 
-	btGpuSapBroadphase*	m_broadphaseGPU;
+	b3GpuSapBroadphase*	m_broadphaseGPU;
 	
 
 	cl_mem		m_clPositionBuffer;
@@ -168,7 +168,7 @@ void ParticleDemo::setupScene(const ConstructionInfo& ci)
 	int maxPairsSmallProxy = 32;
 	float radius = 3.f*m_data->m_simParamCPU[0].m_particleRad;
 
-	m_data->m_broadphaseGPU = new btGpuSapBroadphase(m_clData->m_clContext ,m_clData->m_clDevice,m_clData->m_clQueue);//overlappingPairCache,btVector3(4.f, 4.f, 4.f), 128, 128, 128,maxObjects, maxObjects, maxPairsSmallProxy, 100.f, 128,
+	m_data->m_broadphaseGPU = new b3GpuSapBroadphase(m_clData->m_clContext ,m_clData->m_clDevice,m_clData->m_clQueue);//overlappingPairCache,btVector3(4.f, 4.f, 4.f), 128, 128, 128,maxObjects, maxObjects, maxPairsSmallProxy, 100.f, 128,
 
 	/*m_data->m_broadphaseGPU = new btGridBroadphaseCl(overlappingPairCache,btVector3(radius,radius,radius), 128, 128, 128,
 		maxObjects, maxObjects, maxPairsSmallProxy, 100.f, 128,
@@ -188,16 +188,16 @@ void ParticleDemo::setupScene(const ConstructionInfo& ci)
 
 	cl_int pErrNum;
 
-	cl_program prog = btOpenCLUtils::compileCLProgramFromString(m_clData->m_clContext,m_clData->m_clDevice,particleKernelsString,0,"",INTEROPKERNEL_SRC_PATH);
-	m_data->m_updatePositionsKernel = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "updatePositionsKernel" ,&pErrNum,prog);
+	cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_clData->m_clContext,m_clData->m_clDevice,particleKernelsString,0,"",INTEROPKERNEL_SRC_PATH);
+	m_data->m_updatePositionsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "updatePositionsKernel" ,&pErrNum,prog);
 	oclCHECKERROR(pErrNum, CL_SUCCESS);
-	m_data->m_updatePositionsKernel2 = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "integrateMotionKernel" ,&pErrNum,prog);
+	m_data->m_updatePositionsKernel2 = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "integrateMotionKernel" ,&pErrNum,prog);
 	oclCHECKERROR(pErrNum, CL_SUCCESS);
 
-	m_data->m_updateAabbsKernel= btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "updateAabbsKernel" ,&pErrNum,prog);
+	m_data->m_updateAabbsKernel= b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "updateAabbsKernel" ,&pErrNum,prog);
 	oclCHECKERROR(pErrNum, CL_SUCCESS);
 
-	m_data->m_collideParticlesKernel = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "collideParticlesKernel" ,&pErrNum,prog);
+	m_data->m_collideParticlesKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext, m_clData->m_clDevice,particleKernelsString, "collideParticlesKernel" ,&pErrNum,prog);
 	oclCHECKERROR(pErrNum, CL_SUCCESS);
 
 	m_instancingRenderer = ci.m_instancingRenderer;
diff --git a/demo/gpudemo/broadphase/PairBench.cpp b/demo/gpudemo/broadphase/PairBench.cpp
index e853d4e7c..1f11b7b43 100644
--- a/demo/gpudemo/broadphase/PairBench.cpp
+++ b/demo/gpudemo/broadphase/PairBench.cpp
@@ -4,9 +4,9 @@
 #include "OpenGLWindow/GLInstancingRenderer.h"
 #include "BulletCommon/btQuaternion.h"
 #include "OpenGLWindow/btgWindowInterface.h"
-#include "gpu_broadphase/host/btGpuSapBroadphase.h"
+#include "gpu_broadphase/host/b3GpuSapBroadphase.h"
 #include "../GpuDemoInternalData.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "OpenGLWindow/OpenGLInclude.h"
 #include "OpenGLWindow/GLInstanceRendererInternalData.h"
 #include "parallel_primitives/host/btLauncherCL.h"
@@ -95,7 +95,7 @@ __kernel void updateAabbSimple( __global float4* posOrnColors, const int numNode
 
 struct	PairBenchInternalData
 {
-	btGpuSapBroadphase*	m_broadphaseGPU;
+	b3GpuSapBroadphase*	m_broadphaseGPU;
 
 	cl_kernel	m_moveObjectsKernel;
 	cl_kernel	m_sineWaveKernel;
@@ -152,13 +152,13 @@ void	PairBench::initPhysics(const ConstructionInfo& ci)
 	initCL(ci.preferredOpenCLDeviceIndex,ci.preferredOpenCLPlatformIndex);
 	if (m_clData->m_clContext)
 	{
-		m_data->m_broadphaseGPU = new btGpuSapBroadphase(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue);
+		m_data->m_broadphaseGPU = new b3GpuSapBroadphase(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue);
 		cl_program pairBenchProg=0;
 		int errNum=0;
-		m_data->m_moveObjectsKernel = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_pairBenchKernelString,"moveObjectsKernel",&errNum,pairBenchProg);
-		m_data->m_sineWaveKernel = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_pairBenchKernelString,"sineWaveKernel",&errNum,pairBenchProg);
-		m_data->m_colorPairsKernel = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_pairBenchKernelString,"colorPairsKernel",&errNum,pairBenchProg);
-		m_data->m_updateAabbSimple = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_pairBenchKernelString,"updateAabbSimple",&errNum,pairBenchProg);
+		m_data->m_moveObjectsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_pairBenchKernelString,"moveObjectsKernel",&errNum,pairBenchProg);
+		m_data->m_sineWaveKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_pairBenchKernelString,"sineWaveKernel",&errNum,pairBenchProg);
+		m_data->m_colorPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_pairBenchKernelString,"colorPairsKernel",&errNum,pairBenchProg);
+		m_data->m_updateAabbSimple = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_pairBenchKernelString,"updateAabbSimple",&errNum,pairBenchProg);
 			
 	}
 
diff --git a/demo/gpudemo/premake4.lua b/demo/gpudemo/premake4.lua
index 94b77020a..09b6f81c8 100644
--- a/demo/gpudemo/premake4.lua
+++ b/demo/gpudemo/premake4.lua
@@ -61,9 +61,9 @@ function createProject(vendor)
 			"../../btgui/OpenGLTrueTypeFont/opengl_fontstashcallbacks.cpp",
 			"../../btgui/OpenGLTrueTypeFont/opengl_fontstashcallbacks.h",
 			"../../btgui/FontFiles/OpenSans.cpp",
-			"../../opencl/basic_initialize/btOpenCLUtils.cpp",
-			"../../opencl/basic_initialize/btOpenCLUtils.h",
-			"../../opencl/gpu_broadphase/host/btGpuSapBroadphase.cpp",
+			"../../opencl/basic_initialize/b3OpenCLUtils.cpp",
+			"../../opencl/basic_initialize/b3OpenCLUtils.h",
+			"../../opencl/gpu_broadphase/host/b3GpuSapBroadphase.cpp",
 			"../../opencl/gpu_narrowphase/host/**.cpp",
 			"../../opencl/gpu_narrowphase/host/**.h",
 			"../../opencl/parallel_primitives/host/btBoundSearchCL.cpp",
diff --git a/demo/gpudemo/rigidbody/ConcaveScene.cpp b/demo/gpudemo/rigidbody/ConcaveScene.cpp
index 653276307..1c1cd9c1a 100644
--- a/demo/gpudemo/rigidbody/ConcaveScene.cpp
+++ b/demo/gpudemo/rigidbody/ConcaveScene.cpp
@@ -6,15 +6,15 @@
 #include "OpenGLWindow/GLInstancingRenderer.h"
 #include "BulletCommon/btQuaternion.h"
 #include "OpenGLWindow/btgWindowInterface.h"
-#include "gpu_broadphase/host/btGpuSapBroadphase.h"
+#include "gpu_broadphase/host/b3GpuSapBroadphase.h"
 #include "../GpuDemoInternalData.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "OpenGLWindow/OpenGLInclude.h"
 #include "OpenGLWindow/GLInstanceRendererInternalData.h"
 #include "parallel_primitives/host/btLauncherCL.h"
-#include "gpu_rigidbody/host/btGpuRigidBodyPipeline.h"
-#include "gpu_rigidbody/host/btGpuNarrowPhase.h"
-#include "gpu_rigidbody/host/btConfig.h"
+#include "gpu_rigidbody/host/b3GpuRigidBodyPipeline.h"
+#include "gpu_rigidbody/host/b3GpuNarrowPhase.h"
+#include "gpu_rigidbody/host/b3Config.h"
 #include "GpuRigidBodyDemoInternalData.h"
 #include"../../ObjLoader/objLoader.h"
 #include "BulletCommon/btTransform.h"
diff --git a/demo/gpudemo/rigidbody/GpuCompoundScene.cpp b/demo/gpudemo/rigidbody/GpuCompoundScene.cpp
index 04c558750..f6d7f19ec 100644
--- a/demo/gpudemo/rigidbody/GpuCompoundScene.cpp
+++ b/demo/gpudemo/rigidbody/GpuCompoundScene.cpp
@@ -6,15 +6,15 @@
 #include "OpenGLWindow/GLInstancingRenderer.h"
 #include "BulletCommon/btQuaternion.h"
 #include "OpenGLWindow/btgWindowInterface.h"
-#include "gpu_broadphase/host/btGpuSapBroadphase.h"
+#include "gpu_broadphase/host/b3GpuSapBroadphase.h"
 #include "../GpuDemoInternalData.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "OpenGLWindow/OpenGLInclude.h"
 #include "OpenGLWindow/GLInstanceRendererInternalData.h"
 #include "parallel_primitives/host/btLauncherCL.h"
-#include "gpu_rigidbody/host/btGpuRigidBodyPipeline.h"
-#include "gpu_rigidbody/host/btGpuNarrowPhase.h"
-#include "gpu_rigidbody/host/btConfig.h"
+#include "gpu_rigidbody/host/b3GpuRigidBodyPipeline.h"
+#include "gpu_rigidbody/host/b3GpuNarrowPhase.h"
+#include "gpu_rigidbody/host/b3Config.h"
 #include "GpuRigidBodyDemoInternalData.h"
 #include "BulletCommon/btTransform.h"
 
diff --git a/demo/gpudemo/rigidbody/GpuConvexScene.cpp b/demo/gpudemo/rigidbody/GpuConvexScene.cpp
index 548244f8f..9fc5e0acc 100644
--- a/demo/gpudemo/rigidbody/GpuConvexScene.cpp
+++ b/demo/gpudemo/rigidbody/GpuConvexScene.cpp
@@ -6,15 +6,15 @@
 #include "OpenGLWindow/GLInstancingRenderer.h"
 #include "BulletCommon/btQuaternion.h"
 #include "OpenGLWindow/btgWindowInterface.h"
-#include "gpu_broadphase/host/btGpuSapBroadphase.h"
+#include "gpu_broadphase/host/b3GpuSapBroadphase.h"
 #include "../GpuDemoInternalData.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "OpenGLWindow/OpenGLInclude.h"
 #include "OpenGLWindow/GLInstanceRendererInternalData.h"
 #include "parallel_primitives/host/btLauncherCL.h"
-#include "gpu_rigidbody/host/btGpuRigidBodyPipeline.h"
-#include "gpu_rigidbody/host/btGpuNarrowPhase.h"
-#include "gpu_rigidbody/host/btConfig.h"
+#include "gpu_rigidbody/host/b3GpuRigidBodyPipeline.h"
+#include "gpu_rigidbody/host/b3GpuNarrowPhase.h"
+#include "gpu_rigidbody/host/b3Config.h"
 #include "GpuRigidBodyDemoInternalData.h"
 #include "../gwenUserInterface.h"
 
diff --git a/demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp b/demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp
index 076cf73f5..969c30e7d 100644
--- a/demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp
+++ b/demo/gpudemo/rigidbody/GpuRigidBodyDemo.cpp
@@ -4,15 +4,15 @@
 #include "OpenGLWindow/GLInstancingRenderer.h"
 #include "BulletCommon/btQuaternion.h"
 #include "OpenGLWindow/btgWindowInterface.h"
-#include "gpu_broadphase/host/btGpuSapBroadphase.h"
+#include "gpu_broadphase/host/b3GpuSapBroadphase.h"
 #include "../GpuDemoInternalData.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "OpenGLWindow/OpenGLInclude.h"
 #include "OpenGLWindow/GLInstanceRendererInternalData.h"
 #include "parallel_primitives/host/btLauncherCL.h"
-#include "gpu_rigidbody/host/btGpuRigidBodyPipeline.h"
-#include "gpu_rigidbody/host/btGpuNarrowPhase.h"
-#include "gpu_rigidbody/host/btConfig.h"
+#include "gpu_rigidbody/host/b3GpuRigidBodyPipeline.h"
+#include "gpu_rigidbody/host/b3GpuNarrowPhase.h"
+#include "gpu_rigidbody/host/b3Config.h"
 #include "GpuRigidBodyDemoInternalData.h"
 
 static btKeyboardCallback oldCallback = 0;
@@ -104,15 +104,15 @@ void	GpuRigidBodyDemo::initPhysics(const ConstructionInfo& ci)
 		int errNum=0;
 
 		cl_program rbProg=0;
-		m_data->m_copyTransformsToVBOKernel = btOpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_rigidBodyKernelString,"copyTransformsToVBOKernel",&errNum,rbProg);
+		m_data->m_copyTransformsToVBOKernel = b3OpenCLUtils::compileCLKernelFromString(m_clData->m_clContext,m_clData->m_clDevice,s_rigidBodyKernelString,"copyTransformsToVBOKernel",&errNum,rbProg);
 		
-		btConfig config;
-		btGpuNarrowPhase* np = new btGpuNarrowPhase(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue,config);
-		btGpuSapBroadphase* bp = new btGpuSapBroadphase(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue);
+		b3Config config;
+		b3GpuNarrowPhase* np = new b3GpuNarrowPhase(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue,config);
+		b3GpuSapBroadphase* bp = new b3GpuSapBroadphase(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue);
 		m_data->m_np = np;
 		m_data->m_bp = bp;
 
-		m_data->m_rigidBodyPipeline = new btGpuRigidBodyPipeline(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue, np, bp);
+		m_data->m_rigidBodyPipeline = new b3GpuRigidBodyPipeline(m_clData->m_clContext,m_clData->m_clDevice,m_clData->m_clQueue, np, bp);
 
 
 		setupScene(ci);
diff --git a/demo/gpudemo/rigidbody/GpuRigidBodyDemoInternalData.h b/demo/gpudemo/rigidbody/GpuRigidBodyDemoInternalData.h
index 5bbdcf54e..5319a8b77 100644
--- a/demo/gpudemo/rigidbody/GpuRigidBodyDemoInternalData.h
+++ b/demo/gpudemo/rigidbody/GpuRigidBodyDemoInternalData.h
@@ -1,7 +1,7 @@
 #ifndef GPU_RIGIDBODY_INTERNAL_DATA_H
 #define GPU_RIGIDBODY_INTERNAL_DATA_H
 
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "parallel_primitives/host/btOpenCLArray.h"
 #include "BulletCommon/btVector3.h"
 
@@ -12,10 +12,10 @@ struct	GpuRigidBodyDemoInternalData
 
 	btOpenCLArray<btVector4>*	m_instancePosOrnColor;
 
-	class btGpuRigidBodyPipeline* m_rigidBodyPipeline;
+	class b3GpuRigidBodyPipeline* m_rigidBodyPipeline;
 
-	class btGpuNarrowPhase* m_np;
-	class btGpuSapBroadphase* m_bp;
+	class b3GpuNarrowPhase* m_np;
+	class b3GpuSapBroadphase* m_bp;
 
 	GpuRigidBodyDemoInternalData()
 		:m_instancePosOrnColor(0),
diff --git a/demo/gpudemo/rigidbody/GpuSphereScene.cpp b/demo/gpudemo/rigidbody/GpuSphereScene.cpp
index 320335ad1..26b2b1287 100644
--- a/demo/gpudemo/rigidbody/GpuSphereScene.cpp
+++ b/demo/gpudemo/rigidbody/GpuSphereScene.cpp
@@ -6,15 +6,15 @@
 #include "OpenGLWindow/GLInstancingRenderer.h"
 #include "BulletCommon/btQuaternion.h"
 #include "OpenGLWindow/btgWindowInterface.h"
-#include "gpu_broadphase/host/btGpuSapBroadphase.h"
+#include "gpu_broadphase/host/b3GpuSapBroadphase.h"
 #include "../GpuDemoInternalData.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "OpenGLWindow/OpenGLInclude.h"
 #include "OpenGLWindow/GLInstanceRendererInternalData.h"
 #include "parallel_primitives/host/btLauncherCL.h"
-#include "gpu_rigidbody/host/btGpuRigidBodyPipeline.h"
-#include "gpu_rigidbody/host/btGpuNarrowPhase.h"
-#include "gpu_rigidbody/host/btConfig.h"
+#include "gpu_rigidbody/host/b3GpuRigidBodyPipeline.h"
+#include "gpu_rigidbody/host/b3GpuNarrowPhase.h"
+#include "gpu_rigidbody/host/b3Config.h"
 #include "GpuRigidBodyDemoInternalData.h"
 #include "../gwenUserInterface.h"
 
diff --git a/docs/GDC2013_ErwinCoumans_GPU_rigid_body_simulation.pdf b/docs/GDC2013_ErwinCoumans_GPU_rigid_body_simulation.pdf
new file mode 100644
index 000000000..d48a5a8fc
--- /dev/null
+++ b/docs/GDC2013_ErwinCoumans_GPU_rigid_body_simulation.pdf
diff --git a/opencl/basic_initialize/btOpenCLInclude.h b/opencl/basic_initialize/b3OpenCLInclude.h
index 5f0e78da6..5f0e78da6 100644
--- a/opencl/basic_initialize/btOpenCLInclude.h
+++ b/opencl/basic_initialize/b3OpenCLInclude.h
diff --git a/opencl/basic_initialize/btOpenCLUtils.cpp b/opencl/basic_initialize/b3OpenCLUtils.cpp
index 57dcda2ba..3a7bb3f42 100644
--- a/opencl/basic_initialize/btOpenCLUtils.cpp
+++ b/opencl/basic_initialize/b3OpenCLUtils.cpp
@@ -25,8 +25,8 @@ bool gDebugSkipLoadingBinary = false;
 #ifdef _WIN32
 #pragma warning (disable:4996)
 #endif
-#include "btOpenCLUtils.h"
-//#include "btOpenCLInclude.h"
+#include "b3OpenCLUtils.h"
+//#include "b3OpenCLInclude.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -134,7 +134,7 @@ cl_platform_id btOpenCLUtils_getPlatform(int platformIndex0, cl_int* pErrNum)
 	return platform;
 }
 
-void btOpenCLUtils::getPlatformInfo(cl_platform_id platform, btOpenCLPlatformInfo* platformInfo)
+void b3OpenCLUtils::getPlatformInfo(cl_platform_id platform, btOpenCLPlatformInfo* platformInfo)
 {
 	cl_int ciErrNum;
 	ciErrNum = clGetPlatformInfo(	platform,CL_PLATFORM_VENDOR,BT_MAX_STRING_LENGTH,platformInfo->m_platformVendor,NULL);
@@ -148,7 +148,7 @@ void btOpenCLUtils::getPlatformInfo(cl_platform_id platform, btOpenCLPlatformInf
 void btOpenCLUtils_printPlatformInfo(cl_platform_id platform)
 {
 	btOpenCLPlatformInfo platformInfo;
-	btOpenCLUtils::getPlatformInfo (platform, &platformInfo);
+	b3OpenCLUtils::getPlatformInfo (platform, &platformInfo);
 	printf("Platform info:\n");
 	printf("  CL_PLATFORM_VENDOR: \t\t\t%s\n",platformInfo.m_platformVendor);
 	printf("  CL_PLATFORM_NAME: \t\t\t%s\n",platformInfo.m_platformName);
@@ -310,7 +310,7 @@ cl_context btOpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int
 //				printf("OpenCL platform details:\n");
 				btOpenCLPlatformInfo platformInfo;
 
-				btOpenCLUtils::getPlatformInfo(platform, &platformInfo);
+				b3OpenCLUtils::getPlatformInfo(platform, &platformInfo);
 
 				if (retPlatformId)
 					*retPlatformId = platform;
@@ -368,7 +368,7 @@ int btOpenCLUtils_getNumDevices(cl_context cxMainContext)
 
 
 
-void btOpenCLUtils::getDeviceInfo(cl_device_id device, btOpenCLDeviceInfo* info)
+void b3OpenCLUtils::getDeviceInfo(cl_device_id device, btOpenCLDeviceInfo* info)
 {
 	// CL_DEVICE_NAME
 	clGetDeviceInfo(device, CL_DEVICE_NAME, BT_MAX_STRING_LENGTH, &info->m_deviceName, NULL);
@@ -453,7 +453,7 @@ void btOpenCLUtils::getDeviceInfo(cl_device_id device, btOpenCLDeviceInfo* info)
 void btOpenCLUtils_printDeviceInfo(cl_device_id device)
 {
 	btOpenCLDeviceInfo info;
-	btOpenCLUtils::getDeviceInfo(device,&info);
+	b3OpenCLUtils::getDeviceInfo(device,&info);
 	printf("Device Info:\n");
 	printf("  CL_DEVICE_NAME: \t\t\t%s\n", info.m_deviceName);
 	printf("  CL_DEVICE_VENDOR: \t\t\t%s\n", info.m_deviceVendor);
diff --git a/opencl/basic_initialize/btOpenCLUtils.h b/opencl/basic_initialize/b3OpenCLUtils.h
index 29a732e41..42ccd0014 100644
--- a/opencl/basic_initialize/btOpenCLUtils.h
+++ b/opencl/basic_initialize/b3OpenCLUtils.h
@@ -19,7 +19,7 @@ subject to the following restrictions:
 #ifndef BT_OPENCL_UTILS_H
 #define BT_OPENCL_UTILS_H
 
-#include "btOpenCLInclude.h"
+#include "b3OpenCLInclude.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -110,7 +110,7 @@ typedef struct
 
 
 ///C++ API for OpenCL utilities: convenience functions
-struct btOpenCLUtils
+struct b3OpenCLUtils
 {
 	/// CL Context optionally takes a GL context. This is a generic type because we don't really want this code
 	/// to have to understand GL types. It is a HGLRC in _WIN32 or a GLXContext otherwise.
diff --git a/opencl/basic_initialize/main.cpp b/opencl/basic_initialize/main.cpp
index 263ba1b30..19d4445f4 100644
--- a/opencl/basic_initialize/main.cpp
+++ b/opencl/basic_initialize/main.cpp
@@ -15,7 +15,7 @@ subject to the following restrictions:
 
 ///original author: Erwin Coumans
 
-#include "btOpenCLUtils.h"
+#include "b3OpenCLUtils.h"
 #include <stdio.h>
 
 cl_context			g_cxMainContext;
@@ -28,33 +28,33 @@ int main(int argc, char* argv[])
 	int ciErrNum = 0;
 	
 	cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
-	const char* vendorSDK = btOpenCLUtils::getSdkVendorName();
+	const char* vendorSDK = b3OpenCLUtils::getSdkVendorName();
 
 	printf("This program was compiled using the %s OpenCL SDK\n",vendorSDK);
-	int numPlatforms = btOpenCLUtils::getNumPlatforms();
+	int numPlatforms = b3OpenCLUtils::getNumPlatforms();
 	printf("Num Platforms = %d\n", numPlatforms);
 
 	for (int i=0;i<numPlatforms;i++)
 	{
-		cl_platform_id platform = btOpenCLUtils::getPlatform(i);
+		cl_platform_id platform = b3OpenCLUtils::getPlatform(i);
 		btOpenCLPlatformInfo platformInfo;
-		btOpenCLUtils::getPlatformInfo(platform,&platformInfo);
+		b3OpenCLUtils::getPlatformInfo(platform,&platformInfo);
 		printf("--------------------------------\n");
 		printf("Platform info for platform nr %d:\n",i);
 		printf("  CL_PLATFORM_VENDOR: \t\t\t%s\n",platformInfo.m_platformVendor);
 		printf("  CL_PLATFORM_NAME: \t\t\t%s\n",platformInfo.m_platformName);
 		printf("  CL_PLATFORM_VERSION: \t\t\t%s\n",platformInfo.m_platformVersion);
 		
-		cl_context context = btOpenCLUtils::createContextFromPlatform(platform,deviceType,&ciErrNum);
+		cl_context context = b3OpenCLUtils::createContextFromPlatform(platform,deviceType,&ciErrNum);
 		
-		int numDevices = btOpenCLUtils::getNumDevices(context);
+		int numDevices = b3OpenCLUtils::getNumDevices(context);
 		printf("Num Devices = %d\n", numDevices);
 		for (int j=0;j<numDevices;j++)
 		{
-			cl_device_id dev = btOpenCLUtils::getDevice(context,j);
+			cl_device_id dev = b3OpenCLUtils::getDevice(context,j);
 			btOpenCLDeviceInfo devInfo;
-			btOpenCLUtils::getDeviceInfo(dev,&devInfo);
-			btOpenCLUtils::printDeviceInfo(dev);
+			b3OpenCLUtils::getDeviceInfo(dev,&devInfo);
+			b3OpenCLUtils::printDeviceInfo(dev);
 		}
 
 		clReleaseContext(context);
@@ -65,21 +65,21 @@ int main(int argc, char* argv[])
 	
 	void* glCtx=0;
 	void* glDC = 0;
-	printf("Initialize OpenCL using btOpenCLUtils::createContextFromType for CL_DEVICE_TYPE_GPU\n");
-	g_cxMainContext = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
+	printf("Initialize OpenCL using b3OpenCLUtils::createContextFromType for CL_DEVICE_TYPE_GPU\n");
+	g_cxMainContext = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, glCtx, glDC);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
 
 	if (g_cxMainContext)
 	{
-		int numDev = btOpenCLUtils::getNumDevices(g_cxMainContext);
+		int numDev = b3OpenCLUtils::getNumDevices(g_cxMainContext);
 
 		for (int i=0;i<numDev;i++)
 		{
 			cl_device_id		device;
-			device = btOpenCLUtils::getDevice(g_cxMainContext,i);
+			device = b3OpenCLUtils::getDevice(g_cxMainContext,i);
 			btOpenCLDeviceInfo clInfo;
-			btOpenCLUtils::getDeviceInfo(device,&clInfo);
-			btOpenCLUtils::printDeviceInfo(device);
+			b3OpenCLUtils::getDeviceInfo(device,&clInfo);
+			b3OpenCLUtils::printDeviceInfo(device);
 			// create a command-queue
 			g_cqCommandQue = clCreateCommandQueue(g_cxMainContext, device, 0, &ciErrNum);
 			oclCHECKERROR(ciErrNum, CL_SUCCESS);
diff --git a/opencl/basic_initialize/premake4.lua b/opencl/basic_initialize/premake4.lua
index a9a07f1c8..44af06b5d 100644
--- a/opencl/basic_initialize/premake4.lua
+++ b/opencl/basic_initialize/premake4.lua
@@ -15,8 +15,8 @@ function createProject(vendor)
 
 		files {
 			"main.cpp",
-			"btOpenCLUtils.cpp",
-			"btOpenCLUtils.h"
+			"b3OpenCLUtils.cpp",
+			"b3OpenCLUtils.h"
 		}
 		
 	end
diff --git a/opencl/gpu_broadphase/host/btGpuSapBroadphase.cpp b/opencl/gpu_broadphase/host/b3GpuSapBroadphase.cpp
index 9987146a9..b08ba53fa 100644
--- a/opencl/gpu_broadphase/host/btGpuSapBroadphase.cpp
+++ b/opencl/gpu_broadphase/host/b3GpuSapBroadphase.cpp
@@ -1,9 +1,9 @@
 
-#include "btGpuSapBroadphase.h"
+#include "b3GpuSapBroadphase.h"
 #include "BulletCommon/btVector3.h"
 #include "parallel_primitives/host/btLauncherCL.h"
 #include "BulletCommon/btQuickprof.h"
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 
 
 #include "../kernels/sapKernels.h"
@@ -11,7 +11,7 @@
 #include "BulletCommon/btMinMax.h"
 
 
-btGpuSapBroadphase::btGpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue  q )
+b3GpuSapBroadphase::b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue  q )
 :m_context(ctx),
 m_device(device),
 m_queue(q),
@@ -28,44 +28,44 @@ m_currentBuffer(-1)
     
 	cl_int errNum=0;
 
-	cl_program sapProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,sapSrc,&errNum,"","opencl/gpu_broadphase/kernels/sap.cl");
+	cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapSrc,&errNum,"","opencl/gpu_broadphase/kernels/sap.cl");
 	btAssert(errNum==CL_SUCCESS);
-	cl_program sapFastProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,sapFastSrc,&errNum,"","opencl/gpu_broadphase/kernels/sapFast.cl");
+	cl_program sapFastProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapFastSrc,&errNum,"","opencl/gpu_broadphase/kernels/sapFast.cl");
 	btAssert(errNum==CL_SUCCESS);
 
 	
-	//m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
-	//m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelBarrier",&errNum,sapProg );
-	//m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
+	//m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
+	//m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelBarrier",&errNum,sapProg );
+	//m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
 
 	
-	m_sap2Kernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelTwoArrays",&errNum,sapProg );
+	m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelTwoArrays",&errNum,sapProg );
 	btAssert(errNum==CL_SUCCESS);
 
 #if 0
 
-	m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
+	m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
 	btAssert(errNum==CL_SUCCESS);
 #else
 #ifndef __APPLE__
-	m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapFastSrc, "computePairsKernel",&errNum,sapFastProg );
+	m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapFastSrc, "computePairsKernel",&errNum,sapFastProg );
 	btAssert(errNum==CL_SUCCESS);
 #else
-	m_sapKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
+	m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
 	btAssert(errNum==CL_SUCCESS);
 #endif
 #endif
 
-	m_flipFloatKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "flipFloatKernel",&errNum,sapProg );
+	m_flipFloatKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "flipFloatKernel",&errNum,sapProg );
 
-	m_copyAabbsKernel= btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "copyAabbsKernel",&errNum,sapProg );
+	m_copyAabbsKernel= b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "copyAabbsKernel",&errNum,sapProg );
 
-	m_scatterKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "scatterKernel",&errNum,sapProg );
+	m_scatterKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "scatterKernel",&errNum,sapProg );
 
 	m_sorter = new btRadixSort32CL(m_context,m_device,m_queue);
 }
 
-btGpuSapBroadphase::~btGpuSapBroadphase()
+b3GpuSapBroadphase::~b3GpuSapBroadphase()
 {
 	delete m_sorter;
 	clReleaseKernel(m_scatterKernel);
@@ -97,7 +97,7 @@ static unsigned int FloatFlip(float fl)
 	return f ^ mask;
 };
 
-void  btGpuSapBroadphase::init3dSap()
+void  b3GpuSapBroadphase::init3dSap()
 {
 	if (m_currentBuffer<0)
 	{
@@ -123,7 +123,7 @@ void  btGpuSapBroadphase::init3dSap()
 		}
 	}
 }
-void  btGpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
+void  b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
 {
 	btAssert(m_currentBuffer>=0);
 	if (m_currentBuffer<0)
@@ -155,7 +155,7 @@ void  btGpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
 	
 }
 
-void  btGpuSapBroadphase::calculateOverlappingPairsHost()
+void  b3GpuSapBroadphase::calculateOverlappingPairsHost()
 {
 	//test
 	//if (m_currentBuffer>=0)
@@ -249,7 +249,7 @@ void  btGpuSapBroadphase::calculateOverlappingPairsHost()
 
 }
 
-void  btGpuSapBroadphase::calculateOverlappingPairs()
+void  b3GpuSapBroadphase::calculateOverlappingPairs()
 {
 	int axis = 0;//todo on GPU for now hardcode
 
@@ -512,7 +512,7 @@ void  btGpuSapBroadphase::calculateOverlappingPairs()
 	
 }
 
-void btGpuSapBroadphase::writeAabbsToGpu()
+void b3GpuSapBroadphase::writeAabbsToGpu()
 {
 	m_allAabbsGPU.copyFromHost(m_allAabbsCPU);//might not be necessary, the 'setupGpuAabbsFull' already takes care of this
 	m_smallAabbsGPU.copyFromHost(m_smallAabbsCPU);
@@ -520,10 +520,10 @@ void btGpuSapBroadphase::writeAabbsToGpu()
 
 }
 
-void btGpuSapBroadphase::createLargeProxy(const btVector3& aabbMin,  const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)
+void b3GpuSapBroadphase::createLargeProxy(const btVector3& aabbMin,  const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)
 {
 	int index = userPtr;
-	btSapAabb aabb;
+	b3SapAabb aabb;
 	for (int i=0;i<4;i++)
 	{
 		aabb.m_min[i] = aabbMin[i];
@@ -535,10 +535,10 @@ void btGpuSapBroadphase::createLargeProxy(const btVector3& aabbMin,  const btVec
 	m_allAabbsCPU.push_back(aabb);
 }
 
-void btGpuSapBroadphase::createProxy(const btVector3& aabbMin,  const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)
+void b3GpuSapBroadphase::createProxy(const btVector3& aabbMin,  const btVector3& aabbMax, int userPtr ,short int collisionFilterGroup,short int collisionFilterMask)
 {
 	int index = userPtr;
-	btSapAabb aabb;
+	b3SapAabb aabb;
 	for (int i=0;i<4;i++)
 	{
 		aabb.m_min[i] = aabbMin[i];
@@ -550,16 +550,16 @@ void btGpuSapBroadphase::createProxy(const btVector3& aabbMin,  const btVector3&
 	m_allAabbsCPU.push_back(aabb);
 }
 
-cl_mem	btGpuSapBroadphase::getAabbBufferWS()
+cl_mem	b3GpuSapBroadphase::getAabbBufferWS()
 {
 	return m_allAabbsGPU.getBufferCL();
 }
 
-int	btGpuSapBroadphase::getNumOverlap()
+int	b3GpuSapBroadphase::getNumOverlap()
 {
 	return m_overlappingPairs.size();
 }
-cl_mem	btGpuSapBroadphase::getOverlappingPairBuffer()
+cl_mem	b3GpuSapBroadphase::getOverlappingPairBuffer()
 {
 	return m_overlappingPairs.getBufferCL();
 }
 \ No newline at end of file
diff --git a/opencl/gpu_broadphase/host/btGpuSapBroadphase.h b/opencl/gpu_broadphase/host/b3GpuSapBroadphase.h
index 2eb5e3dbb..5f26efc8a 100644
--- a/opencl/gpu_broadphase/host/btGpuSapBroadphase.h
+++ b/opencl/gpu_broadphase/host/b3GpuSapBroadphase.h
@@ -6,11 +6,11 @@
 class btVector3;
 #include "parallel_primitives/host/btRadixSort32CL.h"
 
-#include "btSapAabb.h"
+#include "b3SapAabb.h"
 
 
 
-class btGpuSapBroadphase
+class b3GpuSapBroadphase
 {
 	
 	cl_context				m_context;
@@ -30,24 +30,24 @@ class btGpuSapBroadphase
 
 	public:
 	
-	btOpenCLArray<btSapAabb>	m_allAabbsGPU;
-	btAlignedObjectArray<btSapAabb>	m_allAabbsCPU;
+	btOpenCLArray<b3SapAabb>	m_allAabbsGPU;
+	btAlignedObjectArray<b3SapAabb>	m_allAabbsCPU;
 
-	btOpenCLArray<btSapAabb>	m_smallAabbsGPU;
-	btAlignedObjectArray<btSapAabb>	m_smallAabbsCPU;
+	btOpenCLArray<b3SapAabb>	m_smallAabbsGPU;
+	btAlignedObjectArray<b3SapAabb>	m_smallAabbsCPU;
 
-	btOpenCLArray<btSapAabb>	m_largeAabbsGPU;
-	btAlignedObjectArray<btSapAabb>	m_largeAabbsCPU;
+	btOpenCLArray<b3SapAabb>	m_largeAabbsGPU;
+	btAlignedObjectArray<b3SapAabb>	m_largeAabbsCPU;
 
 	btOpenCLArray<btInt2>		m_overlappingPairs;
 
 	//temporary gpu work memory
 	btOpenCLArray<btSortData>	m_gpuSmallSortData;
-	btOpenCLArray<btSapAabb>	m_gpuSmallSortedAabbs;
+	btOpenCLArray<b3SapAabb>	m_gpuSmallSortedAabbs;
 
 
-	btGpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue  q );
-	virtual ~btGpuSapBroadphase();
+	b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue  q );
+	virtual ~b3GpuSapBroadphase();
 	
 	void  calculateOverlappingPairs();
 	void  calculateOverlappingPairsHost();
diff --git a/opencl/gpu_broadphase/host/btSapAabb.h b/opencl/gpu_broadphase/host/b3SapAabb.h
index 3354ffe9e..423e10ba9 100644
--- a/opencl/gpu_broadphase/host/btSapAabb.h
+++ b/opencl/gpu_broadphase/host/b3SapAabb.h
@@ -1,7 +1,7 @@
 #ifndef BT_SAP_AABB_H
 #define BT_SAP_AABB_H
 
-struct btSapAabb
+struct b3SapAabb
 {
 	union
 	{
diff --git a/opencl/gpu_broadphase/test/main.cpp b/opencl/gpu_broadphase/test/main.cpp
index 62a5a667d..90fbb52d5 100644
--- a/opencl/gpu_broadphase/test/main.cpp
+++ b/opencl/gpu_broadphase/test/main.cpp
@@ -14,8 +14,8 @@ subject to the following restrictions:
 
 
 #include <stdio.h>
-#include "../basic_initialize/btOpenCLUtils.h"
-#include "../host/btGpuSapBroadphase.h"
+#include "../basic_initialize/b3OpenCLUtils.h"
+#include "../host/b3GpuSapBroadphase.h"
 #include "BulletCommon/btVector3.h"
 #include "parallel_primitives/host/btFillCL.h"
 #include "parallel_primitives/host/btBoundSearchCL.h"
@@ -47,17 +47,17 @@ void initCL(int preferredDeviceIndex, int preferredPlatformIndex)
 
 	cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
 
-	g_context = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
+	g_context = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
-	int numDev = btOpenCLUtils::getNumDevices(g_context);
+	int numDev = b3OpenCLUtils::getNumDevices(g_context);
 	if (numDev>0)
 	{
 		btOpenCLDeviceInfo info;
-		g_device= btOpenCLUtils::getDevice(g_context,0);
+		g_device= b3OpenCLUtils::getDevice(g_context,0);
 		g_queue = clCreateCommandQueue(g_context, g_device, 0, &ciErrNum);
 		oclCHECKERROR(ciErrNum, CL_SUCCESS);
-        btOpenCLUtils::printDeviceInfo(g_device);
-		btOpenCLUtils::getDeviceInfo(g_device,&info);
+        b3OpenCLUtils::printDeviceInfo(g_device);
+		b3OpenCLUtils::getDeviceInfo(g_device,&info);
 		g_deviceName = info.m_deviceName;
 	}
 }
@@ -73,7 +73,7 @@ inline void broadphaseTest()
 {
 	TEST_INIT;
 
-	btGpuSapBroadphase* sap = new btGpuSapBroadphase(g_context,g_device,g_queue);
+	b3GpuSapBroadphase* sap = new b3GpuSapBroadphase(g_context,g_device,g_queue);
 	int group=1;
 	int mask=1;
 	btVector3 aabbMin(0,0,0);
diff --git a/opencl/gpu_broadphase/test/premake4.lua b/opencl/gpu_broadphase/test/premake4.lua
index 98a6e7b36..7803cf673 100644
--- a/opencl/gpu_broadphase/test/premake4.lua
+++ b/opencl/gpu_broadphase/test/premake4.lua
@@ -16,11 +16,11 @@ function createProject(vendor)
 		
 		files {
 			"main.cpp",
-			"../../basic_initialize/btOpenCLInclude.h",
-			"../../basic_initialize/btOpenCLUtils.cpp",
-			"../../basic_initialize/btOpenCLUtils.h",
-			"../host/btGpuSapBroadphase.cpp",
-			"../host/btGpuSapBroadphase.h",
+			"../../basic_initialize/b3OpenCLInclude.h",
+			"../../basic_initialize/b3OpenCLUtils.cpp",
+			"../../basic_initialize/b3OpenCLUtils.h",
+			"../host/b3GpuSapBroadphase.cpp",
+			"../host/b3GpuSapBroadphase.h",
 			"../../parallel_primitives/host/btFillCL.cpp",
 			"../../parallel_primitives/host/btFillCL.h",
 			"../../parallel_primitives/host/btBoundSearchCL.cpp",
diff --git a/opencl/gpu_narrowphase/host/btCollidable.h b/opencl/gpu_narrowphase/host/b3Collidable.h
index 86ad51efe..69805617d 100644
--- a/opencl/gpu_narrowphase/host/btCollidable.h
+++ b/opencl/gpu_narrowphase/host/b3Collidable.h
@@ -14,7 +14,7 @@ enum btShapeTypes
 	MAX_NUM_SHAPE_TYPES,
 };
 
-struct btCollidable
+struct b3Collidable
 {
 	int m_numChildShapes;
 	float m_radius;
diff --git a/opencl/gpu_narrowphase/host/btContact4.h b/opencl/gpu_narrowphase/host/b3Contact4.h
index 10aaec47c..15e722d38 100644
--- a/opencl/gpu_narrowphase/host/btContact4.h
+++ b/opencl/gpu_narrowphase/host/b3Contact4.h
@@ -4,7 +4,7 @@
 #include "BulletCommon/btVector3.h"
 
 
-ATTRIBUTE_ALIGNED16(struct) btContact4
+ATTRIBUTE_ALIGNED16(struct) b3Contact4
 {
 	BT_DECLARE_ALIGNED_ALLOCATOR();
 
diff --git a/opencl/gpu_narrowphase/host/ConvexHullContact.cpp b/opencl/gpu_narrowphase/host/b3ConvexHullContact.cpp
index f7403a42b..b0e32ec51 100644
--- a/opencl/gpu_narrowphase/host/ConvexHullContact.cpp
+++ b/opencl/gpu_narrowphase/host/b3ConvexHullContact.cpp
@@ -20,16 +20,16 @@ subject to the following restrictions:
 
 //#define BT_DEBUG_SAT_FACE
 
-#include "ConvexHullContact.h"
+#include "b3ConvexHullContact.h"
 #include <string.h>//memcpy
-#include "btConvexPolyhedronCL.h"
+#include "b3ConvexPolyhedronCL.h"
 
 
 typedef btAlignedObjectArray<btVector3> btVertexArray;
 #include "BulletCommon/btQuickprof.h"
 
 #include <float.h> //for FLT_MAX
-#include "basic_initialize/btOpenCLUtils.h"
+#include "basic_initialize/b3OpenCLUtils.h"
 #include "parallel_primitives/host/btLauncherCL.h"
 //#include "AdlQuaternion.h"
 
@@ -63,21 +63,21 @@ m_totalContactsOut(m_context, m_queue)
 //		sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl");
 //#endif
 
-		cl_program satProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,src,&errNum,flags,"opencl/gpu_narrowphase/kernels/sat.cl");
+		cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,src,&errNum,flags,"opencl/gpu_narrowphase/kernels/sat.cl");
 		btAssert(errNum==CL_SUCCESS);
 
-		m_findSeparatingAxisKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findSeparatingAxisKernel",&errNum,satProg );
+		m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findSeparatingAxisKernel",&errNum,satProg );
 		btAssert(m_findSeparatingAxisKernel);
 		btAssert(errNum==CL_SUCCESS);
 
-		m_findConcaveSeparatingAxisKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findConcaveSeparatingAxisKernel",&errNum,satProg );
+		m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findConcaveSeparatingAxisKernel",&errNum,satProg );
 		btAssert(m_findConcaveSeparatingAxisKernel);
 		btAssert(errNum==CL_SUCCESS);
 		
-		m_findCompoundPairsKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findCompoundPairsKernel",&errNum,satProg );
+		m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "findCompoundPairsKernel",&errNum,satProg );
 		btAssert(m_findCompoundPairsKernel);
 		btAssert(errNum==CL_SUCCESS);
-		m_processCompoundPairsKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "processCompoundPairsKernel",&errNum,satProg );
+		m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,src, "processCompoundPairsKernel",&errNum,satProg );
 		btAssert(m_processCompoundPairsKernel);
 		btAssert(errNum==CL_SUCCESS);
 	}
@@ -91,29 +91,29 @@ m_totalContactsOut(m_context, m_queue)
 //		sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl");
 //#endif
 
-		cl_program satClipContactsProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,srcClip,&errNum,flags,"opencl/gpu_narrowphase/kernels/satClipHullContacts.cl");
+		cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcClip,&errNum,flags,"opencl/gpu_narrowphase/kernels/satClipHullContacts.cl");
 		btAssert(errNum==CL_SUCCESS);
 
-		m_clipHullHullKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullKernel",&errNum,satClipContactsProg);
+		m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullKernel",&errNum,satClipContactsProg);
 		btAssert(errNum==CL_SUCCESS);
 
-		m_clipCompoundsHullHullKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipCompoundsHullHullKernel",&errNum,satClipContactsProg);
+		m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipCompoundsHullHullKernel",&errNum,satClipContactsProg);
 		btAssert(errNum==CL_SUCCESS);
 		
 
-        m_findClippingFacesKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "findClippingFacesKernel",&errNum,satClipContactsProg);
+        m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "findClippingFacesKernel",&errNum,satClipContactsProg);
 		btAssert(errNum==CL_SUCCESS);
 
-        m_clipFacesAndContactReductionKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipFacesAndContactReductionKernel",&errNum,satClipContactsProg);
+        m_clipFacesAndContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipFacesAndContactReductionKernel",&errNum,satClipContactsProg);
 		btAssert(errNum==CL_SUCCESS);        
 
-		m_clipHullHullConcaveConvexKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullConcaveConvexKernel",&errNum,satClipContactsProg);
+		m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullConcaveConvexKernel",&errNum,satClipContactsProg);
 		btAssert(errNum==CL_SUCCESS);
 
-		m_extractManifoldAndAddContactKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg);
+		m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg);
 		btAssert(errNum==CL_SUCCESS);
 
-        m_newContactReductionKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip,
+        m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip,
                             "newContactReductionKernel",&errNum,satClipContactsProg);
 		btAssert(errNum==CL_SUCCESS);
 	}
@@ -131,27 +131,27 @@ m_totalContactsOut(m_context, m_queue)
 	 if (1)
 	{
 		const char* srcBvh = bvhTraversalKernelCL;
-		cl_program bvhTraversalProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,srcBvh,&errNum,"","opencl/gpu_narrowphase/kernels/bvhTraversal.cl");
+		cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcBvh,&errNum,"","opencl/gpu_narrowphase/kernels/bvhTraversal.cl");
 		btAssert(errNum==CL_SUCCESS);
 
-		m_bvhTraversalKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,srcBvh, "bvhTraversalKernel",&errNum,bvhTraversalProg,"");
+		m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcBvh, "bvhTraversalKernel",&errNum,bvhTraversalProg,"");
 		btAssert(errNum==CL_SUCCESS);
 
 	}
         
 	 {
 		 const char* primitiveContactsSrc = primitiveContactsKernelsCL;
-		cl_program primitiveContactsProg = btOpenCLUtils::compileCLProgramFromString(m_context,m_device,primitiveContactsSrc,&errNum,"","opencl/gpu_narrowphase/kernels/primitiveContacts.cl");
+		cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,primitiveContactsSrc,&errNum,"","opencl/gpu_narrowphase/kernels/primitiveContacts.cl");
 		btAssert(errNum==CL_SUCCESS);
 
-		m_primitiveContactsKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "primitiveContactsKernel",&errNum,primitiveContactsProg,"");
+		m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "primitiveContactsKernel",&errNum,primitiveContactsProg,"");
 		btAssert(errNum==CL_SUCCESS);
 
-		m_findConcaveSphereContactsKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "findConcaveSphereContactsKernel",&errNum,primitiveContactsProg );
+		m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "findConcaveSphereContactsKernel",&errNum,primitiveContactsProg );
 		btAssert(errNum==CL_SUCCESS);
 		btAssert(m_findConcaveSphereContactsKernel);
 
-		m_processCompoundPairsPrimitivesKernel = btOpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "processCompoundPairsPrimitivesKernel",&errNum,primitiveContactsProg,"");
+		m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "processCompoundPairsPrimitivesKernel",&errNum,primitiveContactsProg,"");
 		btAssert(errNum==CL_SUCCESS);
 		btAssert(m_processCompoundPairsPrimitivesKernel);
 		 
@@ -408,19 +408,19 @@ int extractManifoldSequentialGlobal( const float4* p, int nPoints, const float4&
 void computeContactPlaneConvex(int pairIndex,
 																int bodyIndexA, int bodyIndexB, 
 																int collidableIndexA, int collidableIndexB, 
-																const btRigidBodyCL* rigidBodies, 
-																const btCollidable* collidables,
-																const btConvexPolyhedronCL* convexShapes,
+																const b3RigidBodyCL* rigidBodies, 
+																const b3Collidable* collidables,
+																const b3ConvexPolyhedronCL* convexShapes,
 																const btVector3* convexVertices,
 																const int* convexIndices,
 																const btGpuFace* faces,
-																btContact4* globalContactsOut,
+																b3Contact4* globalContactsOut,
 																int& nGlobalContactsOut,
 																int maxContactCapacity)
 {
 
 		int shapeIndex = collidables[collidableIndexB].m_shapeIndex;
-	const btConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];
+	const b3ConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];
 	
 	btVector3 posB = rigidBodies[bodyIndexB].m_pos;
 	btQuaternion ornB = rigidBodies[bodyIndexB].m_quat;
@@ -513,7 +513,7 @@ void computeContactPlaneConvex(int pairIndex,
 			dstIdx=nGlobalContactsOut;
 			nGlobalContactsOut++;
 
-			btContact4* c = &globalContactsOut[dstIdx];
+			b3Contact4* c = &globalContactsOut[dstIdx];
 			c->m_worldNormal = planeNormalWorld;
 			c->setFrictionCoeff(0.7);
 			c->setRestituitionCoeff(0.f);
@@ -541,13 +541,13 @@ void computeContactPlaneConvex(int pairIndex,
 void computeContactPlaneCompound(int pairIndex,
 																int bodyIndexA, int bodyIndexB, 
 																int collidableIndexA, int collidableIndexB, 
-																const btRigidBodyCL* rigidBodies, 
-																const btCollidable* collidables,
-																const btConvexPolyhedronCL* convexShapes,
+																const b3RigidBodyCL* rigidBodies, 
+																const b3Collidable* collidables,
+																const b3ConvexPolyhedronCL* convexShapes,
 																const btVector3* convexVertices,
 																const int* convexIndices,
 																const btGpuFace* faces,
-																btContact4* globalContactsOut,
+																b3Contact4* globalContactsOut,
 																int& nGlobalContactsOut,
 																int maxContactCapacity)
 {
@@ -558,7 +558,7 @@ void computeContactPlaneCompound(int pairIndex,
 
 
 	int shapeIndex = collidables[collidableIndexB].m_shapeIndex;
-	const btConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];
+	const b3ConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];
 	
 	btVector3 posB = rigidBodies[bodyIndexB].m_pos;
 	btQuaternion ornB = rigidBodies[bodyIndexB].m_quat;
@@ -651,7 +651,7 @@ void computeContactPlaneCompound(int pairIndex,
 			dstIdx=nGlobalContactsOut;
 			nGlobalContactsOut++;
 
-			btContact4* c = &globalContactsOut[dstIdx];
+			b3Contact4* c = &globalContactsOut[dstIdx];
 			c->m_worldNormal = planeNormalWorld;
 			c->setFrictionCoeff(0.7);
 			c->setRestituitionCoeff(0.f);
@@ -680,13 +680,13 @@ void computeContactPlaneCompound(int pairIndex,
 void	computeContactSphereConvex(int pairIndex,
 																int bodyIndexA, int bodyIndexB, 
 																int collidableIndexA, int collidableIndexB, 
-																const btRigidBodyCL* rigidBodies, 
-																const btCollidable* collidables,
-																const btConvexPolyhedronCL* convexShapes,
+																const b3RigidBodyCL* rigidBodies, 
+																const b3Collidable* collidables,
+																const b3ConvexPolyhedronCL* convexShapes,
 																const btVector3* convexVertices,
 																const int* convexIndices,
 																const btGpuFace* faces,
-																btContact4* globalContactsOut,
+																b3Contact4* globalContactsOut,
 																int& nGlobalContactsOut,
 																int maxContactCapacity)
 {
@@ -814,7 +814,7 @@ void	computeContactSphereConvex(int pairIndex,
 			dstIdx=nGlobalContactsOut;
 			nGlobalContactsOut++;
 
-			btContact4* c = &globalContactsOut[dstIdx];
+			b3Contact4* c = &globalContactsOut[dstIdx];
 			c->m_worldNormal = normalOnSurfaceB1;
 			c->setFrictionCoeff(0.7);
 			c->setRestituitionCoeff(0.f);
@@ -833,15 +833,15 @@ void	computeContactSphereConvex(int pairIndex,
 
 
 void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btInt2>* pairs, int nPairs,
-			const btOpenCLArray<btRigidBodyCL>* bodyBuf,
-			btOpenCLArray<btContact4>* contactOut, int& nContacts,
+			const btOpenCLArray<b3RigidBodyCL>* bodyBuf,
+			btOpenCLArray<b3Contact4>* contactOut, int& nContacts,
 			int maxContactCapacity,
-			const btOpenCLArray<btConvexPolyhedronCL>& convexData,
+			const btOpenCLArray<b3ConvexPolyhedronCL>& convexData,
 			const btOpenCLArray<btVector3>& gpuVertices,
 			const btOpenCLArray<btVector3>& gpuUniqueEdges,
 			const btOpenCLArray<btGpuFace>& gpuFaces,
 			const btOpenCLArray<int>& gpuIndices,
-			const btOpenCLArray<btCollidable>& gpuCollidables,
+			const btOpenCLArray<b3Collidable>& gpuCollidables,
 			const btOpenCLArray<btGpuChildShape>& gpuChildShapes,
 
 			const btOpenCLArray<btYetAnotherAabb>& clAabbsWS,
@@ -850,7 +850,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
             btOpenCLArray<btVector3>& worldNormalsAGPU,
             btOpenCLArray<btVector3>& worldVertsA1GPU,
             btOpenCLArray<btVector3>& worldVertsB2GPU,    
-			btAlignedObjectArray<class btOptimizedBvh*>& bvhData,
+			btAlignedObjectArray<class b3OptimizedBvh*>& bvhData,
 			btOpenCLArray<btQuantizedBvhNode>*	treeNodesGPU,
 			btOpenCLArray<btBvhSubtreeInfo>*	subTreesGPU,
 			int numObjects,
@@ -870,12 +870,12 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
 	btAlignedObjectArray<btInt2> hostPairs;
 	pairs->copyToHost(hostPairs);
 
-	btAlignedObjectArray<btRigidBodyCL> hostBodyBuf;
+	btAlignedObjectArray<b3RigidBodyCL> hostBodyBuf;
 	bodyBuf->copyToHost(hostBodyBuf);
 
 	
 
-	btAlignedObjectArray<btConvexPolyhedronCL> hostConvexData;
+	btAlignedObjectArray<b3ConvexPolyhedronCL> hostConvexData;
 	convexData.copyToHost(hostConvexData);
 
 	btAlignedObjectArray<btVector3> hostVertices;
@@ -887,7 +887,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
 	gpuFaces.copyToHost(hostFaces);
 	btAlignedObjectArray<int> hostIndices;
 	gpuIndices.copyToHost(hostIndices);
-	btAlignedObjectArray<btCollidable> hostCollidables;
+	btAlignedObjectArray<b3Collidable> hostCollidables;
 	gpuCollidables.copyToHost(hostCollidables);
 	
 	btAlignedObjectArray<btGpuChildShape> cpuChildShapes;
@@ -896,7 +896,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
 
 	btAlignedObjectArray<btInt4> hostTriangleConvexPairs;
 
-	btAlignedObjectArray<btContact4> hostContacts;
+	btAlignedObjectArray<b3Contact4> hostContacts;
 	if (nContacts)
 	{
 		contactOut->copyToHost(hostContacts);
@@ -1355,7 +1355,7 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( const btOpenCLArray<btI
 			clFinish(m_queue);
 			nContacts = m_totalContactsOut.at(0);
 			contactOut->resize(nContacts);
-			btAlignedObjectArray<btContact4> cpuContacts;
+			btAlignedObjectArray<b3Contact4> cpuContacts;
 			contactOut->copyToHost(cpuContacts);
 //			printf("nContacts after = %d\n", nContacts);
 		}
diff --git a/opencl/gpu_narrowphase/host/ConvexHullContact.h b/opencl/gpu_narrowphase/host/b3ConvexHullContact.h
index 66b736243..52d61234a 100644
--- a/opencl/gpu_narrowphase/host/ConvexHullContact.h
+++ b/opencl/gpu_narrowphase/host/b3ConvexHullContact.h
@@ -3,15 +3,15 @@
 #define _CONVEX_HULL_CONTACT_H
 
 #include "parallel_primitives/host/btOpenCLArray.h"
-#include "btRigidBodyCL.h"
+#include "b3RigidBodyCL.h"
 #include "BulletCommon/btAlignedObjectArray.h"
-#include "btConvexUtility.h"
-#include "btConvexPolyhedronCL.h"
-#include "btCollidable.h"
-#include "btContact4.h"
+#include "b3ConvexUtility.h"
+#include "b3ConvexPolyhedronCL.h"
+#include "b3Collidable.h"
+#include "b3Contact4.h"
 #include "parallel_primitives/host/btInt2.h"
 #include "parallel_primitives/host/btInt4.h"
-#include "btOptimizedBvh.h"
+#include "b3OptimizedBvh.h"
 
 //#include "../../dynamics/basic_demo/Stubs/ChNarrowPhase.h"
 
@@ -65,15 +65,15 @@ struct GpuSatCollision
 	
 
 	void computeConvexConvexContactsGPUSAT( const btOpenCLArray<btInt2>* pairs, int nPairs, 
-			const btOpenCLArray<btRigidBodyCL>* bodyBuf,
-			btOpenCLArray<btContact4>* contactOut, int& nContacts,
+			const btOpenCLArray<b3RigidBodyCL>* bodyBuf,
+			btOpenCLArray<b3Contact4>* contactOut, int& nContacts,
 			int maxContactCapacity,
-			const btOpenCLArray<btConvexPolyhedronCL>& hostConvexData,
+			const btOpenCLArray<b3ConvexPolyhedronCL>& hostConvexData,
 			const btOpenCLArray<btVector3>& vertices,
 			const btOpenCLArray<btVector3>& uniqueEdges,
 			const btOpenCLArray<btGpuFace>& faces,
 			const btOpenCLArray<int>& indices,
-			const btOpenCLArray<btCollidable>& gpuCollidables,
+			const btOpenCLArray<b3Collidable>& gpuCollidables,
 			const btOpenCLArray<btGpuChildShape>& gpuChildShapes,
 
 			const btOpenCLArray<btYetAnotherAabb>& clAabbs,
@@ -82,7 +82,7 @@ struct GpuSatCollision
            btOpenCLArray<btVector3>& worldNormalsAGPU,
            btOpenCLArray<btVector3>& worldVertsA1GPU,
            btOpenCLArray<btVector3>& worldVertsB2GPU,
-		   btAlignedObjectArray<class btOptimizedBvh*>& bvhData,
+		   btAlignedObjectArray<class b3OptimizedBvh*>& bvhData,
 		   btOpenCLArray<btQuantizedBvhNode>*	treeNodesGPU,
 			btOpenCLArray<btBvhSubtreeInfo>*	subTreesGPU,
 			int numObjects,
diff --git a/opencl/gpu_narrowphase/host/btConvexPolyhedronCL.h b/opencl/gpu_narrowphase/host/b3ConvexPolyhedronCL.h
index 5ee6682a9..a20fc3eb3 100644
--- a/opencl/gpu_narrowphase/host/btConvexPolyhedronCL.h
+++ b/opencl/gpu_narrowphase/host/b3ConvexPolyhedronCL.h
@@ -10,7 +10,7 @@ struct btGpuFace
 	int m_numIndices;
 };
 
-ATTRIBUTE_ALIGNED16(struct) btConvexPolyhedronCL
+ATTRIBUTE_ALIGNED16(struct) b3ConvexPolyhedronCL
 {
 	btVector3		m_localCenter;
 	btVector3		m_extents;
diff --git a/opencl/gpu_narrowphase/host/btConvexUtility.cpp b/opencl/gpu_narrowphase/host/b3ConvexUtility.cpp
index e54942e1a..090a92cb8 100644
--- a/opencl/gpu_narrowphase/host/btConvexUtility.cpp
+++ b/opencl/gpu_narrowphase/host/b3ConvexUtility.cpp
@@ -14,21 +14,21 @@ subject to the following restrictions:
 //Originally written by Erwin Coumans
 
 
-#include "btConvexUtility.h"
+#include "b3ConvexUtility.h"
 #include "BulletGeometry/btConvexHullComputer.h"
 #include "BulletGeometry/btGrahamScan2dConvexHull.h"
 #include "BulletCommon/btQuaternion.h"
 #include "BulletCommon/btHashMap.h"
 
-#include "btConvexPolyhedronCL.h"
+#include "b3ConvexPolyhedronCL.h"
 
 
 
-btConvexUtility::~btConvexUtility()
+b3ConvexUtility::~b3ConvexUtility()
 {
 }
 
-bool	btConvexUtility::initializePolyhedralFeatures(const btVector3* orgVertices, int numPoints, bool mergeCoplanarTriangles)
+bool	b3ConvexUtility::initializePolyhedralFeatures(const btVector3* orgVertices, int numPoints, bool mergeCoplanarTriangles)
 {
 	
 	
@@ -310,7 +310,7 @@ struct btInternalEdge
 //
 
 #ifdef TEST_INTERNAL_OBJECTS
-bool btConvexUtility::testContainment() const
+bool b3ConvexUtility::testContainment() const
 {
 	for(int p=0;p<8;p++)
 	{
@@ -336,7 +336,7 @@ bool btConvexUtility::testContainment() const
 }
 #endif
 
-void	btConvexUtility::initialize()
+void	b3ConvexUtility::initialize()
 {
 
 	btHashMap<btInternalVertexPair,btInternalEdge> edges;
diff --git a/opencl/gpu_narrowphase/host/btConvexUtility.h b/opencl/gpu_narrowphase/host/b3ConvexUtility.h
index 4a71e17aa..8344c65e0 100644
--- a/opencl/gpu_narrowphase/host/btConvexUtility.h
+++ b/opencl/gpu_narrowphase/host/b3ConvexUtility.h
@@ -20,7 +20,7 @@ subject to the following restrictions:
 #include "BulletCommon/btAlignedObjectArray.h"
 #include "BulletCommon/btTransform.h"
 
-#include "btConvexPolyhedronCL.h"
+#include "b3ConvexPolyhedronCL.h"
 
 
 struct btMyFace
@@ -29,7 +29,7 @@ struct btMyFace
 	btScalar	m_plane[4];
 };
 
-ATTRIBUTE_ALIGNED16(class) btConvexUtility
+ATTRIBUTE_ALIGNED16(class) b3ConvexUtility
 {
 	public:
 	BT_DECLARE_ALIGNED_ALLOCATOR();
@@ -45,10 +45,10 @@ ATTRIBUTE_ALIGNED16(class) btConvexUtility
 	btAlignedObjectArray<btVector3> m_uniqueEdges;
 
 		
-	btConvexUtility()
+	b3ConvexUtility()
 	{
 	}
-	virtual ~btConvexUtility();
+	virtual ~b3ConvexUtility();
 
 	bool	initializePolyhedralFeatures(const btVector3* orgVertices, int numVertices, bool mergeCoplanarTriangles=true);
 		
diff --git a/opencl/gpu_narrowphase/host/btOptimizedBvh.cpp b/opencl/gpu_narrowphase/host/b3OptimizedBvh.cpp
index 29c445d53..23db14423 100644
--- a/opencl/gpu_narrowphase/host/btOptimizedBvh.cpp
+++ b/opencl/gpu_narrowphase/host/b3OptimizedBvh.cpp
@@ -14,22 +14,22 @@ subject to the following restrictions:
 */
 
 
-#include "btOptimizedBvh.h"
-#include "btStridingMeshInterface.h"
+#include "b3OptimizedBvh.h"
+#include "b3StridingMeshInterface.h"
 #include "BulletGeometry/btAabbUtil2.h"
 #include "BulletCommon/btIDebugDraw.h"
 
 
-btOptimizedBvh::btOptimizedBvh()
+b3OptimizedBvh::b3OptimizedBvh()
 { 
 }
 
-btOptimizedBvh::~btOptimizedBvh()
+b3OptimizedBvh::~b3OptimizedBvh()
 {
 }
 
 
-void btOptimizedBvh::build(btStridingMeshInterface* triangles, bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax)
+void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax)
 {
 	m_useQuantization = useQuantizedAabbCompression;
 
@@ -80,7 +80,7 @@ void btOptimizedBvh::build(btStridingMeshInterface* triangles, bool useQuantized
 	struct	QuantizedNodeTriangleCallback : public btInternalTriangleIndexCallback
 	{
 		QuantizedNodeArray&	m_triangleNodes;
-		const btQuantizedBvh* m_optimizedTree; // for quantization
+		const b3QuantizedBvh* m_optimizedTree; // for quantization
 
 		QuantizedNodeTriangleCallback& operator=(QuantizedNodeTriangleCallback& other)
 		{
@@ -89,7 +89,7 @@ void btOptimizedBvh::build(btStridingMeshInterface* triangles, bool useQuantized
 			return *this;
 		}
 
-		QuantizedNodeTriangleCallback(QuantizedNodeArray&	triangleNodes,const btQuantizedBvh* tree)
+		QuantizedNodeTriangleCallback(QuantizedNodeArray&	triangleNodes,const b3QuantizedBvh* tree)
 			:m_triangleNodes(triangleNodes),m_optimizedTree(tree)
 		{
 		}
@@ -203,7 +203,7 @@ void btOptimizedBvh::build(btStridingMeshInterface* triangles, bool useQuantized
 
 
 
-void	btOptimizedBvh::refit(btStridingMeshInterface* meshInterface,const btVector3& aabbMin,const btVector3& aabbMax)
+void	b3OptimizedBvh::refit(b3StridingMeshInterface* meshInterface,const btVector3& aabbMin,const btVector3& aabbMax)
 {
 	if (m_useQuantization)
 	{
@@ -230,7 +230,7 @@ void	btOptimizedBvh::refit(btStridingMeshInterface* meshInterface,const btVector
 
 
 
-void	btOptimizedBvh::refitPartial(btStridingMeshInterface* meshInterface,const btVector3& aabbMin,const btVector3& aabbMax)
+void	b3OptimizedBvh::refitPartial(b3StridingMeshInterface* meshInterface,const btVector3& aabbMin,const btVector3& aabbMax)
 {
 	//incrementally initialize quantization values
 	btAssert(m_useQuantization);
@@ -269,7 +269,7 @@ void	btOptimizedBvh::refitPartial(btStridingMeshInterface* meshInterface,const b
 	
 }
 
-void	btOptimizedBvh::updateBvhNodes(btStridingMeshInterface* meshInterface,int firstNode,int endNode,int index)
+void	b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface,int firstNode,int endNode,int index)
 {
 	(void)index;
 
@@ -382,10 +382,10 @@ void	btOptimizedBvh::updateBvhNodes(btStridingMeshInterface* meshInterface,int f
 }
 
 ///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place'
-btOptimizedBvh* btOptimizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian)
+b3OptimizedBvh* b3OptimizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian)
 {
-	btQuantizedBvh* bvh = btQuantizedBvh::deSerializeInPlace(i_alignedDataBuffer,i_dataBufferSize,i_swapEndian);
+	b3QuantizedBvh* bvh = b3QuantizedBvh::deSerializeInPlace(i_alignedDataBuffer,i_dataBufferSize,i_swapEndian);
 	
 	//we don't add additional data so just do a static upcast
-	return static_cast<btOptimizedBvh*>(bvh);
+	return static_cast<b3OptimizedBvh*>(bvh);
 }
diff --git a/opencl/gpu_narrowphase/host/btOptimizedBvh.h b/opencl/gpu_narrowphase/host/b3OptimizedBvh.h
index e6692b456..2a5321104 100644
--- a/opencl/gpu_narrowphase/host/btOptimizedBvh.h
+++ b/opencl/gpu_narrowphase/host/b3OptimizedBvh.h
@@ -18,13 +18,13 @@ subject to the following restrictions:
 #ifndef BT_OPTIMIZED_BVH_H
 #define BT_OPTIMIZED_BVH_H
 
-#include "btQuantizedBvh.h"
+#include "b3QuantizedBvh.h"
 
-class btStridingMeshInterface;
+class b3StridingMeshInterface;
 
 
-///The btOptimizedBvh extends the btQuantizedBvh to create AABB tree for triangle meshes, through the btStridingMeshInterface.
-ATTRIBUTE_ALIGNED16(class) btOptimizedBvh : public btQuantizedBvh
+///The b3OptimizedBvh extends the b3QuantizedBvh to create AABB tree for triangle meshes, through the b3StridingMeshInterface.
+ATTRIBUTE_ALIGNED16(class) b3OptimizedBvh : public b3QuantizedBvh
 {
 	
 public:
@@ -34,27 +34,27 @@ protected:
 
 public:
 
-	btOptimizedBvh();
+	b3OptimizedBvh();
 
-	virtual ~btOptimizedBvh();
+	virtual ~b3OptimizedBvh();
 
-	void	build(btStridingMeshInterface* triangles,bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax);
+	void	build(b3StridingMeshInterface* triangles,bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax);
 
-	void	refit(btStridingMeshInterface* triangles,const btVector3& aabbMin,const btVector3& aabbMax);
+	void	refit(b3StridingMeshInterface* triangles,const btVector3& aabbMin,const btVector3& aabbMax);
 
-	void	refitPartial(btStridingMeshInterface* triangles,const btVector3& aabbMin, const btVector3& aabbMax);
+	void	refitPartial(b3StridingMeshInterface* triangles,const btVector3& aabbMin, const btVector3& aabbMax);
 
-	void	updateBvhNodes(btStridingMeshInterface* meshInterface,int firstNode,int endNode,int index);
+	void	updateBvhNodes(b3StridingMeshInterface* meshInterface,int firstNode,int endNode,int index);
 
 	/// Data buffer MUST be 16 byte aligned
 	virtual bool serializeInPlace(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const
 	{
-		return btQuantizedBvh::serialize(o_alignedDataBuffer,i_dataBufferSize,i_swapEndian);
+		return b3QuantizedBvh::serialize(o_alignedDataBuffer,i_dataBufferSize,i_swapEndian);
 
 	}
 
 	///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place'
-	static btOptimizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian);
+	static b3OptimizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian);
 
 
 };
diff --git a/opencl/gpu_narrowphase/host/btQuantizedBvh.cpp b/opencl/gpu_narrowphase/host/b3QuantizedBvh.cpp
index 3c10d5bcf..e55b2988a 100644
--- a/opencl/gpu_narrowphase/host/btQuantizedBvh.cpp
+++ b/opencl/gpu_narrowphase/host/b3QuantizedBvh.cpp
@@ -13,7 +13,7 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#include "btQuantizedBvh.h"
+#include "b3QuantizedBvh.h"
 
 #include "BulletGeometry/btAabbUtil2.h"
 #include "BulletCommon/btIDebugDraw.h"
@@ -21,7 +21,7 @@ subject to the following restrictions:
 
 #define RAYAABB2
 
-btQuantizedBvh::btQuantizedBvh() : 
+b3QuantizedBvh::b3QuantizedBvh() : 
 					m_bulletVersion(BT_BULLET_VERSION),
 					m_useQuantization(false), 
 					m_traversalMode(TRAVERSAL_STACKLESS_CACHE_FRIENDLY)
@@ -37,7 +37,7 @@ btQuantizedBvh::btQuantizedBvh() :
 
 
 
-void btQuantizedBvh::buildInternal()
+void b3QuantizedBvh::buildInternal()
 {
 	///assumes that caller filled in the m_quantizedLeafNodes
 	m_useQuantization = true;
@@ -88,7 +88,7 @@ btVector3 color[4]=
 
 
 
-void	btQuantizedBvh::setQuantizationValues(const btVector3& bvhAabbMin,const btVector3& bvhAabbMax,btScalar quantizationMargin)
+void	b3QuantizedBvh::setQuantizationValues(const btVector3& bvhAabbMin,const btVector3& bvhAabbMax,btScalar quantizationMargin)
 {
 	//enlarge the AABB to avoid division by zero when initializing the quantization values
 	btVector3 clampValue(quantizationMargin,quantizationMargin,quantizationMargin);
@@ -102,7 +102,7 @@ void	btQuantizedBvh::setQuantizationValues(const btVector3& bvhAabbMin,const btV
 
 
 
-btQuantizedBvh::~btQuantizedBvh()
+b3QuantizedBvh::~b3QuantizedBvh()
 {
 }
 
@@ -111,7 +111,7 @@ int gStackDepth = 0;
 int gMaxStackDepth = 0;
 #endif //DEBUG_TREE_BUILDING
 
-void	btQuantizedBvh::buildTree	(int startIndex,int endIndex)
+void	b3QuantizedBvh::buildTree	(int startIndex,int endIndex)
 {
 #ifdef DEBUG_TREE_BUILDING
 	gStackDepth++;
@@ -194,7 +194,7 @@ void	btQuantizedBvh::buildTree	(int startIndex,int endIndex)
 
 }
 
-void	btQuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex,int rightChildNodexIndex)
+void	b3QuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex,int rightChildNodexIndex)
 {
 	btAssert(m_useQuantization);
 
@@ -227,7 +227,7 @@ void	btQuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex,int rightChild
 }
 
 
-int	btQuantizedBvh::sortAndCalcSplittingIndex(int startIndex,int endIndex,int splitAxis)
+int	b3QuantizedBvh::sortAndCalcSplittingIndex(int startIndex,int endIndex,int splitAxis)
 {
 	int i;
 	int splitIndex =startIndex;
@@ -281,7 +281,7 @@ int	btQuantizedBvh::sortAndCalcSplittingIndex(int startIndex,int endIndex,int sp
 }
 
 
-int	btQuantizedBvh::calcSplittingAxis(int startIndex,int endIndex)
+int	b3QuantizedBvh::calcSplittingAxis(int startIndex,int endIndex)
 {
 	int i;
 
@@ -310,7 +310,7 @@ int	btQuantizedBvh::calcSplittingAxis(int startIndex,int endIndex)
 
 
 
-void	btQuantizedBvh::reportAabbOverlappingNodex(btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const
+void	b3QuantizedBvh::reportAabbOverlappingNodex(btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const
 {
 	//either choose recursive traversal (walkTree) or stackless (walkStacklessTree)
 
@@ -350,7 +350,7 @@ void	btQuantizedBvh::reportAabbOverlappingNodex(btNodeOverlapCallback* nodeCallb
 int maxIterations = 0;
 
 
-void	btQuantizedBvh::walkStacklessTree(btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const
+void	b3QuantizedBvh::walkStacklessTree(btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const
 {
 	btAssert(!m_useQuantization);
 
@@ -395,7 +395,7 @@ void	btQuantizedBvh::walkStacklessTree(btNodeOverlapCallback* nodeCallback,const
 
 /*
 ///this was the original recursive traversal, before we optimized towards stackless traversal
-void	btQuantizedBvh::walkTree(btOptimizedBvhNode* rootNode,btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const
+void	b3QuantizedBvh::walkTree(btOptimizedBvhNode* rootNode,btNodeOverlapCallback* nodeCallback,const btVector3& aabbMin,const btVector3& aabbMax) const
 {
 	bool isLeafNode, aabbOverlap = TestAabbAgainstAabb2(aabbMin,aabbMax,rootNode->m_aabbMin,rootNode->m_aabbMax);
 	if (aabbOverlap)
@@ -414,7 +414,7 @@ void	btQuantizedBvh::walkTree(btOptimizedBvhNode* rootNode,btNodeOverlapCallback
 }
 */
 
-void btQuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const btQuantizedBvhNode* currentNode,btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const
+void b3QuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const btQuantizedBvhNode* currentNode,btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const
 {
 	btAssert(m_useQuantization);
 	
@@ -446,7 +446,7 @@ void btQuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const btQuantize
 
 
 
-void	btQuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex,int endNodeIndex) const
+void	b3QuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex,int endNodeIndex) const
 {
 	btAssert(!m_useQuantization);
 
@@ -538,7 +538,7 @@ void	btQuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCall
 
 
 
-void	btQuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex,int endNodeIndex) const
+void	b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex,int endNodeIndex) const
 {
 	btAssert(m_useQuantization);
 	
@@ -664,7 +664,7 @@ void	btQuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback*
 
 }
 
-void	btQuantizedBvh::walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const
+void	b3QuantizedBvh::walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const
 {
 	btAssert(m_useQuantization);
 	
@@ -730,7 +730,7 @@ void	btQuantizedBvh::walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallb
 }
 
 //This traversal can be called from Playstation 3 SPU
-void	btQuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const
+void	b3QuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(btNodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const
 {
 	btAssert(m_useQuantization);
 
@@ -753,13 +753,13 @@ void	btQuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(btNodeOverlapCallba
 }
 
 
-void	btQuantizedBvh::reportRayOverlappingNodex (btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget) const
+void	b3QuantizedBvh::reportRayOverlappingNodex (btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget) const
 {
 	reportBoxCastOverlappingNodex(nodeCallback,raySource,rayTarget,btVector3(0,0,0),btVector3(0,0,0));
 }
 
 
-void	btQuantizedBvh::reportBoxCastOverlappingNodex(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin,const btVector3& aabbMax) const
+void	b3QuantizedBvh::reportBoxCastOverlappingNodex(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin,const btVector3& aabbMax) const
 {
 	//always use stackless
 
@@ -787,7 +787,7 @@ void	btQuantizedBvh::reportBoxCastOverlappingNodex(btNodeOverlapCallback* nodeCa
 }
 
 
-void	btQuantizedBvh::swapLeafNodes(int i,int splitIndex)
+void	b3QuantizedBvh::swapLeafNodes(int i,int splitIndex)
 {
 	if (m_useQuantization)
 	{
@@ -802,7 +802,7 @@ void	btQuantizedBvh::swapLeafNodes(int i,int splitIndex)
 	}
 }
 
-void	btQuantizedBvh::assignInternalNodeFromLeafNode(int internalNode,int leafNodeIndex)
+void	b3QuantizedBvh::assignInternalNodeFromLeafNode(int internalNode,int leafNodeIndex)
 {
 	if (m_useQuantization)
 	{
@@ -825,15 +825,15 @@ static const unsigned BVH_ALIGNMENT_BLOCKS = 2;
 #endif
 
 
-unsigned int btQuantizedBvh::getAlignmentSerializationPadding()
+unsigned int b3QuantizedBvh::getAlignmentSerializationPadding()
 {
 	// I changed this to 0 since the extra padding is not needed or used.
 	return 0;//BVH_ALIGNMENT_BLOCKS * BVH_ALIGNMENT;
 }
 
-unsigned btQuantizedBvh::calculateSerializeBufferSize() const
+unsigned b3QuantizedBvh::calculateSerializeBufferSize() const
 {
-	unsigned baseSize = sizeof(btQuantizedBvh) + getAlignmentSerializationPadding();
+	unsigned baseSize = sizeof(b3QuantizedBvh) + getAlignmentSerializationPadding();
 	baseSize += sizeof(btBvhSubtreeInfo) * m_subtreeHeaderCount;
 	if (m_useQuantization)
 	{
@@ -842,7 +842,7 @@ unsigned btQuantizedBvh::calculateSerializeBufferSize() const
 	return baseSize + m_curNodeIndex * sizeof(btOptimizedBvhNode);
 }
 
-bool btQuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBufferSize */, bool i_swapEndian) const
+bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBufferSize */, bool i_swapEndian) const
 {
 	btAssert(m_subtreeHeaderCount == m_SubtreeHeaders.size());
 	m_subtreeHeaderCount = m_SubtreeHeaders.size();
@@ -855,11 +855,11 @@ bool btQuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe
 	}
 */
 
-	btQuantizedBvh *targetBvh = (btQuantizedBvh *)o_alignedDataBuffer;
+	b3QuantizedBvh *targetBvh = (b3QuantizedBvh *)o_alignedDataBuffer;
 
 	// construct the class so the virtual function table, etc will be set up
 	// Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor
-	new (targetBvh) btQuantizedBvh;
+	new (targetBvh) b3QuantizedBvh;
 
 	if (i_swapEndian)
 	{
@@ -886,7 +886,7 @@ bool btQuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe
 	targetBvh->m_useQuantization = m_useQuantization;
 
 	unsigned char *nodeData = (unsigned char *)targetBvh;
-	nodeData += sizeof(btQuantizedBvh);
+	nodeData += sizeof(b3QuantizedBvh);
 	
 	unsigned sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK;
 	nodeData += sizeToAdd;
@@ -1028,14 +1028,14 @@ bool btQuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe
 	return true;
 }
 
-btQuantizedBvh *btQuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian)
+b3QuantizedBvh *b3QuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian)
 {
 
 	if (i_alignedDataBuffer == NULL)// || (((unsigned)i_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0))
 	{
 		return NULL;
 	}
-	btQuantizedBvh *bvh = (btQuantizedBvh *)i_alignedDataBuffer;
+	b3QuantizedBvh *bvh = (b3QuantizedBvh *)i_alignedDataBuffer;
 
 	if (i_swapEndian)
 	{
@@ -1058,7 +1058,7 @@ btQuantizedBvh *btQuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, un
 	}
 
 	unsigned char *nodeData = (unsigned char *)bvh;
-	nodeData += sizeof(btQuantizedBvh);
+	nodeData += sizeof(b3QuantizedBvh);
 	
 	unsigned sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK;
 	nodeData += sizeToAdd;
@@ -1067,7 +1067,7 @@ btQuantizedBvh *btQuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, un
 
 	// Must call placement new to fill in virtual function table, etc, but we don't want to overwrite most data, so call a special version of the constructor
 	// Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor
-	new (bvh) btQuantizedBvh(*bvh, false);
+	new (bvh) b3QuantizedBvh(*bvh, false);
 
 	if (bvh->m_useQuantization)
 	{
@@ -1135,7 +1135,7 @@ btQuantizedBvh *btQuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, un
 }
 
 // Constructor that prevents btVector3's default constructor from being called
-btQuantizedBvh::btQuantizedBvh(btQuantizedBvh &self, bool /* ownsMemory */) :
+b3QuantizedBvh::b3QuantizedBvh(b3QuantizedBvh &self, bool /* ownsMemory */) :
 m_bvhAabbMin(self.m_bvhAabbMin),
 m_bvhAabbMax(self.m_bvhAabbMax),
 m_bvhQuantization(self.m_bvhQuantization),
@@ -1144,7 +1144,7 @@ m_bulletVersion(BT_BULLET_VERSION)
 
 }
 
-void btQuantizedBvh::deSerializeFloat(struct btQuantizedBvhFloatData& quantizedBvhFloatData)
+void b3QuantizedBvh::deSerializeFloat(struct btQuantizedBvhFloatData& quantizedBvhFloatData)
 {
 	m_bvhAabbMax.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMax);
 	m_bvhAabbMin.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMin);
@@ -1215,7 +1215,7 @@ void btQuantizedBvh::deSerializeFloat(struct btQuantizedBvhFloatData& quantizedB
 	}
 }
 
-void btQuantizedBvh::deSerializeDouble(struct btQuantizedBvhDoubleData& quantizedBvhDoubleData)
+void b3QuantizedBvh::deSerializeDouble(struct btQuantizedBvhDoubleData& quantizedBvhDoubleData)
 {
 	m_bvhAabbMax.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMax);
 	m_bvhAabbMin.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMin);
@@ -1290,7 +1290,7 @@ void btQuantizedBvh::deSerializeDouble(struct btQuantizedBvhDoubleData& quantize
 
 
 ///fills the dataBuffer and returns the struct name (and 0 on failure)
-const char*	btQuantizedBvh::serialize(void* dataBuffer, btSerializer* serializer) const
+const char*	b3QuantizedBvh::serialize(void* dataBuffer, btSerializer* serializer) const
 {
 	btAssert(0);
 	return 0;
diff --git a/opencl/gpu_narrowphase/host/btQuantizedBvh.h b/opencl/gpu_narrowphase/host/b3QuantizedBvh.h
index 35378b2d0..8c1188abe 100644
--- a/opencl/gpu_narrowphase/host/btQuantizedBvh.h
+++ b/opencl/gpu_narrowphase/host/b3QuantizedBvh.h
@@ -168,10 +168,10 @@ typedef btAlignedObjectArray<btQuantizedBvhNode>	QuantizedNodeArray;
 typedef btAlignedObjectArray<btBvhSubtreeInfo>		BvhSubtreeInfoArray;
 
 
-///The btQuantizedBvh class stores an AABB tree that can be quickly traversed on CPU and Cell SPU.
+///The b3QuantizedBvh class stores an AABB tree that can be quickly traversed on CPU and Cell SPU.
 ///It is used by the btBvhTriangleMeshShape as midphase, and by the btMultiSapBroadphase.
 ///It is recommended to use quantization for better performance and lower memory requirements.
-ATTRIBUTE_ALIGNED16(class) btQuantizedBvh
+ATTRIBUTE_ALIGNED16(class) b3QuantizedBvh
 {
 public:
 	enum btTraversalMode
@@ -334,9 +334,9 @@ public:
 	
 	BT_DECLARE_ALIGNED_ALLOCATOR();
 
-	btQuantizedBvh();
+	b3QuantizedBvh();
 
-	virtual ~btQuantizedBvh();
+	virtual ~b3QuantizedBvh();
 
 	
 	///***************************************** expert/internal use only *************************
@@ -468,7 +468,7 @@ public:
 	virtual bool serialize(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const;
 
 	///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place'
-	static btQuantizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian);
+	static b3QuantizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian);
 
 	static unsigned int getAlignmentSerializationPadding();
 //////////////////////////////////////////////////////////////////////
@@ -495,7 +495,7 @@ private:
 	// Special "copy" constructor that allows for in-place deserialization
 	// Prevents btVector3's default constructor from being called, but doesn't inialize much else
 	// ownsMemory should most likely be false if deserializing, and if you are not, don't call this (it also changes the function signature, which we need)
-	btQuantizedBvh(btQuantizedBvh &other, bool ownsMemory);
+	b3QuantizedBvh(b3QuantizedBvh &other, bool ownsMemory);
 
 }
 ;
@@ -572,7 +572,7 @@ struct	btQuantizedBvhDoubleData
 };
 
 
-SIMD_FORCE_INLINE	int	btQuantizedBvh::calculateSerializeBufferSizeNew() const
+SIMD_FORCE_INLINE	int	b3QuantizedBvh::calculateSerializeBufferSizeNew() const
 {
 	return sizeof(btQuantizedBvhData);
 }
diff --git a/opencl/gpu_narrowphase/host/btRigidBodyCL.h b/opencl/gpu_narrowphase/host/b3RigidBodyCL.h
index e91e4ad09..d8a1bfcf6 100644
--- a/opencl/gpu_narrowphase/host/btRigidBodyCL.h
+++ b/opencl/gpu_narrowphase/host/b3RigidBodyCL.h
@@ -4,7 +4,7 @@
 #include "BulletCommon/btScalar.h"
 #include "BulletCommon/btMatrix3x3.h"
 
-ATTRIBUTE_ALIGNED16(struct) btRigidBodyCL
+ATTRIBUTE_ALIGNED16(struct) b3RigidBodyCL
 {
 	BT_DECLARE_ALIGNED_ALLOCATOR();
 
diff --git a/opencl/gpu_narrowphase/host/btStridingMeshInterface.cpp b/opencl/gpu_narrowphase/host/b3StridingMeshInterface.cpp
index 298f6cbf7..c2025ce64 100644
--- a/opencl/gpu_narrowphase/host/btStridingMeshInterface.cpp
+++ b/opencl/gpu_narrowphase/host/b3StridingMeshInterface.cpp
@@ -13,16 +13,16 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#include "btStridingMeshInterface.h"
+#include "b3StridingMeshInterface.h"
 
 
-btStridingMeshInterface::~btStridingMeshInterface()
+b3StridingMeshInterface::~b3StridingMeshInterface()
 {
 
 }
 
 
-void	btStridingMeshInterface::InternalProcessAllTriangles(btInternalTriangleIndexCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const
+void	b3StridingMeshInterface::InternalProcessAllTriangles(btInternalTriangleIndexCallback* callback,const btVector3& aabbMin,const btVector3& aabbMax) const
 {
 	(void)aabbMin;
 	(void)aabbMax;
@@ -173,7 +173,7 @@ void	btStridingMeshInterface::InternalProcessAllTriangles(btInternalTriangleInde
 	}
 }
 
-void	btStridingMeshInterface::calculateAabbBruteForce(btVector3& aabbMin,btVector3& aabbMax)
+void	b3StridingMeshInterface::calculateAabbBruteForce(btVector3& aabbMin,btVector3& aabbMax)
 {
 
 	struct	AabbCalculationCallback : public btInternalTriangleIndexCallback
diff --git a/opencl/gpu_narrowphase/host/btStridingMeshInterface.h b/opencl/gpu_narrowphase/host/b3StridingMeshInterface.h
index b457df484..e2bb8cb1e 100644
--- a/opencl/gpu_narrowphase/host/btStridingMeshInterface.h
+++ b/opencl/gpu_narrowphase/host/b3StridingMeshInterface.h
@@ -17,7 +17,7 @@ subject to the following restrictions:
 #define BT_STRIDING_MESHINTERFACE_H
 
 #include "BulletCommon/btVector3.h"
-#include "btTriangleCallback.h"
+#include "b3TriangleCallback.h"
 //#include "btConcaveShape.h"
 
 
@@ -27,10 +27,10 @@ enum  	PHY_ScalarType {
 };
 
 
-///	The btStridingMeshInterface is the interface class for high performance generic access to triangle meshes, used in combination with btBvhTriangleMeshShape and some other collision shapes.
+///	The b3StridingMeshInterface is the interface class for high performance generic access to triangle meshes, used in combination with btBvhTriangleMeshShape and some other collision shapes.
 /// Using index striding of 3*sizeof(integer) it can use triangle arrays, using index striding of 1*sizeof(integer) it can handle triangle strips.
 /// It allows for sharing graphics and collision meshes. Also it provides locking/unlocking of graphics meshes that are in gpu memory.
-ATTRIBUTE_ALIGNED16(class ) btStridingMeshInterface
+ATTRIBUTE_ALIGNED16(class ) b3StridingMeshInterface
 {
 	protected:
 	
@@ -39,12 +39,12 @@ ATTRIBUTE_ALIGNED16(class ) btStridingMeshInterface
 	public:
 		BT_DECLARE_ALIGNED_ALLOCATOR();
 		
-		btStridingMeshInterface() :m_scaling(btScalar(1.),btScalar(1.),btScalar(1.))
+		b3StridingMeshInterface() :m_scaling(btScalar(1.),btScalar(1.),btScalar(1.))
 		{
 
 		}
 
-		virtual ~btStridingMeshInterface();
+		virtual ~b3StridingMeshInterface();
 
 
 
@@ -157,7 +157,7 @@ struct	btStridingMeshInterfaceData
 
 
 
-SIMD_FORCE_INLINE	int	btStridingMeshInterface::calculateSerializeBufferSize() const
+SIMD_FORCE_INLINE	int	b3StridingMeshInterface::calculateSerializeBufferSize() const
 {
 	return sizeof(btStridingMeshInterfaceData);
 }
diff --git a/opencl/gpu_narrowphase/host/btTriangleCallback.cpp b/opencl/gpu_narrowphase/host/b3TriangleCallback.cpp
index f558bf6d2..3a4e619e0 100644
--- a/opencl/gpu_narrowphase/host/btTriangleCallback.cpp
+++ b/opencl/gpu_narrowphase/host/b3TriangleCallback.cpp
@@ -13,9 +13,9 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#include "btTriangleCallback.h"
+#include "b3TriangleCallback.h"
 
-btTriangleCallback::~btTriangleCallback()
+b3TriangleCallback::~b3TriangleCallback()
 {
 
 }
diff --git a/opencl/gpu_narrowphase/host/btTriangleCallback.h b/opencl/gpu_narrowphase/host/b3TriangleCallback.h
index e9ce72ffb..b5a881ee4 100644
--- a/opencl/gpu_narrowphase/host/btTriangleCallback.h
+++ b/opencl/gpu_narrowphase/host/b3TriangleCallback.h
@@ -19,13 +19,13 @@ subject to the following restrictions:
 #include "BulletCommon/btVector3.h"
 
 
-///The btTriangleCallback provides a callback for each overlapping triangle when calling processAllTriangles.
+///The b3TriangleCallback provides a callback for each overlapping triangle when calling processAllTriangles.
 ///This callback is called by processAllTriangles for all btConcaveShape derived class, such as  btBvhTriangleMeshShape, btStaticPlaneShape and btHeightfieldTerrainShape.
-class btTriangleCallback
+class b3TriangleCallback
 {
 public:
 
-	virtual ~btTriangleCallback();
+	virtual ~b3TriangleCallback();
 	virtual void processTriangle(btVector3* triangle, int partId, int triangleIndex) = 0;
 };
 
diff --git a/opencl/gpu_narrowphase/host/btTriangleIndexVertexArray.cpp b/opencl/gpu_narrowphase/host/b3TriangleIndexVertexArray.cpp
index a665024cb..552ed2073 100644
--- a/opencl/gpu_narrowphase/host/btTriangleIndexVertexArray.cpp
+++ b/opencl/gpu_narrowphase/host/b3TriangleIndexVertexArray.cpp
@@ -13,9 +13,9 @@ subject to the following restrictions:
 3. This notice may not be removed or altered from any source distribution.
 */
 
-#include "btTriangleIndexVertexArray.h"
+#include "b3TriangleIndexVertexArray.h"
 
-btTriangleIndexVertexArray::btTriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,btScalar* vertexBase,int vertexStride)
+b3TriangleIndexVertexArray::b3TriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,btScalar* vertexBase,int vertexStride)
 : m_hasAabb(0)
 {
 	btIndexedMesh mesh;
@@ -31,12 +31,12 @@ btTriangleIndexVertexArray::btTriangleIndexVertexArray(int numTriangles,int* tri
 
 }
 
-btTriangleIndexVertexArray::~btTriangleIndexVertexArray()
+b3TriangleIndexVertexArray::~b3TriangleIndexVertexArray()
 {
 
 }
 
-void	btTriangleIndexVertexArray::getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart)
+void	b3TriangleIndexVertexArray::getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart)
 {
 	btAssert(subpart< getNumSubParts() );
 
@@ -56,7 +56,7 @@ void	btTriangleIndexVertexArray::getLockedVertexIndexBase(unsigned char **vertex
 	indicestype = mesh.m_indexType;
 }
 
-void	btTriangleIndexVertexArray::getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart) const
+void	b3TriangleIndexVertexArray::getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart) const
 {
 	const btIndexedMesh& mesh = m_indexedMeshes[subpart];
 
@@ -73,20 +73,20 @@ void	btTriangleIndexVertexArray::getLockedReadOnlyVertexIndexBase(const unsigned
 	indicestype = mesh.m_indexType;
 }
 
-bool	btTriangleIndexVertexArray::hasPremadeAabb() const
+bool	b3TriangleIndexVertexArray::hasPremadeAabb() const
 {
 	return (m_hasAabb == 1);
 }
 
 
-void	btTriangleIndexVertexArray::setPremadeAabb(const btVector3& aabbMin, const btVector3& aabbMax ) const
+void	b3TriangleIndexVertexArray::setPremadeAabb(const btVector3& aabbMin, const btVector3& aabbMax ) const
 {
 	m_aabbMin = aabbMin;
 	m_aabbMax = aabbMax;
 	m_hasAabb = 1; // this is intentionally an int see notes in header
 }
 
-void	btTriangleIndexVertexArray::getPremadeAabb(btVector3* aabbMin, btVector3* aabbMax ) const
+void	b3TriangleIndexVertexArray::getPremadeAabb(btVector3* aabbMin, btVector3* aabbMax ) const
 {
 	*aabbMin = m_aabbMin;
 	*aabbMax = m_aabbMax;
diff --git a/opencl/gpu_narrowphase/host/btTriangleIndexVertexArray.h b/opencl/gpu_narrowphase/host/b3TriangleIndexVertexArray.h
index bc6f05e26..ce9275b3f 100644
--- a/opencl/gpu_narrowphase/host/btTriangleIndexVertexArray.h
+++ b/opencl/gpu_narrowphase/host/b3TriangleIndexVertexArray.h
@@ -16,12 +16,12 @@ subject to the following restrictions:
 #ifndef BT_TRIANGLE_INDEX_VERTEX_ARRAY_H
 #define BT_TRIANGLE_INDEX_VERTEX_ARRAY_H
 
-#include "btStridingMeshInterface.h"
+#include "b3StridingMeshInterface.h"
 #include "BulletCommon/btAlignedObjectArray.h"
 #include "BulletCommon/btScalar.h"
 
 
-///The btIndexedMesh indexes a single vertex and index array. Multiple btIndexedMesh objects can be passed into a btTriangleIndexVertexArray using addIndexedMesh.
+///The btIndexedMesh indexes a single vertex and index array. Multiple btIndexedMesh objects can be passed into a b3TriangleIndexVertexArray using addIndexedMesh.
 ///Instead of the number of indices, we pass the number of triangles.
 ATTRIBUTE_ALIGNED16( struct)	btIndexedMesh
 {
@@ -37,7 +37,7 @@ ATTRIBUTE_ALIGNED16( struct)	btIndexedMesh
    int                     m_vertexStride;
 
    // The index type is set when adding an indexed mesh to the
-   // btTriangleIndexVertexArray, do not set it manually
+   // b3TriangleIndexVertexArray, do not set it manually
    PHY_ScalarType m_indexType;
 
    // The vertex type has a default type similar to Bullet's precision mode (float or double)
@@ -61,11 +61,11 @@ ATTRIBUTE_ALIGNED16( struct)	btIndexedMesh
 
 typedef btAlignedObjectArray<btIndexedMesh>	IndexedMeshArray;
 
-///The btTriangleIndexVertexArray allows to access multiple triangle meshes, by indexing into existing triangle/index arrays.
+///The b3TriangleIndexVertexArray allows access to multiple triangle meshes, by indexing into existing triangle/index arrays.
 ///Additional meshes can be added using addIndexedMesh
 ///No duplcate is made of the vertex/index data, it only indexes into external vertex/index arrays.
-///So keep those arrays around during the lifetime of this btTriangleIndexVertexArray.
-ATTRIBUTE_ALIGNED16( class) btTriangleIndexVertexArray : public btStridingMeshInterface
+///So keep those arrays around during the lifetime of this b3TriangleIndexVertexArray.
+ATTRIBUTE_ALIGNED16( class) b3TriangleIndexVertexArray : public b3StridingMeshInterface
 {
 protected:
 	IndexedMeshArray	m_indexedMeshes;
@@ -78,14 +78,14 @@ public:
 
 	BT_DECLARE_ALIGNED_ALLOCATOR();
 
-	btTriangleIndexVertexArray() : m_hasAabb(0)
+	b3TriangleIndexVertexArray() : m_hasAabb(0)
 	{
 	}
 
-	virtual ~btTriangleIndexVertexArray();
+	virtual ~b3TriangleIndexVertexArray();
 
 	//just to be backwards compatible
-	btTriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,btScalar* vertexBase,int vertexStride);
+	b3TriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,btScalar* vertexBase,int vertexStride);
 	
 	void	addIndexedMesh(const btIndexedMesh& mesh, PHY_ScalarType indexType = PHY_INTEGER)
 	{
diff --git a/opencl/gpu_narrowphase/test/main.cpp b/opencl/gpu_narrowphase/test/main.cpp
index 9b5383b32..a18ce7abd 100644
--- a/opencl/gpu_narrowphase/test/main.cpp
+++ b/opencl/gpu_narrowphase/test/main.cpp
@@ -14,8 +14,8 @@ subject to the following restrictions:
 
 
 #include <stdio.h>
-#include "../basic_initialize/btOpenCLUtils.h"
-#include "../host/ConvexHullContact.h"
+#include "../basic_initialize/b3OpenCLUtils.h"
+#include "../host/b3ConvexHullContact.h"
 
 #include "BulletCommon/btVector3.h"
 #include "parallel_primitives/host/btFillCL.h"
@@ -23,7 +23,7 @@ subject to the following restrictions:
 #include "parallel_primitives/host/btRadixSort32CL.h"
 #include "parallel_primitives/host/btPrefixScanCL.h"
 #include "BulletCommon/CommandLineArgs.h"
-#include "../host/ConvexHullContact.h"
+#include "../host/b3ConvexHullContact.h"
 
 #include "BulletCommon/btMinMax.h"
 int g_nPassed = 0;
@@ -49,17 +49,17 @@ void initCL(int preferredDeviceIndex, int preferredPlatformIndex)
 
 	cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
 
-	g_context = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
+	g_context = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
-	int numDev = btOpenCLUtils::getNumDevices(g_context);
+	int numDev = b3OpenCLUtils::getNumDevices(g_context);
 	if (numDev>0)
 	{
 		btOpenCLDeviceInfo info;
-		g_device= btOpenCLUtils::getDevice(g_context,0);
+		g_device= b3OpenCLUtils::getDevice(g_context,0);
 		g_queue = clCreateCommandQueue(g_context, g_device, 0, &ciErrNum);
 		oclCHECKERROR(ciErrNum, CL_SUCCESS);
-        btOpenCLUtils::printDeviceInfo(g_device);
-		btOpenCLUtils::getDeviceInfo(g_device,&info);
+        b3OpenCLUtils::printDeviceInfo(g_device);
+		b3OpenCLUtils::getDeviceInfo(g_device,&info);
 		g_deviceName = info.m_deviceName;
 	}
 }
diff --git a/opencl/gpu_narrowphase/test/premake4.lua b/opencl/gpu_narrowphase/test/premake4.lua
index 551980e52..04246f177 100644
--- a/opencl/gpu_narrowphase/test/premake4.lua
+++ b/opencl/gpu_narrowphase/test/premake4.lua
@@ -16,9 +16,9 @@ function createProject(vendor)
 		
 		files {
 			"main.cpp",
-			"../../basic_initialize/btOpenCLInclude.h",
-			"../../basic_initialize/btOpenCLUtils.cpp",
-			"../../basic_initialize/btOpenCLUtils.h",
+			"../../basic_initialize/b3OpenCLInclude.h",
+			"../../basic_initialize/b3OpenCLUtils.cpp",
+			"../../basic_initialize/b3OpenCLUtils.h",
 			"../host/**.cpp",
 			"../host/**.h",
 			"../../parallel_primitives/host/btFillCL.cpp",
diff --git a/opencl/gpu_rigidbody/host/btConfig.h b/opencl/gpu_rigidbody/host/b3Config.h
index 77ddec96b..94f5fafce 100644
--- a/opencl/gpu_rigidbody/host/btConfig.h
+++ b/opencl/gpu_rigidbody/host/b3Config.h
@@ -1,7 +1,7 @@
 #ifndef BT_CONFIG_H
 #define BT_CONFIG_H
 
-struct	btConfig
+struct	b3Config
 {
 	int	m_maxConvexBodies;
 	int	m_maxConvexShapes;
@@ -18,7 +18,7 @@ struct	btConfig
 	
 	int m_maxTriConvexPairCapacity;
 
-	btConfig()
+	b3Config()
 		:m_maxConvexBodies(32*1024),
 		m_maxConvexShapes(8192),
 		m_maxVerticesPerFace(64),
@@ -27,7 +27,8 @@ struct	btConfig
 		m_maxConvexIndices(8192),
 		m_maxConvexUniqueEdges(8192),
 		m_maxCompoundChildShapes(8192),
-		m_maxTriConvexPairCapacity(512*1024)
+		//m_maxTriConvexPairCapacity(512*1024)
+		m_maxTriConvexPairCapacity(256*1024)
 	{
 		m_maxBroadphasePairs = 16*m_maxConvexBodies;
 		m_maxContactCapacity = m_maxBroadphasePairs;
diff --git a/opencl/gpu_rigidbody/host/btGpuBatchingPgsSolver.cpp b/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.cpp
index d971f55cf..d189cf758 100644
--- a/opencl/gpu_rigidbody/host/btGpuBatchingPgsSolver.cpp
+++ b/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.cpp
@@ -1,15 +1,15 @@
 
 
-#include "btGpuBatchingPgsSolver.h"
+#include "b3GpuBatchingPgsSolver.h"
 #include "../../parallel_primitives/host/btRadixSort32CL.h"
 #include "BulletCommon/btQuickprof.h"
 #include "../../parallel_primitives/host/btLauncherCL.h"
 #include "../../parallel_primitives/host/btBoundSearchCL.h"
 #include "../../parallel_primitives/host/btPrefixScanCL.h"
 #include <string.h>
-#include "../../basic_initialize/btOpenCLUtils.h"
-#include "../host/btConfig.h"
-#include "Solver.h"
+#include "../../basic_initialize/b3OpenCLUtils.h"
+#include "../host/b3Config.h"
+#include "b3Solver.h"
 
 
 #define SOLVER_SETUP_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solverSetup.cl"
@@ -49,11 +49,11 @@ struct	btGpuBatchingPgsSolverInternalData
 	int m_pairCapacity;
 	int m_nIterations;
 
-	btOpenCLArray<btGpuConstraint4>* m_contactCGPU;
+	btOpenCLArray<b3GpuConstraint4>* m_contactCGPU;
 	btOpenCLArray<unsigned int>* m_numConstraints;
 	btOpenCLArray<unsigned int>* m_offsets;
 		
-	Solver*		m_solverGPU;		
+	b3Solver*		m_solverGPU;		
 	
 	cl_kernel m_batchingKernel;
 	cl_kernel m_batchingKernelNew;
@@ -69,21 +69,21 @@ struct	btGpuBatchingPgsSolverInternalData
 	class btPrefixScanCL*	m_scan;
 
 	btOpenCLArray<btSortData>* m_sortDataBuffer;
-	btOpenCLArray<btContact4>* m_contactBuffer;
+	btOpenCLArray<b3Contact4>* m_contactBuffer;
 
-	btOpenCLArray<btRigidBodyCL>* m_bodyBufferGPU;
+	btOpenCLArray<b3RigidBodyCL>* m_bodyBufferGPU;
 	btOpenCLArray<btInertiaCL>* m_inertiaBufferGPU;
-	btOpenCLArray<btContact4>* m_pBufContactOutGPU;
+	btOpenCLArray<b3Contact4>* m_pBufContactOutGPU;
 
 
 	btAlignedObjectArray<unsigned int> m_idxBuffer;
 	btAlignedObjectArray<btSortData> m_sortData;
-	btAlignedObjectArray<btContact4> m_old;
+	btAlignedObjectArray<b3Contact4> m_old;
 };
 
 
 
-btGpuBatchingPgsSolver::btGpuBatchingPgsSolver(cl_context ctx,cl_device_id device, cl_command_queue  q,int pairCapacity)
+b3GpuBatchingPgsSolver::b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id device, cl_command_queue  q,int pairCapacity)
 {
 	m_data = new btGpuBatchingPgsSolverInternalData;
 	m_data->m_context = ctx;
@@ -92,11 +92,11 @@ btGpuBatchingPgsSolver::btGpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
 	m_data->m_pairCapacity = pairCapacity;
 	m_data->m_nIterations = 4;
 
-	m_data->m_bodyBufferGPU = new btOpenCLArray<btRigidBodyCL>(ctx,q);
+	m_data->m_bodyBufferGPU = new btOpenCLArray<b3RigidBodyCL>(ctx,q);
 	m_data->m_inertiaBufferGPU = new btOpenCLArray<btInertiaCL>(ctx,q);
-	m_data->m_pBufContactOutGPU = new btOpenCLArray<btContact4>(ctx,q);
+	m_data->m_pBufContactOutGPU = new btOpenCLArray<b3Contact4>(ctx,q);
 
-	m_data->m_solverGPU = new Solver(ctx,device,q,512*1024);
+	m_data->m_solverGPU = new b3Solver(ctx,device,q,512*1024);
 
 	m_data->m_sort32 = new btRadixSort32CL(ctx,device,m_data->m_queue);
 	m_data->m_scan = new btPrefixScanCL(ctx,device,m_data->m_queue,BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT);
@@ -105,12 +105,12 @@ btGpuBatchingPgsSolver::btGpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
 	const int sortSize = BTNEXTMULTIPLEOF( pairCapacity, 512 );
 
 	m_data->m_sortDataBuffer = new btOpenCLArray<btSortData>(ctx,m_data->m_queue,sortSize);
-	m_data->m_contactBuffer = new btOpenCLArray<btContact4>(ctx,m_data->m_queue);
+	m_data->m_contactBuffer = new btOpenCLArray<b3Contact4>(ctx,m_data->m_queue);
 
 	m_data->m_numConstraints = new btOpenCLArray<unsigned int>(ctx,m_data->m_queue,BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT );
 	m_data->m_numConstraints->resize(BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT);
 
-	m_data->m_contactCGPU = new btOpenCLArray<btGpuConstraint4>(ctx,q,pairCapacity);
+	m_data->m_contactCGPU = new btOpenCLArray<b3GpuConstraint4>(ctx,q,pairCapacity);
 
 	m_data->m_offsets = new btOpenCLArray<unsigned int>( ctx,m_data->m_queue, BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT );
 	m_data->m_offsets->resize(BT_SOLVER_N_SPLIT*BT_SOLVER_N_SPLIT);
@@ -131,54 +131,54 @@ btGpuBatchingPgsSolver::btGpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
 	
 	{
 		
-		cl_program solveContactProg= btOpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, SOLVER_CONTACT_KERNEL_PATH);
+		cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, SOLVER_CONTACT_KERNEL_PATH);
 		btAssert(solveContactProg);
 		
-		cl_program solveFrictionProg= btOpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, SOLVER_FRICTION_KERNEL_PATH);
+		cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, SOLVER_FRICTION_KERNEL_PATH);
 		btAssert(solveFrictionProg);
 
-		cl_program solverSetup2Prog= btOpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, SOLVER_SETUP2_KERNEL_PATH);
+		cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, SOLVER_SETUP2_KERNEL_PATH);
 		btAssert(solverSetup2Prog);
 
 		
-		cl_program solverSetupProg= btOpenCLUtils::compileCLProgramFromString( ctx, device, solverSetupSource, &pErrNum,additionalMacros, SOLVER_SETUP_KERNEL_PATH);
+		cl_program solverSetupProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetupSource, &pErrNum,additionalMacros, SOLVER_SETUP_KERNEL_PATH);
 		btAssert(solverSetupProg);
 		
 		
-		m_data->m_solveFrictionKernel= btOpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros );
+		m_data->m_solveFrictionKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros );
 		btAssert(m_data->m_solveFrictionKernel);
 
-		m_data->m_solveContactKernel= btOpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros );
+		m_data->m_solveContactKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros );
 		btAssert(m_data->m_solveContactKernel);
 		
-		m_data->m_contactToConstraintKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros );
+		m_data->m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros );
 		btAssert(m_data->m_contactToConstraintKernel);
 			
-		m_data->m_setSortDataKernel =  btOpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog,additionalMacros );
+		m_data->m_setSortDataKernel =  b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog,additionalMacros );
 		btAssert(m_data->m_setSortDataKernel);
 				
-		m_data->m_reorderContactKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros );
+		m_data->m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros );
 		btAssert(m_data->m_reorderContactKernel);
 		
 
-		m_data->m_copyConstraintKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog,additionalMacros );
+		m_data->m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog,additionalMacros );
 		btAssert(m_data->m_copyConstraintKernel);
 		
 	}
 
 	{
-		cl_program batchingProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, BATCHING_PATH);
+		cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, BATCHING_PATH);
 		btAssert(batchingProg);
 		
-		m_data->m_batchingKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros );
+		m_data->m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros );
 		btAssert(m_data->m_batchingKernel);
 	}
 			
 	{
-		cl_program batchingNewProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, BATCHING_NEW_PATH);
+		cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, BATCHING_NEW_PATH);
 		btAssert(batchingNewProg);
 		
-		m_data->m_batchingKernelNew = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
+		m_data->m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
 		btAssert(m_data->m_batchingKernelNew);
 	}
 		
@@ -190,7 +190,7 @@ btGpuBatchingPgsSolver::btGpuBatchingPgsSolver(cl_context ctx,cl_device_id devic
 
 }
 
-btGpuBatchingPgsSolver::~btGpuBatchingPgsSolver()
+b3GpuBatchingPgsSolver::~b3GpuBatchingPgsSolver()
 {
 	delete m_data->m_sortDataBuffer;
 	delete m_data->m_contactBuffer;
@@ -232,8 +232,8 @@ struct btConstraintCfg
 
 
 
-void btGpuBatchingPgsSolver::solveContactConstraint(  const btOpenCLArray<btRigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf, 
-			btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations)
+void b3GpuBatchingPgsSolver::solveContactConstraint(  const btOpenCLArray<b3RigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf, 
+			btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations)
 {
 	
 	
@@ -404,7 +404,7 @@ void btGpuBatchingPgsSolver::solveContactConstraint(  const btOpenCLArray<btRigi
 
 
 
-void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const btConfig& config)
+void b3GpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config)
 {
 	m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf,numBodies);
 	m_data->m_inertiaBufferGPU->setFromOpenCLBuffer(inertiaBuf,numBodies);
@@ -423,11 +423,11 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
         csCfg.m_staticIdx = 0;//m_static0Index;//m_planeBodyIndex;
         
         
-        btOpenCLArray<btRigidBodyCL>* bodyBuf = m_data->m_bodyBufferGPU;
+        btOpenCLArray<b3RigidBodyCL>* bodyBuf = m_data->m_bodyBufferGPU;
 
         void* additionalData = 0;//m_data->m_frictionCGPU;
         const btOpenCLArray<btInertiaCL>* shapeBuf = m_data->m_inertiaBufferGPU;
-        btOpenCLArray<btGpuConstraint4>* contactConstraintOut = m_data->m_contactCGPU;
+        btOpenCLArray<b3GpuConstraint4>* contactConstraintOut = m_data->m_contactCGPU;
         int nContacts = nContactOut;
         
         
@@ -442,7 +442,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
             
             if( m_data->m_solverGPU->m_contactBuffer2 == 0 )
             {
-				m_data->m_solverGPU->m_contactBuffer2 = new btOpenCLArray<btContact4>(m_data->m_context,m_data->m_queue, nContacts );
+				m_data->m_solverGPU->m_contactBuffer2 = new btOpenCLArray<b3Contact4>(m_data->m_context,m_data->m_queue, nContacts );
                 m_data->m_solverGPU->m_contactBuffer2->resize(nContacts);
             }
 			
@@ -456,13 +456,13 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
                 
                 
                 
-                const btOpenCLArray<btRigidBodyCL>* bodyNative = bodyBuf;
+                const btOpenCLArray<b3RigidBodyCL>* bodyNative = bodyBuf;
                 
                 
                 {
                     
-                    //btOpenCLArray<btRigidBodyCL>* bodyNative = btOpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf );
-                    //btOpenCLArray<btContact4>* contactNative = btOpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn );
+                    //btOpenCLArray<b3RigidBodyCL>* bodyNative = btOpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf );
+                    //btOpenCLArray<b3Contact4>* contactNative = btOpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn );
                     
                     const int sortAlignment = 512; // todo. get this out of sort
                     if( csCfg.m_enableParallelSolve )
@@ -596,8 +596,8 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
 					} else
 					{
 						BT_PROFILE("cpu batchContacts");
-						btAlignedObjectArray<btContact4> cpuContacts;
-						btOpenCLArray<btContact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
+						btAlignedObjectArray<b3Contact4> cpuContacts;
+						btOpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2;
 						contactsIn->copyToHost(cpuContacts);
                     
 						btOpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints;
@@ -641,7 +641,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
 						}
 						{
 							BT_PROFILE("m_contactBuffer->copyFromHost");
-							m_data->m_solverGPU->m_contactBuffer2->copyFromHost((btAlignedObjectArray<btContact4>&)cpuContacts);
+							m_data->m_solverGPU->m_contactBuffer2->copyFromHost((btAlignedObjectArray<b3Contact4>&)cpuContacts);
 						}
 						
 					} 
@@ -658,7 +658,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
 						shapeBuf, m_data->m_solverGPU->m_contactBuffer2,
 						contactConstraintOut, 
 						additionalData, nContacts, 
-						(SolverBase::ConstraintCfg&) csCfg );
+						(b3SolverBase::ConstraintCfg&) csCfg );
                     clFinish(m_data->m_queue);
                 }
                 
@@ -711,7 +711,7 @@ void btGpuBatchingPgsSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem
 }
 
 
-void btGpuBatchingPgsSolver::batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx )
+void b3GpuBatchingPgsSolver::batchContacts( btOpenCLArray<b3Contact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx )
 {
 }
 
@@ -731,10 +731,10 @@ btAlignedObjectArray<int> bodyUsed;
 
 btAlignedObjectArray<unsigned int> idxBuffer;
 btAlignedObjectArray<btSortData> sortData;
-btAlignedObjectArray<btContact4> old;
+btAlignedObjectArray<b3Contact4> old;
 
 
-inline int btGpuBatchingPgsSolver::sortConstraintByBatch( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies)
+inline int b3GpuBatchingPgsSolver::sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies)
 {
 	btAlignedObjectArray<int> bodyUsed;
 	bodyUsed.resize(numBodies);
@@ -841,7 +841,7 @@ inline int btGpuBatchingPgsSolver::sortConstraintByBatch( btContact4* cs, int n,
         BT_PROFILE("reorder");
 		//	reorder
 		
-		memcpy( &old[0], cs, sizeof(btContact4)*n);
+		memcpy( &old[0], cs, sizeof(b3Contact4)*n);
 		for(int i=0; i<n; i++)
 		{
 			int idx = sortData[i].m_value;
@@ -861,7 +861,7 @@ inline int btGpuBatchingPgsSolver::sortConstraintByBatch( btContact4* cs, int n,
 }
 
 
-inline int btGpuBatchingPgsSolver::sortConstraintByBatch2( btContact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
+inline int b3GpuBatchingPgsSolver::sortConstraintByBatch2( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
 {
 	
 	BT_PROFILE("sortConstraintByBatch");
@@ -993,7 +993,7 @@ inline int btGpuBatchingPgsSolver::sortConstraintByBatch2( btContact4* cs, int n
         BT_PROFILE("reorder");
 		//	reorder
 		
-		memcpy( &m_data->m_old[0], cs, sizeof(btContact4)*numConstraints);
+		memcpy( &m_data->m_old[0], cs, sizeof(b3Contact4)*numConstraints);
 
 		for(int i=0; i<numConstraints; i++)
 		{
@@ -1016,7 +1016,7 @@ inline int btGpuBatchingPgsSolver::sortConstraintByBatch2( btContact4* cs, int n
 }
 
 
-inline int btGpuBatchingPgsSolver::sortConstraintByBatch3( btContact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
+inline int b3GpuBatchingPgsSolver::sortConstraintByBatch3( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies)
 {
 	
 	BT_PROFILE("sortConstraintByBatch");
diff --git a/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.h b/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.h
new file mode 100644
index 000000000..367718e94
--- /dev/null
+++ b/opencl/gpu_rigidbody/host/b3GpuBatchingPgsSolver.h
@@ -0,0 +1,40 @@
+
+#ifndef BT_GPU_BATCHING_PGS_SOLVER_H
+#define BT_GPU_BATCHING_PGS_SOLVER_H
+
+#include "../../basic_initialize/b3OpenCLInclude.h"
+#include "../../parallel_primitives/host/btOpenCLArray.h"
+#include "../../gpu_narrowphase/host/b3RigidBodyCL.h"
+#include "../../gpu_narrowphase/host/b3Contact4.h"
+#include "b3GpuConstraint4.h"
+
+class b3GpuBatchingPgsSolver
+{
+protected:
+	// Contact solver driven through the OpenCL context/device/queue handed to the constructor.
+	// m_data points to the solver's internal state; the struct is only forward-declared
+	// in this header and is allocated by the constructor in b3GpuBatchingPgsSolver.cpp.
+	struct btGpuBatchingPgsSolverInternalData*		m_data;
+	// NOTE: batchContacts currently has an empty body in b3GpuBatchingPgsSolver.cpp.
+	void batchContacts( btOpenCLArray<b3Contact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
+	// Batch-sorting helpers over the contact array cs. NOTE(review): declared inline but defined in the .cpp, so they are only callable from that translation unit.
+	inline int sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
+	inline int sortConstraintByBatch2( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
+	inline int sortConstraintByBatch3( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
+	// Internal solve step: operates on the constraints in `constraint`, using the rigid bodies
+	// in bodyBuf and the inertia data in shapeBuf (solveContacts passes its inertia buffer here).
+	// NOTE(review): n is presumably the number of constraints - confirm against the .cpp.
+	void solveContactConstraint(  const btOpenCLArray<b3RigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf, 
+			btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations);
+
+public:
+	// pairCapacity sizes the sort-data and constraint buffers; the constructor also compiles the OpenCL kernels.
+	b3GpuBatchingPgsSolver(cl_context ctx,cl_device_id device, cl_command_queue  q,int pairCapacity);
+	virtual ~b3GpuBatchingPgsSolver();
+	// Wraps bodyBuf and inertiaBuf as arrays of numBodies elements; contactBuf presumably holds numContacts b3Contact4 entries (confirm).
+	void solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config);
+
+};
+
+#endif //BT_GPU_BATCHING_PGS_SOLVER_H
+
diff --git a/opencl/gpu_rigidbody/host/btGpuConstraint4.h b/opencl/gpu_rigidbody/host/b3GpuConstraint4.h
index 7095218b7..a383bf5d5 100644
--- a/opencl/gpu_rigidbody/host/btGpuConstraint4.h
+++ b/opencl/gpu_rigidbody/host/b3GpuConstraint4.h
@@ -3,7 +3,7 @@
 #define BT_CONSTRAINT4_h
 #include "BulletCommon/btVector3.h"
 
-ATTRIBUTE_ALIGNED16(struct) btGpuConstraint4
+ATTRIBUTE_ALIGNED16(struct) b3GpuConstraint4
 {
 	BT_DECLARE_ALIGNED_ALLOCATOR();
 
diff --git a/opencl/gpu_rigidbody/host/btGpuNarrowPhase.cpp b/opencl/gpu_rigidbody/host/b3GpuNarrowPhase.cpp
index cabcabe3c..4d9578804 100644
--- a/opencl/gpu_rigidbody/host/btGpuNarrowPhase.cpp
+++ b/opencl/gpu_rigidbody/host/b3GpuNarrowPhase.cpp
@@ -1,26 +1,26 @@
-#include "btGpuNarrowPhase.h"
+#include "b3GpuNarrowPhase.h"
 
 
 #include "parallel_primitives/host/btOpenCLArray.h"
-#include "../../gpu_narrowphase/host/btConvexPolyhedronCL.h"
-#include "../../gpu_narrowphase/host/ConvexHullContact.h"
-#include "../../gpu_broadphase/host/btSapAabb.h"
+#include "../../gpu_narrowphase/host/b3ConvexPolyhedronCL.h"
+#include "../../gpu_narrowphase/host/b3ConvexHullContact.h"
+#include "../../gpu_broadphase/host/b3SapAabb.h"
 #include <string.h>
-#include "btConfig.h"
-#include "../../gpu_narrowphase/host/btOptimizedBvh.h"
-#include "../../gpu_narrowphase/host/btTriangleIndexVertexArray.h"
+#include "b3Config.h"
+#include "../../gpu_narrowphase/host/b3OptimizedBvh.h"
+#include "../../gpu_narrowphase/host/b3TriangleIndexVertexArray.h"
 #include "BulletGeometry/btAabbUtil2.h"
 
 struct btGpuNarrowPhaseInternalData
 {
-	btAlignedObjectArray<btConvexUtility*>* m_convexData;
+	btAlignedObjectArray<b3ConvexUtility*>* m_convexData;
     
-	btAlignedObjectArray<btConvexPolyhedronCL> m_convexPolyhedra;
+	btAlignedObjectArray<b3ConvexPolyhedronCL> m_convexPolyhedra;
 	btAlignedObjectArray<btVector3> m_uniqueEdges;
 	btAlignedObjectArray<btVector3> m_convexVertices;
 	btAlignedObjectArray<int> m_convexIndices;
     
-	btOpenCLArray<btConvexPolyhedronCL>* m_convexPolyhedraGPU;
+	btOpenCLArray<b3ConvexPolyhedronCL>* m_convexPolyhedraGPU;
 	btOpenCLArray<btVector3>* m_uniqueEdgesGPU;
 	btOpenCLArray<btVector3>* m_convexVerticesGPU;
 	btOpenCLArray<int>* m_convexIndicesGPU;
@@ -44,12 +44,12 @@ struct btGpuNarrowPhaseInternalData
 	btOpenCLArray<btInt2>* m_convexPairsOutGPU;
 	btOpenCLArray<btInt2>* m_planePairs;
     
-	btOpenCLArray<btContact4>* m_pBufContactOutGPU;
-	btAlignedObjectArray<btContact4>* m_pBufContactOutCPU;
+	btOpenCLArray<b3Contact4>* m_pBufContactOutGPU;
+	btAlignedObjectArray<b3Contact4>* m_pBufContactOutCPU;
 	
     
-	btAlignedObjectArray<btRigidBodyCL>* m_bodyBufferCPU;
-	btOpenCLArray<btRigidBodyCL>* m_bodyBufferGPU;
+	btAlignedObjectArray<b3RigidBodyCL>* m_bodyBufferCPU;
+	btOpenCLArray<b3RigidBodyCL>* m_bodyBufferGPU;
     
 	btAlignedObjectArray<btInertiaCL>*	m_inertiaBufferCPU;
 	btOpenCLArray<btInertiaCL>*	m_inertiaBufferGPU;
@@ -57,18 +57,18 @@ struct btGpuNarrowPhaseInternalData
 	int m_numAcceleratedShapes;
 	int m_numAcceleratedRigidBodies;
     
-	btAlignedObjectArray<btCollidable>	m_collidablesCPU;
-	btOpenCLArray<btCollidable>*	m_collidablesGPU;
+	btAlignedObjectArray<b3Collidable>	m_collidablesCPU;
+	btOpenCLArray<b3Collidable>*	m_collidablesGPU;
 
-	btOpenCLArray<btSapAabb>* m_localShapeAABBGPU;
-	btAlignedObjectArray<btSapAabb>* m_localShapeAABBCPU;
+	btOpenCLArray<b3SapAabb>* m_localShapeAABBGPU;
+	btAlignedObjectArray<b3SapAabb>* m_localShapeAABBCPU;
 
-	btAlignedObjectArray<class btOptimizedBvh*> m_bvhData;
+	btAlignedObjectArray<class b3OptimizedBvh*> m_bvhData;
 	btOpenCLArray<btQuantizedBvhNode>*	m_treeNodesGPU;
 	btOpenCLArray<btBvhSubtreeInfo>*	m_subTreesGPU;
 	
 
-	btConfig	m_config;
+	b3Config	m_config;
     
 };
 
@@ -76,7 +76,7 @@ struct btGpuNarrowPhaseInternalData
 
 
 
-btGpuNarrowPhase::btGpuNarrowPhase(cl_context ctx, cl_device_id device, cl_command_queue queue, const btConfig& config)
+b3GpuNarrowPhase::b3GpuNarrowPhase(cl_context ctx, cl_device_id device, cl_command_queue queue, const b3Config& config)
 :m_data(0) ,m_planeBodyIndex(-1),m_static0Index(-1),
 m_context(ctx),
 m_device(device),
@@ -95,30 +95,30 @@ m_queue(queue)
 	m_data->m_convexPairsOutGPU = new btOpenCLArray<btInt2>(ctx,queue,config.m_maxBroadphasePairs,false);
 	m_data->m_planePairs = new btOpenCLArray<btInt2>(ctx,queue,config.m_maxBroadphasePairs,false);
     
-	m_data->m_pBufContactOutCPU = new btAlignedObjectArray<btContact4>();
+	m_data->m_pBufContactOutCPU = new btAlignedObjectArray<b3Contact4>();
 	m_data->m_pBufContactOutCPU->resize(config.m_maxBroadphasePairs);
-	m_data->m_bodyBufferCPU = new btAlignedObjectArray<btRigidBodyCL>();
+	m_data->m_bodyBufferCPU = new btAlignedObjectArray<b3RigidBodyCL>();
 	m_data->m_bodyBufferCPU->resize(config.m_maxConvexBodies);
     
 	m_data->m_inertiaBufferCPU = new btAlignedObjectArray<btInertiaCL>();
 	m_data->m_inertiaBufferCPU->resize(config.m_maxConvexBodies);
 	
-	m_data->m_pBufContactOutGPU = new btOpenCLArray<btContact4>(ctx,queue, config.m_maxContactCapacity,true);
+	m_data->m_pBufContactOutGPU = new btOpenCLArray<b3Contact4>(ctx,queue, config.m_maxContactCapacity,true);
 	
 	m_data->m_inertiaBufferGPU = new btOpenCLArray<btInertiaCL>(ctx,queue,config.m_maxConvexBodies,false);
-	m_data->m_collidablesGPU = new btOpenCLArray<btCollidable>(ctx,queue,config.m_maxConvexShapes);
+	m_data->m_collidablesGPU = new btOpenCLArray<b3Collidable>(ctx,queue,config.m_maxConvexShapes);
 
-	m_data->m_localShapeAABBCPU = new btAlignedObjectArray<btSapAabb>;
-	m_data->m_localShapeAABBGPU = new btOpenCLArray<btSapAabb>(ctx,queue,config.m_maxConvexShapes);
+	m_data->m_localShapeAABBCPU = new btAlignedObjectArray<b3SapAabb>;
+	m_data->m_localShapeAABBGPU = new btOpenCLArray<b3SapAabb>(ctx,queue,config.m_maxConvexShapes);
     
     
 	//m_data->m_solverDataGPU = adl::Solver<adl::TYPE_CL>::allocate(ctx,queue, config.m_maxBroadphasePairs,false);
-	m_data->m_bodyBufferGPU = new btOpenCLArray<btRigidBodyCL>(ctx,queue, config.m_maxConvexBodies,false);
+	m_data->m_bodyBufferGPU = new btOpenCLArray<b3RigidBodyCL>(ctx,queue, config.m_maxConvexBodies,false);
 
 	m_data->m_convexFacesGPU = new btOpenCLArray<btGpuFace>(ctx,queue,config.m_maxConvexShapes*config.m_maxFacesPerShape,false);
 	m_data->m_gpuChildShapes = new btOpenCLArray<btGpuChildShape>(ctx,queue,config.m_maxCompoundChildShapes,false);
 	
-	m_data->m_convexPolyhedraGPU = new btOpenCLArray<btConvexPolyhedronCL>(ctx,queue,config.m_maxConvexShapes,false);
+	m_data->m_convexPolyhedraGPU = new btOpenCLArray<b3ConvexPolyhedronCL>(ctx,queue,config.m_maxConvexShapes,false);
 	m_data->m_uniqueEdgesGPU = new btOpenCLArray<btVector3>(ctx,queue,config.m_maxConvexUniqueEdges,true);
 	m_data->m_convexVerticesGPU = new btOpenCLArray<btVector3>(ctx,queue,config.m_maxConvexVertices,true);
 	m_data->m_convexIndicesGPU = new btOpenCLArray<int>(ctx,queue,config.m_maxConvexIndices,true);
@@ -132,7 +132,7 @@ m_queue(queue)
     
     
 
-	m_data->m_convexData = new btAlignedObjectArray<btConvexUtility* >();
+	m_data->m_convexData = new btAlignedObjectArray<b3ConvexUtility* >();
     
 
 	m_data->m_convexData->resize(config.m_maxConvexShapes);
@@ -151,7 +151,7 @@ m_queue(queue)
 }
 
 
-btGpuNarrowPhase::~btGpuNarrowPhase()
+b3GpuNarrowPhase::~b3GpuNarrowPhase()
 {
 	delete m_data->m_gpuSatCollision;
 	delete m_data->m_pBufPairsCPU;
@@ -186,7 +186,7 @@ btGpuNarrowPhase::~btGpuNarrowPhase()
 }
 
 
-int	btGpuNarrowPhase::allocateCollidable()
+int	b3GpuNarrowPhase::allocateCollidable()
 {
 	int curSize = m_data->m_collidablesCPU.size();
 	m_data->m_collidablesCPU.expand();
@@ -197,18 +197,18 @@ int	btGpuNarrowPhase::allocateCollidable()
 
 
 
-int		btGpuNarrowPhase::registerSphereShape(float radius)
+int		b3GpuNarrowPhase::registerSphereShape(float radius)
 {
 	int collidableIndex = allocateCollidable();
 
-	btCollidable& col = getCollidableCpu(collidableIndex);
+	b3Collidable& col = getCollidableCpu(collidableIndex);
 	col.m_shapeType = SHAPE_SPHERE;
 	col.m_shapeIndex = 0;
 	col.m_radius = radius;
 	
 	if (col.m_shapeIndex>=0)
 	{
-		btSapAabb aabb;
+		b3SapAabb aabb;
 		btVector3 myAabbMin(-radius,-radius,-radius);
 		btVector3 myAabbMax(radius,radius,radius);
 
@@ -231,7 +231,7 @@ int		btGpuNarrowPhase::registerSphereShape(float radius)
 }
 
 
-int btGpuNarrowPhase::registerFace(const btVector3& faceNormal, float faceConstant)
+int b3GpuNarrowPhase::registerFace(const btVector3& faceNormal, float faceConstant)
 {
 	int faceOffset = m_data->m_convexFaces.size();
 	btGpuFace& face = m_data->m_convexFaces.expand();
@@ -243,18 +243,18 @@ int btGpuNarrowPhase::registerFace(const btVector3& faceNormal, float faceConsta
 	return faceOffset;
 }
 
-int		btGpuNarrowPhase::registerPlaneShape(const btVector3& planeNormal, float planeConstant)
+int		b3GpuNarrowPhase::registerPlaneShape(const btVector3& planeNormal, float planeConstant)
 {
 	int collidableIndex = allocateCollidable();
 
-	btCollidable& col = getCollidableCpu(collidableIndex);
+	b3Collidable& col = getCollidableCpu(collidableIndex);
 	col.m_shapeType = SHAPE_PLANE;
 	col.m_shapeIndex = registerFace(planeNormal,planeConstant);
 	col.m_radius = planeConstant;
 	
 	if (col.m_shapeIndex>=0)
 	{
-		btSapAabb aabb;
+		b3SapAabb aabb;
 		aabb.m_min[0] = -1e30f;
 		aabb.m_min[1] = -1e30f;
 		aabb.m_min[2] = -1e30f;
@@ -274,13 +274,13 @@ int		btGpuNarrowPhase::registerPlaneShape(const btVector3& planeNormal, float pl
 }
 
 
-int btGpuNarrowPhase::registerConvexHullShape(btConvexUtility* convexPtr,btCollidable& col)
+int b3GpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* convexPtr,b3Collidable& col)
 {
 	m_data->m_convexData->resize(m_data->m_numAcceleratedShapes+1);
 	m_data->m_convexPolyhedra.resize(m_data->m_numAcceleratedShapes+1);
 	
     
-	btConvexPolyhedronCL& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size()-1);
+	b3ConvexPolyhedronCL& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size()-1);
 	convex.mC = convexPtr->mC;
 	convex.mE = convexPtr->mE;
 	convex.m_extents= convexPtr->m_extents;
@@ -344,7 +344,7 @@ int btGpuNarrowPhase::registerConvexHullShape(btConvexUtility* convexPtr,btColli
 }
 
 
-int		btGpuNarrowPhase::registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling)
+int		b3GpuNarrowPhase::registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling)
 {
 	btAlignedObjectArray<btVector3> verts;
 	
@@ -355,7 +355,7 @@ int		btGpuNarrowPhase::registerConvexHullShape(const float* vertices, int stride
 		verts.push_back(btVector3(vertex[0]*scaling[0],vertex[1]*scaling[1],vertex[2]*scaling[2]));
 	}
 
-	btConvexUtility* utilPtr = new btConvexUtility();
+	b3ConvexUtility* utilPtr = new b3ConvexUtility();
 	bool merge = true;
 	if (numVertices)
 	{
@@ -366,10 +366,10 @@ int		btGpuNarrowPhase::registerConvexHullShape(const float* vertices, int stride
 	return collidableIndex;
 }
 
-int		btGpuNarrowPhase::registerConvexHullShape(btConvexUtility* utilPtr)
+int		b3GpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* utilPtr)
 {
 	int collidableIndex = allocateCollidable();
-	btCollidable& col = getCollidableCpu(collidableIndex);
+	b3Collidable& col = getCollidableCpu(collidableIndex);
 	col.m_shapeType = SHAPE_CONVEX_HULL;
 	col.m_shapeIndex = -1;
 	
@@ -386,7 +386,7 @@ int		btGpuNarrowPhase::registerConvexHullShape(btConvexUtility* utilPtr)
 
 	if (col.m_shapeIndex>=0)
 	{
-		btSapAabb aabb;
+		b3SapAabb aabb;
 		
 		btVector3 myAabbMin(1e30f,1e30f,1e30f);
 		btVector3 myAabbMax(-1e30f,-1e30f,-1e30f);
@@ -414,11 +414,11 @@ int		btGpuNarrowPhase::registerConvexHullShape(btConvexUtility* utilPtr)
 
 }
 
-int		btGpuNarrowPhase::registerCompoundShape(btAlignedObjectArray<btGpuChildShape>* childShapes)
+int		b3GpuNarrowPhase::registerCompoundShape(btAlignedObjectArray<btGpuChildShape>* childShapes)
 {
 	
 	int collidableIndex = allocateCollidable();
-	btCollidable& col = getCollidableCpu(collidableIndex);
+	b3Collidable& col = getCollidableCpu(collidableIndex);
 	col.m_shapeType = SHAPE_COMPOUND_OF_CONVEX_HULLS;
 	
 	col.m_shapeIndex = m_data->m_cpuChildShapes.size();
@@ -437,7 +437,7 @@ int		btGpuNarrowPhase::registerCompoundShape(btAlignedObjectArray<btGpuChildShap
 	col.m_numChildShapes = childShapes->size();
 	
 	
-	btSapAabb aabbWS;
+	b3SapAabb aabbWS;
 	btVector3 myAabbMin(1e30f,1e30f,1e30f);
 	btVector3 myAabbMax(-1e30f,-1e30f,-1e30f);
 	
@@ -445,8 +445,8 @@ int		btGpuNarrowPhase::registerCompoundShape(btAlignedObjectArray<btGpuChildShap
 	for (int i=0;i<childShapes->size();i++)
 	{
 		int childColIndex = childShapes->at(i).m_shapeIndex;
-		btCollidable& childCol = getCollidableCpu(childColIndex);
-		btSapAabb aabbLoc =m_data->m_localShapeAABBCPU->at(childColIndex);
+		b3Collidable& childCol = getCollidableCpu(childColIndex);
+		b3SapAabb aabbLoc =m_data->m_localShapeAABBCPU->at(childColIndex);
 
 		btVector3 childLocalAabbMin(aabbLoc.m_min[0],aabbLoc.m_min[1],aabbLoc.m_min[2]);
 		btVector3 childLocalAabbMax(aabbLoc.m_max[0],aabbLoc.m_max[1],aabbLoc.m_max[2]);
@@ -485,7 +485,7 @@ int		btGpuNarrowPhase::registerCompoundShape(btAlignedObjectArray<btGpuChildShap
 }
 
 
-int		btGpuNarrowPhase::registerConcaveMesh(btAlignedObjectArray<btVector3>* vertices, btAlignedObjectArray<int>* indices,const float* scaling1)
+int		b3GpuNarrowPhase::registerConcaveMesh(btAlignedObjectArray<btVector3>* vertices, btAlignedObjectArray<int>* indices,const float* scaling1)
 {
 	//right now we only support one single mesh, it is on the todo to merge all mesh data etc
 	btAssert(m_data->m_treeNodesGPU ==0);
@@ -499,14 +499,14 @@ int		btGpuNarrowPhase::registerConcaveMesh(btAlignedObjectArray<btVector3>* vert
 	btVector3 scaling(scaling1[0],scaling1[1],scaling1[2]);
 
 	int collidableIndex = allocateCollidable();
-	btCollidable& col = getCollidableCpu(collidableIndex);
+	b3Collidable& col = getCollidableCpu(collidableIndex);
 	
 	col.m_shapeType = SHAPE_CONCAVE_TRIMESH;
 	col.m_shapeIndex = registerConcaveMeshShape(vertices,indices,col,scaling);
 
 	
 
-	btSapAabb aabb;
+	b3SapAabb aabb;
 	btVector3 myAabbMin(1e30f,1e30f,1e30f);
 	btVector3 myAabbMax(-1e30f,-1e30f,-1e30f);
 
@@ -529,11 +529,11 @@ int		btGpuNarrowPhase::registerConcaveMesh(btAlignedObjectArray<btVector3>* vert
 	m_data->m_localShapeAABBCPU->push_back(aabb);
 	m_data->m_localShapeAABBGPU->push_back(aabb);
 
-	btOptimizedBvh* bvh = new btOptimizedBvh();
-	//void btOptimizedBvh::build(btStridingMeshInterface* triangles, bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax)
+	b3OptimizedBvh* bvh = new b3OptimizedBvh();
+	//void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantizedAabbCompression, const btVector3& bvhAabbMin, const btVector3& bvhAabbMax)
 	
 	bool useQuantizedAabbCompression = true;
-	btTriangleIndexVertexArray* meshInterface=new btTriangleIndexVertexArray();
+	b3TriangleIndexVertexArray* meshInterface=new b3TriangleIndexVertexArray();
 	btIndexedMesh mesh;
 	mesh.m_numTriangles = indices->size()/3;
 	mesh.m_numVertices = vertices->size();
@@ -560,7 +560,7 @@ int		btGpuNarrowPhase::registerConcaveMesh(btAlignedObjectArray<btVector3>* vert
 	return collidableIndex;
 }
 
-int btGpuNarrowPhase::registerConcaveMeshShape(btAlignedObjectArray<btVector3>* vertices, btAlignedObjectArray<int>* indices,btCollidable& col, const float* scaling1)
+int b3GpuNarrowPhase::registerConcaveMeshShape(btAlignedObjectArray<btVector3>* vertices, btAlignedObjectArray<int>* indices,b3Collidable& col, const float* scaling1)
 {
 
 
@@ -570,7 +570,7 @@ int btGpuNarrowPhase::registerConcaveMeshShape(btAlignedObjectArray<btVector3>*
 	m_data->m_convexPolyhedra.resize(m_data->m_numAcceleratedShapes+1);
 	
     
-	btConvexPolyhedronCL& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size()-1);
+	b3ConvexPolyhedronCL& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size()-1);
 	convex.mC = btVector3(0,0,0);
 	convex.mE = btVector3(0,0,0);
 	convex.m_extents= btVector3(0,0,0);
@@ -639,48 +639,48 @@ int btGpuNarrowPhase::registerConcaveMeshShape(btAlignedObjectArray<btVector3>*
 
 
 
-cl_mem	btGpuNarrowPhase::getBodiesGpu()
+cl_mem	b3GpuNarrowPhase::getBodiesGpu()
 {
 	return (cl_mem)m_data->m_bodyBufferGPU->getBufferCL();
 }
 
 
-int	btGpuNarrowPhase::getNumBodiesGpu() const
+int	b3GpuNarrowPhase::getNumBodiesGpu() const
 {
 	return m_data->m_bodyBufferGPU->size();
 }
 
-cl_mem	btGpuNarrowPhase::getBodyInertiasGpu()
+cl_mem	b3GpuNarrowPhase::getBodyInertiasGpu()
 {
 	return (cl_mem)m_data->m_inertiaBufferGPU->getBufferCL();
 }
 
-int	btGpuNarrowPhase::getNumBodyInertiasGpu() const
+int	b3GpuNarrowPhase::getNumBodyInertiasGpu() const
 {
 	return m_data->m_inertiaBufferGPU->size();
 }
 
 
-btCollidable& btGpuNarrowPhase::getCollidableCpu(int collidableIndex)
+b3Collidable& b3GpuNarrowPhase::getCollidableCpu(int collidableIndex)
 {
 	return m_data->m_collidablesCPU[collidableIndex];
 }
 
-const btCollidable& btGpuNarrowPhase::getCollidableCpu(int collidableIndex) const
+const b3Collidable& b3GpuNarrowPhase::getCollidableCpu(int collidableIndex) const
 {
 	return m_data->m_collidablesCPU[collidableIndex];
 }
 
-cl_mem btGpuNarrowPhase::getCollidablesGpu()
+cl_mem b3GpuNarrowPhase::getCollidablesGpu()
 {
 	return m_data->m_collidablesGPU->getBufferCL();
 }
 
-cl_mem	btGpuNarrowPhase::getAabbBufferGpu()
+cl_mem	b3GpuNarrowPhase::getAabbBufferGpu()
 {
 	return m_data->m_localShapeAABBGPU->getBufferCL();
 }
-int	btGpuNarrowPhase::getNumCollidablesGpu() const
+int	b3GpuNarrowPhase::getNumCollidablesGpu() const
 {
 	return m_data->m_collidablesGPU->size();
 }
@@ -689,17 +689,17 @@ int	btGpuNarrowPhase::getNumCollidablesGpu() const
 
 
 
-int	btGpuNarrowPhase::getNumContactsGpu() const
+int	b3GpuNarrowPhase::getNumContactsGpu() const
 {
 	return m_data->m_pBufContactOutGPU->size();
 }
-cl_mem btGpuNarrowPhase::getContactsGpu()
+cl_mem b3GpuNarrowPhase::getContactsGpu()
 {
 	return m_data->m_pBufContactOutGPU->getBufferCL();
 }
 
 
-void btGpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWS, int numObjects)
+void b3GpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWS, int numObjects)
 {
 	int nContactOut = 0;
 
@@ -742,7 +742,7 @@ void btGpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphase
 
 }
 
-const btSapAabb& btGpuNarrowPhase::getLocalSpaceAabb(int collidableIndex) const
+const b3SapAabb& b3GpuNarrowPhase::getLocalSpaceAabb(int collidableIndex) const
 {
 	return m_data->m_localShapeAABBCPU->at(collidableIndex);
 }
@@ -751,7 +751,7 @@ const btSapAabb& btGpuNarrowPhase::getLocalSpaceAabb(int collidableIndex) const
 
 
 
-int btGpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation , const float* aabbMinPtr, const float* aabbMaxPtr,bool writeToGpu)
+int b3GpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation , const float* aabbMinPtr, const float* aabbMaxPtr,bool writeToGpu)
 {
 	btVector3 aabbMin(aabbMinPtr[0],aabbMinPtr[1],aabbMinPtr[2]);
 	btVector3 aabbMax (aabbMaxPtr[0],aabbMaxPtr[1],aabbMaxPtr[2]);
@@ -760,7 +760,7 @@ int btGpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const f
     
 	m_data->m_bodyBufferGPU->resize(m_data->m_numAcceleratedRigidBodies+1);
     
-	btRigidBodyCL& body = m_data->m_bodyBufferCPU->at(m_data->m_numAcceleratedRigidBodies);
+	b3RigidBodyCL& body = m_data->m_bodyBufferCPU->at(m_data->m_numAcceleratedRigidBodies);
     
 	float friction = 1.f;
 	float restitution = 0.f;
@@ -844,7 +844,7 @@ int btGpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const f
 	return m_data->m_numAcceleratedRigidBodies++;
 }
 
-void	btGpuNarrowPhase::writeAllBodiesToGpu()
+void	b3GpuNarrowPhase::writeAllBodiesToGpu()
 {
 	m_data->m_bodyBufferGPU->resize(m_data->m_numAcceleratedRigidBodies);
 	m_data->m_inertiaBufferGPU->resize(m_data->m_numAcceleratedRigidBodies);
diff --git a/opencl/gpu_rigidbody/host/btGpuNarrowPhase.h b/opencl/gpu_rigidbody/host/b3GpuNarrowPhase.h
index 2508bd713..f5f1e4ee5 100644
--- a/opencl/gpu_rigidbody/host/btGpuNarrowPhase.h
+++ b/opencl/gpu_rigidbody/host/b3GpuNarrowPhase.h
@@ -1,12 +1,12 @@
 #ifndef BT_GPU_NARROWPHASE_H
 #define BT_GPU_NARROWPHASE_H
 
-#include "../../gpu_narrowphase/host/btCollidable.h"
-#include "basic_initialize/btOpenCLInclude.h"
+#include "../../gpu_narrowphase/host/b3Collidable.h"
+#include "basic_initialize/b3OpenCLInclude.h"
 #include "BulletCommon/btAlignedObjectArray.h"
 #include "BulletCommon/btVector3.h"
 
-class btGpuNarrowPhase
+class b3GpuNarrowPhase
 {
 protected:
 
@@ -19,17 +19,17 @@ protected:
 	cl_device_id m_device;
 	cl_command_queue m_queue;
 
-	int registerConvexHullShape(class btConvexUtility* convexPtr, btCollidable& col);
-	int registerConcaveMeshShape(btAlignedObjectArray<btVector3>* vertices, btAlignedObjectArray<int>* indices, btCollidable& col, const float* scaling);
+	int registerConvexHullShape(class b3ConvexUtility* convexPtr, b3Collidable& col);
+	int registerConcaveMeshShape(btAlignedObjectArray<btVector3>* vertices, btAlignedObjectArray<int>* indices, b3Collidable& col, const float* scaling);
 
 public:
 
 	
 
 
-	btGpuNarrowPhase(cl_context vtx, cl_device_id dev, cl_command_queue q, const struct btConfig& config);
+	b3GpuNarrowPhase(cl_context vtx, cl_device_id dev, cl_command_queue q, const struct b3Config& config);
 
-	virtual ~btGpuNarrowPhase(void);
+	virtual ~b3GpuNarrowPhase(void);
 
 	int		registerSphereShape(float radius);
 	int		registerPlaneShape(const btVector3& planeNormal, float planeConstant);
@@ -41,7 +41,7 @@ public:
 	
 	//do they need to be merged?
 	
-	int	registerConvexHullShape(btConvexUtility* utilPtr);
+	int	registerConvexHullShape(b3ConvexUtility* utilPtr);
 	int	registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling);
 
 	int registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation, const float* aabbMin, const float* aabbMax,bool writeToGpu);
@@ -72,10 +72,10 @@ public:
 
 	int allocateCollidable();
 
-	btCollidable& getCollidableCpu(int collidableIndex);
-	const btCollidable& getCollidableCpu(int collidableIndex) const;
+	b3Collidable& getCollidableCpu(int collidableIndex);
+	const b3Collidable& getCollidableCpu(int collidableIndex) const;
 
-	const struct btSapAabb& getLocalSpaceAabb(int collidableIndex) const;
+	const struct b3SapAabb& getLocalSpaceAabb(int collidableIndex) const;
 };
 
 #endif //BT_GPU_NARROWPHASE_H
diff --git a/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.cpp b/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.cpp
index 905d7b709..f0187398d 100644
--- a/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.cpp
+++ b/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.cpp
@@ -1,34 +1,41 @@
-#include "btGpuRigidBodyPipeline.h"
-#include "btGpuRigidBodyPipelineInternalData.h"
+#include "b3GpuRigidBodyPipeline.h"
+#include "b3GpuRigidBodyPipelineInternalData.h"
 #include "../kernels/integrateKernel.h"
 #include "../kernels/updateAabbsKernel.h"
 
-#include "../../basic_initialize/btOpenCLUtils.h"
-#include "btGpuNarrowPhase.h"
+#include "../../basic_initialize/b3OpenCLUtils.h"
+#include "b3GpuNarrowPhase.h"
 #include "BulletGeometry/btAabbUtil2.h"
-#include "../../gpu_broadphase/host/btSapAabb.h"
-#include "../../gpu_broadphase/host/btGpuSapBroadphase.h"
+#include "../../gpu_broadphase/host/b3SapAabb.h"
+#include "../../gpu_broadphase/host/b3GpuSapBroadphase.h"
 #include "parallel_primitives/host/btLauncherCL.h"
-#include "btPgsJacobiSolver.h"
-#include "../../gpu_narrowphase/host/btRigidBodyCL.h"
-#include "../../gpu_narrowphase/host/btContact4.h"
-#include "btGpuBatchingPgsSolver.h"
-#include "Solver.h"
+//#define TEST_OTHER_GPU_SOLVER
+#ifdef TEST_OTHER_GPU_SOLVER
 #include "btGpuJacobiSolver.h"
+#include "btPgsJacobiSolver.h"
+#endif //TEST_OTHER_GPU_SOLVER
+
+#include "../../gpu_narrowphase/host/b3RigidBodyCL.h"
+#include "../../gpu_narrowphase/host/b3Contact4.h"
+#include "b3GpuBatchingPgsSolver.h"
+#include "b3Solver.h"
+
 #include "BulletCommon/btQuickprof.h"
-#include "btConfig.h"
+#include "b3Config.h"
 
-btGpuRigidBodyPipeline::btGpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue  q,class btGpuNarrowPhase* narrowphase, class btGpuSapBroadphase* broadphaseSap )
+b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue  q,class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap )
 {
-	m_data = new btGpuRigidBodyPipelineInternalData;
+	m_data = new b3GpuRigidBodyPipelineInternalData;
 	m_data->m_context = ctx;
 	m_data->m_device = device;
 	m_data->m_queue = q;
+#ifdef TEST_OTHER_GPU_SOLVER
 	m_data->m_solver = new btPgsJacobiSolver();
-	btConfig config;
-	
-	m_data->m_solver2 = new btGpuBatchingPgsSolver(ctx,device,q,config.m_maxBroadphasePairs);
-	m_data->m_solver3 = new btGpuJacobiSolver(ctx,device,q,config.m_maxBroadphasePairs);
+	m_data->m_solver3 = new btGpuJacobiSolver(ctx,device,q,config.m_maxBroadphasePairs);	
+#endif //	TEST_OTHER_GPU_SOLVER
+	b3Config config;
+	m_data->m_solver2 = new b3GpuBatchingPgsSolver(ctx,device,q,config.m_maxBroadphasePairs);
+
 	
 	
 	m_data->m_broadphaseSap = broadphaseSap;
@@ -37,16 +44,16 @@ btGpuRigidBodyPipeline::btGpuRigidBodyPipeline(cl_context ctx,cl_device_id devic
 	cl_int errNum=0;
 
 	{
-		cl_program prog = btOpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,integrateKernelCL,&errNum,"","opencl/gpu_rigidbody/kernels/integrateKernel.cl");
+		cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,integrateKernelCL,&errNum,"","opencl/gpu_rigidbody/kernels/integrateKernel.cl");
 		btAssert(errNum==CL_SUCCESS);
-		m_data->m_integrateTransformsKernel = btOpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,integrateKernelCL, "integrateTransformsKernel",&errNum,prog);
+		m_data->m_integrateTransformsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,integrateKernelCL, "integrateTransformsKernel",&errNum,prog);
 		btAssert(errNum==CL_SUCCESS);
 		clReleaseProgram(prog);
 	}
 	{
-		cl_program prog = btOpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,updateAabbsKernelCL,&errNum,"","opencl/gpu_rigidbody/kernels/updateAabbsKernel.cl");
+		cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,updateAabbsKernelCL,&errNum,"","opencl/gpu_rigidbody/kernels/updateAabbsKernel.cl");
 		btAssert(errNum==CL_SUCCESS);
-		m_data->m_updateAabbsKernel = btOpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,updateAabbsKernelCL, "initializeGpuAabbsFull",&errNum,prog);
+		m_data->m_updateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,updateAabbsKernelCL, "initializeGpuAabbsFull",&errNum,prog);
 		btAssert(errNum==CL_SUCCESS);
 		clReleaseProgram(prog);
 	}
@@ -54,17 +61,22 @@ btGpuRigidBodyPipeline::btGpuRigidBodyPipeline(cl_context ctx,cl_device_id devic
 
 }
 
-btGpuRigidBodyPipeline::~btGpuRigidBodyPipeline()
+b3GpuRigidBodyPipeline::~b3GpuRigidBodyPipeline()
 {
 	clReleaseKernel(m_data->m_integrateTransformsKernel);
 	
+#ifdef TEST_OTHER_GPU_SOLVER
 	delete m_data->m_solver;
-	delete m_data->m_solver2;
 	delete m_data->m_solver3;
+#endif //TEST_OTHER_GPU_SOLVER
+	
+	delete m_data->m_solver2;
+	
+	
 	delete m_data;
 }
 
-void	btGpuRigidBodyPipeline::stepSimulation(float deltaTime)
+void	b3GpuRigidBodyPipeline::stepSimulation(float deltaTime)
 {
 
 	//update worldspace AABBs from local AABB/worldtransform
@@ -104,14 +116,15 @@ void	btGpuRigidBodyPipeline::stepSimulation(float deltaTime)
 	
 	if (numContacts)
 	{
-		btOpenCLArray<btRigidBodyCL> gpuBodies(m_data->m_context,m_data->m_queue,0,true);
+		btOpenCLArray<b3RigidBodyCL> gpuBodies(m_data->m_context,m_data->m_queue,0,true);
 		gpuBodies.setFromOpenCLBuffer(m_data->m_narrowphase->getBodiesGpu(),m_data->m_narrowphase->getNumBodiesGpu());
 		btOpenCLArray<btInertiaCL> gpuInertias(m_data->m_context,m_data->m_queue,0,true);
 		gpuInertias.setFromOpenCLBuffer(m_data->m_narrowphase->getBodyInertiasGpu(),m_data->m_narrowphase->getNumBodiesGpu());
-		btOpenCLArray<btContact4> gpuContacts(m_data->m_context,m_data->m_queue,0,true);
+		btOpenCLArray<b3Contact4> gpuContacts(m_data->m_context,m_data->m_queue,0,true);
 		gpuContacts.setFromOpenCLBuffer(m_data->m_narrowphase->getContactsGpu(),m_data->m_narrowphase->getNumContactsGpu());
 
 		bool useJacobi = false;
+#ifdef TEST_OTHER_GPU_SOLVER
 		if (useJacobi)
 		{
 			bool useGpu = true;
@@ -120,9 +133,9 @@ void	btGpuRigidBodyPipeline::stepSimulation(float deltaTime)
 				bool forceHost = false;
 				if (forceHost)
 				{
-					btAlignedObjectArray<btRigidBodyCL> hostBodies;
+					btAlignedObjectArray<b3RigidBodyCL> hostBodies;
 					btAlignedObjectArray<btInertiaCL> hostInertias;
-					btAlignedObjectArray<btContact4> hostContacts;
+					btAlignedObjectArray<b3Contact4> hostContacts;
 				
 					{
 						BT_PROFILE("copyToHost");
@@ -148,11 +161,11 @@ void	btGpuRigidBodyPipeline::stepSimulation(float deltaTime)
 				}
 			} else
 			{
-				btAlignedObjectArray<btRigidBodyCL> hostBodies;
+				btAlignedObjectArray<b3RigidBodyCL> hostBodies;
 				gpuBodies.copyToHost(hostBodies);
 				btAlignedObjectArray<btInertiaCL> hostInertias;
 				gpuInertias.copyToHost(hostInertias);
-				btAlignedObjectArray<btContact4> hostContacts;
+				btAlignedObjectArray<b3Contact4> hostContacts;
 				gpuContacts.copyToHost(hostContacts);
 				{
 					m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]);
@@ -161,8 +174,9 @@ void	btGpuRigidBodyPipeline::stepSimulation(float deltaTime)
 			}
 		
 		} else
+#endif //TEST_OTHER_GPU_SOLVER
 		{
-			btConfig config;
+			b3Config config;
 			m_data->m_solver2->solveContacts(numBodies, gpuBodies.getBufferCL(),gpuInertias.getBufferCL(),numContacts, gpuContacts.getBufferCL(),config);
 			
 			//m_data->m_solver4->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(), gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL());
@@ -181,7 +195,7 @@ void	btGpuRigidBodyPipeline::stepSimulation(float deltaTime)
 
 }
 
-void	btGpuRigidBodyPipeline::integrate(float timeStep)
+void	b3GpuRigidBodyPipeline::integrate(float timeStep)
 {
 	//integrate
 
@@ -201,7 +215,7 @@ void	btGpuRigidBodyPipeline::integrate(float timeStep)
 
 
 
-void	btGpuRigidBodyPipeline::setupGpuAabbsFull()
+void	b3GpuRigidBodyPipeline::setupGpuAabbsFull()
 {
 	cl_int ciErrNum=0;
 
@@ -226,12 +240,12 @@ void	btGpuRigidBodyPipeline::setupGpuAabbsFull()
 
 
 
-cl_mem	btGpuRigidBodyPipeline::getBodyBuffer()
+cl_mem	b3GpuRigidBodyPipeline::getBodyBuffer()
 {
 	return m_data->m_narrowphase->getBodiesGpu();
 }
 
-int	btGpuRigidBodyPipeline::getNumBodies() const
+int	b3GpuRigidBodyPipeline::getNumBodies() const
 {
 	return m_data->m_narrowphase->getNumBodiesGpu();
 }
@@ -240,12 +254,12 @@ int	btGpuRigidBodyPipeline::getNumBodies() const
 
 
 
-int		btGpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userIndex)
+int		b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userIndex)
 {
 	btVector3 aabbMin(0,0,0),aabbMax(0,0,0);
 	if (collidableIndex>=0)
 	{
-		btSapAabb localAabb = m_data->m_narrowphase->getLocalSpaceAabb(collidableIndex);
+		b3SapAabb localAabb = m_data->m_narrowphase->getLocalSpaceAabb(collidableIndex);
 		btVector3 localAabbMin(localAabb.m_min[0],localAabb.m_min[1],localAabb.m_min[2]);
 		btVector3 localAabbMax(localAabb.m_max[0],localAabb.m_max[1],localAabb.m_max[2]);
 		
diff --git a/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.h b/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.h
index 45b8a8346..23f89a2ee 100644
--- a/opencl/gpu_rigidbody/host/btGpuRigidBodyPipeline.h
+++ b/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipeline.h
@@ -1,25 +1,25 @@
 #ifndef BT_GPU_RIGIDBODY_PIPELINE_H
 #define BT_GPU_RIGIDBODY_PIPELINE_H
 
-#include "../../basic_initialize/btOpenCLInclude.h"
+#include "../../basic_initialize/b3OpenCLInclude.h"
 
-class btGpuRigidBodyPipeline
+class b3GpuRigidBodyPipeline
 {
 protected:
-	struct btGpuRigidBodyPipelineInternalData*	m_data;
+	struct b3GpuRigidBodyPipelineInternalData*	m_data;
 
 	int allocateCollidable();
 
 public:
 
-	btGpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue  q , class btGpuNarrowPhase* narrowphase, class btGpuSapBroadphase* broadphaseSap);
-	virtual ~btGpuRigidBodyPipeline();
+	b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue  q , class b3GpuNarrowPhase* narrowphase, class b3GpuSapBroadphase* broadphaseSap);
+	virtual ~b3GpuRigidBodyPipeline();
 
 	void	stepSimulation(float deltaTime);
 	void	integrate(float timeStep);
 	void	setupGpuAabbsFull();
 
-	int		registerConvexPolyhedron(class btConvexUtility* convex);
+	int		registerConvexPolyhedron(class b3ConvexUtility* convex);
 
 	//int		registerConvexPolyhedron(const float* vertices, int strideInBytes, int numVertices, const float* scaling);
 	//int		registerSphereShape(float radius);
diff --git a/opencl/gpu_rigidbody/host/btGpuRigidBodyPipelineInternalData.h b/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipelineInternalData.h
index e10b2ddeb..1391295be 100644
--- a/opencl/gpu_rigidbody/host/btGpuRigidBodyPipelineInternalData.h
+++ b/opencl/gpu_rigidbody/host/b3GpuRigidBodyPipelineInternalData.h
@@ -1,14 +1,14 @@
 #ifndef BT_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H
 #define BT_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H
 
-#include "../../basic_initialize/btOpenCLInclude.h"
+#include "../../basic_initialize/b3OpenCLInclude.h"
 #include "BulletCommon/btAlignedObjectArray.h"
 
 #include "../../parallel_primitives/host/btOpenCLArray.h"
-#include "../../gpu_narrowphase/host/btCollidable.h"
+#include "../../gpu_narrowphase/host/b3Collidable.h"
 
 
-struct btGpuRigidBodyPipelineInternalData
+struct b3GpuRigidBodyPipelineInternalData
 {
 
 	cl_context			m_context;
@@ -19,12 +19,12 @@ struct btGpuRigidBodyPipelineInternalData
 	cl_kernel	m_updateAabbsKernel;
 	
 	class btPgsJacobiSolver* m_solver;
-	class btGpuBatchingPgsSolver* m_solver2;
+	class b3GpuBatchingPgsSolver* m_solver2;
 	class btGpuJacobiSolver* m_solver3;
 	
-	class btGpuSapBroadphase* m_broadphaseSap;
+	class b3GpuSapBroadphase* m_broadphaseSap;
 
-	class btGpuNarrowPhase*	m_narrowphase;
+	class b3GpuNarrowPhase*	m_narrowphase;
 	
 };
 
diff --git a/opencl/gpu_rigidbody/host/Solver.cpp b/opencl/gpu_rigidbody/host/b3Solver.cpp
index 9c22c68b4..c1feb3013 100644
--- a/opencl/gpu_rigidbody/host/Solver.cpp
+++ b/opencl/gpu_rigidbody/host/b3Solver.cpp
@@ -14,7 +14,7 @@ subject to the following restrictions:
 //Originally written by Takahiro Harada
 
 
-#include "Solver.h"
+#include "b3Solver.h"
 
 ///useNewBatchingKernel  is a rewritten kernel using just a single thread of the warp, for experiments
 bool useNewBatchingKernel = false;
@@ -87,7 +87,7 @@ public:
 
 
 
-Solver::Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity)
+b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity)
 			:m_nIterations(4),
 			m_context(ctx),
 			m_device(device),
@@ -100,7 +100,7 @@ Solver::Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int
 	const int sortSize = BTNEXTMULTIPLEOF( pairCapacity, 512 );
 
 	m_sortDataBuffer = new btOpenCLArray<btSortData>(ctx,queue,sortSize);
-	m_contactBuffer2 = new btOpenCLArray<btContact4>(ctx,queue);
+	m_contactBuffer2 = new btOpenCLArray<b3Contact4>(ctx,queue);
 
 	m_numConstraints = new btOpenCLArray<unsigned int>(ctx,queue,N_SPLIT*N_SPLIT );
 	m_numConstraints->resize(N_SPLIT*N_SPLIT);
@@ -125,59 +125,59 @@ Solver::Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int
 	
 	{
 		
-		cl_program solveContactProg= btOpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, SOLVER_CONTACT_KERNEL_PATH);
+		cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, SOLVER_CONTACT_KERNEL_PATH);
 		btAssert(solveContactProg);
 		
-		cl_program solveFrictionProg= btOpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, SOLVER_FRICTION_KERNEL_PATH);
+		cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, SOLVER_FRICTION_KERNEL_PATH);
 		btAssert(solveFrictionProg);
 
-		cl_program solverSetup2Prog= btOpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, SOLVER_SETUP2_KERNEL_PATH);
+		cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, SOLVER_SETUP2_KERNEL_PATH);
 		btAssert(solverSetup2Prog);
 
 		
-		cl_program solverSetupProg= btOpenCLUtils::compileCLProgramFromString( ctx, device, solverSetupSource, &pErrNum,additionalMacros, SOLVER_SETUP_KERNEL_PATH);
+		cl_program solverSetupProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetupSource, &pErrNum,additionalMacros, SOLVER_SETUP_KERNEL_PATH);
 		btAssert(solverSetupProg);
 		
 		
-		m_solveFrictionKernel= btOpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros );
+		m_solveFrictionKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros );
 		btAssert(m_solveFrictionKernel);
 
-		m_solveContactKernel= btOpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros );
+		m_solveContactKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros );
 		btAssert(m_solveContactKernel);
 		
-		m_contactToConstraintKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros );
+		m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros );
 		btAssert(m_contactToConstraintKernel);
 			
-		m_setSortDataKernel =  btOpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog,additionalMacros );
+		m_setSortDataKernel =  b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog,additionalMacros );
 		btAssert(m_setSortDataKernel);
 				
-		m_reorderContactKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros );
+		m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros );
 		btAssert(m_reorderContactKernel);
 		
 
-		m_copyConstraintKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog,additionalMacros );
+		m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog,additionalMacros );
 		btAssert(m_copyConstraintKernel);
 		
 	}
 
 	{
-		cl_program batchingProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, BATCHING_PATH);
+		cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, BATCHING_PATH);
 		btAssert(batchingProg);
 		
-		m_batchingKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros );
+		m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros );
 		btAssert(m_batchingKernel);
 	}
 	{
-		cl_program batchingNewProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, BATCHING_NEW_PATH);
+		cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, BATCHING_NEW_PATH);
 		btAssert(batchingNewProg);
 	
-		m_batchingKernelNew = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
-		//m_batchingKernelNew = btOpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesBruteForce", &pErrNum, batchingNewProg,additionalMacros );
+		m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
+		//m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesBruteForce", &pErrNum, batchingNewProg,additionalMacros );
 		btAssert(m_batchingKernelNew);
 	}
 }
 		
-Solver::~Solver()
+b3Solver::~b3Solver()
 {
 	delete m_sortDataBuffer;
 	delete m_contactBuffer2;
@@ -204,10 +204,10 @@ Solver::~Solver()
  
 
 
-/*void Solver::reorderConvertToConstraints( const btOpenCLArray<btRigidBodyCL>* bodyBuf, 
+/*void b3Solver::reorderConvertToConstraints( const btOpenCLArray<b3RigidBodyCL>* bodyBuf, 
 	const btOpenCLArray<btInertiaCL>* shapeBuf,
-	btOpenCLArray<btContact4>* contactsIn, btOpenCLArray<btGpuConstraint4>* contactCOut, void* additionalData, 
-	int nContacts, const Solver::ConstraintCfg& cfg )
+	btOpenCLArray<b3Contact4>* contactsIn, btOpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData, 
+	int nContacts, const b3Solver::ConstraintCfg& cfg )
 {
 	if( m_contactBuffer )
 	{
@@ -216,7 +216,7 @@ Solver::~Solver()
 	if( m_contactBuffer == 0 )
 	{
 		BT_PROFILE("new m_contactBuffer;");
-		m_contactBuffer = new btOpenCLArray<btContact4>(m_context,m_queue,nContacts );
+		m_contactBuffer = new btOpenCLArray<b3Contact4>(m_context,m_queue,nContacts );
 		m_contactBuffer->resize(nContacts);
 	}
 	
@@ -256,7 +256,7 @@ Solver::~Solver()
 
 		{
 			BT_PROFILE("batchContacts");
-			Solver::batchContacts( contactsIn, nContacts, m_numConstraints, m_offsets, cfg.m_staticIdx );
+			b3Solver::batchContacts( contactsIn, nContacts, m_numConstraints, m_offsets, cfg.m_staticIdx );
 
 		}
 	}
@@ -269,7 +269,7 @@ Solver::~Solver()
 	
 	{
 		BT_PROFILE("convertToConstraints");
-		Solver::convertToConstraints(  bodyBuf, shapeBuf, contactsIn, contactCOut, additionalData, nContacts, cfg );
+		b3Solver::convertToConstraints(  bodyBuf, shapeBuf, contactsIn, contactCOut, additionalData, nContacts, cfg );
 	}
 
 	{
@@ -303,7 +303,7 @@ Solver::~Solver()
 template<bool JACOBI>
 static
 __inline
-void solveContact(btGpuConstraint4& cs, 
+void solveContact(b3GpuConstraint4& cs, 
 	const btVector3& posA, btVector3& linVelA, btVector3& angVelA, float invMassA, const btMatrix3x3& invInertiaA,
 	const btVector3& posB, btVector3& linVelB, btVector3& angVelB, float invMassB, const btMatrix3x3& invInertiaB, 
 	float maxRambdaDt[4], float minRambdaDt[4])
@@ -380,7 +380,7 @@ void solveContact(btGpuConstraint4& cs,
 
 	static
 	__inline
-	void solveFriction(btGpuConstraint4& cs, 
+	void solveFriction(b3GpuConstraint4& cs, 
 		const btVector3& posA, btVector3& linVelA, btVector3& angVelA, float invMassA, const btMatrix3x3& invInertiaA,
 		const btVector3& posB, btVector3& linVelB, btVector3& angVelB, float invMassB, const btMatrix3x3& invInertiaB, 
 		float maxRambdaDt[4], float minRambdaDt[4])
@@ -454,7 +454,7 @@ void solveContact(btGpuConstraint4& cs,
 
 struct SolveTask// : public ThreadPool::Task
 {
-	SolveTask(btAlignedObjectArray<btRigidBodyCL>& bodies,  btAlignedObjectArray<btInertiaCL>& shapes, btAlignedObjectArray<btGpuConstraint4>& constraints,
+	SolveTask(btAlignedObjectArray<b3RigidBodyCL>& bodies,  btAlignedObjectArray<btInertiaCL>& shapes, btAlignedObjectArray<b3GpuConstraint4>& constraints,
 		int start, int nConstraints)
 		: m_bodies( bodies ), m_shapes( shapes ), m_constraints( constraints ), m_start( start ), m_nConstraints( nConstraints ),
 		m_solveFriction( true ){}
@@ -472,8 +472,8 @@ struct SolveTask// : public ThreadPool::Task
 			float frictionCoeff = m_constraints[i].getFrictionCoeff();
 			int aIdx = (int)m_constraints[i].m_bodyA;
 			int bIdx = (int)m_constraints[i].m_bodyB;
-			btRigidBodyCL& bodyA = m_bodies[aIdx];
-			btRigidBodyCL& bodyB = m_bodies[bIdx];
+			b3RigidBodyCL& bodyA = m_bodies[aIdx];
+			b3RigidBodyCL& bodyB = m_bodies[bIdx];
 
 			if( !m_solveFriction )
 			{
@@ -512,24 +512,24 @@ struct SolveTask// : public ThreadPool::Task
 		
 	}
 
-	btAlignedObjectArray<btRigidBodyCL>& m_bodies;
+	btAlignedObjectArray<b3RigidBodyCL>& m_bodies;
 	btAlignedObjectArray<btInertiaCL>& m_shapes;
-	btAlignedObjectArray<btGpuConstraint4>& m_constraints;
+	btAlignedObjectArray<b3GpuConstraint4>& m_constraints;
 	int m_start;
 	int m_nConstraints;
 	bool m_solveFriction;
 };
 
 
-void Solver::solveContactConstraintHost(  btOpenCLArray<btRigidBodyCL>* bodyBuf, btOpenCLArray<btInertiaCL>* shapeBuf, 
-			btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
+void b3Solver::solveContactConstraintHost(  btOpenCLArray<b3RigidBodyCL>* bodyBuf, btOpenCLArray<btInertiaCL>* shapeBuf, 
+			btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
 {
 
-	btAlignedObjectArray<btRigidBodyCL> bodyNative;
+	btAlignedObjectArray<b3RigidBodyCL> bodyNative;
 	bodyBuf->copyToHost(bodyNative);
 	btAlignedObjectArray<btInertiaCL> shapeNative;
 	shapeBuf->copyToHost(shapeNative);
-	btAlignedObjectArray<btGpuConstraint4> constraintNative;
+	btAlignedObjectArray<b3GpuConstraint4> constraintNative;
 	constraint->copyToHost(constraintNative);
 
 	for(int iter=0; iter<m_nIterations; iter++)
@@ -553,8 +553,8 @@ void Solver::solveContactConstraintHost(  btOpenCLArray<btRigidBodyCL>* bodyBuf,
 	
 }
 
-void Solver::solveContactConstraint(  const btOpenCLArray<btRigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf, 
-			btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
+void b3Solver::solveContactConstraint(  const btOpenCLArray<b3RigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf, 
+			btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
 {
 	
 	
@@ -712,12 +712,12 @@ void Solver::solveContactConstraint(  const btOpenCLArray<btRigidBodyCL>* bodyBu
 	
 }
 
-void Solver::convertToConstraints( const btOpenCLArray<btRigidBodyCL>* bodyBuf, 
+void b3Solver::convertToConstraints( const btOpenCLArray<b3RigidBodyCL>* bodyBuf, 
 	const btOpenCLArray<btInertiaCL>* shapeBuf, 
-	btOpenCLArray<btContact4>* contactsIn, btOpenCLArray<btGpuConstraint4>* contactCOut, void* additionalData, 
+	btOpenCLArray<b3Contact4>* contactsIn, btOpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData, 
 	int nContacts, const ConstraintCfg& cfg )
 {
-	btOpenCLArray<btGpuConstraint4>* constraintNative =0;
+	btOpenCLArray<b3GpuConstraint4>* constraintNative =0;
 
 	struct CB
 	{
@@ -756,9 +756,9 @@ void Solver::convertToConstraints( const btOpenCLArray<btRigidBodyCL>* bodyBuf,
 }
 
 /*
-void Solver::sortContacts(  const btOpenCLArray<btRigidBodyCL>* bodyBuf, 
-			btOpenCLArray<btContact4>* contactsIn, void* additionalData, 
-			int nContacts, const Solver::ConstraintCfg& cfg )
+void b3Solver::sortContacts(  const btOpenCLArray<b3RigidBodyCL>* bodyBuf, 
+			btOpenCLArray<b3Contact4>* contactsIn, void* additionalData, 
+			int nContacts, const b3Solver::ConstraintCfg& cfg )
 {
 	
 	
@@ -813,7 +813,7 @@ void Solver::sortContacts(  const btOpenCLArray<btRigidBodyCL>* bodyBuf,
 		{	//	5. sort constraints by cellIdx
 			//	todo. preallocate this
 //			btAssert( contactsIn->getType() == TYPE_HOST );
-//			btOpenCLArray<btContact4>* out = BufferUtils::map<TYPE_CL, false>( data->m_device, contactsIn );	//	copying contacts to this buffer
+//			btOpenCLArray<b3Contact4>* out = BufferUtils::map<TYPE_CL, false>( data->m_device, contactsIn );	//	copying contacts to this buffer
 
 			{
 				
@@ -834,7 +834,7 @@ void Solver::sortContacts(  const btOpenCLArray<btRigidBodyCL>* bodyBuf,
 
 */
 
-void	Solver::batchContacts(  btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* nNative, btOpenCLArray<unsigned int>* offsetsNative, int staticIdx )
+void	b3Solver::batchContacts(  btOpenCLArray<b3Contact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* nNative, btOpenCLArray<unsigned int>* offsetsNative, int staticIdx )
 {
 	
 	int numWorkItems = 64*N_SPLIT*N_SPLIT;
@@ -893,7 +893,7 @@ void	Solver::batchContacts(  btOpenCLArray<btContact4>* contacts, int nContacts,
 
 #ifdef BATCH_DEBUG
 	aaaa
-		btContact4* hostContacts = new btContact4[nContacts];
+		b3Contact4* hostContacts = new b3Contact4[nContacts];
 		m_contactBuffer->read(hostContacts,nContacts);
 		clFinish(m_queue);
 
diff --git a/opencl/gpu_rigidbody/host/Solver.h b/opencl/gpu_rigidbody/host/b3Solver.h
index dc54281e0..8338e0367 100644
--- a/opencl/gpu_rigidbody/host/Solver.h
+++ b/opencl/gpu_rigidbody/host/b3Solver.h
@@ -18,21 +18,21 @@ subject to the following restrictions:
 #define __ADL_SOLVER_H
 
 #include "../../parallel_primitives/host/btOpenCLArray.h"
-#include "../host/btGpuConstraint4.h"
-#include "../../gpu_narrowphase/host/btRigidBodyCL.h"
-#include "../../gpu_narrowphase/host/btContact4.h"
+#include "../host/b3GpuConstraint4.h"
+#include "../../gpu_narrowphase/host/b3RigidBodyCL.h"
+#include "../../gpu_narrowphase/host/b3Contact4.h"
 
-#include "../host/btGpuConstraint4.h"
+#include "../host/b3GpuConstraint4.h"
 #include "../../parallel_primitives/host/btPrefixScanCL.h"
 #include "../../parallel_primitives/host/btRadixSort32CL.h"
 #include "../../parallel_primitives/host/btBoundSearchCL.h"
 
-#include "../../basic_initialize/btOpenCLUtils.h"
+#include "../../basic_initialize/b3OpenCLUtils.h"
 
 
 #define BTNEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment))
 
-class SolverBase
+class b3SolverBase
 {
 	public:
 		
@@ -60,7 +60,7 @@ class SolverBase
 		};
 };
 
-class Solver : public SolverBase
+class b3Solver : public b3SolverBase
 {
 	public:
 
@@ -88,7 +88,7 @@ class Solver : public SolverBase
 		class btPrefixScanCL*	m_scan;
 
 		btOpenCLArray<btSortData>* m_sortDataBuffer;
-		btOpenCLArray<btContact4>* m_contactBuffer2;
+		btOpenCLArray<b3Contact4>* m_contactBuffer2;
 
 		enum
 		{
@@ -98,23 +98,23 @@ class Solver : public SolverBase
 		
 
 		
-		Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity);
+		b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity);
 
-		virtual ~Solver();
+		virtual ~b3Solver();
 		
-		void solveContactConstraint( const btOpenCLArray<btRigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* inertiaBuf, 
-			btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches);
+		void solveContactConstraint( const btOpenCLArray<b3RigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* inertiaBuf, 
+			btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches);
 
-		void solveContactConstraintHost(  btOpenCLArray<btRigidBodyCL>* bodyBuf, btOpenCLArray<btInertiaCL>* shapeBuf, 
-			btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches);
+		void solveContactConstraintHost(  btOpenCLArray<b3RigidBodyCL>* bodyBuf, btOpenCLArray<btInertiaCL>* shapeBuf, 
+			btOpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches);
 
 
-		void convertToConstraints( const btOpenCLArray<btRigidBodyCL>* bodyBuf, 
+		void convertToConstraints( const btOpenCLArray<b3RigidBodyCL>* bodyBuf, 
 			const btOpenCLArray<btInertiaCL>* shapeBuf, 
-			btOpenCLArray<btContact4>* contactsIn, btOpenCLArray<btGpuConstraint4>* contactCOut, void* additionalData, 
+			btOpenCLArray<b3Contact4>* contactsIn, btOpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData, 
 			int nContacts, const ConstraintCfg& cfg );
 
-		void	batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
+		void	batchContacts( btOpenCLArray<b3Contact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
 
 };
 
diff --git a/opencl/gpu_rigidbody/host/btBroadphaseProxy.h b/opencl/gpu_rigidbody/host/btBroadphaseProxy.h
deleted file mode 100644
index 9aea9ad83..000000000
--- a/opencl/gpu_rigidbody/host/btBroadphaseProxy.h
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef BT_BROADPHASE_PROXY_H
-#define BT_BROADPHASE_PROXY_H
-
-#include "BulletCommon/btScalar.h" //for SIMD_FORCE_INLINE
-#include "BulletCommon/btVector3.h"
-#include "BulletCommon/btAlignedAllocator.h"
-
-
-/// btDispatcher uses these types
-/// IMPORTANT NOTE:The types are ordered polyhedral, implicit convex and concave
-/// to facilitate type checking
-/// CUSTOM_POLYHEDRAL_SHAPE_TYPE,CUSTOM_CONVEX_SHAPE_TYPE and CUSTOM_CONCAVE_SHAPE_TYPE can be used to extend Bullet without modifying source code
-enum BroadphaseNativeTypes
-{
-	// polyhedral convex shapes
-	BOX_SHAPE_PROXYTYPE,
-	TRIANGLE_SHAPE_PROXYTYPE,
-	TETRAHEDRAL_SHAPE_PROXYTYPE,
-	CONVEX_TRIANGLEMESH_SHAPE_PROXYTYPE,
-	CONVEX_HULL_SHAPE_PROXYTYPE,
-	CONVEX_POINT_CLOUD_SHAPE_PROXYTYPE,
-	CUSTOM_POLYHEDRAL_SHAPE_TYPE,
-//implicit convex shapes
-IMPLICIT_CONVEX_SHAPES_START_HERE,
-	SPHERE_SHAPE_PROXYTYPE,
-	MULTI_SPHERE_SHAPE_PROXYTYPE,
-	CAPSULE_SHAPE_PROXYTYPE,
-	CONE_SHAPE_PROXYTYPE,
-	CONVEX_SHAPE_PROXYTYPE,
-	CYLINDER_SHAPE_PROXYTYPE,
-	UNIFORM_SCALING_SHAPE_PROXYTYPE,
-	MINKOWSKI_SUM_SHAPE_PROXYTYPE,
-	MINKOWSKI_DIFFERENCE_SHAPE_PROXYTYPE,
-	BOX_2D_SHAPE_PROXYTYPE,
-	CONVEX_2D_SHAPE_PROXYTYPE,
-	CUSTOM_CONVEX_SHAPE_TYPE,
-//concave shapes
-CONCAVE_SHAPES_START_HERE,
-	//keep all the convex shapetype below here, for the check IsConvexShape in broadphase proxy!
-	TRIANGLE_MESH_SHAPE_PROXYTYPE,
-	SCALED_TRIANGLE_MESH_SHAPE_PROXYTYPE,
-	///used for demo integration FAST/Swift collision library and Bullet
-	FAST_CONCAVE_MESH_PROXYTYPE,
-	//terrain
-	TERRAIN_SHAPE_PROXYTYPE,
-///Used for GIMPACT Trimesh integration
-	GIMPACT_SHAPE_PROXYTYPE,
-///Multimaterial mesh
-    MULTIMATERIAL_TRIANGLE_MESH_PROXYTYPE,
-	
-	EMPTY_SHAPE_PROXYTYPE,
-	STATIC_PLANE_PROXYTYPE,
-	CUSTOM_CONCAVE_SHAPE_TYPE,
-CONCAVE_SHAPES_END_HERE,
-
-	COMPOUND_SHAPE_PROXYTYPE,
-
-	SOFTBODY_SHAPE_PROXYTYPE,
-	HFFLUID_SHAPE_PROXYTYPE,
-	HFFLUID_BUOYANT_CONVEX_SHAPE_PROXYTYPE,
-	INVALID_SHAPE_PROXYTYPE,
-
-	MAX_BROADPHASE_COLLISION_TYPES
-	
-};
-
-
-///The btBroadphaseProxy is the main class that can be used with the Bullet broadphases. 
-///It stores collision shape type information, collision filter information and a client object, typically a btCollisionObject or btRigidBody.
-ATTRIBUTE_ALIGNED16(struct) btBroadphaseProxy
-{
-
-BT_DECLARE_ALIGNED_ALLOCATOR();
-	
-	///optional filtering to cull potential collisions
-	enum CollisionFilterGroups
-	{
-	        DefaultFilter = 1,
-	        StaticFilter = 2,
-	        KinematicFilter = 4,
-	        DebrisFilter = 8,
-			SensorTrigger = 16,
-			CharacterFilter = 32,
-	        AllFilter = -1 //all bits sets: DefaultFilter | StaticFilter | KinematicFilter | DebrisFilter | SensorTrigger
-	};
-
-	//Usually the client btCollisionObject or Rigidbody class
-	void*	m_clientObject;
-	short int m_collisionFilterGroup;
-	short int m_collisionFilterMask;
-	void*	m_multiSapParentProxy;		
-	int			m_uniqueId;//m_uniqueId is introduced for paircache. could get rid of this, by calculating the address offset etc.
-
-	btVector3	m_aabbMin;
-	btVector3	m_aabbMax;
-
-	SIMD_FORCE_INLINE int getUid() const
-	{
-		return m_uniqueId;
-	}
-
-	//used for memory pools
-	btBroadphaseProxy() :m_clientObject(0),m_multiSapParentProxy(0)
-	{
-	}
-
-	btBroadphaseProxy(const btVector3& aabbMin,const btVector3& aabbMax,void* userPtr,short int collisionFilterGroup, short int collisionFilterMask,void* multiSapParentProxy=0)
-		:m_clientObject(userPtr),
-		m_collisionFilterGroup(collisionFilterGroup),
-		m_collisionFilterMask(collisionFilterMask),
-		m_aabbMin(aabbMin),
-		m_aabbMax(aabbMax)
-	{
-		m_multiSapParentProxy = multiSapParentProxy;
-	}
-
-	
-
-	static SIMD_FORCE_INLINE bool isPolyhedral(int proxyType)
-	{
-		return (proxyType  < IMPLICIT_CONVEX_SHAPES_START_HERE);
-	}
-
-	static SIMD_FORCE_INLINE bool	isConvex(int proxyType)
-	{
-		return (proxyType < CONCAVE_SHAPES_START_HERE);
-	}
-
-	static SIMD_FORCE_INLINE bool	isNonMoving(int proxyType)
-	{
-		return (isConcave(proxyType) && !(proxyType==GIMPACT_SHAPE_PROXYTYPE));
-	}
-
-	static SIMD_FORCE_INLINE bool	isConcave(int proxyType)
-	{
-		return ((proxyType > CONCAVE_SHAPES_START_HERE) &&
-			(proxyType < CONCAVE_SHAPES_END_HERE));
-	}
-	static SIMD_FORCE_INLINE bool	isCompound(int proxyType)
-	{
-		return (proxyType == COMPOUND_SHAPE_PROXYTYPE);
-	}
-
-	static SIMD_FORCE_INLINE bool	isSoftBody(int proxyType)
-	{
-		return (proxyType == SOFTBODY_SHAPE_PROXYTYPE);
-	}
-
-	static SIMD_FORCE_INLINE bool isInfinite(int proxyType)
-	{
-		return (proxyType == STATIC_PLANE_PROXYTYPE);
-	}
-
-	static SIMD_FORCE_INLINE bool isConvex2d(int proxyType)
-	{
-		return (proxyType == BOX_2D_SHAPE_PROXYTYPE) ||	(proxyType == CONVEX_2D_SHAPE_PROXYTYPE);
-	}
-
-	
-}
-;
-
-class btCollisionAlgorithm;
-
-struct btBroadphaseProxy;
-
-
-
-///The btBroadphasePair class contains a pair of aabb-overlapping objects.
-///A btDispatcher can search a btCollisionAlgorithm that performs exact/narrowphase collision detection on the actual collision shapes.
-ATTRIBUTE_ALIGNED16(struct) btBroadphasePair
-{
-	btBroadphasePair ()
-		:
-	m_pProxy0(0),
-		m_pProxy1(0),
-		m_algorithm(0),
-		m_internalInfo1(0)
-	{
-	}
-
-BT_DECLARE_ALIGNED_ALLOCATOR();
-
-	btBroadphasePair(const btBroadphasePair& other)
-		:		m_pProxy0(other.m_pProxy0),
-				m_pProxy1(other.m_pProxy1),
-				m_algorithm(other.m_algorithm),
-				m_internalInfo1(other.m_internalInfo1)
-	{
-	}
-	btBroadphasePair(btBroadphaseProxy& proxy0,btBroadphaseProxy& proxy1)
-	{
-
-		//keep them sorted, so the std::set operations work
-		if (proxy0.m_uniqueId < proxy1.m_uniqueId)
-        { 
-            m_pProxy0 = &proxy0; 
-            m_pProxy1 = &proxy1; 
-        }
-        else 
-        { 
-			m_pProxy0 = &proxy1; 
-            m_pProxy1 = &proxy0; 
-        }
-
-		m_algorithm = 0;
-		m_internalInfo1 = 0;
-
-	}
-	
-	btBroadphaseProxy* m_pProxy0;
-	btBroadphaseProxy* m_pProxy1;
-	
-	mutable btCollisionAlgorithm* m_algorithm;
-	union { void* m_internalInfo1; int m_internalTmpValue;};//don't use this data, it will be removed in future version.
-
-};
-
-/*
-//comparison for set operation, see Solid DT_Encounter
-SIMD_FORCE_INLINE bool operator<(const btBroadphasePair& a, const btBroadphasePair& b) 
-{ 
-    return a.m_pProxy0 < b.m_pProxy0 || 
-        (a.m_pProxy0 == b.m_pProxy0 && a.m_pProxy1 < b.m_pProxy1); 
-}
-*/
-
-
-
-class btBroadphasePairSortPredicate
-{
-	public:
-
-		bool operator() ( const btBroadphasePair& a, const btBroadphasePair& b ) const
-		{
-			const int uidA0 = a.m_pProxy0 ? a.m_pProxy0->m_uniqueId : -1;
-			const int uidB0 = b.m_pProxy0 ? b.m_pProxy0->m_uniqueId : -1;
-			const int uidA1 = a.m_pProxy1 ? a.m_pProxy1->m_uniqueId : -1;
-			const int uidB1 = b.m_pProxy1 ? b.m_pProxy1->m_uniqueId : -1;
-
-			 return uidA0 > uidB0 || 
-				(a.m_pProxy0 == b.m_pProxy0 && uidA1 > uidB1) ||
-				(a.m_pProxy0 == b.m_pProxy0 && a.m_pProxy1 == b.m_pProxy1 && a.m_algorithm > b.m_algorithm); 
-		}
-};
-
-
-SIMD_FORCE_INLINE bool operator==(const btBroadphasePair& a, const btBroadphasePair& b) 
-{
-	 return (a.m_pProxy0 == b.m_pProxy0) && (a.m_pProxy1 == b.m_pProxy1);
-}
-
-
-#endif //BT_BROADPHASE_PROXY_H
-
diff --git a/opencl/gpu_rigidbody/host/btCollisionObject.h b/opencl/gpu_rigidbody/host/btCollisionObject.h
deleted file mode 100644
index 7f2f8022e..000000000
--- a/opencl/gpu_rigidbody/host/btCollisionObject.h
+++ /dev/null
@@ -1,534 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef BT_COLLISION_OBJECT_H
-#define BT_COLLISION_OBJECT_H
-
-#include "BulletCommon/btTransform.h"
-
-//island management, m_activationState1
-#define ACTIVE_TAG 1
-#define ISLAND_SLEEPING 2
-#define WANTS_DEACTIVATION 3
-#define DISABLE_DEACTIVATION 4
-#define DISABLE_SIMULATION 5
-
-struct	btBroadphaseProxy;
-class	btCollisionShape;
-struct btCollisionShapeData;
-#include "BulletCommon/btMotionState.h"
-#include "BulletCommon/btAlignedAllocator.h"
-#include "BulletCommon/btAlignedObjectArray.h"
-
-typedef btAlignedObjectArray<class btCollisionObject*> btCollisionObjectArray;
-
-#ifdef BT_USE_DOUBLE_PRECISION
-#define btCollisionObjectData btCollisionObjectDoubleData
-#define btCollisionObjectDataName "btCollisionObjectDoubleData"
-#else
-#define btCollisionObjectData btCollisionObjectFloatData
-#define btCollisionObjectDataName "btCollisionObjectFloatData"
-#endif
-
-
-/// btCollisionObject can be used to manage collision detection objects. 
-/// btCollisionObject maintains all information that is needed for a collision detection: Shape, Transform and AABB proxy.
-/// They can be added to the btCollisionWorld.
-ATTRIBUTE_ALIGNED16(class)	btCollisionObject
-{
-
-protected:
-
-	btTransform	m_worldTransform;
-
-	///m_interpolationWorldTransform is used for CCD and interpolation
-	///it can be either previous or future (predicted) transform
-	btTransform	m_interpolationWorldTransform;
-	//those two are experimental: just added for bullet time effect, so you can still apply impulses (directly modifying velocities) 
-	//without destroying the continuous interpolated motion (which uses this interpolation velocities)
-	btVector3	m_interpolationLinearVelocity;
-	btVector3	m_interpolationAngularVelocity;
-	
-	btVector3	m_anisotropicFriction;
-	int			m_hasAnisotropicFriction;
-	btScalar	m_contactProcessingThreshold;	
-
-	btBroadphaseProxy*		m_broadphaseHandle;
-	btCollisionShape*		m_collisionShape;
-	///m_extensionPointer is used by some internal low-level Bullet extensions.
-	void*					m_extensionPointer;
-	
-	///m_rootCollisionShape is temporarily used to store the original collision shape
-	///The m_collisionShape might be temporarily replaced by a child collision shape during collision detection purposes
-	///If it is NULL, the m_collisionShape is not temporarily replaced.
-	btCollisionShape*		m_rootCollisionShape;
-
-	int				m_collisionFlags;
-
-	int				m_islandTag1;
-	int				m_companionId;
-
-	mutable int				m_activationState1;
-	mutable btScalar			m_deactivationTime;
-
-	btScalar		m_friction;
-	btScalar		m_restitution;
-	btScalar		m_rollingFriction;
-
-	///m_internalType is reserved to distinguish Bullet's btCollisionObject, btRigidBody, btSoftBody, btGhostObject etc.
-	///do not assign your own m_internalType unless you write a new dynamics object class.
-	int				m_internalType;
-
-	///users can point to their objects, m_userPointer is not used by Bullet, see setUserPointer/getUserPointer
-	void*			m_userObjectPointer;
-
-	///time of impact calculation
-	btScalar		m_hitFraction; 
-	
-	///Swept sphere radius (0.0 by default), see btConvexConvexAlgorithm::
-	btScalar		m_ccdSweptSphereRadius;
-
-	/// Don't do continuous collision detection if the motion (in one step) is less then m_ccdMotionThreshold
-	btScalar		m_ccdMotionThreshold;
-	
-	/// If some object should have elaborate collision filtering by sub-classes
-	int			m_checkCollideWith;
-
-	virtual bool	checkCollideWithOverride(const btCollisionObject* /* co */) const
-	{
-		return true;
-	}
-
-public:
-
-	BT_DECLARE_ALIGNED_ALLOCATOR();
-
-	enum CollisionFlags
-	{
-		CF_STATIC_OBJECT= 1,
-		CF_KINEMATIC_OBJECT= 2,
-		CF_NO_CONTACT_RESPONSE = 4,
-		CF_CUSTOM_MATERIAL_CALLBACK = 8,//this allows per-triangle material (friction/restitution)
-		CF_CHARACTER_OBJECT = 16,
-		CF_DISABLE_VISUALIZE_OBJECT = 32, //disable debug drawing
-		CF_DISABLE_SPU_COLLISION_PROCESSING = 64//disable parallel/SPU processing
-	};
-
-	enum	CollisionObjectTypes
-	{
-		CO_COLLISION_OBJECT =1,
-		CO_RIGID_BODY=2,
-		///CO_GHOST_OBJECT keeps track of all objects overlapping its AABB and that pass its collision filter
-		///It is useful for collision sensors, explosion objects, character controller etc.
-		CO_GHOST_OBJECT=4,
-		CO_SOFT_BODY=8,
-		CO_HF_FLUID=16,
-		CO_USER_TYPE=32
-	};
-
-	enum AnisotropicFrictionFlags
-	{
-		CF_ANISOTROPIC_FRICTION_DISABLED=0,
-		CF_ANISOTROPIC_FRICTION = 1,
-		CF_ANISOTROPIC_ROLLING_FRICTION = 2
-	};
-
-	SIMD_FORCE_INLINE bool mergesSimulationIslands() const
-	{
-		///static objects, kinematic and object without contact response don't merge islands
-		return  ((m_collisionFlags & (CF_STATIC_OBJECT | CF_KINEMATIC_OBJECT | CF_NO_CONTACT_RESPONSE) )==0);
-	}
-
-	const btVector3& getAnisotropicFriction() const
-	{
-		return m_anisotropicFriction;
-	}
-	void	setAnisotropicFriction(const btVector3& anisotropicFriction, int frictionMode = CF_ANISOTROPIC_FRICTION)
-	{
-		m_anisotropicFriction = anisotropicFriction;
-		bool isUnity = (anisotropicFriction[0]!=1.f) || (anisotropicFriction[1]!=1.f) || (anisotropicFriction[2]!=1.f);
-		m_hasAnisotropicFriction = isUnity?frictionMode : 0;
-	}
-	bool	hasAnisotropicFriction(int frictionMode = CF_ANISOTROPIC_FRICTION) const
-	{
-		return (m_hasAnisotropicFriction&frictionMode)!=0;
-	}
-
-	///the constraint solver can discard solving contacts, if the distance is above this threshold. 0 by default.
-	///Note that using contacts with positive distance can improve stability. It increases, however, the chance of colliding with degerate contacts, such as 'interior' triangle edges
-	void	setContactProcessingThreshold( btScalar contactProcessingThreshold)
-	{
-		m_contactProcessingThreshold = contactProcessingThreshold;
-	}
-	btScalar	getContactProcessingThreshold() const
-	{
-		return m_contactProcessingThreshold;
-	}
-
-	SIMD_FORCE_INLINE bool		isStaticObject() const {
-		return (m_collisionFlags & CF_STATIC_OBJECT) != 0;
-	}
-
-	SIMD_FORCE_INLINE bool		isKinematicObject() const
-	{
-		return (m_collisionFlags & CF_KINEMATIC_OBJECT) != 0;
-	}
-
-	SIMD_FORCE_INLINE bool		isStaticOrKinematicObject() const
-	{
-		return (m_collisionFlags & (CF_KINEMATIC_OBJECT | CF_STATIC_OBJECT)) != 0 ;
-	}
-
-	SIMD_FORCE_INLINE bool		hasContactResponse() const {
-		return (m_collisionFlags & CF_NO_CONTACT_RESPONSE)==0;
-	}
-
-	
-	btCollisionObject();
-
-	virtual ~btCollisionObject();
-
-	virtual void	setCollisionShape(btCollisionShape* collisionShape)
-	{
-		m_collisionShape = collisionShape;
-		m_rootCollisionShape = collisionShape;
-	}
-
-	SIMD_FORCE_INLINE const btCollisionShape*	getCollisionShape() const
-	{
-		return m_collisionShape;
-	}
-
-	SIMD_FORCE_INLINE btCollisionShape*	getCollisionShape()
-	{
-		return m_collisionShape;
-	}
-
-	
-
-	
-
-	///Avoid using this internal API call, the extension pointer is used by some Bullet extensions. 
-	///If you need to store your own user pointer, use 'setUserPointer/getUserPointer' instead.
-	void*		internalGetExtensionPointer() const
-	{
-		return m_extensionPointer;
-	}
-	///Avoid using this internal API call, the extension pointer is used by some Bullet extensions
-	///If you need to store your own user pointer, use 'setUserPointer/getUserPointer' instead.
-	void	internalSetExtensionPointer(void* pointer)
-	{
-		m_extensionPointer = pointer;
-	}
-
-	SIMD_FORCE_INLINE	int	getActivationState() const { return m_activationState1;}
-	
-	void setActivationState(int newState) const;
-
-	void	setDeactivationTime(btScalar time)
-	{
-		m_deactivationTime = time;
-	}
-	btScalar	getDeactivationTime() const
-	{
-		return m_deactivationTime;
-	}
-
-	void forceActivationState(int newState) const;
-
-	void	activate(bool forceActivation = false) const;
-
-	SIMD_FORCE_INLINE bool isActive() const
-	{
-		return ((getActivationState() != ISLAND_SLEEPING) && (getActivationState() != DISABLE_SIMULATION));
-	}
-
-	void	setRestitution(btScalar rest)
-	{
-		m_restitution = rest;
-	}
-	btScalar	getRestitution() const
-	{
-		return m_restitution;
-	}
-	void	setFriction(btScalar frict)
-	{
-		m_friction = frict;
-	}
-	btScalar	getFriction() const
-	{
-		return m_friction;
-	}
-
-	void	setRollingFriction(btScalar frict)
-	{
-		m_rollingFriction = frict;
-	}
-	btScalar	getRollingFriction() const
-	{
-		return m_rollingFriction;
-	}
-
-
-	///reserved for Bullet internal usage
-	int	getInternalType() const
-	{
-		return m_internalType;
-	}
-
-	btTransform&	getWorldTransform()
-	{
-		return m_worldTransform;
-	}
-
-	const btTransform&	getWorldTransform() const
-	{
-		return m_worldTransform;
-	}
-
-	void	setWorldTransform(const btTransform& worldTrans)
-	{
-		m_worldTransform = worldTrans;
-	}
-
-
-	SIMD_FORCE_INLINE btBroadphaseProxy*	getBroadphaseHandle()
-	{
-		return m_broadphaseHandle;
-	}
-
-	SIMD_FORCE_INLINE const btBroadphaseProxy*	getBroadphaseHandle() const
-	{
-		return m_broadphaseHandle;
-	}
-
-	void	setBroadphaseHandle(btBroadphaseProxy* handle)
-	{
-		m_broadphaseHandle = handle;
-	}
-
-
-	const btTransform&	getInterpolationWorldTransform() const
-	{
-		return m_interpolationWorldTransform;
-	}
-
-	btTransform&	getInterpolationWorldTransform()
-	{
-		return m_interpolationWorldTransform;
-	}
-
-	void	setInterpolationWorldTransform(const btTransform&	trans)
-	{
-		m_interpolationWorldTransform = trans;
-	}
-
-	void	setInterpolationLinearVelocity(const btVector3& linvel)
-	{
-		m_interpolationLinearVelocity = linvel;
-	}
-
-	void	setInterpolationAngularVelocity(const btVector3& angvel)
-	{
-		m_interpolationAngularVelocity = angvel;
-	}
-
-	const btVector3&	getInterpolationLinearVelocity() const
-	{
-		return m_interpolationLinearVelocity;
-	}
-
-	const btVector3&	getInterpolationAngularVelocity() const
-	{
-		return m_interpolationAngularVelocity;
-	}
-
-	SIMD_FORCE_INLINE int getIslandTag() const
-	{
-		return	m_islandTag1;
-	}
-
-	void	setIslandTag(int tag)
-	{
-		m_islandTag1 = tag;
-	}
-
-	SIMD_FORCE_INLINE int getCompanionId() const
-	{
-		return	m_companionId;
-	}
-
-	void	setCompanionId(int id)
-	{
-		m_companionId = id;
-	}
-
-	SIMD_FORCE_INLINE btScalar			getHitFraction() const
-	{
-		return m_hitFraction; 
-	}
-
-	void	setHitFraction(btScalar hitFraction)
-	{
-		m_hitFraction = hitFraction;
-	}
-
-	
-	SIMD_FORCE_INLINE int	getCollisionFlags() const
-	{
-		return m_collisionFlags;
-	}
-
-	void	setCollisionFlags(int flags)
-	{
-		m_collisionFlags = flags;
-	}
-	
-	///Swept sphere radius (0.0 by default), see btConvexConvexAlgorithm::
-	btScalar			getCcdSweptSphereRadius() const
-	{
-		return m_ccdSweptSphereRadius;
-	}
-
-	///Swept sphere radius (0.0 by default), see btConvexConvexAlgorithm::
-	void	setCcdSweptSphereRadius(btScalar radius)
-	{
-		m_ccdSweptSphereRadius = radius;
-	}
-
-	btScalar 	getCcdMotionThreshold() const
-	{
-		return m_ccdMotionThreshold;
-	}
-
-	btScalar 	getCcdSquareMotionThreshold() const
-	{
-		return m_ccdMotionThreshold*m_ccdMotionThreshold;
-	}
-
-
-
-	/// Don't do continuous collision detection if the motion (in one step) is less then m_ccdMotionThreshold
-	void	setCcdMotionThreshold(btScalar ccdMotionThreshold)
-	{
-		m_ccdMotionThreshold = ccdMotionThreshold;
-	}
-
-	///users can point to their objects, userPointer is not used by Bullet
-	void*	getUserPointer() const
-	{
-		return m_userObjectPointer;
-	}
-	
-	///users can point to their objects, userPointer is not used by Bullet
-	void	setUserPointer(void* userPointer)
-	{
-		m_userObjectPointer = userPointer;
-	}
-
-
-	inline bool checkCollideWith(const btCollisionObject* co) const
-	{
-		if (m_checkCollideWith)
-			return checkCollideWithOverride(co);
-
-		return true;
-	}
-
-	virtual	int	calculateSerializeBufferSize()	const;
-
-	///fills the dataBuffer and returns the struct name (and 0 on failure)
-	virtual	const char*	serialize(void* dataBuffer, class btSerializer* serializer) const;
-
-	virtual void serializeSingleObject(class btSerializer* serializer) const;
-
-};
-
-///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
-struct	btCollisionObjectDoubleData
-{
-	void					*m_broadphaseHandle;
-	void					*m_collisionShape;
-	btCollisionShapeData	*m_rootCollisionShape;
-	char					*m_name;
-
-	btTransformDoubleData	m_worldTransform;
-	btTransformDoubleData	m_interpolationWorldTransform;
-	btVector3DoubleData		m_interpolationLinearVelocity;
-	btVector3DoubleData		m_interpolationAngularVelocity;
-	btVector3DoubleData		m_anisotropicFriction;
-	double					m_contactProcessingThreshold;	
-	double					m_deactivationTime;
-	double					m_friction;
-	double					m_rollingFriction;
-	double					m_restitution;
-	double					m_hitFraction; 
-	double					m_ccdSweptSphereRadius;
-	double					m_ccdMotionThreshold;
-
-	int						m_hasAnisotropicFriction;
-	int						m_collisionFlags;
-	int						m_islandTag1;
-	int						m_companionId;
-	int						m_activationState1;
-	int						m_internalType;
-	int						m_checkCollideWith;
-
-	char	m_padding[4];
-};
-
-///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
-struct	btCollisionObjectFloatData
-{
-	void					*m_broadphaseHandle;
-	void					*m_collisionShape;
-	btCollisionShapeData	*m_rootCollisionShape;
-	char					*m_name;
-
-	btTransformFloatData	m_worldTransform;
-	btTransformFloatData	m_interpolationWorldTransform;
-	btVector3FloatData		m_interpolationLinearVelocity;
-	btVector3FloatData		m_interpolationAngularVelocity;
-	btVector3FloatData		m_anisotropicFriction;
-	float					m_contactProcessingThreshold;	
-	float					m_deactivationTime;
-	float					m_friction;
-	float					m_rollingFriction;
-
-	float					m_restitution;
-	float					m_hitFraction; 
-	float					m_ccdSweptSphereRadius;
-	float					m_ccdMotionThreshold;
-
-	int						m_hasAnisotropicFriction;
-	int						m_collisionFlags;
-	int						m_islandTag1;
-	int						m_companionId;
-	int						m_activationState1;
-	int						m_internalType;
-	int						m_checkCollideWith;
-	char					m_padding[4];
-};
-
-
-
-SIMD_FORCE_INLINE	int	btCollisionObject::calculateSerializeBufferSize() const
-{
-	return sizeof(btCollisionObjectData);
-}
-
-
-
-#endif //BT_COLLISION_OBJECT_H
diff --git a/opencl/gpu_rigidbody/host/btConstraintSolver.h b/opencl/gpu_rigidbody/host/btConstraintSolver.h
deleted file mode 100644
index 016d521d9..000000000
--- a/opencl/gpu_rigidbody/host/btConstraintSolver.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef BT_CONSTRAINT_SOLVER_H
-#define BT_CONSTRAINT_SOLVER_H
-
-#include "BulletCommon/btScalar.h"
-
-class btPersistentManifold;
-class btRigidBody;
-class btCollisionObject;
-class btTypedConstraint;
-struct btContactSolverInfo;
-struct btBroadphaseProxy;
-class btIDebugDraw;
-class btStackAlloc;
-class	btDispatcher;
-/// btConstraintSolver provides solver interface
-class btConstraintSolver
-{
-
-public:
-
-	virtual ~btConstraintSolver() {}
-	
-	virtual void prepareSolve (int /* numBodies */, int /* numManifolds */) {;}
-
-	///solve a group of constraints
-	virtual btScalar solveGroup(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifold,int numManifolds,btTypedConstraint** constraints,int numConstraints, const btContactSolverInfo& info,class btIDebugDraw* debugDrawer, btStackAlloc* stackAlloc,btDispatcher* dispatcher) = 0;
-
-	virtual void allSolved (const btContactSolverInfo& /* info */,class btIDebugDraw* /* debugDrawer */, btStackAlloc* /* stackAlloc */) {;}
-
-	///clear internal cached data and reset random seed
-	virtual	void	reset() = 0;
-};
-
-
-
-
-#endif //BT_CONSTRAINT_SOLVER_H
diff --git a/opencl/gpu_rigidbody/host/btContactSolverInfo.h b/opencl/gpu_rigidbody/host/btContactSolverInfo.h
deleted file mode 100644
index ebbeb8b8b..000000000
--- a/opencl/gpu_rigidbody/host/btContactSolverInfo.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef BT_CONTACT_SOLVER_INFO
-#define BT_CONTACT_SOLVER_INFO
-
-#include "BulletCommon/btScalar.h"
-
-enum	btSolverMode
-{
-	SOLVER_RANDMIZE_ORDER = 1,
-	SOLVER_FRICTION_SEPARATE = 2,
-	SOLVER_USE_WARMSTARTING = 4,
-	SOLVER_USE_2_FRICTION_DIRECTIONS = 16,
-	SOLVER_ENABLE_FRICTION_DIRECTION_CACHING = 32,
-	SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION = 64,
-	SOLVER_CACHE_FRIENDLY = 128,
-	SOLVER_SIMD = 256,
-	SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS = 512,
-	SOLVER_ALLOW_ZERO_LENGTH_FRICTION_DIRECTIONS = 1024
-};
-
-struct btContactSolverInfoData
-{
-	
-
-	btScalar	m_tau;
-	btScalar	m_damping;//global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
-	btScalar	m_friction;
-	btScalar	m_timeStep;
-	btScalar	m_restitution;
-	int		m_numIterations;
-	btScalar	m_maxErrorReduction;
-	btScalar	m_sor;
-	btScalar	m_erp;//used as Baumgarte factor
-	btScalar	m_erp2;//used in Split Impulse
-	btScalar	m_globalCfm;//constraint force mixing
-	int			m_splitImpulse;
-	btScalar	m_splitImpulsePenetrationThreshold;
-	btScalar	m_splitImpulseTurnErp;
-	btScalar	m_linearSlop;
-	btScalar	m_warmstartingFactor;
-
-	int			m_solverMode;
-	int	m_restingContactRestitutionThreshold;
-	int			m_minimumSolverBatchSize;
-	btScalar	m_maxGyroscopicForce;
-	btScalar	m_singleAxisRollingFrictionThreshold;
-
-
-};
-
-struct btContactSolverInfo : public btContactSolverInfoData
-{
-
-	
-
-	inline btContactSolverInfo()
-	{
-		m_tau = btScalar(0.6);
-		m_damping = btScalar(1.0);
-		m_friction = btScalar(0.3);
-		m_timeStep = btScalar(1.f/60.f);
-		m_restitution = btScalar(0.);
-		m_maxErrorReduction = btScalar(20.);
-		m_numIterations = 10;
-		m_erp = btScalar(0.2);
-		m_erp2 = btScalar(0.8);
-		m_globalCfm = btScalar(0.);
-		m_sor = btScalar(1.);
-		m_splitImpulse = true;
-		m_splitImpulsePenetrationThreshold = -.04f;
-		m_splitImpulseTurnErp = 0.1f;
-		m_linearSlop = btScalar(0.0);
-		m_warmstartingFactor=btScalar(0.85);
-		//m_solverMode =  SOLVER_USE_WARMSTARTING |  SOLVER_SIMD | SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION|SOLVER_USE_2_FRICTION_DIRECTIONS|SOLVER_ENABLE_FRICTION_DIRECTION_CACHING;// | SOLVER_RANDMIZE_ORDER;
-		m_solverMode = SOLVER_USE_WARMSTARTING | SOLVER_SIMD;// | SOLVER_RANDMIZE_ORDER;
-		m_restingContactRestitutionThreshold = 2;//unused as of 2.81
-		m_minimumSolverBatchSize = 128; //try to combine islands until the amount of constraints reaches this limit
-		m_maxGyroscopicForce = 100.f; ///only used to clamp forces for bodies that have their BT_ENABLE_GYROPSCOPIC_FORCE flag set (using btRigidBody::setFlag)
-		m_singleAxisRollingFrictionThreshold = 1e30f;///if the velocity is above this threshold, it will use a single constraint row (axis), otherwise 3 rows.
-	}
-};
-
-///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
-struct btContactSolverInfoDoubleData
-{
-	double		m_tau;
-	double		m_damping;//global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
-	double		m_friction;
-	double		m_timeStep;
-	double		m_restitution;
-	double		m_maxErrorReduction;
-	double		m_sor;
-	double		m_erp;//used as Baumgarte factor
-	double		m_erp2;//used in Split Impulse
-	double		m_globalCfm;//constraint force mixing
-	double		m_splitImpulsePenetrationThreshold;
-	double		m_splitImpulseTurnErp;
-	double		m_linearSlop;
-	double		m_warmstartingFactor;
-	double		m_maxGyroscopicForce;
-	double		m_singleAxisRollingFrictionThreshold;
-
-	int			m_numIterations;
-	int			m_solverMode;
-	int			m_restingContactRestitutionThreshold;
-	int			m_minimumSolverBatchSize;
-	int			m_splitImpulse;
-	char		m_padding[4];
-
-};
-///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
-struct btContactSolverInfoFloatData
-{
-	float		m_tau;
-	float		m_damping;//global non-contact constraint damping, can be locally overridden by constraints during 'getInfo2'.
-	float		m_friction;
-	float		m_timeStep;
-
-	float		m_restitution;
-	float		m_maxErrorReduction;
-	float		m_sor;
-	float		m_erp;//used as Baumgarte factor
-
-	float		m_erp2;//used in Split Impulse
-	float		m_globalCfm;//constraint force mixing
-	float		m_splitImpulsePenetrationThreshold;
-	float		m_splitImpulseTurnErp;
-
-	float		m_linearSlop;
-	float		m_warmstartingFactor;
-	float		m_maxGyroscopicForce;
-	float		m_singleAxisRollingFrictionThreshold;
-
-	int			m_numIterations;
-	int			m_solverMode;
-	int			m_restingContactRestitutionThreshold;
-	int			m_minimumSolverBatchSize;
-
-	int			m_splitImpulse;
-	char		m_padding[4];
-};
-
-
-
-#endif //BT_CONTACT_SOLVER_INFO
diff --git a/opencl/gpu_rigidbody/host/btGpuBatchingPgsSolver.h b/opencl/gpu_rigidbody/host/btGpuBatchingPgsSolver.h
deleted file mode 100644
index 4f8efcce1..000000000
--- a/opencl/gpu_rigidbody/host/btGpuBatchingPgsSolver.h
+++ /dev/null
@@ -1,40 +0,0 @@
-
-#ifndef BT_GPU_BATCHING_PGS_SOLVER_H
-#define BT_GPU_BATCHING_PGS_SOLVER_H
-
-#include "../../basic_initialize/btOpenCLInclude.h"
-#include "../../parallel_primitives/host/btOpenCLArray.h"
-#include "../../gpu_narrowphase/host/btRigidBodyCL.h"
-#include "../../gpu_narrowphase/host/btContact4.h"
-#include "btGpuConstraint4.h"
-
-class btGpuBatchingPgsSolver
-{
-protected:
-
-	
-
-	struct btGpuBatchingPgsSolverInternalData*		m_data;
-
-	void batchContacts( btOpenCLArray<btContact4>* contacts, int nContacts, btOpenCLArray<unsigned int>* n, btOpenCLArray<unsigned int>* offsets, int staticIdx );
-	
-	inline int sortConstraintByBatch( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
-	inline int sortConstraintByBatch2( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
-	inline int sortConstraintByBatch3( btContact4* cs, int n, int simdWidth , int staticIdx, int numBodies);
-	
-
-
-	void solveContactConstraint(  const btOpenCLArray<btRigidBodyCL>* bodyBuf, const btOpenCLArray<btInertiaCL>* shapeBuf, 
-			btOpenCLArray<btGpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations);
-
-public:
-	
-	btGpuBatchingPgsSolver(cl_context ctx,cl_device_id device, cl_command_queue  q,int pairCapacity);
-	virtual ~btGpuBatchingPgsSolver();
-
-	void solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct btConfig& config);
-
-};
-
-#endif //BT_GPU_BATCHING_PGS_SOLVER_H
-
diff --git a/opencl/gpu_rigidbody/host/btGpuJacobiSolver.cpp b/opencl/gpu_rigidbody/host/btGpuJacobiSolver.cpp
deleted file mode 100644
index 955ed4d55..000000000
--- a/opencl/gpu_rigidbody/host/btGpuJacobiSolver.cpp
+++ /dev/null
@@ -1,1358 +0,0 @@
-
-#include "btGpuJacobiSolver.h"
-#include "BulletCommon/btAlignedObjectArray.h"
-#include "parallel_primitives/host/btPrefixScanCL.h"
-#include "btGpuConstraint4.h"
-#include "BulletCommon/btQuickprof.h"
-#include "../../parallel_primitives/host/btInt2.h"
-#include "../../parallel_primitives/host/btFillCL.h"
-
-
-
-#include "../../parallel_primitives/host/btLauncherCL.h"
-
-
-#include "../kernels/solverUtils.h"
-
-#define SOLVER_UTILS_KERNEL_PATH "opencl/gpu_rigidbody/kernels/solverUtils.cl"
-
-struct btGpuJacobiSolverInternalData
-{
-		//btRadixSort32CL*	m_sort32;
-		//btBoundSearchCL*	m_search;
-		btPrefixScanCL*	m_scan;
-
-		btOpenCLArray<unsigned int>* m_bodyCount;
-		btOpenCLArray<btInt2>*		m_contactConstraintOffsets;
-		btOpenCLArray<unsigned int>* m_offsetSplitBodies;
-
-		btOpenCLArray<btVector3>*	m_deltaLinearVelocities;
-		btOpenCLArray<btVector3>*	m_deltaAngularVelocities;
-
-
-		btOpenCLArray<btGpuConstraint4>* m_contactConstraints;
-
-		btFillCL*	m_filler;
-		
-
-		cl_kernel	m_countBodiesKernel;
-		cl_kernel	m_contactToConstraintSplitKernel;
-		cl_kernel	m_clearVelocitiesKernel;
-		cl_kernel	m_averageVelocitiesKernel;
-		cl_kernel	m_updateBodyVelocitiesKernel;
-		cl_kernel	m_solveContactKernel;
-		cl_kernel	m_solveFrictionKernel;
-
-
-
-};
-
-btGpuJacobiSolver::btGpuJacobiSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity)
-	:m_context(ctx),
-	m_device(device),
-	m_queue(queue)
-{
-	m_data = new btGpuJacobiSolverInternalData;
-	m_data->m_scan = new btPrefixScanCL(m_context,m_device,m_queue);
-	m_data->m_bodyCount = new btOpenCLArray<unsigned int>(m_context,m_queue);
-	m_data->m_filler = new btFillCL(m_context,m_device,m_queue);
-	m_data->m_contactConstraintOffsets = new btOpenCLArray<btInt2>(m_context,m_queue);
-	m_data->m_offsetSplitBodies = new btOpenCLArray<unsigned int>(m_context,m_queue);
-	m_data->m_contactConstraints = new btOpenCLArray<btGpuConstraint4>(m_context,m_queue);
-	m_data->m_deltaLinearVelocities = new btOpenCLArray<btVector3>(m_context,m_queue);
-	m_data->m_deltaAngularVelocities = new btOpenCLArray<btVector3>(m_context,m_queue);
-
-	cl_int pErrNum;
-	const char* additionalMacros="";
-	const char* solverUtilsSource = solverUtilsCL;
-	{
-		cl_program solverUtilsProg= btOpenCLUtils::compileCLProgramFromString( ctx, device, solverUtilsSource, &pErrNum,additionalMacros, SOLVER_UTILS_KERNEL_PATH);
-		btAssert(solverUtilsProg);
-		m_data->m_countBodiesKernel =  btOpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "CountBodiesKernel", &pErrNum, solverUtilsProg,additionalMacros );
-		btAssert(m_data->m_countBodiesKernel);
-
-		m_data->m_contactToConstraintSplitKernel  = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "ContactToConstraintSplitKernel", &pErrNum, solverUtilsProg,additionalMacros );
-		btAssert(m_data->m_contactToConstraintSplitKernel);
-		m_data->m_clearVelocitiesKernel  = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "ClearVelocitiesKernel", &pErrNum, solverUtilsProg,additionalMacros );
-		btAssert(m_data->m_clearVelocitiesKernel);
-
-		m_data->m_averageVelocitiesKernel  = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "AverageVelocitiesKernel", &pErrNum, solverUtilsProg,additionalMacros );
-		btAssert(m_data->m_averageVelocitiesKernel);
-
-		m_data->m_updateBodyVelocitiesKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "UpdateBodyVelocitiesKernel", &pErrNum, solverUtilsProg,additionalMacros );
-		btAssert(m_data->m_updateBodyVelocitiesKernel);
-
-		
-		m_data->m_solveContactKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "SolveContactJacobiKernel", &pErrNum, solverUtilsProg,additionalMacros );
-		btAssert(m_data->m_solveContactKernel );
-
-		m_data->m_solveFrictionKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "SolveFrictionJacobiKernel", &pErrNum, solverUtilsProg,additionalMacros );
-		btAssert(m_data->m_solveFrictionKernel);
-	}
-
-}
-
-btGpuJacobiSolver::~btGpuJacobiSolver()
-{
-	clReleaseKernel(m_data->m_solveContactKernel);
-	clReleaseKernel(m_data->m_solveFrictionKernel);
-	clReleaseKernel(m_data->m_countBodiesKernel);
-	clReleaseKernel(m_data->m_contactToConstraintSplitKernel);
-	clReleaseKernel(m_data->m_averageVelocitiesKernel);
-	clReleaseKernel(m_data->m_updateBodyVelocitiesKernel);
-	clReleaseKernel(m_data->m_clearVelocitiesKernel );
-
-	delete m_data->m_deltaLinearVelocities;
-	delete m_data->m_deltaAngularVelocities;
-	delete m_data->m_contactConstraints;
-	delete m_data->m_offsetSplitBodies;
-	delete m_data->m_contactConstraintOffsets;
-	delete m_data->m_bodyCount;
-	delete m_data->m_filler;
-	delete m_data->m_scan;
-	delete m_data;
-}
-
-
-btVector3 make_float4(float v)
-{
-	return btVector3 (v,v,v);
-}
-
-btVector4 make_float4(float x,float y, float z, float w)
-{
-	return btVector4 (x,y,z,w);
-}
-
-
-	static
-	inline
-	float calcRelVel(const btVector3& l0, const btVector3& l1, const btVector3& a0, const btVector3& a1, 
-					 const btVector3& linVel0, const btVector3& angVel0, const btVector3& linVel1, const btVector3& angVel1)
-	{
-		return btDot(l0, linVel0) + btDot(a0, angVel0) + btDot(l1, linVel1) + btDot(a1, angVel1);
-	}
-
-
-	static
-	inline
-	void setLinearAndAngular(const btVector3& n, const btVector3& r0, const btVector3& r1,
-							 btVector3& linear, btVector3& angular0, btVector3& angular1)
-	{
-		linear = -n;
-		angular0 = -btCross(r0, n);
-		angular1 = btCross(r1, n);
-	}
-
-
-static __inline void solveContact(btGpuConstraint4& cs, 
-	const btVector3& posA, const btVector3& linVelARO, const btVector3& angVelARO, float invMassA, const btMatrix3x3& invInertiaA,
-	const btVector3& posB, const btVector3& linVelBRO, const btVector3& angVelBRO, float invMassB, const btMatrix3x3& invInertiaB, 
-	float maxRambdaDt[4], float minRambdaDt[4], btVector3& dLinVelA, btVector3& dAngVelA, btVector3& dLinVelB, btVector3& dAngVelB)
-{
-
-
-	for(int ic=0; ic<4; ic++)
-	{
-		//	dont necessary because this makes change to 0
-		if( cs.m_jacCoeffInv[ic] == 0.f ) continue;
-
-		{
-			btVector3 angular0, angular1, linear;
-			btVector3 r0 = cs.m_worldPos[ic] - (btVector3&)posA;
-			btVector3 r1 = cs.m_worldPos[ic] - (btVector3&)posB;
-			setLinearAndAngular( (const btVector3 &)-cs.m_linear, (const btVector3 &)r0, (const btVector3 &)r1, linear, angular0, angular1 );
-
-			float rambdaDt = calcRelVel((const btVector3 &)cs.m_linear,(const btVector3 &) -cs.m_linear, angular0, angular1,
-				linVelARO+dLinVelA, angVelARO+dAngVelA, linVelBRO+dLinVelB, angVelBRO+dAngVelB ) + cs.m_b[ic];
-			rambdaDt *= cs.m_jacCoeffInv[ic];
-
-			{
-				float prevSum = cs.m_appliedRambdaDt[ic];
-				float updated = prevSum;
-				updated += rambdaDt;
-				updated = btMax( updated, minRambdaDt[ic] );
-				updated = btMin( updated, maxRambdaDt[ic] );
-				rambdaDt = updated - prevSum;
-				cs.m_appliedRambdaDt[ic] = updated;
-			}
-
-			btVector3 linImp0 = invMassA*linear*rambdaDt;
-			btVector3 linImp1 = invMassB*(-linear)*rambdaDt;
-			btVector3 angImp0 = (invInertiaA* angular0)*rambdaDt;
-			btVector3 angImp1 = (invInertiaB* angular1)*rambdaDt;
-#ifdef _WIN32
-            btAssert(_finite(linImp0.getX()));
-			btAssert(_finite(linImp1.getX()));
-#endif
-			
-			if (invMassA)
-			{
-				dLinVelA += linImp0;
-				dAngVelA += angImp0;
-			}
-			if (invMassB)
-			{
-				dLinVelB += linImp1;
-				dAngVelB += angImp1;
-			}
-		}
-	}
-}
-
-
-
-void solveContact3(btGpuConstraint4* cs,
-			btVector3* posAPtr, btVector3* linVelA, btVector3* angVelA, float invMassA, const btMatrix3x3& invInertiaA,
-			btVector3* posBPtr, btVector3* linVelB, btVector3* angVelB, float invMassB, const btMatrix3x3& invInertiaB,
-			btVector3* dLinVelA, btVector3* dAngVelA, btVector3* dLinVelB, btVector3* dAngVelB)
-{
-	float minRambdaDt = 0;
-	float maxRambdaDt = FLT_MAX;
-
-	for(int ic=0; ic<4; ic++)
-	{
-		if( cs->m_jacCoeffInv[ic] == 0.f ) continue;
-
-		btVector3 angular0, angular1, linear;
-		btVector3 r0 = cs->m_worldPos[ic] - *posAPtr;
-		btVector3 r1 = cs->m_worldPos[ic] - *posBPtr;
-		setLinearAndAngular( -cs->m_linear, r0, r1, linear, angular0, angular1 );
-
-		float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, 
-			*linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];
-		rambdaDt *= cs->m_jacCoeffInv[ic];
-
-		{
-			float prevSum = cs->m_appliedRambdaDt[ic];
-			float updated = prevSum;
-			updated += rambdaDt;
-			updated = btMax( updated, minRambdaDt );
-			updated = btMin( updated, maxRambdaDt );
-			rambdaDt = updated - prevSum;
-			cs->m_appliedRambdaDt[ic] = updated;
-		}
-
-		btVector3 linImp0 = invMassA*linear*rambdaDt;
-		btVector3 linImp1 = invMassB*(-linear)*rambdaDt;
-		btVector3 angImp0 = (invInertiaA* angular0)*rambdaDt;
-		btVector3 angImp1 = (invInertiaB* angular1)*rambdaDt;
-
-		if (invMassA)
-		{
-			*dLinVelA += linImp0;
-			*dAngVelA += angImp0;
-		}
-		if (invMassB)
-		{
-			*dLinVelB += linImp1;
-			*dAngVelB += angImp1;
-		}
-	}
-}
-
-
-static inline void solveFriction(btGpuConstraint4& cs, 
-	const btVector3& posA, const btVector3& linVelARO, const btVector3& angVelARO, float invMassA, const btMatrix3x3& invInertiaA,
-	const btVector3& posB, const btVector3& linVelBRO, const btVector3& angVelBRO, float invMassB, const btMatrix3x3& invInertiaB, 
-	float maxRambdaDt[4], float minRambdaDt[4], btVector3& dLinVelA, btVector3& dAngVelA, btVector3& dLinVelB, btVector3& dAngVelB)
-{
-
-	btVector3 linVelA = linVelARO+dLinVelA;
-	btVector3 linVelB = linVelBRO+dLinVelB;
-	btVector3 angVelA = angVelARO+dAngVelA;
-	btVector3 angVelB = angVelBRO+dAngVelB;
-
-	if( cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0 ) return;
-	const btVector3& center = (const btVector3&)cs.m_center;
-
-	btVector3 n = -(const btVector3&)cs.m_linear;
-
-	btVector3 tangent[2];
-#if 1		
-	btPlaneSpace1 (n, tangent[0],tangent[1]);
-#else
-	btVector3 r = cs.m_worldPos[0]-center;
-	tangent[0] = cross3( n, r );
-	tangent[1] = cross3( tangent[0], n );
-	tangent[0] = normalize3( tangent[0] );
-	tangent[1] = normalize3( tangent[1] );
-#endif
-
-	btVector3 angular0, angular1, linear;
-	btVector3 r0 = center - posA;
-	btVector3 r1 = center - posB;
-	for(int i=0; i<2; i++)
-	{
-		setLinearAndAngular( tangent[i], r0, r1, linear, angular0, angular1 );
-		float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,
-			linVelA, angVelA, linVelB, angVelB );
-		rambdaDt *= cs.m_fJacCoeffInv[i];
-
-			{
-				float prevSum = cs.m_fAppliedRambdaDt[i];
-				float updated = prevSum;
-				updated += rambdaDt;
-				updated = btMax( updated, minRambdaDt[i] );
-				updated = btMin( updated, maxRambdaDt[i] );
-				rambdaDt = updated - prevSum;
-				cs.m_fAppliedRambdaDt[i] = updated;
-			}
-
-		btVector3 linImp0 = invMassA*linear*rambdaDt;
-		btVector3 linImp1 = invMassB*(-linear)*rambdaDt;
-		btVector3 angImp0 = (invInertiaA* angular0)*rambdaDt;
-		btVector3 angImp1 = (invInertiaB* angular1)*rambdaDt;
-#ifdef _WIN32
-		btAssert(_finite(linImp0.getX()));
-		btAssert(_finite(linImp1.getX()));
-#endif
-		if (invMassA)
-		{
-			dLinVelA += linImp0;
-			dAngVelA += angImp0;
-		}
-		if (invMassB)
-		{
-			dLinVelB += linImp1;
-			dAngVelB += angImp1;
-		}
-	}
-
-	{	//	angular damping for point constraint
-		btVector3 ab = ( posB - posA ).normalized();
-		btVector3 ac = ( center - posA ).normalized();
-		if( btDot( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))
-		{
-			float angNA = btDot( n, angVelA );
-			float angNB = btDot( n, angVelB );
-
-			if (invMassA)
-				dAngVelA -= (angNA*0.1f)*n;
-			if (invMassB)
-				dAngVelB -= (angNB*0.1f)*n;
-		}
-	}
-
-}
-
-
-btVector3 mtMul3(const btVector3& a, const btMatrix3x3& b)
-{
-	btVector3 colx = make_float4(b.getRow(0)[0], b.getRow(1)[0], b.getRow(2)[0], 0);
-	btVector3 coly = make_float4(b.getRow(0)[1], b.getRow(1)[1], b.getRow(2)[1], 0);
-	btVector3 colz = make_float4(b.getRow(0)[2], b.getRow(1)[2], b.getRow(2)[2], 0);
-
-	btVector3 ans;
-	ans[0] = btDot( a, colx );
-	ans[1] = btDot( a, coly );
-	ans[2] = btDot( a, colz );
-	return ans;
-}
-
-
-float calcJacCoeff(const btVector3& linear0, const btVector3& linear1, const btVector3& angular0, const btVector3& angular1,
-					float invMass0, const btMatrix3x3* invInertia0, float invMass1, const btMatrix3x3* invInertia1, float countA, float countB)
-{
-	//	linear0,1 are normlized
-	float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;
-	
-	float jmj1 = btDot(mtMul3(angular0,*invInertia0), angular0);
-	float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;
-	float jmj3 = btDot(mtMul3(angular1,*invInertia1), angular1);
-	return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB);
-//	return -1.f/((jmj0+jmj1)+(jmj2+jmj3));
-
-}
-
-
-void setConstraint4( const btVector3& posA, const btVector3& linVelA, const btVector3& angVelA, float invMassA, const btMatrix3x3& invInertiaA,
-	const btVector3& posB, const btVector3& linVelB, const btVector3& angVelB, float invMassB, const btMatrix3x3& invInertiaB, 
-	 btContact4* src, float dt, float positionDrift, float positionConstraintCoeff, float countA, float countB,
-	btGpuConstraint4* dstC )
-{
-	dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);
-	dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);
-
-	float dtInv = 1.f/dt;
-	for(int ic=0; ic<4; ic++)
-	{
-		dstC->m_appliedRambdaDt[ic] = 0.f;
-	}
-	dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;
-
-
-	dstC->m_linear = -src->m_worldNormal;
-	dstC->m_linear[3] = 0.7f ;//src->getFrictionCoeff() );
-	for(int ic=0; ic<4; ic++)
-	{
-		btVector3 r0 = src->m_worldPos[ic] - posA;
-		btVector3 r1 = src->m_worldPos[ic] - posB;
-
-		if( ic >= src->m_worldNormal[3] )//npoints
-		{
-			dstC->m_jacCoeffInv[ic] = 0.f;
-			continue;
-		}
-
-		float relVelN;
-		{
-			btVector3 linear, angular0, angular1;
-			setLinearAndAngular(src->m_worldNormal, r0, r1, linear, angular0, angular1);
-
-			dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,
-				invMassA, &invInertiaA, invMassB, &invInertiaB ,countA,countB);
-
-			relVelN = calcRelVel(linear, -linear, angular0, angular1,
-				linVelA, angVelA, linVelB, angVelB);
-
-			float e = 0.f;//src->getRestituitionCoeff();
-			if( relVelN*relVelN < 0.004f ) 
-			{
-				e = 0.f;
-			}
-
-			dstC->m_b[ic] = e*relVelN;
-			//float penetration = src->m_worldPos[ic].w;
-			dstC->m_b[ic] += (src->m_worldPos[ic][3] + positionDrift)*positionConstraintCoeff*dtInv;
-			dstC->m_appliedRambdaDt[ic] = 0.f;
-		}
-	}
-
-	if( src->m_worldNormal[3] > 0 )//npoints
-	{	//	prepare friction
-		btVector3 center = make_float4(0.f);
-		for(int i=0; i<src->m_worldNormal[3]; i++) 
-			center += src->m_worldPos[i];
-		center /= (float)src->m_worldNormal[3];
-
-		btVector3 tangent[2];
-		btPlaneSpace1(src->m_worldNormal,tangent[0],tangent[1]);
-		
-		btVector3 r[2];
-		r[0] = center - posA;
-		r[1] = center - posB;
-
-		for(int i=0; i<2; i++)
-		{
-			btVector3 linear, angular0, angular1;
-			setLinearAndAngular(tangent[i], r[0], r[1], linear, angular0, angular1);
-
-			dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,
-				invMassA, &invInertiaA, invMassB, &invInertiaB ,countA,countB);
-			dstC->m_fAppliedRambdaDt[i] = 0.f;
-		}
-		dstC->m_center = center;
-	}
-
-	for(int i=0; i<4; i++)
-	{
-		if( i<src->m_worldNormal[3] )
-		{
-			dstC->m_worldPos[i] = src->m_worldPos[i];
-		}
-		else
-		{
-			dstC->m_worldPos[i] = make_float4(0.f);
-		}
-	}
-}
-
-
-
-void ContactToConstraintKernel(btContact4* gContact, btRigidBodyCL* gBodies, btInertiaCL* gShapes, btGpuConstraint4* gConstraintOut, int nContacts,
-float dt,
-float positionDrift,
-float positionConstraintCoeff, int gIdx, btAlignedObjectArray<unsigned int>& bodyCount
-)
-{
-	//int gIdx = 0;//GET_GLOBAL_IDX;
-	
-	if( gIdx < nContacts )
-	{
-		int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);
-		int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);
-
-		btVector3 posA = gBodies[aIdx].m_pos;
-		btVector3 linVelA = gBodies[aIdx].m_linVel;
-		btVector3 angVelA = gBodies[aIdx].m_angVel;
-		float invMassA = gBodies[aIdx].m_invMass;
-		btMatrix3x3 invInertiaA = gShapes[aIdx].m_invInertiaWorld;//.m_invInertia;
-
-		btVector3 posB = gBodies[bIdx].m_pos;
-		btVector3 linVelB = gBodies[bIdx].m_linVel;
-		btVector3 angVelB = gBodies[bIdx].m_angVel;
-		float invMassB = gBodies[bIdx].m_invMass;
-		btMatrix3x3 invInertiaB = gShapes[bIdx].m_invInertiaWorld;//m_invInertia;
-
-		btGpuConstraint4 cs;
-		float countA = invMassA ? (float)(bodyCount[aIdx]) : 1;
-		float countB = invMassB ? (float)(bodyCount[bIdx]) : 1;
-    	setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,
-			&gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB,
-			&cs );
-		
-
-		
-		cs.m_batchIdx = gContact[gIdx].m_batchIdx;
-
-		gConstraintOut[gIdx] = cs;
-	}
-}
-
-
-void btGpuJacobiSolver::solveGroupHost(btRigidBodyCL* bodies,btInertiaCL* inertias,int numBodies,btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btJacobiSolverInfo& solverInfo)
-{
-	BT_PROFILE("btGpuJacobiSolver::solveGroup");
-
-	btAlignedObjectArray<unsigned int> bodyCount;
-	bodyCount.resize(numBodies);
-	for (int i=0;i<numBodies;i++)
-		bodyCount[i] = 0;
-
-	btAlignedObjectArray<btInt2> contactConstraintOffsets;
-	contactConstraintOffsets.resize(numManifolds);
-
-
-	for (int i=0;i<numManifolds;i++)
-	{
-		int pa = manifoldPtr[i].m_bodyAPtrAndSignBit;
-		int pb = manifoldPtr[i].m_bodyBPtrAndSignBit;
-
-		bool isFixedA = (pa <0) || (pa == solverInfo.m_fixedBodyIndex);
-		bool isFixedB = (pb <0) || (pb == solverInfo.m_fixedBodyIndex);
-
-		int bodyIndexA = manifoldPtr[i].getBodyA();
-		int bodyIndexB = manifoldPtr[i].getBodyB();
-
-		if (!isFixedA)
-		{
-			contactConstraintOffsets[i].x = bodyCount[bodyIndexA];
-			bodyCount[bodyIndexA]++;
-		}
-		if (!isFixedB)
-		{
-			contactConstraintOffsets[i].y = bodyCount[bodyIndexB];
-			bodyCount[bodyIndexB]++;
-		} 
-	}
-
-	btAlignedObjectArray<unsigned int> offsetSplitBodies;
-	offsetSplitBodies.resize(numBodies);
-	unsigned int totalNumSplitBodies;
-	m_data->m_scan->executeHost(bodyCount,offsetSplitBodies,numBodies,&totalNumSplitBodies);
-	int numlastBody = bodyCount[numBodies-1];
-	totalNumSplitBodies += numlastBody;
-
-	
-
-
-
-	btAlignedObjectArray<btGpuConstraint4> contactConstraints;
-	contactConstraints.resize(numManifolds);
-
-	for (int i=0;i<numManifolds;i++)
-	{
-		ContactToConstraintKernel(&manifoldPtr[0],bodies,inertias,&contactConstraints[0],numManifolds,
-			solverInfo.m_deltaTime,
-			solverInfo.m_positionDrift,
-			solverInfo.m_positionConstraintCoeff,
-			i, bodyCount);
-	}
-	int maxIter = solverInfo.m_numIterations;
-
-
-	btAlignedObjectArray<btVector3> deltaLinearVelocities;
-	btAlignedObjectArray<btVector3> deltaAngularVelocities;
-	deltaLinearVelocities.resize(totalNumSplitBodies);
-	deltaAngularVelocities.resize(totalNumSplitBodies);
-	for (int i=0;i<totalNumSplitBodies;i++)
-	{
-		deltaLinearVelocities[i].setZero();
-		deltaAngularVelocities[i].setZero();
-	}
-
-
-
-	for (int iter = 0;iter<maxIter;iter++)
-	{
-		int i=0;
-		for( i=0; i<numManifolds; i++)
-		{
-
-			float frictionCoeff = contactConstraints[i].getFrictionCoeff();
-			int aIdx = (int)contactConstraints[i].m_bodyA;
-			int bIdx = (int)contactConstraints[i].m_bodyB;
-			btRigidBodyCL& bodyA = bodies[aIdx];
-			btRigidBodyCL& bodyB = bodies[bIdx];
-
-			btVector3 zero(0,0,0);
-			
-			btVector3* dlvAPtr=&zero;
-			btVector3* davAPtr=&zero;
-			btVector3* dlvBPtr=&zero;
-			btVector3* davBPtr=&zero;
-			
-			if (bodyA.getInvMass())
-			{
-				int bodyOffsetA = offsetSplitBodies[aIdx];
-				int constraintOffsetA = contactConstraintOffsets[i].x;
-				int splitIndexA = bodyOffsetA+constraintOffsetA;
-				dlvAPtr = &deltaLinearVelocities[splitIndexA];
-				davAPtr = &deltaAngularVelocities[splitIndexA];
-			}
-
-			if (bodyB.getInvMass())
-			{
-				int bodyOffsetB = offsetSplitBodies[bIdx];
-				int constraintOffsetB = contactConstraintOffsets[i].y;
-				int splitIndexB= bodyOffsetB+constraintOffsetB;
-				dlvBPtr =&deltaLinearVelocities[splitIndexB];
-				davBPtr = &deltaAngularVelocities[splitIndexB];
-			}
-
-
-
-			{
-				float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
-				float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
-
-				solveContact( contactConstraints[i], (btVector3&)bodyA.m_pos, (btVector3&)bodyA.m_linVel, (btVector3&)bodyA.m_angVel, bodyA.m_invMass, inertias[aIdx].m_invInertiaWorld, 
-					(btVector3&)bodyB.m_pos, (btVector3&)bodyB.m_linVel, (btVector3&)bodyB.m_angVel, bodyB.m_invMass, inertias[bIdx].m_invInertiaWorld,
-					maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr		);
-
-
-			}
-		}
-
-		
-		//easy
-		for (int i=0;i<numBodies;i++)
-		{
-			if (bodies[i].getInvMass())
-			{
-				int bodyOffset = offsetSplitBodies[i];
-				int count = bodyCount[i];
-				float factor = 1.f/float(count);
-				btVector3 averageLinVel;
-				averageLinVel.setZero();
-				btVector3 averageAngVel;
-				averageAngVel.setZero();
-				for (int j=0;j<count;j++)
-				{
-					averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;
-					averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;
-				}
-				for (int j=0;j<count;j++)
-				{
-					deltaLinearVelocities[bodyOffset+j] = averageLinVel;
-					deltaAngularVelocities[bodyOffset+j] = averageAngVel;
-				}
-			}
-		}
-
-		//solve friction
-
-		for(int i=0; i<numManifolds; i++)
-		{
-			float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
-			float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
-
-			float sum = 0;
-			for(int j=0; j<4; j++)
-			{
-				sum +=contactConstraints[i].m_appliedRambdaDt[j];
-			}
-			float frictionCoeff = contactConstraints[i].getFrictionCoeff();
-			int aIdx = (int)contactConstraints[i].m_bodyA;
-			int bIdx = (int)contactConstraints[i].m_bodyB;
-			btRigidBodyCL& bodyA = bodies[aIdx];
-			btRigidBodyCL& bodyB = bodies[bIdx];
-
-			btVector3 zero(0,0,0);
-			
-			btVector3* dlvAPtr=&zero;
-			btVector3* davAPtr=&zero;
-			btVector3* dlvBPtr=&zero;
-			btVector3* davBPtr=&zero;
-			
-			if (bodyA.getInvMass())
-			{
-				int bodyOffsetA = offsetSplitBodies[aIdx];
-				int constraintOffsetA = contactConstraintOffsets[i].x;
-				int splitIndexA = bodyOffsetA+constraintOffsetA;
-				dlvAPtr = &deltaLinearVelocities[splitIndexA];
-				davAPtr = &deltaAngularVelocities[splitIndexA];
-			}
-
-			if (bodyB.getInvMass())
-			{
-				int bodyOffsetB = offsetSplitBodies[bIdx];
-				int constraintOffsetB = contactConstraintOffsets[i].y;
-				int splitIndexB= bodyOffsetB+constraintOffsetB;
-				dlvBPtr =&deltaLinearVelocities[splitIndexB];
-				davBPtr = &deltaAngularVelocities[splitIndexB];
-			}
-
-			for(int j=0; j<4; j++)
-			{
-				maxRambdaDt[j] = frictionCoeff*sum;
-				minRambdaDt[j] = -maxRambdaDt[j];
-			}
-
-			solveFriction( contactConstraints[i], (btVector3&)bodyA.m_pos, (btVector3&)bodyA.m_linVel, (btVector3&)bodyA.m_angVel, bodyA.m_invMass,inertias[aIdx].m_invInertiaWorld, 
-				(btVector3&)bodyB.m_pos, (btVector3&)bodyB.m_linVel, (btVector3&)bodyB.m_angVel, bodyB.m_invMass, inertias[bIdx].m_invInertiaWorld,
-				maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr);
-
-		}
-
-		//easy
-		for (int i=0;i<numBodies;i++)
-		{
-			if (bodies[i].getInvMass())
-			{
-				int bodyOffset = offsetSplitBodies[i];
-				int count = bodyCount[i];
-				float factor = 1.f/float(count);
-				btVector3 averageLinVel;
-				averageLinVel.setZero();
-				btVector3 averageAngVel;
-				averageAngVel.setZero();
-				for (int j=0;j<count;j++)
-				{
-					averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;
-					averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;
-				}
-				for (int j=0;j<count;j++)
-				{
-					deltaLinearVelocities[bodyOffset+j] = averageLinVel;
-					deltaAngularVelocities[bodyOffset+j] = averageAngVel;
-				}
-			}
-		}
-
-
-
-	}
-
-
-	//easy
-	for (int i=0;i<numBodies;i++)
-	{
-		if (bodies[i].getInvMass())
-		{
-			int bodyOffset = offsetSplitBodies[i];
-			int count = bodyCount[i];
-			if (count)
-			{
-				bodies[i].m_linVel += deltaLinearVelocities[bodyOffset];
-				bodies[i].m_angVel += deltaAngularVelocities[bodyOffset];
-			}
-		}
-	}
-}
-
-
-
-void  btGpuJacobiSolver::solveGroup(btOpenCLArray<btRigidBodyCL>* bodies,btOpenCLArray<btInertiaCL>* inertias,btOpenCLArray<btContact4>* manifoldPtr,const btJacobiSolverInfo& solverInfo)
-{
-
-	BT_PROFILE("btGpuJacobiSolver::solveGroup");
-
-	int numBodies = bodies->size();
-	int numManifolds = manifoldPtr->size();
-
-	m_data->m_bodyCount->resize(numBodies);
-	
-	unsigned int val=0;
-	btInt2 val2;
-	val2.x=0;
-	val2.y=0;
-
-	 {
-		BT_PROFILE("m_filler");
-		m_data->m_contactConstraintOffsets->resize(numManifolds);
-		m_data->m_filler->execute(*m_data->m_bodyCount,val,numBodies);
-		
-	
-		m_data->m_filler->execute(*m_data->m_contactConstraintOffsets,val2,numManifolds);
-	}
-
-	{
-		BT_PROFILE("m_countBodiesKernel");
-		btLauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel);
-		launcher.setBuffer(manifoldPtr->getBufferCL());
-		launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
-		launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
-		launcher.setConst(numManifolds);
-		launcher.setConst(solverInfo.m_fixedBodyIndex);
-		launcher.launch1D(numManifolds);
-	}
-
-	unsigned int totalNumSplitBodies=0;
-	m_data->m_offsetSplitBodies->resize(numBodies);
-	m_data->m_scan->execute(*m_data->m_bodyCount,*m_data->m_offsetSplitBodies,numBodies,&totalNumSplitBodies);
-	totalNumSplitBodies+=m_data->m_bodyCount->at(numBodies-1);
-
-
-	int numContacts = manifoldPtr->size();
-	m_data->m_contactConstraints->resize(numContacts);
-
-	
-	{
-		BT_PROFILE("contactToConstraintSplitKernel");
-		btLauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel);
-		launcher.setBuffer(manifoldPtr->getBufferCL());
-		launcher.setBuffer(bodies->getBufferCL());
-		launcher.setBuffer(inertias->getBufferCL());
-		launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
-		launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
-        launcher.setConst(numContacts);
-		launcher.setConst(solverInfo.m_deltaTime);
-		launcher.setConst(solverInfo.m_positionDrift);
-		launcher.setConst(solverInfo.m_positionConstraintCoeff);
-		launcher.launch1D( numContacts, 64 );
-		clFinish(m_queue);
-	}
-
-	
-	m_data->m_deltaLinearVelocities->resize(totalNumSplitBodies);
-	m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies);
-
-
-	
-	{
-		BT_PROFILE("m_clearVelocitiesKernel");
-		btLauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel);
-		launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
-		launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
-		launch.setConst(totalNumSplitBodies);
-		launch.launch1D(totalNumSplitBodies);
-	}
-	
-	int maxIter = solverInfo.m_numIterations;
-
-	for (int iter = 0;iter<maxIter;iter++)
-	{
-		{
-			BT_PROFILE("m_solveContactKernel");
-			btLauncherCL launcher( m_queue, m_data->m_solveContactKernel );
-			launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
-			launcher.setBuffer(bodies->getBufferCL());
-			launcher.setBuffer(inertias->getBufferCL());
-			launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
-			launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
-			launcher.setConst(solverInfo.m_deltaTime);
-			launcher.setConst(solverInfo.m_positionDrift);
-			launcher.setConst(solverInfo.m_positionConstraintCoeff);
-			launcher.setConst(solverInfo.m_fixedBodyIndex);
-			launcher.setConst(numManifolds);
-
-			launcher.launch1D(numManifolds);
-			clFinish(m_queue);
-		}
-
-
-		{
-			BT_PROFILE("average velocities");
-			btLauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel);
-			launcher.setBuffer(bodies->getBufferCL());
-			launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
-			launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
-			launcher.setConst(numBodies);
-			launcher.launch1D(numBodies);
-			clFinish(m_queue);
-		}
-
-		{
-			BT_PROFILE("m_solveFrictionKernel");
-			btLauncherCL launcher( m_queue, m_data->m_solveFrictionKernel);
-			launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
-			launcher.setBuffer(bodies->getBufferCL());
-			launcher.setBuffer(inertias->getBufferCL());
-			launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
-			launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
-			launcher.setConst(solverInfo.m_deltaTime);
-			launcher.setConst(solverInfo.m_positionDrift);
-			launcher.setConst(solverInfo.m_positionConstraintCoeff);
-			launcher.setConst(solverInfo.m_fixedBodyIndex);
-			launcher.setConst(numManifolds);
-
-			launcher.launch1D(numManifolds);
-			clFinish(m_queue);
-		}
-
-		{
-			BT_PROFILE("average velocities");
-			btLauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel);
-			launcher.setBuffer(bodies->getBufferCL());
-			launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
-			launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
-			launcher.setConst(numBodies);
-			launcher.launch1D(numBodies);
-			clFinish(m_queue);
-		}
-
-		
-
-	}
-
-
-	{
-			BT_PROFILE("update body velocities");
-			btLauncherCL launcher( m_queue, m_data->m_updateBodyVelocitiesKernel);
-			launcher.setBuffer(bodies->getBufferCL());
-			launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
-			launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
-			launcher.setConst(numBodies);
-			launcher.launch1D(numBodies);
-			clFinish(m_queue);
-		}
-
-
-
-}
-
-
-void  btGpuJacobiSolver::solveGroupMixed(btOpenCLArray<btRigidBodyCL>* bodiesGPU,btOpenCLArray<btInertiaCL>* inertiasGPU,btOpenCLArray<btContact4>* manifoldPtrGPU,const btJacobiSolverInfo& solverInfo)
-{
-
-	btAlignedObjectArray<btRigidBodyCL> bodiesCPU;
-	bodiesGPU->copyToHost(bodiesCPU);
-	btAlignedObjectArray<btInertiaCL> inertiasCPU;
-	inertiasGPU->copyToHost(inertiasCPU);
-	btAlignedObjectArray<btContact4> manifoldPtrCPU;
-	manifoldPtrGPU->copyToHost(manifoldPtrCPU);
-	
-	int numBodiesCPU = bodiesGPU->size();
-	int numManifoldsCPU = manifoldPtrGPU->size();
-	BT_PROFILE("btGpuJacobiSolver::solveGroupMixed");
-
-	btAlignedObjectArray<unsigned int> bodyCount;
-	bodyCount.resize(numBodiesCPU);
-	for (int i=0;i<numBodiesCPU;i++)
-		bodyCount[i] = 0;
-
-	btAlignedObjectArray<btInt2> contactConstraintOffsets;
-	contactConstraintOffsets.resize(numManifoldsCPU);
-
-
-	for (int i=0;i<numManifoldsCPU;i++)
-	{
-		int pa = manifoldPtrCPU[i].m_bodyAPtrAndSignBit;
-		int pb = manifoldPtrCPU[i].m_bodyBPtrAndSignBit;
-
-		bool isFixedA = (pa <0) || (pa == solverInfo.m_fixedBodyIndex);
-		bool isFixedB = (pb <0) || (pb == solverInfo.m_fixedBodyIndex);
-
-		int bodyIndexA = manifoldPtrCPU[i].getBodyA();
-		int bodyIndexB = manifoldPtrCPU[i].getBodyB();
-
-		if (!isFixedA)
-		{
-			contactConstraintOffsets[i].x = bodyCount[bodyIndexA];
-			bodyCount[bodyIndexA]++;
-		}
-		if (!isFixedB)
-		{
-			contactConstraintOffsets[i].y = bodyCount[bodyIndexB];
-			bodyCount[bodyIndexB]++;
-		} 
-	}
-
-	btAlignedObjectArray<unsigned int> offsetSplitBodies;
-	offsetSplitBodies.resize(numBodiesCPU);
-	unsigned int totalNumSplitBodiesCPU;
-	m_data->m_scan->executeHost(bodyCount,offsetSplitBodies,numBodiesCPU,&totalNumSplitBodiesCPU);
-	int numlastBody = bodyCount[numBodiesCPU-1];
-	totalNumSplitBodiesCPU += numlastBody;
-
-		int numBodies = bodiesGPU->size();
-	int numManifolds = manifoldPtrGPU->size();
-
-	m_data->m_bodyCount->resize(numBodies);
-	
-	unsigned int val=0;
-	btInt2 val2;
-	val2.x=0;
-	val2.y=0;
-
-	 {
-		BT_PROFILE("m_filler");
-		m_data->m_contactConstraintOffsets->resize(numManifolds);
-		m_data->m_filler->execute(*m_data->m_bodyCount,val,numBodies);
-		
-	
-		m_data->m_filler->execute(*m_data->m_contactConstraintOffsets,val2,numManifolds);
-	}
-
-	{
-		BT_PROFILE("m_countBodiesKernel");
-		btLauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel);
-		launcher.setBuffer(manifoldPtrGPU->getBufferCL());
-		launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
-		launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
-		launcher.setConst(numManifolds);
-		launcher.setConst(solverInfo.m_fixedBodyIndex);
-		launcher.launch1D(numManifolds);
-	}
-
-	unsigned int totalNumSplitBodies=0;
-	m_data->m_offsetSplitBodies->resize(numBodies);
-	m_data->m_scan->execute(*m_data->m_bodyCount,*m_data->m_offsetSplitBodies,numBodies,&totalNumSplitBodies);
-	totalNumSplitBodies+=m_data->m_bodyCount->at(numBodies-1);
-
-	if (totalNumSplitBodies != totalNumSplitBodiesCPU)
-	{
-		printf("error in totalNumSplitBodies!\n");
-	}
-
-	int numContacts = manifoldPtrGPU->size();
-	m_data->m_contactConstraints->resize(numContacts);
-
-	
-	{
-		BT_PROFILE("contactToConstraintSplitKernel");
-		btLauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel);
-		launcher.setBuffer(manifoldPtrGPU->getBufferCL());
-		launcher.setBuffer(bodiesGPU->getBufferCL());
-		launcher.setBuffer(inertiasGPU->getBufferCL());
-		launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
-		launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
-        launcher.setConst(numContacts);
-		launcher.setConst(solverInfo.m_deltaTime);
-		launcher.setConst(solverInfo.m_positionDrift);
-		launcher.setConst(solverInfo.m_positionConstraintCoeff);
-		launcher.launch1D( numContacts, 64 );
-		clFinish(m_queue);
-	}
-
-
-
-	btAlignedObjectArray<btGpuConstraint4> contactConstraints;
-	contactConstraints.resize(numManifoldsCPU);
-
-	for (int i=0;i<numManifoldsCPU;i++)
-	{
-		ContactToConstraintKernel(&manifoldPtrCPU[0],&bodiesCPU[0],&inertiasCPU[0],&contactConstraints[0],numManifoldsCPU,
-			solverInfo.m_deltaTime,
-			solverInfo.m_positionDrift,
-			solverInfo.m_positionConstraintCoeff,
-			i, bodyCount);
-	}
-	int maxIter = solverInfo.m_numIterations;
-
-
-	btAlignedObjectArray<btVector3> deltaLinearVelocities;
-	btAlignedObjectArray<btVector3> deltaAngularVelocities;
-	deltaLinearVelocities.resize(totalNumSplitBodiesCPU);
-	deltaAngularVelocities.resize(totalNumSplitBodiesCPU);
-	for (int i=0;i<totalNumSplitBodiesCPU;i++)
-	{
-		deltaLinearVelocities[i].setZero();
-		deltaAngularVelocities[i].setZero();
-	}
-
-	m_data->m_deltaLinearVelocities->resize(totalNumSplitBodies);
-	m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies);
-
-
-	
-	{
-		BT_PROFILE("m_clearVelocitiesKernel");
-		btLauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel);
-		launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
-		launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
-		launch.setConst(totalNumSplitBodies);
-		launch.launch1D(totalNumSplitBodies);
-	}
-	
-
-		///!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-
-	m_data->m_contactConstraints->copyToHost(contactConstraints);
-	m_data->m_offsetSplitBodies->copyToHost(offsetSplitBodies);
-	m_data->m_contactConstraintOffsets->copyToHost(contactConstraintOffsets);
-	m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities);
-	m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities);
-
-	for (int iter = 0;iter<maxIter;iter++)
-	{
-
-				{
-			BT_PROFILE("m_solveContactKernel");
-			btLauncherCL launcher( m_queue, m_data->m_solveContactKernel );
-			launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
-			launcher.setBuffer(bodiesGPU->getBufferCL());
-			launcher.setBuffer(inertiasGPU->getBufferCL());
-			launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
-			launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
-			launcher.setConst(solverInfo.m_deltaTime);
-			launcher.setConst(solverInfo.m_positionDrift);
-			launcher.setConst(solverInfo.m_positionConstraintCoeff);
-			launcher.setConst(solverInfo.m_fixedBodyIndex);
-			launcher.setConst(numManifolds);
-
-			launcher.launch1D(numManifolds);
-			clFinish(m_queue);
-		}
-
-
-		int i=0;
-		for( i=0; i<numManifoldsCPU; i++)
-		{
-
-			float frictionCoeff = contactConstraints[i].getFrictionCoeff();
-			int aIdx = (int)contactConstraints[i].m_bodyA;
-			int bIdx = (int)contactConstraints[i].m_bodyB;
-			btRigidBodyCL& bodyA = bodiesCPU[aIdx];
-			btRigidBodyCL& bodyB = bodiesCPU[bIdx];
-
-			btVector3 zero(0,0,0);
-			
-			btVector3* dlvAPtr=&zero;
-			btVector3* davAPtr=&zero;
-			btVector3* dlvBPtr=&zero;
-			btVector3* davBPtr=&zero;
-			
-			if (bodyA.getInvMass())
-			{
-				int bodyOffsetA = offsetSplitBodies[aIdx];
-				int constraintOffsetA = contactConstraintOffsets[i].x;
-				int splitIndexA = bodyOffsetA+constraintOffsetA;
-				dlvAPtr = &deltaLinearVelocities[splitIndexA];
-				davAPtr = &deltaAngularVelocities[splitIndexA];
-			}
-
-			if (bodyB.getInvMass())
-			{
-				int bodyOffsetB = offsetSplitBodies[bIdx];
-				int constraintOffsetB = contactConstraintOffsets[i].y;
-				int splitIndexB= bodyOffsetB+constraintOffsetB;
-				dlvBPtr =&deltaLinearVelocities[splitIndexB];
-				davBPtr = &deltaAngularVelocities[splitIndexB];
-			}
-
-
-
-			{
-				float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
-				float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
-
-				solveContact( contactConstraints[i], (btVector3&)bodyA.m_pos, (btVector3&)bodyA.m_linVel, (btVector3&)bodyA.m_angVel, bodyA.m_invMass, inertiasCPU[aIdx].m_invInertiaWorld, 
-					(btVector3&)bodyB.m_pos, (btVector3&)bodyB.m_linVel, (btVector3&)bodyB.m_angVel, bodyB.m_invMass, inertiasCPU[bIdx].m_invInertiaWorld,
-					maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr		);
-
-
-			}
-		}
-
-		
-		{
-			BT_PROFILE("average velocities");
-			btLauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel);
-			launcher.setBuffer(bodiesGPU->getBufferCL());
-			launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
-			launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
-			launcher.setConst(numBodies);
-			launcher.launch1D(numBodies);
-			clFinish(m_queue);
-		}
-
-		//easy
-		for (int i=0;i<numBodiesCPU;i++)
-		{
-			if (bodiesCPU[i].getInvMass())
-			{
-				int bodyOffset = offsetSplitBodies[i];
-				int count = bodyCount[i];
-				float factor = 1.f/float(count);
-				btVector3 averageLinVel;
-				averageLinVel.setZero();
-				btVector3 averageAngVel;
-				averageAngVel.setZero();
-				for (int j=0;j<count;j++)
-				{
-					averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;
-					averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;
-				}
-				for (int j=0;j<count;j++)
-				{
-					deltaLinearVelocities[bodyOffset+j] = averageLinVel;
-					deltaAngularVelocities[bodyOffset+j] = averageAngVel;
-				}
-			}
-		}
-//	m_data->m_deltaAngularVelocities->copyFromHost(deltaAngularVelocities);
-	//m_data->m_deltaLinearVelocities->copyFromHost(deltaLinearVelocities);
-	m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities);
-	m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities);
-
-#if 0
-
-		{
-			BT_PROFILE("m_solveFrictionKernel");
-			btLauncherCL launcher( m_queue, m_data->m_solveFrictionKernel);
-			launcher.setBuffer(m_data->m_contactConstraints->getBufferCL());
-			launcher.setBuffer(bodiesGPU->getBufferCL());
-			launcher.setBuffer(inertiasGPU->getBufferCL());
-			launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL());
-			launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
-			launcher.setConst(solverInfo.m_deltaTime);
-			launcher.setConst(solverInfo.m_positionDrift);
-			launcher.setConst(solverInfo.m_positionConstraintCoeff);
-			launcher.setConst(solverInfo.m_fixedBodyIndex);
-			launcher.setConst(numManifolds);
-
-			launcher.launch1D(numManifolds);
-			clFinish(m_queue);
-		}
-
-		//solve friction
-
-		for(int i=0; i<numManifoldsCPU; i++)
-		{
-			float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
-			float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
-
-			float sum = 0;
-			for(int j=0; j<4; j++)
-			{
-				sum +=contactConstraints[i].m_appliedRambdaDt[j];
-			}
-			float frictionCoeff = contactConstraints[i].getFrictionCoeff();
-			int aIdx = (int)contactConstraints[i].m_bodyA;
-			int bIdx = (int)contactConstraints[i].m_bodyB;
-			btRigidBodyCL& bodyA = bodiesCPU[aIdx];
-			btRigidBodyCL& bodyB = bodiesCPU[bIdx];
-
-			btVector3 zero(0,0,0);
-			
-			btVector3* dlvAPtr=&zero;
-			btVector3* davAPtr=&zero;
-			btVector3* dlvBPtr=&zero;
-			btVector3* davBPtr=&zero;
-			
-			if (bodyA.getInvMass())
-			{
-				int bodyOffsetA = offsetSplitBodies[aIdx];
-				int constraintOffsetA = contactConstraintOffsets[i].x;
-				int splitIndexA = bodyOffsetA+constraintOffsetA;
-				dlvAPtr = &deltaLinearVelocities[splitIndexA];
-				davAPtr = &deltaAngularVelocities[splitIndexA];
-			}
-
-			if (bodyB.getInvMass())
-			{
-				int bodyOffsetB = offsetSplitBodies[bIdx];
-				int constraintOffsetB = contactConstraintOffsets[i].y;
-				int splitIndexB= bodyOffsetB+constraintOffsetB;
-				dlvBPtr =&deltaLinearVelocities[splitIndexB];
-				davBPtr = &deltaAngularVelocities[splitIndexB];
-			}
-
-			for(int j=0; j<4; j++)
-			{
-				maxRambdaDt[j] = frictionCoeff*sum;
-				minRambdaDt[j] = -maxRambdaDt[j];
-			}
-
-			solveFriction( contactConstraints[i], (btVector3&)bodyA.m_pos, (btVector3&)bodyA.m_linVel, (btVector3&)bodyA.m_angVel, bodyA.m_invMass,inertiasCPU[aIdx].m_invInertiaWorld, 
-				(btVector3&)bodyB.m_pos, (btVector3&)bodyB.m_linVel, (btVector3&)bodyB.m_angVel, bodyB.m_invMass, inertiasCPU[bIdx].m_invInertiaWorld,
-				maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr);
-
-		}
-
-		{
-			BT_PROFILE("average velocities");
-			btLauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel);
-			launcher.setBuffer(bodiesGPU->getBufferCL());
-			launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
-			launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
-			launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
-			launcher.setConst(numBodies);
-			launcher.launch1D(numBodies);
-			clFinish(m_queue);
-		}
-
-		//easy
-		for (int i=0;i<numBodiesCPU;i++)
-		{
-			if (bodiesCPU[i].getInvMass())
-			{
-				int bodyOffset = offsetSplitBodies[i];
-				int count = bodyCount[i];
-				float factor = 1.f/float(count);
-				btVector3 averageLinVel;
-				averageLinVel.setZero();
-				btVector3 averageAngVel;
-				averageAngVel.setZero();
-				for (int j=0;j<count;j++)
-				{
-					averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;
-					averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;
-				}
-				for (int j=0;j<count;j++)
-				{
-					deltaLinearVelocities[bodyOffset+j] = averageLinVel;
-					deltaAngularVelocities[bodyOffset+j] = averageAngVel;
-				}
-			}
-		}
-
-#endif
-
-	}
-
-	{
-		BT_PROFILE("update body velocities");
-		btLauncherCL launcher( m_queue, m_data->m_updateBodyVelocitiesKernel);
-		launcher.setBuffer(bodiesGPU->getBufferCL());
-		launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL());
-		launcher.setBuffer(m_data->m_bodyCount->getBufferCL());
-		launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL());
-		launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL());
-		launcher.setConst(numBodies);
-		launcher.launch1D(numBodies);
-		clFinish(m_queue);
-	}
-
-
-	//easy
-	for (int i=0;i<numBodiesCPU;i++)
-	{
-		if (bodiesCPU[i].getInvMass())
-		{
-			int bodyOffset = offsetSplitBodies[i];
-			int count = bodyCount[i];
-			if (count)
-			{
-				bodiesCPU[i].m_linVel += deltaLinearVelocities[bodyOffset];
-				bodiesCPU[i].m_angVel += deltaAngularVelocities[bodyOffset];
-			}
-		}
-	}
-
-
-//	bodiesGPU->copyFromHost(bodiesCPU);
-
-
-}
-\ No newline at end of file
diff --git a/opencl/gpu_rigidbody/host/btGpuJacobiSolver.h b/opencl/gpu_rigidbody/host/btGpuJacobiSolver.h
deleted file mode 100644
index 9e8577c1b..000000000
--- a/opencl/gpu_rigidbody/host/btGpuJacobiSolver.h
+++ /dev/null
@@ -1,53 +0,0 @@
-
-#ifndef BT_GPU_JACOBI_SOLVER_H
-#define BT_GPU_JACOBI_SOLVER_H
-#include "../../basic_initialize/btOpenCLUtils.h"
-
-#include "../../gpu_narrowphase/host/btRigidBodyCL.h"
-#include "../../gpu_narrowphase/host/btContact4.h"
-#include "../../parallel_primitives/host/btOpenCLArray.h"
-
-class btTypedConstraint;
-
-struct btJacobiSolverInfo
-{
-	int m_fixedBodyIndex;
-
-	float m_deltaTime;
-	float m_positionDrift;
-	float m_positionConstraintCoeff;
-	int	m_numIterations;
-
-	btJacobiSolverInfo()
-		:m_fixedBodyIndex(0),
-		m_deltaTime(1./60.f),
-		m_positionDrift( 0.005f ), 
-		m_positionConstraintCoeff( 0.99f ),
-		m_numIterations(14)
-	{
-	}
-};
-class btGpuJacobiSolver
-{
-protected:
-
-	struct btGpuJacobiSolverInternalData* m_data;
-
-	cl_context m_context;
-	cl_device_id m_device;
-	cl_command_queue m_queue;
-
-public:
-
-	btGpuJacobiSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity);
-	virtual ~btGpuJacobiSolver();
-
-
-
-	void  solveGroupHost(btRigidBodyCL* bodies,btInertiaCL* inertias,int numBodies,btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btJacobiSolverInfo& solverInfo);
-	void  solveGroup(btOpenCLArray<btRigidBodyCL>* bodies,btOpenCLArray<btInertiaCL>* inertias,btOpenCLArray<btContact4>* manifoldPtr,const btJacobiSolverInfo& solverInfo);
-	void  solveGroupMixed(btOpenCLArray<btRigidBodyCL>* bodies,btOpenCLArray<btInertiaCL>* inertias,btOpenCLArray<btContact4>* manifoldPtr,const btJacobiSolverInfo& solverInfo);
-
-};
-#endif //BT_GPU_JACOBI_SOLVER_H
-
diff --git a/opencl/gpu_rigidbody/host/btJacobianEntry.h b/opencl/gpu_rigidbody/host/btJacobianEntry.h
deleted file mode 100644
index 7ae448fbc..000000000
--- a/opencl/gpu_rigidbody/host/btJacobianEntry.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef BT_JACOBIAN_ENTRY_H
-#define BT_JACOBIAN_ENTRY_H
-
-#include "BulletCommon/btMatrix3x3.h"
-
-
-//notes:
-// Another memory optimization would be to store m_1MinvJt in the remaining 3 w components
-// which makes the btJacobianEntry memory layout 16 bytes
-// if you only are interested in angular part, just feed massInvA and massInvB zero
-
-/// Jacobian entry is an abstraction that allows to describe constraints
-/// it can be used in combination with a constraint solver
-/// Can be used to relate the effect of an impulse to the constraint error
-ATTRIBUTE_ALIGNED16(class) btJacobianEntry
-{
-public:
-	btJacobianEntry() {};
-	//constraint between two different rigidbodies
-	btJacobianEntry(
-		const btMatrix3x3& world2A,
-		const btMatrix3x3& world2B,
-		const btVector3& rel_pos1,const btVector3& rel_pos2,
-		const btVector3& jointAxis,
-		const btVector3& inertiaInvA, 
-		const btScalar massInvA,
-		const btVector3& inertiaInvB,
-		const btScalar massInvB)
-		:m_linearJointAxis(jointAxis)
-	{
-		m_aJ = world2A*(rel_pos1.cross(m_linearJointAxis));
-		m_bJ = world2B*(rel_pos2.cross(-m_linearJointAxis));
-		m_0MinvJt	= inertiaInvA * m_aJ;
-		m_1MinvJt = inertiaInvB * m_bJ;
-		m_Adiag = massInvA + m_0MinvJt.dot(m_aJ) + massInvB + m_1MinvJt.dot(m_bJ);
-
-		btAssert(m_Adiag > btScalar(0.0));
-	}
-
-	//angular constraint between two different rigidbodies
-	btJacobianEntry(const btVector3& jointAxis,
-		const btMatrix3x3& world2A,
-		const btMatrix3x3& world2B,
-		const btVector3& inertiaInvA,
-		const btVector3& inertiaInvB)
-		:m_linearJointAxis(btVector3(btScalar(0.),btScalar(0.),btScalar(0.)))
-	{
-		m_aJ= world2A*jointAxis;
-		m_bJ = world2B*-jointAxis;
-		m_0MinvJt	= inertiaInvA * m_aJ;
-		m_1MinvJt = inertiaInvB * m_bJ;
-		m_Adiag =  m_0MinvJt.dot(m_aJ) + m_1MinvJt.dot(m_bJ);
-
-		btAssert(m_Adiag > btScalar(0.0));
-	}
-
-	//angular constraint between two different rigidbodies
-	btJacobianEntry(const btVector3& axisInA,
-		const btVector3& axisInB,
-		const btVector3& inertiaInvA,
-		const btVector3& inertiaInvB)
-		: m_linearJointAxis(btVector3(btScalar(0.),btScalar(0.),btScalar(0.)))
-		, m_aJ(axisInA)
-		, m_bJ(-axisInB)
-	{
-		m_0MinvJt	= inertiaInvA * m_aJ;
-		m_1MinvJt = inertiaInvB * m_bJ;
-		m_Adiag =  m_0MinvJt.dot(m_aJ) + m_1MinvJt.dot(m_bJ);
-
-		btAssert(m_Adiag > btScalar(0.0));
-	}
-
-	//constraint on one rigidbody
-	btJacobianEntry(
-		const btMatrix3x3& world2A,
-		const btVector3& rel_pos1,const btVector3& rel_pos2,
-		const btVector3& jointAxis,
-		const btVector3& inertiaInvA, 
-		const btScalar massInvA)
-		:m_linearJointAxis(jointAxis)
-	{
-		m_aJ= world2A*(rel_pos1.cross(jointAxis));
-		m_bJ = world2A*(rel_pos2.cross(-jointAxis));
-		m_0MinvJt	= inertiaInvA * m_aJ;
-		m_1MinvJt = btVector3(btScalar(0.),btScalar(0.),btScalar(0.));
-		m_Adiag = massInvA + m_0MinvJt.dot(m_aJ);
-
-		btAssert(m_Adiag > btScalar(0.0));
-	}
-
-	btScalar	getDiagonal() const { return m_Adiag; }
-
-	// for two constraints on the same rigidbody (for example vehicle friction)
-	btScalar	getNonDiagonal(const btJacobianEntry& jacB, const btScalar massInvA) const
-	{
-		const btJacobianEntry& jacA = *this;
-		btScalar lin = massInvA * jacA.m_linearJointAxis.dot(jacB.m_linearJointAxis);
-		btScalar ang = jacA.m_0MinvJt.dot(jacB.m_aJ);
-		return lin + ang;
-	}
-
-	
-
-	// for two constraints on sharing two same rigidbodies (for example two contact points between two rigidbodies)
-	btScalar	getNonDiagonal(const btJacobianEntry& jacB,const btScalar massInvA,const btScalar massInvB) const
-	{
-		const btJacobianEntry& jacA = *this;
-		btVector3 lin = jacA.m_linearJointAxis * jacB.m_linearJointAxis;
-		btVector3 ang0 = jacA.m_0MinvJt * jacB.m_aJ;
-		btVector3 ang1 = jacA.m_1MinvJt * jacB.m_bJ;
-		btVector3 lin0 = massInvA * lin ;
-		btVector3 lin1 = massInvB * lin;
-		btVector3 sum = ang0+ang1+lin0+lin1;
-		return sum[0]+sum[1]+sum[2];
-	}
-
-	btScalar getRelativeVelocity(const btVector3& linvelA,const btVector3& angvelA,const btVector3& linvelB,const btVector3& angvelB)
-	{
-		btVector3 linrel = linvelA - linvelB;
-		btVector3 angvela  = angvelA * m_aJ;
-		btVector3 angvelb  = angvelB * m_bJ;
-		linrel *= m_linearJointAxis;
-		angvela += angvelb;
-		angvela += linrel;
-		btScalar rel_vel2 = angvela[0]+angvela[1]+angvela[2];
-		return rel_vel2 + SIMD_EPSILON;
-	}
-//private:
-
-	btVector3	m_linearJointAxis;
-	btVector3	m_aJ;
-	btVector3	m_bJ;
-	btVector3	m_0MinvJt;
-	btVector3	m_1MinvJt;
-	//Optimization: can be stored in the w/last component of one of the vectors
-	btScalar	m_Adiag;
-
-};
-
-#endif //BT_JACOBIAN_ENTRY_H
diff --git a/opencl/gpu_rigidbody/host/btPgsJacobiSolver.cpp b/opencl/gpu_rigidbody/host/btPgsJacobiSolver.cpp
deleted file mode 100644
index 23a36cf62..000000000
--- a/opencl/gpu_rigidbody/host/btPgsJacobiSolver.cpp
+++ /dev/null
@@ -1,1815 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2012 Erwin Coumans  http://bulletphysics.org
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-//enable BT_SOLVER_DEBUG if you experience solver crashes
-//#define BT_SOLVER_DEBUG
-//#define COMPUTE_IMPULSE_DENOM 1
-//It is not necessary (redundant) to refresh contact manifolds, this refresh has been moved to the collision algorithms.
-
-#define DISABLE_JOINTS
-
-#include "btPgsJacobiSolver.h"
-#include "BulletCommon/btMinMax.h"
-#include "btTypedConstraint.h"
-#include <new>
-#include "BulletCommon/btStackAlloc.h"
-#include "BulletCommon/btQuickprof.h"
-//#include "btSolverBody.h"
-//#include "btSolverConstraint.h"
-#include "BulletCommon/btAlignedObjectArray.h"
-#include <string.h> //for memset
-//#include "../../dynamics/basic_demo/Stubs/AdlContact4.h"
-#include "../../gpu_narrowphase/host/btContact4.h"
-
-bool usePgs = false;//true;
-int		gNumSplitImpulseRecoveries2 = 0;
-
-#include "btRigidBody.h"
-//#include "../../dynamics/basic_demo/Stubs/AdlRigidBody.h"
-#include "../../gpu_narrowphase/host/btRigidBodyCL.h"
-
-btTransform	getWorldTransform(btRigidBodyCL* rb)
-{
-	btTransform newTrans;
-	newTrans.setOrigin(rb->m_pos);
-	newTrans.setRotation(rb->m_quat);
-	return newTrans;
-}
-
-const btMatrix3x3&	getInvInertiaTensorWorld(btInertiaCL* inertia)
-{
-	return inertia->m_invInertiaWorld;
-}
-
-
-
-const btVector3&	getLinearVelocity(btRigidBodyCL* rb)
-{
-	return rb->m_linVel;
-}
-
-const btVector3&	getAngularVelocity(btRigidBodyCL* rb)
-{
-	return rb->m_angVel;
-}
-
-btVector3 getVelocityInLocalPoint(btRigidBodyCL* rb, const btVector3& rel_pos)
-{
-	//we also calculate lin/ang velocity for kinematic objects
-	return getLinearVelocity(rb) + getAngularVelocity(rb).cross(rel_pos);
-	
-}
-
-struct	btContactPoint
-{
-	btVector3	m_positionWorldOnA;
-	btVector3	m_positionWorldOnB;
-	btVector3	m_normalWorldOnB;
-	btScalar	m_appliedImpulse;
-	btScalar	m_distance;
-	btScalar	m_combinedRestitution;
-
-	///information related to friction
-	btScalar	m_combinedFriction;
-	btVector3	m_lateralFrictionDir1;
-	btVector3	m_lateralFrictionDir2;
-	btScalar	m_appliedImpulseLateral1;
-	btScalar	m_appliedImpulseLateral2;	
-	btScalar	m_combinedRollingFriction;
-	btScalar	m_contactMotion1;
-	btScalar	m_contactMotion2;
-	btScalar	m_contactCFM1;
-	btScalar	m_contactCFM2;
-	
-	bool		m_lateralFrictionInitialized;
-
-	btVector3	getPositionWorldOnA()
-	{
-		return m_positionWorldOnA;
-	}
-	btVector3	getPositionWorldOnB()
-	{
-		return m_positionWorldOnB;
-	}
-	btScalar	getDistance()
-	{
-		return m_distance;
-	}
-};
-
-void	getContactPoint(btContact4* contact, int contactIndex, btContactPoint& pointOut)
-{
-	pointOut.m_appliedImpulse = 0.f;
-	pointOut.m_appliedImpulseLateral1 = 0.f;
-	pointOut.m_appliedImpulseLateral2 = 0.f;
-	pointOut.m_combinedFriction = contact->getFrictionCoeff();
-	pointOut.m_combinedRestitution = contact->getRestituitionCoeff();
-	pointOut.m_combinedRollingFriction = 0.f;
-	pointOut.m_contactCFM1 = 0.f;
-	pointOut.m_contactCFM2 = 0.f;
-	pointOut.m_contactMotion1 = 0.f;
-	pointOut.m_contactMotion2 = 0.f;
-	pointOut.m_distance = contact->getPenetration(contactIndex)+0.01;
-	btVector3 n = contact->m_worldNormal;
-	btVector3 normalOnB(-n);
-	normalOnB.normalize();
-
-	btVector3 l1,l2;
-	btPlaneSpace1(normalOnB,l1,l2);
-
-	pointOut.m_normalWorldOnB = normalOnB;
-	//printf("normalOnB = %f,%f,%f\n",normalOnB.getX(),normalOnB.getY(),normalOnB.getZ());
-	pointOut.m_lateralFrictionDir1 = l1;
-	pointOut.m_lateralFrictionDir2 = l2;
-	pointOut.m_lateralFrictionInitialized = true;
-	
-	
-	btVector3 worldPosB = contact->m_worldPos[contactIndex];
-	pointOut.m_positionWorldOnB = worldPosB;
-	pointOut.m_positionWorldOnA = worldPosB+normalOnB*pointOut.m_distance;
-}
-
-int	getNumContacts(btContact4* contact)
-{
-	return contact->getNPoints();
-}
-
-btPgsJacobiSolver::btPgsJacobiSolver()
-:m_btSeed2(0),m_usePgs(usePgs)
-{
-
-}
-
-btPgsJacobiSolver::~btPgsJacobiSolver()
-{
-}
-
-void	btPgsJacobiSolver::solveContacts(int numBodies, btRigidBodyCL* bodies, btInertiaCL* inertias, int numContacts, btContact4* contacts)
-{
-	btContactSolverInfo infoGlobal;
-	infoGlobal.m_splitImpulse = false;
-	infoGlobal.m_timeStep = 1.f/60.f;
-	infoGlobal.m_numIterations = 4;//4;
-//	infoGlobal.m_solverMode|=SOLVER_USE_2_FRICTION_DIRECTIONS|SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS|SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION;
-	//infoGlobal.m_solverMode|=SOLVER_USE_2_FRICTION_DIRECTIONS|SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS;
-	infoGlobal.m_solverMode|=SOLVER_USE_2_FRICTION_DIRECTIONS;
-
-	//if (infoGlobal.m_solverMode & SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS)
-	//if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS) && (infoGlobal.m_solverMode & SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION))
-				
-
-	solveGroup(bodies,inertias,numBodies,contacts,numContacts,0,0,infoGlobal);
-
-	if (!numContacts)
-		return;
-}
-
-
-
-
-/// btPgsJacobiSolver Sequentially applies impulses
-btScalar btPgsJacobiSolver::solveGroup(btRigidBodyCL* bodies,
-										btInertiaCL* inertias, 
-										int numBodies,
-										btContact4* manifoldPtr, 
-										int numManifolds,
-										btTypedConstraint** constraints,
-										int numConstraints,
-										const btContactSolverInfo& infoGlobal)
-{
-
-	BT_PROFILE("solveGroup");
-	//you need to provide at least some bodies
-	
-	solveGroupCacheFriendlySetup( bodies, inertias,numBodies, manifoldPtr,  numManifolds,constraints, numConstraints,infoGlobal);
-
-	solveGroupCacheFriendlyIterations(constraints, numConstraints,infoGlobal);
-
-	solveGroupCacheFriendlyFinish(bodies, inertias,numBodies, infoGlobal);
-	
-	return 0.f;
-}
-
-
-
-
-
-
-
-
-
-#ifdef USE_SIMD
-#include <emmintrin.h>
-#define btVecSplat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e))
-static inline __m128 btSimdDot3( __m128 vec0, __m128 vec1 )
-{
-	__m128 result = _mm_mul_ps( vec0, vec1);
-	return _mm_add_ps( btVecSplat( result, 0 ), _mm_add_ps( btVecSplat( result, 1 ), btVecSplat( result, 2 ) ) );
-}
-#endif//USE_SIMD
-
-// Project Gauss Seidel or the equivalent Sequential Impulse
-void btPgsJacobiSolver::resolveSingleConstraintRowGenericSIMD(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& c)
-{
-#ifdef USE_SIMD
-	__m128 cpAppliedImp = _mm_set1_ps(c.m_appliedImpulse);
-	__m128	lowerLimit1 = _mm_set1_ps(c.m_lowerLimit);
-	__m128	upperLimit1 = _mm_set1_ps(c.m_upperLimit);
-	__m128 deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhs), _mm_mul_ps(_mm_set1_ps(c.m_appliedImpulse),_mm_set1_ps(c.m_cfm)));
-	__m128 deltaVel1Dotn	=	_mm_add_ps(btSimdDot3(c.m_contactNormal.mVec128,body1.internalGetDeltaLinearVelocity().mVec128), btSimdDot3(c.m_relpos1CrossNormal.mVec128,body1.internalGetDeltaAngularVelocity().mVec128));
-	__m128 deltaVel2Dotn	=	_mm_sub_ps(btSimdDot3(c.m_relpos2CrossNormal.mVec128,body2.internalGetDeltaAngularVelocity().mVec128),btSimdDot3((c.m_contactNormal).mVec128,body2.internalGetDeltaLinearVelocity().mVec128));
-	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel1Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
-	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel2Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
-	btSimdScalar sum = _mm_add_ps(cpAppliedImp,deltaImpulse);
-	btSimdScalar resultLowerLess,resultUpperLess;
-	resultLowerLess = _mm_cmplt_ps(sum,lowerLimit1);
-	resultUpperLess = _mm_cmplt_ps(sum,upperLimit1);
-	__m128 lowMinApplied = _mm_sub_ps(lowerLimit1,cpAppliedImp);
-	deltaImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse) );
-	c.m_appliedImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum) );
-	__m128 upperMinApplied = _mm_sub_ps(upperLimit1,cpAppliedImp);
-	deltaImpulse = _mm_or_ps( _mm_and_ps(resultUpperLess, deltaImpulse), _mm_andnot_ps(resultUpperLess, upperMinApplied) );
-	c.m_appliedImpulse = _mm_or_ps( _mm_and_ps(resultUpperLess, c.m_appliedImpulse), _mm_andnot_ps(resultUpperLess, upperLimit1) );
-	__m128	linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128,body1.internalGetInvMass().mVec128);
-	__m128	linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128,body2.internalGetInvMass().mVec128);
-	__m128 impulseMagnitude = deltaImpulse;
-	body1.internalGetDeltaLinearVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaLinearVelocity().mVec128,_mm_mul_ps(linearComponentA,impulseMagnitude));
-	body1.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaAngularVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentA.mVec128,impulseMagnitude));
-	body2.internalGetDeltaLinearVelocity().mVec128 = _mm_sub_ps(body2.internalGetDeltaLinearVelocity().mVec128,_mm_mul_ps(linearComponentB,impulseMagnitude));
-	body2.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaAngularVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentB.mVec128,impulseMagnitude));
-#else
-	resolveSingleConstraintRowGeneric(body1,body2,c);
-#endif
-}
-
-// Project Gauss Seidel or the equivalent Sequential Impulse
- void btPgsJacobiSolver::resolveSingleConstraintRowGeneric(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& c)
-{
-	btScalar deltaImpulse = c.m_rhs-btScalar(c.m_appliedImpulse)*c.m_cfm;
-	const btScalar deltaVel1Dotn	=	c.m_contactNormal.dot(body1.internalGetDeltaLinearVelocity()) 	+ c.m_relpos1CrossNormal.dot(body1.internalGetDeltaAngularVelocity());
-	const btScalar deltaVel2Dotn	=	-c.m_contactNormal.dot(body2.internalGetDeltaLinearVelocity()) + c.m_relpos2CrossNormal.dot(body2.internalGetDeltaAngularVelocity());
-
-//	const btScalar delta_rel_vel	=	deltaVel1Dotn-deltaVel2Dotn;
-	deltaImpulse	-=	deltaVel1Dotn*c.m_jacDiagABInv;
-	deltaImpulse	-=	deltaVel2Dotn*c.m_jacDiagABInv;
-
-	const btScalar sum = btScalar(c.m_appliedImpulse) + deltaImpulse;
-	if (sum < c.m_lowerLimit)
-	{
-		deltaImpulse = c.m_lowerLimit-c.m_appliedImpulse;
-		c.m_appliedImpulse = c.m_lowerLimit;
-	}
-	else if (sum > c.m_upperLimit) 
-	{
-		deltaImpulse = c.m_upperLimit-c.m_appliedImpulse;
-		c.m_appliedImpulse = c.m_upperLimit;
-	}
-	else
-	{
-		c.m_appliedImpulse = sum;
-	}
-
-	body1.internalApplyImpulse(c.m_contactNormal*body1.internalGetInvMass(),c.m_angularComponentA,deltaImpulse);
-	body2.internalApplyImpulse(-c.m_contactNormal*body2.internalGetInvMass(),c.m_angularComponentB,deltaImpulse);
-}
-
- void btPgsJacobiSolver::resolveSingleConstraintRowLowerLimitSIMD(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& c)
-{
-#ifdef USE_SIMD
-	__m128 cpAppliedImp = _mm_set1_ps(c.m_appliedImpulse);
-	__m128	lowerLimit1 = _mm_set1_ps(c.m_lowerLimit);
-	__m128	upperLimit1 = _mm_set1_ps(c.m_upperLimit);
-	__m128 deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhs), _mm_mul_ps(_mm_set1_ps(c.m_appliedImpulse),_mm_set1_ps(c.m_cfm)));
-	__m128 deltaVel1Dotn	=	_mm_add_ps(btSimdDot3(c.m_contactNormal.mVec128,body1.internalGetDeltaLinearVelocity().mVec128), btSimdDot3(c.m_relpos1CrossNormal.mVec128,body1.internalGetDeltaAngularVelocity().mVec128));
-	__m128 deltaVel2Dotn	=	_mm_sub_ps(btSimdDot3(c.m_relpos2CrossNormal.mVec128,body2.internalGetDeltaAngularVelocity().mVec128),btSimdDot3((c.m_contactNormal).mVec128,body2.internalGetDeltaLinearVelocity().mVec128));
-	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel1Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
-	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel2Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
-	btSimdScalar sum = _mm_add_ps(cpAppliedImp,deltaImpulse);
-	btSimdScalar resultLowerLess,resultUpperLess;
-	resultLowerLess = _mm_cmplt_ps(sum,lowerLimit1);
-	resultUpperLess = _mm_cmplt_ps(sum,upperLimit1);
-	__m128 lowMinApplied = _mm_sub_ps(lowerLimit1,cpAppliedImp);
-	deltaImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse) );
-	c.m_appliedImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum) );
-	__m128	linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128,body1.internalGetInvMass().mVec128);
-	__m128	linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128,body2.internalGetInvMass().mVec128);
-	__m128 impulseMagnitude = deltaImpulse;
-	body1.internalGetDeltaLinearVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaLinearVelocity().mVec128,_mm_mul_ps(linearComponentA,impulseMagnitude));
-	body1.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body1.internalGetDeltaAngularVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentA.mVec128,impulseMagnitude));
-	body2.internalGetDeltaLinearVelocity().mVec128 = _mm_sub_ps(body2.internalGetDeltaLinearVelocity().mVec128,_mm_mul_ps(linearComponentB,impulseMagnitude));
-	body2.internalGetDeltaAngularVelocity().mVec128 = _mm_add_ps(body2.internalGetDeltaAngularVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentB.mVec128,impulseMagnitude));
-#else
-	resolveSingleConstraintRowLowerLimit(body1,body2,c);
-#endif
-}
-
-// Project Gauss Seidel or the equivalent Sequential Impulse
- void btPgsJacobiSolver::resolveSingleConstraintRowLowerLimit(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& c)
-{
-	btScalar deltaImpulse = c.m_rhs-btScalar(c.m_appliedImpulse)*c.m_cfm;
-	const btScalar deltaVel1Dotn	=	c.m_contactNormal.dot(body1.internalGetDeltaLinearVelocity()) 	+ c.m_relpos1CrossNormal.dot(body1.internalGetDeltaAngularVelocity());
-	const btScalar deltaVel2Dotn	=	-c.m_contactNormal.dot(body2.internalGetDeltaLinearVelocity()) + c.m_relpos2CrossNormal.dot(body2.internalGetDeltaAngularVelocity());
-
-	deltaImpulse	-=	deltaVel1Dotn*c.m_jacDiagABInv;
-	deltaImpulse	-=	deltaVel2Dotn*c.m_jacDiagABInv;
-	const btScalar sum = btScalar(c.m_appliedImpulse) + deltaImpulse;
-	if (sum < c.m_lowerLimit)
-	{
-		deltaImpulse = c.m_lowerLimit-c.m_appliedImpulse;
-		c.m_appliedImpulse = c.m_lowerLimit;
-	}
-	else
-	{
-		c.m_appliedImpulse = sum;
-	}
-	body1.internalApplyImpulse(c.m_contactNormal*body1.internalGetInvMass(),c.m_angularComponentA,deltaImpulse);
-	body2.internalApplyImpulse(-c.m_contactNormal*body2.internalGetInvMass(),c.m_angularComponentB,deltaImpulse);
-}
-
-
-void	btPgsJacobiSolver::resolveSplitPenetrationImpulseCacheFriendly(
-        btSolverBody& body1,
-        btSolverBody& body2,
-        const btSolverConstraint& c)
-{
-		if (c.m_rhsPenetration)
-        {
-			gNumSplitImpulseRecoveries2++;
-			btScalar deltaImpulse = c.m_rhsPenetration-btScalar(c.m_appliedPushImpulse)*c.m_cfm;
-			const btScalar deltaVel1Dotn	=	c.m_contactNormal.dot(body1.internalGetPushVelocity()) 	+ c.m_relpos1CrossNormal.dot(body1.internalGetTurnVelocity());
-			const btScalar deltaVel2Dotn	=	-c.m_contactNormal.dot(body2.internalGetPushVelocity()) + c.m_relpos2CrossNormal.dot(body2.internalGetTurnVelocity());
-
-			deltaImpulse	-=	deltaVel1Dotn*c.m_jacDiagABInv;
-			deltaImpulse	-=	deltaVel2Dotn*c.m_jacDiagABInv;
-			const btScalar sum = btScalar(c.m_appliedPushImpulse) + deltaImpulse;
-			if (sum < c.m_lowerLimit)
-			{
-				deltaImpulse = c.m_lowerLimit-c.m_appliedPushImpulse;
-				c.m_appliedPushImpulse = c.m_lowerLimit;
-			}
-			else
-			{
-				c.m_appliedPushImpulse = sum;
-			}
-			body1.internalApplyPushImpulse(c.m_contactNormal*body1.internalGetInvMass(),c.m_angularComponentA,deltaImpulse);
-			body2.internalApplyPushImpulse(-c.m_contactNormal*body2.internalGetInvMass(),c.m_angularComponentB,deltaImpulse);
-        }
-}
-
- void btPgsJacobiSolver::resolveSplitPenetrationSIMD(btSolverBody& body1,btSolverBody& body2,const btSolverConstraint& c)
-{
-#ifdef USE_SIMD
-	if (!c.m_rhsPenetration)
-		return;
-
-	gNumSplitImpulseRecoveries2++;
-
-	__m128 cpAppliedImp = _mm_set1_ps(c.m_appliedPushImpulse);
-	__m128	lowerLimit1 = _mm_set1_ps(c.m_lowerLimit);
-	__m128	upperLimit1 = _mm_set1_ps(c.m_upperLimit);
-	__m128 deltaImpulse = _mm_sub_ps(_mm_set1_ps(c.m_rhsPenetration), _mm_mul_ps(_mm_set1_ps(c.m_appliedPushImpulse),_mm_set1_ps(c.m_cfm)));
-	__m128 deltaVel1Dotn	=	_mm_add_ps(btSimdDot3(c.m_contactNormal.mVec128,body1.internalGetPushVelocity().mVec128), btSimdDot3(c.m_relpos1CrossNormal.mVec128,body1.internalGetTurnVelocity().mVec128));
-	__m128 deltaVel2Dotn	=	_mm_sub_ps(btSimdDot3(c.m_relpos2CrossNormal.mVec128,body2.internalGetTurnVelocity().mVec128),btSimdDot3((c.m_contactNormal).mVec128,body2.internalGetPushVelocity().mVec128));
-	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel1Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
-	deltaImpulse	=	_mm_sub_ps(deltaImpulse,_mm_mul_ps(deltaVel2Dotn,_mm_set1_ps(c.m_jacDiagABInv)));
-	btSimdScalar sum = _mm_add_ps(cpAppliedImp,deltaImpulse);
-	btSimdScalar resultLowerLess,resultUpperLess;
-	resultLowerLess = _mm_cmplt_ps(sum,lowerLimit1);
-	resultUpperLess = _mm_cmplt_ps(sum,upperLimit1);
-	__m128 lowMinApplied = _mm_sub_ps(lowerLimit1,cpAppliedImp);
-	deltaImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse) );
-	c.m_appliedPushImpulse = _mm_or_ps( _mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, sum) );
-	__m128	linearComponentA = _mm_mul_ps(c.m_contactNormal.mVec128,body1.internalGetInvMass().mVec128);
-	__m128	linearComponentB = _mm_mul_ps((c.m_contactNormal).mVec128,body2.internalGetInvMass().mVec128);
-	__m128 impulseMagnitude = deltaImpulse;
-	body1.internalGetPushVelocity().mVec128 = _mm_add_ps(body1.internalGetPushVelocity().mVec128,_mm_mul_ps(linearComponentA,impulseMagnitude));
-	body1.internalGetTurnVelocity().mVec128 = _mm_add_ps(body1.internalGetTurnVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentA.mVec128,impulseMagnitude));
-	body2.internalGetPushVelocity().mVec128 = _mm_sub_ps(body2.internalGetPushVelocity().mVec128,_mm_mul_ps(linearComponentB,impulseMagnitude));
-	body2.internalGetTurnVelocity().mVec128 = _mm_add_ps(body2.internalGetTurnVelocity().mVec128 ,_mm_mul_ps(c.m_angularComponentB.mVec128,impulseMagnitude));
-#else
-	resolveSplitPenetrationImpulseCacheFriendly(body1,body2,c);
-#endif
-}
-
-
-
-unsigned long btPgsJacobiSolver::btRand2()
-{
-	m_btSeed2 = (1664525L*m_btSeed2 + 1013904223L) & 0xffffffff;
-	return m_btSeed2;
-}
-
-
-
-//See ODE: adam's all-int straightforward(?) dRandInt (0..n-1)
-int btPgsJacobiSolver::btRandInt2 (int n)
-{
-	// seems good; xor-fold and modulus
-	const unsigned long un = static_cast<unsigned long>(n);
-	unsigned long r = btRand2();
-
-	// note: probably more aggressive than it needs to be -- might be
-	//       able to get away without one or two of the innermost branches.
-	if (un <= 0x00010000UL) {
-		r ^= (r >> 16);
-		if (un <= 0x00000100UL) {
-			r ^= (r >> 8);
-			if (un <= 0x00000010UL) {
-				r ^= (r >> 4);
-				if (un <= 0x00000004UL) {
-					r ^= (r >> 2);
-					if (un <= 0x00000002UL) {
-						r ^= (r >> 1);
-					}
-				}
-			}
-		}
-	}
-
-	return (int) (r % un);
-}
-
-
-
-void	btPgsJacobiSolver::initSolverBody(int bodyIndex, btSolverBody* solverBody, btRigidBodyCL* rb)
-{
-
-	solverBody->m_deltaLinearVelocity.setValue(0.f,0.f,0.f);
-	solverBody->m_deltaAngularVelocity.setValue(0.f,0.f,0.f);
-	solverBody->internalGetPushVelocity().setValue(0.f,0.f,0.f);
-	solverBody->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
-
-	if (rb)
-	{
-		solverBody->m_worldTransform = getWorldTransform(rb);
-		solverBody->internalSetInvMass(btVector3(rb->getInvMass(),rb->getInvMass(),rb->getInvMass()));
-		solverBody->m_originalBodyIndex = bodyIndex;
-		solverBody->m_angularFactor = btVector3(1,1,1);
-		solverBody->m_linearFactor = btVector3(1,1,1);
-		solverBody->m_linearVelocity = getLinearVelocity(rb);
-		solverBody->m_angularVelocity = getAngularVelocity(rb);
-	} else
-	{
-		solverBody->m_worldTransform.setIdentity();
-		solverBody->internalSetInvMass(btVector3(0,0,0));
-		solverBody->m_originalBodyIndex = bodyIndex;
-		solverBody->m_angularFactor.setValue(1,1,1);
-		solverBody->m_linearFactor.setValue(1,1,1);
-		solverBody->m_linearVelocity.setValue(0,0,0);
-		solverBody->m_angularVelocity.setValue(0,0,0);
-	}
-
-
-}
-
-
-
-
-
-
-btScalar btPgsJacobiSolver::restitutionCurve(btScalar rel_vel, btScalar restitution)
-{
-	btScalar rest = restitution * -rel_vel;
-	return rest;
-}
-
-
-
-
-
-
-void btPgsJacobiSolver::setupFrictionConstraint(btRigidBodyCL* bodies,btInertiaCL* inertias, btSolverConstraint& solverConstraint, const btVector3& normalAxis,int  solverBodyIdA,int solverBodyIdB,btContactPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btRigidBodyCL* colObj0,btRigidBodyCL* colObj1, btScalar relaxation, btScalar desiredVelocity, btScalar cfmSlip)
-{
-
-	
-	solverConstraint.m_contactNormal = normalAxis;
-	btSolverBody& solverBodyA = m_tmpSolverBodyPool[solverBodyIdA];
-	btSolverBody& solverBodyB = m_tmpSolverBodyPool[solverBodyIdB];
-
-	btRigidBodyCL* body0 = &bodies[solverBodyA.m_originalBodyIndex];
-	btRigidBodyCL* body1 = &bodies[solverBodyB.m_originalBodyIndex];
-
-
-	solverConstraint.m_solverBodyIdA = solverBodyIdA;
-	solverConstraint.m_solverBodyIdB = solverBodyIdB;
-
-	solverConstraint.m_friction = cp.m_combinedFriction;
-	solverConstraint.m_originalContactPoint = 0;
-
-	solverConstraint.m_appliedImpulse = 0.f;
-	solverConstraint.m_appliedPushImpulse = 0.f;
-
-	{
-		btVector3 ftorqueAxis1 = rel_pos1.cross(solverConstraint.m_contactNormal);
-		solverConstraint.m_relpos1CrossNormal = ftorqueAxis1;
-		solverConstraint.m_angularComponentA = body0 ? getInvInertiaTensorWorld(&inertias[solverBodyA.m_originalBodyIndex])*ftorqueAxis1 : btVector3(0,0,0);
-	}
-	{
-		btVector3 ftorqueAxis1 = rel_pos2.cross(-solverConstraint.m_contactNormal);
-		solverConstraint.m_relpos2CrossNormal = ftorqueAxis1;
-		solverConstraint.m_angularComponentB = body1 ? getInvInertiaTensorWorld(&inertias[solverBodyB.m_originalBodyIndex])*ftorqueAxis1 : btVector3(0,0,0);
-	}
-
-	btScalar scaledDenom;
-
-	{
-		btVector3 vec;
-		btScalar denom0 = 0.f;
-		btScalar denom1 = 0.f;
-		if (body0)
-		{
-			vec = ( solverConstraint.m_angularComponentA).cross(rel_pos1);
-			denom0 = body0->getInvMass() + normalAxis.dot(vec);
-		}
-		if (body1)
-		{
-			vec = ( -solverConstraint.m_angularComponentB).cross(rel_pos2);
-			denom1 = body1->getInvMass() + normalAxis.dot(vec);
-		}
-
-		btScalar denom;
-		if (m_usePgs)
-		{
-			scaledDenom = denom = relaxation/(denom0+denom1);
-		} else
-		{
-			denom = relaxation/(denom0+denom1);
-			btScalar countA = body0->getInvMass() ? btScalar(m_bodyCount[solverBodyA.m_originalBodyIndex]): 1.f;
-			btScalar countB = body1->getInvMass() ? btScalar(m_bodyCount[solverBodyB.m_originalBodyIndex]): 1.f;
-
-			scaledDenom = relaxation/(denom0*countA+denom1*countB);
-		}
-
-		solverConstraint.m_jacDiagABInv = denom;
-	}
-
-	{
-		
-
-		btScalar rel_vel;
-		btScalar vel1Dotn = solverConstraint.m_contactNormal.dot(body0?solverBodyA.m_linearVelocity:btVector3(0,0,0)) 
-			+ solverConstraint.m_relpos1CrossNormal.dot(body0?solverBodyA.m_angularVelocity:btVector3(0,0,0));
-		btScalar vel2Dotn = -solverConstraint.m_contactNormal.dot(body1?solverBodyB.m_linearVelocity:btVector3(0,0,0)) 
-			+ solverConstraint.m_relpos2CrossNormal.dot(body1?solverBodyB.m_angularVelocity:btVector3(0,0,0));
-
-		rel_vel = vel1Dotn+vel2Dotn;
-
-//		btScalar positionalError = 0.f;
-
-		btSimdScalar velocityError =  desiredVelocity - rel_vel;
-		btSimdScalar	velocityImpulse = velocityError * btSimdScalar(scaledDenom);//solverConstraint.m_jacDiagABInv);
-		solverConstraint.m_rhs = velocityImpulse;
-		solverConstraint.m_cfm = cfmSlip;
-		solverConstraint.m_lowerLimit = 0;
-		solverConstraint.m_upperLimit = 1e10f;
-		
-	}
-}
-
-btSolverConstraint&	btPgsJacobiSolver::addFrictionConstraint(btRigidBodyCL* bodies,btInertiaCL* inertias, const btVector3& normalAxis,int solverBodyIdA,int solverBodyIdB,int frictionIndex,btContactPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btRigidBodyCL* colObj0,btRigidBodyCL* colObj1, btScalar relaxation, btScalar desiredVelocity, btScalar cfmSlip)
-{
-	btSolverConstraint& solverConstraint = m_tmpSolverContactFrictionConstraintPool.expandNonInitializing();
-	solverConstraint.m_frictionIndex = frictionIndex;
-	setupFrictionConstraint(bodies,inertias,solverConstraint, normalAxis, solverBodyIdA, solverBodyIdB, cp, rel_pos1, rel_pos2, 
-							colObj0, colObj1, relaxation, desiredVelocity, cfmSlip);
-	return solverConstraint;
-}
-
-
-void btPgsJacobiSolver::setupRollingFrictionConstraint(btRigidBodyCL* bodies,btInertiaCL* inertias,	btSolverConstraint& solverConstraint, const btVector3& normalAxis1,int solverBodyIdA,int  solverBodyIdB,
-									btContactPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,
-									btRigidBodyCL* colObj0,btRigidBodyCL* colObj1, btScalar relaxation, 
-									btScalar desiredVelocity, btScalar cfmSlip)
-
-{
-	btVector3 normalAxis(0,0,0);
-
-
-	solverConstraint.m_contactNormal = normalAxis;
-	btSolverBody& solverBodyA = m_tmpSolverBodyPool[solverBodyIdA];
-	btSolverBody& solverBodyB = m_tmpSolverBodyPool[solverBodyIdB];
-
-	btRigidBodyCL* body0 = &bodies[m_tmpSolverBodyPool[solverBodyIdA].m_originalBodyIndex];
-	btRigidBodyCL* body1 = &bodies[m_tmpSolverBodyPool[solverBodyIdB].m_originalBodyIndex];
-
-	solverConstraint.m_solverBodyIdA = solverBodyIdA;
-	solverConstraint.m_solverBodyIdB = solverBodyIdB;
-
-	solverConstraint.m_friction = cp.m_combinedRollingFriction;
-	solverConstraint.m_originalContactPoint = 0;
-
-	solverConstraint.m_appliedImpulse = 0.f;
-	solverConstraint.m_appliedPushImpulse = 0.f;
-
-	{
-		btVector3 ftorqueAxis1 = -normalAxis1;
-		solverConstraint.m_relpos1CrossNormal = ftorqueAxis1;
-		solverConstraint.m_angularComponentA = body0 ? getInvInertiaTensorWorld(&inertias[solverBodyA.m_originalBodyIndex])*ftorqueAxis1 : btVector3(0,0,0);
-	}
-	{
-		btVector3 ftorqueAxis1 = normalAxis1;
-		solverConstraint.m_relpos2CrossNormal = ftorqueAxis1;
-		solverConstraint.m_angularComponentB = body1 ? getInvInertiaTensorWorld(&inertias[solverBodyB.m_originalBodyIndex])*ftorqueAxis1 : btVector3(0,0,0);
-	}
-
-
-	{
-		btVector3 iMJaA = body0?getInvInertiaTensorWorld(&inertias[solverBodyA.m_originalBodyIndex])*solverConstraint.m_relpos1CrossNormal:btVector3(0,0,0);
-		btVector3 iMJaB = body1?getInvInertiaTensorWorld(&inertias[solverBodyB.m_originalBodyIndex])*solverConstraint.m_relpos2CrossNormal:btVector3(0,0,0);
-		btScalar sum = 0;
-		sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
-		sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
-		solverConstraint.m_jacDiagABInv = btScalar(1.)/sum;
-	}
-
-	{
-		
-
-		btScalar rel_vel;
-		btScalar vel1Dotn = solverConstraint.m_contactNormal.dot(body0?solverBodyA.m_linearVelocity:btVector3(0,0,0)) 
-			+ solverConstraint.m_relpos1CrossNormal.dot(body0?solverBodyA.m_angularVelocity:btVector3(0,0,0));
-		btScalar vel2Dotn = -solverConstraint.m_contactNormal.dot(body1?solverBodyB.m_linearVelocity:btVector3(0,0,0)) 
-			+ solverConstraint.m_relpos2CrossNormal.dot(body1?solverBodyB.m_angularVelocity:btVector3(0,0,0));
-
-		rel_vel = vel1Dotn+vel2Dotn;
-
-//		btScalar positionalError = 0.f;
-
-		btSimdScalar velocityError =  desiredVelocity - rel_vel;
-		btSimdScalar	velocityImpulse = velocityError * btSimdScalar(solverConstraint.m_jacDiagABInv);
-		solverConstraint.m_rhs = velocityImpulse;
-		solverConstraint.m_cfm = cfmSlip;
-		solverConstraint.m_lowerLimit = 0;
-		solverConstraint.m_upperLimit = 1e10f;
-		
-	}
-}
-
-
-
-
-
-
-
-
-btSolverConstraint&	btPgsJacobiSolver::addRollingFrictionConstraint(btRigidBodyCL* bodies,btInertiaCL* inertias,const btVector3& normalAxis,int solverBodyIdA,int solverBodyIdB,int frictionIndex,btContactPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btRigidBodyCL* colObj0,btRigidBodyCL* colObj1, btScalar relaxation, btScalar desiredVelocity, btScalar cfmSlip)
-{
-	btSolverConstraint& solverConstraint = m_tmpSolverContactRollingFrictionConstraintPool.expandNonInitializing();
-	solverConstraint.m_frictionIndex = frictionIndex;
-	setupRollingFrictionConstraint(bodies,inertias,solverConstraint, normalAxis, solverBodyIdA, solverBodyIdB, cp, rel_pos1, rel_pos2, 
-							colObj0, colObj1, relaxation, desiredVelocity, cfmSlip);
-	return solverConstraint;
-}
-
-
-int	btPgsJacobiSolver::getOrInitSolverBody(int bodyIndex, btRigidBodyCL* bodies,btInertiaCL* inertias)
-{
-	//btAssert(bodyIndex< m_tmpSolverBodyPool.size());
-
-	btRigidBodyCL& body = bodies[bodyIndex];
-	int curIndex = -1;
-	if (m_usePgs || body.getInvMass()==0.f)
-	{
-		if (m_bodyCount[bodyIndex]<0)
-		{
-			curIndex = m_tmpSolverBodyPool.size();
-			btSolverBody& solverBody = m_tmpSolverBodyPool.expand();
-			initSolverBody(bodyIndex,&solverBody,&body);
-			solverBody.m_originalBodyIndex = bodyIndex;
-			m_bodyCount[bodyIndex] = curIndex;
-		} else
-		{
-			curIndex = m_bodyCount[bodyIndex];
-		}
-	} else
-	{
-		btAssert(m_bodyCount[bodyIndex]>0);
-		m_bodyCountCheck[bodyIndex]++;
-		curIndex = m_tmpSolverBodyPool.size();
-		btSolverBody& solverBody = m_tmpSolverBodyPool.expand();
-		initSolverBody(bodyIndex,&solverBody,&body);
-		solverBody.m_originalBodyIndex = bodyIndex;
-	}
-
-	btAssert(curIndex>=0);
-	return curIndex;
-
-}
-#include <stdio.h>
-
-
-void btPgsJacobiSolver::setupContactConstraint(btRigidBodyCL* bodies, btInertiaCL* inertias,btSolverConstraint& solverConstraint, 
-																 int solverBodyIdA, int solverBodyIdB,
-																 btContactPoint& cp, const btContactSolverInfo& infoGlobal,
-																 btVector3& vel, btScalar& rel_vel, btScalar& relaxation,
-																 btVector3& rel_pos1, btVector3& rel_pos2)
-{
-			
-			const btVector3& pos1 = cp.getPositionWorldOnA();
-			const btVector3& pos2 = cp.getPositionWorldOnB();
-
-			btSolverBody* bodyA = &m_tmpSolverBodyPool[solverBodyIdA];
-			btSolverBody* bodyB = &m_tmpSolverBodyPool[solverBodyIdB];
-
-			btRigidBodyCL* rb0 = &bodies[bodyA->m_originalBodyIndex];
-			btRigidBodyCL* rb1 = &bodies[bodyB->m_originalBodyIndex];
-
-//			btVector3 rel_pos1 = pos1 - colObj0->getWorldTransform().getOrigin(); 
-//			btVector3 rel_pos2 = pos2 - colObj1->getWorldTransform().getOrigin();
-			rel_pos1 = pos1 - bodyA->getWorldTransform().getOrigin(); 
-			rel_pos2 = pos2 - bodyB->getWorldTransform().getOrigin();
-
-			relaxation = 1.f;
-
-			btVector3 torqueAxis0 = rel_pos1.cross(cp.m_normalWorldOnB);
-			solverConstraint.m_angularComponentA = rb0 ? getInvInertiaTensorWorld(&inertias[bodyA->m_originalBodyIndex])*torqueAxis0 : btVector3(0,0,0);
-			btVector3 torqueAxis1 = rel_pos2.cross(cp.m_normalWorldOnB);		
-			solverConstraint.m_angularComponentB = rb1 ? getInvInertiaTensorWorld(&inertias[bodyB->m_originalBodyIndex])*-torqueAxis1 : btVector3(0,0,0);
-
-			btScalar scaledDenom;
-				{
-#ifdef COMPUTE_IMPULSE_DENOM
-					btScalar denom0 = rb0->computeImpulseDenominator(pos1,cp.m_normalWorldOnB);
-					btScalar denom1 = rb1->computeImpulseDenominator(pos2,cp.m_normalWorldOnB);
-#else							
-					btVector3 vec;
-					btScalar denom0 = 0.f;
-					btScalar denom1 = 0.f;
-					if (rb0)
-					{
-						vec = ( solverConstraint.m_angularComponentA).cross(rel_pos1);
-						denom0 = rb0->getInvMass() + cp.m_normalWorldOnB.dot(vec);
-					}
-					if (rb1)
-					{
-						vec = ( -solverConstraint.m_angularComponentB).cross(rel_pos2);
-						denom1 = rb1->getInvMass() + cp.m_normalWorldOnB.dot(vec);
-					}
-#endif //COMPUTE_IMPULSE_DENOM		
-
-					
-					btScalar denom;
-					if (m_usePgs)
-					{
-						scaledDenom = denom = relaxation/(denom0+denom1);
-					} else
-					{
-						denom = relaxation/(denom0+denom1);
-
-						btScalar countA = rb0->m_invMass? btScalar(m_bodyCount[bodyA->m_originalBodyIndex]) : 1.f;
-						btScalar countB = rb1->m_invMass? btScalar(m_bodyCount[bodyB->m_originalBodyIndex]) : 1.f;
-						scaledDenom = relaxation/(denom0*countA+denom1*countB);
-					}
-					solverConstraint.m_jacDiagABInv = denom;
-				}
-
-				solverConstraint.m_contactNormal = cp.m_normalWorldOnB;
-				solverConstraint.m_relpos1CrossNormal = torqueAxis0;
-				solverConstraint.m_relpos2CrossNormal = -torqueAxis1;
-
-				btScalar restitution = 0.f;
-				btScalar penetration = cp.getDistance()+infoGlobal.m_linearSlop;
-
-				{
-					btVector3 vel1,vel2;
-
-					vel1 = rb0? getVelocityInLocalPoint(rb0,rel_pos1) : btVector3(0,0,0);
-					vel2 = rb1? getVelocityInLocalPoint(rb1, rel_pos2) : btVector3(0,0,0);
-
-	//			btVector3 vel2 = rb1 ? rb1->getVelocityInLocalPoint(rel_pos2) : btVector3(0,0,0);
-					vel  = vel1 - vel2;
-					rel_vel = cp.m_normalWorldOnB.dot(vel);
-
-					
-
-					solverConstraint.m_friction = cp.m_combinedFriction;
-
-				
-					restitution =  restitutionCurve(rel_vel, cp.m_combinedRestitution);
-					if (restitution <= btScalar(0.))
-					{
-						restitution = 0.f;
-					};
-				}
-
-
-				///warm starting (or zero if disabled)
-				if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
-				{
-					solverConstraint.m_appliedImpulse = cp.m_appliedImpulse * infoGlobal.m_warmstartingFactor;
-					if (rb0)
-						bodyA->internalApplyImpulse(solverConstraint.m_contactNormal*bodyA->internalGetInvMass(),solverConstraint.m_angularComponentA,solverConstraint.m_appliedImpulse);
-					if (rb1)
-						bodyB->internalApplyImpulse(solverConstraint.m_contactNormal*bodyB->internalGetInvMass(),-solverConstraint.m_angularComponentB,-(btScalar)solverConstraint.m_appliedImpulse);
-				} else
-				{
-					solverConstraint.m_appliedImpulse = 0.f;
-				}
-
-				solverConstraint.m_appliedPushImpulse = 0.f;
-
-				{
-					btScalar vel1Dotn = solverConstraint.m_contactNormal.dot(rb0?bodyA->m_linearVelocity:btVector3(0,0,0)) 
-						+ solverConstraint.m_relpos1CrossNormal.dot(rb0?bodyA->m_angularVelocity:btVector3(0,0,0));
-					btScalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rb1?bodyB->m_linearVelocity:btVector3(0,0,0)) 
-						+ solverConstraint.m_relpos2CrossNormal.dot(rb1?bodyB->m_angularVelocity:btVector3(0,0,0));
-					btScalar rel_vel = vel1Dotn+vel2Dotn;
-
-					btScalar positionalError = 0.f;
-					btScalar	velocityError = restitution - rel_vel;// * damping;
-					
-
-					btScalar erp = infoGlobal.m_erp2;
-					if (!infoGlobal.m_splitImpulse || (penetration > infoGlobal.m_splitImpulsePenetrationThreshold))
-					{
-						erp = infoGlobal.m_erp;
-					}
-
-					if (penetration>0)
-					{
-						positionalError = 0;
-
-						velocityError -= penetration / infoGlobal.m_timeStep;
-					} else
-					{
-						positionalError = -penetration * erp/infoGlobal.m_timeStep;
-					}
-
-					btScalar  penetrationImpulse = positionalError*scaledDenom;//solverConstraint.m_jacDiagABInv;
-					btScalar velocityImpulse = velocityError *scaledDenom;//solverConstraint.m_jacDiagABInv;
-
-					if (!infoGlobal.m_splitImpulse || (penetration > infoGlobal.m_splitImpulsePenetrationThreshold))
-					{
-						//combine position and velocity into rhs
-						solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
-						solverConstraint.m_rhsPenetration = 0.f;
-
-					} else
-					{
-						//split position and velocity into rhs and m_rhsPenetration
-						solverConstraint.m_rhs = velocityImpulse;
-						solverConstraint.m_rhsPenetration = penetrationImpulse;
-					}
-					solverConstraint.m_cfm = 0.f;
-					solverConstraint.m_lowerLimit = 0;
-					solverConstraint.m_upperLimit = 1e10f;
-				}
-
-
-
-
-}
-
-
-
-void btPgsJacobiSolver::setFrictionConstraintImpulse( btRigidBodyCL* bodies, btInertiaCL* inertias,btSolverConstraint& solverConstraint, 
-																		int solverBodyIdA, int solverBodyIdB,
-																 btContactPoint& cp, const btContactSolverInfo& infoGlobal)
-{
-
-	btSolverBody* bodyA = &m_tmpSolverBodyPool[solverBodyIdA];
-	btSolverBody* bodyB = &m_tmpSolverBodyPool[solverBodyIdB];
-
-
-	{
-		btSolverConstraint& frictionConstraint1 = m_tmpSolverContactFrictionConstraintPool[solverConstraint.m_frictionIndex];
-		if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
-		{
-			frictionConstraint1.m_appliedImpulse = cp.m_appliedImpulseLateral1 * infoGlobal.m_warmstartingFactor;
-			if (bodies[bodyA->m_originalBodyIndex].m_invMass)
-				bodyA->internalApplyImpulse(frictionConstraint1.m_contactNormal*bodies[bodyA->m_originalBodyIndex].m_invMass,frictionConstraint1.m_angularComponentA,frictionConstraint1.m_appliedImpulse);
-			if (bodies[bodyB->m_originalBodyIndex].m_invMass)
-				bodyB->internalApplyImpulse(frictionConstraint1.m_contactNormal*bodies[bodyB->m_originalBodyIndex].m_invMass,-frictionConstraint1.m_angularComponentB,-(btScalar)frictionConstraint1.m_appliedImpulse);
-		} else
-		{
-			frictionConstraint1.m_appliedImpulse = 0.f;
-		}
-	}
-
-	if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
-	{
-		btSolverConstraint& frictionConstraint2 = m_tmpSolverContactFrictionConstraintPool[solverConstraint.m_frictionIndex+1];
-		if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
-		{
-			frictionConstraint2.m_appliedImpulse = cp.m_appliedImpulseLateral2  * infoGlobal.m_warmstartingFactor;
-			if (bodies[bodyA->m_originalBodyIndex].m_invMass)
-				bodyA->internalApplyImpulse(frictionConstraint2.m_contactNormal*bodies[bodyA->m_originalBodyIndex].m_invMass,frictionConstraint2.m_angularComponentA,frictionConstraint2.m_appliedImpulse);
-			if (bodies[bodyB->m_originalBodyIndex].m_invMass)
-				bodyB->internalApplyImpulse(frictionConstraint2.m_contactNormal*bodies[bodyB->m_originalBodyIndex].m_invMass,-frictionConstraint2.m_angularComponentB,-(btScalar)frictionConstraint2.m_appliedImpulse);
-		} else
-		{
-			frictionConstraint2.m_appliedImpulse = 0.f;
-		}
-	}
-}
-
-
-
-
-void	btPgsJacobiSolver::convertContact(btRigidBodyCL* bodies, btInertiaCL* inertias,btContact4* manifold,const btContactSolverInfo& infoGlobal)
-{
-	btRigidBodyCL* colObj0=0,*colObj1=0;
-
-	
-	int solverBodyIdA = getOrInitSolverBody(manifold->getBodyA(),bodies,inertias);
-	int solverBodyIdB = getOrInitSolverBody(manifold->getBodyB(),bodies,inertias);
-
-//	btRigidBody* bodyA = btRigidBody::upcast(colObj0);
-//	btRigidBody* bodyB = btRigidBody::upcast(colObj1);
-
-	btSolverBody* solverBodyA = &m_tmpSolverBodyPool[solverBodyIdA];
-	btSolverBody* solverBodyB = &m_tmpSolverBodyPool[solverBodyIdB];
-
-
-
-	///avoid collision response between two static objects
-	if (solverBodyA->m_invMass.isZero() && solverBodyB->m_invMass.isZero())
-		return;
-
-	int rollingFriction=1;
-	int numContacts = getNumContacts(manifold);
-	for (int j=0;j<numContacts;j++)
-	{
-
-		btContactPoint cp;
-		getContactPoint(manifold,j,cp);
-
-		if (cp.getDistance() <= getContactProcessingThreshold(manifold))
-		{
-			btVector3 rel_pos1;
-			btVector3 rel_pos2;
-			btScalar relaxation;
-			btScalar rel_vel;
-			btVector3 vel;
-
-			int frictionIndex = m_tmpSolverContactConstraintPool.size();
-			btSolverConstraint& solverConstraint = m_tmpSolverContactConstraintPool.expandNonInitializing();
-//			btRigidBody* rb0 = btRigidBody::upcast(colObj0);
-//			btRigidBody* rb1 = btRigidBody::upcast(colObj1);
-			solverConstraint.m_solverBodyIdA = solverBodyIdA;
-			solverConstraint.m_solverBodyIdB = solverBodyIdB;
-
-			solverConstraint.m_originalContactPoint = &cp;
-
-			setupContactConstraint(bodies,inertias,solverConstraint, solverBodyIdA, solverBodyIdB, cp, infoGlobal, vel, rel_vel, relaxation, rel_pos1, rel_pos2);
-
-//			const btVector3& pos1 = cp.getPositionWorldOnA();
-//			const btVector3& pos2 = cp.getPositionWorldOnB();
-
-			/////setup the friction constraints
-
-			solverConstraint.m_frictionIndex = m_tmpSolverContactFrictionConstraintPool.size();
-
-			btVector3 angVelA,angVelB;
-			solverBodyA->getAngularVelocity(angVelA);
-			solverBodyB->getAngularVelocity(angVelB);			
-			btVector3 relAngVel = angVelB-angVelA;
-
-			if ((cp.m_combinedRollingFriction>0.f) && (rollingFriction>0))
-			{
-				//only a single rollingFriction per manifold
-				rollingFriction--;
-				if (relAngVel.length()>infoGlobal.m_singleAxisRollingFrictionThreshold)
-				{
-					relAngVel.normalize();
-					if (relAngVel.length()>0.001)
-						addRollingFrictionConstraint(bodies,inertias,relAngVel,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
-
-				} else
-				{
-					addRollingFrictionConstraint(bodies,inertias,cp.m_normalWorldOnB,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
-					btVector3 axis0,axis1;
-					btPlaneSpace1(cp.m_normalWorldOnB,axis0,axis1);
-					if (axis0.length()>0.001)
-						addRollingFrictionConstraint(bodies,inertias,axis0,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
-					if (axis1.length()>0.001)
-						addRollingFrictionConstraint(bodies,inertias,axis1,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
-		
-				}
-			}
-
-			///Bullet has several options to set the friction directions
-			///By default, each contact has only a single friction direction that is recomputed automatically very frame 
-			///based on the relative linear velocity.
-			///If the relative velocity it zero, it will automatically compute a friction direction.
-			
-			///You can also enable two friction directions, using the SOLVER_USE_2_FRICTION_DIRECTIONS.
-			///In that case, the second friction direction will be orthogonal to both contact normal and first friction direction.
-			///
-			///If you choose SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION, then the friction will be independent from the relative projected velocity.
-			///
-			///The user can manually override the friction directions for certain contacts using a contact callback, 
-			///and set the cp.m_lateralFrictionInitialized to true
-			///In that case, you can set the target relative motion in each friction direction (cp.m_contactMotion1 and cp.m_contactMotion2)
-			///this will give a conveyor belt effect
-			///
-			if (!(infoGlobal.m_solverMode & SOLVER_ENABLE_FRICTION_DIRECTION_CACHING) || !cp.m_lateralFrictionInitialized)
-			{
-				cp.m_lateralFrictionDir1 = vel - cp.m_normalWorldOnB * rel_vel;
-				btScalar lat_rel_vel = cp.m_lateralFrictionDir1.length2();
-				if (!(infoGlobal.m_solverMode & SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION) && lat_rel_vel > SIMD_EPSILON)
-				{
-					cp.m_lateralFrictionDir1 *= 1.f/btSqrt(lat_rel_vel);
-					if((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
-					{
-						cp.m_lateralFrictionDir2 = cp.m_lateralFrictionDir1.cross(cp.m_normalWorldOnB);
-						cp.m_lateralFrictionDir2.normalize();//??
-						addFrictionConstraint(bodies,inertias,cp.m_lateralFrictionDir2,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
-
-					}
-
-					addFrictionConstraint(bodies,inertias,cp.m_lateralFrictionDir1,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
-
-				} else
-				{
-					btPlaneSpace1(cp.m_normalWorldOnB,cp.m_lateralFrictionDir1,cp.m_lateralFrictionDir2);
-
-					if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
-					{
-						addFrictionConstraint(bodies,inertias,cp.m_lateralFrictionDir2,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
-					}
-
-					addFrictionConstraint(bodies,inertias,cp.m_lateralFrictionDir1,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation);
-
-					if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS) && (infoGlobal.m_solverMode & SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION))
-					{
-						cp.m_lateralFrictionInitialized = true;
-					}
-				}
-
-			} else
-			{
-				addFrictionConstraint(bodies,inertias,cp.m_lateralFrictionDir1,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation,cp.m_contactMotion1, cp.m_contactCFM1);
-
-				if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
-					addFrictionConstraint(bodies,inertias,cp.m_lateralFrictionDir2,solverBodyIdA,solverBodyIdB,frictionIndex,cp,rel_pos1,rel_pos2,colObj0,colObj1, relaxation, cp.m_contactMotion2, cp.m_contactCFM2);
-
-				setFrictionConstraintImpulse( bodies,inertias,solverConstraint, solverBodyIdA, solverBodyIdB, cp, infoGlobal);
-			}
-		
-
-			
-
-		}
-	}
-}
-
-btScalar btPgsJacobiSolver::solveGroupCacheFriendlySetup(btRigidBodyCL* bodies, btInertiaCL* inertias, int numBodies, btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal)
-{
-	BT_PROFILE("solveGroupCacheFriendlySetup");
-
-
-	m_maxOverrideNumSolverIterations = 0;
-
-
-
-	m_tmpSolverBodyPool.resize(0);
-	
-	
-	m_bodyCount.resize(0);
-	m_bodyCount.resize(numBodies,0);
-	m_bodyCountCheck.resize(0);
-	m_bodyCountCheck.resize(numBodies,0);
-	
-	m_deltaLinearVelocities.resize(0);
-	m_deltaLinearVelocities.resize(numBodies,btVector3(0,0,0));
-	m_deltaAngularVelocities.resize(0);
-	m_deltaAngularVelocities.resize(numBodies,btVector3(0,0,0));
-	
-	int totalBodies = 0;
-
-	for (int i=0;i<numManifolds;i++)
-	{
-		int bodyIndexA = manifoldPtr[i].getBodyA();
-		int bodyIndexB = manifoldPtr[i].getBodyB();
-		if (m_usePgs)
-		{
-			m_bodyCount[bodyIndexA]=-1;
-			m_bodyCount[bodyIndexB]=-1;
-		} else
-		{
-			if (bodies[bodyIndexA].getInvMass())
-			{
-				//m_bodyCount[bodyIndexA]+=manifoldPtr[i].getNPoints();
-				m_bodyCount[bodyIndexA]++;
-			}
-			else
-				m_bodyCount[bodyIndexA]=-1;
-
-			if (bodies[bodyIndexB].getInvMass())
-			//	m_bodyCount[bodyIndexB]+=manifoldPtr[i].getNPoints();
-				m_bodyCount[bodyIndexB]++;
-			else
-				m_bodyCount[bodyIndexB]=-1;
-		}
-
-	}
-
-
-	
-	if (1)
-	{
-		int j;
-		for (j=0;j<numConstraints;j++)
-		{
-			btTypedConstraint* constraint = constraints[j];
-			constraint->buildJacobian();
-			constraint->internalSetAppliedImpulse(0.0f);
-		}
-	}
-
-	//btRigidBody* rb0=0,*rb1=0;
-	//if (1)
-	{
-		{
-
-			int totalNumRows = 0;
-			int i;
-			
-			m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints);
-			//calculate the total number of contraint rows
-			for (i=0;i<numConstraints;i++)
-			{
-				btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
-				btJointFeedback* fb = constraints[i]->getJointFeedback();
-				if (fb)
-				{
-					fb->m_appliedForceBodyA.setZero();
-					fb->m_appliedTorqueBodyA.setZero();
-					fb->m_appliedForceBodyB.setZero();
-					fb->m_appliedTorqueBodyB.setZero();
-				}
-
-				if (constraints[i]->isEnabled())
-				{
-				}
-				if (constraints[i]->isEnabled())
-				{
-					constraints[i]->getInfo1(&info1);
-				} else
-				{
-					info1.m_numConstraintRows = 0;
-					info1.nub = 0;
-				}
-				totalNumRows += info1.m_numConstraintRows;
-			}
-			m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows);
-
-			
-#ifndef DISABLE_JOINTS
-			///setup the btSolverConstraints
-			int currentRow = 0;
-
-			for (i=0;i<numConstraints;i++)
-			{
-				const btTypedConstraint::btConstraintInfo1& info1 = m_tmpConstraintSizesPool[i];
-				
-				if (info1.m_numConstraintRows)
-				{
-					btAssert(currentRow<totalNumRows);
-
-					btSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[currentRow];
-					btTypedConstraint* constraint = constraints[i];
-					btRigidBody& rbA = constraint->getRigidBodyA();
-					btRigidBody& rbB = constraint->getRigidBodyB();
-
-                    int solverBodyIdA = getOrInitSolverBody(rbA);
-                    int solverBodyIdB = getOrInitSolverBody(rbB);
-
-                    btSolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA];
-                    btSolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB];
-
-
-
-
-					int overrideNumSolverIterations = constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;
-					if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)
-						m_maxOverrideNumSolverIterations = overrideNumSolverIterations;
-
-
-					int j;
-					for ( j=0;j<info1.m_numConstraintRows;j++)
-					{
-						memset(&currentConstraintRow[j],0,sizeof(btSolverConstraint));
-						currentConstraintRow[j].m_lowerLimit = -SIMD_INFINITY;
-						currentConstraintRow[j].m_upperLimit = SIMD_INFINITY;
-						currentConstraintRow[j].m_appliedImpulse = 0.f;
-						currentConstraintRow[j].m_appliedPushImpulse = 0.f;
-						currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;
-						currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;
-						currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations;
-					}
-
-					bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
-					bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
-					bodyAPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
-					bodyAPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
-					bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f);
-					bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f);
-					bodyBPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f);
-					bodyBPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f);
-
-
-					btTypedConstraint::btConstraintInfo2 info2;
-					info2.fps = 1.f/infoGlobal.m_timeStep;
-					info2.erp = infoGlobal.m_erp;
-					info2.m_J1linearAxis = currentConstraintRow->m_contactNormal;
-					info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal;
-					info2.m_J2linearAxis = 0;
-					info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal;
-					info2.rowskip = sizeof(btSolverConstraint)/sizeof(btScalar);//check this
-					///the size of btSolverConstraint needs be a multiple of btScalar
-		            btAssert(info2.rowskip*sizeof(btScalar)== sizeof(btSolverConstraint));
-					info2.m_constraintError = &currentConstraintRow->m_rhs;
-					currentConstraintRow->m_cfm = infoGlobal.m_globalCfm;
-					info2.m_damping = infoGlobal.m_damping;
-					info2.cfm = &currentConstraintRow->m_cfm;
-					info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;
-					info2.m_upperLimit = &currentConstraintRow->m_upperLimit;
-					info2.m_numIterations = infoGlobal.m_numIterations;
-					constraints[i]->getInfo2(&info2);
-
-					///finalize the constraint setup
-					for ( j=0;j<info1.m_numConstraintRows;j++)
-					{
-						btSolverConstraint& solverConstraint = currentConstraintRow[j];
-
-						if (solverConstraint.m_upperLimit>=constraints[i]->getBreakingImpulseThreshold())
-						{
-							solverConstraint.m_upperLimit = constraints[i]->getBreakingImpulseThreshold();
-						}
-
-						if (solverConstraint.m_lowerLimit<=-constraints[i]->getBreakingImpulseThreshold())
-						{
-							solverConstraint.m_lowerLimit = -constraints[i]->getBreakingImpulseThreshold();
-						}
-
-						solverConstraint.m_originalContactPoint = constraint;
-
-						{
-							const btVector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal;
-							solverConstraint.m_angularComponentA = constraint->getRigidBodyA().getInvInertiaTensorWorld()*ftorqueAxis1*constraint->getRigidBodyA().getAngularFactor();
-						}
-						{
-							const btVector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal;
-							solverConstraint.m_angularComponentB = constraint->getRigidBodyB().getInvInertiaTensorWorld()*ftorqueAxis2*constraint->getRigidBodyB().getAngularFactor();
-						}
-
-						{
-							btVector3 iMJlA = solverConstraint.m_contactNormal*rbA.getInvMass();
-							btVector3 iMJaA = rbA.getInvInertiaTensorWorld()*solverConstraint.m_relpos1CrossNormal;
-							btVector3 iMJlB = solverConstraint.m_contactNormal*rbB.getInvMass();//sign of normal?
-							btVector3 iMJaB = rbB.getInvInertiaTensorWorld()*solverConstraint.m_relpos2CrossNormal;
-
-							btScalar sum = iMJlA.dot(solverConstraint.m_contactNormal);
-							sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal);
-							sum += iMJlB.dot(solverConstraint.m_contactNormal);
-							sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal);
-							btScalar fsum = btFabs(sum);
-							btAssert(fsum > SIMD_EPSILON);
-							solverConstraint.m_jacDiagABInv = fsum>SIMD_EPSILON?btScalar(1.)/sum : 0.f;
-						}
-
-
-						///fix rhs
-						///todo: add force/torque accelerators
-						{
-							btScalar rel_vel;
-							btScalar vel1Dotn = solverConstraint.m_contactNormal.dot(rbA.getLinearVelocity()) + solverConstraint.m_relpos1CrossNormal.dot(rbA.getAngularVelocity());
-							btScalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rbB.getLinearVelocity()) + solverConstraint.m_relpos2CrossNormal.dot(rbB.getAngularVelocity());
-
-							rel_vel = vel1Dotn+vel2Dotn;
-
-							btScalar restitution = 0.f;
-							btScalar positionalError = solverConstraint.m_rhs;//already filled in by getConstraintInfo2
-							btScalar	velocityError = restitution - rel_vel * info2.m_damping;
-							btScalar	penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv;
-							btScalar	velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv;
-							solverConstraint.m_rhs = penetrationImpulse+velocityImpulse;
-							solverConstraint.m_appliedImpulse = 0.f;
-
-						}
-					}
-				}
-				currentRow+=m_tmpConstraintSizesPool[i].m_numConstraintRows;
-			}
-#endif //DISABLE_JOINTS
-		}
-
-
-		{
-			int i;
-
-			for (i=0;i<numManifolds;i++)
-			{
-				btContact4& manifold = manifoldPtr[i];
-				convertContact(bodies,inertias,&manifold,infoGlobal);
-			}
-		}
-	}
-
-//	btContactSolverInfo info = infoGlobal;
-
-
-	int numNonContactPool = m_tmpSolverNonContactConstraintPool.size();
-	int numConstraintPool = m_tmpSolverContactConstraintPool.size();
-	int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size();
-
-	///@todo: use stack allocator for such temporarily memory, same for solver bodies/constraints
-	m_orderNonContactConstraintPool.resizeNoInitialize(numNonContactPool);
-	if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
-		m_orderTmpConstraintPool.resizeNoInitialize(numConstraintPool*2);
-	else
-		m_orderTmpConstraintPool.resizeNoInitialize(numConstraintPool);
-
-	m_orderFrictionConstraintPool.resizeNoInitialize(numFrictionPool);
-	{
-		int i;
-		for (i=0;i<numNonContactPool;i++)
-		{
-			m_orderNonContactConstraintPool[i] = i;
-		}
-		for (i=0;i<numConstraintPool;i++)
-		{
-			m_orderTmpConstraintPool[i] = i;
-		}
-		for (i=0;i<numFrictionPool;i++)
-		{
-			m_orderFrictionConstraintPool[i] = i;
-		}
-	}
-
-	return 0.f;
-
-}
-
-
-btScalar btPgsJacobiSolver::solveSingleIteration(int iteration,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal)
-{
-
-	int numNonContactPool = m_tmpSolverNonContactConstraintPool.size();
-	int numConstraintPool = m_tmpSolverContactConstraintPool.size();
-	int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size();
-	
-	if (infoGlobal.m_solverMode & SOLVER_RANDMIZE_ORDER)
-	{
-		if (1)			// uncomment this for a bit less random ((iteration & 7) == 0)
-		{
-
-			for (int j=0; j<numNonContactPool; ++j) {
-				int tmp = m_orderNonContactConstraintPool[j];
-				int swapi = btRandInt2(j+1);
-				m_orderNonContactConstraintPool[j] = m_orderNonContactConstraintPool[swapi];
-				m_orderNonContactConstraintPool[swapi] = tmp;
-			}
-
-			//contact/friction constraints are not solved more than 
-			if (iteration< infoGlobal.m_numIterations)
-			{
-				for (int j=0; j<numConstraintPool; ++j) {
-					int tmp = m_orderTmpConstraintPool[j];
-					int swapi = btRandInt2(j+1);
-					m_orderTmpConstraintPool[j] = m_orderTmpConstraintPool[swapi];
-					m_orderTmpConstraintPool[swapi] = tmp;
-				}
-
-				for (int j=0; j<numFrictionPool; ++j) {
-					int tmp = m_orderFrictionConstraintPool[j];
-					int swapi = btRandInt2(j+1);
-					m_orderFrictionConstraintPool[j] = m_orderFrictionConstraintPool[swapi];
-					m_orderFrictionConstraintPool[swapi] = tmp;
-				}
-			}
-		}
-	}
-
-	if (infoGlobal.m_solverMode & SOLVER_SIMD)
-	{
-		///solve all joint constraints, using SIMD, if available
-		for (int j=0;j<m_tmpSolverNonContactConstraintPool.size();j++)
-		{
-			btSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[m_orderNonContactConstraintPool[j]];
-			if (iteration < constraint.m_overrideNumSolverIterations)
-				resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint);
-		}
-
-		if (iteration< infoGlobal.m_numIterations)
-		{
-#ifndef DISABLE_JOINTS
-			for (int j=0;j<numConstraints;j++)
-			{
-                if (constraints[j]->isEnabled())
-                {
-                    int bodyAid = getOrInitSolverBody(constraints[j]->getRigidBodyA());
-                    int bodyBid = getOrInitSolverBody(constraints[j]->getRigidBodyB());
-                    btSolverBody& bodyA = m_tmpSolverBodyPool[bodyAid];
-                    btSolverBody& bodyB = m_tmpSolverBodyPool[bodyBid];
-                    constraints[j]->solveConstraintObsolete(bodyA,bodyB,infoGlobal.m_timeStep);
-                }
-			}
-#endif
-
-
-			///solve all contact constraints using SIMD, if available
-			if (infoGlobal.m_solverMode & SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS)
-			{
-				int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
-				int multiplier = (infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS)? 2 : 1;
-
-				for (int c=0;c<numPoolConstraints;c++)
-				{
-					btScalar totalImpulse =0;
-
-					{
-						const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[c]];
-						resolveSingleConstraintRowLowerLimitSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
-						totalImpulse = solveManifold.m_appliedImpulse;
-					}
-					bool applyFriction = true;
-					if (applyFriction)
-					{
-						{
-
-							btSolverConstraint& solveManifold = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[c*multiplier]];
-
-							if (totalImpulse>btScalar(0))
-							{
-								solveManifold.m_lowerLimit = -(solveManifold.m_friction*totalImpulse);
-								solveManifold.m_upperLimit = solveManifold.m_friction*totalImpulse;
-
-								resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
-							}
-						}
-
-						if (infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS)
-						{
-
-							btSolverConstraint& solveManifold = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[c*multiplier+1]];
-				
-							if (totalImpulse>btScalar(0))
-							{
-								solveManifold.m_lowerLimit = -(solveManifold.m_friction*totalImpulse);
-								solveManifold.m_upperLimit = solveManifold.m_friction*totalImpulse;
-
-								resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
-							}
-						}
-					}
-				}
-
-			}
-			else//SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS
-			{
-				//solve the friction constraints after all contact constraints, don't interleave them
-				int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
-				int j;
-
-				for (j=0;j<numPoolConstraints;j++)
-				{
-					const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]];
-					resolveSingleConstraintRowLowerLimitSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
-
-				}
-
-				if (!m_usePgs)
-					averageVelocities();
-				
-
-				///solve all friction constraints, using SIMD, if available
-
-				int numFrictionPoolConstraints = m_tmpSolverContactFrictionConstraintPool.size();
-				for (j=0;j<numFrictionPoolConstraints;j++)
-				{
-					btSolverConstraint& solveManifold = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[j]];
-					btScalar totalImpulse = m_tmpSolverContactConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse;
-
-					if (totalImpulse>btScalar(0))
-					{
-						solveManifold.m_lowerLimit = -(solveManifold.m_friction*totalImpulse);
-						solveManifold.m_upperLimit = solveManifold.m_friction*totalImpulse;
-
-						resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
-					}
-				}
-
-				
-				int numRollingFrictionPoolConstraints = m_tmpSolverContactRollingFrictionConstraintPool.size();
-				for (j=0;j<numRollingFrictionPoolConstraints;j++)
-				{
-
-					btSolverConstraint& rollingFrictionConstraint = m_tmpSolverContactRollingFrictionConstraintPool[j];
-					btScalar totalImpulse = m_tmpSolverContactConstraintPool[rollingFrictionConstraint.m_frictionIndex].m_appliedImpulse;
-					if (totalImpulse>btScalar(0))
-					{
-						btScalar rollingFrictionMagnitude = rollingFrictionConstraint.m_friction*totalImpulse;
-						if (rollingFrictionMagnitude>rollingFrictionConstraint.m_friction)
-							rollingFrictionMagnitude = rollingFrictionConstraint.m_friction;
-
-						rollingFrictionConstraint.m_lowerLimit = -rollingFrictionMagnitude;
-						rollingFrictionConstraint.m_upperLimit = rollingFrictionMagnitude;
-
-						resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[rollingFrictionConstraint.m_solverBodyIdA],m_tmpSolverBodyPool[rollingFrictionConstraint.m_solverBodyIdB],rollingFrictionConstraint);
-					}
-				}
-				
-
-			}			
-		}
-	} else
-	{
-		//non-SIMD version
-		///solve all joint constraints
-		for (int j=0;j<m_tmpSolverNonContactConstraintPool.size();j++)
-		{
-			btSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[m_orderNonContactConstraintPool[j]];
-			if (iteration < constraint.m_overrideNumSolverIterations)
-				resolveSingleConstraintRowGeneric(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint);
-		}
-
-		if (iteration< infoGlobal.m_numIterations)
-		{
-#ifndef DISABLE_JOINTS
-			for (int j=0;j<numConstraints;j++)
-			{
-                if (constraints[j]->isEnabled())
-                {
-                    int bodyAid = getOrInitSolverBody(constraints[j]->getRigidBodyA());
-                    int bodyBid = getOrInitSolverBody(constraints[j]->getRigidBodyB());
-                    btSolverBody& bodyA = m_tmpSolverBodyPool[bodyAid];
-                    btSolverBody& bodyB = m_tmpSolverBodyPool[bodyBid];
-                    constraints[j]->solveConstraintObsolete(bodyA,bodyB,infoGlobal.m_timeStep);
-                }
-			}
-#endif //DISABLE_JOINTS
-
-			///solve all contact constraints
-			int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
-			for (int j=0;j<numPoolConstraints;j++)
-			{
-				const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]];
-				resolveSingleConstraintRowLowerLimit(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
-			}
-			///solve all friction constraints
-			int numFrictionPoolConstraints = m_tmpSolverContactFrictionConstraintPool.size();
-			for (int j=0;j<numFrictionPoolConstraints;j++)
-			{
-				btSolverConstraint& solveManifold = m_tmpSolverContactFrictionConstraintPool[m_orderFrictionConstraintPool[j]];
-				btScalar totalImpulse = m_tmpSolverContactConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse;
-
-				if (totalImpulse>btScalar(0))
-				{
-					solveManifold.m_lowerLimit = -(solveManifold.m_friction*totalImpulse);
-					solveManifold.m_upperLimit = solveManifold.m_friction*totalImpulse;
-
-					resolveSingleConstraintRowGeneric(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
-				}
-			}
-
-			int numRollingFrictionPoolConstraints = m_tmpSolverContactRollingFrictionConstraintPool.size();
-			for (int j=0;j<numRollingFrictionPoolConstraints;j++)
-			{
-				btSolverConstraint& rollingFrictionConstraint = m_tmpSolverContactRollingFrictionConstraintPool[j];
-				btScalar totalImpulse = m_tmpSolverContactConstraintPool[rollingFrictionConstraint.m_frictionIndex].m_appliedImpulse;
-				if (totalImpulse>btScalar(0))
-				{
-					btScalar rollingFrictionMagnitude = rollingFrictionConstraint.m_friction*totalImpulse;
-					if (rollingFrictionMagnitude>rollingFrictionConstraint.m_friction)
-						rollingFrictionMagnitude = rollingFrictionConstraint.m_friction;
-
-					rollingFrictionConstraint.m_lowerLimit = -rollingFrictionMagnitude;
-					rollingFrictionConstraint.m_upperLimit = rollingFrictionMagnitude;
-
-					resolveSingleConstraintRowGeneric(m_tmpSolverBodyPool[rollingFrictionConstraint.m_solverBodyIdA],m_tmpSolverBodyPool[rollingFrictionConstraint.m_solverBodyIdB],rollingFrictionConstraint);
-				}
-			}
-		}
-	}
-	return 0.f;
-}
-
-
-void btPgsJacobiSolver::solveGroupCacheFriendlySplitImpulseIterations(btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal)
-{
-	int iteration;
-	if (infoGlobal.m_splitImpulse)
-	{
-		if (infoGlobal.m_solverMode & SOLVER_SIMD)
-		{
-			for ( iteration = 0;iteration<infoGlobal.m_numIterations;iteration++)
-			{
-				{
-					int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
-					int j;
-					for (j=0;j<numPoolConstraints;j++)
-					{
-						const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]];
-
-						resolveSplitPenetrationSIMD(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
-					}
-				}
-			}
-		}
-		else
-		{
-			for ( iteration = 0;iteration<infoGlobal.m_numIterations;iteration++)
-			{
-				{
-					int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
-					int j;
-					for (j=0;j<numPoolConstraints;j++)
-					{
-						const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]];
-
-						resolveSplitPenetrationImpulseCacheFriendly(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA],m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB],solveManifold);
-					}
-				}
-			}
-		}
-	}
-}
-
-btScalar btPgsJacobiSolver::solveGroupCacheFriendlyIterations(btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal)
-{
-	BT_PROFILE("solveGroupCacheFriendlyIterations");
-
-	{
-		///this is a special step to resolve penetrations (just for contacts)
-		solveGroupCacheFriendlySplitImpulseIterations(constraints,numConstraints,infoGlobal);
-
-		int maxIterations = m_maxOverrideNumSolverIterations > infoGlobal.m_numIterations? m_maxOverrideNumSolverIterations : infoGlobal.m_numIterations;
-
-		for ( int iteration = 0 ; iteration< maxIterations ; iteration++)
-		//for ( int iteration = maxIterations-1  ; iteration >= 0;iteration--)
-		{			
-			
-			solveSingleIteration(iteration, constraints,numConstraints,infoGlobal);
-
-
-			if (!m_usePgs)
-			{
-				averageVelocities();
-			}
-		}
-		
-	}
-	return 0.f;
-}
-
-void	btPgsJacobiSolver::averageVelocities()
-{
-	BT_PROFILE("averaging");
-	//average the velocities
-	int numBodies = m_bodyCount.size();
-
-	m_deltaLinearVelocities.resize(0);
-	m_deltaLinearVelocities.resize(numBodies,btVector3(0,0,0));
-	m_deltaAngularVelocities.resize(0);
-	m_deltaAngularVelocities.resize(numBodies,btVector3(0,0,0));
-
-	for (int i=0;i<m_tmpSolverBodyPool.size();i++)
-	{
-		if (!m_tmpSolverBodyPool[i].m_invMass.isZero())
-		{
-			int orgBodyIndex = m_tmpSolverBodyPool[i].m_originalBodyIndex;
-			m_deltaLinearVelocities[orgBodyIndex]+=m_tmpSolverBodyPool[i].getDeltaLinearVelocity();
-			m_deltaAngularVelocities[orgBodyIndex]+=m_tmpSolverBodyPool[i].getDeltaAngularVelocity();
-		}
-	}
-				
-	for (int i=0;i<m_tmpSolverBodyPool.size();i++)
-	{
-		int orgBodyIndex = m_tmpSolverBodyPool[i].m_originalBodyIndex;
-
-		if (!m_tmpSolverBodyPool[i].m_invMass.isZero())
-		{
-						
-			btAssert(m_bodyCount[orgBodyIndex] == m_bodyCountCheck[orgBodyIndex]);
-						
-			btScalar factor = 1.f/btScalar(m_bodyCount[orgBodyIndex]);
-						
-
-			m_tmpSolverBodyPool[i].m_deltaLinearVelocity = m_deltaLinearVelocities[orgBodyIndex]*factor;
-			m_tmpSolverBodyPool[i].m_deltaAngularVelocity = m_deltaAngularVelocities[orgBodyIndex]*factor;
-		}
-	}
-}
-
-btScalar btPgsJacobiSolver::solveGroupCacheFriendlyFinish(btRigidBodyCL* bodies,btInertiaCL* inertias,int numBodies,const btContactSolverInfo& infoGlobal)
-{
-	int numPoolConstraints = m_tmpSolverContactConstraintPool.size();
-	int i,j;
-
-	if (infoGlobal.m_solverMode & SOLVER_USE_WARMSTARTING)
-	{
-		for (j=0;j<numPoolConstraints;j++)
-		{
-			const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[j];
-			btContactPoint* pt = (btContactPoint*) solveManifold.m_originalContactPoint;
-			btAssert(pt);
-			pt->m_appliedImpulse = solveManifold.m_appliedImpulse;
-		//	float f = m_tmpSolverContactFrictionConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse;
-			//	printf("pt->m_appliedImpulseLateral1 = %f\n", f);
-			pt->m_appliedImpulseLateral1 = m_tmpSolverContactFrictionConstraintPool[solveManifold.m_frictionIndex].m_appliedImpulse;
-			//printf("pt->m_appliedImpulseLateral1 = %f\n", pt->m_appliedImpulseLateral1);
-			if ((infoGlobal.m_solverMode & SOLVER_USE_2_FRICTION_DIRECTIONS))
-			{
-				pt->m_appliedImpulseLateral2 = m_tmpSolverContactFrictionConstraintPool[solveManifold.m_frictionIndex+1].m_appliedImpulse;
-			}
-			//do a callback here?
-		}
-	}
-
-	numPoolConstraints = m_tmpSolverNonContactConstraintPool.size();
-	for (j=0;j<numPoolConstraints;j++)
-	{
-		const btSolverConstraint& solverConstr = m_tmpSolverNonContactConstraintPool[j];
-		btTypedConstraint* constr = (btTypedConstraint*)solverConstr.m_originalContactPoint;
-		btJointFeedback* fb = constr->getJointFeedback();
-		if (fb)
-		{
-			fb->m_appliedForceBodyA += solverConstr.m_contactNormal*solverConstr.m_appliedImpulse*constr->getRigidBodyA().getLinearFactor()/infoGlobal.m_timeStep;
-			fb->m_appliedForceBodyB += -solverConstr.m_contactNormal*solverConstr.m_appliedImpulse*constr->getRigidBodyB().getLinearFactor()/infoGlobal.m_timeStep;
-			fb->m_appliedTorqueBodyA += solverConstr.m_relpos1CrossNormal* constr->getRigidBodyA().getAngularFactor()*solverConstr.m_appliedImpulse/infoGlobal.m_timeStep;
-			fb->m_appliedTorqueBodyB += -solverConstr.m_relpos1CrossNormal* constr->getRigidBodyB().getAngularFactor()*solverConstr.m_appliedImpulse/infoGlobal.m_timeStep;
-			
-		}
-
-		constr->internalSetAppliedImpulse(solverConstr.m_appliedImpulse);
-		if (btFabs(solverConstr.m_appliedImpulse)>=constr->getBreakingImpulseThreshold())
-		{
-			constr->setEnabled(false);
-		}
-	}
-
-	{
-		BT_PROFILE("write back velocities and transforms");
-		for ( i=0;i<m_tmpSolverBodyPool.size();i++)
-		{
-			int bodyIndex = m_tmpSolverBodyPool[i].m_originalBodyIndex;
-			//btAssert(i==bodyIndex);
-
-			btRigidBodyCL* body = &bodies[bodyIndex];
-			if (body->getInvMass())
-			{
-				if (infoGlobal.m_splitImpulse)
-					m_tmpSolverBodyPool[i].writebackVelocityAndTransform(infoGlobal.m_timeStep, infoGlobal.m_splitImpulseTurnErp);
-				else
-					m_tmpSolverBodyPool[i].writebackVelocity();
-
-				if (m_usePgs)
-				{
-					body->m_linVel = m_tmpSolverBodyPool[i].m_linearVelocity;
-					body->m_angVel = m_tmpSolverBodyPool[i].m_angularVelocity;
-				} else
-				{
-					btScalar factor = 1.f/btScalar(m_bodyCount[bodyIndex]);
-
-					btVector3 deltaLinVel = m_deltaLinearVelocities[bodyIndex]*factor;
-					btVector3 deltaAngVel = m_deltaAngularVelocities[bodyIndex]*factor;
-					//printf("body %d\n",bodyIndex);
-					//printf("deltaLinVel = %f,%f,%f\n",deltaLinVel.getX(),deltaLinVel.getY(),deltaLinVel.getZ());
-					//printf("deltaAngVel = %f,%f,%f\n",deltaAngVel.getX(),deltaAngVel.getY(),deltaAngVel.getZ());
-
-					body->m_linVel += deltaLinVel;
-					body->m_angVel += deltaAngVel;
-				}
-			
-				if (infoGlobal.m_splitImpulse)
-				{
-					body->m_pos = m_tmpSolverBodyPool[i].m_worldTransform.getOrigin();
-					btQuaternion orn;
-					orn = m_tmpSolverBodyPool[i].m_worldTransform.getRotation();
-					body->m_quat = orn;
-				}
-			}
-		}
-	}
-
-
-	m_tmpSolverContactConstraintPool.resizeNoInitialize(0);
-	m_tmpSolverNonContactConstraintPool.resizeNoInitialize(0);
-	m_tmpSolverContactFrictionConstraintPool.resizeNoInitialize(0);
-	m_tmpSolverContactRollingFrictionConstraintPool.resizeNoInitialize(0);
-
-	m_tmpSolverBodyPool.resizeNoInitialize(0);
-	return 0.f;
-}
-
-
-
-void	btPgsJacobiSolver::reset()
-{
-	m_btSeed2 = 0;
-}
-\ No newline at end of file
diff --git a/opencl/gpu_rigidbody/host/btPgsJacobiSolver.h b/opencl/gpu_rigidbody/host/btPgsJacobiSolver.h
deleted file mode 100644
index 703456d2c..000000000
--- a/opencl/gpu_rigidbody/host/btPgsJacobiSolver.h
+++ /dev/null
@@ -1,145 +0,0 @@
-#ifndef BT_PGS_JACOBI_SOLVER
-#define BT_PGS_JACOBI_SOLVER
-
-
-struct btContact4;
-struct btContactPoint;
-
-
-class btDispatcher;
-
-#include "btTypedConstraint.h"
-#include "btContactSolverInfo.h"
-#include "btSolverBody.h"
-#include "btSolverConstraint.h"
-#include "btConstraintSolver.h"
-struct btRigidBodyCL;
-struct btInertiaCL;
-
-class btPgsJacobiSolver
-{
-
-protected:
-	btAlignedObjectArray<btSolverBody>      m_tmpSolverBodyPool;
-	btConstraintArray			m_tmpSolverContactConstraintPool;
-	btConstraintArray			m_tmpSolverNonContactConstraintPool;
-	btConstraintArray			m_tmpSolverContactFrictionConstraintPool;
-	btConstraintArray			m_tmpSolverContactRollingFrictionConstraintPool;
-
-	btAlignedObjectArray<int>	m_orderTmpConstraintPool;
-	btAlignedObjectArray<int>	m_orderNonContactConstraintPool;
-	btAlignedObjectArray<int>	m_orderFrictionConstraintPool;
-	btAlignedObjectArray<btTypedConstraint::btConstraintInfo1> m_tmpConstraintSizesPool;
-	
-	btAlignedObjectArray<int>		m_bodyCount;
-	btAlignedObjectArray<int>		m_bodyCountCheck;
-	
-	btAlignedObjectArray<btVector3>	m_deltaLinearVelocities;
-	btAlignedObjectArray<btVector3>	m_deltaAngularVelocities;
-
-	bool						m_usePgs;
-	void						averageVelocities();
-
-	int							m_maxOverrideNumSolverIterations;
-	btScalar	getContactProcessingThreshold(btContact4* contact)
-	{
-		return 0.02f;
-	}
-	void setupFrictionConstraint(	btRigidBodyCL* bodies,btInertiaCL* inertias, btSolverConstraint& solverConstraint, const btVector3& normalAxis,int solverBodyIdA,int  solverBodyIdB,
-									btContactPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,
-									btRigidBodyCL* colObj0,btRigidBodyCL* colObj1, btScalar relaxation, 
-									btScalar desiredVelocity=0., btScalar cfmSlip=0.);
-
-	void setupRollingFrictionConstraint(btRigidBodyCL* bodies,btInertiaCL* inertias,	btSolverConstraint& solverConstraint, const btVector3& normalAxis,int solverBodyIdA,int  solverBodyIdB,
-									btContactPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,
-									btRigidBodyCL* colObj0,btRigidBodyCL* colObj1, btScalar relaxation, 
-									btScalar desiredVelocity=0., btScalar cfmSlip=0.);
-
-	btSolverConstraint&	addFrictionConstraint(btRigidBodyCL* bodies,btInertiaCL* inertias,const btVector3& normalAxis,int solverBodyIdA,int solverBodyIdB,int frictionIndex,btContactPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btRigidBodyCL* colObj0,btRigidBodyCL* colObj1, btScalar relaxation, btScalar desiredVelocity=0., btScalar cfmSlip=0.);
-	btSolverConstraint&	addRollingFrictionConstraint(btRigidBodyCL* bodies,btInertiaCL* inertias,const btVector3& normalAxis,int solverBodyIdA,int solverBodyIdB,int frictionIndex,btContactPoint& cp,const btVector3& rel_pos1,const btVector3& rel_pos2,btRigidBodyCL* colObj0,btRigidBodyCL* colObj1, btScalar relaxation, btScalar desiredVelocity=0, btScalar cfmSlip=0.f);
-
-
-	void setupContactConstraint(btRigidBodyCL* bodies, btInertiaCL* inertias,
-								btSolverConstraint& solverConstraint, int solverBodyIdA, int solverBodyIdB, btContactPoint& cp, 
-								const btContactSolverInfo& infoGlobal, btVector3& vel, btScalar& rel_vel, btScalar& relaxation, 
-								btVector3& rel_pos1, btVector3& rel_pos2);
-
-	void setFrictionConstraintImpulse( btRigidBodyCL* bodies, btInertiaCL* inertias,btSolverConstraint& solverConstraint, int solverBodyIdA,int solverBodyIdB, 
-										 btContactPoint& cp, const btContactSolverInfo& infoGlobal);
-
-	///m_btSeed2 is used for re-arranging the constraint rows. improves convergence/quality of friction
-	unsigned long	m_btSeed2;
-
-	
-	btScalar restitutionCurve(btScalar rel_vel, btScalar restitution);
-
-	void	convertContact(btRigidBodyCL* bodies, btInertiaCL* inertias,btContact4* manifold,const btContactSolverInfo& infoGlobal);
-
-
-	void	resolveSplitPenetrationSIMD(
-     btSolverBody& bodyA,btSolverBody& bodyB,
-        const btSolverConstraint& contactConstraint);
-
-	void	resolveSplitPenetrationImpulseCacheFriendly(
-       btSolverBody& bodyA,btSolverBody& bodyB,
-        const btSolverConstraint& contactConstraint);
-
-	//internal method
-	int		getOrInitSolverBody(int bodyIndex, btRigidBodyCL* bodies,btInertiaCL* inertias);
-	void	initSolverBody(int bodyIndex, btSolverBody* solverBody, btRigidBodyCL* collisionObject);
-
-	void	resolveSingleConstraintRowGeneric(btSolverBody& bodyA,btSolverBody& bodyB,const btSolverConstraint& contactConstraint);
-
-	void	resolveSingleConstraintRowGenericSIMD(btSolverBody& bodyA,btSolverBody& bodyB,const btSolverConstraint& contactConstraint);
-	
-	void	resolveSingleConstraintRowLowerLimit(btSolverBody& bodyA,btSolverBody& bodyB,const btSolverConstraint& contactConstraint);
-	
-	void	resolveSingleConstraintRowLowerLimitSIMD(btSolverBody& bodyA,btSolverBody& bodyB,const btSolverConstraint& contactConstraint);
-		
-protected:
-
-	virtual btScalar solveGroupCacheFriendlySetup(btRigidBodyCL* bodies, btInertiaCL* inertias,int numBodies,btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal);
-
-
-	virtual btScalar solveGroupCacheFriendlyIterations(btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal);
-	virtual void solveGroupCacheFriendlySplitImpulseIterations(btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal);
-	btScalar solveSingleIteration(int iteration, btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal);
-
-
-	virtual btScalar solveGroupCacheFriendlyFinish(btRigidBodyCL* bodies, btInertiaCL* inertias,int numBodies,const btContactSolverInfo& infoGlobal);
-
-
-public:
-
-	BT_DECLARE_ALIGNED_ALLOCATOR();
-	
-	btPgsJacobiSolver();
-	virtual ~btPgsJacobiSolver();
-
-	void	solveContacts(int numBodies, btRigidBodyCL* bodies, btInertiaCL* inertias, int numContacts, btContact4* contacts);
-	
-	btScalar solveGroup(btRigidBodyCL* bodies,btInertiaCL* inertias,int numBodies,btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal);
-
-	///clear internal cached data and reset random seed
-	virtual	void	reset();
-	
-	unsigned long btRand2();
-
-	int btRandInt2 (int n);
-
-	void	setRandSeed(unsigned long seed)
-	{
-		m_btSeed2 = seed;
-	}
-	unsigned long	getRandSeed() const
-	{
-		return m_btSeed2;
-	}
-
-
-
-
-};
-
-#endif //BT_PGS_JACOBI_SOLVER
-
diff --git a/opencl/gpu_rigidbody/host/btRigidBody.h b/opencl/gpu_rigidbody/host/btRigidBody.h
deleted file mode 100644
index 087dfc867..000000000
--- a/opencl/gpu_rigidbody/host/btRigidBody.h
+++ /dev/null
@@ -1,594 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef BT_RIGIDBODY_H
-#define BT_RIGIDBODY_H
-
-#include "BulletCommon/btAlignedObjectArray.h"
-#include "BulletCommon/btTransform.h"
-#include "btBroadphaseProxy.h"
-#include "btCollisionObject.h"
-
-class btCollisionShape;
-class btMotionState;
-class btTypedConstraint;
-
-
-extern btScalar gDeactivationTime;
-extern bool gDisableDeactivation;
-
-#ifdef BT_USE_DOUBLE_PRECISION
-#define btRigidBodyData	btRigidBodyDoubleData
-#define btRigidBodyDataName	"btRigidBodyDoubleData"
-#else
-#define btRigidBodyData	btRigidBodyFloatData
-#define btRigidBodyDataName	"btRigidBodyFloatData"
-#endif //BT_USE_DOUBLE_PRECISION
-
-
-enum	btRigidBodyFlags
-{
-	BT_DISABLE_WORLD_GRAVITY = 1,
-	///The BT_ENABLE_GYROPSCOPIC_FORCE can easily introduce instability
-	///So generally it is best to not enable it. 
-	///If really needed, run at a high frequency like 1000 Hertz:	///See Demos/GyroscopicDemo for an example use
-	BT_ENABLE_GYROPSCOPIC_FORCE = 2
-};
-
-
-///The btRigidBody is the main class for rigid body objects. It is derived from btCollisionObject, so it keeps a pointer to a btCollisionShape.
-///It is recommended for performance and memory use to share btCollisionShape objects whenever possible.
-///There are 3 types of rigid bodies: 
-///- A) Dynamic rigid bodies, with positive mass. Motion is controlled by rigid body dynamics.
-///- B) Fixed objects with zero mass. They are not moving (basically collision objects)
-///- C) Kinematic objects, which are objects without mass, but the user can move them. There is on-way interaction, and Bullet calculates a velocity based on the timestep and previous and current world transform.
-///Bullet automatically deactivates dynamic rigid bodies, when the velocity is below a threshold for a given time.
-///Deactivated (sleeping) rigid bodies don't take any processing time, except a minor broadphase collision detection impact (to allow active objects to activate/wake up sleeping objects)
-class btRigidBody  : public btCollisionObject
-{
-
-	btMatrix3x3	m_invInertiaTensorWorld;
-	btVector3		m_linearVelocity;
-	btVector3		m_angularVelocity;
-	btScalar		m_inverseMass;
-	btVector3		m_linearFactor;
-
-	btVector3		m_gravity;	
-	btVector3		m_gravity_acceleration;
-	btVector3		m_invInertiaLocal;
-	btVector3		m_totalForce;
-	btVector3		m_totalTorque;
-	
-	btScalar		m_linearDamping;
-	btScalar		m_angularDamping;
-
-	bool			m_additionalDamping;
-	btScalar		m_additionalDampingFactor;
-	btScalar		m_additionalLinearDampingThresholdSqr;
-	btScalar		m_additionalAngularDampingThresholdSqr;
-	btScalar		m_additionalAngularDampingFactor;
-
-
-	btScalar		m_linearSleepingThreshold;
-	btScalar		m_angularSleepingThreshold;
-
-	//m_optionalMotionState allows to automatic synchronize the world transform for active objects
-	btMotionState*	m_optionalMotionState;
-
-	//keep track of typed constraints referencing this rigid body
-	btAlignedObjectArray<btTypedConstraint*> m_constraintRefs;
-
-	int				m_rigidbodyFlags;
-	
-	int				m_debugBodyId;
-	
-
-protected:
-
-	btVector3		m_angularFactor;
-	
-
-public:
-
-
-	///The btRigidBodyConstructionInfo structure provides information to create a rigid body. Setting mass to zero creates a fixed (non-dynamic) rigid body.
-	///For dynamic objects, you can use the collision shape to approximate the local inertia tensor, otherwise use the zero vector (default argument)
-	///You can use the motion state to synchronize the world transform between physics and graphics objects. 
-	///And if the motion state is provided, the rigid body will initialize its initial world transform from the motion state,
-	///m_startWorldTransform is only used when you don't provide a motion state.
-	struct	btRigidBodyConstructionInfo
-	{
-		btScalar			m_mass;
-
-		///When a motionState is provided, the rigid body will initialize its world transform from the motion state
-		///In this case, m_startWorldTransform is ignored.
-		btMotionState*		m_motionState;
-		btTransform	m_startWorldTransform;
-
-		btCollisionShape*	m_collisionShape;
-		btVector3			m_localInertia;
-		btScalar			m_linearDamping;
-		btScalar			m_angularDamping;
-
-		///best simulation results when friction is non-zero
-		btScalar			m_friction;
-		///the m_rollingFriction prevents rounded shapes, such as spheres, cylinders and capsules from rolling forever.
-		///See Bullet/Demos/RollingFrictionDemo for usage
-		btScalar			m_rollingFriction;
-		///best simulation results using zero restitution.
-		btScalar			m_restitution;
-
-		btScalar			m_linearSleepingThreshold;
-		btScalar			m_angularSleepingThreshold;
-
-		//Additional damping can help avoiding lowpass jitter motion, help stability for ragdolls etc.
-		//Such damping is undesirable, so once the overall simulation quality of the rigid body dynamics system has improved, this should become obsolete
-		bool				m_additionalDamping;
-		btScalar			m_additionalDampingFactor;
-		btScalar			m_additionalLinearDampingThresholdSqr;
-		btScalar			m_additionalAngularDampingThresholdSqr;
-		btScalar			m_additionalAngularDampingFactor;
-
-		btRigidBodyConstructionInfo(	btScalar mass, btMotionState* motionState, btCollisionShape* collisionShape, const btVector3& localInertia=btVector3(0,0,0)):
-		m_mass(mass),
-			m_motionState(motionState),
-			m_collisionShape(collisionShape),
-			m_localInertia(localInertia),
-			m_linearDamping(btScalar(0.)),
-			m_angularDamping(btScalar(0.)),
-			m_friction(btScalar(0.5)),
-			m_rollingFriction(btScalar(0)),
-			m_restitution(btScalar(0.)),
-			m_linearSleepingThreshold(btScalar(0.8)),
-			m_angularSleepingThreshold(btScalar(1.f)),
-			m_additionalDamping(false),
-			m_additionalDampingFactor(btScalar(0.005)),
-			m_additionalLinearDampingThresholdSqr(btScalar(0.01)),
-			m_additionalAngularDampingThresholdSqr(btScalar(0.01)),
-			m_additionalAngularDampingFactor(btScalar(0.01))
-		{
-			m_startWorldTransform.setIdentity();
-		}
-	};
-
-	///btRigidBody constructor using construction info
-	btRigidBody(	const btRigidBodyConstructionInfo& constructionInfo);
-
-	///btRigidBody constructor for backwards compatibility. 
-	///To specify friction (etc) during rigid body construction, please use the other constructor (using btRigidBodyConstructionInfo)
-	btRigidBody(	btScalar mass, btMotionState* motionState, btCollisionShape* collisionShape, const btVector3& localInertia=btVector3(0,0,0));
-
-
-	virtual ~btRigidBody()
-        { 
-                //No constraints should point to this rigidbody
-		//Remove constraints from the dynamics world before you delete the related rigidbodies. 
-                btAssert(m_constraintRefs.size()==0); 
-        }
-
-protected:
-
-	///setupRigidBody is only used internally by the constructor
-	void	setupRigidBody(const btRigidBodyConstructionInfo& constructionInfo);
-
-public:
-
-	void			proceedToTransform(const btTransform& newTrans); 
-	
-	///to keep collision detection and dynamics separate we don't store a rigidbody pointer
-	///but a rigidbody is derived from btCollisionObject, so we can safely perform an upcast
-	static const btRigidBody*	upcast(const btCollisionObject* colObj)
-	{
-		if (colObj->getInternalType()&btCollisionObject::CO_RIGID_BODY)
-			return (const btRigidBody*)colObj;
-		return 0;
-	}
-	static btRigidBody*	upcast(btCollisionObject* colObj)
-	{
-		if (colObj->getInternalType()&btCollisionObject::CO_RIGID_BODY)
-			return (btRigidBody*)colObj;
-		return 0;
-	}
-	
-	/// continuous collision detection needs prediction
-	void			predictIntegratedTransform(btScalar step, btTransform& predictedTransform) ;
-	
-	void			saveKinematicState(btScalar step);
-	
-	void			applyGravity();
-	
-	void			setGravity(const btVector3& acceleration);  
-
-	const btVector3&	getGravity() const
-	{
-		return m_gravity_acceleration;
-	}
-
-	void			setDamping(btScalar lin_damping, btScalar ang_damping);
-
-	btScalar getLinearDamping() const
-	{
-		return m_linearDamping;
-	}
-
-	btScalar getAngularDamping() const
-	{
-		return m_angularDamping;
-	}
-
-	btScalar getLinearSleepingThreshold() const
-	{
-		return m_linearSleepingThreshold;
-	}
-
-	btScalar getAngularSleepingThreshold() const
-	{
-		return m_angularSleepingThreshold;
-	}
-
-	void			applyDamping(btScalar timeStep);
-
-	SIMD_FORCE_INLINE const btCollisionShape*	getCollisionShape() const {
-		return m_collisionShape;
-	}
-
-	SIMD_FORCE_INLINE btCollisionShape*	getCollisionShape() {
-			return m_collisionShape;
-	}
-	
-	void			setMassProps(btScalar mass, const btVector3& inertia);
-	
-	const btVector3& getLinearFactor() const
-	{
-		return m_linearFactor;
-	}
-	void setLinearFactor(const btVector3& linearFactor)
-	{
-		m_linearFactor = linearFactor;
-	}
-	btScalar		getInvMass() const { return m_inverseMass; }
-	const btMatrix3x3& getInvInertiaTensorWorld() const { 
-		return m_invInertiaTensorWorld; 
-	}
-		
-	void			integrateVelocities(btScalar step);
-
-	void			setCenterOfMassTransform(const btTransform& xform);
-
-	void			applyCentralForce(const btVector3& force)
-	{
-		m_totalForce += force*m_linearFactor;
-	}
-
-	const btVector3& getTotalForce() const
-	{
-		return m_totalForce;
-	};
-
-	const btVector3& getTotalTorque() const
-	{
-		return m_totalTorque;
-	};
-    
-	const btVector3& getInvInertiaDiagLocal() const
-	{
-		return m_invInertiaLocal;
-	};
-
-	void	setInvInertiaDiagLocal(const btVector3& diagInvInertia)
-	{
-		m_invInertiaLocal = diagInvInertia;
-	}
-
-	void	setSleepingThresholds(btScalar linear,btScalar angular)
-	{
-		m_linearSleepingThreshold = linear;
-		m_angularSleepingThreshold = angular;
-	}
-
-	void	applyTorque(const btVector3& torque)
-	{
-		m_totalTorque += torque*m_angularFactor;
-	}
-	
-	void	applyForce(const btVector3& force, const btVector3& rel_pos) 
-	{
-		applyCentralForce(force);
-		applyTorque(rel_pos.cross(force*m_linearFactor));
-	}
-	
-	void applyCentralImpulse(const btVector3& impulse)
-	{
-		m_linearVelocity += impulse *m_linearFactor * m_inverseMass;
-	}
-	
-  	void applyTorqueImpulse(const btVector3& torque)
-	{
-			m_angularVelocity += m_invInertiaTensorWorld * torque * m_angularFactor;
-	}
-	
-	void applyImpulse(const btVector3& impulse, const btVector3& rel_pos) 
-	{
-		if (m_inverseMass != btScalar(0.))
-		{
-			applyCentralImpulse(impulse);
-			if (m_angularFactor)
-			{
-				applyTorqueImpulse(rel_pos.cross(impulse*m_linearFactor));
-			}
-		}
-	}
-
-	void clearForces() 
-	{
-		m_totalForce.setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0));
-		m_totalTorque.setValue(btScalar(0.0), btScalar(0.0), btScalar(0.0));
-	}
-	
-	void updateInertiaTensor();    
-	
-	const btVector3&     getCenterOfMassPosition() const { 
-		return m_worldTransform.getOrigin(); 
-	}
-	btQuaternion getOrientation() const;
-	
-	const btTransform&  getCenterOfMassTransform() const { 
-		return m_worldTransform; 
-	}
-	const btVector3&   getLinearVelocity() const { 
-		return m_linearVelocity; 
-	}
-	const btVector3&    getAngularVelocity() const { 
-		return m_angularVelocity; 
-	}
-	
-
-	inline void setLinearVelocity(const btVector3& lin_vel)
-	{ 
-		m_linearVelocity = lin_vel; 
-	}
-
-	inline void setAngularVelocity(const btVector3& ang_vel) 
-	{ 
-		m_angularVelocity = ang_vel; 
-	}
-
-	btVector3 getVelocityInLocalPoint(const btVector3& rel_pos) const
-	{
-		//we also calculate lin/ang velocity for kinematic objects
-		return m_linearVelocity + m_angularVelocity.cross(rel_pos);
-
-		//for kinematic objects, we could also use use:
-		//		return 	(m_worldTransform(rel_pos) - m_interpolationWorldTransform(rel_pos)) / m_kinematicTimeStep;
-	}
-
-	void translate(const btVector3& v) 
-	{
-		m_worldTransform.getOrigin() += v; 
-	}
-
-	
-	void	getAabb(btVector3& aabbMin,btVector3& aabbMax) const;
-
-
-
-
-	
-	SIMD_FORCE_INLINE btScalar computeImpulseDenominator(const btVector3& pos, const btVector3& normal) const
-	{
-		btVector3 r0 = pos - getCenterOfMassPosition();
-
-		btVector3 c0 = (r0).cross(normal);
-
-		btVector3 vec = (c0 * getInvInertiaTensorWorld()).cross(r0);
-
-		return m_inverseMass + normal.dot(vec);
-
-	}
-
-	SIMD_FORCE_INLINE btScalar computeAngularImpulseDenominator(const btVector3& axis) const
-	{
-		btVector3 vec = axis * getInvInertiaTensorWorld();
-		return axis.dot(vec);
-	}
-
-	SIMD_FORCE_INLINE void	updateDeactivation(btScalar timeStep)
-	{
-		if ( (getActivationState() == ISLAND_SLEEPING) || (getActivationState() == DISABLE_DEACTIVATION))
-			return;
-
-		if ((getLinearVelocity().length2() < m_linearSleepingThreshold*m_linearSleepingThreshold) &&
-			(getAngularVelocity().length2() < m_angularSleepingThreshold*m_angularSleepingThreshold))
-		{
-			m_deactivationTime += timeStep;
-		} else
-		{
-			m_deactivationTime=btScalar(0.);
-			setActivationState(0);
-		}
-
-	}
-
-	SIMD_FORCE_INLINE bool	wantsSleeping()
-	{
-
-		if (getActivationState() == DISABLE_DEACTIVATION)
-			return false;
-
-		//disable deactivation
-		if (gDisableDeactivation || (gDeactivationTime == btScalar(0.)))
-			return false;
-
-		if ( (getActivationState() == ISLAND_SLEEPING) || (getActivationState() == WANTS_DEACTIVATION))
-			return true;
-
-		if (m_deactivationTime> gDeactivationTime)
-		{
-			return true;
-		}
-		return false;
-	}
-
-
-	
-	const btBroadphaseProxy*	getBroadphaseProxy() const
-	{
-		return m_broadphaseHandle;
-	}
-	btBroadphaseProxy*	getBroadphaseProxy() 
-	{
-		return m_broadphaseHandle;
-	}
-	void	setNewBroadphaseProxy(btBroadphaseProxy* broadphaseProxy)
-	{
-		m_broadphaseHandle = broadphaseProxy;
-	}
-
-	//btMotionState allows to automatic synchronize the world transform for active objects
-	btMotionState*	getMotionState()
-	{
-		return m_optionalMotionState;
-	}
-	const btMotionState*	getMotionState() const
-	{
-		return m_optionalMotionState;
-	}
-	void	setMotionState(btMotionState* motionState)
-	{
-		m_optionalMotionState = motionState;
-		if (m_optionalMotionState)
-			motionState->getWorldTransform(m_worldTransform);
-	}
-
-	//for experimental overriding of friction/contact solver func
-	int	m_contactSolverType;
-	int	m_frictionSolverType;
-
-	void	setAngularFactor(const btVector3& angFac)
-	{
-		m_angularFactor = angFac;
-	}
-
-	void	setAngularFactor(btScalar angFac)
-	{
-		m_angularFactor.setValue(angFac,angFac,angFac);
-	}
-	const btVector3&	getAngularFactor() const
-	{
-		return m_angularFactor;
-	}
-
-	//is this rigidbody added to a btCollisionWorld/btDynamicsWorld/btBroadphase?
-	bool	isInWorld() const
-	{
-		return (getBroadphaseProxy() != 0);
-	}
-
-	virtual bool checkCollideWithOverride(const  btCollisionObject* co) const;
-
-	void addConstraintRef(btTypedConstraint* c);
-	void removeConstraintRef(btTypedConstraint* c);
-
-	btTypedConstraint* getConstraintRef(int index)
-	{
-		return m_constraintRefs[index];
-	}
-
-	int getNumConstraintRefs() const
-	{
-		return m_constraintRefs.size();
-	}
-
-	void	setFlags(int flags)
-	{
-		m_rigidbodyFlags = flags;
-	}
-
-	int getFlags() const
-	{
-		return m_rigidbodyFlags;
-	}
-
-	btVector3 computeGyroscopicForce(btScalar maxGyroscopicForce) const;
-
-	///////////////////////////////////////////////
-
-	virtual	int	calculateSerializeBufferSize()	const;
-
-	///fills the dataBuffer and returns the struct name (and 0 on failure)
-	virtual	const char*	serialize(void* dataBuffer,  class btSerializer* serializer) const;
-
-	virtual void serializeSingleObject(class btSerializer* serializer) const;
-
-};
-
-//@todo add m_optionalMotionState and m_constraintRefs to btRigidBodyData
-///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
-struct	btRigidBodyFloatData
-{
-	btCollisionObjectFloatData	m_collisionObjectData;
-	btMatrix3x3FloatData		m_invInertiaTensorWorld;
-	btVector3FloatData		m_linearVelocity;
-	btVector3FloatData		m_angularVelocity;
-	btVector3FloatData		m_angularFactor;
-	btVector3FloatData		m_linearFactor;
-	btVector3FloatData		m_gravity;	
-	btVector3FloatData		m_gravity_acceleration;
-	btVector3FloatData		m_invInertiaLocal;
-	btVector3FloatData		m_totalForce;
-	btVector3FloatData		m_totalTorque;
-	float					m_inverseMass;
-	float					m_linearDamping;
-	float					m_angularDamping;
-	float					m_additionalDampingFactor;
-	float					m_additionalLinearDampingThresholdSqr;
-	float					m_additionalAngularDampingThresholdSqr;
-	float					m_additionalAngularDampingFactor;
-	float					m_linearSleepingThreshold;
-	float					m_angularSleepingThreshold;
-	int						m_additionalDamping;
-};
-
-///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
-struct	btRigidBodyDoubleData
-{
-	btCollisionObjectDoubleData	m_collisionObjectData;
-	btMatrix3x3DoubleData		m_invInertiaTensorWorld;
-	btVector3DoubleData		m_linearVelocity;
-	btVector3DoubleData		m_angularVelocity;
-	btVector3DoubleData		m_angularFactor;
-	btVector3DoubleData		m_linearFactor;
-	btVector3DoubleData		m_gravity;	
-	btVector3DoubleData		m_gravity_acceleration;
-	btVector3DoubleData		m_invInertiaLocal;
-	btVector3DoubleData		m_totalForce;
-	btVector3DoubleData		m_totalTorque;
-	double					m_inverseMass;
-	double					m_linearDamping;
-	double					m_angularDamping;
-	double					m_additionalDampingFactor;
-	double					m_additionalLinearDampingThresholdSqr;
-	double					m_additionalAngularDampingThresholdSqr;
-	double					m_additionalAngularDampingFactor;
-	double					m_linearSleepingThreshold;
-	double					m_angularSleepingThreshold;
-	int						m_additionalDamping;
-	char	m_padding[4];
-};
-
-
-
-#endif //BT_RIGIDBODY_H
-
diff --git a/opencl/gpu_rigidbody/host/btSolverBody.h b/opencl/gpu_rigidbody/host/btSolverBody.h
deleted file mode 100644
index ea5e49ef9..000000000
--- a/opencl/gpu_rigidbody/host/btSolverBody.h
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef BT_SOLVER_BODY_H
-#define BT_SOLVER_BODY_H
-
-class	btRigidBody;
-#include "BulletCommon/btVector3.h"
-#include "BulletCommon/btMatrix3x3.h"
-
-#include "BulletCommon/btAlignedAllocator.h"
-#include "BulletCommon/btTransformUtil.h"
-
-///Until we get other contributions, only use SIMD on Windows, when using Visual Studio 2008 or later, and not double precision
-#ifdef BT_USE_SSE
-#define USE_SIMD 1
-#endif //
-
-
-#ifdef USE_SIMD
-
-struct	btSimdScalar
-{
-	SIMD_FORCE_INLINE	btSimdScalar()
-	{
-
-	}
-
-	SIMD_FORCE_INLINE	btSimdScalar(float	fl)
-	:m_vec128 (_mm_set1_ps(fl))
-	{
-	}
-
-	SIMD_FORCE_INLINE	btSimdScalar(__m128 v128)
-		:m_vec128(v128)
-	{
-	}
-	union
-	{
-		__m128		m_vec128;
-		float		m_floats[4];
-		int			m_ints[4];
-		btScalar	m_unusedPadding;
-	};
-	SIMD_FORCE_INLINE	__m128	get128()
-	{
-		return m_vec128;
-	}
-
-	SIMD_FORCE_INLINE	const __m128	get128() const
-	{
-		return m_vec128;
-	}
-
-	SIMD_FORCE_INLINE	void	set128(__m128 v128)
-	{
-		m_vec128 = v128;
-	}
-
-	SIMD_FORCE_INLINE	operator       __m128()       
-	{ 
-		return m_vec128; 
-	}
-	SIMD_FORCE_INLINE	operator const __m128() const 
-	{ 
-		return m_vec128; 
-	}
-	
-	SIMD_FORCE_INLINE	operator float() const 
-	{ 
-		return m_floats[0]; 
-	}
-
-};
-
-///@brief Return the elementwise product of two btSimdScalar
-SIMD_FORCE_INLINE btSimdScalar 
-operator*(const btSimdScalar& v1, const btSimdScalar& v2) 
-{
-	return btSimdScalar(_mm_mul_ps(v1.get128(),v2.get128()));
-}
-
-///@brief Return the elementwise product of two btSimdScalar
-SIMD_FORCE_INLINE btSimdScalar 
-operator+(const btSimdScalar& v1, const btSimdScalar& v2) 
-{
-	return btSimdScalar(_mm_add_ps(v1.get128(),v2.get128()));
-}
-
-
-#else
-#define btSimdScalar btScalar
-#endif
-
-///The btSolverBody is an internal datastructure for the constraint solver. Only necessary data is packed to increase cache coherence/performance.
-ATTRIBUTE_ALIGNED64 (struct)	btSolverBody
-{
-	BT_DECLARE_ALIGNED_ALLOCATOR();
-	btTransform		m_worldTransform;
-	btVector3		m_deltaLinearVelocity;
-	btVector3		m_deltaAngularVelocity;
-	btVector3		m_angularFactor;
-	btVector3		m_linearFactor;
-	btVector3		m_invMass;
-	btVector3		m_pushVelocity;
-	btVector3		m_turnVelocity;
-	btVector3		m_linearVelocity;
-	btVector3		m_angularVelocity;
-
-	union 
-	{
-		void*	m_originalBody;
-		int		m_originalBodyIndex;
-	};
-
-
-	void	setWorldTransform(const btTransform& worldTransform)
-	{
-		m_worldTransform = worldTransform;
-	}
-
-	const btTransform& getWorldTransform() const
-	{
-		return m_worldTransform;
-	}
-	
-	SIMD_FORCE_INLINE void	getVelocityInLocalPointObsolete(const btVector3& rel_pos, btVector3& velocity ) const
-	{
-		if (m_originalBody)
-			velocity = m_linearVelocity+m_deltaLinearVelocity + (m_angularVelocity+m_deltaAngularVelocity).cross(rel_pos);
-		else
-			velocity.setValue(0,0,0);
-	}
-
-	SIMD_FORCE_INLINE void	getAngularVelocity(btVector3& angVel) const
-	{
-		if (m_originalBody)
-			angVel =m_angularVelocity+m_deltaAngularVelocity;
-		else
-			angVel.setValue(0,0,0);
-	}
-
-
-	//Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position
-	SIMD_FORCE_INLINE void applyImpulse(const btVector3& linearComponent, const btVector3& angularComponent,const btScalar impulseMagnitude)
-	{
-		if (m_originalBody)
-		{
-			m_deltaLinearVelocity += linearComponent*impulseMagnitude*m_linearFactor;
-			m_deltaAngularVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
-		}
-	}
-
-	SIMD_FORCE_INLINE void internalApplyPushImpulse(const btVector3& linearComponent, const btVector3& angularComponent,btScalar impulseMagnitude)
-	{
-		if (m_originalBody)
-		{
-			m_pushVelocity += linearComponent*impulseMagnitude*m_linearFactor;
-			m_turnVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
-		}
-	}
-
-
-
-	const btVector3& getDeltaLinearVelocity() const
-	{
-		return m_deltaLinearVelocity;
-	}
-
-	const btVector3& getDeltaAngularVelocity() const
-	{
-		return m_deltaAngularVelocity;
-	}
-
-	const btVector3& getPushVelocity() const 
-	{
-		return m_pushVelocity;
-	}
-
-	const btVector3& getTurnVelocity() const 
-	{
-		return m_turnVelocity;
-	}
-
-
-	////////////////////////////////////////////////
-	///some internal methods, don't use them
-		
-	btVector3& internalGetDeltaLinearVelocity()
-	{
-		return m_deltaLinearVelocity;
-	}
-
-	btVector3& internalGetDeltaAngularVelocity()
-	{
-		return m_deltaAngularVelocity;
-	}
-
-	const btVector3& internalGetAngularFactor() const
-	{
-		return m_angularFactor;
-	}
-
-	const btVector3& internalGetInvMass() const
-	{
-		return m_invMass;
-	}
-
-	void internalSetInvMass(const btVector3& invMass)
-	{
-		m_invMass = invMass;
-	}
-	
-	btVector3& internalGetPushVelocity()
-	{
-		return m_pushVelocity;
-	}
-
-	btVector3& internalGetTurnVelocity()
-	{
-		return m_turnVelocity;
-	}
-
-	SIMD_FORCE_INLINE void	internalGetVelocityInLocalPointObsolete(const btVector3& rel_pos, btVector3& velocity ) const
-	{
-		velocity = m_linearVelocity+m_deltaLinearVelocity + (m_angularVelocity+m_deltaAngularVelocity).cross(rel_pos);
-	}
-
-	SIMD_FORCE_INLINE void	internalGetAngularVelocity(btVector3& angVel) const
-	{
-		angVel = m_angularVelocity+m_deltaAngularVelocity;
-	}
-
-
-	//Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position
-	SIMD_FORCE_INLINE void internalApplyImpulse(const btVector3& linearComponent, const btVector3& angularComponent,const btScalar impulseMagnitude)
-	{
-		if (m_originalBody)
-		{
-			m_deltaLinearVelocity += linearComponent*impulseMagnitude*m_linearFactor;
-			m_deltaAngularVelocity += angularComponent*(impulseMagnitude*m_angularFactor);
-		}
-	}
-		
-	
-	
-
-	void	writebackVelocity()
-	{
-		if (m_originalBody)
-		{
-			m_linearVelocity +=m_deltaLinearVelocity;
-			m_angularVelocity += m_deltaAngularVelocity;
-			
-			//m_originalBody->setCompanionId(-1);
-		}
-	}
-
-
-	void	writebackVelocityAndTransform(btScalar timeStep, btScalar splitImpulseTurnErp)
-	{
-        (void) timeStep;
-		if (m_originalBody)
-		{
-			m_linearVelocity += m_deltaLinearVelocity;
-			m_angularVelocity += m_deltaAngularVelocity;
-			
-			//correct the position/orientation based on push/turn recovery
-			btTransform newTransform;
-			if (m_pushVelocity[0]!=0.f || m_pushVelocity[1]!=0 || m_pushVelocity[2]!=0 || m_turnVelocity[0]!=0.f || m_turnVelocity[1]!=0 || m_turnVelocity[2]!=0)
-			{
-			//	btQuaternion orn = m_worldTransform.getRotation();
-				btTransformUtil::integrateTransform(m_worldTransform,m_pushVelocity,m_turnVelocity*splitImpulseTurnErp,timeStep,newTransform);
-				m_worldTransform = newTransform;
-			}
-			//m_worldTransform.setRotation(orn);
-			//m_originalBody->setCompanionId(-1);
-		}
-	}
-	
-
-
-};
-
-#endif //BT_SOLVER_BODY_H
-
-
diff --git a/opencl/gpu_rigidbody/host/btSolverConstraint.h b/opencl/gpu_rigidbody/host/btSolverConstraint.h
deleted file mode 100644
index 582066aba..000000000
--- a/opencl/gpu_rigidbody/host/btSolverConstraint.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2006 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef BT_SOLVER_CONSTRAINT_H
-#define BT_SOLVER_CONSTRAINT_H
-
-class	btRigidBody;
-#include "BulletCommon/btVector3.h"
-#include "BulletCommon/btMatrix3x3.h"
-#include "btJacobianEntry.h"
-#include "BulletCommon/btAlignedObjectArray.h"
-
-//#define NO_FRICTION_TANGENTIALS 1
-#include "btSolverBody.h"
-
-
-///1D constraint along a normal axis between bodyA and bodyB. It can be combined to solve contact and friction constraints.
-ATTRIBUTE_ALIGNED16 (struct)	btSolverConstraint
-{
-	BT_DECLARE_ALIGNED_ALLOCATOR();
-
-	btVector3		m_relpos1CrossNormal;
-	btVector3		m_contactNormal;
-
-	btVector3		m_relpos2CrossNormal;
-	//btVector3		m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal
-
-	btVector3		m_angularComponentA;
-	btVector3		m_angularComponentB;
-	
-	mutable btSimdScalar	m_appliedPushImpulse;
-	mutable btSimdScalar	m_appliedImpulse;
-
-	btScalar	m_friction;
-	btScalar	m_jacDiagABInv;
-	btScalar		m_rhs;
-	btScalar		m_cfm;
-	
-    btScalar		m_lowerLimit;
-	btScalar		m_upperLimit;
-	btScalar		m_rhsPenetration;
-    union
-	{
-		void*		m_originalContactPoint;
-		btScalar	m_unusedPadding4;
-	};
-
-	int	m_overrideNumSolverIterations;
-    int			m_frictionIndex;
-	int m_solverBodyIdA;
-	int m_solverBodyIdB;
-
-    
-	enum		btSolverConstraintType
-	{
-		BT_SOLVER_CONTACT_1D = 0,
-		BT_SOLVER_FRICTION_1D
-	};
-};
-
-typedef btAlignedObjectArray<btSolverConstraint>	btConstraintArray;
-
-
-#endif //BT_SOLVER_CONSTRAINT_H
-
-
-
diff --git a/opencl/gpu_rigidbody/host/btTypedConstraint.h b/opencl/gpu_rigidbody/host/btTypedConstraint.h
deleted file mode 100644
index 6c0ab2003..000000000
--- a/opencl/gpu_rigidbody/host/btTypedConstraint.h
+++ /dev/null
@@ -1,482 +0,0 @@
-/*
-Bullet Continuous Collision Detection and Physics Library
-Copyright (c) 2003-2010 Erwin Coumans  http://continuousphysics.com/Bullet/
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose, 
-including commercial applications, and to alter it and redistribute it freely, 
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-
-#ifndef BT_TYPED_CONSTRAINT_H
-#define BT_TYPED_CONSTRAINT_H
-
-
-#include "BulletCommon/btScalar.h"
-#include "btSolverConstraint.h"
-#include "btRigidBody.h"
-
-class btSerializer;
-
-//Don't change any of the existing enum values, so add enum types at the end for serialization compatibility
-enum btTypedConstraintType
-{
-	POINT2POINT_CONSTRAINT_TYPE=3,
-	HINGE_CONSTRAINT_TYPE,
-	CONETWIST_CONSTRAINT_TYPE,
-	D6_CONSTRAINT_TYPE,
-	SLIDER_CONSTRAINT_TYPE,
-	CONTACT_CONSTRAINT_TYPE,
-	D6_SPRING_CONSTRAINT_TYPE,
-	GEAR_CONSTRAINT_TYPE,
-	MAX_CONSTRAINT_TYPE
-};
-
-
-enum btConstraintParams
-{
-	BT_CONSTRAINT_ERP=1,
-	BT_CONSTRAINT_STOP_ERP,
-	BT_CONSTRAINT_CFM,
-	BT_CONSTRAINT_STOP_CFM
-};
-
-#if 1
-	#define btAssertConstrParams(_par) btAssert(_par) 
-#else
-	#define btAssertConstrParams(_par)
-#endif
-
-
-ATTRIBUTE_ALIGNED16(struct)	btJointFeedback
-{
-	btVector3	m_appliedForceBodyA;
-	btVector3	m_appliedTorqueBodyA;
-	btVector3	m_appliedForceBodyB;
-	btVector3	m_appliedTorqueBodyB;
-};
-
-
-///TypedConstraint is the baseclass for Bullet constraints and vehicles
-ATTRIBUTE_ALIGNED16(class) btTypedConstraint : public btTypedObject
-{
-	int	m_userConstraintType;
-
-	union
-	{
-		int	m_userConstraintId;
-		void* m_userConstraintPtr;
-	};
-
-	btScalar	m_breakingImpulseThreshold;
-	bool		m_isEnabled;
-	bool		m_needsFeedback;
-	int			m_overrideNumSolverIterations;
-
-
-	btTypedConstraint&	operator=(btTypedConstraint&	other)
-	{
-		btAssert(0);
-		(void) other;
-		return *this;
-	}
-
-protected:
-	btRigidBody&	m_rbA;
-	btRigidBody&	m_rbB;
-	btScalar	m_appliedImpulse;
-	btScalar	m_dbgDrawSize;
-	btJointFeedback*	m_jointFeedback;
-
-	///internal method used by the constraint solver, don't use them directly
-	btScalar getMotorFactor(btScalar pos, btScalar lowLim, btScalar uppLim, btScalar vel, btScalar timeFact);
-	
-
-public:
-
-	BT_DECLARE_ALIGNED_ALLOCATOR();
-
-	virtual ~btTypedConstraint() {};
-	btTypedConstraint(btTypedConstraintType type, btRigidBody& rbA);
-	btTypedConstraint(btTypedConstraintType type, btRigidBody& rbA,btRigidBody& rbB);
-
-	struct btConstraintInfo1 {
-		int m_numConstraintRows,nub;
-	};
-
-	static btRigidBody& getFixedBody();
-
-	struct btConstraintInfo2 {
-		// integrator parameters: frames per second (1/stepsize), default error
-		// reduction parameter (0..1).
-		btScalar fps,erp;
-
-		// for the first and second body, pointers to two (linear and angular)
-		// n*3 jacobian sub matrices, stored by rows. these matrices will have
-		// been initialized to 0 on entry. if the second body is zero then the
-		// J2xx pointers may be 0.
-		btScalar *m_J1linearAxis,*m_J1angularAxis,*m_J2linearAxis,*m_J2angularAxis;
-
-		// elements to jump from one row to the next in J's
-		int rowskip;
-
-		// right hand sides of the equation J*v = c + cfm * lambda. cfm is the
-		// "constraint force mixing" vector. c is set to zero on entry, cfm is
-		// set to a constant value (typically very small or zero) value on entry.
-		btScalar *m_constraintError,*cfm;
-
-		// lo and hi limits for variables (set to -/+ infinity on entry).
-		btScalar *m_lowerLimit,*m_upperLimit;
-
-		// findex vector for variables. see the LCP solver interface for a
-		// description of what this does. this is set to -1 on entry.
-		// note that the returned indexes are relative to the first index of
-		// the constraint.
-		int *findex;
-		// number of solver iterations
-		int m_numIterations;
-
-		//damping of the velocity
-		btScalar	m_damping;
-	};
-
-	int	getOverrideNumSolverIterations() const
-	{
-		return m_overrideNumSolverIterations;
-	}
-
-	///override the number of constraint solver iterations used to solve this constraint
-	///-1 will use the default number of iterations, as specified in SolverInfo.m_numIterations
-	void setOverrideNumSolverIterations(int overideNumIterations)
-	{
-		m_overrideNumSolverIterations = overideNumIterations;
-	}
-
-	///internal method used by the constraint solver, don't use them directly
-	virtual void	buildJacobian() {};
-
-	///internal method used by the constraint solver, don't use them directly
-	virtual	void	setupSolverConstraint(btConstraintArray& ca, int solverBodyA,int solverBodyB, btScalar timeStep)
-	{
-        (void)ca;
-        (void)solverBodyA;
-        (void)solverBodyB;
-        (void)timeStep;
-	}
-	
-	///internal method used by the constraint solver, don't use them directly
-	virtual void getInfo1 (btConstraintInfo1* info)=0;
-
-	///internal method used by the constraint solver, don't use them directly
-	virtual void getInfo2 (btConstraintInfo2* info)=0;
-
-	///internal method used by the constraint solver, don't use them directly
-	void	internalSetAppliedImpulse(btScalar appliedImpulse)
-	{
-		m_appliedImpulse = appliedImpulse;
-	}
-	///internal method used by the constraint solver, don't use them directly
-	btScalar	internalGetAppliedImpulse()
-	{
-		return m_appliedImpulse;
-	}
-
-
-	btScalar	getBreakingImpulseThreshold() const
-	{
-		return 	m_breakingImpulseThreshold;
-	}
-
-	void	setBreakingImpulseThreshold(btScalar threshold)
-	{
-		m_breakingImpulseThreshold = threshold;
-	}
-
-	bool	isEnabled() const
-	{
-		return m_isEnabled;
-	}
-
-	void	setEnabled(bool enabled)
-	{
-		m_isEnabled=enabled;
-	}
-
-
-	///internal method used by the constraint solver, don't use them directly
-	virtual	void	solveConstraintObsolete(btSolverBody& /*bodyA*/,btSolverBody& /*bodyB*/,btScalar	/*timeStep*/) {};
-
-	
-	const btRigidBody& getRigidBodyA() const
-	{
-		return m_rbA;
-	}
-	const btRigidBody& getRigidBodyB() const
-	{
-		return m_rbB;
-	}
-
-	btRigidBody& getRigidBodyA() 
-	{
-		return m_rbA;
-	}
-	btRigidBody& getRigidBodyB()
-	{
-		return m_rbB;
-	}
-
-	int getUserConstraintType() const
-	{
-		return m_userConstraintType ;
-	}
-
-	void	setUserConstraintType(int userConstraintType)
-	{
-		m_userConstraintType = userConstraintType;
-	};
-
-	void	setUserConstraintId(int uid)
-	{
-		m_userConstraintId = uid;
-	}
-
-	int getUserConstraintId() const
-	{
-		return m_userConstraintId;
-	}
-
-	void	setUserConstraintPtr(void* ptr)
-	{
-		m_userConstraintPtr = ptr;
-	}
-
-	void*	getUserConstraintPtr()
-	{
-		return m_userConstraintPtr;
-	}
-
-	void	setJointFeedback(btJointFeedback* jointFeedback)
-	{
-		m_jointFeedback = jointFeedback;
-	}
-
-	const btJointFeedback* getJointFeedback() const
-	{
-		return m_jointFeedback;
-	}
-
-	btJointFeedback* getJointFeedback()
-	{
-		return m_jointFeedback;
-	}
-
-
-	int getUid() const
-	{
-		return m_userConstraintId;   
-	} 
-
-	bool	needsFeedback() const
-	{
-		return m_needsFeedback;
-	}
-
-	///enableFeedback will allow to read the applied linear and angular impulse
-	///use getAppliedImpulse, getAppliedLinearImpulse and getAppliedAngularImpulse to read feedback information
-	void	enableFeedback(bool needsFeedback)
-	{
-		m_needsFeedback = needsFeedback;
-	}
-
-	///getAppliedImpulse is an estimated total applied impulse. 
-	///This feedback could be used to determine breaking constraints or playing sounds.
-	btScalar	getAppliedImpulse() const
-	{
-		btAssert(m_needsFeedback);
-		return m_appliedImpulse;
-	}
-
-	btTypedConstraintType getConstraintType () const
-	{
-		return btTypedConstraintType(m_objectType);
-	}
-	
-	void setDbgDrawSize(btScalar dbgDrawSize)
-	{
-		m_dbgDrawSize = dbgDrawSize;
-	}
-	btScalar getDbgDrawSize()
-	{
-		return m_dbgDrawSize;
-	}
-
-	///override the default global value of a parameter (such as ERP or CFM), optionally provide the axis (0..5). 
-	///If no axis is provided, it uses the default axis for this constraint.
-	virtual	void	setParam(int num, btScalar value, int axis = -1) = 0;
-
-	///return the local value of parameter
-	virtual	btScalar getParam(int num, int axis = -1) const = 0;
-	
-	virtual	int	calculateSerializeBufferSize() const=0;
-
-	///fills the dataBuffer and returns the struct name (and 0 on failure)
-	virtual	const char*	serialize(void* dataBuffer, btSerializer* serializer) const=0;
-
-};
-
-// returns angle in range [-SIMD_2_PI, SIMD_2_PI], closest to one of the limits 
-// all arguments should be normalized angles (i.e. in range [-SIMD_PI, SIMD_PI])
-SIMD_FORCE_INLINE btScalar btAdjustAngleToLimits(btScalar angleInRadians, btScalar angleLowerLimitInRadians, btScalar angleUpperLimitInRadians)
-{
-	if(angleLowerLimitInRadians >= angleUpperLimitInRadians)
-	{
-		return angleInRadians;
-	}
-	else if(angleInRadians < angleLowerLimitInRadians)
-	{
-		btScalar diffLo = btFabs(btNormalizeAngle(angleLowerLimitInRadians - angleInRadians));
-		btScalar diffHi = btFabs(btNormalizeAngle(angleUpperLimitInRadians - angleInRadians));
-		return (diffLo < diffHi) ? angleInRadians : (angleInRadians + SIMD_2_PI);
-	}
-	else if(angleInRadians > angleUpperLimitInRadians)
-	{
-		btScalar diffHi = btFabs(btNormalizeAngle(angleInRadians - angleUpperLimitInRadians));
-		btScalar diffLo = btFabs(btNormalizeAngle(angleInRadians - angleLowerLimitInRadians));
-		return (diffLo < diffHi) ? (angleInRadians - SIMD_2_PI) : angleInRadians;
-	}
-	else
-	{
-		return angleInRadians;
-	}
-}
-
-///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
-struct	btTypedConstraintData
-{
-	btRigidBodyData		*m_rbA;
-	btRigidBodyData		*m_rbB;
-	char	*m_name;
-
-	int	m_objectType;
-	int	m_userConstraintType;
-	int	m_userConstraintId;
-	int	m_needsFeedback;
-
-	float	m_appliedImpulse;
-	float	m_dbgDrawSize;
-
-	int	m_disableCollisionsBetweenLinkedBodies;
-	int	m_overrideNumSolverIterations;
-
-	float	m_breakingImpulseThreshold;
-	int		m_isEnabled;
-	
-};
-
-SIMD_FORCE_INLINE	int	btTypedConstraint::calculateSerializeBufferSize() const
-{
-	return sizeof(btTypedConstraintData);
-}
-
-
-
-class btAngularLimit
-{
-private:
-	btScalar 
-		m_center,
-		m_halfRange,
-		m_softness,
-		m_biasFactor,
-		m_relaxationFactor,
-		m_correction,
-		m_sign;
-
-	bool
-		m_solveLimit;
-
-public:
-	/// Default constructor initializes limit as inactive, allowing free constraint movement
-	btAngularLimit()
-		:m_center(0.0f),
-		m_halfRange(-1.0f),
-		m_softness(0.9f),
-		m_biasFactor(0.3f),
-		m_relaxationFactor(1.0f),
-		m_correction(0.0f),
-		m_sign(0.0f),
-		m_solveLimit(false)
-	{}
-
-	/// Sets all limit's parameters.
-	/// When low > high limit becomes inactive.
-	/// When high - low > 2PI limit is ineffective too becouse no angle can exceed the limit
-	void set(btScalar low, btScalar high, btScalar _softness = 0.9f, btScalar _biasFactor = 0.3f, btScalar _relaxationFactor = 1.0f);
-
-	/// Checks conastaint angle against limit. If limit is active and the angle violates the limit
-	/// correction is calculated.
-	void test(const btScalar angle);
-
-	/// Returns limit's softness
-	inline btScalar getSoftness() const
-	{
-		return m_softness;
-	}
-
-	/// Returns limit's bias factor
-	inline btScalar getBiasFactor() const
-	{
-		return m_biasFactor;
-	}
-
-	/// Returns limit's relaxation factor
-	inline btScalar getRelaxationFactor() const
-	{
-		return m_relaxationFactor;
-	}
-
-	/// Returns correction value evaluated when test() was invoked 
-	inline btScalar getCorrection() const
-	{
-		return m_correction;
-	}
-
-	/// Returns sign value evaluated when test() was invoked 
-	inline btScalar getSign() const
-	{
-		return m_sign;
-	}
-
-	/// Gives half of the distance between min and max limit angle
-	inline btScalar getHalfRange() const
-	{
-		return m_halfRange;
-	}
-
-	/// Returns true when the last test() invocation recognized limit violation
-	inline bool isLimit() const
-	{
-		return m_solveLimit;
-	}
-
-	/// Checks given angle against limit. If limit is active and angle doesn't fit it, the angle
-	/// returned is modified so it equals to the limit closest to given angle.
-	void fit(btScalar& angle) const;
-
-	/// Returns correction value multiplied by sign value
-	btScalar getError() const;
-
-	btScalar getLow() const;
-
-	btScalar getHigh() const;
-
-};
-
-
-
-#endif //BT_TYPED_CONSTRAINT_H
diff --git a/opencl/lds_bank_conflict/main.cpp b/opencl/lds_bank_conflict/main.cpp
index 68771b06a..1171d3ea0 100644
--- a/opencl/lds_bank_conflict/main.cpp
+++ b/opencl/lds_bank_conflict/main.cpp
@@ -16,7 +16,7 @@
 // limitations under the License.
 
 
-#include "btOpenCLUtils.h"
+#include "b3OpenCLUtils.h"
 #include "../parallel_primitives/host/btOpenCLArray.h"
 #include "../parallel_primitives/host/btLauncherCL.h"
 #include "BulletCommon/btQuickprof.h"
@@ -124,11 +124,11 @@ int main(int argc, char **argv)
 	cl_kernel			transposeNoBankConflictsKernel= 0;
 	
 
-	ctx = btOpenCLUtils::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum,0,0,preferred_device,preferred_platform,&platformId);
-	btOpenCLUtils::printPlatformInfo(platformId);
+	ctx = b3OpenCLUtils::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum,0,0,preferred_device,preferred_platform,&platformId);
+	b3OpenCLUtils::printPlatformInfo(platformId);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
-	device = btOpenCLUtils::getDevice(ctx,0);
-	btOpenCLUtils::printDeviceInfo(device);
+	device = b3OpenCLUtils::getDevice(ctx,0);
+	b3OpenCLUtils::printDeviceInfo(device);
 	queue = clCreateCommandQueue(ctx, device, 0, &ciErrNum);
 
 	const char* cSourceFile = "opencl/lds_bank_conflict/lds_kernels.cl";
@@ -166,11 +166,11 @@ char flags[1024]={0};
 #endif//CL_PLATFORM_INTEL
 
 	
-	copyKernel  = btOpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"copyKernel",&ciErrNum,0,flags);
-	copySharedMemKernel  = btOpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"copySharedMemKernel",&ciErrNum,0,flags);
-	transposeNaiveKernel = btOpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"transposeNaiveKernel",&ciErrNum,0,flags);
-	transposeCoalescedKernel = btOpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"transposeCoalescedKernel",&ciErrNum,0,flags);
-	transposeNoBankConflictsKernel = btOpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"transposeNoBankConflictsKernel",&ciErrNum,0,flags);
+	copyKernel  = b3OpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"copyKernel",&ciErrNum,0,flags);
+	copySharedMemKernel  = b3OpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"copySharedMemKernel",&ciErrNum,0,flags);
+	transposeNaiveKernel = b3OpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"transposeNaiveKernel",&ciErrNum,0,flags);
+	transposeCoalescedKernel = b3OpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"transposeCoalescedKernel",&ciErrNum,0,flags);
+	transposeNoBankConflictsKernel = b3OpenCLUtils::compileCLKernelFromString(ctx,device,cSourceCL,"transposeNoBankConflictsKernel",&ciErrNum,0,flags);
 	
 	btFillCL clMemSet(ctx,device,queue);
 
diff --git a/opencl/lds_bank_conflict/premake4.lua b/opencl/lds_bank_conflict/premake4.lua
index 1f615b192..ca19dca73 100644
--- a/opencl/lds_bank_conflict/premake4.lua
+++ b/opencl/lds_bank_conflict/premake4.lua
@@ -25,8 +25,8 @@ function createProject (vendor)
 		
 		files {
 			"main.cpp",
-			"../basic_initialize/btOpenCLUtils.cpp",
-			"../basic_initialize/btOpenCLUtils.h",
+			"../basic_initialize/b3OpenCLUtils.cpp",
+			"../basic_initialize/b3OpenCLUtils.h",
 			"../../src/BulletCommon/btAlignedAllocator.cpp",
 			"../../src/BulletCommon/btAlignedAllocator.h",
 			"../../src/BulletCommon/btAlignedObjectArray.h",
diff --git a/opencl/parallel_primitives/benchmark/premake4.lua b/opencl/parallel_primitives/benchmark/premake4.lua
index 9f710c9e3..7740f2feb 100644
--- a/opencl/parallel_primitives/benchmark/premake4.lua
+++ b/opencl/parallel_primitives/benchmark/premake4.lua
@@ -19,8 +19,8 @@ function createProject(vendor)
 		
 		files {
 			"test_large_problem_sorting.cpp",
-			"../../basic_initialize/btOpenCLUtils.cpp",
-			"../../basic_initialize/btOpenCLUtils.h",
+			"../../basic_initialize/b3OpenCLUtils.cpp",
+			"../../basic_initialize/b3OpenCLUtils.h",
 			"../host/btFillCL.cpp",
 			"../host/btPrefixScanCL.cpp",
 			"../host/btRadixSort32CL.cpp",
diff --git a/opencl/parallel_primitives/benchmark/test_large_problem_sorting.cpp b/opencl/parallel_primitives/benchmark/test_large_problem_sorting.cpp
index 8bb6a77ea..049687b41 100644
--- a/opencl/parallel_primitives/benchmark/test_large_problem_sorting.cpp
+++ b/opencl/parallel_primitives/benchmark/test_large_problem_sorting.cpp
@@ -65,7 +65,7 @@
 */
 
 #include "../host/btRadixSort32CL.h"
-#include "../../basic_initialize/btOpenCLUtils.h"
+#include "../../basic_initialize/b3OpenCLUtils.h"
 #include "BulletCommon/btQuickprof.h"
 
 cl_context g_cxMainContext;
@@ -78,7 +78,7 @@ cl_command_queue g_cqCommandQueue;
 
 bool g_verbose;
 ///Preferred OpenCL device/platform. When < 0 then no preference is used. 
-///Note that btOpenCLUtils might still use the preference of using a platform vendor that matches the SDK vendor used to build the application.
+///Note that b3OpenCLUtils might still use the preference of using a platform vendor that matches the SDK vendor used to build the application.
 ///Preferred device/platform take priority over this platform-vendor match
 int gPreferredDeviceId = -1;
 int gPreferredPlatformId = -1;
diff --git a/opencl/parallel_primitives/host/btBoundSearchCL.cpp b/opencl/parallel_primitives/host/btBoundSearchCL.cpp
index 9395e9cc8..565dc7374 100644
--- a/opencl/parallel_primitives/host/btBoundSearchCL.cpp
+++ b/opencl/parallel_primitives/host/btBoundSearchCL.cpp
@@ -21,7 +21,7 @@ subject to the following restrictions:
 
 
 #include "btBoundSearchCL.h"
-#include "../../basic_initialize/btOpenCLUtils.h"
+#include "../../basic_initialize/b3OpenCLUtils.h"
 #include "btLauncherCL.h"
 #include "../kernels/BoundSearchKernelsCL.h"
 
@@ -37,20 +37,20 @@ btBoundSearchCL::btBoundSearchCL(cl_context ctx, cl_device_id device, cl_command
 	cl_int pErrNum;
 	const char* kernelSource = boundSearchKernelsCL;
 
-	cl_program boundSearchProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, BOUNDSEARCH_PATH);
+	cl_program boundSearchProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, BOUNDSEARCH_PATH);
 	btAssert(boundSearchProg);
 
-	m_lowerSortDataKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataLowerKernel", &pErrNum, boundSearchProg,additionalMacros );
+	m_lowerSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataLowerKernel", &pErrNum, boundSearchProg,additionalMacros );
 	btAssert(m_lowerSortDataKernel );
 
-	m_upperSortDataKernel= btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataUpperKernel", &pErrNum, boundSearchProg,additionalMacros );
+	m_upperSortDataKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataUpperKernel", &pErrNum, boundSearchProg,additionalMacros );
 	btAssert(m_upperSortDataKernel);
 
 	m_subtractKernel = 0;
 
 	if( maxSize )
 	{
-		m_subtractKernel= btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SubtractKernel", &pErrNum, boundSearchProg,additionalMacros );
+		m_subtractKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SubtractKernel", &pErrNum, boundSearchProg,additionalMacros );
 		btAssert(m_subtractKernel);
 	}
 
diff --git a/opencl/parallel_primitives/host/btFillCL.cpp b/opencl/parallel_primitives/host/btFillCL.cpp
index 18a7e2093..9f11b7ef5 100644
--- a/opencl/parallel_primitives/host/btFillCL.cpp
+++ b/opencl/parallel_primitives/host/btFillCL.cpp
@@ -1,5 +1,5 @@
 #include "btFillCL.h"
-#include "../../basic_initialize/btOpenCLUtils.h"
+#include "../../basic_initialize/b3OpenCLUtils.h"
 #include "btBufferInfoCL.h"
 #include "btLauncherCL.h"
 
@@ -14,21 +14,21 @@ btFillCL::btFillCL(cl_context ctx, cl_device_id device, cl_command_queue queue)
 	cl_int pErrNum;
 	const char* additionalMacros = "";
 
-	cl_program fillProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, FILL_CL_PROGRAM_PATH);
+	cl_program fillProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, FILL_CL_PROGRAM_PATH);
 	btAssert(fillProg);
 
-	m_fillIntKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillIntKernel", &pErrNum, fillProg,additionalMacros );
+	m_fillIntKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillIntKernel", &pErrNum, fillProg,additionalMacros );
 	btAssert(m_fillIntKernel);
 
-	m_fillUnsignedIntKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillUnsignedIntKernel", &pErrNum, fillProg,additionalMacros );
+	m_fillUnsignedIntKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillUnsignedIntKernel", &pErrNum, fillProg,additionalMacros );
 	btAssert(m_fillIntKernel);
 
-	m_fillFloatKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillFloatKernel", &pErrNum, fillProg,additionalMacros );
+	m_fillFloatKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillFloatKernel", &pErrNum, fillProg,additionalMacros );
 	btAssert(m_fillFloatKernel);
 
 	
 
-	m_fillKernelInt2 = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillInt2Kernel", &pErrNum, fillProg,additionalMacros );
+	m_fillKernelInt2 = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillInt2Kernel", &pErrNum, fillProg,additionalMacros );
 	btAssert(m_fillKernelInt2);
 	
 }
diff --git a/opencl/parallel_primitives/host/btOpenCLArray.h b/opencl/parallel_primitives/host/btOpenCLArray.h
index 02db9dc84..032cd36b7 100644
--- a/opencl/parallel_primitives/host/btOpenCLArray.h
+++ b/opencl/parallel_primitives/host/btOpenCLArray.h
@@ -2,7 +2,7 @@
 #define BT_OPENCL_ARRAY_H
 
 #include "BulletCommon/btAlignedObjectArray.h"
-#include "../../basic_initialize/btOpenCLInclude.h"
+#include "../../basic_initialize/b3OpenCLInclude.h"
 
 template <typename T> 
 class btOpenCLArray
diff --git a/opencl/parallel_primitives/host/btPrefixScanCL.cpp b/opencl/parallel_primitives/host/btPrefixScanCL.cpp
index c584097c5..abbdf0f0d 100644
--- a/opencl/parallel_primitives/host/btPrefixScanCL.cpp
+++ b/opencl/parallel_primitives/host/btPrefixScanCL.cpp
@@ -3,7 +3,7 @@
 #define BT_PREFIXSCAN_PROG_PATH "opencl/parallel_primitives/kernels/PrefixScanKernels.cl"
 
 #include "btLauncherCL.h"
-#include "../../basic_initialize/btOpenCLUtils.h"
+#include "../../basic_initialize/b3OpenCLUtils.h"
 #include "../kernels/PrefixScanKernelsCL.h"
 
 btPrefixScanCL::btPrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size)
@@ -14,14 +14,14 @@ btPrefixScanCL::btPrefixScanCL(cl_context ctx, cl_device_id device, cl_command_q
 	char* additionalMacros=0;
 
 	m_workBuffer = new btOpenCLArray<unsigned int>(ctx,queue,size);
-	cl_program scanProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, scanKernelSource, &pErrNum,additionalMacros, BT_PREFIXSCAN_PROG_PATH);
+	cl_program scanProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, scanKernelSource, &pErrNum,additionalMacros, BT_PREFIXSCAN_PROG_PATH);
 	btAssert(scanProg);
 
-	m_localScanKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "LocalScanKernel", &pErrNum, scanProg,additionalMacros );
+	m_localScanKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "LocalScanKernel", &pErrNum, scanProg,additionalMacros );
 	btAssert(m_localScanKernel );
-	m_blockSumKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "TopLevelScanKernel", &pErrNum, scanProg,additionalMacros );
+	m_blockSumKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "TopLevelScanKernel", &pErrNum, scanProg,additionalMacros );
 	btAssert(m_blockSumKernel );
-	m_propagationKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "AddOffsetKernel", &pErrNum, scanProg,additionalMacros );
+	m_propagationKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "AddOffsetKernel", &pErrNum, scanProg,additionalMacros );
 	btAssert(m_propagationKernel );
 }
 
diff --git a/opencl/parallel_primitives/host/btRadixSort32CL.cpp b/opencl/parallel_primitives/host/btRadixSort32CL.cpp
index 6d007fef2..321b5a36f 100644
--- a/opencl/parallel_primitives/host/btRadixSort32CL.cpp
+++ b/opencl/parallel_primitives/host/btRadixSort32CL.cpp
@@ -1,7 +1,7 @@
 
 #include "btRadixSort32CL.h"
 #include "btLauncherCL.h"
-#include "../../basic_initialize/btOpenCLUtils.h"
+#include "../../basic_initialize/b3OpenCLUtils.h"
 #include "btPrefixScanCL.h"
 #include "btFillCL.h"
 
@@ -13,7 +13,7 @@ btRadixSort32CL::btRadixSort32CL(cl_context ctx, cl_device_id device, cl_command
 :m_commandQueue(queue)
 {
 	btOpenCLDeviceInfo info;
-	btOpenCLUtils::getDeviceInfo(device,&info);
+	b3OpenCLUtils::getDeviceInfo(device,&info);
 	m_deviceCPU = (info.m_deviceType & CL_DEVICE_TYPE_CPU)!=0;
 
 	m_workBuffer1 = new btOpenCLArray<unsigned int>(ctx,queue);
@@ -42,15 +42,15 @@ btRadixSort32CL::btRadixSort32CL(cl_context ctx, cl_device_id device, cl_command
 	cl_int pErrNum;
 	const char* kernelSource = radixSort32KernelsCL;
 	
-	cl_program sortProg = btOpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, RADIXSORT32_PATH);
+	cl_program sortProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, RADIXSORT32_PATH);
 	btAssert(sortProg);
 
-	m_streamCountSortDataKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "StreamCountSortDataKernel", &pErrNum, sortProg,additionalMacros );
+	m_streamCountSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "StreamCountSortDataKernel", &pErrNum, sortProg,additionalMacros );
 	btAssert(m_streamCountSortDataKernel );
 
 
 	
-	m_streamCountKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "StreamCountKernel", &pErrNum, sortProg,additionalMacros );
+	m_streamCountKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "StreamCountKernel", &pErrNum, sortProg,additionalMacros );
 	btAssert(m_streamCountKernel);
 
 
@@ -58,19 +58,19 @@ btRadixSort32CL::btRadixSort32CL(cl_context ctx, cl_device_id device, cl_command
 	if (m_deviceCPU)
 	{
 		
-		m_sortAndScatterSortDataKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterSortDataKernelSerial", &pErrNum, sortProg,additionalMacros );
+		m_sortAndScatterSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterSortDataKernelSerial", &pErrNum, sortProg,additionalMacros );
 		btAssert(m_sortAndScatterSortDataKernel);
-		m_sortAndScatterKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterKernelSerial", &pErrNum, sortProg,additionalMacros );
+		m_sortAndScatterKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterKernelSerial", &pErrNum, sortProg,additionalMacros );
 		btAssert(m_sortAndScatterKernel);
 	} else
 	{
-		m_sortAndScatterSortDataKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterSortDataKernel", &pErrNum, sortProg,additionalMacros );
+		m_sortAndScatterSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterSortDataKernel", &pErrNum, sortProg,additionalMacros );
 		btAssert(m_sortAndScatterSortDataKernel);
-		m_sortAndScatterKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterKernel", &pErrNum, sortProg,additionalMacros );
+		m_sortAndScatterKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterKernel", &pErrNum, sortProg,additionalMacros );
 		btAssert(m_sortAndScatterKernel);
 	}
 		
-	m_prefixScanKernel = btOpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "PrefixScanKernel", &pErrNum, sortProg,additionalMacros );
+	m_prefixScanKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "PrefixScanKernel", &pErrNum, sortProg,additionalMacros );
 	btAssert(m_prefixScanKernel);
 		
 }
diff --git a/opencl/parallel_primitives/test/main.cpp b/opencl/parallel_primitives/test/main.cpp
index 59efe2871..7f3ccbc63 100644
--- a/opencl/parallel_primitives/test/main.cpp
+++ b/opencl/parallel_primitives/test/main.cpp
@@ -14,7 +14,7 @@ subject to the following restrictions:
 
 
 #include <stdio.h>
-#include "../basic_initialize/btOpenCLUtils.h"
+#include "../basic_initialize/b3OpenCLUtils.h"
 #include "../host/btFillCL.h"
 #include "../host/btBoundSearchCL.h"
 #include "../host/btRadixSort32CL.h"
@@ -45,17 +45,17 @@ void initCL(int preferredDeviceIndex, int preferredPlatformIndex)
 
 	cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
 
-	g_context = btOpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
+	g_context = b3OpenCLUtils::createContextFromType(deviceType, &ciErrNum, 0,0,preferredDeviceIndex, preferredPlatformIndex);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
-	int numDev = btOpenCLUtils::getNumDevices(g_context);
+	int numDev = b3OpenCLUtils::getNumDevices(g_context);
 	if (numDev>0)
 	{
 		btOpenCLDeviceInfo info;
-		g_device= btOpenCLUtils::getDevice(g_context,0);
+		g_device= b3OpenCLUtils::getDevice(g_context,0);
 		g_queue = clCreateCommandQueue(g_context, g_device, 0, &ciErrNum);
 		oclCHECKERROR(ciErrNum, CL_SUCCESS);
-        btOpenCLUtils::printDeviceInfo(g_device);
-		btOpenCLUtils::getDeviceInfo(g_device,&info);
+        b3OpenCLUtils::printDeviceInfo(g_device);
+		b3OpenCLUtils::getDeviceInfo(g_device,&info);
 		g_deviceName = info.m_deviceName;
 	}
 }
diff --git a/opencl/parallel_primitives/test/premake4.lua b/opencl/parallel_primitives/test/premake4.lua
index 1037a61ef..74b88834e 100644
--- a/opencl/parallel_primitives/test/premake4.lua
+++ b/opencl/parallel_primitives/test/premake4.lua
@@ -16,9 +16,9 @@ function createProject(vendor)
 		
 		files {
 			"main.cpp",
-			"../../basic_initialize/btOpenCLInclude.h",
-			"../../basic_initialize/btOpenCLUtils.cpp",
-			"../../basic_initialize/btOpenCLUtils.h",
+			"../../basic_initialize/b3OpenCLInclude.h",
+			"../../basic_initialize/b3OpenCLUtils.cpp",
+			"../../basic_initialize/b3OpenCLUtils.h",
 			"../host/btFillCL.cpp",
 			"../host/btFillCL.h",
 			"../host/btBoundSearchCL.cpp",
diff --git a/opencl/reduce/main.cpp b/opencl/reduce/main.cpp
index f925f6855..a92ba0c47 100644
--- a/opencl/reduce/main.cpp
+++ b/opencl/reduce/main.cpp
@@ -1,5 +1,5 @@
 ///original author: Erwin Coumans
-#include "btOpenCLUtils.h"
+#include "b3OpenCLUtils.h"
 #include "../parallel_primitives/host/btOpenCLArray.h"
 #include "../parallel_primitives/host/btLauncherCL.h"
 #include <stdio.h>
@@ -45,17 +45,17 @@ int main(int argc, char* argv[])
 	cl_command_queue	queue;
 	cl_device_id		device;
 	cl_kernel			addKernel;
-	ctx = btOpenCLUtils::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum,0,0,preferred_device,preferred_platform,&platformId);
-	btOpenCLUtils::printPlatformInfo(platformId);
+	ctx = b3OpenCLUtils::createContextFromType(CL_DEVICE_TYPE_ALL, &ciErrNum,0,0,preferred_device,preferred_platform,&platformId);
+	b3OpenCLUtils::printPlatformInfo(platformId);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
 	if (!ctx) {
 		printf("No OpenCL capable GPU found!");
 		return 0;
 	}
 
-	device = btOpenCLUtils::getDevice(ctx,0);
+	device = b3OpenCLUtils::getDevice(ctx,0);
 	queue = clCreateCommandQueue(ctx, device, 0, &ciErrNum);
-	addKernel = btOpenCLUtils::compileCLKernelFromString(ctx,device,kernelString,"ReduceGlobal",&ciErrNum);
+	addKernel = b3OpenCLUtils::compileCLKernelFromString(ctx,device,kernelString,"ReduceGlobal",&ciErrNum);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
 	int numElements = 1024*1024;
 	btOpenCLArray<int> a(ctx,queue);
diff --git a/opencl/reduce/premake4.lua b/opencl/reduce/premake4.lua
index 43d900c45..0aa468f65 100644
--- a/opencl/reduce/premake4.lua
+++ b/opencl/reduce/premake4.lua
@@ -25,8 +25,8 @@ function createProject (vendor)
 		
 		files {
 			"main.cpp",
-			"../basic_initialize/btOpenCLUtils.cpp",
-			"../basic_initialize/btOpenCLUtils.h",
+			"../basic_initialize/b3OpenCLUtils.cpp",
+			"../basic_initialize/b3OpenCLUtils.h",
 			"../../src/BulletCommon/btAlignedAllocator.cpp",
 			"../../src/BulletCommon/btAlignedAllocator.h",
 			"../../src/BulletCommon/btAlignedObjectArray.h",
diff --git a/opencl/vector_add/premake4.lua b/opencl/vector_add/premake4.lua
index ccaabd73b..1bf00306e 100644
--- a/opencl/vector_add/premake4.lua
+++ b/opencl/vector_add/premake4.lua
@@ -15,8 +15,8 @@ function createProject(vendor)
 
 		files {
 			"main.cpp",
-			"../basic_initialize/btOpenCLUtils.cpp",
-			"../basic_initialize/btOpenCLUtils.h"
+			"../basic_initialize/b3OpenCLUtils.cpp",
+			"../basic_initialize/b3OpenCLUtils.h"
 		}
 		
 	end
diff --git a/opencl/vector_add_simplified/main.cpp b/opencl/vector_add_simplified/main.cpp
index d911ec2c2..0fcae0e34 100644
--- a/opencl/vector_add_simplified/main.cpp
+++ b/opencl/vector_add_simplified/main.cpp
@@ -1,5 +1,5 @@
 ///original author: Erwin Coumans
-#include "btOpenCLUtils.h"
+#include "b3OpenCLUtils.h"
 #include "../parallel_primitives/host/btOpenCLArray.h"
 #include "../parallel_primitives/host/btLauncherCL.h"
 #include <stdio.h>
@@ -29,17 +29,17 @@ int main(int argc, char* argv[])
 	cl_command_queue	queue;
 	cl_device_id		device;
 	cl_kernel			addKernel;
-	ctx = btOpenCLUtils::createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum,0,0,preferred_device,preferred_platform,&platformId);
-	btOpenCLUtils::printPlatformInfo(platformId);
+	ctx = b3OpenCLUtils::createContextFromType(CL_DEVICE_TYPE_GPU, &ciErrNum,0,0,preferred_device,preferred_platform,&platformId);
+	b3OpenCLUtils::printPlatformInfo(platformId);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
 	if (!ctx) {
 		printf("No OpenCL capable GPU found!");
 		return 0;
 	}
 
-	device = btOpenCLUtils::getDevice(ctx,0);
+	device = b3OpenCLUtils::getDevice(ctx,0);
 	queue = clCreateCommandQueue(ctx, device, 0, &ciErrNum);
-	addKernel = btOpenCLUtils::compileCLKernelFromString(ctx,device,kernelString,"VectorAdd",&ciErrNum);
+	addKernel = b3OpenCLUtils::compileCLKernelFromString(ctx,device,kernelString,"VectorAdd",&ciErrNum);
 	oclCHECKERROR(ciErrNum, CL_SUCCESS);
 	int numElements = 32;
 	btOpenCLArray<float> a(ctx,queue);
diff --git a/opencl/vector_add_simplified/premake4.lua b/opencl/vector_add_simplified/premake4.lua
index 5f41c4f0c..04eac59fe 100644
--- a/opencl/vector_add_simplified/premake4.lua
+++ b/opencl/vector_add_simplified/premake4.lua
@@ -25,8 +25,8 @@ function createProject (vendor)
 		
 		files {
 			"main.cpp",
-			"../basic_initialize/btOpenCLUtils.cpp",
-			"../basic_initialize/btOpenCLUtils.h",
+			"../basic_initialize/b3OpenCLUtils.cpp",
+			"../basic_initialize/b3OpenCLUtils.h",
 			"../../src/BulletCommon/btAlignedAllocator.cpp",
 			"../../src/BulletCommon/btAlignedAllocator.h",
 			"../../src/BulletCommon/btAlignedObjectArray.h",
diff --git a/readme.txt b/readme.txt
new file mode 100644
index 000000000..71904c07e
--- /dev/null
+++ b/readme.txt
@@ -0,0 +1,6 @@
+
+Bullet 3.x GPU rigid body pipeline.
+
+See docs folder for information, 
+including how to build the project.
+