I haven't been able to solve your problem completely yet, but at least I got a messy reference implementation running that does the raycasting in world space:

Code:
// --- Transformation matrices (expected to be supplied by the host application) ---
float4x4 matWorld;         // applied to the input vertex position/normal in ReflectVS
float4x4 matView;
float4x4 matProj;
float4x4 matViewProj;      // applied to ray-march sample points in ReflectPS
float4x4 matWorldViewProj; // applied to the input vertex position to produce POSITION

// --- Camera / viewport vectors ---
float4 vecViewPos;         // .xyz is used as the camera position
float4 vecViewDir;         // dotted with (point - camera position) to obtain a depth value
float4 vecViewPort;        // .zw is added as an offset in calc_ScreenPos
                           // NOTE(review): presumably (width, height, 1/width, 1/height) - confirm

// Source textures for the reflection lookup.
// NOTE(review): presumably the rendered scene color and its depth map - confirm
// against the host application that binds them.
texture bmp_camera_bmap;
texture bmp_depth_bmap;

// Color lookup. Border addressing: coordinates outside [0,1] return the
// sampler's border color instead of repeating edge texels. No BorderColor
// state is set here, so the device default applies.
sampler CameraSampler = sampler_state 
{ 
	Texture = <bmp_camera_bmap>; 
	AddressU  = border; 
	AddressV  = border; 
}; 

// Depth lookup. Clamp addressing: coordinates outside [0,1] read the nearest
// edge texel.
sampler DepthSampler = sampler_state 
{ 
	Texture = <bmp_depth_bmap>; 
	AddressU  = Clamp; 
	AddressV  = Clamp; 
}; 

// Vertex shader: projects the vertex and forwards everything the pixel
// shader needs for its ray march.
//
//   InPos     : vertex position
//   InNormal  : vertex normal
//   InTex     : texture coordinates
//
//   OutTex    : InTex, passed through unchanged
//   OutNormal : InNormal multiplied by the upper-left 3x3 of matWorld, normalized
//   wPos      : xyz of InPos * matWorld
//   OutPos    : copy of pPos on a TEXCOORD interpolator so the pixel shader can read it
//   pPos      : InPos * matWorldViewProj (the rasterizer position)
void ReflectVS(
	in float4 InPos    : POSITION,
	in float3 InNormal : NORMAL,
	in float2 InTex    : TEXCOORD0,

	out float2 OutTex   : TEXCOORD0,
	out float3 OutNormal: TEXCOORD1,
	out float3 wPos     : TEXCOORD2,
	out float4 OutPos   : TEXCOORD3,
	out float4 pPos     : POSITION
)
{
	pPos   = mul(InPos, matWorldViewProj);
	OutPos = pPos;

	// Explicit 3x3 cast: only the rotation/scale part applies to a direction.
	// This is what the implicit float3 * float4x4 truncation did before, minus
	// the compiler warning.
	// NOTE(review): with non-uniform scaling the inverse-transpose would be
	// needed for a correct normal - confirm whether that matters here.
	OutNormal = normalize(mul(InNormal, (float3x3)matWorld));

	OutTex = InTex;

	// Explicit .xyz instead of relying on implicit float4 -> float3 truncation.
	wPos = mul(InPos, matWorld).xyz;
}

// Converts a clip-space position into texture coordinates:
//   1. perspective divide, with Y negated,
//   2. add vecViewPort.zw,
//   3. remap from [-1,1] to [0,1] with *0.5+0.5.
// NOTE(review): the vecViewPort.zw term looks like a half-texel alignment
// offset (assuming .zw holds the inverse viewport size) - confirm.
float2 calc_ScreenPos(float4 pPos)
{
	float2 flippedNdc = float2(pPos.x, -pPos.y) / pPos.w;
	float2 shifted    = flippedNdc + vecViewPort.zw;
	return shifted * 0.5 + 0.5;
}

// Pixel shader: reflection lookup by marching a ray through the depth map.
//
// The reflected view ray is stepped forward from the surface point. Each
// sample point is projected with matViewProj to find its depth-map texel; the
// point's distance along vecViewDir is then compared with the stored depth.
// Once the ray passes behind the stored depth, the interval between the last
// point in front and the first point behind is bisected for the remaining
// iterations.
//
//   Tex    : texture coordinates (not used by this shader)
//   Normal : interpolated normal from ReflectVS (re-normalized here)
//   wPos   : interpolated position from ReflectVS (InPos * matWorld)
//   pPos   : clip-space position from ReflectVS (not used by this shader)
//
//   COL    : color sampled from CameraSampler at the final ray position with
//            alpha forced to 1, or all zeros when the ray never passed behind
//            the stored depth within step_count steps.
//
// The depth comparison assumes the depth map stores a value comparable to
// dot(vecViewDir, point - vecViewPos), i.e. linear depth.
void ReflectPS(
	in float2 Tex    : TEXCOORD0,
	in float3 Normal : TEXCOORD1,
	in float3 wPos   : TEXCOORD2,
	in float4 pPos   : TEXCOORD3,

	out float4 COL : COLOR0
)
{
	const int   step_count     = 32;   // total march + refinement iterations
	const float init_step_size = 20.f; // coarse step length, in the units of wPos

	Normal = normalize(Normal);

	// View points from the surface towards the camera, so the reflected ray
	// leaving the surface is the negated reflection of it.
	float3 View   = normalize(vecViewPos.xyz - wPos);
	float3 rayDir = normalize(-reflect(View, Normal));

	float step_size   = init_step_size;
	bool  halve_again = false; // true right after a step that ended behind the depth
	bool  hit         = false; // true once any sample ended behind the depth

	float3 oldPos = wPos;                      // last point known to be in front
	float3 newPos = wPos + rayDir * step_size; // current candidate point

	for(int i = 0; i < step_count; ++i)
	{
		float2 uv    = calc_ScreenPos(mul(float4(newPos, 1.f), matViewProj));
		float  depth = tex2Dlod(DepthSampler, float4(uv, 0, 0)).x;

		if(dot(vecViewDir.xyz, newPos - vecViewPos.xyz) < depth)
		{
			// Still in front of the stored depth: accept the point and advance.
			oldPos = newPos;

			// During refinement the interval keeps shrinking on this side too.
			if(halve_again)
			{
				step_size *= 0.5f;
			}
			halve_again = false;

			newPos += rayDir * step_size;
		}
		else
		{
			// Passed behind the stored depth: retry from the last good point
			// with half the step.
			step_size *= 0.5f;
			newPos = oldPos + rayDir * step_size;
			halve_again = true;
			hit = true;
		}
	}

	// The ray never went behind the depth map: nothing to reflect.
	if(!hit)
	{
		COL = 0;
		return;
	}

	float2 sUV = calc_ScreenPos(mul(float4(newPos, 1.f), matViewProj));
	COL = tex2D(CameraSampler, sUV);
	COL.a = 1;
}

// Single-pass technique. Both stages are compiled for shader model 3.0;
// ReflectPS uses tex2Dlod and a 32-iteration loop with a texture fetch per
// iteration.
technique ReflectTechnique 
{ 
	pass P0 
	{ 
		VertexShader = compile vs_3_0 ReflectVS(); 
		PixelShader  = compile ps_3_0 ReflectPS(); 
	} 
}


Note that I changed the depth output in Depth.fx to linear depth.

Also, if you want to achieve multiple reflections, don't forget to update your depth map as well as your color map.

Once the algorithm works correctly, you should consider adding a quadtree-like structure (a min/max-depth mipmap pyramid) to your depth map to speed up the raycasting and make it more stable in extreme cases such as long search distances and sudden changes in the depth map. ;)

Hope that helps.

Last edited by Hummel; 09/26/12 00:27.