How about a shader solution?
Code:
//from rendermonkey
// Per-channel weights that tex2DN_ATI dots with its Sobel results to reduce
// an RGBA value to a single scalar; the alpha weight is 0, so alpha is ignored.
// NOTE(review): these look like the usual Rec. 601 luma coefficients - confirm.
// NOTE(review): declared as a plain global; depending on the compiler or
// framework the initializer may need to be supplied by the host - confirm.
float4 lightness = float4( 0.30, 0.59, 0.11, 0.00 );
// Derives a packed normal from BaseMap by running a 3x3 Sobel filter around
// texCoord.
//
//   BaseMap  - texture that is sampled at the eight neighbours of texCoord.
//   texCoord - centre of the 3x3 neighbourhood.
//   res      - the sampling step is 1.0 / res in both directions
//              (presumably the texture size in texels - TODO confirm).
//
// Returns float4(n * 0.5 + 0.5, 1) where n = normalize(float3(sx, sy, 1)) and
// sx / sy are the horizontal / vertical Sobel responses weighted by the
// global `lightness` vector.
float4 tex2DN_ATI(uniform sampler2D BaseMap,float2 texCoord,float res) : COLOR
{
	float step = 1.0 / res;

	// Row above the centre (v - step).
	float4 upLeft    = tex2D(BaseMap, texCoord + float2(-step, -step));
	float4 upMid     = tex2D(BaseMap, texCoord + float2( 0,    -step));
	float4 upRight   = tex2D(BaseMap, texCoord + float2( step, -step));

	// Centre row; the centre texel itself is not needed by the Sobel kernels.
	float4 midLeft   = tex2D(BaseMap, texCoord + float2(-step,  0));
	float4 midRight  = tex2D(BaseMap, texCoord + float2( step,  0));

	// Row below the centre (v + step).
	float4 lowLeft   = tex2D(BaseMap, texCoord + float2(-step,  step));
	float4 lowMid    = tex2D(BaseMap, texCoord + float2( 0,     step));
	float4 lowRight  = tex2D(BaseMap, texCoord + float2( step,  step));

	// Sobel responses per channel: left column minus right column, and
	// upper row minus lower row, with the middle taps weighted by 2.
	float4 gradX = upLeft + 2 * midLeft + lowLeft - upRight - 2 * midRight - lowRight;
	float4 gradY = upLeft + 2 * upMid + upRight - lowLeft - 2 * lowMid - lowRight;

	// Collapse each per-channel response to a scalar and build a unit vector
	// with a fixed z of 1 before normalisation.
	float3 n = normalize(float3(dot(gradX, lightness), dot(gradY, lightness), 1));

	// Remap each component from [-1, 1] to [0, 1]; w is set to 1.
	return float4(n * 0.5 + 0.5, 1);
}

...
// Undo the "* 0.5 + 0.5" packing applied by tex2DN_ATI so the components are
// signed again; 512 is the value passed as `res`.
// NOTE(review): `sampler` and `coord` appear to be placeholders for the
// caller's own sampler and texture coordinate - confirm.
float4 normal=2*(tex2DN_ATI(sampler,coord,512)-0.5);
...



The result is not very good, but maybe you'd like to use it anyway.