One workaround is to draw things yourself, for example using DirectX 9's DrawPrimitiveUP. DrawPrimitiveUP is an old feature that is only kept in DirectX 9 for legacy support, but it gets the job done. It also doesn't require you to create mesh data (a vertex buffer) on your graphics card; instead, it sends the vertex data directly from system memory with every call, which has pros and cons.

This is what I'm using for my 2D renderer, and it works pretty well. Here's a basic example using the fixed-function pipeline (using shaders would work, too). I didn't include culling, partial clipping of images and some other stuff that I had in there. You might want to move setting the render/texture stage states and the FVF into a separate function, as this only needs to be done once per frame.

Code:
#define PRAGMA_POINTER
#include <acknex.h>
#include <d3d9.h>

// some datatypes

// one-byte boolean alias; used as the type of bmap_draw's AlphaBlendEnable parameter
typedef byte bool;

// 2D integer pair; used for bitmap dimensions in pixels (x = width, y = height).
// The struct tag now matches the typedef name, like the other structs in this file.
typedef struct int_xy
{
	int x, y;
} int_xy;

// generic 4-component integer vector
typedef struct int_xyzw
{
	int x;
	int y;
	int z;
	int w;
} int_xyzw;

// integer rectangle given by its four edges, in pixels
// (used both for the on-screen target area and for pixel-based UVs)
typedef struct int_rect
{
	int left;
	int top;
	int right;
	int bottom;
} int_rect;

// float rectangle given by its four edges
// (holds the normalized texture coordinates produced by bmap_UV_to_fUV)
typedef struct float_rect
{
	float left;
	float top;
	float right;
	float bottom;
} float_rect;

//

// One corner of the quad drawn by bmap_draw.
// The member order must stay exactly as declared: it has to match the
// layout announced to Direct3D via D3DFVF_BMAP_DRAW_VERTEX.
typedef struct BMAP_DRAW_VERTEX
{
	float x;        // screen position
	float y;
	float z;
	float rhw;      // reciprocal homogeneous w (set to 1.0 in main)
	D3DCOLOR color; // diffuse color, multiplied with the texture
	float u;        // texture coordinates
	float v;
} BMAP_DRAW_VERTEX;

// FVF flags describing BMAP_DRAW_VERTEX: position with rhw (x, y, z, rhw), a diffuse color and one set of texture coordinates (u, v)
#define D3DFVF_BMAP_DRAW_VERTEX (D3DFVF_XYZRHW | D3DFVF_DIFFUSE | D3DFVF_TEX1) // vertex's content ("Flexible" Vertex Format)

// The four quad corners that bmap_draw fills in and submits on every call.
// z and rhw are written once in main(); bmap_draw only rewrites x, y, u, v and color.
// Kept global because, according to the author's own (unconfirmed) test, a function-local array was slower.
BMAP_DRAW_VERTEX _bmap_draw_vertices[4]; // vertices used for bmap_draw

//

// Draws Bmap as a textured quad directly through the D3D9 device (fixed function pipeline).
//
// Bmap             - bitmap to draw; its d3dtex member is bound as texture 0
// iRect            - target rectangle on screen, in pixels
// fUV              - texture coordinates of the rectangle's edges (see bmap_UV_to_fUV)
// AlphaBlendEnable - nonzero: blend with SRCALPHA / INVSRCALPHA; zero: overwrite the target
// Color            - diffuse color that the texture color is multiplied with
//
// Uses the global _bmap_draw_vertices array; its z and rhw members must have been
// initialized beforehand (main does this once).
// Does nothing if one of the pointers is NULL or no device is available.
void bmap_draw(BMAP * Bmap, int_rect * iRect, float_rect * fUV, bool AlphaBlendEnable, D3DCOLOR Color)
{
	if(Bmap == NULL || iRect == NULL || fUV == NULL)
	{
		return;
	}
	
	/// vertices-setup ///
	
	// corner order: 0 = top-left, 1 = top-right, 2 = bottom-left, 3 = bottom-right (triangle strip)
	_bmap_draw_vertices[0].x = _bmap_draw_vertices[2].x = iRect->left;
	_bmap_draw_vertices[0].y = _bmap_draw_vertices[1].y = iRect->top;
	_bmap_draw_vertices[1].x = _bmap_draw_vertices[3].x = iRect->right;
	_bmap_draw_vertices[2].y = _bmap_draw_vertices[3].y = iRect->bottom;
	
	_bmap_draw_vertices[0].u = _bmap_draw_vertices[2].u = fUV->left;
	_bmap_draw_vertices[0].v = _bmap_draw_vertices[1].v = fUV->top;
	_bmap_draw_vertices[1].u = _bmap_draw_vertices[3].u = fUV->right;
	_bmap_draw_vertices[2].v = _bmap_draw_vertices[3].v = fUV->bottom;
	
	_bmap_draw_vertices[0].color = _bmap_draw_vertices[1].color = _bmap_draw_vertices[2].color = _bmap_draw_vertices[3].color = Color;
	
	/// drawing ///
	
	// get the active D3D device
	LPDIRECT3DDEVICE9 pd3dDev = (LPDIRECT3DDEVICE9)draw_begin();
	if(pd3dDev == NULL)
	{
		return;
	}
	
	// The blend factors are render states (D3DRS_*), so they have to go through
	// SetRenderState; passing them to SetTextureStageState does not set them.
	if(!AlphaBlendEnable) // alphablending?
	{
		pd3dDev->SetRenderState(D3DRS_ALPHABLENDENABLE, FALSE);
		
		pd3dDev->SetRenderState(D3DRS_SRCBLEND, 2); // D3DBLEND_ONE
		pd3dDev->SetRenderState(D3DRS_DESTBLEND, 1); // D3DBLEND_ZERO
	}
	else
	{
		pd3dDev->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE);
		
		pd3dDev->SetRenderState(D3DRS_SRCBLEND, 5); // D3DBLEND_SRCALPHA
		pd3dDev->SetRenderState(D3DRS_DESTBLEND, 6); // D3DBLEND_INVSRCALPHA
	}
	
	// set texture
	pd3dDev->SetTexture(0, Bmap->d3dtex); // NOTE: BMAP* needs to have a valid format! (8888 works, for example)
	
	// color = texture * diffuse
	// NOTE(review): the alpha op/args of stage 0 are not set here, so the alpha source
	// depends on whatever state is currently active - confirm this is intended
	pd3dDev->SetTextureStageState(0, D3DTSS_COLORARG1, D3DTA_TEXTURE); // texture sampling and color-op
	pd3dDev->SetTextureStageState(0, D3DTSS_COLORARG2, D3DTA_DIFFUSE);
	pd3dDev->SetTextureStageState(0, D3DTSS_COLOROP, D3DTOP_MODULATE); // multiply texture with color
	
	// draw the quad as a strip of 2 triangles, straight from the vertex array in system memory
	pd3dDev->SetFVF(D3DFVF_BMAP_DRAW_VERTEX);
	pd3dDev->DrawPrimitiveUP(D3DPT_TRIANGLESTRIP, 2, (LPVOID)_bmap_draw_vertices, sizeof(BMAP_DRAW_VERTEX));
}

// Returns the smallest power of two that is >= x.
// x <= 1 (including zero and negative values) yields 1.
// The result saturates at 2^30 (0x40000000), the largest power of two a 32-bit
// signed int can hold; without that limit, shifting further would overflow
// and the loop would never terminate for x > 2^30.
int get_next_pow2(int x)
{
	int xp2 = 1;
	
	while(xp2 < x && xp2 < 0x40000000)
	{
		xp2 <<= 1;
	}
	
	return xp2;
}

// Converts a pixel rectangle on a bitmap into the float texture coordinates
// that bmap_draw expects.
//
// BmapDimensions - bitmap size in pixels (x = width, y = height)
// UV             - source rectangle in pixels, or NULL to use the whole bitmap
// Dest_fUV       - receives the resulting texture coordinates
//
// Each pixel coordinate gets a half pixel offset and is divided by the next
// power of two of the bitmap's size, which compensates the stretching that
// occurs for non power of two dimensions.
void bmap_UV_to_fUV(int_xy * BmapDimensions, int_rect * UV, float_rect * Dest_fUV)
{
	// source rectangle in pixels; defaults to the full bitmap
	float pxLeft   = 0.0;
	float pxTop    = 0.0;
	float pxRight  = BmapDimensions->x;
	float pxBottom = BmapDimensions->y;
	
	if(UV != NULL)
	{
		pxLeft   = UV->left;
		pxTop    = UV->top;
		pxRight  = UV->right;
		pxBottom = UV->bottom;
	}
	
	// divisors: bitmap size rounded up to a power of two
	float texWidth  = (float)get_next_pow2(BmapDimensions->x);
	float texHeight = (float)get_next_pow2(BmapDimensions->y);
	
	Dest_fUV->left   = (pxLeft   + 0.5) / texWidth;
	Dest_fUV->top    = (pxTop    + 0.5) / texHeight;
	Dest_fUV->right  = (pxRight  + 0.5) / texWidth;
	Dest_fUV->bottom = (pxBottom + 0.5) / texHeight;
}

void main()
{
	fps_max = 60;
	
	wait(1);
	
	// only needed to set once (since vertex array is global)
	
	_bmap_draw_vertices[0].z = _bmap_draw_vertices[1].z = _bmap_draw_vertices[2].z = _bmap_draw_vertices[3].z = 0.0; // no depth needed
	_bmap_draw_vertices[0].rhw = _bmap_draw_vertices[1].rhw = _bmap_draw_vertices[2].rhw = _bmap_draw_vertices[3].rhw = 1.0; // no perspective
	
	//
	
	BMAP * bmap = bmap_create(ImageFileNameHere);
	bmap_to_format(bmap, 8888); // depending on the bmap's format, this is sometimes necessary to force the engine to create a valid d3dtexture for the bmap
	
	int_rect DrawRect; // target position for drawing
	DrawRect.left = 50;
	DrawRect.top = 30;
	DrawRect.right = DrawRect.left + bmap_width(bmap);
	DrawRect.bottom = DrawRect.top + bmap_height(bmap);
	
	int_xyzw BmapDimensions; // size of the bmap in pixels
	BmapDimensions.x = bmap_width(bmap);
	BmapDimensions.y = bmap_height(bmap);
	
	int_rect UV; // min/max UVs in PIXELS
	UV.left = UV.top = 0.0;
	UV.right = BmapDimensions.x;
	UV.bottom = BmapDimensions.y;
	
	float_rect fUV; // float UVs in [0,1] range
	bmap_UV_to_fUV(&BmapDimensions, &UV, &fUV); // also possible: use NULL as input UV to use the whole image
	
	while(1)
	{
		bmap_draw(bmap, &DrawRect, &fUV, true, 0xFFFFFFFF);
		
		wait(1);
	}
}


Also keep in mind that changing those render states and texture stage states might cause other 2D stuff to render incorrectly (like DEBUG_BMAP, maybe), so check them first if some things render strangely. It works for me, though.

Edit: I forgot: this also works together with bmap_rendertarget()


POTATO-MAN saves the day! - Random