// Direct Compute Example Code Listing
//
// From OpenVIDIA
//
// Jump to: navigation, search
/*
 * Copyright 1993-2007 NVIDIA Corporation.  All rights reserved.
 *
 * NOTICE TO USER:
 *
 * This source code is subject to NVIDIA ownership rights under U.S. and
 * international Copyright laws.  Users and possessors of this source code
 * are hereby granted a nonexclusive, royalty-free license to use this code
 * in individual and commercial software.
 *
 * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
 * CODE FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
 * IMPLIED WARRANTY OF ANY KIND.  NVIDIA DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
 * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
 * OF USE, DATA OR PROFITS,  WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION,  ARISING OUT OF OR IN CONNECTION WITH THE USE
 * OR PERFORMANCE OF THIS SOURCE CODE.
 *
 * U.S. Government End Users.   This source code is a "commercial item" as
 * that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of
 * "commercial computer  software"  and "commercial computer software
 * documentation" as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995)
 * and is provided to the U.S. Government only as a commercial end item.
 * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
 * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
 * source code with only those rights set forth herein.
 *
 * Any use of this source code in individual and commercial software must
 * include, in the user documentation and internal comments to the code,
 * the above Disclaimer and U.S. Government End Users Notice.
 */

#include <windows.h>
#include <D3DX11tex.h>
#include <D3DX11.h>
#include <D3DX11core.h>
#include <D3DX11async.h>
#include <D3Dcompiler.h>
#include "resource.h"
#include <cassert>
#include <D3DX10Math.h>
#include <stdio.h>

// HRESULT-checking helper macros.
// Both V and V_RETURN assign the result of (x) to a variable named `hr`, which
// must already be declared in the scope where the macro is expanded.
// Each macro is wrapped in #ifndef so a definition supplied earlier wins.
#if defined(DEBUG) || defined(_DEBUG)
// Debug builds: a failing HRESULT is reported through DXUTTrace together with
// the file, line and the stringized expression.
// NOTE(review): DXUTTrace is not declared anywhere in the visible part of this
// file - presumably it comes from the DXUT framework; confirm before using
// these macros in a debug build.
#ifndef V
#define V(x)           { hr = (x); if( FAILED(hr) ) { DXUTTrace( __FILE__, (DWORD)__LINE__, hr, L#x, true ); } }
#endif
#ifndef V_RETURN
#define V_RETURN(x)    { hr = (x); if( FAILED(hr) ) { return DXUTTrace( __FILE__, (DWORD)__LINE__, hr, L#x, true ); } }
#endif
#else
// Non-debug builds: V only records the result in hr; V_RETURN additionally
// returns hr from the enclosing function when the call failed.
#ifndef V
#define V(x)           { hr = (x); }
#endif
#ifndef V_RETURN
#define V_RETURN(x)    { hr = (x); if( FAILED(hr) ) { return hr; } }
#endif
#endif

// Calls Release() on a non-NULL pointer and then sets the pointer to NULL;
// does nothing when the pointer is already NULL. The argument is evaluated
// more than once, so it should be a plain variable without side effects.
#ifndef SAFE_RELEASE
#define SAFE_RELEASE(p)      { if (p) { (p)->Release(); (p)=NULL; } }
#endif

//--------------------------------------------------------------------------------------
// Global Variables
//--------------------------------------------------------------------------------------
// Compute shader object; filled in by createComputeShader().
ID3D11ComputeShader* g_pComputeShader;
// Unordered access view onto pStructuredBuffer; filled in by createBuffers().
ID3D11UnorderedAccessView *g_pStructuredBufferUAV;
// NOTE(review): not assigned or read anywhere in the visible code.
ID3D11UnorderedAccessView *g_pStagingBufferUAV;

// GPU buffer bound to the compute shader through g_pStructuredBufferUAV.
ID3D11Buffer *pStructuredBuffer;
// CPU-readable staging buffer used as the copy source/destination for pStructuredBuffer.
ID3D11Buffer *pStagingBuffer;
// Constant buffer bound to compute-shader slots 0 and 1 in doComputePass().
ID3D11Buffer *pConstantBuffer;

// Global D3D device/context/feature level, all set by InitDeviceContext()
ID3D11Device *g_pD3DDevice;
ID3D11DeviceContext *g_pD3DContext;
D3D_FEATURE_LEVEL g_D3DFeatureLevel;

// When true, createBuffers() prints a message for each successful creation step.
bool verbose = true;

//--------------------------------------------------------------------------------------
// Helper function to compile an HLSL shader from a file.
//
//   szFileName    - path of the HLSL source file to read
//   szEntryPoint  - name of the shader entry point function
//   szShaderModel - target profile string passed to D3DCompile (e.g. "cs_4_0")
//   ppBlobOut     - receives the compiled code blob on success; the caller owns
//                   the reference and must Release() it. Set to NULL on entry.
//
// Returns S_OK on success, E_INVALIDARG for NULL arguments, E_FAIL when the
// file cannot be opened/sized/read, E_OUTOFMEMORY when the read buffer cannot
// be allocated, or the failing HRESULT from D3DCompile (compiler messages are
// sent to the debugger output in that case).
//--------------------------------------------------------------------------------------
HRESULT CompileShaderFromFile( WCHAR* szFileName, LPCSTR szEntryPoint, 
LPCSTR szShaderModel, ID3DBlob** ppBlobOut )
{
    if( !szFileName || !ppBlobOut )
        return E_INVALIDARG;

    // Never leave the caller holding an indeterminate pointer on failure.
    *ppBlobOut = NULL;

    // Open the file. The wide-character entry point is called explicitly
    // because szFileName is always a WCHAR string, regardless of UNICODE.
    HANDLE hFile = CreateFileW( szFileName, GENERIC_READ, FILE_SHARE_READ, NULL, 
        OPEN_EXISTING,
        FILE_FLAG_SEQUENTIAL_SCAN, NULL );
    if( INVALID_HANDLE_VALUE == hFile )
        return E_FAIL;

    // Get the file size; only sizes that fit the 32-bit read below are accepted.
    LARGE_INTEGER FileSize;
    if( !GetFileSizeEx( hFile, &FileSize ) || FileSize.HighPart != 0 )
    {
        CloseHandle( hFile );
        return E_FAIL;
    }
    const DWORD cbFile = FileSize.LowPart;

    // Create enough space for the file data.
    // The NULL check only matters if operator new is configured to return
    // NULL instead of throwing; it is kept as a defensive measure.
    BYTE* pFileData = new BYTE[ cbFile ];
    if( !pFileData )
    {
        CloseHandle( hFile );
        return E_OUTOFMEMORY;
    }

    // Read the data in; the handle is closed whether or not the read worked.
    DWORD BytesRead = 0;
    const BOOL bReadOk = ReadFile( hFile, pFileData, cbFile, &BytesRead, NULL );
    CloseHandle( hFile );
    if( !bReadOk || BytesRead != cbFile )
    {
        delete []pFileData;
        return E_FAIL;
    }

    // The source name handed to the compiler is an ANSI string. If the
    // conversion fails the buffer contents are not usable, so pass no name.
    char pFilePathName[MAX_PATH];
    LPCSTR pSourceName = pFilePathName;
    if( 0 == WideCharToMultiByte( CP_ACP, 0, szFileName, -1, pFilePathName, MAX_PATH, NULL, NULL ) )
        pSourceName = NULL;

    // Compile the shader
    ID3DBlob* pErrorBlob = NULL;
    HRESULT hr = D3DCompile( pFileData, cbFile, pSourceName, NULL, NULL, szEntryPoint, 
                      szShaderModel, D3D10_SHADER_ENABLE_STRICTNESS, 0, ppBlobOut, &pErrorBlob );

    delete []pFileData;

    if( FAILED(hr) )
    {
        // The error blob is optional output: only print it when one was produced.
        if( pErrorBlob )
            OutputDebugStringA( (char*)pErrorBlob->GetBufferPointer() );
        SAFE_RELEASE( pErrorBlob );
        return hr;
    }
    SAFE_RELEASE( pErrorBlob );

    return S_OK;
}

// Creates the global D3D11 device and immediate context.
//
// Driver types are attempted in the order listed in kDriverTypes (reference
// first, then hardware); the first successful D3D11CreateDevice call wins and
// its driver type and feature level are printed. Reorder the tables below to
// try a different configuration.
//
// Returns the HRESULT of the last D3D11CreateDevice attempt.
HRESULT InitDeviceContext()
{
	const D3D_FEATURE_LEVEL kFeatureLevels[] = 
	{ 
		D3D_FEATURE_LEVEL_11_0, 
		D3D_FEATURE_LEVEL_10_1, 
		D3D_FEATURE_LEVEL_10_0,
	};
	const UINT featureLevelCount = sizeof( kFeatureLevels ) / sizeof( kFeatureLevels[0] );

	const D3D_DRIVER_TYPE kDriverTypes[] =
	{
		D3D_DRIVER_TYPE_REFERENCE,
		D3D_DRIVER_TYPE_HARDWARE,
	};
	const UINT driverTypeCount = sizeof( kDriverTypes ) / sizeof( kDriverTypes[0] );

	HRESULT hr = E_FAIL;
	for( UINT i = 0; i < driverTypeCount; ++i )
	{
		const D3D_DRIVER_TYPE driverType = kDriverTypes[i];

		// No adapter override, no software module, no creation flags.
		hr = D3D11CreateDevice( NULL, driverType, NULL, 0, 
		                        kFeatureLevels, featureLevelCount, D3D11_SDK_VERSION, 
		                        &g_pD3DDevice, &g_D3DFeatureLevel, &g_pD3DContext );
		if( FAILED( hr ) )
			continue;	// try the next driver type

		// Report which driver type was created.
		switch( driverType )
		{
		case D3D_DRIVER_TYPE_HARDWARE:
			printf("Created HW Device\n");
			break;
		case D3D_DRIVER_TYPE_REFERENCE:
			printf("Created REFERENCE Device\n");
			break;
		default:
			break;
		}

		// Report which feature level the device ended up with.
		switch( g_D3DFeatureLevel )
		{
		case D3D_FEATURE_LEVEL_11_0:
			printf("Created D3D_FEATURE_LEVEL_11_0\n");
			break;
		case D3D_FEATURE_LEVEL_10_0:
			printf("Created D3D_FEATURE_LEVEL_10_0\n");
			break;
		case D3D_FEATURE_LEVEL_10_1:
			printf("Created D3D_FEATURE_LEVEL_10_1\n");
			break;
		default:
			break;
		}
		break;
	}
	return hr;
}

// Creates the GPU resources used by the compute pass:
//   - pStructuredBuffer      : w*h elements of D3DXVECTOR4, usable as UAV and SRV
//   - g_pStructuredBufferUAV : unordered access view over the whole structured buffer
//   - pStagingBuffer         : CPU-readable staging buffer of the same size
//   - pConstantBuffer        : dynamic constant buffer holding four unsigned ints,
//                              initialised here to 50, 51, 52, 53
//
//   w, h - element grid dimensions; both must be positive and the resulting
//          byte size must fit the 32-bit ByteWidth field.
//
// Returns S_OK when everything was created, E_INVALIDARG for unusable
// dimensions, otherwise the HRESULT of the first step that failed. Objects
// created before a failing step are left in their globals for the caller to
// release.
HRESULT createBuffers(int w, int h) 
{
	HRESULT hr = S_OK;

	// Reject sizes that are non-positive or would overflow the UINT byte width.
	if( w <= 0 || h <= 0 )
		return E_INVALIDARG;
	const ULONGLONG numElements64 = (ULONGLONG)w * (ULONGLONG)h;
	const ULONGLONG byteWidth64   = numElements64 * sizeof(D3DXVECTOR4);
	if( byteWidth64 > 0xFFFFFFFFull )
		return E_INVALIDARG;
	const UINT numElements = (UINT)numElements64;
	const UINT byteWidth   = (UINT)byteWidth64;

	// Create Structured Buffer
	// D3DXVECTOR4 Declared in D3DX10Math.h
	// http://msdn.microsoft.com/en-us/library/bb205130(VS.85).aspx
	D3D11_BUFFER_DESC sbDesc;
	ZeroMemory( &sbDesc, sizeof(sbDesc) );
	sbDesc.BindFlags		=	D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE ;
	sbDesc.Usage			=	D3D11_USAGE_DEFAULT;
	sbDesc.CPUAccessFlags	=	0;
	sbDesc.MiscFlags		=	D3D11_RESOURCE_MISC_BUFFER_STRUCTURED ;
	sbDesc.StructureByteStride	=	sizeof(D3DXVECTOR4);
	sbDesc.ByteWidth		=	byteWidth;

	hr = g_pD3DDevice->CreateBuffer( &sbDesc, NULL, &pStructuredBuffer );
	if( FAILED(hr) )
	{
		printf("Failed Making Structured Buffer\n");
		return hr;	// the UAV below cannot be created without the buffer
	}
	if( verbose ) printf("Created Structured Buffer\n");

	// Create an Unordered Access View to the Structured Buffer 
	D3D11_UNORDERED_ACCESS_VIEW_DESC	sbUAVDesc;
	ZeroMemory( &sbUAVDesc, sizeof(sbUAVDesc) );
	sbUAVDesc.Buffer.FirstElement	=	0;
	sbUAVDesc.Buffer.Flags	= 0;
	sbUAVDesc.Buffer.NumElements	= numElements;
	sbUAVDesc.Format		= DXGI_FORMAT_UNKNOWN;
	sbUAVDesc.ViewDimension	= D3D11_UAV_DIMENSION_BUFFER;

	hr = g_pD3DDevice->CreateUnorderedAccessView( pStructuredBuffer, &sbUAVDesc, 
	                                              &g_pStructuredBufferUAV );
	if( FAILED(hr) )
	{
		printf("Failed creating UAV View\n");
		return hr;
	}
	if( verbose ) printf("Created UAV View\n");

	// Create a "Staging" Resource to actually copy data to-from the GPU buffer. 
	D3D11_BUFFER_DESC stagingBufferDesc;
	ZeroMemory( &stagingBufferDesc, sizeof(stagingBufferDesc) );
	stagingBufferDesc.BindFlags		= 0;
	stagingBufferDesc.Usage			= D3D11_USAGE_STAGING;
	stagingBufferDesc.CPUAccessFlags	= D3D11_CPU_ACCESS_READ;
	stagingBufferDesc.MiscFlags		= D3D11_RESOURCE_MISC_BUFFER_STRUCTURED ;
	stagingBufferDesc.StructureByteStride	= sizeof(D3DXVECTOR4);
	stagingBufferDesc.ByteWidth		= byteWidth;

	hr = g_pD3DDevice->CreateBuffer( &stagingBufferDesc, NULL, &pStagingBuffer );
	if( FAILED(hr) )
	{
		printf("Failed Making Staging Buffer\n");
		return hr;
	}
	if( verbose ) printf("Created Staging Buffer OK\n");

	// Create Constant Buffer
	// D3DXVECTOR4 Declared in D3DX10Math.h
	// http://msdn.microsoft.com/en-us/library/bb205130(VS.85).aspx
	D3D11_BUFFER_DESC cbDesc;
	ZeroMemory( &cbDesc, sizeof(cbDesc) );	// also clears StructureByteStride
	cbDesc.BindFlags		=	D3D11_BIND_CONSTANT_BUFFER ;
	cbDesc.Usage			=	D3D11_USAGE_DYNAMIC;
	cbDesc.CPUAccessFlags	=	D3D11_CPU_ACCESS_WRITE;
	cbDesc.MiscFlags		=	0;
	cbDesc.ByteWidth		=	sizeof(D3DXVECTOR4) ;

	hr = g_pD3DDevice->CreateBuffer( &cbDesc, NULL, &pConstantBuffer );
	if( FAILED(hr) )
	{
		printf("Failed Making Constant Buffer\n" );
		return hr;
	}
	printf("Created Constant  Buffer\n");

	// Fill the constant buffer with four integers.
	// must use D3D11_MAP_WRITE_DISCARD
	// http://msdn.microsoft.com/en-us/library/bb205318(VS.85).aspx
	D3D11_MAPPED_SUBRESOURCE mappedResource;
	hr = g_pD3DContext->Map( pConstantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0, 
	                         &mappedResource );
	if( FAILED(hr) )
	{
		// pData is not valid when Map fails, so it must not be written through.
		printf("Failed Mapping Constant Buffer\n");
		return hr;
	}
	unsigned int *data = (unsigned int *)(mappedResource.pData);
	for( int i=0 ; i<4; i++ ) data[i] = 50+i;
	g_pD3DContext->Unmap( pConstantBuffer, 0 );

	return hr;
}

// Compiles "ExampleComputeShader.hlsl" (entry point "main", profile "cs_4_0")
// and creates the global compute shader object g_pComputeShader from it.
//
// Returns the failing HRESULT of the compile step or of CreateComputeShader,
// E_FAIL if compilation reported success without producing a blob, or the
// success code of CreateComputeShader.
HRESULT createComputeShader()
{
	HRESULT hr = S_OK;
	ID3DBlob *pBlob = NULL;

	// This function loads the shader into a string, and then compiles it. 
	// It produces a "blob" of compiled code. 
	hr = CompileShaderFromFile( L"ExampleComputeShader.hlsl", "main", 
	                            "cs_4_0", &pBlob );
	if( FAILED(hr) || !pBlob )
	{
		// Without a blob there is nothing to create the shader from; going on
		// would dereference an invalid pointer.
		printf("Failed creating Shader Blob\n");
		SAFE_RELEASE( pBlob );
		return FAILED(hr) ? hr : E_FAIL;
	}
	printf("Created Shader Blob OK\n");

	// this function takes the compiled "blob" of code and makes it into a shader object, 
	// g_pComputeShader. 
	hr = g_pD3DDevice->CreateComputeShader( pBlob->GetBufferPointer(), 
	                                        pBlob->GetBufferSize(), NULL, &g_pComputeShader );

	// The blob is not used after this point in either outcome.
	SAFE_RELEASE( pBlob );

	if( SUCCEEDED(hr) )
	{
		printf("Created Shader Object\n");
	} else {
		printf("Failed creating Shader object\n");
	}
	return hr;
}

void doComputePass()
{
	D3D11_QUERY_DESC pQueryDesc;
	pQueryDesc.Query = D3D11_QUERY_EVENT;
	pQueryDesc.MiscFlags = 0;
	ID3D11Query *pEventQuery;
	g_pD3DDevice->CreateQuery( &pQueryDesc, &pEventQuery );

	g_pD3DContext->End( pEventQuery );
	while( g_pD3DContext->GetData( pEventQuery, NULL, 0, 0 ) == S_FALSE ) {} 
                // spin until event is finished

	g_pD3DContext->CopyResource( pStructuredBuffer, pStagingBuffer );

	g_pD3DContext->End( pEventQuery );
	while( g_pD3DContext->GetData( pEventQuery, NULL, 0, 0 ) == S_FALSE ) {} 
                 // spin until event is finished

	pEventQuery->Release();

	// now make the compute shader active
	g_pD3DContext->CSSetShader( g_pComputeShader, NULL, 0 );
	// To bind the UAV to the computer shader, we use the code: 
	// http://msdn.microsoft.com/en-us/library/dd445761.aspx
	UINT initCounts = 0;
	g_pD3DContext->CSSetUnorderedAccessViews( 0, 1, &g_pStructuredBufferUAV, &initCounts );
	g_pD3DContext->CSSetConstantBuffers( 0 ,1,  &pConstantBuffer );
	g_pD3DContext->CSSetConstantBuffers( 1 ,1,  &pConstantBuffer );

	// now dispatch ("run") the compute shader, with a set of 16x16 groups.
	g_pD3DContext->Dispatch( 16, 16, 1 );


	g_pD3DContext->CopyResource( pStagingBuffer, pStructuredBuffer );
	// http://msdn.microsoft.com/en-us/library/bb173512(VS.85).aspx D3D10 
                // had mappable buffers, but D3D11 moves this to context function
	// http://www.slideshare.net/repii/your-game-needs-direct3d-11-so-get-started-now
	D3D11_MAPPED_SUBRESOURCE mappedResource;
	g_pD3DContext->Map( pStagingBuffer, 0, D3D11_MAP_READ, 0, &mappedResource);
	unsigned int *data = (unsigned int *)(mappedResource.pData);
	int offset = 31*4;
	printf(" %d %d %d %d\n", data[offset+0], data[offset+1], data[offset+2], data[offset+3] );
	g_pD3DContext->Unmap( pStagingBuffer, 0);

	// D3D11 on D3D10 hW: only a single UAV can be bound to a pipeline at once. 
	// set to NULL to unbind
	ID3D11UnorderedAccessView *pNullUAV = NULL;
	g_pD3DContext->CSSetUnorderedAccessViews( 0, 1, &pNullUAV, &initCounts );

}


// Program entry point: creates the device, the buffers and the compute shader,
// runs one compute pass, then releases every global D3D object.
//
// Returns 0 when all setup steps succeeded and the pass was run, 1 when any
// setup step failed. Command-line arguments are ignored.
int _cdecl main( int , char**  )
{
	int exitCode = 1;

	// Each step runs only if the previous one succeeded:
	//  - create the device and immediate context,
	//  - initialize some GPU data buffers to operate on,
	//  - load/compile/create a shader object from hlsl shader code.
	if( SUCCEEDED( InitDeviceContext() ) &&
	    SUCCEEDED( createBuffers( 640, 480 ) ) &&
	    SUCCEEDED( createComputeShader() ) )
	{
		doComputePass();
		exitCode = 0;
	}

	// Release whatever was created, on success and on failure alike.
	// Globals that were never assigned are still NULL and are skipped.
	SAFE_RELEASE( g_pComputeShader );
	SAFE_RELEASE( g_pStagingBufferUAV );
	SAFE_RELEASE( g_pStructuredBufferUAV );
	SAFE_RELEASE( pConstantBuffer );
	SAFE_RELEASE( pStagingBuffer );
	SAFE_RELEASE( pStructuredBuffer );
	SAFE_RELEASE( g_pD3DContext );
	SAFE_RELEASE( g_pD3DDevice );

	return exitCode;
}
// This page was last modified on 16 September 2009, at 02:17. This page has been accessed 5,556 times.