From OpenVIDIA
/*
* Copyright 1993-2007 NVIDIA Corporation. All rights reserved.
*
* NOTICE TO USER:
*
* This source code is subject to NVIDIA ownership rights under U.S. and
* international Copyright laws. Users and possessors of this source code
* are hereby granted a nonexclusive, royalty-free license to use this code
* in individual and commercial software.
*
* NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
* CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
* IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
* REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
* IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
* OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
* OR PERFORMANCE OF THIS SOURCE CODE.
*
* U.S. Government End Users. This source code is a "commercial item" as
* that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
* "commercial computer software" and "commercial computer software
* documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
* and is provided to the U.S. Government only as a commercial end item.
* Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
* 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
* source code with only those rights set forth herein.
*
* Any use of this source code in individual and commercial software must
* include, in the user documentation and internal comments to the code,
* the above Disclaimer and U.S. Government End Users Notice.
*/
#include <windows.h>
#include <D3DX11tex.h>
#include <D3DX11.h>
#include <D3DX11core.h>
#include <D3DX11async.h>
#include <D3Dcompiler.h>
#include "resource.h"
#include <cassert>
#include <D3DX10Math.h>
#include <stdio.h>
// Error-checking helper macros. V and V_RETURN both assign the result of (x) to a
// variable named `hr`, which must already be declared in the calling scope.
// Each macro is only defined here if it has not been defined already.
#if defined(DEBUG) || defined(_DEBUG)
// Debug builds: a failing HRESULT is additionally passed to DXUTTrace together with
// the file name, line number and the stringized expression.
// NOTE(review): DXUTTrace is not declared anywhere in the visible part of this file;
// presumably it comes from the DXUT framework headers - confirm before using
// V / V_RETURN in a debug build.
#ifndef V
#define V(x) { hr = (x); if( FAILED(hr) ) { DXUTTrace( __FILE__, (DWORD)__LINE__, hr, L#x, true ); } }
#endif
// Like V, but also returns the value of DXUTTrace from the enclosing function on failure.
#ifndef V_RETURN
#define V_RETURN(x) { hr = (x); if( FAILED(hr) ) { return DXUTTrace( __FILE__, (DWORD)__LINE__, hr, L#x, true ); } }
#endif
#else
// Release builds: V only stores the result in hr ...
#ifndef V
#define V(x) { hr = (x); }
#endif
// ... and V_RETURN stores it and returns hr from the enclosing function on failure.
#ifndef V_RETURN
#define V_RETURN(x) { hr = (x); if( FAILED(hr) ) { return hr; } }
#endif
#endif
// Calls Release() on a non-NULL interface pointer and then sets the pointer to NULL.
// The argument is evaluated more than once, so it should be a plain pointer variable.
#ifndef SAFE_RELEASE
#define SAFE_RELEASE(p) { if (p) { (p)->Release(); (p)=NULL; } }
#endif
//--------------------------------------------------------------------------------------
// Global state shared by the functions in this file
//--------------------------------------------------------------------------------------

// Device, immediate context and the feature level that was actually obtained;
// all three are filled in by InitDeviceContext().
ID3D11Device              *g_pD3DDevice            = NULL;
ID3D11DeviceContext       *g_pD3DContext           = NULL;
D3D_FEATURE_LEVEL          g_D3DFeatureLevel;

// Compute shader object built by createComputeShader().
ID3D11ComputeShader       *g_pComputeShader        = NULL;

// GPU-side structured buffer and the unordered access view bound to the shader.
ID3D11Buffer              *pStructuredBuffer       = NULL;
ID3D11UnorderedAccessView *g_pStructuredBufferUAV  = NULL;

// CPU-readable staging buffer used to read results back.
// g_pStagingBufferUAV is declared but never assigned or used in the visible code.
ID3D11Buffer              *pStagingBuffer          = NULL;
ID3D11UnorderedAccessView *g_pStagingBufferUAV     = NULL;

// Constant buffer handed to the compute shader.
ID3D11Buffer              *pConstantBuffer         = NULL;

// When true, createBuffers() prints a message for each object it creates successfully.
bool verbose = true;
//--------------------------------------------------------------------------------------
// Helper function to compile an HLSL shader from a file.
//
// The whole file is read into memory and handed to D3DCompile.
//
//   szFileName    - path of the HLSL source file (wide-character string)
//   szEntryPoint  - name of the shader entry point function
//   szShaderModel - target profile string passed to D3DCompile (e.g. "cs_4_0")
//   ppBlobOut     - receives the compiled byte code; on success the caller owns the
//                   blob and must Release() it
//
// Returns S_OK on success; E_FAIL if the file cannot be opened, sized or read, or is
// larger than 4 GB; E_OUTOFMEMORY if the read buffer cannot be allocated; otherwise
// the failing HRESULT from D3DCompile. Compiler messages, when the compiler produces
// any, are sent to the debugger via OutputDebugStringA.
//--------------------------------------------------------------------------------------
HRESULT CompileShaderFromFile( WCHAR* szFileName, LPCSTR szEntryPoint,
                               LPCSTR szShaderModel, ID3DBlob** ppBlobOut )
{
    // Open the file. The wide-character API is called explicitly because szFileName
    // is a WCHAR string regardless of whether UNICODE is defined.
    HANDLE hFile = CreateFileW( szFileName, GENERIC_READ, FILE_SHARE_READ, NULL,
                                OPEN_EXISTING, FILE_FLAG_SEQUENTIAL_SCAN, NULL );
    if( INVALID_HANDLE_VALUE == hFile )
        return E_FAIL;

    // Get the file size. Only the low 32 bits are usable as a buffer length below,
    // so anything larger is rejected instead of being silently truncated.
    LARGE_INTEGER FileSize;
    if( !GetFileSizeEx( hFile, &FileSize ) || FileSize.HighPart != 0 )
    {
        CloseHandle( hFile );
        return E_FAIL;
    }

    // Create enough space for the file data. The nothrow form is used so that an
    // allocation failure is reported as an HRESULT rather than as an exception.
    BYTE* pFileData = new (std::nothrow) BYTE[ FileSize.LowPart ];
    if( !pFileData )
    {
        CloseHandle( hFile );
        return E_OUTOFMEMORY;
    }

    // Read the data in; the handle is no longer needed afterwards on any path.
    DWORD BytesRead = 0;
    const BOOL readOk = ReadFile( hFile, pFileData, FileSize.LowPart, &BytesRead, NULL );
    CloseHandle( hFile );
    if( !readOk )
    {
        delete [] pFileData;
        return E_FAIL;
    }

    // The source name is optional for D3DCompile (it is only used in messages), so a
    // failed conversion simply results in no name being passed.
    char pFilePathName[MAX_PATH];
    const char* pSourceName = NULL;
    if( WideCharToMultiByte( CP_ACP, 0, szFileName, -1, pFilePathName, MAX_PATH,
                             NULL, NULL ) > 0 )
    {
        pSourceName = pFilePathName;
    }

    // Compile the shader, using the number of bytes that were actually read.
    ID3DBlob* pErrorBlob = NULL;
    HRESULT hr = D3DCompile( pFileData, BytesRead, pSourceName, NULL, NULL, szEntryPoint,
                             szShaderModel, D3D10_SHADER_ENABLE_STRICTNESS, 0,
                             ppBlobOut, &pErrorBlob );
    delete [] pFileData;

    // The error blob may be absent even when compilation fails, and may carry
    // warnings when it succeeds; it is released in both cases.
    if( FAILED(hr) && pErrorBlob )
        OutputDebugStringA( (char*)pErrorBlob->GetBufferPointer() );
    SAFE_RELEASE( pErrorBlob );

    return FAILED(hr) ? hr : S_OK;
}
// Creates the global D3D11 device and immediate context.
//
// Driver types are tried in the order listed below (reference first, then hardware)
// and the first one that succeeds is kept. Change the order of the arrays to try
// different configurations; the reference driver is listed first here to demonstrate
// the API. On success g_pD3DDevice, g_pD3DContext and g_D3DFeatureLevel are filled in
// and the chosen driver type and feature level are printed.
//
// Returns the HRESULT of the last D3D11CreateDevice attempt (E_FAIL if no attempt
// could be made).
HRESULT InitDeviceContext()
{
    // Defined result even if the driver list below were ever emptied.
    HRESULT hr = E_FAIL;

    const D3D_FEATURE_LEVEL levelsWanted[] =
    {
        D3D_FEATURE_LEVEL_11_0,
        D3D_FEATURE_LEVEL_10_1,
        D3D_FEATURE_LEVEL_10_0,
    };
    const UINT numLevelsWanted = sizeof( levelsWanted ) / sizeof( levelsWanted[0] );

    const D3D_DRIVER_TYPE driverTypes[] =
    {
        D3D_DRIVER_TYPE_REFERENCE,
        D3D_DRIVER_TYPE_HARDWARE,
    };
    const UINT numDriverTypes = sizeof( driverTypes ) / sizeof( driverTypes[0] );

    // No device creation flags are requested (this is an integer bit field, not a pointer).
    const UINT createDeviceFlags = 0;

    for( UINT driverTypeIndex = 0; driverTypeIndex < numDriverTypes; driverTypeIndex++ )
    {
        const D3D_DRIVER_TYPE driverType = driverTypes[driverTypeIndex];

        hr = D3D11CreateDevice( NULL, driverType, NULL, createDeviceFlags,
                                levelsWanted, numLevelsWanted, D3D11_SDK_VERSION,
                                &g_pD3DDevice, &g_D3DFeatureLevel, &g_pD3DContext );
        if( FAILED( hr ) )
            continue;   // try the next driver type

        // Report what was created, driver type first and feature level second.
        if( driverType == D3D_DRIVER_TYPE_HARDWARE )
            printf("Created HW Device\n");
        else if( driverType == D3D_DRIVER_TYPE_REFERENCE )
            printf("Created REFERENCE Device\n");

        switch( g_D3DFeatureLevel )
        {
        case D3D_FEATURE_LEVEL_11_0:
            printf("Created D3D_FEATURE_LEVEL_11_0\n");
            break;
        case D3D_FEATURE_LEVEL_10_1:
            printf("Created D3D_FEATURE_LEVEL_10_1\n");
            break;
        case D3D_FEATURE_LEVEL_10_0:
            printf("Created D3D_FEATURE_LEVEL_10_0\n");
            break;
        default:
            break;
        }
        break;   // stop at the first success
    }
    return hr;
}
// Creates the GPU resources used by the compute pass and stores them in the globals:
//   - pStructuredBuffer / g_pStructuredBufferUAV : w*h elements of D3DXVECTOR4
//     (declared in D3DX10Math.h) with an unordered access view over all of them
//   - pStagingBuffer  : CPU-readable staging buffer of the same size
//   - pConstantBuffer : one D3DXVECTOR4 worth of constants, filled with the four
//     unsigned integers 50, 51, 52, 53
//
//   w, h - buffer dimensions in elements; both must be positive and the total byte
//          size must fit in a UINT
//
// Returns S_OK on success, E_INVALIDARG for unusable dimensions, E_FAIL if the device
// or context has not been created, otherwise the HRESULT of the first step that
// failed. Objects created before a failing step are left in their globals.
HRESULT createBuffers(int w, int h)
{
    if( !g_pD3DDevice || !g_pD3DContext )
        return E_FAIL;

    // Reject dimensions whose element count or byte size cannot be represented.
    if( w <= 0 || h <= 0 )
        return E_INVALIDARG;
    const UINT64 totalBytes = (UINT64)w * (UINT64)h * sizeof(D3DXVECTOR4);
    if( totalBytes > 0xFFFFFFFFu )
        return E_INVALIDARG;
    const UINT byteWidth   = (UINT)totalBytes;
    const UINT numElements = (UINT)w * (UINT)h;

    HRESULT hr = S_OK;

    // Create Structured Buffer
    D3D11_BUFFER_DESC sbDesc;
    ZeroMemory( &sbDesc, sizeof( sbDesc ) );
    sbDesc.BindFlags           = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
    sbDesc.Usage               = D3D11_USAGE_DEFAULT;
    sbDesc.CPUAccessFlags      = 0;
    sbDesc.MiscFlags           = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
    sbDesc.StructureByteStride = sizeof(D3DXVECTOR4);
    sbDesc.ByteWidth           = byteWidth;
    hr = g_pD3DDevice->CreateBuffer( &sbDesc, NULL, &pStructuredBuffer );
    if( FAILED(hr) )
    {
        printf("Failed Making Structured Buffer\n");
        return hr;
    }
    if( verbose ) printf("Created Structured Buffer\n");

    // Create an Unordered Access View to the Structured Buffer
    D3D11_UNORDERED_ACCESS_VIEW_DESC sbUAVDesc;
    ZeroMemory( &sbUAVDesc, sizeof( sbUAVDesc ) );
    sbUAVDesc.Format              = DXGI_FORMAT_UNKNOWN;
    sbUAVDesc.ViewDimension       = D3D11_UAV_DIMENSION_BUFFER;
    sbUAVDesc.Buffer.FirstElement = 0;
    sbUAVDesc.Buffer.Flags        = 0;
    sbUAVDesc.Buffer.NumElements  = numElements;
    hr = g_pD3DDevice->CreateUnorderedAccessView( pStructuredBuffer, &sbUAVDesc,
                                                  &g_pStructuredBufferUAV );
    if( FAILED(hr) )
    {
        printf("Failed creating UAV View\n");
        return hr;
    }
    if( verbose ) printf("Created UAV View\n");

    // Create a "Staging" resource to actually copy data to/from the GPU buffer.
    D3D11_BUFFER_DESC stagingBufferDesc;
    ZeroMemory( &stagingBufferDesc, sizeof( stagingBufferDesc ) );
    stagingBufferDesc.BindFlags           = 0;
    stagingBufferDesc.Usage               = D3D11_USAGE_STAGING;
    stagingBufferDesc.CPUAccessFlags      = D3D11_CPU_ACCESS_READ;
    stagingBufferDesc.MiscFlags           = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
    stagingBufferDesc.StructureByteStride = sizeof(D3DXVECTOR4);
    stagingBufferDesc.ByteWidth           = byteWidth;
    hr = g_pD3DDevice->CreateBuffer( &stagingBufferDesc, NULL, &pStagingBuffer );
    if( FAILED(hr) )
    {
        printf("Failed Making Staging Buffer\n");
        return hr;
    }
    if( verbose ) printf("Created Staging Buffer OK\n");

    // Create Constant Buffer (dynamic, CPU-writable)
    D3D11_BUFFER_DESC cbDesc;
    ZeroMemory( &cbDesc, sizeof( cbDesc ) );
    cbDesc.BindFlags      = D3D11_BIND_CONSTANT_BUFFER;
    cbDesc.Usage          = D3D11_USAGE_DYNAMIC;
    cbDesc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
    cbDesc.MiscFlags      = 0;
    cbDesc.ByteWidth      = sizeof(D3DXVECTOR4);
    hr = g_pD3DDevice->CreateBuffer( &cbDesc, NULL, &pConstantBuffer );
    if( FAILED(hr) )
    {
        printf("Failed Making Constant Buffer\n" );
        return hr;
    }
    printf("Created Constant Buffer\n");

    // Fill the constant buffer. A dynamic buffer must be mapped with
    // D3D11_MAP_WRITE_DISCARD; pData is only valid when Map succeeds.
    D3D11_MAPPED_SUBRESOURCE mappedResource;
    hr = g_pD3DContext->Map( pConstantBuffer, 0, D3D11_MAP_WRITE_DISCARD, 0,
                             &mappedResource );
    if( FAILED(hr) )
    {
        printf("Failed mapping Constant Buffer\n");
        return hr;
    }
    unsigned int *data = (unsigned int *)(mappedResource.pData);
    for( int i = 0; i < 4; i++ )
        data[i] = 50 + i;
    g_pD3DContext->Unmap( pConstantBuffer, 0 );

    return hr;
}
// Compiles ExampleComputeShader.hlsl (entry point "main", profile "cs_4_0") and
// creates the global compute shader object g_pComputeShader from the result.
//
// Returns the failing HRESULT of the compile step or of CreateComputeShader, or the
// successful HRESULT of CreateComputeShader. The intermediate byte-code blob is
// released before returning.
HRESULT createComputeShader()
{
    // Starts out NULL so that a compile failure can never leave a garbage pointer.
    ID3DBlob *pBlob = NULL;

    // CompileShaderFromFile takes a non-const WCHAR*, so the name is kept in a
    // writable array rather than passing a string literal directly.
    WCHAR shaderFile[] = L"ExampleComputeShader.hlsl";

    // This function loads the shader source and compiles it.
    // It produces a "blob" of compiled code.
    HRESULT hr = CompileShaderFromFile( shaderFile, "main", "cs_4_0", &pBlob );
    if( FAILED(hr) || pBlob == NULL )
    {
        printf("Failed creating Shader Blob\n");
        SAFE_RELEASE( pBlob );
        return FAILED(hr) ? hr : E_FAIL;
    }
    printf("Created Shader Blob OK\n");

    // Turn the compiled "blob" of code into a shader object, g_pComputeShader.
    hr = g_pD3DDevice->CreateComputeShader( pBlob->GetBufferPointer(),
                                            pBlob->GetBufferSize(), NULL,
                                            &g_pComputeShader );
    // The byte code is no longer needed once the shader object exists (or failed).
    SAFE_RELEASE( pBlob );

    if( SUCCEEDED(hr) )
    {
        printf("Created Shader Object\n");
    } else {
        printf("Failed creating Shader object\n");
    }
    return hr;
}
void doComputePass()
{
D3D11_QUERY_DESC pQueryDesc;
pQueryDesc.Query = D3D11_QUERY_EVENT;
pQueryDesc.MiscFlags = 0;
ID3D11Query *pEventQuery;
g_pD3DDevice->CreateQuery( &pQueryDesc, &pEventQuery );
g_pD3DContext->End( pEventQuery );
while( g_pD3DContext->GetData( pEventQuery, NULL, 0, 0 ) == S_FALSE ) {}
// spin until event is finished
g_pD3DContext->CopyResource( pStructuredBuffer, pStagingBuffer );
g_pD3DContext->End( pEventQuery );
while( g_pD3DContext->GetData( pEventQuery, NULL, 0, 0 ) == S_FALSE ) {}
// spin until event is finished
pEventQuery->Release();
// now make the compute shader active
g_pD3DContext->CSSetShader( g_pComputeShader, NULL, 0 );
// To bind the UAV to the computer shader, we use the code:
// http://msdn.microsoft.com/en-us/library/dd445761.aspx
UINT initCounts = 0;
g_pD3DContext->CSSetUnorderedAccessViews( 0, 1, &g_pStructuredBufferUAV, &initCounts );
g_pD3DContext->CSSetConstantBuffers( 0 ,1, &pConstantBuffer );
g_pD3DContext->CSSetConstantBuffers( 1 ,1, &pConstantBuffer );
// now dispatch ("run") the compute shader, with a set of 16x16 groups.
g_pD3DContext->Dispatch( 16, 16, 1 );
g_pD3DContext->CopyResource( pStagingBuffer, pStructuredBuffer );
// http://msdn.microsoft.com/en-us/library/bb173512(VS.85).aspx D3D10
// had mappable buffers, but D3D11 moves this to context function
// http://www.slideshare.net/repii/your-game-needs-direct3d-11-so-get-started-now
D3D11_MAPPED_SUBRESOURCE mappedResource;
g_pD3DContext->Map( pStagingBuffer, 0, D3D11_MAP_READ, 0, &mappedResource);
unsigned int *data = (unsigned int *)(mappedResource.pData);
int offset = 31*4;
printf(" %d %d %d %d\n", data[offset+0], data[offset+1], data[offset+2], data[offset+3] );
g_pD3DContext->Unmap( pStagingBuffer, 0);
// D3D11 on D3D10 hW: only a single UAV can be bound to a pipeline at once.
// set to NULL to unbind
ID3D11UnorderedAccessView *pNullUAV = NULL;
g_pD3DContext->CSSetUnorderedAccessViews( 0, 1, &pNullUAV, &initCounts );
}
// Program entry point: creates the device, the GPU buffers and the compute shader,
// runs one compute pass, then releases every global D3D object.
//
// Returns 0 when all set-up steps succeeded and the pass was run, 1 otherwise.
int _cdecl main( int , char** )
{
    int exitCode = 1;

    // Each step only runs if the previous one succeeded:
    //  - create the device and context,
    //  - initialize some GPU data buffers to operate on,
    //  - load/compile/create a shader object from HLSL shader code.
    if( SUCCEEDED( InitDeviceContext() ) &&
        SUCCEEDED( createBuffers( 640, 480 ) ) &&
        SUCCEEDED( createComputeShader() ) )
    {
        doComputePass();
        exitCode = 0;
    }

    // Release whatever was created, on success and on failure alike. SAFE_RELEASE
    // ignores NULL pointers, so objects that were never created are skipped. Views and
    // resources go first, the device last.
    SAFE_RELEASE( g_pStructuredBufferUAV );
    SAFE_RELEASE( g_pStagingBufferUAV );
    SAFE_RELEASE( pStructuredBuffer );
    SAFE_RELEASE( pStagingBuffer );
    SAFE_RELEASE( pConstantBuffer );
    SAFE_RELEASE( g_pComputeShader );
    SAFE_RELEASE( g_pD3DContext );
    SAFE_RELEASE( g_pD3DDevice );

    return exitCode;
}