---
还没在这个blog上写过随笔
今天先看看这个blog的效果
Code
#include <cufft.h>
#include <math_constants.h>

//Round a / b to nearest higher integer value
int cuda_iDivUp(int a, int b)


{
return (a + (b - 1)) / b;
}


// complex math functions
__device__
float2 conjugate(float2 arg)


{
return make_float2(arg.x, -arg.y);
}

__device__
float2 complex_exp(float arg)


{
return make_float2(cosf(arg), sinf(arg));
}

__device__
float2 complex_add(float2 a, float2 b)


{
return make_float2(a.x + b.x, a.y + b.y);
}

__device__
float2 complex_mult(float2 ab, float2 cd)


{
return make_float2(ab.x * cd.x - ab.y * cd.y, ab.x * cd.y + ab.y * cd.x);
}

// generate wave heightfield at time t based on initial heightfield and dispersion relationship
__global__ void generateSpectrumKernel(float2* h0, float2 *ht, unsigned int width, unsigned int height, float t, float patchSize)


{
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
unsigned int i = y*width+x;
// calculate coordinates
float2 k;
k.x = CUDART_PI_F * x / (float) patchSize;
k.y = 2.0f * CUDART_PI_F * y / (float) patchSize;

// calculate dispersion w(k)
float k_len = sqrtf(k.x*k.x + k.y*k.y);
float w = sqrtf(9.81f * k_len);

float2 h0_k = h0[i];
float2 h0_mk = h0[(((height-1)-y)*width)+x];

float2 h_tilda = complex_add( complex_mult(h0_k, complex_exp(w * t)),
complex_mult(conjugate(h0_mk), complex_exp(-w * t)) );

// output frequency-space complex values

if ((x < width) && (y < height))
{
ht[i] = h_tilda;
}
}


// generate slope by partial differences in spatial domain
__global__ void calculateSlopeKernel(float* h, float2 *slopeOut, unsigned int width, unsigned int height)


{
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
unsigned int i = y*width+x;

float2 slope;

if ((x > 0) && (y > 0) && (x < width-1) && (y < height-1))
{
slope.x = h[i+1] - h[i-1];
slope.y = h[i+width] - h[i-width];

} else
{
slope = make_float2(0.0f, 0.0f);
}
slopeOut[i] = slope;
}

extern "C"
void cudaGenerateSpectrumKernel(float2* d_h0, float2 *d_ht,
unsigned int width, unsigned int height,
float animTime, float patchSize)


{
dim3 block(8, 8, 1);
dim3 grid(cuda_iDivUp(width, block.x), cuda_iDivUp(height, block.y), 1);
generateSpectrumKernel<<<grid, block>>>(d_h0, d_ht, width, height, animTime, patchSize);
}

extern "C"
void cudaCalculateSlopeKernel( float* hptr, float2 *slopeOut,
unsigned int width, unsigned int height)


{
dim3 block(8, 8, 1);
dim3 grid2(cuda_iDivUp(width, block.x), cuda_iDivUp(height, block.y), 1);
calculateSlopeKernel<<<grid2, block>>>(hptr, slopeOut, width, height);
}

再来测试图片

然后, 恩,再做一个上传文件,测试上传功能。
效果确实,设置也挺人性化,我很喜欢
唯一有点不足的地方就是速度稍微有点慢
不过这个不影响整体的效果
/Files/frischzenger/TestDirectX.rar