compute/example/simple_moving_average.cpp
2018-12-27 12:25:16 +01:00

140 lines
4.2 KiB
C++

//---------------------------------------------------------------------------//
// Copyright (c) 2014 Benoit Dequidt <benoit.dequidt@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

#include <boost/compute/core.hpp>
#include <boost/compute/algorithm/copy.hpp>
#include <boost/compute/algorithm/inclusive_scan.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/type_traits/type_name.hpp>
#include <boost/compute/utility/source.hpp>
namespace compute = boost::compute;
/// Builds the program that contains the "SMA" (simple moving average) kernel.
///
/// The kernel reads \c scannedValues as a running (prefix) sum: the sum of
/// the window around element \c gid is scannedValues[endIdx] minus
/// scannedValues[startIdx], where the window spans wSize/2 elements on each
/// side of \c gid and is clipped to [0, size - 1]. The sum is divided by the
/// number of elements actually inside the clipped window.
///
/// Warning: the result loses precision because float rounding error
/// accumulates in the running sum when size is large enough. For more
/// precision use double or a Kahan sum; otherwise the results can diverge
/// from the CPU implementation.
///
/// \param context context the program is built for
/// \return the program built from the kernel source
compute::program make_sma_program(const compute::context& context)
{
    const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
        __kernel void SMA(__global const float *scannedValues, int size, __global float *output, int wSize)
        {
            const int gid = get_global_id(0);

            // work-items past the end of the data have nothing to compute;
            // this keeps a padded global size from touching memory out of bounds
            if(gid >= size)
                return;

            float cumValues = 0.f;
            int endIdx = gid + wSize/2;
            int startIdx = gid - 1 - wSize/2;

            // clip the right edge of the window to the last element
            if(endIdx > size - 1)
                endIdx = size - 1;
            cumValues += scannedValues[endIdx];

            // a window that starts at element 0 has nothing to subtract;
            // startIdx = -1 keeps (endIdx - startIdx) equal to the element count
            if(startIdx < 0)
                startIdx = -1;
            else
                cumValues -= scannedValues[startIdx];

            output[gid] = (float)(cumValues / (float)(endIdx - startIdx));
        }
    );

    // create sma program
    return compute::program::build_with_source(source, context);
}
/// Verifies smoothed values against a straightforward host-side reference.
///
/// For every index the reference is the mean of \p values over the window
/// [idx - wSize/2, idx + wSize/2], clipped to the valid index range. Every
/// mismatching element is reported on std::cout.
///
/// \param values       input samples
/// \param smoothValues averages to verify, one per input sample
/// \param wSize        window width; wSize/2 samples are taken on each side
/// \return true when both vectors have the same length and every element of
///         \p smoothValues is within 1e-3 of the reference, false otherwise.
///         A NaN in \p smoothValues (or in the reference) is a mismatch.
bool check_results(const std::vector<float>& values, const std::vector<float>& smoothValues, unsigned int wSize)
{
    if(values.size() != smoothValues.size())
        return false;

    const int size = static_cast<int>(values.size());

    // A half-width larger than the data selects the whole range anyway;
    // clamping it keeps idx + semiWidth from overflowing for huge wSize.
    int semiWidth = static_cast<int>(wSize / 2);
    if(semiWidth > size)
        semiWidth = size;

    const double tolerance = 1e-3;
    bool ret = true;
    for(int idx = 0; idx < size; ++idx)
    {
        const int start = (std::max)(idx - semiWidth, 0);
        const int end = (std::min)(idx + semiWidth, size - 1);

        float res = 0;
        for(int j = start; j <= end; ++j)
        {
            res += values[j];
        }
        res /= float(end - start + 1);

        // Written as !(diff <= tolerance) so that NaN counts as a mismatch:
        // every ordered comparison with NaN is false.
        const float diff = std::fabs(res - smoothValues[idx]);
        if(!(diff <= tolerance))
        {
            std::cout << "idx = " << idx << " -- expected = " << res << " -- result = " << smoothValues[idx] << std::endl;
            ret = false;
        }
    }
    return ret;
}
// Returns the next rand() draw rescaled to the range [0,10].
// The arithmetic is done in double and narrowed to float on return.
float myRand()
{
    // RAND_MAX + 1 as a double: the quotient below stays strictly under 1
    static const double divisor = static_cast<double>(RAND_MAX) + 1.0;
    const double unit = static_cast<double>(std::rand()) / divisor;
    return static_cast<float>(unit * 10.0);
}
/// Runs the simple-moving-average example: fills a host vector with random
/// values, computes their inclusive scan and the SMA kernel on the default
/// device, then compares the device output with the host reference.
///
/// \return EXIT_SUCCESS when device and host results agree,
///         EXIT_FAILURE otherwise.
int main()
{
    const unsigned int size = 1024;
    // wSize must be odd
    const unsigned int wSize = 21;

    // get the default device
    compute::device device = compute::system::default_device();
    // create a context for the device
    compute::context context(device);
    // get the program
    compute::program program = make_sma_program(context);

    // create vector of random numbers on the host
    std::vector<float> host_vector(size);
    std::vector<float> host_result(size);
    std::generate(host_vector.begin(), host_vector.end(), myRand);

    // a: input values, b: inclusive scan of a, c: moving average
    compute::vector<float> a(size, context);
    compute::vector<float> b(size, context);
    compute::vector<float> c(size, context);
    compute::command_queue queue(context, device);

    compute::copy(host_vector.begin(), host_vector.end(), a.begin(), queue);

    // scan values
    compute::inclusive_scan(a.begin(), a.end(), b.begin(), queue);

    // sma kernel
    compute::kernel kernel(program, "SMA");
    kernel.set_arg(0, b.get_buffer());
    kernel.set_arg(1, static_cast<int>(b.size()));
    kernel.set_arg(2, c.get_buffer());
    kernel.set_arg(3, static_cast<int>(wSize));

    using compute::uint_;
    // NOTE(review): the global size is passed unchanged, so size presumably
    // has to stay a multiple of tpb (OpenCL 1.x rejects a global size that is
    // not divisible by the work-group size) -- confirm before changing size.
    uint_ tpb = 128;
    uint_ workSize = size;
    queue.enqueue_1d_range_kernel(kernel, 0, workSize, tpb);

    compute::copy(c.begin(), c.end(), host_result.begin(), queue);

    const bool res = check_results(host_vector, host_result, wSize);
    std::string status = res ? "results are equivalent" : "GPU results differs from CPU one's";
    std::cout << status << std::endl;

    // report a mismatch through the exit status so scripted runs can detect it
    return res ? EXIT_SUCCESS : EXIT_FAILURE;
}