307 lines
9.8 KiB
C++
307 lines
9.8 KiB
C++
|
//---------------------------------------------------------------------------//
|
||
|
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
|
||
|
//
|
||
|
// Distributed under the Boost Software License, Version 1.0
|
||
|
// See accompanying file LICENSE_1_0.txt or copy at
|
||
|
// http://www.boost.org/LICENSE_1_0.txt
|
||
|
//
|
||
|
// See http://boostorg.github.com/compute for more information.
|
||
|
//---------------------------------------------------------------------------//
|
||
|
|
||
|
#ifndef BOOST_COMPUTE_ALGORITHM_FILL_HPP
|
||
|
#define BOOST_COMPUTE_ALGORITHM_FILL_HPP
|
||
|
|
||
|
#include <iterator>
|
||
|
|
||
|
#include <boost/mpl/int.hpp>
|
||
|
#include <boost/mpl/vector.hpp>
|
||
|
#include <boost/mpl/contains.hpp>
|
||
|
#include <boost/utility/enable_if.hpp>
|
||
|
|
||
|
#include <boost/compute/cl.hpp>
|
||
|
#include <boost/compute/system.hpp>
|
||
|
#include <boost/compute/command_queue.hpp>
|
||
|
#include <boost/compute/algorithm/copy.hpp>
|
||
|
#include <boost/compute/async/future.hpp>
|
||
|
#include <boost/compute/iterator/constant_iterator.hpp>
|
||
|
#include <boost/compute/iterator/discard_iterator.hpp>
|
||
|
#include <boost/compute/detail/is_buffer_iterator.hpp>
|
||
|
#include <boost/compute/detail/iterator_range_size.hpp>
|
||
|
|
||
|
namespace boost {
|
||
|
namespace compute {
|
||
|
namespace detail {
|
||
|
|
||
|
namespace mpl = boost::mpl;
|
||
|
|
||
|
// fills the range [first, first + count) with value using copy()
|
||
|
template<class BufferIterator, class T>
|
||
|
inline void fill_with_copy(BufferIterator first,
|
||
|
size_t count,
|
||
|
const T &value,
|
||
|
command_queue &queue)
|
||
|
{
|
||
|
::boost::compute::copy(
|
||
|
::boost::compute::make_constant_iterator(value, 0),
|
||
|
::boost::compute::make_constant_iterator(value, count),
|
||
|
first,
|
||
|
queue
|
||
|
);
|
||
|
}
|
||
|
|
||
|
// fills the range [first, first + count) with value using copy_async()
|
||
|
template<class BufferIterator, class T>
|
||
|
inline future<void> fill_async_with_copy(BufferIterator first,
|
||
|
size_t count,
|
||
|
const T &value,
|
||
|
command_queue &queue)
|
||
|
{
|
||
|
return ::boost::compute::copy_async(
|
||
|
::boost::compute::make_constant_iterator(value, 0),
|
||
|
::boost::compute::make_constant_iterator(value, count),
|
||
|
first,
|
||
|
queue
|
||
|
);
|
||
|
}
|
||
|
|
||
|
#if defined(CL_VERSION_1_2)
|
||
|
|
||
|
// meta-function returing true if Iterator points to a range of values
|
||
|
// that can be filled using clEnqueueFillBuffer(). to meet this criteria
|
||
|
// it must have a buffer accessible through iter.get_buffer() and the
|
||
|
// size of its value_type must by in {1, 2, 4, 8, 16, 32, 64, 128}.
|
||
|
template<class Iterator>
|
||
|
struct is_valid_fill_buffer_iterator :
|
||
|
public mpl::and_<
|
||
|
is_buffer_iterator<Iterator>,
|
||
|
mpl::contains<
|
||
|
mpl::vector<
|
||
|
mpl::int_<1>,
|
||
|
mpl::int_<2>,
|
||
|
mpl::int_<4>,
|
||
|
mpl::int_<8>,
|
||
|
mpl::int_<16>,
|
||
|
mpl::int_<32>,
|
||
|
mpl::int_<64>,
|
||
|
mpl::int_<128>
|
||
|
>,
|
||
|
mpl::int_<
|
||
|
sizeof(typename std::iterator_traits<Iterator>::value_type)
|
||
|
>
|
||
|
>
|
||
|
>::type { };
|
||
|
|
||
|
template<>
|
||
|
struct is_valid_fill_buffer_iterator<discard_iterator> : public boost::false_type {};
|
||
|
|
||
|
// specialization which uses clEnqueueFillBuffer for buffer iterators
|
||
|
template<class BufferIterator, class T>
|
||
|
inline void
|
||
|
dispatch_fill(BufferIterator first,
|
||
|
size_t count,
|
||
|
const T &value,
|
||
|
command_queue &queue,
|
||
|
typename boost::enable_if<
|
||
|
is_valid_fill_buffer_iterator<BufferIterator>
|
||
|
>::type* = 0)
|
||
|
{
|
||
|
typedef typename std::iterator_traits<BufferIterator>::value_type value_type;
|
||
|
|
||
|
if(count == 0){
|
||
|
// nothing to do
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
// check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
|
||
|
if(!queue.check_device_version(1, 2)){
|
||
|
return fill_with_copy(first, count, value, queue);
|
||
|
}
|
||
|
|
||
|
value_type pattern = static_cast<value_type>(value);
|
||
|
size_t offset = static_cast<size_t>(first.get_index());
|
||
|
|
||
|
if(count == 1){
|
||
|
// use clEnqueueWriteBuffer() directly when writing a single value
|
||
|
// to the device buffer. this is potentially more efficient and also
|
||
|
// works around a bug in the intel opencl driver.
|
||
|
queue.enqueue_write_buffer(
|
||
|
first.get_buffer(),
|
||
|
offset * sizeof(value_type),
|
||
|
sizeof(value_type),
|
||
|
&pattern
|
||
|
);
|
||
|
}
|
||
|
else {
|
||
|
queue.enqueue_fill_buffer(
|
||
|
first.get_buffer(),
|
||
|
&pattern,
|
||
|
sizeof(value_type),
|
||
|
offset * sizeof(value_type),
|
||
|
count * sizeof(value_type)
|
||
|
);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
template<class BufferIterator, class T>
|
||
|
inline future<void>
|
||
|
dispatch_fill_async(BufferIterator first,
|
||
|
size_t count,
|
||
|
const T &value,
|
||
|
command_queue &queue,
|
||
|
typename boost::enable_if<
|
||
|
is_valid_fill_buffer_iterator<BufferIterator>
|
||
|
>::type* = 0)
|
||
|
{
|
||
|
typedef typename std::iterator_traits<BufferIterator>::value_type value_type;
|
||
|
|
||
|
// check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
|
||
|
if(!queue.check_device_version(1, 2)){
|
||
|
return fill_async_with_copy(first, count, value, queue);
|
||
|
}
|
||
|
|
||
|
value_type pattern = static_cast<value_type>(value);
|
||
|
size_t offset = static_cast<size_t>(first.get_index());
|
||
|
|
||
|
event event_ =
|
||
|
queue.enqueue_fill_buffer(first.get_buffer(),
|
||
|
&pattern,
|
||
|
sizeof(value_type),
|
||
|
offset * sizeof(value_type),
|
||
|
count * sizeof(value_type));
|
||
|
|
||
|
return future<void>(event_);
|
||
|
}
|
||
|
|
||
|
#ifdef CL_VERSION_2_0
|
||
|
// specializations for svm_ptr<T>
|
||
|
template<class T>
|
||
|
inline void dispatch_fill(svm_ptr<T> first,
|
||
|
size_t count,
|
||
|
const T &value,
|
||
|
command_queue &queue)
|
||
|
{
|
||
|
if(count == 0){
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
queue.enqueue_svm_fill(
|
||
|
first.get(), &value, sizeof(T), count * sizeof(T)
|
||
|
);
|
||
|
}
|
||
|
|
||
|
template<class T>
|
||
|
inline future<void> dispatch_fill_async(svm_ptr<T> first,
|
||
|
size_t count,
|
||
|
const T &value,
|
||
|
command_queue &queue)
|
||
|
{
|
||
|
if(count == 0){
|
||
|
return future<void>();
|
||
|
}
|
||
|
|
||
|
event event_ = queue.enqueue_svm_fill(
|
||
|
first.get(), &value, sizeof(T), count * sizeof(T)
|
||
|
);
|
||
|
|
||
|
return future<void>(event_);
|
||
|
}
|
||
|
#endif // CL_VERSION_2_0
|
||
|
|
||
|
// default implementations
|
||
|
template<class BufferIterator, class T>
|
||
|
inline void
|
||
|
dispatch_fill(BufferIterator first,
|
||
|
size_t count,
|
||
|
const T &value,
|
||
|
command_queue &queue,
|
||
|
typename boost::disable_if<
|
||
|
is_valid_fill_buffer_iterator<BufferIterator>
|
||
|
>::type* = 0)
|
||
|
{
|
||
|
fill_with_copy(first, count, value, queue);
|
||
|
}
|
||
|
|
||
|
template<class BufferIterator, class T>
|
||
|
inline future<void>
|
||
|
dispatch_fill_async(BufferIterator first,
|
||
|
size_t count,
|
||
|
const T &value,
|
||
|
command_queue &queue,
|
||
|
typename boost::disable_if<
|
||
|
is_valid_fill_buffer_iterator<BufferIterator>
|
||
|
>::type* = 0)
|
||
|
{
|
||
|
return fill_async_with_copy(first, count, value, queue);
|
||
|
}
|
||
|
#else
|
||
|
template<class BufferIterator, class T>
|
||
|
inline void dispatch_fill(BufferIterator first,
|
||
|
size_t count,
|
||
|
const T &value,
|
||
|
command_queue &queue)
|
||
|
{
|
||
|
fill_with_copy(first, count, value, queue);
|
||
|
}
|
||
|
|
||
|
template<class BufferIterator, class T>
|
||
|
inline future<void> dispatch_fill_async(BufferIterator first,
|
||
|
size_t count,
|
||
|
const T &value,
|
||
|
command_queue &queue)
|
||
|
{
|
||
|
return fill_async_with_copy(first, count, value, queue);
|
||
|
}
|
||
|
#endif // !defined(CL_VERSION_1_2)
|
||
|
|
||
|
} // end detail namespace
|
||
|
|
||
|
/// Fills the range [\p first, \p last) with \p value.
|
||
|
///
|
||
|
/// \param first first element in the range to fill
|
||
|
/// \param last last element in the range to fill
|
||
|
/// \param value value to copy to each element
|
||
|
/// \param queue command queue to perform the operation
|
||
|
///
|
||
|
/// For example, to fill a vector on the device with sevens:
|
||
|
/// \code
|
||
|
/// // vector on the device
|
||
|
/// boost::compute::vector<int> vec(10, context);
|
||
|
///
|
||
|
/// // fill vector with sevens
|
||
|
/// boost::compute::fill(vec.begin(), vec.end(), 7, queue);
|
||
|
/// \endcode
|
||
|
///
|
||
|
/// \see boost::compute::fill_n()
|
||
|
template<class BufferIterator, class T>
|
||
|
inline void fill(BufferIterator first,
|
||
|
BufferIterator last,
|
||
|
const T &value,
|
||
|
command_queue &queue = system::default_queue())
|
||
|
{
|
||
|
size_t count = detail::iterator_range_size(first, last);
|
||
|
if(count == 0){
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
detail::dispatch_fill(first, count, value, queue);
|
||
|
}
|
||
|
|
||
|
template<class BufferIterator, class T>
|
||
|
inline future<void> fill_async(BufferIterator first,
|
||
|
BufferIterator last,
|
||
|
const T &value,
|
||
|
command_queue &queue = system::default_queue())
|
||
|
{
|
||
|
size_t count = detail::iterator_range_size(first, last);
|
||
|
if(count == 0){
|
||
|
return future<void>();
|
||
|
}
|
||
|
|
||
|
return detail::dispatch_fill_async(first, count, value, queue);
|
||
|
}
|
||
|
|
||
|
} // end compute namespace
|
||
|
} // end boost namespace
|
||
|
|
||
|
#endif // BOOST_COMPUTE_ALGORITHM_FILL_HPP
|