Loading...
Searching...
No Matches
CUDAArray-Impl.hpp
Go to the documentation of this file.
1// This code is based on Jet framework.
2// Copyright (c) 2018 Doyub Kim
3// CubbyFlow is voxel-based fluid simulation engine for computer games.
4// Copyright (c) 2020 CubbyFlow Team
5// Core Part: Chris Ohk, Junwoo Hwang, Jihong Sin, Seungwoo Yoo
6// AI Part: Dongheon Cho, Minseo Kim
7// We are making my contributions/submissions to this project solely in our
8// personal capacity and are not conveying any rights to any intellectual
9// property of any third parties.
10
11#ifndef CUBBYFLOW_CUDA_ARRAY_IMPL_HPP
12#define CUBBYFLOW_CUDA_ARRAY_IMPL_HPP
13
14#ifdef CUBBYFLOW_USE_CUDA
15
16namespace CubbyFlow
17{
18#ifdef __CUDACC__
19namespace Internal
20{
21template <typename T, size_t N, size_t I>
23{
24 template <typename... RemainingIndices>
25 CUBBYFLOW_CUDA_HOST_DEVICE static void Call(CUDAArrayView<const T, N> src,
26 CUDAStdArray<size_t, N> size,
27 CUDAArrayView<T, N> dst,
28 RemainingIndices... indices)
29 {
30 for (size_t i = 0; i < size[I - 1]; ++i)
31 {
32 CUDABlockCopyHelper<T, N, I - 1>::Call(src, size, dst, i,
33 indices...);
34 }
35 }
36};
37
38template <typename T, size_t N>
39struct CUDABlockCopyHelper<T, N, 1>
40{
41 template <typename... RemainingIndices>
42 CUBBYFLOW_CUDA_HOST_DEVICE static void Call(CUDAArrayView<const T, N> src,
43 CUDAStdArray<size_t, N> size,
44 CUDAArrayView<T, N> dst,
45 RemainingIndices... indices)
46 {
47 for (size_t i = 0; i < size[0]; ++i)
48 {
49 dst(i, indices...) = src(i, indices...);
50 }
51 }
52};
53
54template <typename T, size_t N>
58{
59 size_t i = blockIdx.x * blockDim.x + threadIdx.x;
60 if (i < size[N - 1])
61 {
62 CUDABlockCopyHelper<T, N, N - 1>::Call(src, size, dst, i);
63 }
64}
65
66template <typename T>
70{
71 size_t i = blockIdx.x * blockDim.x + threadIdx.x;
72 if (i < size[0])
73 {
74 dst[i] = src[i];
75 }
76}
77
78template <typename T, size_t N>
79struct CUDABlockCopy
80{
81 static void Call(CUDAArrayView<const T, N> src,
82 CUDAStdArray<size_t, N> size, CUDAArrayView<T, N> dst)
83 {
84 if (size[N - 1] == 0)
85 {
86 return;
87 }
88
89 // Assuming i-major
90 unsigned int numBlocks, numThreads;
91 CUDAComputeGridSize((unsigned int)size[N - 1], 256, numBlocks,
92 numThreads);
93 CUDABlockCopyKernelN<<<numBlocks, numThreads>>>(src, size, dst);
94 CUBBYFLOW_CUDA_CHECK_LAST_ERROR(
95 "Failed executing CUDABlockCopyKernelN");
96 }
97};
98
99template <typename T>
100struct CUDABlockCopy<T, 1>
101{
102 static void Call(CUDAArrayView<const T, 1> src,
103 CUDAStdArray<size_t, 1> size, CUDAArrayView<T, 1> dst)
104 {
105 if (size[0] == 0)
106 {
107 return;
108 }
109
110 // Assuming i-major
111 unsigned int numBlocks, numThreads;
112 CUDAComputeGridSize((unsigned int)size[0], 256, numBlocks, numThreads);
113 CUDABlockCopyKernel1<<<numBlocks, numThreads>>>(src, size, dst);
114 CUBBYFLOW_CUDA_CHECK_LAST_ERROR(
115 "Failed executing CUDABlockCopyKernel1");
116 }
117};
118} // namespace Internal
119#endif
120
121template <typename T, size_t N>
122CUDAArray<T, N>::CUDAArray() : Base()
123{
124 // Do nothing
125}
126
127template <typename T, size_t N>
128CUDAArray<T, N>::CUDAArray(const CUDAStdArray<size_t, N>& size,
129 const T& initVal)
130 : CUDAArray{}
131{
132 size_t l = size[0];
133
134 for (size_t i = 1; i < N; ++i)
135 {
136 l *= size[i];
137 }
138
139 m_data.Resize(l, initVal);
140 Base::SetPtrAndSize(m_data.data(), size);
141}
142
143#ifdef __CUDACC__
144template <typename T, size_t N>
145template <typename... Args>
146CUDAArray<T, N>::CUDAArray(size_t nx, Args... args) : CUDAArray{}
147{
148 Vector<size_t, N> newSizeVec;
149 T initVal;
150 Internal::GetSizeAndInitVal<T, N, N - 1>::Call(newSizeVec, initVal, nx,
151 args...);
152 CUDAStdArray<size_t, N> newSize(newSizeVec);
153 CUDAArray newArray(newSize, initVal);
154 *this = std::move(newArray);
155}
156
157template <typename T, size_t N>
158CUDAArray<T, N>::CUDAArray(NestedInitializerListsT<T, N> lst) : CUDAArray{}
159{
160 Vector<size_t, N> newSize;
161 Internal::GetSizeFromInitList<T, N, N>::Call(newSize, lst);
162
163 Array<T, N> newCpuArray(newSize);
164 Internal::SetArrayFromInitList<T, N, N>::Call(newCpuArray, lst);
165 CopyFrom(newCpuArray);
166}
167#endif
168
169template <typename T, size_t N>
170template <size_t M>
171CUDAArray<T, N>::CUDAArray(
172 const std::enable_if_t<(M == 1), std::vector<T>>& vec)
173 : CUDAArray()
174{
175 CopyFrom(vec);
176}
177
178template <typename T, size_t N>
179template <typename OtherDerived>
180CUDAArray<T, N>::CUDAArray(const ArrayBase<T, N, OtherDerived>& other)
181 : CUDAArray()
182{
183 CopyFrom(other);
184}
185
186template <typename T, size_t N>
187template <typename OtherDerived>
188CUDAArray<T, N>::CUDAArray(const CUDAArrayBase<T, N, OtherDerived>& other)
189 : CUDAArray()
190{
191 CopyFrom(other);
192}
193
194template <typename T, size_t N>
195CUDAArray<T, N>::CUDAArray(const CUDAArray& other) : CUDAArray{}
196{
197 CopyFrom(other);
198}
199
200template <typename T, size_t N>
201CUDAArray<T, N>::CUDAArray(CUDAArray&& other) noexcept : CUDAArray{}
202{
203 *this = std::move(other);
204}
205
206template <typename T, size_t N>
207template <size_t M>
208CUDAArray<T, N>& CUDAArray<T, N>::operator=(
209 const std::enable_if_t<(M == 1), std::vector<T>>& vec)
210{
211 CopyFrom(vec);
212 return *this;
213}
214
215template <typename T, size_t N>
216template <typename OtherDerived>
217CUDAArray<T, N>& CUDAArray<T, N>::operator=(
218 const ArrayBase<T, N, OtherDerived>& other)
219{
220 CopyFrom(other);
221 return *this;
222}
223
224template <typename T, size_t N>
225template <typename OtherDerived>
226CUDAArray<T, N>& CUDAArray<T, N>::operator=(
227 const ArrayBase<const T, N, OtherDerived>& other)
228{
229 CopyFrom(other);
230 return *this;
231}
232
233template <typename T, size_t N>
234template <typename OtherDerived>
235CUDAArray<T, N>& CUDAArray<T, N>::operator=(
236 const CUDAArrayBase<T, N, OtherDerived>& other)
237{
238 CopyFrom(other);
239 return *this;
240}
241
242template <typename T, size_t N>
243template <typename OtherDerived>
244CUDAArray<T, N>& CUDAArray<T, N>::operator=(
245 const CUDAArrayBase<const T, N, OtherDerived>& other)
246{
247 CopyFrom(other);
248 return *this;
249}
250
251template <typename T, size_t N>
252CUDAArray<T, N>& CUDAArray<T, N>::operator=(const CUDAArray& other)
253{
254 m_data = other.m_data;
255 Base::SetPtrAndSize(m_data.data(), other.Size());
256 return *this;
257}
258
259template <typename T, size_t N>
260CUDAArray<T, N>& CUDAArray<T, N>::operator=(CUDAArray&& other) noexcept
261{
262 Swap(other);
263 other.Clear();
264 return *this;
265}
266
267template <typename T, size_t N>
268template <typename A, size_t M>
269std::enable_if_t<(M == 1), void> CUDAArray<T, N>::CopyFrom(
270 const std::vector<T, A>& vec)
271{
272 CUDAArray newArray(vec.size());
273 newArray.m_data.CopyFrom(vec);
274 newArray.SetPtrAndSize(newArray.m_data.data(), newArray.Size());
275 *this = std::move(newArray);
276}
277
278template <typename T, size_t N>
279template <typename OtherDerived>
280void CUDAArray<T, N>::CopyFrom(const ArrayBase<T, N, OtherDerived>& other)
281{
282 CUDAArray newArray(other.Size());
283 CUDACopyHostToDevice(other.data(), other.Length(), newArray.data());
284 *this = std::move(newArray);
285}
286
287template <typename T, size_t N>
288template <typename OtherDerived>
289void CUDAArray<T, N>::CopyFrom(const ArrayBase<const T, N, OtherDerived>& other)
290{
291 CUDAArray newArray(other.Size());
292 CUDACopyHostToDevice(other.data(), other.Length(), newArray.data());
293 *this = std::move(newArray);
294}
295
296template <typename T, size_t N>
297template <typename OtherDerived>
298void CUDAArray<T, N>::CopyFrom(const CUDAArrayBase<T, N, OtherDerived>& other)
299{
300 CUDAArray newArray(other.Size());
301 CUDACopyDeviceToDevice(other.data(), other.Length(), newArray.data());
302 *this = std::move(newArray);
303}
304
305template <typename T, size_t N>
306template <typename OtherDerived>
307void CUDAArray<T, N>::CopyFrom(
308 const CUDAArrayBase<const T, N, OtherDerived>& other)
309{
310 CUDAArray newArray(other.Size());
311 CUDACopyDeviceToDevice(other.data(), other.Length(), newArray.data());
312 *this = std::move(newArray);
313}
314
315template <typename T, size_t N>
316template <typename A, size_t M>
317std::enable_if_t<(M == 1), void> CUDAArray<T, N>::CopyTo(std::vector<T, A>& vec)
318{
319 vec.resize(Length());
320 CUDACopyDeviceToHost(data(), Length(), vec.data());
321}
322
323template <typename T, size_t N>
324void CUDAArray<T, N>::CopyTo(Array<T, N>& other)
325{
326 other.Resize(m_size.ToVector());
327 CUDACopyDeviceToHost(data(), Length(), other.data());
328}
329
330template <typename T, size_t N>
331void CUDAArray<T, N>::CopyTo(ArrayView<T, N>& other)
332{
333 assert(m_size.ToVector() == other.Size());
334 CUDACopyDeviceToHost(data(), Length(), other.data());
335}
336
337template <typename T, size_t N>
338void CUDAArray<T, N>::CopyTo(CUDAArray<T, N>& other)
339{
340 other.Resize(m_size.ToVector());
341 CUDACopyDeviceToDevice(data(), Length(), other.data());
342}
343
344template <typename T, size_t N>
345void CUDAArray<T, N>::CopyTo(CUDAArrayView<T, N>& other)
346{
347 assert(Length() == other.Length());
348 CUDACopyDeviceToDevice(data(), Length(), other.data());
349}
350
351template <typename T, size_t N>
352void CUDAArray<T, N>::Fill(const T& val)
353{
354 m_data.Fill(val);
355}
356
357#ifdef __CUDACC__
358template <typename T, size_t N>
359void CUDAArray<T, N>::Resize(CUDAStdArray<size_t, N> newSize, const T& initVal)
360{
361 CUDAArray newArray(newSize, initVal);
362 CUDAStdArray<size_t, N> minSize;
363
364 for (size_t i = 0; i < N; ++i)
365 {
366 minSize[i] = std::min(m_size[i], newArray.m_size[i]);
367 }
368
369 Internal::CUDABlockCopy<T, N>::Call(View(), minSize, newArray.View());
370
371 *this = std::move(newArray);
372}
373
374template <typename T, size_t N>
375template <typename... Args>
376void CUDAArray<T, N>::Resize(size_t nx, Args... args)
377{
378 Vector<size_t, N> newSizeVec;
379 T initVal;
380
381 Internal::GetSizeAndInitVal<T, N, N - 1>::Call(newSizeVec, initVal, nx,
382 args...);
383
384 CUDAStdArray<size_t, N> newSize(newSizeVec);
385 Resize(newSize, initVal);
386}
387#endif
388
389template <typename T, size_t N>
390template <size_t M>
391std::enable_if_t<(M == 1), void> CUDAArray<T, N>::Append(const T& val)
392{
393 m_data.PushBack(val);
394 Base::SetPtrAndSize(m_data.data(), m_data.Size());
395}
396
397template <typename T, size_t N>
398template <typename A, size_t M>
399std::enable_if_t<(M == 1), void> CUDAArray<T, N>::Append(
400 const std::vector<T, A>& extra)
401{
402 m_data.Append(extra);
403 m_size[0] = m_data.size();
404}
405
406template <typename T, size_t N>
407template <typename OtherDerived, size_t M>
408std::enable_if_t<(M == 1), void> CUDAArray<T, N>::Append(
409 const ArrayBase<T, N, OtherDerived>& extra)
410{
411 CUDAArray newArray(Length() + extra.Length());
412 CUDACopy(data(), Length(), newArray.data());
413 CUDACopyHostToDevice(extra.data(), extra.Length(),
414 newArray.data() + m_size[0]);
415 Swap(newArray);
416}
417
418template <typename T, size_t N>
419template <typename OtherDerived, size_t M>
420std::enable_if_t<(M == 1), void> CUDAArray<T, N>::Append(
421 const CUDAArrayBase<T, N, OtherDerived>& extra)
422{
423 CUDAArray newArray(Length() + extra.Length());
424 CUDACopy(data(), Length(), newArray.data());
425 CUDACopy(extra.data(), extra.Length(), newArray.data() + m_size[0]);
426 Swap(newArray);
427}
428
429template <typename T, size_t N>
430void CUDAArray<T, N>::Clear()
431{
432 Base::ClearPtrAndSize();
433 m_data.Clear();
434}
435
436template <typename T, size_t N>
437void CUDAArray<T, N>::Swap(CUDAArray& other)
438{
439 Base::SwapPtrAndSize(other);
440 m_data.Swap(other.m_data);
441}
442
443template <typename T, size_t N>
444CUDAArrayView<T, N> CUDAArray<T, N>::View()
445{
446 return CUDAArrayView<T, N>(*this);
447};
448
449template <typename T, size_t N>
450CUDAArrayView<const T, N> CUDAArray<T, N>::View() const
451{
452 return CUDAArrayView<const T, N>(*this);
453};
454} // namespace CubbyFlow
455
456#endif
457
458#endif
Definition pybind11Utils.hpp:21
Matrix< T, Rows, 1 > Vector
Definition Matrix.hpp:738