Actual source code: cudavecimpl.h
4: #include <petscvec.h>
5: #include <petscdevice.h>
6: #include <petsc/private/vecimpl.h>
8: typedef struct {
9: PetscScalar *GPUarray; /* this always holds the GPU data */
10: PetscScalar *GPUarray_allocated; /* if the array was allocated by PETSc this is its pointer */
11: cudaStream_t stream; /* A stream for doing asynchronous data transfers */
12: PetscBool nvshmem; /* Is GPUarray_allocated allocated in nvshmem? It is used to allocate Mvctx->lvec in nvshmem */
13: } Vec_CUDA;
15: PETSC_INTERN PetscErrorCode VecCUDAGetArrays_Private(Vec,const PetscScalar**,const PetscScalar**,PetscOffloadMask*);
16: PETSC_INTERN PetscErrorCode VecDotNorm2_SeqCUDA(Vec,Vec,PetscScalar*, PetscScalar*);
17: PETSC_INTERN PetscErrorCode VecPointwiseDivide_SeqCUDA(Vec,Vec,Vec);
18: PETSC_INTERN PetscErrorCode VecWAXPY_SeqCUDA(Vec,PetscScalar,Vec,Vec);
19: PETSC_INTERN PetscErrorCode VecMDot_SeqCUDA(Vec,PetscInt,const Vec[],PetscScalar*);
20: PETSC_EXTERN PetscErrorCode VecSet_SeqCUDA(Vec,PetscScalar);
21: PETSC_INTERN PetscErrorCode VecMAXPY_SeqCUDA(Vec,PetscInt,const PetscScalar*,Vec*);
22: PETSC_INTERN PetscErrorCode VecAXPBYPCZ_SeqCUDA(Vec,PetscScalar,PetscScalar,PetscScalar,Vec,Vec);
23: PETSC_INTERN PetscErrorCode VecPointwiseMult_SeqCUDA(Vec,Vec,Vec);
24: PETSC_INTERN PetscErrorCode VecPlaceArray_SeqCUDA(Vec,const PetscScalar*);
25: PETSC_INTERN PetscErrorCode VecResetArray_SeqCUDA(Vec);
26: PETSC_INTERN PetscErrorCode VecReplaceArray_SeqCUDA(Vec,const PetscScalar*);
27: PETSC_INTERN PetscErrorCode VecDot_SeqCUDA(Vec,Vec,PetscScalar*);
28: PETSC_INTERN PetscErrorCode VecTDot_SeqCUDA(Vec,Vec,PetscScalar*);
29: PETSC_INTERN PetscErrorCode VecScale_SeqCUDA(Vec,PetscScalar);
30: PETSC_EXTERN PetscErrorCode VecCopy_SeqCUDA(Vec,Vec);
31: PETSC_INTERN PetscErrorCode VecSwap_SeqCUDA(Vec,Vec);
32: PETSC_EXTERN PetscErrorCode VecAXPY_SeqCUDA(Vec,PetscScalar,Vec);
33: PETSC_INTERN PetscErrorCode VecAXPBY_SeqCUDA(Vec,PetscScalar,PetscScalar,Vec);
34: PETSC_INTERN PetscErrorCode VecDuplicate_SeqCUDA(Vec,Vec*);
35: PETSC_INTERN PetscErrorCode VecConjugate_SeqCUDA(Vec xin);
36: PETSC_INTERN PetscErrorCode VecNorm_SeqCUDA(Vec,NormType,PetscReal*);
37: PETSC_INTERN PetscErrorCode VecCUDACopyToGPU(Vec);
38: PETSC_INTERN PetscErrorCode VecCUDAAllocateCheck(Vec);
39: PETSC_EXTERN PetscErrorCode VecCreate_SeqCUDA(Vec);
40: PETSC_INTERN PetscErrorCode VecCreate_SeqCUDA_Private(Vec,const PetscScalar*);
41: PETSC_INTERN PetscErrorCode VecCreate_MPICUDA(Vec);
42: PETSC_INTERN PetscErrorCode VecCreate_MPICUDA_Private(Vec,PetscBool,PetscInt,const PetscScalar*);
43: PETSC_INTERN PetscErrorCode VecCreate_CUDA(Vec);
44: PETSC_INTERN PetscErrorCode VecDestroy_SeqCUDA(Vec);
45: PETSC_INTERN PetscErrorCode VecDestroy_MPICUDA(Vec);
46: PETSC_INTERN PetscErrorCode VecAYPX_SeqCUDA(Vec,PetscScalar,Vec);
47: PETSC_INTERN PetscErrorCode VecSetRandom_SeqCUDA(Vec,PetscRandom);
48: PETSC_INTERN PetscErrorCode VecGetLocalVector_SeqCUDA(Vec,Vec);
49: PETSC_INTERN PetscErrorCode VecRestoreLocalVector_SeqCUDA(Vec,Vec);
50: PETSC_INTERN PetscErrorCode VecGetLocalVectorRead_SeqCUDA(Vec,Vec);
51: PETSC_INTERN PetscErrorCode VecRestoreLocalVectorRead_SeqCUDA(Vec,Vec);
52: PETSC_INTERN PetscErrorCode VecGetArrayWrite_SeqCUDA(Vec,PetscScalar**);
53: PETSC_INTERN PetscErrorCode VecGetArray_SeqCUDA(Vec,PetscScalar**);
54: PETSC_INTERN PetscErrorCode VecRestoreArray_SeqCUDA(Vec,PetscScalar**);
55: PETSC_INTERN PetscErrorCode VecGetArrayAndMemType_SeqCUDA(Vec,PetscScalar**,PetscMemType*);
56: PETSC_INTERN PetscErrorCode VecRestoreArrayAndMemType_SeqCUDA(Vec,PetscScalar**);
57: PETSC_INTERN PetscErrorCode VecCopy_SeqCUDA_Private(Vec,Vec);
58: PETSC_INTERN PetscErrorCode VecDestroy_SeqCUDA_Private(Vec);
59: PETSC_INTERN PetscErrorCode VecResetArray_SeqCUDA_Private(Vec);
60: PETSC_INTERN PetscErrorCode VecMax_SeqCUDA(Vec,PetscInt*,PetscReal*);
61: PETSC_INTERN PetscErrorCode VecMin_SeqCUDA(Vec,PetscInt*,PetscReal*);
62: PETSC_INTERN PetscErrorCode VecReciprocal_SeqCUDA(Vec);
63: PETSC_INTERN PetscErrorCode VecSum_SeqCUDA(Vec,PetscScalar*);
64: PETSC_INTERN PetscErrorCode VecShift_SeqCUDA(Vec,PetscScalar);
66: #if defined(PETSC_HAVE_NVSHMEM)
67: PETSC_EXTERN PetscErrorCode PetscNvshmemInitializeCheck(void);
68: PETSC_EXTERN PetscErrorCode PetscNvshmemMalloc(size_t,void**);
69: PETSC_EXTERN PetscErrorCode PetscNvshmemCalloc(size_t,void**);
70: PETSC_EXTERN PetscErrorCode PetscNvshmemFree_Private(void*);
71: #define PetscNvshmemFree(ptr) ((ptr) && (PetscNvshmemFree_Private(ptr),(ptr)=NULL,0))
72: PETSC_INTERN PetscErrorCode PetscNvshmemSum(PetscInt,PetscScalar*,const PetscScalar*);
73: PETSC_INTERN PetscErrorCode PetscNvshmemMax(PetscInt,PetscReal*,const PetscReal*);
74: PETSC_INTERN PetscErrorCode VecNormAsync_NVSHMEM(Vec,NormType,PetscReal*);
75: PETSC_INTERN PetscErrorCode VecAllocateNVSHMEM_SeqCUDA(Vec);
76: #endif
78: /* complex single */
79: #if defined(PETSC_USE_COMPLEX)
80: #if defined(PETSC_USE_REAL_SINGLE)
81: #define cublasXaxpy(a,b,c,d,e,f,g) cublasCaxpy((a),(b),(cuComplex*)(c),(cuComplex*)(d),(e),(cuComplex*)(f),(g))
82: #define cublasXscal(a,b,c,d,e) cublasCscal((a),(b),(cuComplex*)(c),(cuComplex*)(d),(e))
83: #define cublasXdotu(a,b,c,d,e,f,g) cublasCdotu((a),(b),(cuComplex*)(c),(d),(cuComplex*)(e),(f),(cuComplex*)(g))
84: #define cublasXdot(a,b,c,d,e,f,g) cublasCdotc((a),(b),(cuComplex*)(c),(d),(cuComplex*)(e),(f),(cuComplex*)(g))
85: #define cublasXswap(a,b,c,d,e,f) cublasCswap((a),(b),(cuComplex*)(c),(d),(cuComplex*)(e),(f))
86: #define cublasXnrm2(a,b,c,d,e) cublasScnrm2((a),(b),(cuComplex*)(c),(d),(e))
87: #define cublasIXamax(a,b,c,d,e) cublasIcamax((a),(b),(cuComplex*)(c),(d),(e))
88: #define cublasXasum(a,b,c,d,e) cublasScasum((a),(b),(cuComplex*)(c),(d),(e))
89: #define cublasXgemv(a,b,c,d,e,f,g,h,i,j,k,l) cublasCgemv((a),(b),(c),(d),(cuComplex*)(e),(cuComplex*)(f),(g),(cuComplex*)(h),(i),(cuComplex*)(j),(cuComplex*)(k),(l))
90: #define cublasXgemm(a,b,c,d,e,f,g,h,i,j,k,l,m,n) cublasCgemm((a),(b),(c),(d),(e),(f),(cuComplex*)(g),(cuComplex*)(h),(i),(cuComplex*)(j),(k),(cuComplex*)(l),(cuComplex*)(m),(n))
91: #define cublasXgeam(a,b,c,d,e,f,g,h,i,j,k,l,m) cublasCgeam((a),(b),(c),(d),(e),(cuComplex*)(f),(cuComplex*)(g),(h),(cuComplex*)(i),(cuComplex*)(j),(k),(cuComplex*)(l),(m))
92: #else /* complex double */
93: #define cublasXaxpy(a,b,c,d,e,f,g) cublasZaxpy((a),(b),(cuDoubleComplex*)(c),(cuDoubleComplex*)(d),(e),(cuDoubleComplex*)(f),(g))
94: #define cublasXscal(a,b,c,d,e) cublasZscal((a),(b),(cuDoubleComplex*)(c),(cuDoubleComplex*)(d),(e))
95: #define cublasXdotu(a,b,c,d,e,f,g) cublasZdotu((a),(b),(cuDoubleComplex*)(c),(d),(cuDoubleComplex*)(e),(f),(cuDoubleComplex*)(g))
96: #define cublasXdot(a,b,c,d,e,f,g) cublasZdotc((a),(b),(cuDoubleComplex*)(c),(d),(cuDoubleComplex*)(e),(f),(cuDoubleComplex*)(g))
97: #define cublasXswap(a,b,c,d,e,f) cublasZswap((a),(b),(cuDoubleComplex*)(c),(d),(cuDoubleComplex*)(e),(f))
98: #define cublasXnrm2(a,b,c,d,e) cublasDznrm2((a),(b),(cuDoubleComplex*)(c),(d),(e))
99: #define cublasIXamax(a,b,c,d,e) cublasIzamax((a),(b),(cuDoubleComplex*)(c),(d),(e))
100: #define cublasXasum(a,b,c,d,e) cublasDzasum((a),(b),(cuDoubleComplex*)(c),(d),(e))
101: #define cublasXgemv(a,b,c,d,e,f,g,h,i,j,k,l) cublasZgemv((a),(b),(c),(d),(cuDoubleComplex*)(e),(cuDoubleComplex*)(f),(g),(cuDoubleComplex*)(h),(i),(cuDoubleComplex*)(j),(cuDoubleComplex*)(k),(l))
102: #define cublasXgemm(a,b,c,d,e,f,g,h,i,j,k,l,m,n) cublasZgemm((a),(b),(c),(d),(e),(f),(cuDoubleComplex*)(g),(cuDoubleComplex*)(h),(i),(cuDoubleComplex*)(j),(k),(cuDoubleComplex*)(l),(cuDoubleComplex*)(m),(n))
103: #define cublasXgeam(a,b,c,d,e,f,g,h,i,j,k,l,m) cublasZgeam((a),(b),(c),(d),(e),(cuDoubleComplex*)(f),(cuDoubleComplex*)(g),(h),(cuDoubleComplex*)(i),(cuDoubleComplex*)(j),(k),(cuDoubleComplex*)(l),(m))
104: #endif
105: #else /* real single */
106: #if defined(PETSC_USE_REAL_SINGLE)
107: #define cublasXaxpy cublasSaxpy
108: #define cublasXscal cublasSscal
109: #define cublasXdotu cublasSdot
110: #define cublasXdot cublasSdot
111: #define cublasXswap cublasSswap
112: #define cublasXnrm2 cublasSnrm2
113: #define cublasIXamax cublasIsamax
114: #define cublasXasum cublasSasum
115: #define cublasXgemv cublasSgemv
116: #define cublasXgemm cublasSgemm
117: #define cublasXgeam cublasSgeam
118: #else /* real double */
119: #define cublasXaxpy cublasDaxpy
120: #define cublasXscal cublasDscal
121: #define cublasXdotu cublasDdot
122: #define cublasXdot cublasDdot
123: #define cublasXswap cublasDswap
124: #define cublasXnrm2 cublasDnrm2
125: #define cublasIXamax cublasIdamax
126: #define cublasXasum cublasDasum
127: #define cublasXgemv cublasDgemv
128: #define cublasXgemm cublasDgemm
129: #define cublasXgeam cublasDgeam
130: #endif
131: #endif
133: #endif