nvml_cublas.cu: 6 changes (5 additions, 1 deletion)

@@ -104,6 +104,9 @@ void calculate( int const &m, int const &n, int const &k, nvmlClass &nvml ) {

     /* Fill the matrices with test data */
     /* Assume square matrices */
+    /* Note: we generate data in row-major order, but cuBLAS interprets it as column-major,
+     * so the matrices are effectively transposed when passed to cuBLAS.
+     * To compensate, we compute C = B × A instead of C = A × B. */
     for ( int i = 0; i < m * m; i++ ) {
         h_A[i] = std::rand( ) / static_cast<data_type>( RAND_MAX );
         h_B[i] = std::rand( ) / static_cast<data_type>( RAND_MAX );

@@ -124,7 +127,8 @@ void calculate( int const &m, int const &n, int const &k, nvmlClass &nvml ) {
     data_type *d_C_ptr = thrust::raw_pointer_cast( &d_C[0] );

     /* Performs operation using cublas */
-    cublasSgemm( handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, d_A_ptr, lda, d_B_ptr, ldb, &beta, d_C_ptr, ldc );
+    /* Compute C = B * A to compensate for the row-major to column-major mismatch. */
+    cublasSgemm( handle, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k, &alpha, d_B_ptr, ldb, d_A_ptr, lda, &beta, d_C_ptr, ldc );
     CUDA_RT_CALL( cudaDeviceSynchronize( ) );

     /* Allocate host memory for reading back the result from device memory */
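
For reference, here is a minimal standalone sketch of the layout trick the patch relies on. Everything below (the sizes m, n, k, the all-ones test data, and the host setup) is an illustrative assumption, not code from this repository. The idea: row-major C = A × B has the same memory layout as column-major C^T = B^T × A^T, so passing B first, swapping m and n, and using the row-major row widths as leading dimensions yields the correct row-major product.

// Minimal sketch of the row-major workaround described in the patch.
// Assumptions (not from the patch): sizes m, n, k and all-ones test data.
#include <cstdio>
#include <vector>

#include <cublas_v2.h>
#include <cuda_runtime.h>

int main( ) {
    int const m = 2, n = 3, k = 4; // illustrative, non-square sizes

    // Row-major host data: A is m x k, B is k x n, C is m x n.
    std::vector<float> h_A( m * k, 1.0f );
    std::vector<float> h_B( k * n, 1.0f );
    std::vector<float> h_C( m * n, 0.0f );

    float *d_A = nullptr, *d_B = nullptr, *d_C = nullptr;
    cudaMalloc( &d_A, m * k * sizeof( float ) );
    cudaMalloc( &d_B, k * n * sizeof( float ) );
    cudaMalloc( &d_C, m * n * sizeof( float ) );
    cudaMemcpy( d_A, h_A.data( ), m * k * sizeof( float ), cudaMemcpyHostToDevice );
    cudaMemcpy( d_B, h_B.data( ), k * n * sizeof( float ), cudaMemcpyHostToDevice );

    cublasHandle_t handle;
    cublasCreate( &handle );

    float const alpha = 1.0f;
    float const beta  = 0.0f;

    // Row-major C = A * B equals column-major C^T = B^T * A^T, so pass B first,
    // swap m and n, and use the row-major row widths (n, k, n) as leading dimensions.
    cublasSgemm( handle, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k, &alpha, d_B, n, d_A, k, &beta, d_C, n );
    cudaDeviceSynchronize( );

    cudaMemcpy( h_C.data( ), d_C, m * n * sizeof( float ), cudaMemcpyDeviceToHost );

    // With all-ones inputs every entry of C is k, confirming the layout trick.
    std::printf( "C[0][0] = %.1f (expected %d)\n", h_C[0], k );

    cublasDestroy( handle );
    cudaFree( d_A );
    cudaFree( d_B );
    cudaFree( d_C );
    return 0;
}

In the patch itself the matrices are square (m = n = k), so lda, ldb, and ldc are all equal and the same reordered call works unchanged. A file like this would compile with, e.g., nvcc -o sketch sketch.cu -lcublas.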