@@ -49,7 +49,7 @@ typedef void (*to_fp32_cuda_t)(const void * x, float * y, int k, cudaStream_t st
 typedef void (*ggml_cuda_func_t)(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
 typedef void (*ggml_cuda_op_t)(
     const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i,
-    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, uint64_t i0_low, uint64_t i0_high, int i1, cudaStream_t & cudaStream_main);
+    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i0_low, int64_t i0_high, int i1, cudaStream_t & cudaStream_main);
 
 // QK = number of values after dequantization
 // QR = QK / number of values before dequantization
@@ -537,26 +537,26 @@ void ggml_cuda_host_free(void * ptr) {
 }
 
 static cudaError_t ggml_cuda_h2d_tensor_2d(
-    void * dst, const struct ggml_tensor * src, uint64_t i3, uint64_t i2, uint64_t i1_low, uint64_t i1_high, cudaStream_t stream) {
+    void * dst, const struct ggml_tensor * src, int64_t i3, int64_t i2, int64_t i1_low, int64_t i1_high, cudaStream_t stream) {
 
     char * dst_char = (char *) dst;
-    const uint64_t ne0 = src->ne[0];
-    const uint64_t nb0 = src->nb[0];
-    const uint64_t nb1 = src->nb[1];
-    const uint64_t nb2 = src->nb[2];
-    const uint64_t nb3 = src->nb[3];
+    const int64_t ne0 = src->ne[0];
+    const int64_t nb0 = src->nb[0];
+    const int64_t nb1 = src->nb[1];
+    const int64_t nb2 = src->nb[2];
+    const int64_t nb3 = src->nb[3];
     const enum ggml_type type = src->type;
-    const size_t ts = ggml_type_size(type);
-    const size_t bs = ggml_blck_size(type);
-    uint64_t i1_diff = i1_high - i1_low;
+    const int64_t ts = ggml_type_size(type);
+    const int64_t bs = ggml_blck_size(type);
+    int64_t i1_diff = i1_high - i1_low;
 
     const void * x = (const void *) ((const char *) src->data + i1_low*nb1 + i2*nb2 + i3*nb3);
     if (nb0 == ts && nb1 == ts*ne0/bs) {
         return cudaMemcpyAsync(dst_char, x, i1_diff*nb1, cudaMemcpyHostToDevice, stream);
     } else if (nb0 == ts) {
         return cudaMemcpy2DAsync(dst_char, ts*ne0/bs, x, nb1, ts*ne0/bs, i1_diff, cudaMemcpyHostToDevice, stream);
     } else {
-        for (uint64_t i1 = 0; i1 < i1_diff; i1++) {
+        for (int64_t i1 = 0; i1 < i1_diff; i1++) {
             const void * rx = (const void *) ((const char *) x + i1*nb1);
             void * rd = (void *) (dst_char + i1*ts*ne0/bs);
             // pretend the row is a matrix with cols=1
@@ -569,20 +569,20 @@ static cudaError_t ggml_cuda_h2d_tensor_2d(
 
 inline void ggml_cuda_op_mul(
     const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i,
-    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, uint64_t i0_low, uint64_t i0_high, int i1,
+    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i0_low, int64_t i0_high, int i1,
     cudaStream_t & cudaStream_main){
 
     GGML_ASSERT(src0_ddf_i != nullptr);
     GGML_ASSERT(src1_ddf_i != nullptr);
     GGML_ASSERT(dst_ddf_i != nullptr);
 
-    const uint64_t ne00 = src0->ne[0];
+    const int64_t ne00 = src0->ne[0];
 
-    const uint64_t ne10 = src1->ne[0];
-    const uint64_t ne11 = src1->ne[1];
+    const int64_t ne10 = src1->ne[0];
+    const int64_t ne11 = src1->ne[1];
 
-    for (uint64_t i01 = i0_low; i01 < i0_high; i01++) {
-        const uint64_t i11 = i1*ne11 + i01%ne11; // broadcast src1 across src0
+    for (int64_t i01 = i0_low; i01 < i0_high; i01++) {
+        const int64_t i11 = i1*ne11 + i01%ne11; // broadcast src1 across src0
 
         float * src0_ddf_i01 = src0_ddf_i + i01*ne00;
         float * src1_ddf_i01 = src1_ddf_i + i11*ne10;
@@ -599,7 +599,7 @@ inline void ggml_cuda_op_mul(
 
 inline void ggml_cuda_op_dequantize_mul_mat_vec(
     const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i,
-    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, uint64_t i0_low, uint64_t i0_high, int i1,
+    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i0_low, int64_t i0_high, int i1,
     cudaStream_t & cudaStream_main){
 
     GGML_ASSERT(src0_ddq_i != nullptr);
@@ -642,7 +642,7 @@ inline void ggml_cuda_op_dequantize_mul_mat_vec(
 
 inline void ggml_cuda_op_mul_mat_cublas(
     const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, char * src0_ddq_i,
-    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, uint64_t i0_low, uint64_t i0_high, int i1,
+    float * src0_ddf_i, float * src1_ddf_i, float * dst_ddf_i, int64_t i0_low, int64_t i0_high, int i1,
     cudaStream_t & cudaStream_main){
 
     GGML_ASSERT(src0_ddf_i != nullptr);
@@ -652,12 +652,12 @@ inline void ggml_cuda_op_mul_mat_cublas(
     const float alpha = 1.0f;
     const float beta = 0.0f;
 
-    const uint64_t ne00 = src0->ne[0];
+    const int64_t ne00 = src0->ne[0];
 
-    const uint64_t ne10 = src1->ne[0];
-    const uint64_t ne11 = src1->ne[1];
+    const int64_t ne10 = src1->ne[0];
+    const int64_t ne11 = src1->ne[1];
 
-    const uint64_t i0_diff = i0_high - i0_low;
+    const int64_t i0_diff = i0_high - i0_low;
 
     int id;
     CUDA_CHECK(cudaGetDevice(&id));
@@ -988,7 +988,7 @@ void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensor, const
             continue;
         }
 
-        uint64_t nrows_split = row_high - row_low;
+        int64_t nrows_split = row_high - row_low;
 
         const size_t offset_split = offset + row_low*nb1;
         const size_t size = ggml_nbytes_split(tensor, nrows_split);