@@ -1671,6 +1671,7 @@ static __device__ __forceinline__ float vec_dot_iq3_s_q8_1(
16711671
16721672static __device__ __forceinline__ float vec_dot_iq1_s_q8_1 (
16731673 const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs) {
1674+ #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 610
16741675 const block_iq1_s * bq1 = (const block_iq1_s *) vbq;
16751676
16761677 const int qs_packed = get_int_b2 (bq1->qs , iqs);
@@ -1697,10 +1698,12 @@ static __device__ __forceinline__ float vec_dot_iq1_s_q8_1(
16971698 const float delta = -1 .0f + IQ1S_DELTA - (qh & 0x8000 ) * (2 .0f *IQ1S_DELTA/0x8000 );
16981699 const float2 ds = __half22float2 (bq8_1[iqs].ds );
16991700 return d1q * (ds.x *sumi + ds.y *delta);
1701+ #endif
17001702}
17011703
17021704static __device__ __forceinline__ float vec_dot_iq1_m_q8_1 (
17031705 const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs) {
1706+ #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 610
17041707
17051708 const block_iq1_m * bq1 = (const block_iq1_m *) vbq;
17061709
@@ -1741,6 +1744,7 @@ static __device__ __forceinline__ float vec_dot_iq1_m_q8_1(
17411744 const int sc0 = 2 *((tmp >> 0 ) & 0x07 ) + 1 ;
17421745 const int sc1 = 2 *((tmp >> 3 ) & 0x07 ) + 1 ;
17431746 return d * ((sumi[0 ] + sumf[0 ]) * sc0 + (sumi[1 ] + sumf[1 ]) * sc1);
1747+ #endif
17441748}
17451749
17461750static __device__ __forceinline__ void get_int_from_table_16 (const uint32_t & q4, const uint8_t * values,
0 commit comments