@@ -261,6 +261,7 @@ class ReplicatedLinear(LinearBase):
261261 quant_config: Quantization configuration.
262262 prefix: The name of the layer in the state dict, including all parents
263263 (e.g. model.layers.0.qkv_proj)
264+ return_bias: If true, return bias together with outputs in forward pass.
264265 """
265266
266267 def __init__ (
@@ -523,6 +524,7 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
523524 quant_config: Quantization configuration.
524525 prefix: The name of the layer in the state dict, including all parents
525526 (e.g. model.layers.0.qkv_proj)
527+ return_bias: If true, return bias together with outputs in forward pass.
526528 """
527529
528530 def __init__ (
@@ -805,6 +807,7 @@ class QKVParallelLinear(ColumnParallelLinear):
805807 quant_config: Quantization configuration.
806808 prefix: The name of the layer in the state dict, including all parents
807809 (e.g. model.layers.0.qkv_proj)
810+ return_bias: If true, return bias together with outputs in forward pass.
808811 """
809812
810813 def __init__ (
@@ -1155,7 +1158,13 @@ class RowParallelLinear(LinearBase):
11551158 bias can be fused with other element-wise operations.
11561159 We skip adding bias but instead return it.
11571160 params_dtype: Data type for the parameters.
1161+ reduce_results: If true, call all-reduce on output and make Y available
1162+ to all GPUs; otherwise, every GPU will have its own output,
1163+ which is Y = X_iA_i.
11581164 quant_config: Quantization configuration.
1165+ prefix: The name of the layer in the state dict, including all parents
1166+ (e.g. model.layers.0.down_proj)
1167+ return_bias: If true, return bias together with outputs in forward pass.
11591168 """
11601169
11611170 def __init__ (
0 commit comments