Skip to content

Commit 7508ebf

Browse files
committed
code cleanup for -i-1 and -T params
dsa_perf_micros can run in 3 modes: an iteration count, a time period, and an infinite loop. This patch consolidates BW/latency measurement into the do_results() function. Reviewed-by: Nirav N Shah <[email protected]> Signed-off-by: Nikhil Rao <[email protected]>
1 parent 1bc3cd5 commit 7508ebf

File tree

6 files changed

+163
-116
lines changed

6 files changed

+163
-116
lines changed

scripts/test_basic.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ test_params_loop() {
3232
$bin -o3 -q16 -n32 -cf
3333
# test offset
3434
$bin -o3 -O32,40 -cf
35+
# test -T
36+
$bin -o3 -T2
3537
}
3638

3739
test_all_cfg() {

src/common.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#define ARRAY_SIZE(x) (sizeof((x))/sizeof((x)[0]))
2020
#define MAX_COMP_RETRY 2000000000
2121
#define PTR_ADD(p, a) { p = (void *)((uintptr_t)(p) + (uintptr_t)a); }
22+
#define TIME_DELAY_SEC 4
2223

2324
/* max number of operands e.g., dual cast - src1, dst1, dst2 */
2425
#define NUM_ADDR_MAX 3
@@ -246,7 +247,7 @@ struct tcfg {
246247
uint64_t retry; /* completion polling retries */
247248
uint64_t mwait_cycles; /* cycles spent in mwait */
248249
float cpu_util; /* cpu utilization needed to prep/submit descriptors */
249-
int ops_rate; /* operation rate - descriptors/sec */
250+
int kops_rate; /* operation rate - kilo operations/sec */
250251
float latency; /* latency for n descriptors */
251252
float bw; /* operation BW */
252253
uint64_t retries_per_sec; /* polling retries the CPU can do per sec */
@@ -286,6 +287,13 @@ extern struct log_ctx log_ctx;
286287

287288
void init_buffers(struct tcfg_cpu *tcpu);
288289

290+
static inline bool
291+
is_work_rate_sub_test(struct tcfg *tcfg)
292+
{
293+
return tcfg->op == DSA_OPCODE_NOOP &&
294+
tcfg->misc_flags & (TEST_M64 | TEST_DB | TEST_M64MEM | TEST_ENQ | TEST_ENQMEM);
295+
}
296+
289297
static inline void
290298
cldemote(volatile void *p)
291299
{

src/cpu.c

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,10 @@ test_memcpy(struct tcfg_cpu *tcpu)
145145
tcpu->cycles = 0;
146146
test_barrier(tcfg, 0);
147147

148-
for (i = 0; i < tcfg->iter; i++) {
148+
/* notify main thread that the test has started */
149+
tcpu->tstart = rdtsc();
150+
151+
for (i = 0; i < tcfg->iter || (tcfg->tval_secs && !tcfg->stop); i++) {
149152
uint64_t c;
150153

151154
src = tcpu->src;
@@ -155,12 +158,9 @@ test_memcpy(struct tcfg_cpu *tcpu)
155158
src2 = tcpu->src2;
156159

157160
/*
158-
* If cpus > 1, then you can either measure the BW for a
159-
* single buffer for a single iteration or if the number
160-
* iterations is > 1 then use a working set such that for a single threaded
161-
* config nb_bufs * blen exceeds the L2 size for LLC BW measurement or
162-
* nb_cpus * nb_bufs * blen exceeds total L2 + LLC size for memory BW
163-
* measurement. Halve the sizes for MT.
161+
* when using > 1 CPU, data placement operations & data
162+
* movement will happen concurrently across CPUs, hence
163+
* data placement is used only when using a single CPU
164164
*/
165165
if (tcfg->nb_cpus == 1)
166166
do_cache_ops(tcpu);
@@ -210,7 +210,5 @@ test_memcpy(struct tcfg_cpu *tcpu)
210210
tcpu->curr_stat.iter++;
211211
}
212212

213-
if (tcfg->iter)
214-
tcpu->cycles /= tcfg->iter;
215213
tcpu->err = 0;
216214
}

src/main.c

Lines changed: 39 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -493,7 +493,6 @@ submit_test_desc(struct tcfg_cpu *tcpu)
493493
uint32_t i;
494494
int d;
495495
struct tcfg *tcfg;
496-
bool inf;
497496
int nb_desc;
498497

499498
tcfg = tcpu->tcfg;
@@ -534,16 +533,14 @@ submit_test_desc(struct tcfg_cpu *tcpu)
534533
else
535534
INFO_CPU(tcpu, "Running BW test\n");
536535

537-
inf = tcfg->iter == ~0U;
538-
539536
if (do_single_iter(tcpu, nb_desc)) {
540537
ERR("Failed initial descriptor submission\n");
541538
goto error2;
542539
}
543540

544541
tcpu->curr_stat.iter = 0;
545542
tcpu->tstart = rdtsc();
546-
for (i = 0; !tcfg->stop && (inf || i < tcfg->iter) ; i++) {
543+
for (i = 0; !tcfg->stop && (tcfg->tval_secs || i < tcfg->iter) ; i++) {
547544
if (do_single_iter(tcpu, nb_desc)) {
548545
ERR("Error iteration: %d\n", i);
549546
goto error2;
@@ -553,7 +550,6 @@ submit_test_desc(struct tcfg_cpu *tcpu)
553550
tcpu->tend = rdtsc();
554551

555552
tcpu->cycles = tcpu->tend - tcpu->tstart;
556-
tcpu->cycles /= tcfg->iter;
557553

558554
d = 0;
559555
while (d < tcpu->qd) {
@@ -622,7 +618,6 @@ test_run_fn(void *arg)
622618
{
623619
struct tcfg_cpu *tcpu = arg;
624620
struct tcfg *tcfg = tcpu->tcfg;
625-
bool do_sub_rate;
626621

627622
cpu_pin(tcpu->cpu_num);
628623

@@ -631,49 +626,14 @@ test_run_fn(void *arg)
631626
return 0;
632627
}
633628

634-
do_sub_rate = tcfg->misc_flags &
635-
(TEST_M64 | TEST_DB | TEST_M64MEM |
636-
TEST_ENQ | TEST_ENQMEM);
637-
638-
if (tcfg->op == DSA_OPCODE_NOOP && do_sub_rate)
629+
if (is_work_rate_sub_test(tcfg))
639630
work_sub_rate_test(tcpu);
640631
else
641632
do_desc_work(tcpu);
642633

643634
return 0;
644635
}
645636

646-
static void
647-
iter_count_stat(uint64_t *stat, uint64_t *prev, uint64_t *curr)
648-
{
649-
*stat += *curr - *prev;
650-
*prev = *curr;
651-
}
652-
653-
static void
654-
count_stat(struct iter_stat *is, struct tcfg_cpu *tcpu)
655-
{
656-
int i;
657-
658-
for (i = 0; i < ARRAY_SIZE(is->stat); i++)
659-
iter_count_stat(&is->stat[i], &tcpu->prev_stat.stat[i],
660-
&tcpu->curr_stat.stat[i]);
661-
}
662-
663-
static void
664-
iter_count(struct tcfg *tcfg, struct iter_stat *iter_stat)
665-
{
666-
unsigned int i;
667-
struct iter_stat is = {};
668-
669-
for (i = 0; i < tcfg->nb_cpus; i++) {
670-
struct tcfg_cpu *tcpu = &tcfg->tcpu[i];
671-
672-
count_stat(&is, tcpu);
673-
}
674-
675-
*iter_stat = is;
676-
}
677637

678638
static void *
679639
test_fn(void *arg)
@@ -702,10 +662,36 @@ test_fn(void *arg)
702662
return NULL;
703663
}
704664

665+
static void
666+
print_results(struct tcfg *tcfg)
667+
{
668+
if (is_work_rate_sub_test(tcfg)) {
669+
printf("kopsrate = %d", tcfg->kops_rate);
670+
if (tcfg->misc_flags & (TEST_ENQ | TEST_ENQMEM))
671+
printf(" latency = %d ns", (1000 * 1000) / tcfg->kops_rate);
672+
printf("\n");
673+
return;
674+
}
675+
676+
printf("GB per sec = %f", tcfg->bw);
677+
if (tcfg->qd == 1 || tcfg->nb_bufs == 1)
678+
printf(" latency(cycles) = %f, %f ns, cycles/sec =%ld",
679+
tcfg->latency, (tcfg->latency * 1E9)/tcfg->cycles_per_sec,
680+
tcfg->cycles_per_sec);
681+
printf(" cpu %f kopsrate = %d\n", tcfg->cpu_util, tcfg->kops_rate);
682+
683+
if (tcfg->drain_desc) {
684+
double drain_usec = ((1.0 * tcfg->drain_lat)/tcfg->cycles_per_sec) * 1000000;
685+
686+
printf("Drain desc latency = %lu cycles | %f uSec\n", tcfg->drain_lat, drain_usec);
687+
}
688+
}
689+
705690
static int
706691
test_run(struct tcfg *tcfg)
707692
{
708693
int i, err = 0;
694+
bool inf = tcfg->iter == -1;
709695

710696
for (i = 0; i < tcfg->nb_cpus; i++) {
711697

@@ -728,19 +714,16 @@ test_run(struct tcfg *tcfg)
728714
}
729715
}
730716

731-
if (tcfg->iter == ~0U) {
732-
uint64_t iter_bytes = data_size_per_iter(tcfg);
717+
if (tcfg->tval_secs) {
733718

734719
err = false;
735-
while (!err) {
736-
float bw;
737-
struct iter_stat is;
720+
while (!tcfg->stop) {
738721

739-
sleep(tcfg->tval_secs);
740-
iter_count(tcfg, &is);
722+
while (tcfg->tcpu[0].tstart == 0)
723+
__builtin_ia32_pause();
741724

742-
bw = (is.iter * iter_bytes)/(1E9 * tcfg->tval_secs);
743-
bw = bw/tcfg->nb_cpus;
725+
sleep(tcfg->tval_secs);
726+
do_results(tcfg);
744727

745728
for (i = 0, err = false; i < tcfg->nb_cpus; i++) {
746729
struct tcfg_cpu *tcpu = &tcfg->tcpu[i];
@@ -757,11 +740,8 @@ test_run(struct tcfg *tcfg)
757740
continue;
758741
}
759742

760-
tcfg->retry = is.retry / tcfg->nb_cpus;
761-
tcfg->mwait_cycles = is.mwait_cycles / tcfg->nb_cpus;
762-
calc_cpu_for_sec(tcfg, tcfg->tval_secs);
763-
764-
fprintf(stdout, "BW %f GB cpu util %f\n", bw, tcfg->cpu_util);
743+
print_results(tcfg);
744+
tcfg->stop = !inf;
765745
}
766746
}
767747

@@ -803,7 +783,7 @@ main(int argc, char **argv)
803783
.flags_nth_desc = 1,
804784
.flags_cmask = -1,
805785
.flags_smask = 0,
806-
.tval_secs = 4,
786+
.tval_secs = 0,
807787
.numa_node_default = { -1, -1, -1 },
808788
.place_op = { OP_FLUSH, OP_FLUSH, OP_FLUSH },
809789
.access_op = { OP_WRITE, OP_WRITE, OP_WRITE },
@@ -838,20 +818,8 @@ main(int argc, char **argv)
838818

839819
do_results(&tcfg);
840820

841-
printf("GB per sec = %f", tcfg.bw);
842-
if (tcfg.qd == 1 || tcfg.nb_bufs == 1)
843-
printf(" latency(cycles) = %f, %f ns,"
844-
" cycles/sec =%ld",
845-
tcfg.latency, (tcfg.latency * 1E9)/tcfg.cycles_per_sec,
846-
tcfg.cycles_per_sec);
847-
printf(" cpu %f kopsrate = %d\n", tcfg.cpu_util, tcfg.ops_rate);
848-
849-
850-
if (tcfg.drain_desc) {
851-
double drain_usec = ((1.0 * tcfg.drain_lat)/tcfg.cycles_per_sec) * 1000000;
852-
853-
printf("Drain desc latency = %lu cycles | %f uSec\n", tcfg.drain_lat, drain_usec);
854-
}
821+
if (!tcfg.tval_secs)
822+
print_results(&tcfg);
855823

856824
err_ret:
857825
test_free(&tcfg);

src/options.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ void print_usage(void)
9696
"\t-F <flag_bits_to_clear:flag_bits_to_set:every_nth_desc>\n"
9797
"\t e.g., -F 0xFFFFF7:0x8:4 to clear Addr2_TC flag and set RCR=1 on every 4th descriptor\n"
9898
"\t-h ; Print this message\n"
99-
"\t-i <iterations> ; The number of iterations to run the test for\n"
99+
"\t-i <iterations> ; The number of iterations to run the test for, use i-1 for continuous run (periodic BW/Latency output)\n"
100100
"\t-j ; deprecated parameter - default behavior\n"
101101
"\t-k <CPU list> ; List of CPUs (e.g., -k0, -k0,1,2,3, -k0-2, -k0-2,3)\n"
102102
"\t-K <CPU/WQ list> ; List of CPUs and associated WQ (e.g., [0,1]@dsa0,0,[2-3]@dsa0,1)\n"
@@ -601,6 +601,8 @@ fixup_options(struct tcfg *tc, struct parse_info *pi)
601601
break;
602602
}
603603

604+
if (tc->iter == -1 && tc->tval_secs == 0)
605+
tc->tval_secs = TIME_DELAY_SEC;
604606
}
605607

606608
static void

0 commit comments

Comments
 (0)