Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions mooncake-integration/transfer_engine/transfer_engine_py.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -638,10 +638,12 @@ uintptr_t TransferEnginePy::getFirstBufferAddress(
return segment_desc->buffers[0].addr;
}

std::string TransferEnginePy::getLocalTopology() {
std::string TransferEnginePy::getLocalTopology(const char *device_name) {
pybind11::gil_scoped_release release;
auto device_name_safe = device_name ? std::string(device_name) : "";
auto device_filter = buildDeviceFilter(device_name_safe);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The buildDeviceFilter function does not handle whitespace around the device names in the comma-separated list. For an input like "mlx5_0, mlx5_1", this will result in tokens "mlx5_0" and " mlx5_1". The leading space in the second token will cause the device filter to fail for mlx5_1 because the device name matching is exact.

Please update buildDeviceFilter to trim whitespace from each token. Here is a suggested implementation:

std::vector<std::string> buildDeviceFilter(const std::string &device_names) {
    std::stringstream ss(device_names);
    std::string item;
    std::vector<std::string> tokens;
    while (getline(ss, item, ',')) {
        // trim leading whitespace
        size_t first = item.find_first_not_of(" \t\n\r\f\v");
        if (std::string::npos == first) {
            continue;
        }
        // trim trailing whitespace
        size_t last = item.find_last_not_of(" \t\n\r\f\v");
        tokens.push_back(item.substr(first, (last - first + 1)));
    }
    return tokens;
}

std::shared_ptr<TransferEngine> tmp_engine =
std::make_shared<TransferEngine>(true);
std::make_shared<TransferEngine>(true, device_filter);

std::string metadata_conn_string{"P2PHANDSHAKE"}, local_server_name{};
tmp_engine->init(metadata_conn_string, local_server_name);
Expand Down Expand Up @@ -723,7 +725,8 @@ PYBIND11_MODULE(engine, m) {
&TransferEnginePy::batchRegisterMemory)
.def("batch_unregister_memory",
&TransferEnginePy::batchUnregisterMemory)
.def("get_local_topology", &TransferEnginePy::getLocalTopology)
.def("get_local_topology", &TransferEnginePy::getLocalTopology,
py::arg("device_name") = nullptr)
.def("get_first_buffer_address",
&TransferEnginePy::getFirstBufferAddress)
.def("get_notifies", &TransferEnginePy::getNotifies)
Expand Down
2 changes: 1 addition & 1 deletion mooncake-integration/transfer_engine/transfer_engine_py.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ class TransferEnginePy {

int batchUnregisterMemory(std::vector<uintptr_t> buffer_addresses);

std::string getLocalTopology();
std::string getLocalTopology(const char *device_name);

std::vector<TransferNotify> getNotifies();

Expand Down
18 changes: 17 additions & 1 deletion mooncake-wheel/mooncake/transfer_engine_topology_dump.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,28 @@
import argparse
import sys
import os

def parse_args():
"""Parse command line arguments."""
parser = argparse.ArgumentParser(
description="Dump device topology",
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
parser.add_argument(
"--device-name",
type=str,
default="",
help="Filter topology by given device name"
)
return parser.parse_args()

def main():
args = parse_args()
os.environ['MC_LOG_LEVEL'] = 'ERROR'
os.environ['MC_CUSTOM_TOPO_JSON'] = ''
from mooncake.engine import TransferEngine
engine = TransferEngine()
print('Local topology: ', engine.get_local_topology())
print('Local topology: ', engine.get_local_topology(device_name=args.device_name))

if __name__ == "__main__":
sys.exit(main())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

This file is missing a final newline character. It's a common convention to end files with a newline, and some tools might not process the last line correctly without it.

Suggested change
sys.exit(main())
sys.exit(main())

Loading