forked from flexflow/flexflow-train
-
Notifications
You must be signed in to change notification settings - Fork 0
/
machine_config_example
49 lines (42 loc) · 2.27 KB
/
machine_config_example
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# This is an example config file for the new machine model.
# comp_device:
# Compute devices are created evenly based on the following settings.
# NOTE(review): assuming an even split, the values below describe 4 sockets,
# 40 CPUs, and 8 GPUs in total - confirm against the machine being modeled.
# Number of nodes in the machine.
num_nodes = 2
# Number of sockets on each node.
num_sockets_per_node = 2
# Number of CPUs on each socket.
num_cpus_per_socket = 10
# Number of GPUs on each socket.
num_gpus_per_socket = 2
# mem_device:
# Memories are created automatically. Currently, we support three kinds of memory - system memory, zero-copy memory, and GPU framebuffer memory. Each socket has one system memory (sys_mem) and one zero-copy memory (z_copy_mem); each GPU has one framebuffer memory (gpu_fb_mem).
# comm_device:
# Communication devices describe the links between the memories. Each communication device needs two parameters - latency in ms and bandwidth in GB/s. An easy way to get these numbers is to run the Memspeed benchmark in legion/test/realm. Currently, we provide the following communication devices:
# Units for every link below, per the comm_device description: latency in ms, bandwidth in GB/s.
# memcpy over the memory bus (referred to as "membus" in the paths section)
membus_latency = 0.00003
membus_bandwidth = 4.26623
# inter-socket links (referred to as "upi" in the paths section)
upi_latency = 0.0004
upi_bandwidth = 10.14039
# inter-node links (referred to as "nic" in the paths section)
# nic_persocket is the number of NICs per socket; 0 means one NIC per node.
nic_latency = 0.000507
nic_bandwidth = 10.9448431
nic_persocket = 0
# PCIe between CPU and GPU
# NOTE(review): the paths section uses pci_to_host and pci_to_dev; presumably both
# directions share these two parameters - confirm against the config parser.
pci_latency = 0.001
pci_bandwidth = 12.578468749999999
# NVLink between GPUs (referred to as "nvlink" in the paths section)
nvlink_latency = 0.001
nvlink_bandwidth = 18.52
# paths:
# This section describes the communication paths (a list of communication devices) between memories. These paths can vary with many factors, such as the hardware and the versions and settings of GASNet and Legion. Please refer to the find_shortest_path function in legion/runtime/realm/transfer/lowlevel_dma.cc to see the exact paths.
# Setting a path to null causes any communication cost on that path to be ignored.
# Key format: <scope>_<source memory>_to_<destination memory>, where <scope> is
# intra_socket, inter_socket, or inter_node.
# Value format: a space-separated list of communication devices.
# NOTE(review): presumably the devices are listed in the order they are traversed - confirm.
# system memory -> system memory
intra_socket_sys_mem_to_sys_mem = membus
inter_socket_sys_mem_to_sys_mem = upi
inter_node_sys_mem_to_sys_mem = nic
# GPU framebuffer -> GPU framebuffer
intra_socket_gpu_fb_mem_to_gpu_fb_mem = nvlink
inter_socket_gpu_fb_mem_to_gpu_fb_mem = nvlink
inter_node_gpu_fb_mem_to_gpu_fb_mem = pci_to_host nic pci_to_dev
# system memory -> GPU framebuffer
intra_socket_sys_mem_to_gpu_fb_mem = membus pci_to_dev
inter_socket_sys_mem_to_gpu_fb_mem = upi pci_to_dev
inter_node_sys_mem_to_gpu_fb_mem = nic pci_to_dev
# GPU framebuffer -> system memory
# NOTE(review): these paths are not mirror images of the sys_mem -> gpu_fb_mem
# paths above (membus appears in different positions) - confirm this is intended.
intra_socket_gpu_fb_mem_to_sys_mem = pci_to_host
inter_socket_gpu_fb_mem_to_sys_mem = pci_to_host upi
inter_node_gpu_fb_mem_to_sys_mem = pci_to_host nic membus