test: add tests for topology-aware tree

Add coordinate files for testing and two python scripts to render the
network topology file and the dumped tree json files.

Co-authored-by: Xi Luo <xi.luo@intel.com>
Co-authored-by: Gengbin Zheng <gengbin.zheng@intel.com>
Co-authored-by: Todd Schwartz <todd.schwartz@intel.com>
Esse commit está contido em:
Xi Luo
2023-09-11 21:38:29 -07:00
commit a34461250e
7 arquivos alterados com 304 adições e 0 exclusões
+73
Ver Arquivo
@@ -0,0 +1,73 @@
##
## Copyright (C) by Argonne National Laboratory
## See COPYRIGHT in top-level directory
##
# This python script renders the file dumped by MPIR_CVAR_COORDINATES_DUMP.
# Input: Network coordinates file dumped by MPIR_CVAR_COORDINATES_DUMP.
# Output: Graph of the coordinates. Output filename is defined by '--output'.
# Example: python3 ./render-topo.py coords --format png --output coords
import sys
import argparse
try:
from graphviz import Digraph
except ImportError as e:
sys.exit('Unable to import graphviz module: {}.\n'.format(e) +
'Please install the graphviz module by running the following command:\n'
'\n'
' python -m pip install graphviz\n')
def main():
    """Render the coordinates file named on the command line as a graph.

    Every non-comment line of the input is turned into one rank node plus
    the coordinate nodes returned by parse_line(). Coordinate nodes are
    chained in the order parse_line() returns them, and the last one is
    connected to the rank node. Edges are collected in a set so that a
    link shared by several ranks is drawn only once.
    """
    args = parse_args()
    dot = Digraph(args.output)
    edges = set()
    with open(args.filename) as coords_file:
        for line in coords_file:
            stripped = line.strip()
            # Skip comments and blank lines; a blank line would otherwise
            # make parse_line() fail while unpacking the 'rank: coords' pair.
            if not stripped or stripped.startswith('#'):
                continue
            rank_label, coord_node_labels = parse_line(line)
            dot.node(rank_label)
            for label in coord_node_labels:
                dot.node(label)
            # Link each coordinate node to the next one in the list.
            for upper, lower in zip(coord_node_labels, coord_node_labels[1:]):
                edges.add((upper, lower))
            # A line that carries only a port number yields no coordinate
            # nodes, so there is nothing to attach the rank to.
            if coord_node_labels:
                edges.add((coord_node_labels[-1], rank_label))
    # Sort so the generated graph source does not depend on set iteration
    # order, which varies between runs for strings.
    dot.edges(sorted(edges))
    dot.render(format=args.format, view=True)
def parse_args():
    """Parse the command line of the coordinates renderer.

    Returns the argparse namespace with three attributes: 'filename'
    (required positional), 'format' (default 'svg') and 'output'
    (default 'coords').
    """
    parser = argparse.ArgumentParser(
        description='Render a per-rank coordinates topology file as a graphical set of trees.')
    # (flags, keyword settings) for every supported argument, in help order.
    argument_specs = (
        (('filename',),
         {'help': 'The file contains the coordinates.'}),
        (('--format', '-f'),
         {'default': 'svg',
          'help': 'Output format. Can be any format supported by graphviz. Default: svg.'}),
        (('--output', '-o'),
         {'default': 'coords',
          'help': 'Output file name. Default: coords.'}),
    )
    for flags, settings in argument_specs:
        parser.add_argument(*flags, **settings)
    return parser.parse_args()
def parse_line(line):
    """Split one 'rank: coord coord ... port' line into graph labels.

    Returns a tuple (rank_label, coord_node_labels): the text before the
    colon, and the labels built by create_coord_node_labels() from the
    whitespace-separated fields after it.
    """
    rank_label, coord_set = line.split(':')
    fields = coord_set.split()
    # The last field is the port number, which is not part of the drawing.
    return rank_label, create_coord_node_labels(fields[:-1])
def create_coord_node_labels(coords):
    """Build cumulative node labels from a list of coordinate strings.

    The first coordinate gets the prefix 'G' and the second the prefix
    'S'; any further coordinates are ignored. Each label embeds all the
    coordinates up to its own level joined by dots, e.g. ['0', '2']
    becomes ['G0', 'S0.2'], so equal ids under different parents stay
    distinct nodes.
    """
    labels = []
    path = ''
    for level_prefix, coord in zip(['G', 'S'], coords):
        # Extend the dotted path; the very first coordinate starts it.
        path = path + "." + coord if path else coord
        labels.append(level_prefix + path)
    return labels
# Render only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
+117
Ver Arquivo
@@ -0,0 +1,117 @@
##
## Copyright (C) by Argonne National Laboratory
## See COPYRIGHT in top-level directory
##
# This python script renders the files dumped by MPIR_CVAR_TREE_DUMP.
# Input: Tree files dumped by MPIR_CVAR_TREE_DUMP.
# Output: Graph of the tree. Output filename is defined by '--output'.
# Example: python3 ./render-tree.py tree* --format png --output tree
import json
import sys
import argparse
try:
from graphviz import Digraph
except ImportError as e:
sys.exit('Unable to import graphviz module: {}.\n'.format(e) +
'Please install the graphviz module by running the following command:\n'
'\n'
' python -m pip install graphviz\n')
def main():
    """Load the dumped tree nodes, find the root and render the tree.

    Exits with the message 'cannot find root' when every rank appears as
    somebody's child, since such input has no node to start drawing from.
    """
    args = parse_args()
    input_tree = load_tree(args.node_files)
    root = find_root(input_tree)
    if root == -1:
        # Stop here: continuing would make bfs() start at input_tree[-1]
        # and walk a structure in which every node has a parent.
        sys.exit('cannot find root')
    level_array = bfs(input_tree, root)
    render_tree(input_tree, level_array, args.format, args.output)
# Suppose input_data is:
# [{'rank': 0, 'nranks': 4, 'parent': -1, 'children': [2, 1]},
# {'rank': 1, 'nranks': 4, 'parent': 0, 'children': []},
# {'rank': 2, 'nranks': 4, 'parent': 0, 'children': [3]},
# {'rank': 3, 'nranks': 4, 'parent': 2, 'children': []}]
# The input_tree stores the input_data in a 2D array:
# [[2, 1], [], [3], []]
def load_tree(node_files):
    """Read the per-rank JSON files and build a children list per rank.

    Each file must hold one JSON object with at least the keys 'rank',
    'nranks' and 'children'. The result is a list with 'nranks' entries
    (taken from the first file) where entry r lists the children of rank
    r in file order. The result is also printed to stdout.
    """
    records = []
    for path in node_files:
        with open(path) as handle:
            records.append(json.load(handle))

    # One empty child list per rank; sized from the first record.
    adjacency = [[] for _ in range(records[0]['nranks'])]
    for record in records:
        for child_rank in record['children']:
            adjacency[record['rank']].append(child_rank)

    print(adjacency)
    return adjacency
# Find the root of the input_tree
def find_root(input_tree):
    """Return the lowest rank that is nobody's child, or -1 if none exists.

    input_tree is a list where entry r holds the child ranks of rank r.
    """
    has_parent = [False] * len(input_tree)
    for children in input_tree:
        for child in children:
            has_parent[child] = True
    # The first rank never listed as a child is taken as the root.
    return next(
        (rank for rank, flagged in enumerate(has_parent) if not flagged),
        -1)
# Perform a breadth-first search
# The level_array would be: [[0], [2, 1], [3]]
# Level 0 has rank 0, level 1 has rank 2 and 1 and level 2 has rank 3
def bfs(input_tree, root):
    """Group the ranks reachable from root by their depth in the tree.

    Args:
        input_tree: list where entry r holds the child ranks of rank r.
        root: rank to start from.

    Returns:
        A list of levels; level 0 is [root], level 1 its children in
        stored order, and so on. The result is also printed to stdout.

    Raises:
        ValueError: if the walk goes deeper than the number of ranks,
            which can only happen when the input contains a cycle.
    """
    level_array = []
    frontier = [root]
    while frontier:
        # A walk without cycles has at most one level per rank; anything
        # deeper means a rank is its own descendant and the loop would
        # otherwise never terminate.
        if len(level_array) > len(input_tree):
            raise ValueError('cycle detected in input tree')
        level_array.append(frontier)
        # Build the next level in one pass instead of popping from the
        # front of a list, which costs O(n) per pop.
        frontier = [child for node in frontier for child in input_tree[node]]
    print(level_array)
    return level_array
def render_tree(input_tree, level_array, format, output):
    """Draw the tree with graphviz and render it to a file.

    Args:
        input_tree: list where entry r holds the child ranks of rank r.
        level_array: ranks grouped by depth, as produced by bfs().
        format: output format passed on to the graphviz render call.
        output: name given to the Digraph.
    """
    dot = Digraph(output)
    for level in level_array:
        # Put all ranks of one depth into a same-rank subgraph.
        with dot.subgraph() as same_level:
            same_level.attr(rank='same')
            same_level.attr(rankdir='LR')
            for rank in level:
                same_level.node(str(rank))
        # Create invisible edges to keep the order of the children
        for left, right in zip(level, level[1:]):
            dot.edge(str(left), str(right), style='invis')
    # The visible parent -> child edges.
    for parent, children in enumerate(input_tree):
        for child in children:
            dot.edge(str(parent), str(child))
    dot.render(format=format, view=True)
def parse_args():
    """Parse the command line of the tree renderer.

    Returns the argparse namespace with three attributes: 'node_files'
    (one or more positional file names), 'format' (default 'svg') and
    'output' (default 'tree').
    """
    parser = argparse.ArgumentParser(
        description='Render a topology-aware collective tree as a graphical tree.')
    # (flags, keyword settings) for every supported argument, in help order.
    argument_specs = (
        (('node_files',),
         {'metavar': 'FILE',
          'nargs': '+',
          'help': 'File(s) containing the JSON-formatted tree nodes to be rendered (e.g. tree-node-*.json).'}),
        (('--format', '-f'),
         {'default': 'svg',
          'help': 'Output format. Can be any format supported by graphviz. Default: svg.'}),
        (('--output', '-o'),
         {'default': 'tree',
          'help': 'Output file name. Default: tree.'}),
    )
    for flags, settings in argument_specs:
        parser.add_argument(*flags, **settings)
    return parser.parse_args()
# Render only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
+17
Ver Arquivo
@@ -0,0 +1,17 @@
# rank: switch-group id, switch id, port number (this line will be skipped)
0: 0 0 -1
1: 1 0 -1
2: 0 2 -1
3: 0 2 -1
4: 1 1 -1
5: 1 0 -1
6: 1 3 -1
7: 0 0 -1
8: 1 0 -1
9: 1 3 -1
10: 1 3 -1
11: 0 3 -1
12: 0 2 -1
13: 1 1 -1
14: 1 2 -1
15: 0 0 -1
+17
Ver Arquivo
@@ -0,0 +1,17 @@
# rank: switch-group id, switch id, port number (this line will be skipped)
0: 0 0 -1
1: 0 0 -1
2: 0 0 -1
3: 0 0 -1
4: 0 1 -1
5: 0 1 -1
6: 0 1 -1
7: 0 1 -1
8: 1 2 -1
9: 1 2 -1
10: 1 2 -1
11: 1 2 -1
12: 1 3 -1
13: 1 3 -1
14: 1 3 -1
15: 1 3 -1
+33
Ver Arquivo
@@ -0,0 +1,33 @@
# rank: switch-group id, switch id, port number (this line will be skipped)
0: 1 7 -1
1: 1 10 -1
2: 1 8 -1
3: 3 3 -1
4: 0 3 -1
5: 3 8 -1
6: 2 9 -1
7: 2 11 -1
8: 3 6 -1
9: 1 10 -1
10: 3 14 -1
11: 1 7 -1
12: 0 14 -1
13: 0 6 -1
14: 1 9 -1
15: 2 7 -1
16: 0 6 -1
17: 1 7 -1
18: 0 8 -1
19: 3 2 -1
20: 0 7 -1
21: 0 15 -1
22: 0 14 -1
23: 3 14 -1
24: 1 15 -1
25: 2 5 -1
26: 2 10 -1
27: 2 8 -1
28: 3 6 -1
29: 1 7 -1
30: 2 14 -1
31: 3 14 -1
+33
Ver Arquivo
@@ -0,0 +1,33 @@
# rank: switch-group id, switch id, port number (this line will be skipped)
0: 0 2 13
1: 0 3 1
2: 0 2 15
3: 0 0 13
4: 0 2 2
5: 1 2 12
6: 0 2 6
7: 1 1 1
8: 1 3 2
9: 1 3 14
10: 1 2 4
11: 1 1 13
12: 1 2 10
13: 0 1 13
14: 0 2 12
15: 0 1 0
16: 1 0 15
17: 0 0 0
18: 0 2 10
19: 0 3 4
20: 0 0 10
21: 1 3 8
22: 1 1 2
23: 1 1 3
24: 1 1 10
25: 1 0 1
26: 1 1 1
27: 0 2 13
28: 1 2 2
29: 1 1 1
30: 1 2 9
31: 1 3 8
+14
Ver Arquivo
@@ -203,3 +203,17 @@ p_neighb_alltoallv 4
p_neighb_alltoallw 4
p_order 2
p_inactive 1 timeLimit=10
# Tests for topo-aware
bcasttest 16 env=MPIR_CVAR_BCAST_DEVICE_COLLECTIVE=0 env=MPIR_CVAR_COLLECTIVE_FALLBACK=silent env=MPIR_CVAR_BCAST_INTRA_ALGORITHM=tree env=MPIR_CVAR_BCAST_TREE_TYPE=topology_aware env=MPIR_CVAR_COORDINATES_FILE=coords-16x2.txt
bcasttest 16 env=MPIR_CVAR_BCAST_DEVICE_COLLECTIVE=0 env=MPIR_CVAR_COLLECTIVE_FALLBACK=silent env=MPIR_CVAR_BCAST_INTRA_ALGORITHM=tree env=MPIR_CVAR_BCAST_TREE_TYPE=topology_aware env=MPIR_CVAR_COORDINATES_FILE=coords-16x2-random.txt
bcasttest 32 env=MPIR_CVAR_BCAST_DEVICE_COLLECTIVE=0 env=MPIR_CVAR_COLLECTIVE_FALLBACK=silent env=MPIR_CVAR_BCAST_INTRA_ALGORITHM=tree env=MPIR_CVAR_BCAST_TREE_TYPE=topology_aware env=MPIR_CVAR_COORDINATES_FILE=coords-32x2.txt
bcasttest 32 env=MPIR_CVAR_BCAST_DEVICE_COLLECTIVE=0 env=MPIR_CVAR_COLLECTIVE_FALLBACK=silent env=MPIR_CVAR_BCAST_INTRA_ALGORITHM=tree env=MPIR_CVAR_BCAST_TREE_TYPE=topology_aware env=MPIR_CVAR_COORDINATES_FILE=coords-32x3.txt
bcasttest 16 env=MPIR_CVAR_BCAST_DEVICE_COLLECTIVE=0 env=MPIR_CVAR_COLLECTIVE_FALLBACK=silent env=MPIR_CVAR_BCAST_INTRA_ALGORITHM=tree env=MPIR_CVAR_BCAST_TREE_TYPE=topology_aware_k env=MPIR_CVAR_COORDINATES_FILE=coords-16x2.txt
bcasttest 16 env=MPIR_CVAR_BCAST_DEVICE_COLLECTIVE=0 env=MPIR_CVAR_COLLECTIVE_FALLBACK=silent env=MPIR_CVAR_BCAST_INTRA_ALGORITHM=tree env=MPIR_CVAR_BCAST_TREE_TYPE=topology_aware_k env=MPIR_CVAR_COORDINATES_FILE=coords-16x2-random.txt
bcasttest 32 env=MPIR_CVAR_BCAST_DEVICE_COLLECTIVE=0 env=MPIR_CVAR_COLLECTIVE_FALLBACK=silent env=MPIR_CVAR_BCAST_INTRA_ALGORITHM=tree env=MPIR_CVAR_BCAST_TREE_TYPE=topology_aware_k env=MPIR_CVAR_COORDINATES_FILE=coords-32x2.txt
bcasttest 32 env=MPIR_CVAR_BCAST_DEVICE_COLLECTIVE=0 env=MPIR_CVAR_COLLECTIVE_FALLBACK=silent env=MPIR_CVAR_BCAST_INTRA_ALGORITHM=tree env=MPIR_CVAR_BCAST_TREE_TYPE=topology_aware_k env=MPIR_CVAR_COORDINATES_FILE=coords-32x3.txt
bcasttest 16 env=MPIR_CVAR_BCAST_DEVICE_COLLECTIVE=0 env=MPIR_CVAR_COLLECTIVE_FALLBACK=silent env=MPIR_CVAR_BCAST_INTRA_ALGORITHM=tree env=MPIR_CVAR_BCAST_TREE_TYPE=topology_wave env=MPIR_CVAR_COORDINATES_FILE=coords-16x2.txt
bcasttest 16 env=MPIR_CVAR_BCAST_DEVICE_COLLECTIVE=0 env=MPIR_CVAR_COLLECTIVE_FALLBACK=silent env=MPIR_CVAR_BCAST_INTRA_ALGORITHM=tree env=MPIR_CVAR_BCAST_TREE_TYPE=topology_wave env=MPIR_CVAR_COORDINATES_FILE=coords-16x2-random.txt
bcasttest 32 env=MPIR_CVAR_BCAST_DEVICE_COLLECTIVE=0 env=MPIR_CVAR_COLLECTIVE_FALLBACK=silent env=MPIR_CVAR_BCAST_INTRA_ALGORITHM=tree env=MPIR_CVAR_BCAST_TREE_TYPE=topology_wave env=MPIR_CVAR_COORDINATES_FILE=coords-32x2.txt
bcasttest 32 env=MPIR_CVAR_BCAST_DEVICE_COLLECTIVE=0 env=MPIR_CVAR_COLLECTIVE_FALLBACK=silent env=MPIR_CVAR_BCAST_INTRA_ALGORITHM=tree env=MPIR_CVAR_BCAST_TREE_TYPE=topology_wave env=MPIR_CVAR_COORDINATES_FILE=coords-32x3.txt