UCP/WIREUP: Implement support for SHM/multi-lane in case of CM#5696
UCP/WIREUP: Implement support for SHM/multi-lane in case of CM#5696yosefe merged 1 commit intoopenucx:masterfrom
Conversation
f5b69ba to
9831a93
Compare
|
/azp run |
|
Azure Pipelines successfully started running 1 pipeline(s). |
|
@brminich @alinask @evgeny-leksikov could you review pls? |
src/ucp/wireup/address.c
Outdated
| sizeof(ucp_address_packed_iface_attr_t); | ||
| (sizeof(ucp_address_packed_iface_attr_t) + | ||
| ((flags & UCP_ADDRESS_PACK_FLAG_TL_RSC_IDX) ? | ||
| sizeof(ucp_rsc_index_t) : 0)); |
There was a problem hiding this comment.
uint8_t since it's going to be on wire
src/ucp/wireup/address.c
Outdated
| *(ucp_rsc_index_t*)ptr = rsc_index; | ||
| packed_len += sizeof(rsc_index); |
There was a problem hiding this comment.
uint8_t for both lines and everywhere below
src/ucp/wireup/address.h
Outdated
|
|
||
| UCP_ADDRESS_PACK_FLAG_LAST, | ||
|
|
||
| UCP_ADDRESS_PACK_FLAG_TL_RSC_IDX = UCS_BIT(5), /* Pack TL resource index */ |
There was a problem hiding this comment.
why it goes after UCP_ADDRESS_PACK_FLAG_LAST?
There was a problem hiding this comment.
we don't want to pack resource index always to not increase the length of the worker address
we want to pack it only during exchanging by WIREUP MSGs
src/ucp/wireup/wireup.c
Outdated
| } | ||
|
|
||
| static ucp_lane_index_t | ||
| ucp_wireup_ep_config_find_reused_lane(ucp_ep_h ep, |
There was a problem hiding this comment.
the same, ep is not needed here
There was a problem hiding this comment.
it gets config from the ep
There was a problem hiding this comment.
i mean can pass key instead of EP
src/ucp/wireup/wireup.c
Outdated
| key.dst_md_cmpts = ucs_alloca(sizeof(*key.dst_md_cmpts) * UCP_MAX_MDS); | ||
| ucp_wireup_get_reachable_mds(ep, remote_address, &key); | ||
|
|
||
| if (ep->cfg_index != UCP_WORKER_CFG_INDEX_NULL) { |
There was a problem hiding this comment.
it casts long func to long long func :) need to break up
BTW, can it be united with check at line 1245?
There was a problem hiding this comment.
it couldn't be united with the check below, since we need to generate the config key index first
wrapped new code into the function
src/ucp/wireup/wireup.c
Outdated
| [reuse_lane].rsc_index; | ||
| /* reset the UCT EP from the previous WIREUP lane to not | ||
| * destroy it, since it's not needed anymore in the new | ||
| * configuratiob, but will be used for WIREUP MSG */ |
src/ucp/core/ucp_ep.inl
Outdated
| } | ||
|
|
||
| static inline int | ||
| ucp_ep_config_key_get_cm_lane(const ucp_ep_config_key_t *config_key) |
There was a problem hiding this comment.
is it really needed? config_key->cm_lane looks shorter..
src/ucp/wireup/address.c
Outdated
| unsigned pack_flags, | ||
| int enable_atomics) | ||
| { | ||
| int packed_len = 0; |
There was a problem hiding this comment.
minor: no need to initialize
src/ucp/wireup/address.c
Outdated
| packed_len = sizeof(*packed); | ||
|
|
||
| if (pack_flags & UCP_ADDRESS_PACK_FLAG_TL_RSC_IDX) { | ||
| ptr = UCS_PTR_BYTE_OFFSET(ptr, packed_len); |
There was a problem hiding this comment.
minor: could do packed + 1
src/ucp/wireup/select.c
Outdated
| [select_info->addr_index].md_index; | ||
| return ucp_wireup_add_lane_desc(select_info, dst_md_index, lane_type, | ||
| is_proxy, select_ctx); | ||
| dst_md_index = addr_entry->md_index; |
There was a problem hiding this comment.
minor: can get rid of it as well and pass addr_entry->md_index to ucp_wireup_add_lane_desc directly
src/ucp/wireup/wireup.c
Outdated
| /* mark the endpoint as connected to remote */ | ||
| if (!ucp_ep_config(ep)->p2p_lanes) { | ||
| /* don't mark as connected to remote now in case of CM, since it destroys | ||
| * CM wireup EP (if it is hidden in the CM lane) thas is used for sending |
src/ucp/wireup/wireup.c
Outdated
| /* set the correct WIREUP MSG lane in case of CM */ | ||
| if ((key.wireup_lane != UCP_NULL_LANE) && | ||
| (UCP_NULL_LANE != | ||
| (reuse_lane = ucp_wireup_ep_config_find_reused_lane( |
There was a problem hiding this comment.
i'd define reuse_lane at first for readability
There was a problem hiding this comment.
unfortunately, it depends on key.wireup_lane
so, we can't do it
src/ucp/wireup/wireup_cm.c
Outdated
| ucs_diag("ep %p: UCT EP %p for CM lane doesn't exist, it" | ||
| "has already been discarded", ucp_ep, uct_cm_ep); |
There was a problem hiding this comment.
plz add whitespace between it and has
|
@evgeny-leksikov @brminich your comments were fixed, could pls review? |
src/ucp/core/ucp_ep.c
Outdated
| return (/* compare resource indices, if both are specified */ | ||
| (key1->lanes[lane1].dst_rsc_index != UCP_NULL_RESOURCE) == | ||
| (key2->lanes[lane2].dst_rsc_index != UCP_NULL_RESOURCE)) && | ||
| (key1->lanes[lane1].dst_rsc_index != key2->lanes[lane2].dst_rsc_index); |
There was a problem hiding this comment.
fixed
it works since ucp_ep_config_lane_is_same_dst_rsc_index() was called without negation
src/ucp/core/ucp_ep.c
Outdated
| ucp_lane_index_t lane2) | ||
| { | ||
| return (/* compare resource indices, if both are specified */ | ||
| (key1->lanes[lane1].dst_rsc_index != UCP_NULL_RESOURCE) == |
There was a problem hiding this comment.
why need to check it separately?
src/ucp/wireup/wireup_cm.c
Outdated
| } | ||
|
|
||
| ucp_wireup_remote_connected(ucp_ep); | ||
| /* TCP don't have CONNECT_TO_EP support and has internal connection |
src/ucp/wireup/wireup_cm.c
Outdated
| uct_cm_ep); | ||
| UCS_ASYNC_UNBLOCK(&worker->async); | ||
|
|
||
| if (!discard_uct_ep) { |
src/ucp/core/ucp_ep.c
Outdated
| (proxy_lane1 != UCP_NULL_RESOURCE) && | ||
| (proxy_lane2 != UCP_NULL_RESOURCE) && | ||
| ucp_ep_config_lane_is_same_peer(key1, proxy_lane1, | ||
| key2, proxy_lane2)) || | ||
| (/* both lanes are not proxies */ | ||
| (proxy_lane1 == UCP_NULL_RESOURCE) && | ||
| (proxy_lane2 == UCP_NULL_RESOURCE)); |
There was a problem hiding this comment.
maybe move
(/* both lanes are not proxies */
(proxy_lane1 == UCP_NULL_RESOURCE) &&
(proxy_lane2 == UCP_NULL_RESOURCE));
prior /* both lanes are proxies for the same peer */
and move
(proxy_lane1 != UCP_NULL_RESOURCE) &&
(proxy_lane2 != UCP_NULL_RESOURCE) &&
inside ucp_ep_config_lane_is_same_peer like parameters validation?
then it could look limpler
src/ucp/wireup/wireup.c
Outdated
| return UCS_ERR_UNREACHABLE; | ||
| } | ||
|
|
||
| static ucp_lane_index_t |
There was a problem hiding this comment.
according to name *can_reuse_lane I would expect return true/false, pls rename, also it's not clear that lane is lane index of key1
src/ucp/wireup/wireup.c
Outdated
| ucp_wireup_ep_config_find_intersect_lane(const ucp_ep_config_key_t *old_key, | ||
| const ucp_lane_index_t *intersect_map, | ||
| ucp_lane_index_t new_lane) |
There was a problem hiding this comment.
pls abstract away from usage, no need new/old prefixes, you just do value lookup in array
src/ucp/wireup/wireup.c
Outdated
|
|
||
| ucp_wireup_ep_config_intersect(old_key, new_key, reuse_lane_map); | ||
|
|
||
| if (ucp_ep_config_key_has_cm_lane(new_key)) { |
There was a problem hiding this comment.
is this func called for non-CM wireup as well?
There was a problem hiding this comment.
but we did not need it before, what's changed?
There was a problem hiding this comment.
I don't see any problems to check config intersection for non-CM wireup
Also, I think we could support EP reconfiguration now - just need to remove the check for reconfiguration
There was a problem hiding this comment.
I have no objections, I'm trying to understand side effects of this. WRT ep reconf - it would be great
There was a problem hiding this comment.
I'm trying to understand side effects of this
ic, I guess the one side effect is time that is wasted for searching intersections (and trying to re-use lanes)
There was a problem hiding this comment.
ok then since it's local and number of lanes is limited -
There was a problem hiding this comment.
ok then since it's local and number of lanes is limited -
yep
There was a problem hiding this comment.
if not cm -> skip ucp_wireup_check_config_intersect altogether
|
@brminich @evgeny-leksikov can you pls review? |
src/ucp/wireup/wireup.c
Outdated
| ucp_wireup_ep_configs_find_reuse_lane(const ucp_ep_config_key_t *key1, | ||
| ucp_lane_index_t lane1, | ||
| const ucp_ep_config_key_t *key2) |
There was a problem hiding this comment.
seems like we need typedef for currently anonymous struct inside key, then it could be like
static ucp_lane_index_t
ucp_ep_config_key_lane_by_attrs(const ucp_ep_config_key_t *key,
const ucp_ep_config_lane_attrs_t *attrs)
i guess it could simplify other places
There was a problem hiding this comment.
do you mean to replace ucp_wireup_ep_configs_find_reuse_lane() by ucp_ep_config_key_lane_by_attrs(), but I don't understand how will it work
There was a problem hiding this comment.
for example from your code if i don't mess indexes...
intersect_map[lane1_idx] =
ucp_ep_config_key_lane_by_attrs(key2, &key1->lanes[lane1_idx]);
There was a problem hiding this comment.
unfortunately, it's not possible ucp_ep_config_find_match_lane() has to know the lane1 number to report it as a result in case of CM or sockaddr stub lane
and ucp_ep_config_lane_is_same_peer() requires two keys ant lanes
There was a problem hiding this comment.
actually, with new typedef
ucp_ep_config_lane_is_same_peer could be ucp_ep_config_lane_attrs_peer_is_equal(lane_attrs1, lane_attrs2)
and CM/sockaddr stub lane cases have to be handled separately
There was a problem hiding this comment.
still it will be impossible, since we have ucp_ep_config_lane_is_proxy_equal() that requires key1/key2 to check proxy lane
I'd remain it as is, and evaluate it then
There was a problem hiding this comment.
just fixed the names of the functions as you suggested
There was a problem hiding this comment.
still it will be impossible, since we have
ucp_ep_config_lane_is_proxy_equal()that requireskey1/key2to check proxy lane
I'd remain it as is, and evaluate it then
As I see proxy lane is part of "arrt" key2->lanes[lane2].proxy_lane
There was a problem hiding this comment.
yes, but we won't be able to get attributes for proxy_lane if we pass only attributes for the original lane
There was a problem hiding this comment.
to check proxy attrs it could be separate call with proxy lanes attrs in args
src/ucp/wireup/wireup.c
Outdated
| static void | ||
| ucp_wireup_ep_config_intersect(const ucp_ep_config_key_t *old_key, | ||
| const ucp_ep_config_key_t *new_key, | ||
| ucp_wireup_ep_config_intersect(const ucp_ep_config_key_t *key1, |
There was a problem hiding this comment.
ucp_ep_config_lanes_intersect
src/ucp/wireup/wireup.c
Outdated
|
|
||
| static ucp_lane_index_t | ||
| ucp_wireup_ep_config_find_intersect_lane(const ucp_ep_config_key_t *old_key, | ||
| ucp_wireup_ep_config_find_intersect_lane(const ucp_ep_config_key_t *key, |
There was a problem hiding this comment.
maybe ucp_ep_config_key_match_lane(key, lane, lane_indexes)?
|
Is it relevant? |
src/ucp/core/ucp_ep.c
Outdated
| (/* both lanes are proxies for themselves */ | ||
| (proxy_lane1 == lane1) && (proxy_lane2 == lane2)) || | ||
| /* both lanes are proxies for the same peer */ | ||
| ucp_ep_config_lane_is_peer_equal(key1, proxy_lane1, |
There was a problem hiding this comment.
why does ucp_ep_config_lane_is_peer_equal calls ucp_ep_config_lane_is_proxy_equal?
There was a problem hiding this comment.
in order to check that lanes are proxies for the same lanes
src/ucp/wireup/wireup.c
Outdated
| /* Get all reachable MDs from full remote address list and join with | ||
| * current ep configuration | ||
| */ | ||
| key.dst_md_cmpts = ucs_alloca(sizeof(*key.dst_md_cmpts) * UCP_MAX_MDS); | ||
| ucp_wireup_get_reachable_mds(ep, remote_address, &key); |
src/ucp/wireup/wireup.c
Outdated
| /* UCT EP has already been allocated for this EP (it could be | ||
| * done only during CM connection establishment phase) */ |
There was a problem hiding this comment.
we should not call ucp_wireup_connect_lane_to_iface for reused ep's in the first place
so maybe return lane_map to ucp_wireup_init_lanes() function, from ucp_wireup_check_config_intersect
src/ucp/wireup/wireup.c
Outdated
| } | ||
| } | ||
|
|
||
| if (!ucp_ep_has_cm_lane(ep) && |
There was a problem hiding this comment.
but we exit this function immediately if it's not CM..
src/ucp/wireup/wireup_cm.c
Outdated
|
|
||
| /* check that all TL resources in the TL bitmap have the same dev_index */ | ||
| #if ENABLE_ASSERT | ||
| #ifdef ENABLE_ASSERT |
There was a problem hiding this comment.
I've changed to #if and then removed, so it is now initial code
done
src/ucp/wireup/wireup.c
Outdated
|
|
||
| ucp_wireup_ep_config_intersect(old_key, new_key, reuse_lane_map); | ||
|
|
||
| if (ucp_ep_config_key_has_cm_lane(new_key)) { |
There was a problem hiding this comment.
if not cm -> skip ucp_wireup_check_config_intersect altogether
src/ucp/wireup/wireup.c
Outdated
| ucp_wireup_check_config_intersect(ep, &key, reuse_lane_map, | ||
| &replay_pending_queue); |
There was a problem hiding this comment.
can we just return a bitmap of which lanes need to connect (e.g which lanes are new)?
calculate the map inside ucp_wireup_check_config_intersect as part of lane reuse logic
src/ucp/wireup/wireup_cm.c
Outdated
|
|
||
| /* check that all TL resources in the TL bitmap have the same dev_index */ | ||
| #if ENABLE_ASSERT | ||
| #ifdef ENABLE_ASSERT |
src/ucp/wireup/wireup.c
Outdated
| uint64_t tl_bitmap = local_tl_bitmap & | ||
| worker->context->tl_bitmap; | ||
| ucp_rsc_index_t cm_idx = UCP_NULL_RESOURCE; | ||
| ucp_lane_index_t connect_lane_bitmap = UINT8_MAX; |
There was a problem hiding this comment.
this should be initialized in ucp_wireup_check_config_intersect to ucs_mask(num_lanes)
| ucp_lane_index_t lane, unsigned path_index, | ||
| ucp_rsc_index_t rsc_index, | ||
| ucp_worker_iface_t *wiface, | ||
| ucp_rsc_index_t rsc_index, |
604bc2c to
e910f9c
Compare
done |
|
@dmitrygx seems there is a build issue: need to change ucs_mask to UCS_MASK |
fixed |
e910f9c to
55ba790
Compare
55ba790 to
ec81c6e
Compare
|
/azp run |
|
Azure Pipelines successfully started running 1 pipeline(s). |
|
issue seems relevant from discard uct ep: |
|
@yosefe seems that discard just detected corruption inside |
|
/azp run |
|
Azure Pipelines successfully started running 1 pipeline(s). |
|
@yosefe do we need restart one more time? |
|
/azp run |
|
Azure Pipelines successfully started running 1 pipeline(s). |
1 similar comment
|
Azure Pipelines successfully started running 1 pipeline(s). |
|
/azp run |
|
Azure Pipelines successfully started running 1 pipeline(s). |
|
locally test did >300k repetitions with Valgrind, and the problem wasn't reproduced |
|
Ported to yosefe#223 |
What
Implement support for SHM/multi-lane in case of CM
Why ?
If the connection was established thru CM (i.e. RDMACM or TCPCM), UCP EP config has lanes configured on the same device where CM is running on.
It means that SHM/CUDA/multi-lane couldn't be used in the current implementation.
This PR addresses this.
How ?
After CM flow, UCP EPs start WIREUP MSG protocol to exchange by all addresses and try to select lanes with the best available TLs.
WIREUP MSGs are sent through WIREUP lane (UCT EP that was created on the same device as CM). If after reconfiguration UCT EP for WIREUP lane isn't selected, change WIREUP lane to CM lane and hide UCT EP from WIREUP lane to the CM_WIREUP_EP/AUX_EP and use it for WIREUP MSGs.