Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid detaching EFA ENIs #1237

Merged
merged 1 commit into from
Oct 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 23 additions & 6 deletions pkg/awsutils/awsutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ type APIs interface {
// GetIPv4sFromEC2 returns the IPv4 addresses for a given ENI
GetIPv4sFromEC2(eniID string) (addrList []*ec2.NetworkInterfacePrivateIpAddress, err error)

// DescribeAllENIs calls EC2 and returns the ENIMetadata and a tag map for each ENI
DescribeAllENIs() (eniMetadata []ENIMetadata, tagMap map[string]TagMap, trunkENI string, err error)
// DescribeAllENIs calls EC2 and returns a fully populated DescribeAllENIsResult struct and an error
DescribeAllENIs() (DescribeAllENIsResult, error)

// AllocIPAddress allocates an IP address for an ENI
AllocIPAddress(eniID string) error
Expand Down Expand Up @@ -213,6 +213,14 @@ func (eni ENIMetadata) PrimaryIPv4Address() string {
// TagMap keeps track of the EC2 tags on each ENI
type TagMap map[string]string

// DescribeAllENIsResult contains the fully
type DescribeAllENIsResult struct {
ENIMetadata []ENIMetadata
TagMap map[string]TagMap
TrunkENI string
EFAENIs map[string]bool
}

// msSince returns milliseconds since start.
func msSince(start time.Time) float64 {
return float64(time.Since(start) / time.Millisecond)
Expand Down Expand Up @@ -967,11 +975,11 @@ func (cache *EC2InstanceMetadataCache) GetIPv4sFromEC2(eniID string) (addrList [
}

// DescribeAllENIs calls EC2 to refresh the ENIMetadata and tags for all attached ENIs
func (cache *EC2InstanceMetadataCache) DescribeAllENIs() ([]ENIMetadata, map[string]TagMap, string, error) {
func (cache *EC2InstanceMetadataCache) DescribeAllENIs() (DescribeAllENIsResult, error) {
// Fetch all local ENI info from metadata
allENIs, err := cache.GetAttachedENIs()
if err != nil {
return nil, nil, "", errors.Wrap(err, "DescribeAllENIs: failed to get local ENI metadata")
return DescribeAllENIsResult{}, errors.Wrap(err, "DescribeAllENIs: failed to get local ENI metadata")
}

eniMap := make(map[string]ENIMetadata, len(allENIs))
Expand Down Expand Up @@ -1016,7 +1024,7 @@ func (cache *EC2InstanceMetadataCache) DescribeAllENIs() ([]ENIMetadata, map[str
}

if err != nil {
return nil, nil, "", err
return DescribeAllENIsResult{}, err
}

// Collect the verified ENIs
Expand All @@ -1027,6 +1035,7 @@ func (cache *EC2InstanceMetadataCache) DescribeAllENIs() ([]ENIMetadata, map[str

// Collect ENI response into ENI metadata and tags.
var trunkENI string
efaENIs := make(map[string]bool, 0)
tagMap := make(map[string]TagMap, len(ec2Response.NetworkInterfaces))
for _, ec2res := range ec2Response.NetworkInterfaces {
if ec2res.Attachment != nil && aws.Int64Value(ec2res.Attachment.DeviceIndex) == 0 && !aws.BoolValue(ec2res.Attachment.DeleteOnTermination) {
Expand All @@ -1039,14 +1048,22 @@ func (cache *EC2InstanceMetadataCache) DescribeAllENIs() ([]ENIMetadata, map[str
if interfaceType == "trunk" {
trunkENI = eniID
}
if interfaceType == "efa" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we consider to use switch/case to evaluate interfaceTypes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we get a third custom interface type, I think it's about time to switch. 😀

efaENIs[eniID] = true
}
// Check IPv4 addresses
logOutOfSyncState(eniID, eniMetadata.IPv4Addresses, ec2res.PrivateIpAddresses)
tags := getTags(ec2res, eniMetadata.ENIID)
if len(tags) > 0 {
tagMap[eniMetadata.ENIID] = tags
}
}
return verifiedENIs, tagMap, trunkENI, nil
return DescribeAllENIsResult{
ENIMetadata: verifiedENIs,
TagMap: tagMap,
TrunkENI: trunkENI,
EFAENIs: efaENIs,
}, nil
}

// getTags collects tags from an EC2 DescribeNetworkInterfaces call
Expand Down
4 changes: 2 additions & 2 deletions pkg/awsutils/awsutils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,9 +302,9 @@ func TestDescribeAllENIs(t *testing.T) {
for _, tc := range testCases {
mockEC2.EXPECT().DescribeNetworkInterfacesWithContext(gomock.Any(), gomock.Any(), gomock.Any()).Times(tc.n).Return(result, tc.awsErr)
ins := &EC2InstanceMetadataCache{imds: TypedIMDS{mockMetadata}, ec2SVC: mockEC2}
_, tags, _, err := ins.DescribeAllENIs()
metaData, err := ins.DescribeAllENIs()
assert.Equal(t, tc.expErr, err, tc.name)
assert.Equal(t, tc.exptags, tags, tc.name)
assert.Equal(t, tc.exptags, metaData.TagMap, tc.name)
}
}

Expand Down
10 changes: 4 additions & 6 deletions pkg/awsutils/mocks/awsutils_mocks.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 22 additions & 1 deletion pkg/ipamd/datastore/data_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ type ENI struct {
IsPrimary bool
// IsTrunk indicates whether this ENI is used to provide pods with dedicated ENIs
IsTrunk bool
// IsEFA indicates whether this ENI is tagged as an EFA
IsEFA bool
// DeviceNumber is the device number of ENI (0 means the primary ENI)
DeviceNumber int
// IPv4Addresses shows whether each address is assigned, the key is IP address, which must
Expand Down Expand Up @@ -397,7 +399,7 @@ func (ds *DataStore) writeBackingStoreUnsafe() error {
}

// AddENI add ENI to data store
func (ds *DataStore) AddENI(eniID string, deviceNumber int, isPrimary, isTrunk bool) error {
func (ds *DataStore) AddENI(eniID string, deviceNumber int, isPrimary, isTrunk, isEFA bool) error {
ds.lock.Lock()
defer ds.lock.Unlock()

Expand All @@ -411,6 +413,7 @@ func (ds *DataStore) AddENI(eniID string, deviceNumber int, isPrimary, isTrunk b
createTime: time.Now(),
IsPrimary: isPrimary,
IsTrunk: isTrunk,
IsEFA: isEFA,
ID: eniID,
DeviceNumber: deviceNumber,
IPv4Addresses: make(map[string]*AddressInfo)}
Expand Down Expand Up @@ -560,6 +563,19 @@ func (ds *DataStore) GetTrunkENI() string {
return ""
}

// GetEFAENIs returns the a map containing all attached EFA ENIs
func (ds *DataStore) GetEFAENIs() map[string]bool {
ds.lock.Lock()
defer ds.lock.Unlock()
ret := make(map[string]bool)
for _, eni := range ds.eniPool {
if eni.IsEFA {
ret[eni.ID] = true
}
}
return ret
}

// IsRequiredForWarmIPTarget determines if this ENI has warm IPs that are required to fulfill whatever WARM_IP_TARGET is
// set to.
func (ds *DataStore) isRequiredForWarmIPTarget(warmIPTarget int, eni *ENI) bool {
Expand Down Expand Up @@ -621,6 +637,11 @@ func (ds *DataStore) getDeletableENI(warmIPTarget int, minimumIPTarget int) *ENI
continue
}

if eni.IsEFA {
ds.log.Debugf("ENI %s cannot be deleted because it is an EFA ENI", eni.ID)
continue
}

ds.log.Debugf("getDeletableENI: found a deletable ENI %s", eni.ID)
return eni
}
Expand Down
45 changes: 29 additions & 16 deletions pkg/ipamd/datastore/data_store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,13 @@ var log = logger.New(&logConfig)
func TestAddENI(t *testing.T) {
ds := NewDataStore(log, NullCheckpoint{})

err := ds.AddENI("eni-1", 1, true, false)
err := ds.AddENI("eni-1", 1, true, false, false)
assert.NoError(t, err)

err = ds.AddENI("eni-1", 1, true, false)
err = ds.AddENI("eni-1", 1, true, false, false)
assert.Error(t, err)

err = ds.AddENI("eni-2", 2, false, false)
err = ds.AddENI("eni-2", 2, false, false, false)
assert.NoError(t, err)

assert.Equal(t, len(ds.eniPool), 2)
Expand All @@ -51,13 +51,13 @@ func TestAddENI(t *testing.T) {
func TestDeleteENI(t *testing.T) {
ds := NewDataStore(log, NullCheckpoint{})

err := ds.AddENI("eni-1", 1, true, false)
err := ds.AddENI("eni-1", 1, true, false, false)
assert.NoError(t, err)

err = ds.AddENI("eni-2", 2, false, false)
err = ds.AddENI("eni-2", 2, false, false, false)
assert.NoError(t, err)

err = ds.AddENI("eni-3", 3, false, false)
err = ds.AddENI("eni-3", 3, false, false, false)
assert.NoError(t, err)

eniInfos := ds.GetENIInfos()
Expand Down Expand Up @@ -94,10 +94,10 @@ func TestDeleteENI(t *testing.T) {
func TestAddENIIPv4Address(t *testing.T) {
ds := NewDataStore(log, NullCheckpoint{})

err := ds.AddENI("eni-1", 1, true, false)
err := ds.AddENI("eni-1", 1, true, false, false)
assert.NoError(t, err)

err = ds.AddENI("eni-2", 2, false, false)
err = ds.AddENI("eni-2", 2, false, false, false)
assert.NoError(t, err)

err = ds.AddIPv4AddressToStore("eni-1", "1.1.1.1")
Expand Down Expand Up @@ -132,10 +132,10 @@ func TestAddENIIPv4Address(t *testing.T) {
func TestGetENIIPs(t *testing.T) {
ds := NewDataStore(log, NullCheckpoint{})

err := ds.AddENI("eni-1", 1, true, false)
err := ds.AddENI("eni-1", 1, true, false, false)
assert.NoError(t, err)

err = ds.AddENI("eni-2", 2, false, false)
err = ds.AddENI("eni-2", 2, false, false, false)
assert.NoError(t, err)

err = ds.AddIPv4AddressToStore("eni-1", "1.1.1.1")
Expand Down Expand Up @@ -164,7 +164,7 @@ func TestGetENIIPs(t *testing.T) {

func TestDelENIIPv4Address(t *testing.T) {
ds := NewDataStore(log, NullCheckpoint{})
err := ds.AddENI("eni-1", 1, true, false)
err := ds.AddENI("eni-1", 1, true, false, false)
assert.NoError(t, err)

err = ds.AddIPv4AddressToStore("eni-1", "1.1.1.1")
Expand Down Expand Up @@ -217,10 +217,10 @@ func TestPodIPv4Address(t *testing.T) {
checkpoint := NewTestCheckpoint(struct{}{})
ds := NewDataStore(log, checkpoint)

err := ds.AddENI("eni-1", 1, true, false)
err := ds.AddENI("eni-1", 1, true, false, false)
assert.NoError(t, err)

err = ds.AddENI("eni-2", 2, false, false)
err = ds.AddENI("eni-2", 2, false, false, false)
assert.NoError(t, err)

err = ds.AddIPv4AddressToStore("eni-1", "1.1.1.1")
Expand Down Expand Up @@ -332,9 +332,9 @@ func TestPodIPv4Address(t *testing.T) {
func TestWarmENIInteractions(t *testing.T) {
ds := NewDataStore(log, NullCheckpoint{})

_ = ds.AddENI("eni-1", 1, true, false)
_ = ds.AddENI("eni-2", 2, false, false)
_ = ds.AddENI("eni-3", 3, false, false)
_ = ds.AddENI("eni-1", 1, true, false, false)
_ = ds.AddENI("eni-2", 2, false, false, false)
_ = ds.AddENI("eni-3", 3, false, false, false)

_ = ds.AddIPv4AddressToStore("eni-1", "1.1.1.1")
key1 := IPAMKey{"net0", "sandbox-1", "eth0"}
Expand Down Expand Up @@ -374,4 +374,17 @@ func TestWarmENIInteractions(t *testing.T) {
assert.Contains(t, []string{"eni-2", "eni-3"}, secondRemovedEni)

assert.NotEqual(t, removedEni, secondRemovedEni, "The two removed ENIs should not be the same ENI.")

_ = ds.AddENI("eni-4", 3, false, true, false)
_ = ds.AddENI("eni-5", 3, false, false, true)

_ = ds.AddIPv4AddressToStore("eni-4", "1.1.4.1")
_ = ds.AddIPv4AddressToStore("eni-5", "1.1.5.1")

ds.eniPool["eni-4"].createTime = time.Time{}
ds.eniPool["eni-5"].createTime = time.Time{}
thirdRemovedEni := ds.RemoveUnusedENIFromStore(noWarmIPTarget, 2)
// None of the others can be removed...
assert.Equal(t, "", thirdRemovedEni)
assert.Equal(t, 3, ds.GetENIs())
}
37 changes: 21 additions & 16 deletions pkg/ipamd/ipamd.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,21 +342,21 @@ func (c *IPAMContext) nodeInit() error {
return errors.Wrap(err, "ipamd init: failed to set up host network")
}

eniMetadata, tagMap, trunkENI, err := c.awsClient.DescribeAllENIs()
metadataResult, err := c.awsClient.DescribeAllENIs()
if err != nil {
return errors.New("ipamd init: failed to retrieve attached ENIs info")
}
log.Debugf("DescribeAllENIs success: ENIs: %d, tagged: %d", len(eniMetadata), len(tagMap))
c.setUnmanagedENIs(tagMap)
enis := c.filterUnmanagedENIs(eniMetadata)
log.Debugf("DescribeAllENIs success: ENIs: %d, tagged: %d", len(metadataResult.ENIMetadata), len(metadataResult.TagMap))
c.setUnmanagedENIs(metadataResult.TagMap)
enis := c.filterUnmanagedENIs(metadataResult.ENIMetadata)

for _, eni := range enis {
log.Debugf("Discovered ENI %s, trying to set it up", eni.ENIID)
// Retry ENI sync
retry := 0
for {
retry++
if err = c.setupENI(eni.ENIID, eni, trunkENI); err == nil {
if err = c.setupENI(eni.ENIID, eni, eni.ENIID == metadataResult.TrunkENI, metadataResult.EFAENIs[eni.ENIID]); err == nil {
log.Infof("ENI %s set up.", eni.ENIID)
break
}
Expand Down Expand Up @@ -409,7 +409,7 @@ func (c *IPAMContext) nodeInit() error {
}

// If we started on a node with a trunk ENI already attached, add the node label.
if trunkENI != "" {
if metadataResult.TrunkENI != "" {
// Signal to VPC Resource Controller that the node has a trunk already
err := c.SetNodeLabel("vpc.amazonaws.com/has-trunk-attached", "true")
if err != nil {
Expand Down Expand Up @@ -699,7 +699,8 @@ func (c *IPAMContext) tryAllocateENI() error {
return err
}

err = c.setupENI(eni, eniMetadata, c.dataStore.GetTrunkENI())
// The CNI does not create trunk or EFA ENIs, so they will always be false here
err = c.setupENI(eni, eniMetadata, false, false)
if err != nil {
ipamdErrInc("increaseIPPoolsetupENIFailed")
log.Errorf("Failed to increase pool size: %v", err)
Expand Down Expand Up @@ -747,10 +748,10 @@ func (c *IPAMContext) tryAssignIPs() (increasedPool bool, err error) {
// 1) add ENI to datastore
// 2) set up linux ENI related networking stack.
// 3) add all ENI's secondary IP addresses to datastore
func (c *IPAMContext) setupENI(eni string, eniMetadata awsutils.ENIMetadata, trunkENI string) error {
func (c *IPAMContext) setupENI(eni string, eniMetadata awsutils.ENIMetadata, isTrunkENI, isEFAENI bool) error {
primaryENI := c.awsClient.GetPrimaryENI()
// Add the ENI to the datastore
err := c.dataStore.AddENI(eni, eniMetadata.DeviceNumber, eni == primaryENI, eni == trunkENI)
err := c.dataStore.AddENI(eni, eniMetadata.DeviceNumber, eni == primaryENI, isTrunkENI, isEFAENI)
if err != nil && err.Error() != datastore.DuplicatedENIError {
return errors.Wrapf(err, "failed to add ENI %s to data store", eni)
}
Expand Down Expand Up @@ -935,6 +936,8 @@ func (c *IPAMContext) nodeIPPoolReconcile(interval time.Duration) {
attachedENIs := c.filterUnmanagedENIs(allENIs)
currentENIs := c.dataStore.GetENIInfos().ENIs
trunkENI := c.dataStore.GetTrunkENI()
// Initialize the set with the known EFA interfaces
efaENIs := c.dataStore.GetEFAENIs()

// Check if a new ENI was added, if so we need to update the tags.
needToUpdateTags := false
Expand All @@ -945,14 +948,14 @@ func (c *IPAMContext) nodeIPPoolReconcile(interval time.Duration) {
}
}
if needToUpdateTags {
log.Debugf("A new ENI added but not by ipamd, updating tags")
allENIs, tagMap, trunk, err := c.awsClient.DescribeAllENIs()
log.Debugf("A new ENI added but not by ipamd, updating tags by calling EC2")
metadataResult, err := c.awsClient.DescribeAllENIs()
if err != nil {
log.Warnf("Failed to call EC2 to describe ENIs, aborting reconcile: %v", err)
return
}

if c.enablePodENI && trunk != "" {
if c.enablePodENI && metadataResult.TrunkENI != "" {
// Label the node that we have a trunk
err = c.SetNodeLabel("vpc.amazonaws.com/has-trunk-attached", "true")
if err != nil {
Expand All @@ -962,9 +965,11 @@ func (c *IPAMContext) nodeIPPoolReconcile(interval time.Duration) {
}
}
// Update trunk ENI
trunkENI = trunk
c.setUnmanagedENIs(tagMap)
attachedENIs = c.filterUnmanagedENIs(allENIs)
trunkENI = metadataResult.TrunkENI
// Just copy values of the EFA set
efaENIs = metadataResult.EFAENIs
c.setUnmanagedENIs(metadataResult.TagMap)
attachedENIs = c.filterUnmanagedENIs(metadataResult.ENIMetadata)
}

// Mark phase
Expand All @@ -982,7 +987,7 @@ func (c *IPAMContext) nodeIPPoolReconcile(interval time.Duration) {

// Add new ENI
log.Debugf("Reconcile and add a new ENI %s", attachedENI)
err = c.setupENI(attachedENI.ENIID, attachedENI, trunkENI)
err = c.setupENI(attachedENI.ENIID, attachedENI, attachedENI.ENIID == trunkENI, efaENIs[attachedENI.ENIID])
if err != nil {
log.Errorf("IP pool reconcile: Failed to set up ENI %s network: %v", attachedENI.ENIID, err)
ipamdErrInc("eniReconcileAdd")
Expand Down
Loading