Skip to content

Commit 32a094b

Browse files
authored
Merge pull request #2035 from kubernetes-sigs/nfs-multi-subnet-fix-1.29
[release-1.29] fix: nfs mount failure when there are multiple subnets in the cluster
2 parents 6d4d537 + 6a8274a commit 32a094b

File tree

5 files changed

+99
-74
lines changed

5 files changed

+99
-74
lines changed

pkg/azurefile/azure.go

+74-37
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ import (
3434
"k8s.io/klog/v2"
3535
"k8s.io/utils/pointer"
3636
"sigs.k8s.io/cloud-provider-azure/pkg/azclient/configloader"
37+
azcache "sigs.k8s.io/cloud-provider-azure/pkg/cache"
3738
azure "sigs.k8s.io/cloud-provider-azure/pkg/provider"
39+
"sigs.k8s.io/cloud-provider-azure/pkg/retry"
3840
)
3941

4042
const (
@@ -170,9 +172,10 @@ func getKubeConfig(kubeconfig string, enableWindowsHostProcess bool) (config *re
170172
return config, err
171173
}
172174

173-
func (d *Driver) updateSubnetServiceEndpoints(ctx context.Context, vnetResourceGroup, vnetName, subnetName string) error {
175+
func (d *Driver) updateSubnetServiceEndpoints(ctx context.Context, vnetResourceGroup, vnetName, subnetName string) ([]string, error) {
176+
var vnetResourceIDs []string
174177
if d.cloud.SubnetsClient == nil {
175-
return fmt.Errorf("SubnetsClient is nil")
178+
return vnetResourceIDs, fmt.Errorf("SubnetsClient is nil")
176179
}
177180

178181
if vnetResourceGroup == "" {
@@ -186,55 +189,89 @@ func (d *Driver) updateSubnetServiceEndpoints(ctx context.Context, vnetResourceG
186189
if vnetName == "" {
187190
vnetName = d.cloud.VnetName
188191
}
189-
if subnetName == "" {
190-
subnetName = d.cloud.SubnetName
191-
}
192192

193193
klog.V(2).Infof("updateSubnetServiceEndpoints on vnetName: %s, subnetName: %s, location: %s", vnetName, subnetName, location)
194-
if subnetName == "" || vnetName == "" || location == "" {
195-
return fmt.Errorf("value of subnetName, vnetName or location is empty")
194+
if vnetName == "" || location == "" {
195+
return vnetResourceIDs, fmt.Errorf("vnetName or location is empty")
196196
}
197197

198198
lockKey := vnetResourceGroup + vnetName + subnetName
199-
d.subnetLockMap.LockEntry(lockKey)
200-
defer d.subnetLockMap.UnlockEntry(lockKey)
201-
202-
subnet, err := d.cloud.SubnetsClient.Get(ctx, vnetResourceGroup, vnetName, subnetName, "")
199+
cache, err := d.subnetCache.Get(lockKey, azcache.CacheReadTypeDefault)
203200
if err != nil {
204-
return fmt.Errorf("failed to get the subnet %s under vnet %s: %v", subnetName, vnetName, err)
205-
}
206-
endpointLocaions := []string{location}
207-
storageServiceEndpoint := network.ServiceEndpointPropertiesFormat{
208-
Service: &storageService,
209-
Locations: &endpointLocaions,
210-
}
211-
storageServiceExists := false
212-
if subnet.SubnetPropertiesFormat == nil {
213-
subnet.SubnetPropertiesFormat = &network.SubnetPropertiesFormat{}
201+
return nil, err
214202
}
215-
if subnet.SubnetPropertiesFormat.ServiceEndpoints == nil {
216-
subnet.SubnetPropertiesFormat.ServiceEndpoints = &[]network.ServiceEndpointPropertiesFormat{}
203+
if cache != nil {
204+
vnetResourceIDs = cache.([]string)
205+
klog.V(2).Infof("subnet %s under vnet %s in rg %s is already updated, vnetResourceIDs: %v", subnetName, vnetName, vnetResourceGroup, vnetResourceIDs)
206+
return vnetResourceIDs, nil
217207
}
218-
serviceEndpoints := *subnet.SubnetPropertiesFormat.ServiceEndpoints
219-
for _, v := range serviceEndpoints {
220-
if strings.HasPrefix(pointer.StringDeref(v.Service, ""), storageService) {
221-
storageServiceExists = true
222-
klog.V(4).Infof("serviceEndpoint(%s) is already in subnet(%s)", storageService, subnetName)
223-
break
208+
209+
d.subnetLockMap.LockEntry(lockKey)
210+
defer d.subnetLockMap.UnlockEntry(lockKey)
211+
212+
var subnets []network.Subnet
213+
if subnetName != "" {
214+
// list multiple subnets separated by comma
215+
subnetNames := strings.Split(subnetName, ",")
216+
for _, sn := range subnetNames {
217+
sn = strings.TrimSpace(sn)
218+
subnet, rerr := d.cloud.SubnetsClient.Get(ctx, vnetResourceGroup, vnetName, sn, "")
219+
if rerr != nil {
220+
return vnetResourceIDs, fmt.Errorf("failed to get the subnet %s under rg %s vnet %s: %v", subnetName, vnetResourceGroup, vnetName, rerr.Error())
221+
}
222+
subnets = append(subnets, subnet)
223+
}
224+
} else {
225+
var rerr *retry.Error
226+
subnets, rerr = d.cloud.SubnetsClient.List(ctx, vnetResourceGroup, vnetName)
227+
if rerr != nil {
228+
return vnetResourceIDs, fmt.Errorf("failed to list the subnets under rg %s vnet %s: %v", vnetResourceGroup, vnetName, rerr.Error())
224229
}
225230
}
226231

227-
if !storageServiceExists {
228-
serviceEndpoints = append(serviceEndpoints, storageServiceEndpoint)
229-
subnet.SubnetPropertiesFormat.ServiceEndpoints = &serviceEndpoints
232+
for _, subnet := range subnets {
233+
if subnet.Name == nil {
234+
return vnetResourceIDs, fmt.Errorf("subnet name is nil")
235+
}
236+
sn := *subnet.Name
237+
vnetResourceID := d.getSubnetResourceID(vnetResourceGroup, vnetName, sn)
238+
klog.V(2).Infof("set vnetResourceID %s", vnetResourceID)
239+
vnetResourceIDs = append(vnetResourceIDs, vnetResourceID)
240+
241+
endpointLocaions := []string{location}
242+
storageServiceEndpoint := network.ServiceEndpointPropertiesFormat{
243+
Service: &storageService,
244+
Locations: &endpointLocaions,
245+
}
246+
storageServiceExists := false
247+
if subnet.SubnetPropertiesFormat == nil {
248+
subnet.SubnetPropertiesFormat = &network.SubnetPropertiesFormat{}
249+
}
250+
if subnet.SubnetPropertiesFormat.ServiceEndpoints == nil {
251+
subnet.SubnetPropertiesFormat.ServiceEndpoints = &[]network.ServiceEndpointPropertiesFormat{}
252+
}
253+
serviceEndpoints := *subnet.SubnetPropertiesFormat.ServiceEndpoints
254+
for _, v := range serviceEndpoints {
255+
if strings.HasPrefix(pointer.StringDeref(v.Service, ""), storageService) {
256+
storageServiceExists = true
257+
klog.V(4).Infof("serviceEndpoint(%s) is already in subnet(%s)", storageService, sn)
258+
break
259+
}
260+
}
261+
262+
if !storageServiceExists {
263+
serviceEndpoints = append(serviceEndpoints, storageServiceEndpoint)
264+
subnet.SubnetPropertiesFormat.ServiceEndpoints = &serviceEndpoints
230265

231-
if err := d.cloud.SubnetsClient.CreateOrUpdate(ctx, vnetResourceGroup, vnetName, subnetName, subnet); err != nil {
232-
return fmt.Errorf("failed to update the subnet %s under vnet %s: %v", subnetName, vnetName, err)
266+
klog.V(2).Infof("begin to update the subnet %s under vnet %s in rg %s", sn, vnetName, vnetResourceGroup)
267+
if err := d.cloud.SubnetsClient.CreateOrUpdate(ctx, vnetResourceGroup, vnetName, sn, subnet); err != nil {
268+
return vnetResourceIDs, fmt.Errorf("failed to update the subnet %s under vnet %s: %v", sn, vnetName, err)
269+
}
233270
}
234-
klog.V(2).Infof("serviceEndpoint(%s) is appended in subnet(%s)", storageService, subnetName)
235271
}
236-
237-
return nil
272+
// cache the subnet update
273+
d.subnetCache.Set(lockKey, vnetResourceIDs)
274+
return vnetResourceIDs, nil
238275
}
239276

240277
// inClusterConfig is copied from https://github.com/kubernetes/client-go/blob/b46677097d03b964eab2d67ffbb022403996f4d4/rest/config.go#L507-L541

pkg/azurefile/azure_test.go

+14-25
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@ import (
2828
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2022-07-01/network"
2929
"github.com/stretchr/testify/assert"
3030
"go.uber.org/mock/gomock"
31+
"k8s.io/utils/pointer"
3132

3233
"sigs.k8s.io/azurefile-csi-driver/test/utils/testutil"
3334
"sigs.k8s.io/cloud-provider-azure/pkg/azureclients/subnetclient/mocksubnetclient"
34-
"sigs.k8s.io/cloud-provider-azure/pkg/retry"
3535

3636
azureprovider "sigs.k8s.io/cloud-provider-azure/pkg/provider"
3737
)
@@ -246,25 +246,14 @@ func TestUpdateSubnetServiceEndpoints(t *testing.T) {
246246
testFunc func(t *testing.T)
247247
}{
248248
{
249-
name: "[fail] no subnet",
250-
testFunc: func(t *testing.T) {
251-
retErr := retry.NewError(false, fmt.Errorf("the subnet does not exist"))
252-
mockSubnetClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(network.Subnet{}, retErr).Times(1)
253-
expectedErr := fmt.Errorf("failed to get the subnet %s under vnet %s: %v", config.SubnetName, config.VnetName, retErr)
254-
err := d.updateSubnetServiceEndpoints(ctx, "", "", "")
255-
if !reflect.DeepEqual(err, expectedErr) {
256-
t.Errorf("Unexpected error: %v", err)
257-
}
258-
},
259-
},
260-
{
261-
name: "[success] subnetPropertiesFormat is nil",
249+
name: "[fail] subnet name is nil",
262250
testFunc: func(t *testing.T) {
263251
mockSubnetClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(network.Subnet{}, nil).Times(1)
264252
mockSubnetClient.EXPECT().CreateOrUpdate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
265253

266-
err := d.updateSubnetServiceEndpoints(ctx, "", "", "")
267-
if !reflect.DeepEqual(err, nil) {
254+
_, err := d.updateSubnetServiceEndpoints(ctx, "", "", "subnetname")
255+
expectedErr := fmt.Errorf("subnet name is nil")
256+
if !reflect.DeepEqual(err, expectedErr) {
268257
t.Errorf("Unexpected error: %v", err)
269258
}
270259
},
@@ -274,12 +263,11 @@ func TestUpdateSubnetServiceEndpoints(t *testing.T) {
274263
testFunc: func(t *testing.T) {
275264
fakeSubnet := network.Subnet{
276265
SubnetPropertiesFormat: &network.SubnetPropertiesFormat{},
266+
Name: pointer.String("subnetName"),
277267
}
278268

279269
mockSubnetClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fakeSubnet, nil).Times(1)
280-
mockSubnetClient.EXPECT().CreateOrUpdate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
281-
282-
err := d.updateSubnetServiceEndpoints(ctx, "", "", "")
270+
_, err := d.updateSubnetServiceEndpoints(ctx, "", "", "subnetname")
283271
if !reflect.DeepEqual(err, nil) {
284272
t.Errorf("Unexpected error: %v", err)
285273
}
@@ -292,12 +280,12 @@ func TestUpdateSubnetServiceEndpoints(t *testing.T) {
292280
SubnetPropertiesFormat: &network.SubnetPropertiesFormat{
293281
ServiceEndpoints: &[]network.ServiceEndpointPropertiesFormat{},
294282
},
283+
Name: pointer.String("subnetName"),
295284
}
296285

297-
mockSubnetClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fakeSubnet, nil).Times(1)
298-
mockSubnetClient.EXPECT().CreateOrUpdate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
286+
mockSubnetClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fakeSubnet, nil).AnyTimes()
299287

300-
err := d.updateSubnetServiceEndpoints(ctx, "", "", "")
288+
_, err := d.updateSubnetServiceEndpoints(ctx, "", "", "subnetname")
301289
if !reflect.DeepEqual(err, nil) {
302290
t.Errorf("Unexpected error: %v", err)
303291
}
@@ -314,11 +302,12 @@ func TestUpdateSubnetServiceEndpoints(t *testing.T) {
314302
},
315303
},
316304
},
305+
Name: pointer.String("subnetName"),
317306
}
318307

319-
mockSubnetClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fakeSubnet, nil).Times(1)
308+
mockSubnetClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fakeSubnet, nil).AnyTimes()
320309

321-
err := d.updateSubnetServiceEndpoints(ctx, "", "", "")
310+
_, err := d.updateSubnetServiceEndpoints(ctx, "", "", "subnetname")
322311
if !reflect.DeepEqual(err, nil) {
323312
t.Errorf("Unexpected error: %v", err)
324313
}
@@ -329,7 +318,7 @@ func TestUpdateSubnetServiceEndpoints(t *testing.T) {
329318
testFunc: func(t *testing.T) {
330319
d.cloud.SubnetsClient = nil
331320
expectedErr := fmt.Errorf("SubnetsClient is nil")
332-
err := d.updateSubnetServiceEndpoints(ctx, "", "", "")
321+
_, err := d.updateSubnetServiceEndpoints(ctx, "", "", "")
333322
if !reflect.DeepEqual(err, expectedErr) {
334323
t.Errorf("Unexpected error: %v", err)
335324
}

pkg/azurefile/azurefile.go

+6
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,8 @@ type Driver struct {
283283
volStatsCache azcache.Resource
284284
// a timed cache storing account which should use sastoken for azcopy based volume cloning
285285
azcopySasTokenCache azcache.Resource
286+
// a timed cache storing subnet operations
287+
subnetCache azcache.Resource
286288
// sas expiry time for azcopy in volume clone
287289
sasTokenExpirationMinutes int
288290
// azcopy timeout for volume clone and snapshot restore
@@ -367,6 +369,10 @@ func NewDriver(options *DriverOptions) *Driver {
367369
klog.Fatalf("%v", err)
368370
}
369371

372+
if driver.subnetCache, err = azcache.NewTimedCache(10*time.Minute, getter, false); err != nil {
373+
klog.Fatalf("%v", err)
374+
}
375+
370376
return &driver
371377
}
372378

pkg/azurefile/controllerserver.go

+3-10
Original file line numberDiff line numberDiff line change
@@ -362,16 +362,9 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest)
362362
setKeyValueInMap(parameters, protocolField, protocol)
363363

364364
if !pointer.BoolDeref(createPrivateEndpoint, false) {
365-
// set VirtualNetworkResourceIDs for storage account firewall setting
366-
subnets := strings.Split(subnetName, ",")
367-
for _, subnet := range subnets {
368-
subnet = strings.TrimSpace(subnet)
369-
vnetResourceID := d.getSubnetResourceID(vnetResourceGroup, vnetName, subnet)
370-
klog.V(2).Infof("set vnetResourceID(%s) for NFS protocol", vnetResourceID)
371-
vnetResourceIDs = append(vnetResourceIDs, vnetResourceID)
372-
if err := d.updateSubnetServiceEndpoints(ctx, vnetResourceGroup, vnetName, subnet); err != nil {
373-
return nil, status.Errorf(codes.Internal, "update service endpoints failed with error: %v", err)
374-
}
365+
var err error
366+
if vnetResourceIDs, err = d.updateSubnetServiceEndpoints(ctx, vnetResourceGroup, vnetName, subnetName); err != nil {
367+
return nil, status.Errorf(codes.Internal, "update service endpoints failed with error: %v", err)
375368
}
376369
}
377370
}

pkg/azurefile/controllerserver_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -667,9 +667,9 @@ func TestCreateVolume(t *testing.T) {
667667
mockSubnetClient := mocksubnetclient.NewMockInterface(ctrl)
668668
fakeCloud.SubnetsClient = mockSubnetClient
669669

670-
mockSubnetClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(network.Subnet{}, retErr).Times(1)
670+
mockSubnetClient.EXPECT().List(gomock.Any(), gomock.Any(), gomock.Any()).Return([]network.Subnet{}, retErr).Times(1)
671671

672-
expectedErr := status.Errorf(codes.Internal, "update service endpoints failed with error: failed to get the subnet fake-subnet under vnet fake-vnet: &{false 0 0001-01-01 00:00:00 +0000 UTC the subnet does not exist}")
672+
expectedErr := status.Errorf(codes.Internal, "update service endpoints failed with error: failed to list the subnets under rg rg vnet fake-vnet: Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: the subnet does not exist")
673673
_, err := d.CreateVolume(ctx, req)
674674
if !reflect.DeepEqual(err, expectedErr) {
675675
t.Errorf("Unexpected error: %v", err)

0 commit comments

Comments
 (0)