Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions contrib/test/ci/vars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ kata_skip_network_tests:
- 'test "Connect to pod hostport from the host"'
- 'test "Clean up network if pod sandbox fails"'
- 'test "Clean up network if pod sandbox gets killed"'
- 'test "Network recovery after reboot with destroyed netns"'
kata_skip_pod_tests:
- 'test "pass pod sysctls to runtime"'
- 'test "pass pod sysctls to runtime when in userns"'
Expand Down
3 changes: 0 additions & 3 deletions internal/storage/image_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ var _ = t.Describe("Image", func() {
testDockerRegistry = "docker.io"
testQuayRegistry = "quay.io"
testRedHatRegistry = "registry.access.redhat.com"
testFedoraRegistry = "registry.fedoraproject.org"
testImageName = "image"
testImageAlias = "image-for-testing"
testImageAliasResolved = "registry.crio.test.com/repo"
Expand Down Expand Up @@ -193,7 +192,6 @@ var _ = t.Describe("Image", func() {
Expect(refsToNames(refs)).To(Equal([]string{
testQuayRegistry + "/" + testImageName + ":latest",
testRedHatRegistry + "/" + testImageName + ":latest",
testFedoraRegistry + "/" + testImageName + ":latest",
testDockerRegistry + "/library/" + testImageName + ":latest",
}))
})
Expand Down Expand Up @@ -247,7 +245,6 @@ var _ = t.Describe("Image", func() {
Expect(refsToNames(refs)).To(Equal([]string{
testQuayRegistry + "/" + testImageName + "@sha256:" + testSHA256,
testRedHatRegistry + "/" + testImageName + "@sha256:" + testSHA256,
testFedoraRegistry + "/" + testImageName + "@sha256:" + testSHA256,
testDockerRegistry + "/library/" + testImageName + "@sha256:" + testSHA256,
}))
})
Expand Down
39 changes: 29 additions & 10 deletions server/sandbox_network.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,23 +181,42 @@

podNetwork, err := s.newPodNetwork(ctx, sb)
if err != nil {
return err
return fmt.Errorf("failed to create pod network for sandbox %s(%s): %w", sb.Name(), sb.ID(), err)

Check warning on line 184 in server/sandbox_network.go

View check run for this annotation

Codecov / codecov/patch

server/sandbox_network.go#L184

Added line #L184 was not covered by tests
}

if err := s.config.CNIPlugin().TearDownPodWithContext(stopCtx, podNetwork); err != nil {
retErr := fmt.Errorf("failed to destroy network for pod sandbox %s(%s): %w", sb.Name(), sb.ID(), err)

// Check if the network namespace file exists and is valid before attempting CNI teardown.
// If the file doesn't exist or is invalid, skip CNI teardown and mark network as stopped.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We had IP leaks on containerd because of a similar change that ignored calling the network CNI plugin on some cases, causing the CNI to no release the IP address containerd/containerd#12132

I can not see this is similar or not, but @danwinship you should ensure that in this scenario the plugin gets the CNI DEL to release any resource associated to the Pod

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@aojea Thanks for calling that out. I can see the potential issue there. Let me go ahead and fix the issue.

if podNetwork.NetNS != "" {
if _, statErr := os.Stat(podNetwork.NetNS); statErr != nil {
return fmt.Errorf("%w: stat netns path %q: %w", retErr, podNetwork.NetNS, statErr)
// Network namespace file doesn't exist, mark network as stopped and return success
log.Debugf(ctx, "Network namespace file %s does not exist for pod sandbox %s(%s), skipping CNI teardown",
podNetwork.NetNS, sb.Name(), sb.ID())

return sb.SetNetworkStopped(ctx, true)
}

Check warning on line 196 in server/sandbox_network.go

View check run for this annotation

Codecov / codecov/patch

server/sandbox_network.go#L191-L196

Added lines #L191 - L196 were not covered by tests

if validateErr := s.validateNetworkNamespace(podNetwork.NetNS); validateErr != nil {
// Network namespace file exists but is invalid (e.g., corrupted or fake file)
log.Warnf(ctx, "Network namespace file %s is invalid for pod sandbox %s(%s): %v, removing and skipping CNI teardown",
podNetwork.NetNS, sb.Name(), sb.ID(), validateErr)
s.cleanupNetns(ctx, podNetwork.NetNS, sb)

return sb.SetNetworkStopped(ctx, true)
}
}

// The netns file may still exists, which means that it's likely
// corrupted. Remove it to allow cleanup of the network namespace:
if rmErr := os.RemoveAll(podNetwork.NetNS); rmErr != nil {
return fmt.Errorf("%w: failed to remove netns path: %w", retErr, rmErr)
if err := s.config.CNIPlugin().TearDownPodWithContext(stopCtx, podNetwork); err != nil {
log.Warnf(ctx, "Failed to destroy network for pod sandbox %s(%s): %v", sb.Name(), sb.ID(), err)

// If the network namespace exists but CNI teardown failed, try to clean it up.
if podNetwork.NetNS != "" {
if _, statErr := os.Stat(podNetwork.NetNS); statErr == nil {
// Clean up the netns file since CNI teardown failed.
s.cleanupNetns(ctx, podNetwork.NetNS, sb)
}

Check warning on line 216 in server/sandbox_network.go

View check run for this annotation

Codecov / codecov/patch

server/sandbox_network.go#L213-L216

Added lines #L213 - L216 were not covered by tests
}

log.Warnf(ctx, "Removed invalid netns path %s from pod sandbox %s(%s)", podNetwork.NetNS, sb.Name(), sb.ID())
return fmt.Errorf("network teardown failed for pod sandbox %s(%s): %w", sb.Name(), sb.ID(), err)
}

return sb.SetNetworkStopped(ctx, true)
Expand Down
24 changes: 24 additions & 0 deletions server/sandbox_network_freebsd.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//go:build freebsd
// +build freebsd

package server

import (
"context"

"github.com/cri-o/cri-o/internal/lib/sandbox"
"github.com/cri-o/cri-o/internal/log"
)

// validateNetworkNamespace checks if the given path is a valid network namespace
// On FreeBSD, this is a no-op since network namespaces are Linux-specific.
func (s *Server) validateNetworkNamespace(netnsPath string) error {
// Network namespaces are Linux-specific, so on FreeBSD we assume it's valid
return nil
}

// cleanupNetns removes a network namespace file and logs the action
// On FreeBSD, this is a no-op since network namespaces are Linux-specific.
func (s *Server) cleanupNetns(ctx context.Context, netnsPath string, sb *sandbox.Sandbox) {
log.Debugf(ctx, "Network namespace cleanup not supported on this platform")
}
36 changes: 36 additions & 0 deletions server/sandbox_network_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
//go:build linux
// +build linux

package server

import (
"context"
"fmt"
"os"

"github.com/containernetworking/plugins/pkg/ns"

"github.com/cri-o/cri-o/internal/lib/sandbox"
"github.com/cri-o/cri-o/internal/log"
)

// validateNetworkNamespace checks if the given path is a valid network namespace.
func (s *Server) validateNetworkNamespace(netnsPath string) error {
netns, err := ns.GetNS(netnsPath)
if err != nil {
return fmt.Errorf("invalid network namespace: %w", err)
}

defer netns.Close()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd remove the defer here.


return nil
}

// cleanupNetns removes a network namespace file and logs the action.
func (s *Server) cleanupNetns(ctx context.Context, netnsPath string, sb *sandbox.Sandbox) {
if rmErr := os.RemoveAll(netnsPath); rmErr != nil {
log.Warnf(ctx, "Failed to remove netns path %s: %v", netnsPath, rmErr)

Check warning on line 32 in server/sandbox_network_linux.go

View check run for this annotation

Codecov / codecov/patch

server/sandbox_network_linux.go#L32

Added line #L32 was not covered by tests
} else {
log.Infof(ctx, "Removed netns path %s from pod sandbox %s(%s)", netnsPath, sb.Name(), sb.ID())
}
}
23 changes: 23 additions & 0 deletions server/sandbox_network_unsupported.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
//go:build !linux && !freebsd
// +build !linux,!freebsd

package server

import (
"context"

"github.com/cri-o/cri-o/internal/lib/sandbox"
"github.com/cri-o/cri-o/internal/log"
)

// validateNetworkNamespace checks if the given path is a valid network namespace
// On unsupported platforms, this is a no-op since network namespaces are Linux-specific.
func (s *Server) validateNetworkNamespace(netnsPath string) error {
return nil
}

// cleanupNetns removes a network namespace file and logs the action
// On unsupported platforms, this is a no-op since network namespaces are Linux-specific.
func (s *Server) cleanupNetns(ctx context.Context, netnsPath string, sb *sandbox.Sandbox) {
log.Debugf(ctx, "Network namespace cleanup not supported on this platform")
}
4 changes: 3 additions & 1 deletion test/image.bats
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,9 @@ function teardown() {
mkdir -p "$TESTDIR/imagestore"
CONTAINER_IMAGESTORE="$TESTDIR/imagestore" start_crio

FEDORA="registry.fedoraproject.org/fedora"
# registry.fedoraproject.org is pretty flaky
# Moving to the stable quay.io
FEDORA="quay.io/fedora/fedora"
crictl pull $FEDORA
imageid=$(crictl images --quiet "$FEDORA")
[ "$imageid" != "" ]
Expand Down
39 changes: 38 additions & 1 deletion test/network.bats
Original file line number Diff line number Diff line change
Expand Up @@ -185,5 +185,42 @@ function check_networking() {

# be able to remove the sandbox
crictl rmp -f "$POD"
grep -q "Removed invalid netns path $NETNS_PATH$NS from pod sandbox" "$CRIO_LOG"
grep -q "Removed netns path $NETNS_PATH$NS from pod sandbox" "$CRIO_LOG"
}

@test "Network recovery after reboot with destroyed netns" {
# This test simulates a reboot scenario where network namespaces are destroyed
# but CRI-O needs to clean up pod network resources gracefully.

start_crio

pod_id=$(crictl runp "$TESTDATA"/sandbox_config.json)

# Get the network namespace path
NETNS_PATH=/var/run/netns/
NS=$(crictl inspectp "$pod_id" |
jq -er '.info.runtimeSpec.linux.namespaces[] | select(.type == "network").path | sub("'$NETNS_PATH'"; "")')

# Remove the network namespace.
ip netns del "$NS"

# Create a fake netns file.
touch "$NETNS_PATH$NS"

restart_crio

# Try to remove the pod.
crictl rmp -f "$pod_id" 2> /dev/null || true

grep -q "Successfully cleaned up network for pod" "$CRIO_LOG"

new_pod_id=$(crictl runp "$TESTDATA"/sandbox_config.json)

# Verify the new pod is running.
output=$(crictl inspectp "$new_pod_id" | jq -r '.status.state')
[[ "$output" == "SANDBOX_READY" ]]

# Clean up the new pod
crictl stopp "$new_pod_id"
crictl rmp "$new_pod_id"
}
2 changes: 1 addition & 1 deletion test/registries.conf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
unqualified-search-registries = ['quay.io' ,'registry.access.redhat.com', 'registry.fedoraproject.org', 'docker.io']
unqualified-search-registries = ['quay.io', 'registry.access.redhat.com', 'docker.io']

[aliases]
"image-for-testing" = "registry.crio.test.com/repo"
2 changes: 1 addition & 1 deletion test/testdata/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM registry.fedoraproject.org/fedora-minimal:38
FROM quay.io/fedora/fedora-minimal:38
RUN microdnf install -y coreutils \
gcc \
gzip \
Expand Down
Loading