Skip to content

Commit f95c9cc

Browse files
mpryc and claude committed
Add timeout and retry to CLI log/describe commands in e2e tests
Prevents tests from hanging indefinitely when streaming logs from object storage. Uses a 5-minute timeout with 3 retries.

Co-Authored-By: Claude <noreply@anthropic.com>
Signed-off-by: Michal Pryc <mpryc@redhat.com>
1 parent f75a38d commit f95c9cc

3 files changed

Lines changed: 193 additions & 10 deletions

File tree

tests/e2e/lib/backup_cli.go

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"fmt"
66
"log"
77
"strings"
8+
"time"
89

910
velero "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
1011
"k8s.io/apimachinery/pkg/util/wait"
@@ -191,37 +192,73 @@ func IsBackupCompletedSuccessfullyViaCLI(name string) (bool, error) {
191192
)
192193
}
193194

194-
// DescribeBackupViaCLI describes backup using the OADP CLI
195+
// DescribeBackupViaCLI describes backup using the OADP CLI with default timeout and retry.
196+
// The timeout prevents the command from hanging when retrieving backup details from object storage.
195197
func DescribeBackupViaCLI(name string) (backupDescription string) {
198+
return DescribeBackupViaCLIWithOptions(name, DefaultCLITimeout, DefaultCLIRetries, DefaultCLIRetryDelay)
199+
}
200+
201+
// DescribeBackupViaCLIWithOptions describes backup using the OADP CLI with specified timeout and retry options.
202+
func DescribeBackupViaCLIWithOptions(name string, timeout time.Duration, maxRetries int, retryDelay time.Duration) (backupDescription string) {
196203
// Use CLI to describe backup
197204
cmd := &CLICommand{
198205
Resource: "backup",
199206
Action: "describe",
200207
Name: name,
201208
Options: []string{"--details"},
202209
}
203-
output, err := cmd.Execute()
210+
211+
var output []byte
212+
var err error
213+
214+
if maxRetries > 1 {
215+
output, err = cmd.ExecuteWithTimeoutAndRetry(timeout, maxRetries, retryDelay)
216+
} else {
217+
output, err = cmd.ExecuteWithTimeout(timeout)
218+
}
219+
204220
if err != nil {
205221
return fmt.Sprintf("could not describe backup via CLI: %v, output: %s", err, string(output))
206222
}
207223

208224
return string(output)
209225
}
210226

211-
// BackupLogsViaCLI gets backup logs using the OADP CLI
227+
// BackupLogsViaCLI gets backup logs using the OADP CLI with default timeout and retry.
228+
// The timeout prevents the command from hanging indefinitely when streaming logs from object storage.
229+
// Retry logic helps handle transient network issues.
212230
func BackupLogsViaCLI(name string) (backupLogs string, err error) {
231+
return BackupLogsViaCLIWithOptions(name, DefaultCLITimeout, DefaultCLIRetries, DefaultCLIRetryDelay)
232+
}
233+
234+
// BackupLogsViaCLIWithTimeout gets backup logs using the OADP CLI with a specified timeout (no retry).
235+
func BackupLogsViaCLIWithTimeout(name string, timeout time.Duration) (backupLogs string, err error) {
236+
return BackupLogsViaCLIWithOptions(name, timeout, 1, 0)
237+
}
238+
239+
// BackupLogsViaCLIWithOptions gets backup logs using the OADP CLI with specified timeout and retry options.
240+
func BackupLogsViaCLIWithOptions(name string, timeout time.Duration, maxRetries int, retryDelay time.Duration) (backupLogs string, err error) {
213241
if name == "" {
214242
return "", fmt.Errorf("backup name cannot be empty")
215243
}
216244

217-
// Use CLI to get backup logs
245+
// Use CLI to get backup logs with timeout and retry to prevent hanging
218246
cmd := &CLICommand{
219247
Resource: "backup",
220248
Action: "logs",
221249
Name: name,
222250
Options: []string{},
223251
}
224-
output, cmdErr := cmd.ExecuteOutput()
252+
253+
var output []byte
254+
var cmdErr error
255+
256+
if maxRetries > 1 {
257+
output, cmdErr = cmd.ExecuteOutputWithTimeoutAndRetry(timeout, maxRetries, retryDelay)
258+
} else {
259+
output, cmdErr = cmd.ExecuteOutputWithTimeout(timeout)
260+
}
261+
225262
if cmdErr != nil {
226263
return "", fmt.Errorf("failed to get backup logs via CLI: %v", cmdErr)
227264
}

tests/e2e/lib/cli_common.go

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,31 @@
11
package lib
22

33
import (
4+
"context"
45
"fmt"
56
"log"
67
"os"
78
"os/exec"
89
"path/filepath"
910
"strings"
11+
"time"
12+
)
13+
14+
// Defaults applied to CLI commands that may hang (e.g., log streaming) or
// fail transiently.
const (
	// DefaultCLITimeout is the default time limit for one CLI command invocation.
	DefaultCLITimeout = 5 * time.Minute

	// DefaultCLIRetries is the default maximum number of attempts for a CLI command.
	DefaultCLIRetries = 3

	// DefaultCLIRetryDelay is the default pause between two consecutive attempts.
	DefaultCLIRetryDelay = 10 * time.Second
)
1122

1223
// CLICommand describes one `kubectl oadp <resource> <action> [name] [options...]`
// invocation.
type CLICommand struct {
	// Resource is the OADP resource kind: "backup" or "restore".
	Resource string
	// Action is the verb applied to the resource: "create", "get", "delete", etc.
	Action string
	// Name is the resource name; it is left off the command line when empty.
	Name string
	// Options holds extra arguments appended after the name.
	Options []string
	// Timeout is an optional timeout for commands that may hang.
	Timeout time.Duration
}
1830

1931
func (c *CLICommand) Execute() ([]byte, error) {
@@ -40,6 +52,103 @@ func (c *CLICommand) ExecuteOutput() ([]byte, error) {
4052
return cmd.Output()
4153
}
4254

55+
// ExecuteWithTimeout executes the CLI command with a timeout.
56+
// If the timeout is exceeded, the command is killed and an error is returned.
57+
func (c *CLICommand) ExecuteWithTimeout(timeout time.Duration) ([]byte, error) {
58+
args := []string{"oadp", c.Resource, c.Action}
59+
if c.Name != "" {
60+
args = append(args, c.Name)
61+
}
62+
args = append(args, c.Options...)
63+
64+
c.LogCLICommand()
65+
66+
ctx, cancel := context.WithTimeout(context.Background(), timeout)
67+
defer cancel()
68+
69+
cmd := exec.CommandContext(ctx, "kubectl", args...)
70+
output, err := cmd.CombinedOutput()
71+
72+
if ctx.Err() == context.DeadlineExceeded {
73+
return output, fmt.Errorf("command timed out after %v: kubectl %s", timeout, strings.Join(args, " "))
74+
}
75+
76+
return output, err
77+
}
78+
79+
// ExecuteOutputWithTimeout executes the CLI command with a timeout and returns stdout only.
80+
// If the timeout is exceeded, the command is killed and an error is returned.
81+
func (c *CLICommand) ExecuteOutputWithTimeout(timeout time.Duration) ([]byte, error) {
82+
args := []string{"oadp", c.Resource, c.Action}
83+
if c.Name != "" {
84+
args = append(args, c.Name)
85+
}
86+
args = append(args, c.Options...)
87+
88+
c.LogCLICommand()
89+
90+
ctx, cancel := context.WithTimeout(context.Background(), timeout)
91+
defer cancel()
92+
93+
cmd := exec.CommandContext(ctx, "kubectl", args...)
94+
output, err := cmd.Output()
95+
96+
if ctx.Err() == context.DeadlineExceeded {
97+
return output, fmt.Errorf("command timed out after %v: kubectl %s", timeout, strings.Join(args, " "))
98+
}
99+
100+
return output, err
101+
}
102+
103+
// ExecuteOutputWithTimeoutAndRetry executes the CLI command with a timeout and retry logic.
104+
// It retries the command up to maxRetries times with a delay between attempts.
105+
// This is useful for commands that may fail due to transient issues (e.g., network problems).
106+
func (c *CLICommand) ExecuteOutputWithTimeoutAndRetry(timeout time.Duration, maxRetries int, retryDelay time.Duration) ([]byte, error) {
107+
var lastErr error
108+
var lastOutput []byte
109+
110+
for attempt := 1; attempt <= maxRetries; attempt++ {
111+
output, err := c.ExecuteOutputWithTimeout(timeout)
112+
if err == nil {
113+
return output, nil
114+
}
115+
116+
lastErr = err
117+
lastOutput = output
118+
119+
if attempt < maxRetries {
120+
log.Printf("CLI command failed (attempt %d/%d): %v. Retrying in %v...", attempt, maxRetries, err, retryDelay)
121+
time.Sleep(retryDelay)
122+
}
123+
}
124+
125+
return lastOutput, fmt.Errorf("CLI command failed after %d attempts: %v", maxRetries, lastErr)
126+
}
127+
128+
// ExecuteWithTimeoutAndRetry executes the CLI command with a timeout and retry logic.
129+
// It retries the command up to maxRetries times with a delay between attempts.
130+
func (c *CLICommand) ExecuteWithTimeoutAndRetry(timeout time.Duration, maxRetries int, retryDelay time.Duration) ([]byte, error) {
131+
var lastErr error
132+
var lastOutput []byte
133+
134+
for attempt := 1; attempt <= maxRetries; attempt++ {
135+
output, err := c.ExecuteWithTimeout(timeout)
136+
if err == nil {
137+
return output, nil
138+
}
139+
140+
lastErr = err
141+
lastOutput = output
142+
143+
if attempt < maxRetries {
144+
log.Printf("CLI command failed (attempt %d/%d): %v. Retrying in %v...", attempt, maxRetries, err, retryDelay)
145+
time.Sleep(retryDelay)
146+
}
147+
}
148+
149+
return lastOutput, fmt.Errorf("CLI command failed after %d attempts: %v", maxRetries, lastErr)
150+
}
151+
43152
func (c *CLICommand) LogCLICommand() {
44153
args := []string{"kubectl", "oadp", c.Resource, c.Action}
45154
if c.Name != "" {

tests/e2e/lib/restore_cli.go

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"fmt"
66
"log"
77
"strings"
8+
"time"
89

910
velero "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
1011
"k8s.io/apimachinery/pkg/util/wait"
@@ -166,37 +167,73 @@ func IsRestoreCompletedSuccessfullyViaCLI(name string) (bool, error) {
166167
)
167168
}
168169

169-
// DescribeRestoreViaCLI describes restore using the OADP CLI
170+
// DescribeRestoreViaCLI describes restore using the OADP CLI with default timeout and retry.
171+
// The timeout prevents the command from hanging when retrieving restore details from object storage.
170172
func DescribeRestoreViaCLI(name string) string {
173+
return DescribeRestoreViaCLIWithOptions(name, DefaultCLITimeout, DefaultCLIRetries, DefaultCLIRetryDelay)
174+
}
175+
176+
// DescribeRestoreViaCLIWithOptions describes restore using the OADP CLI with specified timeout and retry options.
177+
func DescribeRestoreViaCLIWithOptions(name string, timeout time.Duration, maxRetries int, retryDelay time.Duration) string {
171178
// Use CLI to describe restore
172179
cmd := &CLICommand{
173180
Resource: "restore",
174181
Action: "describe",
175182
Name: name,
176183
Options: []string{"--details"},
177184
}
178-
output, err := cmd.Execute()
185+
186+
var output []byte
187+
var err error
188+
189+
if maxRetries > 1 {
190+
output, err = cmd.ExecuteWithTimeoutAndRetry(timeout, maxRetries, retryDelay)
191+
} else {
192+
output, err = cmd.ExecuteWithTimeout(timeout)
193+
}
194+
179195
if err != nil {
180196
return fmt.Sprintf("could not describe restore via CLI: %v, output: %s", err, string(output))
181197
}
182198

183199
return string(output)
184200
}
185201

186-
// RestoreLogsViaCLI gets restore logs using the OADP CLI
202+
// RestoreLogsViaCLI gets restore logs using the OADP CLI with default timeout and retry.
203+
// The timeout prevents the command from hanging indefinitely when streaming logs from object storage.
204+
// Retry logic helps handle transient network issues.
187205
func RestoreLogsViaCLI(name string) (restoreLogs string, err error) {
206+
return RestoreLogsViaCLIWithOptions(name, DefaultCLITimeout, DefaultCLIRetries, DefaultCLIRetryDelay)
207+
}
208+
209+
// RestoreLogsViaCLIWithTimeout gets restore logs using the OADP CLI with a specified timeout (no retry).
210+
func RestoreLogsViaCLIWithTimeout(name string, timeout time.Duration) (restoreLogs string, err error) {
211+
return RestoreLogsViaCLIWithOptions(name, timeout, 1, 0)
212+
}
213+
214+
// RestoreLogsViaCLIWithOptions gets restore logs using the OADP CLI with specified timeout and retry options.
215+
func RestoreLogsViaCLIWithOptions(name string, timeout time.Duration, maxRetries int, retryDelay time.Duration) (restoreLogs string, err error) {
188216
if name == "" {
189217
return "", fmt.Errorf("restore name cannot be empty")
190218
}
191219

192-
// Use CLI to get restore logs
220+
// Use CLI to get restore logs with timeout and retry to prevent hanging
193221
cmd := &CLICommand{
194222
Resource: "restore",
195223
Action: "logs",
196224
Name: name,
197225
Options: []string{},
198226
}
199-
output, cmdErr := cmd.ExecuteOutput()
227+
228+
var output []byte
229+
var cmdErr error
230+
231+
if maxRetries > 1 {
232+
output, cmdErr = cmd.ExecuteOutputWithTimeoutAndRetry(timeout, maxRetries, retryDelay)
233+
} else {
234+
output, cmdErr = cmd.ExecuteOutputWithTimeout(timeout)
235+
}
236+
200237
if cmdErr != nil {
201238
return "", fmt.Errorf("failed to get restore logs via CLI: %v", cmdErr)
202239
}

0 commit comments

Comments
 (0)