Skip to content

Commit

Permalink
Make Google Batch auto retry codes configurable (#5148)
Browse files Browse the repository at this point in the history

Signed-off-by: Paolo Di Tommaso <[email protected]>
Signed-off-by: Ben Sherman <[email protected]>
Co-authored-by: Ben Sherman <[email protected]>
  • Loading branch information
pditommaso and bentsherman authored Jul 16, 2024
1 parent 3019583 commit e562ce0
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 6 deletions.
6 changes: 6 additions & 0 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,11 @@ Read the {ref}`google-page` page for more information.

The following settings are available for Google Cloud Batch:

`google.batch.autoRetryExitCodes`
: :::{versionadded} 24.07.0-edge
:::
: Defines the list of exit codes that will be automatically retried by Google Batch when `google.batch.maxSpotAttempts` is greater than 0 (default: `[50001]`). Refer to the [Google Batch documentation](https://cloud.google.com/batch/docs/troubleshooting#reserved-exit-codes) for the list of retryable exit codes.

`google.enableRequesterPaysBuckets`
: When `true`, uses the given Google Cloud project ID as the billing project for storage access. This is required when accessing data from *requester pays enabled* buckets. See [Requester Pays on Google Cloud Storage documentation](https://cloud.google.com/storage/docs/requester-pays) (default: `false`).

Expand All @@ -865,6 +870,7 @@ The following settings are available for Google Cloud Batch:
: :::{versionadded} 23.11.0-edge
:::
: Maximum number of execution attempts for a job interrupted by a Compute Engine spot reclaim event (default: `5`).
: See also: `google.batch.autoRetryExitCodes`

`google.project`
: The Google Cloud project ID to use for pipeline execution.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import nextflow.Session
import nextflow.cloud.google.GoogleOpts
import nextflow.exception.ProcessUnrecoverableException
import nextflow.util.MemoryUnit
/**
* Model Google Batch config settings
Expand All @@ -32,6 +33,8 @@ import nextflow.util.MemoryUnit
@CompileStatic
class BatchConfig {

/**
 * Exit codes retried automatically when `google.batch.autoRetryExitCodes`
 * is not set in the configuration; used as the fallback value in `create`.
 *
 * Declared `final` so the shared static reference cannot be reassigned
 * (the list returned by `List.of` is itself unmodifiable).
 *
 * NOTE(review): 50001 is presumably the Google Batch reserved exit code for
 * spot VM preemption - confirm against the Batch reserved exit codes docs.
 */
private static final List<Integer> DEFAULT_RETRY_LIST = List.of(50001)

private GoogleOpts googleOpts
private GoogleCredentials credentials
private List<String> allowedLocations
Expand All @@ -46,6 +49,7 @@ class BatchConfig {
private String subnetwork
private String serviceAccountEmail
private BatchRetryConfig retryConfig
private List<Integer> autoRetryExitCodes

/** Returns the value of the `googleOpts` field. */
GoogleOpts getGoogleOpts() {
    googleOpts
}

/** Returns the value of the `credentials` field. */
GoogleCredentials getCredentials() {
    credentials
}
Expand All @@ -61,6 +65,7 @@ class BatchConfig {
/** Returns the subnetwork that `create` reads from `google.batch.subnetwork`. */
String getSubnetwork() {
    return subnetwork
}

/** Returns the service account email that `create` reads from `google.batch.serviceAccountEmail`. */
String getServiceAccountEmail() {
    return serviceAccountEmail
}

/** Returns the retry settings that `create` builds from the `google.batch.retryPolicy` map. */
BatchRetryConfig getRetryConfig() {
    return retryConfig
}

/**
 * Returns the exit codes that `create` reads from
 * `google.batch.autoRetryExitCodes`, passing `DEFAULT_RETRY_LIST`
 * as the fallback value.
 */
List<Integer> getAutoRetryExitCodes() {
    return autoRetryExitCodes
}

static BatchConfig create(Session session) {
final result = new BatchConfig()
Expand All @@ -78,6 +83,7 @@ class BatchConfig {
result.subnetwork = session.config.navigate('google.batch.subnetwork')
result.serviceAccountEmail = session.config.navigate('google.batch.serviceAccountEmail')
result.retryConfig = new BatchRetryConfig( session.config.navigate('google.batch.retryPolicy') as Map ?: Map.of() )
result.autoRetryExitCodes = session.config.navigate('google.batch.autoRetryExitCodes',DEFAULT_RETRY_LIST) as List<Integer>
return result
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,33 @@ class BatchConfigTest extends Specification {
def 'should create batch config' () {
given:
def CONFIG = [google: [
batch: [
spot: true,
retryPolicy: [maxAttempts: 10]
]
] ]
batch: [
spot: true
]
] ]
def session = Mock(Session) { getConfig()>>CONFIG }

when:
def config = BatchConfig.create(session)
then:
config.getSpot()
and:
config.retryConfig.maxAttempts == 5
config.maxSpotAttempts == 5
config.autoRetryExitCodes == [50001]
}

@Requires({System.getenv('GOOGLE_APPLICATION_CREDENTIALS')})
def 'should create batch config with custom settings' () {
given:
def CONFIG = [google: [
batch: [
spot: true,
maxSpotAttempts: 8,
autoRetryExitCodes: [50001, 50003, 50005],
retryPolicy: [maxAttempts: 10]
]
] ]
def session = Mock(Session) { getConfig()>>CONFIG }

when:
Expand All @@ -43,7 +65,8 @@ class BatchConfigTest extends Specification {
config.getSpot()
and:
config.retryConfig.maxAttempts == 10

config.maxSpotAttempts == 8
config.autoRetryExitCodes == [50001, 50003, 50005]
}

}

0 comments on commit e562ce0

Please sign in to comment.