From e193980ca37d9fbd1cd7eefc1f56d085a308903f Mon Sep 17 00:00:00 2001 From: jorgee Date: Wed, 9 Jul 2025 11:21:59 +0200 Subject: [PATCH 01/20] Include support for git-remote-s3 Signed-off-by: jorgee --- .../groovy/nextflow/scm/AssetManager.groovy | 2 +- .../nextflow/scm/RepositoryFactory.groovy | 16 ++ .../cloud/aws/scm/S3ProviderConfig.groovy | 89 ++++++++++ .../cloud/aws/scm/S3RepositoryFactory.groovy | 85 +++++++++ .../cloud/aws/scm/S3RepositoryProvider.groovy | 163 ++++++++++++++++++ .../cloud/aws/scm/jgit/S3BaseConnection.java | 152 ++++++++++++++++ .../cloud/aws/scm/jgit/S3FetchConnection.java | 137 +++++++++++++++ .../scm/jgit/S3GitCredentialsProvider.java | 66 +++++++ .../cloud/aws/scm/jgit/S3PushConnection.java | 94 ++++++++++ .../cloud/aws/scm/jgit/TransportS3.java | 89 ++++++++++ .../src/resources/META-INF/extensions.idx | 1 + 11 files changed, 893 insertions(+), 1 deletion(-) create mode 100644 plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy create mode 100644 plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryFactory.groovy create mode 100644 plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy create mode 100644 plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3BaseConnection.java create mode 100644 plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3FetchConnection.java create mode 100644 plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3GitCredentialsProvider.java create mode 100644 plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3PushConnection.java create mode 100644 plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/TransportS3.java diff --git a/modules/nextflow/src/main/groovy/nextflow/scm/AssetManager.groovy b/modules/nextflow/src/main/groovy/nextflow/scm/AssetManager.groovy index b35f1a06d2..f432b87fcc 100644 --- a/modules/nextflow/src/main/groovy/nextflow/scm/AssetManager.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/scm/AssetManager.groovy 
@@ -307,7 +307,7 @@ class AssetManager { @PackageScope String resolveNameFromGitUrl( String repository ) { - final isUrl = repository.startsWith('http://') || repository.startsWith('https://') || repository.startsWith('file:/') + final isUrl = repository.startsWith('http://') || repository.startsWith('https://') || repository.startsWith('file:/') || repository.startsWith('s3://') if( !isUrl ) return null diff --git a/modules/nextflow/src/main/groovy/nextflow/scm/RepositoryFactory.groovy b/modules/nextflow/src/main/groovy/nextflow/scm/RepositoryFactory.groovy index 0856ce24da..c305fee05b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/scm/RepositoryFactory.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/scm/RepositoryFactory.groovy @@ -88,6 +88,7 @@ class RepositoryFactory implements ExtensionPoint { // --== static definitions ==-- private static boolean codeCommitLoaded + private static boolean s3Loaded private static List factories0 private static List factories() { @@ -102,6 +103,11 @@ class RepositoryFactory implements ExtensionPoint { static RepositoryProvider newRepositoryProvider(ProviderConfig config, String project) { // check if it's needed to load new plugins + if( config.platform == 's3' && !s3Loaded){ + Plugins.startIfMissing('nf-amazon') + s3Loaded=true + factories0=null + } if( (config.name=='codecommit' || config.platform=='codecommit') && !codeCommitLoaded ) { Plugins.startIfMissing('nf-codecommit') codeCommitLoaded=true @@ -120,6 +126,11 @@ class RepositoryFactory implements ExtensionPoint { static ProviderConfig newProviderConfig(String name, Map attrs) { // check if it's needed to load new plugins + if( attrs.platform == 's3' && !s3Loaded){ + Plugins.startIfMissing('nf-amazon') + s3Loaded=true + factories0=null + } if( (name=='codecommit' || attrs.platform=='codecommit') && !codeCommitLoaded ) { Plugins.startIfMissing('nf-codecommit') codeCommitLoaded=true @@ -134,6 +145,11 @@ class RepositoryFactory implements ExtensionPoint { } 
static ProviderConfig getProviderConfig(List providers, GitUrl url) { + if( url.protocol.equals('s3') && !s3Loaded){ + Plugins.startIfMissing('nf-amazon') + s3Loaded=true + factories0=null + } if( url.domain.startsWith('git-codecommit.') && url.domain.endsWith('.amazonaws.com') && !codeCommitLoaded ) { Plugins.startIfMissing('nf-codecommit') codeCommitLoaded=true diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy new file mode 100644 index 0000000000..2d66a28d03 --- /dev/null +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy @@ -0,0 +1,89 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.cloud.aws.scm + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.Global +import nextflow.exception.AbortOperationException +import nextflow.scm.ProviderConfig +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider +import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider +import software.amazon.awssdk.regions.Region + +/** + * Implements a provider config for git-remote-s3 repositories + * + * @author Jorge Ejarque + */ +@Slf4j +@CompileStatic +class S3ProviderConfig extends ProviderConfig { + + private Region region = Region.US_EAST_1 + + private AwsCredentialsProvider awsCredentialsProvider = DefaultCredentialsProvider.builder().build() + + S3ProviderConfig(String name, Map values) { + super(name, values) + setDefaultsFromAwsConfig() + // Override with scm repo attributes + setValuesFromMap(values) + } + + S3ProviderConfig(String name){ + super(name,[ platform: 's3', server: "s3://$name"]) + setDefaultsFromAwsConfig() + } + + private void setDefaultsFromAwsConfig() { + final config = Global.session?.config?.aws as Map + if( config ) { + setValuesFromMap(config) + } + } + private void setValuesFromMap(Map values){ + if( values.region ) + region = Region.of(values.region as String) + if( values.accessKey && values.secretKey ){ + awsCredentialsProvider = StaticCredentialsProvider.create( + AwsBasicCredentials.builder() + .accessKeyId(values.accessKey as String) + .secretAccessKey(values.secretKey as String) + .build()) + } + } + + Region getRegion(){ + this.region + } + + AwsCredentialsProvider getAwsCredentialsProvider(){ + this.awsCredentialsProvider + } + + @Override + protected String resolveProjectName(String path){ + log.debug ("Resolving project name from $path. 
returning ") + if (!server.startsWith('s3://')) + new AbortOperationException("S3 project server doesn't start with s3://") + return "${server.substring('s3://'.size())}/$path" + } + +} diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryFactory.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryFactory.groovy new file mode 100644 index 0000000000..a6321e492e --- /dev/null +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryFactory.groovy @@ -0,0 +1,85 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.cloud.aws.scm + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.cloud.aws.scm.jgit.TransportS3 +import nextflow.plugin.Priority +import nextflow.scm.GitUrl +import nextflow.scm.ProviderConfig +import nextflow.scm.RepositoryFactory +import nextflow.scm.RepositoryProvider + +import java.util.concurrent.atomic.AtomicBoolean + +/** + * Implements a factory to create an instance of {@link S3RepositoryProvider} + * + * @author Jorge Ejarque + */ +@Slf4j +@Priority(-10) +@CompileStatic +class S3RepositoryFactory extends RepositoryFactory{ + + private static AtomicBoolean registered = new AtomicBoolean(false) + + @Override + protected RepositoryProvider createProviderInstance(ProviderConfig config, String project) { + if (!registered.get()) { + registered.set(true) + TransportS3.register() + } + + return config.platform == 's3' + ? new S3RepositoryProvider(project, config) + : null + } + + @Override + protected ProviderConfig getConfig(List providers, GitUrl url) { + // do not care about non AWS codecommit url + if( url.protocol != 's3' ) + return null + + // S3 repository config depends on the bucket name stored as domain + def config = providers.find( it -> it.domain == url.domain ) + if( config ) { + log.debug "Git url=$url (1) -> config=$config" + return config + } + + // still nothing, create a new instance + config = new S3ProviderConfig(url.domain) + + + return config + } + + @Override + protected ProviderConfig createConfigInstance(String name, Map attrs) { + final copy = new HashMap(attrs) + return copy.platform == 's3' + ? 
new S3ProviderConfig(name, copy) + : null + } + + + + +} diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy new file mode 100644 index 0000000000..35f4ecb6b1 --- /dev/null +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy @@ -0,0 +1,163 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.cloud.aws.scm + +import groovy.transform.CompileStatic +import groovy.transform.Memoized +import groovy.util.logging.Slf4j +import nextflow.cloud.aws.scm.jgit.S3GitCredentialsProvider +import nextflow.exception.AbortOperationException +import nextflow.scm.ProviderConfig +import nextflow.scm.RepositoryProvider +import org.eclipse.jgit.api.Git +import org.eclipse.jgit.api.errors.TransportException +import org.eclipse.jgit.transport.CredentialsProvider + +import java.nio.file.Files + + +/** + * Implements a repository provider for git-remote-s3 repositories. 
+ * + * @author Jorge Ejarque + */ +@Slf4j +@CompileStatic +class S3RepositoryProvider extends RepositoryProvider { + + S3RepositoryProvider(String project, ProviderConfig config) { + assert config instanceof S3ProviderConfig + log.debug("Creating S3 repository provider for $project") + this.project = project + this.config = config + } + /** {@inheritDoc} **/ + @Memoized + @Override + CredentialsProvider getGitCredentials() { + final providerConfig = this.config as S3ProviderConfig + final credentials = new S3GitCredentialsProvider() + if( providerConfig.region ) + credentials.setRegion(providerConfig.region) + if( providerConfig.awsCredentialsProvider ) + credentials.setAwsCredentialsProvider(providerConfig.awsCredentialsProvider) + return credentials + } + + /** {@inheritDoc} **/ + // called by AssetManager + // used to set credentials for a clone, pull, fetch, operation + @Override + boolean hasCredentials() { + // set to true + // uses AWS Credentials instead of username : password + // see getGitCredentials() + return true + } + + /** {@inheritDoc} **/ + @Override + String getName() { return project } + + /** {@inheritDoc} **/ + @Override + String getEndpointUrl() { + return "s3://$project" + } + + /** {@inheritDoc} **/ + // not used, but the abstract method needs to be overridden + @Override + String getContentUrl( String path ) { + throw new UnsupportedOperationException() + } + + /** {@inheritDoc} **/ + // called by AssetManager + @Override + String getCloneUrl() { getEndpointUrl() } + + /** {@inheritDoc} **/ + // called by AssetManager + @Override + String getRepositoryUrl() { getEndpointUrl() } + + /** {@inheritDoc} **/ + // called by AssetManager + // called by RepositoryProvider.readText() + @Override + byte[] readBytes( String path ) { + log.debug("Reading $path") + //Not possible to get a single file requires to clone the branch and get the file + final tmpDir = Files.createTempDirectory("s3-git-remote") + final command = Git.cloneRepository() + 
.setDirectory(tmpDir.toFile()) + .setCredentialsProvider(getGitCredentials()) + if( revision ) + command.setBranch(revision) + try { + command.call() + final file = tmpDir.resolve(path) + return file.getBytes() + } + catch (Exception e) { + log.debug(" unable to retrieve file: $path from repo: $project", e) + return null + } + finally{ + tmpDir.deleteDir() + } + } + + /** {@inheritDoc} **/ + // called by AssetManager + @Override + void validateRepo() { + // Nothing to check + } + + private String errMsg(Exception e) { + def msg = "Unable to access Git repository" + if( e.message ) + msg + " - ${e.message}" + else + msg += ": " + getCloneUrl() + return msg + } + + @Override + List getBranches() { + try { + return super.getBranches() + } + catch ( TransportException e) { + throw new AbortOperationException(errMsg(e), e) + } + } + + @Override + List getTags() { + try { + return super.getTags() + } + catch (TransportException e) { + throw new AbortOperationException(errMsg(e), e) + } + } + + +} diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3BaseConnection.java b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3BaseConnection.java new file mode 100644 index 0000000000..e9e017a347 --- /dev/null +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3BaseConnection.java @@ -0,0 +1,152 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.cloud.aws.scm.jgit; + +import org.eclipse.jgit.lib.*; +import org.eclipse.jgit.transport.Connection; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.core.ResponseInputStream; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.S3Object; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Base class for connections with a git-remote-s3 compatibility + * @author Jorge Ejarque + * + */ +public class S3BaseConnection implements Connection { + + protected static final Logger log = LoggerFactory.getLogger(S3BaseConnection.class); + protected TransportS3 transport; + protected String bucket; + protected String key; + protected S3Client s3; + private final Map advertisedRefs = new HashMap(); + + public S3BaseConnection(TransportS3 transport) { + this.transport = transport; + this.bucket = transport.getURI().getHost(); + this.key = transport.getURI().getPath().substring(1); + S3GitCredentialsProvider credentials = (S3GitCredentialsProvider) transport.getCredentialsProvider(); + this.s3 = S3Client.builder() + .region(credentials.getRegion()) + .credentialsProvider(credentials.getAwsCredentialsProvider()) + .build(); + try { + loadRefsMap(); + }catch (IOException e){ + final String message = String.format("Unable to get refs from remote s3://%s/%s", bucket, key); + throw new RuntimeException(message, e); + } + log.trace("Created S3 Connection for s3://{}/{}", bucket, key); + } + + private String getDefaultBranchRef() throws IOException { + + GetObjectRequest getObjectRequest = GetObjectRequest.builder() + .bucket(bucket) + .key(key + "/HEAD") + .build(); + + try (ResponseInputStream 
inputStream = s3.getObject(getObjectRequest)) { + return new String(inputStream.readAllBytes(), StandardCharsets.UTF_8); + } + } + + + + private void loadRefsMap() throws IOException { + final String defaultBranch = getDefaultBranchRef(); + addRefs("heads"); + addRefs("tags"); + Ref target = advertisedRefs.get(defaultBranch); + if (target != null) + advertisedRefs.put(Constants.HEAD, new SymbolicRef(Constants.HEAD, target)); + } + + private void addRefs(String refType) { + final List list = s3.listObjectsV2(ListObjectsV2Request.builder() + .bucket(bucket) + .prefix(key + "/refs/" + refType) + .build() + ).contents(); + + if (list == null || list.isEmpty()) { + log.debug("No {} refs found for s3://{}/{}", refType, bucket, key); + return; + } + + for (S3Object obj : list) { + addRef(obj); + } + } + private void addRef(S3Object obj) { + String key = obj.key(); + String[] parts = key.split("/"); + if (parts.length < 5) throw new RuntimeException("Incorrect key parts"); + // Expect: repo/refs///.bundle + final String type = parts[2]; + final String rBranch = parts[3]; + final String sha = parts[4].replace(".bundle", ""); + String name = String.format("refs/%s/%s" , type, rBranch); + ObjectId objectId = ObjectId.fromString(sha); + if ("heads".equals(type)) { + advertisedRefs.put(name, new ObjectIdRef.PeeledNonTag(Ref.Storage.NETWORK, name, objectId)); + } else if ("tags".equals(type)) { + advertisedRefs.put(name, new ObjectIdRef.Unpeeled(Ref.Storage.NETWORK, name, objectId)); + } + } + + @Override + public Map getRefsMap() { + return advertisedRefs; + } + + @Override + public Collection getRefs() { + return advertisedRefs.values(); + } + + @Override + public Ref getRef(String name) { + return advertisedRefs.get(name); + } + + @Override + public void close() { } + + @Override + public String getMessages() { + return ""; + } + + @Override + public String getPeerUserAgent() { + return ""; + } + +} \ No newline at end of file diff --git 
a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3FetchConnection.java b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3FetchConnection.java new file mode 100644 index 0000000000..ff06f8fea3 --- /dev/null +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3FetchConnection.java @@ -0,0 +1,137 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.cloud.aws.scm.jgit; + +import org.eclipse.jgit.errors.TransportException; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.ProgressMonitor; +import org.eclipse.jgit.lib.Ref; +import org.eclipse.jgit.transport.*; +import org.eclipse.jgit.util.FileUtils; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; +import software.amazon.awssdk.services.s3.model.S3Object; + +import java.io.IOException; +import java.io.OutputStream; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; + +/** + * Fetch Connection implementation compatible with git-remote-s3 storage. 
+ * + * @author Jorge Ejarque + */ +public class S3FetchConnection extends S3BaseConnection implements FetchConnection { + + public S3FetchConnection(TransportS3 transport) { + super(transport); + + } + + @Override + public void fetch(ProgressMonitor monitor, Collection want, Set have) throws TransportException { + Path tmpdir = null; + try { + tmpdir = Files.createTempDirectory("s3-remote-git-"); + for (Ref r : want) { + downloadBundle(r, tmpdir, monitor); + } + }catch (IOException e){ + throw new TransportException(transport.getURI(), "Exception fetching branches", e); + }finally { + if (tmpdir != null) + try { + FileUtils.delete(tmpdir.toFile(), FileUtils.RECURSIVE); + }catch (IOException e){ + throw new TransportException(transport.getURI(), "Exception fetching branches", e); + } + } + + } + + private void downloadBundle(Ref r, Path tmpdir, ProgressMonitor monitor) throws IOException{ + log.debug("Fetching {} in {}", r.getName(), tmpdir); + final List list = s3.listObjectsV2(ListObjectsV2Request.builder() + .bucket(bucket) + .prefix(key + '/' + r.getName()) + .build() + ).contents(); + + if( list == null || list.isEmpty() ) { + throw new TransportException(transport.getURI(), "No bundle for " + r.getName()); + } + + if( list.size() > 1 ){ + throw new TransportException(transport.getURI(), " More than one bundle for " +r.getName()); + } + String key = list.get(0).key(); + String bundleName = key.substring(key.lastIndexOf('/') + 1); + Path localBundle = tmpdir.resolve(bundleName); + Files.createDirectories(localBundle.getParent()); + log.trace("Downloading bundle {} for branch {} in {} ", key, r.getName(), localBundle); + + s3.getObject( + GetObjectRequest.builder().bucket(bucket).key(key).build(), + localBundle + ); + parseBundle( localBundle, monitor); + + } + + private void parseBundle( Path localBundle, ProgressMonitor monitor) throws TransportException { + try { + List specs = new ArrayList<>(); + specs.add(new 
RefSpec().setForceUpdate(true).setSourceDestination(Constants.R_REFS + '*', Constants.R_REFS + '*')); + Transport.open( transport.getLocal(), new URIish( localBundle.toUri().toString() ) ).fetch(monitor, specs); + + } catch (IOException | RuntimeException | URISyntaxException err) { + close(); + throw new TransportException(transport.getURI(), err.getMessage(), err); + } + } + + @Override + public void fetch(ProgressMonitor monitor, Collection want, Set have, OutputStream out) throws TransportException { + fetch(monitor,want,have); + } + + @Override + public boolean didFetchIncludeTags() { + return false; + } + + @Override + public boolean didFetchTestConnectivity() { + return false; + } + + @Override + public void setPackLockMessage(String message) { + // No pack lock message supported. + } + + @Override + public Collection getPackLocks() { + return Collections.emptyList(); + } + + +} diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3GitCredentialsProvider.java b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3GitCredentialsProvider.java new file mode 100644 index 0000000000..eb88fd0cd6 --- /dev/null +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3GitCredentialsProvider.java @@ -0,0 +1,66 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.cloud.aws.scm.jgit; + +import org.eclipse.jgit.errors.UnsupportedCredentialItem; +import org.eclipse.jgit.transport.CredentialItem; +import org.eclipse.jgit.transport.CredentialsProvider; +import org.eclipse.jgit.transport.URIish; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider; +import software.amazon.awssdk.regions.Region; + +/** + * JGit credentials provider wrapper for the AWS credentialsProvider and other client configuration parameters. + * + * @author Jorge Ejarque + */ +public class S3GitCredentialsProvider extends CredentialsProvider { + + private Region region; + private AwsCredentialsProvider awsCredentialsProvider; + + public void setAwsCredentialsProvider(AwsCredentialsProvider provider){ + this.awsCredentialsProvider = provider; + } + public void setRegion(Region region) { + this.region = region; + } + + public Region getRegion(){ + return region != null ? region : Region.US_EAST_1; + } + + public AwsCredentialsProvider getAwsCredentialsProvider(){ + return awsCredentialsProvider != null ? awsCredentialsProvider : DefaultCredentialsProvider.builder().build(); + } + + @Override + public boolean isInteractive() { + return false; + } + + @Override + public boolean supports(CredentialItem... items) { + return false; + } + + @Override + public boolean get(URIish uri, CredentialItem... 
items) throws UnsupportedCredentialItem { + return false; + } +} diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3PushConnection.java b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3PushConnection.java new file mode 100644 index 0000000000..1f2c846d79 --- /dev/null +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3PushConnection.java @@ -0,0 +1,94 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.cloud.aws.scm.jgit; + +import org.eclipse.jgit.errors.TransportException; +import org.eclipse.jgit.lib.NullProgressMonitor; +import org.eclipse.jgit.lib.ProgressMonitor; +import org.eclipse.jgit.lib.Ref; +import org.eclipse.jgit.transport.BundleWriter; +import org.eclipse.jgit.transport.PushConnection; +import org.eclipse.jgit.transport.RemoteRefUpdate; +import org.eclipse.jgit.util.FileUtils; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; + +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Map; + +/** + * Push connection implementation compatible with git-remote-s3 storage. 
+ * + * @author Jorge Ejarque + */ +public class S3PushConnection extends S3BaseConnection implements PushConnection { + + public S3PushConnection(TransportS3 transport) { + super(transport); + } + + @Override + public void push(ProgressMonitor monitor, Map refUpdates) throws TransportException { + Path tmpdir = null; + try { + tmpdir = Files.createTempDirectory("s3-remote-git-"); + for (Map.Entry entry : refUpdates.entrySet()) { + log.trace("Generating bundle for {} in {} ", entry.getKey(), tmpdir); + Path bundleFile = bundle(entry.getKey(), tmpdir); + s3.putObject(PutObjectRequest.builder().bucket(bucket).key(key+'/'+entry.getKey()).build(),bundleFile); + } + }catch (IOException e){ + throw new TransportException(transport.getURI(), "Exception fetching branches", e); + }finally { + if (tmpdir != null) + try { + FileUtils.delete(tmpdir.toFile(), FileUtils.RECURSIVE); + }catch (IOException e){ + throw new TransportException(transport.getURI(), "Exception fetching branches", e); + } + } + + } + + @Override + public void push(ProgressMonitor monitor, Map refUpdates, OutputStream out) throws TransportException { + push(monitor, refUpdates); + + } + + private Path bundle(String refName, Path tmpdir) throws IOException { + final Ref ref = transport.getLocal().findRef(refName); + if( ref == null ) { + throw new IllegalStateException("Branch ${branch} not found"); + } + final BundleWriter writer = new BundleWriter(transport.getLocal()); + Path bundleFile = tmpdir.resolve(ref.getObjectId() +".bundle"); + writer.include(ref); + try (OutputStream out = new FileOutputStream(bundleFile.toFile())) { + writer.writeBundle(NullProgressMonitor.INSTANCE, out); + } + return bundleFile; + } + + @Override + public void close() { + + } +} \ No newline at end of file diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/TransportS3.java b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/TransportS3.java new file mode 100644 index 0000000000..4e39381393 --- /dev/null 
+++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/TransportS3.java @@ -0,0 +1,89 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.cloud.aws.scm.jgit; + +import org.eclipse.jgit.errors.TransportException; +import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.transport.*; + +import java.util.Collections; +import java.util.Set; + +/** + * JGit transport implementation compatible with git-remote-s3 storage. 
+ * + * @author Jorge Ejarque + */ +public class TransportS3 extends Transport { + + public static final TransportProtocol PROTO_S3 = new S3TransportProtocol(); + + public TransportS3(Repository local, URIish uri) throws TransportException { + super(local, uri); + } + + @Override + public FetchConnection openFetch() throws TransportException { + return new S3FetchConnection(this); + } + + @Override + public PushConnection openPush() throws TransportException { + return new S3PushConnection(this); + } + + // Optional: Clean up if needed + @Override + public void close() { + // cleanup resources if needed + } + + public Repository getLocal(){ + return this.local; + } + + public static class S3TransportProtocol extends TransportProtocol { + @Override + public String getName() { + return "Amazon S3"; + } + + @Override + public Set getSchemes() { + return Collections.singleton("s3"); + } + + @Override + public boolean canHandle(URIish uri, Repository local, String remoteName) { + return "s3".equals(uri.getScheme()); + } + + @Override + public Transport open(URIish uri, Repository local, String remoteName) throws TransportException { + try { + return new TransportS3(local, uri); + } catch (TransportException e) { + throw e; + } + } + } + + public static void register() { + Transport.register(PROTO_S3); + } +} + diff --git a/plugins/nf-amazon/src/resources/META-INF/extensions.idx b/plugins/nf-amazon/src/resources/META-INF/extensions.idx index 50517c11c4..cff26a8e31 100644 --- a/plugins/nf-amazon/src/resources/META-INF/extensions.idx +++ b/plugins/nf-amazon/src/resources/META-INF/extensions.idx @@ -19,3 +19,4 @@ nextflow.cloud.aws.util.S3PathSerializer nextflow.cloud.aws.util.S3PathFactory nextflow.cloud.aws.fusion.AwsFusionEnv nextflow.cloud.aws.mail.AwsMailProvider +nextflow.cloud.aws.scm.S3RepositoryFactory From d3a4285c868b9b851a437260665e0d7f4c5e743e Mon Sep 17 00:00:00 2001 From: jorgee Date: Wed, 9 Jul 2025 17:39:01 +0200 Subject: [PATCH 02/20] small fixes 
Signed-off-by: jorgee --- .../src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy | 1 + .../src/main/nextflow/cloud/aws/scm/jgit/S3FetchConnection.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy index 35f4ecb6b1..f5c8465fbe 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy @@ -105,6 +105,7 @@ class S3RepositoryProvider extends RepositoryProvider { //Not possible to get a single file requires to clone the branch and get the file final tmpDir = Files.createTempDirectory("s3-git-remote") final command = Git.cloneRepository() + .setURI(getEndpointUrl()) .setDirectory(tmpDir.toFile()) .setCredentialsProvider(getGitCredentials()) if( revision ) diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3FetchConnection.java b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3FetchConnection.java index ff06f8fea3..7120bb3b0a 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3FetchConnection.java +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3FetchConnection.java @@ -71,7 +71,7 @@ private void downloadBundle(Ref r, Path tmpdir, ProgressMonitor monitor) throws log.debug("Fetching {} in {}", r.getName(), tmpdir); final List list = s3.listObjectsV2(ListObjectsV2Request.builder() .bucket(bucket) - .prefix(key + '/' + r.getName()) + .prefix(key + '/' + r.getName() + '/') .build() ).contents(); From ed0a5b5af8a6aeb3da918fed95ef8e91884c8a76 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 11 Jul 2025 09:59:57 +0200 Subject: [PATCH 03/20] fix limitation in fetch connection Signed-off-by: jorgee --- .../cloud/aws/scm/jgit/S3BaseConnection.java | 54 +++++++++-- .../cloud/aws/scm/jgit/S3PushConnection.java | 95 
+++++++++++++++++-- 2 files changed, 131 insertions(+), 18 deletions(-) diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3BaseConnection.java b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3BaseConnection.java index e9e017a347..429f07298b 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3BaseConnection.java +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3BaseConnection.java @@ -106,18 +106,12 @@ private void addRefs(String refType) { } private void addRef(S3Object obj) { String key = obj.key(); - String[] parts = key.split("/"); - if (parts.length < 5) throw new RuntimeException("Incorrect key parts"); - // Expect: repo/refs///.bundle - final String type = parts[2]; - final String rBranch = parts[3]; - final String sha = parts[4].replace(".bundle", ""); - String name = String.format("refs/%s/%s" , type, rBranch); - ObjectId objectId = ObjectId.fromString(sha); + BranchData branch = BranchData.fromKey(key); + String type = branch.getType(); if ("heads".equals(type)) { - advertisedRefs.put(name, new ObjectIdRef.PeeledNonTag(Ref.Storage.NETWORK, name, objectId)); + advertisedRefs.put(branch.getRefName(), new ObjectIdRef.PeeledNonTag(Ref.Storage.NETWORK, branch.getRefName(), branch.getObjectId())); } else if ("tags".equals(type)) { - advertisedRefs.put(name, new ObjectIdRef.Unpeeled(Ref.Storage.NETWORK, name, objectId)); + advertisedRefs.put(branch.getRefName(), new ObjectIdRef.Unpeeled(Ref.Storage.NETWORK, branch.getRefName(), branch.getObjectId())); } } @@ -149,4 +143,44 @@ public String getPeerUserAgent() { return ""; } + static class BranchData{ + private String type; + private String simpleName; + private String refName; + private ObjectId objectId; + + private BranchData(String type, String simpleName, ObjectId objectId){ + this.type = type; + this.simpleName = simpleName; + this.refName = String.format("refs/%s/%s" , type, simpleName); + this.objectId = objectId; + } + public static BranchData 
fromKey(String key){ + String[] parts = key.split("/"); + if (parts.length < 5) throw new RuntimeException("Incorrect key parts"); + // Expect: repo-path/refs///.bundle + final String type = parts[parts.length - 3]; + final String rBranch = parts[parts.length - 2]; + final String sha = parts[parts.length - 1].replace(".bundle", ""); + return new BranchData(type, rBranch, ObjectId.fromString(sha)); + + } + + public String getType() { + return type; + } + + public String getSimpleName() { + return simpleName; + } + + public String getRefName() { + return refName; + } + + public ObjectId getObjectId() { + return objectId; + } + } + } \ No newline at end of file diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3PushConnection.java b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3PushConnection.java index 1f2c846d79..49aa4dbc65 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3PushConnection.java +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3PushConnection.java @@ -18,19 +18,27 @@ import org.eclipse.jgit.errors.TransportException; import org.eclipse.jgit.lib.NullProgressMonitor; +import org.eclipse.jgit.lib.ObjectId; import org.eclipse.jgit.lib.ProgressMonitor; import org.eclipse.jgit.lib.Ref; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.revwalk.RevWalk; import org.eclipse.jgit.transport.BundleWriter; import org.eclipse.jgit.transport.PushConnection; import org.eclipse.jgit.transport.RemoteRefUpdate; import org.eclipse.jgit.util.FileUtils; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.S3Object; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.lang.reflect.Field; import java.nio.file.Files; import java.nio.file.Path; 
+import java.util.List; import java.util.Map; /** @@ -50,9 +58,7 @@ public void push(ProgressMonitor monitor, Map refUpdate try { tmpdir = Files.createTempDirectory("s3-remote-git-"); for (Map.Entry entry : refUpdates.entrySet()) { - log.trace("Generating bundle for {} in {} ", entry.getKey(), tmpdir); - Path bundleFile = bundle(entry.getKey(), tmpdir); - s3.putObject(PutObjectRequest.builder().bucket(bucket).key(key+'/'+entry.getKey()).build(),bundleFile); + pushBranch(entry, tmpdir); } }catch (IOException e){ throw new TransportException(transport.getURI(), "Exception fetching branches", e); @@ -67,17 +73,90 @@ public void push(ProgressMonitor monitor, Map refUpdate } + private void pushBranch(Map.Entry entry, Path tmpdir) throws IOException { + final Ref ref = transport.getLocal().findRef(entry.getKey()); + if( ref == null || ref.getObjectId() == null) { + throw new IllegalStateException("Branch ${branch} not found"); + } + S3Object oldObject = checkExistingObjectInBranch(entry.getKey()); + if( oldObject != null && isSameObjectId(oldObject, ref.getObjectId())){ + setUpdateStatus(entry.getValue(), RemoteRefUpdate.Status.UP_TO_DATE); + return; + } + if( oldObject != null && !isCommitInBranch(oldObject, ref)) { + setUpdateStatus(entry.getValue(), RemoteRefUpdate.Status.REJECTED_REMOTE_CHANGED); + return; + } + log.trace("Generating bundle for branch {} in {}", entry.getKey(), tmpdir); + Path bundleFile = bundle(ref, tmpdir); + String objectKey = String.format("%s/%s/%s", key, entry.getKey(), bundleFile.getFileName().toString()); + + log.trace("Uploading bundle {} to s3://{}/{}", bundleFile, bucket, objectKey); + s3.putObject(PutObjectRequest.builder() + .bucket(bucket) + .key(objectKey) + .build(), + bundleFile); + if( oldObject != null ){ + log.trace("Deleting old bundle s3://{}/{}",bucket,oldObject.key()); + s3.deleteObject(DeleteObjectRequest.builder() + .bucket(bucket) + .key(oldObject.key()) + .build() + ); + } + setUpdateStatus(entry.getValue(), 
RemoteRefUpdate.Status.OK); + } + + private boolean isSameObjectId(S3Object s3object, ObjectId commitId){ + return BranchData.fromKey(s3object.key()).getObjectId().name().equals(commitId.name()); + } + + private void setUpdateStatus(RemoteRefUpdate update, RemoteRefUpdate.Status status) { + try { + Field statusField = RemoteRefUpdate.class.getDeclaredField("status"); + statusField.setAccessible(true); + statusField.set(update, status); + } catch (Exception e) { + throw new RuntimeException("Unable to set status on RemoteRefUpdate", e); + } + } + + public boolean isCommitInBranch(S3Object s3Object, Ref branchRef) throws IOException { + ObjectId commitId = BranchData.fromKey(s3Object.key()).getObjectId(); + try (RevWalk walk = new RevWalk(transport.getLocal())) { + RevCommit branchTip = walk.parseCommit(branchRef.getObjectId()); + RevCommit targetCommit = walk.parseCommit(commitId); + + // Check if the commit is reachable from the branch tip + return walk.isMergedInto(targetCommit, branchTip); + } + } + + private S3Object checkExistingObjectInBranch(String name) throws TransportException { + final List list = s3.listObjectsV2(ListObjectsV2Request.builder() + .bucket(bucket) + .prefix(key + '/' + name + '/') + .build() + ).contents(); + + if( list == null || list.isEmpty() ) { + return null; + } + + if( list.size() > 1 ){ + throw new TransportException(transport.getURI(), " More than one bundle for " + name); + } + return list.get(0); + } + @Override public void push(ProgressMonitor monitor, Map refUpdates, OutputStream out) throws TransportException { push(monitor, refUpdates); } - private Path bundle(String refName, Path tmpdir) throws IOException { - final Ref ref = transport.getLocal().findRef(refName); - if( ref == null ) { - throw new IllegalStateException("Branch ${branch} not found"); - } + private Path bundle(Ref ref, Path tmpdir) throws IOException { final BundleWriter writer = new BundleWriter(transport.getLocal()); Path bundleFile = 
tmpdir.resolve(ref.getObjectId() +".bundle"); writer.include(ref); From 40b43b6a03f8da4395f276d6e22a38b88eb0c444 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 18 Sep 2025 12:20:49 +0200 Subject: [PATCH 04/20] add first push implementation Signed-off-by: jorgee --- .../main/groovy/nextflow/cli/CmdPush.groovy | 267 ++++++++++++++++++ 1 file changed, 267 insertions(+) create mode 100644 modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy new file mode 100644 index 0000000000..8ec223f183 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy @@ -0,0 +1,267 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.cli +import com.beust.jcommander.Parameter +import com.beust.jcommander.Parameters +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.exception.AbortOperationException +import nextflow.plugin.Plugins +import nextflow.util.TestOnly +import org.eclipse.jgit.api.Git +import org.eclipse.jgit.errors.RepositoryNotFoundException + +/** + * CLI sub-command Push + * + * @author Jorge Ejarque + */ +@Slf4j +@CompileStatic +@Parameters(commandDescription = "Pushes a local implementation to a remote repository") +class CmdPush extends CmdBase implements HubOptions { + + static final public NAME = 'push' + + @Parameter(description = 'Path to push', arity = 1) + String folderPath + + @Parameter(names=['-repo'], description = 'Defines the repository to push to', required = true) + String repository + + @Parameter(names=['-r','-revision'], description = 'Revision of the project to run (either a git branch, tag or commit SHA number)') + String revision = 'main' + + @Parameter(names=['-max-size'], description = 'Maximum file size in MB to push without confirmation (default: 10)') + int maxSizeMB = 10 + + @Parameter(names=['-message', '-m'], description = 'Commit message') + String message = 'Push from nextflow' + + @Override + final String getName() { NAME } + + @TestOnly + protected File root + + @Override + void run() { + if( !folderPath ) + throw new AbortOperationException('Missing folder argument') + + def folder = new File(folderPath).getAbsoluteFile() + + if( !folder.exists() ) + throw new AbortOperationException("Folder does not exist: ${folder.absolutePath}") + + if( !folder.isDirectory() ) + throw new AbortOperationException("Path is not a directory: ${folder.absolutePath}") + + log.info "Pushing folder ${folder.absolutePath} to repository ${repository}" + + // init plugin system + Plugins.init() + + try { + pushFolder(folder, repository, revision) + } + catch( Exception e ) { + throw new 
AbortOperationException("Failed to push folder: ${e.message}", e) + } + } + + private void pushFolder(File folder, String repo, String rev) { + def gitDir = new File(folder, '.git') + + if( gitDir.exists() ) { + log.debug "Found existing git repository in ${folder.absolutePath}" + validateExistingRepo(folder, repo) + } else { + log.debug "No git repository found, initializing new one" + initializeRepo(folder, repo, rev) + } + + checkFileSizes(folder) + stageAndCommitFiles(folder) + pushToRemote(folder, rev) + + log.info "Successfully pushed to ${repo} (revision: ${rev})" + } + + private void validateExistingRepo(File folder, String expectedRepo) { + try { + def git = Git.open(folder) + def config = git.getRepository().getConfig() + def remoteUrl = config.getString("remote", "origin", "url") + + if( remoteUrl ) { + def normalizedRemote = normalizeRepoUrl(remoteUrl) + def normalizedExpected = normalizeRepoUrl(expectedRepo) + + if( normalizedRemote != normalizedExpected ) { + throw new AbortOperationException( + "Repository mismatch!\n" + + " Local repository: ${remoteUrl}\n" + + " Expected repository: ${expectedRepo}\n" + + "Please remove the .git directory or specify the correct repository." 
+ ) + } + } + git.close() + } + catch( RepositoryNotFoundException e ) { + throw new AbortOperationException("Invalid git repository in ${folder.absolutePath}") + } + } + + private String normalizeRepoUrl(String url) { + return url?.toLowerCase()?.replaceAll(/\.git$/, '')?.replaceAll(/\/$/, '') + } + + private void initializeRepo(File folder, String repo, String rev) { + try { + log.debug "Initializing git repository in ${folder.absolutePath}" + def git = Git.init().setDirectory(folder).call() + + // Add remote origin + git.remoteAdd() + .setName("origin") + .setUri(new org.eclipse.jgit.transport.URIish(repo)) + .call() + + git.close() + } + catch( Exception e ) { + throw new AbortOperationException("Failed to initialize git repository: ${e.message}", e) + } + } + + private void checkFileSizes(File folder) { + def maxSizeBytes = maxSizeMB * 1024 * 1024 + List> largeFiles = [] + + folder.eachFileRecurse { file -> + if( file.isFile() && !file.absolutePath.contains('/.git/') ) { + if( file.length() > maxSizeBytes ) { + Map fileEntry = [:] + fileEntry.file = file + fileEntry.sizeMB = file.length() / (1024 * 1024) + largeFiles.add(fileEntry) + } + } + } + + if( largeFiles ) { + log.warn "Found ${largeFiles.size()} large files:" + largeFiles.each { entry -> + def fileInfo = entry.file as File + def sizeMB = entry.sizeMB as Double + log.warn " ${fileInfo.name}: ${String.format('%.1f', sizeMB)} MB" + } + + print "Do you want to continue and push these large files? [y/N]: " + def response = System.in.newReader().readLine()?.trim()?.toLowerCase() + + if( response != 'y' && response != 'yes' ) { + // Add large files to .gitignore + def fileNames = largeFiles.collect { entry -> (entry.file as File).name } + addToGitignore(folder, fileNames) + throw new AbortOperationException("Push cancelled due to large files. 
Files have been added to .gitignore") + } + } + } + + private void addToGitignore(File folder, List filenames) { + def gitignoreFile = new File(folder, '.gitignore') + def content = [] + + if( gitignoreFile.exists() ) { + content = gitignoreFile.readLines() + } + + filenames.each { filename -> + if( !content.contains(filename) ) { + content.add(filename) + } + } + + gitignoreFile.text = content.join('\n') + '\n' + log.info "Added ${filenames.size()} large files to .gitignore" + } + + private void stageAndCommitFiles(File folder) { + try { + def git = Git.open(folder) + + // Add all files + git.add().addFilepattern(".").call() + + // Check if there are any changes to commit + def status = git.status().call() + if( status.clean ) { + log.info "No changes to commit" + git.close() + return + } + + // Commit changes + git.commit() + .setMessage(message) + .call() + + log.debug "Committed changes with message: ${message}" + git.close() + } + catch( Exception e ) { + throw new AbortOperationException("Failed to stage and commit files: ${e.message}", e) + } + } + + private void pushToRemote(File folder, String rev) { + try { + def git = Git.open(folder) + + // Create and checkout branch if it doesn't exist + try { + git.checkout().setName(rev).call() + } + catch( Exception ignored ) { + // Branch doesn't exist, create it + git.checkout() + .setCreateBranch(true) + .setName(rev) + .call() + } + + // Push to remote + def refSpec = "refs/heads/${rev}:refs/heads/${rev}" + def pushCommand = git.push() + .setRemote("origin") + .add(refSpec) + + pushCommand.call() + + log.debug "Push completed successfully" + git.close() + } + catch( Exception e ) { + throw new AbortOperationException("Failed to push to remote repository: ${e.message}", e) + } + } + + +} From 541c4f437f01d706136c023e82c823a11b3303b2 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 19 Sep 2025 12:19:10 +0200 Subject: [PATCH 05/20] update push command implementation Signed-off-by: jorgee --- 
.../main/groovy/nextflow/cli/CmdPush.groovy | 360 +++++++++++++----- .../main/groovy/nextflow/cli/Launcher.groovy | 1 + .../groovy/nextflow/scm/AssetManager.groovy | 63 +++ .../cloud/aws/scm/jgit/S3BaseConnection.java | 38 +- .../cloud/aws/scm/jgit/S3PushConnection.java | 34 +- 5 files changed, 388 insertions(+), 108 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy index 8ec223f183..6ecdf4a7c2 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy @@ -21,9 +21,12 @@ import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import nextflow.exception.AbortOperationException import nextflow.plugin.Plugins +import nextflow.scm.AssetManager import nextflow.util.TestOnly import org.eclipse.jgit.api.Git import org.eclipse.jgit.errors.RepositoryNotFoundException +import org.eclipse.jgit.transport.RemoteConfig +import java.io.FileFilter /** * CLI sub-command Push @@ -38,9 +41,9 @@ class CmdPush extends CmdBase implements HubOptions { static final public NAME = 'push' @Parameter(description = 'Path to push', arity = 1) - String folderPath + List args - @Parameter(names=['-repo'], description = 'Defines the repository to push to', required = true) + @Parameter(names=['-repo'], description = 'Defines the repository to push to') String repository @Parameter(names=['-r','-revision'], description = 'Revision of the project to run (either a git branch, tag or commit SHA number)') @@ -60,10 +63,11 @@ class CmdPush extends CmdBase implements HubOptions { @Override void run() { - if( !folderPath ) - throw new AbortOperationException('Missing folder argument') + if( !args && args.size() > 1){ + throw new AbortOperationException('Incorrect folder argument') + } - def folder = new File(folderPath).getAbsoluteFile() + def folder = new File(args[0]).getAbsoluteFile() if( !folder.exists() ) 
throw new AbortOperationException("Folder does not exist: ${folder.absolutePath}") @@ -71,13 +75,17 @@ class CmdPush extends CmdBase implements HubOptions { if( !folder.isDirectory() ) throw new AbortOperationException("Path is not a directory: ${folder.absolutePath}") - log.info "Pushing folder ${folder.absolutePath} to repository ${repository}" - // init plugin system Plugins.init() try { - pushFolder(folder, repository, revision) + def resolvedRepo = repository + if( !resolvedRepo ) { + resolvedRepo = resolveRepository(folder) + } + + log.info "Pushing folder ${folder.absolutePath} to repository ${resolvedRepo}" + pushFolder(folder, resolvedRepo, revision) } catch( Exception e ) { throw new AbortOperationException("Failed to push folder: ${e.message}", e) @@ -86,45 +94,67 @@ class CmdPush extends CmdBase implements HubOptions { private void pushFolder(File folder, String repo, String rev) { def gitDir = new File(folder, '.git') + def remoteName = "origin" + def isNewRepo = false if( gitDir.exists() ) { log.debug "Found existing git repository in ${folder.absolutePath}" - validateExistingRepo(folder, repo) + remoteName = validateExistingRepo(folder, repo) + checkCurrentBranch(folder, rev) } else { log.debug "No git repository found, initializing new one" initializeRepo(folder, repo, rev) + isNewRepo = true } checkFileSizes(folder) + manageNextflowGitignore(folder) stageAndCommitFiles(folder) - pushToRemote(folder, rev) - + def manager = new AssetManager(folder, repo, this) + manager.upload(rev, remoteName, isNewRepo) log.info "Successfully pushed to ${repo} (revision: ${rev})" } - private void validateExistingRepo(File folder, String expectedRepo) { + private String validateExistingRepo(File folder, String expectedRepo) { + def git = Git.open(folder) + try { - def git = Git.open(folder) - def config = git.getRepository().getConfig() - def remoteUrl = config.getString("remote", "origin", "url") - - if( remoteUrl ) { - def normalizedRemote = 
normalizeRepoUrl(remoteUrl) - def normalizedExpected = normalizeRepoUrl(expectedRepo) - - if( normalizedRemote != normalizedExpected ) { - throw new AbortOperationException( - "Repository mismatch!\n" + - " Local repository: ${remoteUrl}\n" + - " Expected repository: ${expectedRepo}\n" + - "Please remove the .git directory or specify the correct repository." - ) + def remotes = git.remoteList().call() + + // Find all remotes and check if any matches the expected repo + def matchingRemote = null + + for( RemoteConfig remote : remotes ) { + if( remote.URIs ) { + def remoteUrl = remote.URIs[0].toString() + def normalizedRemote = normalizeRepoUrl(remoteUrl) + def normalizedExpected = normalizeRepoUrl(expectedRepo) + + if( normalizedRemote == normalizedExpected ) { + matchingRemote = remote.name + break + } } } - git.close() + + if( !matchingRemote ) { + def remotesList = remotes.collect { remote -> + def url = remote.URIs ? remote.URIs[0].toString() : 'no URL' + " ${remote.name}: ${url}" + }.join('\n') + + throw new AbortOperationException( + "Repository URL not found in remotes!\n" + + " Expected repository: ${expectedRepo}\n" + + " Available remotes:\n${remotesList}\n" + + "Please add the repository as a remote or specify the correct repository." 
+ ) + } + + return matchingRemote } - catch( RepositoryNotFoundException e ) { - throw new AbortOperationException("Invalid git repository in ${folder.absolutePath}") + finally { + git.close() } } @@ -132,32 +162,62 @@ class CmdPush extends CmdBase implements HubOptions { return url?.toLowerCase()?.replaceAll(/\.git$/, '')?.replaceAll(/\/$/, '') } - private void initializeRepo(File folder, String repo, String rev) { + private void checkCurrentBranch(File folder, String requestedBranch) { + def git = Git.open(folder) + try { - log.debug "Initializing git repository in ${folder.absolutePath}" - def git = Git.init().setDirectory(folder).call() + def head = git.getRepository().findRef("HEAD") + if( !head ) { + log.debug "No HEAD found, assuming new repository" + git.close() + return + } - // Add remote origin - git.remoteAdd() - .setName("origin") - .setUri(new org.eclipse.jgit.transport.URIish(repo)) - .call() + def currentBranch = null + if( head.isSymbolic() ) { + currentBranch = git.getRepository().getBranch() + } else { + log.debug "HEAD is not symbolic (detached state)" + git.close() + throw new AbortOperationException("Repository is in detached HEAD state. Please checkout to a branch before pushing.") + } - git.close() + if( currentBranch && currentBranch != requestedBranch ) { + git.close() + throw new AbortOperationException( + "Current branch '${currentBranch}' does not match requested branch '${requestedBranch}'.\n" + + "Please checkout to branch '${requestedBranch}' before pushing or specify the correct branch with -r option." 
+ ) + } + + log.debug "Current branch '${currentBranch}' matches requested branch '${requestedBranch}'" } - catch( Exception e ) { - throw new AbortOperationException("Failed to initialize git repository: ${e.message}", e) + finally { + git.close() } } + private void initializeRepo(File folder, String repo, String rev) { + log.debug "Initializing git repository in ${folder.absolutePath}" + def git = Git.init().setDirectory(folder).call() + + // Add remote origin + git.remoteAdd() + .setName("origin") + .setUri(new org.eclipse.jgit.transport.URIish(repo)) + .call() + + git.close() + } + private void checkFileSizes(File folder) { def maxSizeBytes = maxSizeMB * 1024 * 1024 - List> largeFiles = [] + List> largeFiles = [] folder.eachFileRecurse { file -> if( file.isFile() && !file.absolutePath.contains('/.git/') ) { if( file.length() > maxSizeBytes ) { - Map fileEntry = [:] + Map fileEntry = [:] fileEntry.file = file fileEntry.sizeMB = file.length() / (1024 * 1024) largeFiles.add(fileEntry) @@ -173,14 +233,14 @@ class CmdPush extends CmdBase implements HubOptions { log.warn " ${fileInfo.name}: ${String.format('%.1f', sizeMB)} MB" } - print "Do you want to continue and push these large files? [y/N]: " + print "Do you want to push these large files? [y/N]: " def response = System.in.newReader().readLine()?.trim()?.toLowerCase() if( response != 'y' && response != 'yes' ) { // Add large files to .gitignore def fileNames = largeFiles.collect { entry -> (entry.file as File).name } addToGitignore(folder, fileNames) - throw new AbortOperationException("Push cancelled due to large files. 
Files have been added to .gitignore") + println "Files have been added to .gitignore" } } } @@ -203,63 +263,193 @@ class CmdPush extends CmdBase implements HubOptions { log.info "Added ${filenames.size()} large files to .gitignore" } - private void stageAndCommitFiles(File folder) { - try { - def git = Git.open(folder) + private void manageNextflowGitignore(File folder) { + def gitignoreFile = new File(folder, '.gitignore') + List content = [] - // Add all files - git.add().addFilepattern(".").call() + if( gitignoreFile.exists() ) { + content = gitignoreFile.readLines() + } - // Check if there are any changes to commit - def status = git.status().call() - if( status.clean ) { - log.info "No changes to commit" - git.close() - return + // Default Nextflow entries to add + def nextflowEntries = [ + '.nextflow', + '.nextflow.log*' + ] + + def added = [] + nextflowEntries.each { entry -> + if( !content.contains(entry) ) { + content.add(entry) + added.add(entry) } + } - // Commit changes - git.commit() - .setMessage(message) - .call() + // Check for work directory + def workDirs = findWorkDirectories(folder) + if( workDirs ) { + def workEntriesToAdd = promptForWorkDirectories(workDirs, content) + workEntriesToAdd.each { workDir -> + if( !content.contains(workDir) ) { + content.add(workDir) + added.add(workDir) + } + } + } - log.debug "Committed changes with message: ${message}" - git.close() + if( added ) { + gitignoreFile.text = content.join('\n') + '\n' + log.info "Added ${added.size()} Nextflow entries to .gitignore: ${added.join(', ')}" + } else { + log.debug "All Nextflow entries already present in .gitignore" } - catch( Exception e ) { - throw new AbortOperationException("Failed to stage and commit files: ${e.message}", e) + } + + private List findWorkDirectories(File folder) { + List workDirs = [] + + // Check for the default Nextflow work directory + def workDir = new File(folder, 'work') + if( workDir.exists() && workDir.isDirectory() ) { + // Check if it looks 
like a Nextflow work directory + if( isNextflowWorkDirectory(workDir) ) { + workDirs.add('work') + } } + + return workDirs } - private void pushToRemote(File folder, String rev) { - try { - def git = Git.open(folder) + private boolean isNextflowWorkDirectory(File dir) { + // Check for typical Nextflow work directory structure + // Work directories contain subdirectories with hexadecimal names + def subDirs = dir.listFiles({ File f -> f.isDirectory() } as FileFilter) + if( !subDirs || subDirs.length == 0 ) { + return false + } - // Create and checkout branch if it doesn't exist - try { - git.checkout().setName(rev).call() + // Check if at least some subdirectories have hex-like names (Nextflow task hashes) + def hexPattern = /^[0-9a-f]{2}$/ + def hexDirs = subDirs.findAll { it.name.matches(hexPattern) } + + return hexDirs.size() >= Math.min(3, (int)(subDirs.length * 0.5)) + } + + private List promptForWorkDirectories(List workDirs, List currentGitignore) { + List toAdd = [] + + workDirs.each { workDir -> + // Check if already in .gitignore + if( currentGitignore.contains(workDir) ) { + log.debug "Work directory '${workDir}' already in .gitignore" + return // Skip this directory } - catch( Exception ignored ) { - // Branch doesn't exist, create it - git.checkout() - .setCreateBranch(true) - .setName(rev) - .call() + + println "Found Nextflow work directory: ${workDir}" + print "Do you want to add '${workDir}' to .gitignore? 
[Y/n]: " + def response = System.in.newReader().readLine()?.trim()?.toLowerCase() + + // Default to 'yes' if empty response or 'y'/'yes' + if( !response || response == 'y' || response == 'yes' ) { + toAdd.add(workDir) + log.info "Will add '${workDir}' to .gitignore" + } else { + log.info "Skipping '${workDir}'" } + } - // Push to remote - def refSpec = "refs/heads/${rev}:refs/heads/${rev}" - def pushCommand = git.push() - .setRemote("origin") - .add(refSpec) + return toAdd + } + + private void stageAndCommitFiles(File folder) { + def git = Git.open(folder) - pushCommand.call() + // Add all files + git.add().addFilepattern(".").call() - log.debug "Push completed successfully" + // Check if there are any changes to commit + def status = git.status().call() + if( status.clean ) { + log.info "No changes to commit" git.close() + return } - catch( Exception e ) { - throw new AbortOperationException("Failed to push to remote repository: ${e.message}", e) + + // Commit changes + git.commit() + .setMessage(message) + .call() + + log.debug "Committed changes with message: ${message}" + git.close() + } + + private String resolveRepository(File folder) { + def gitDir = new File(folder, '.git') + + if( !gitDir.exists() ) { + throw new AbortOperationException("No git repository found and no repository URL provided. Please specify a repository with -repo parameter.") + } + + def git = Git.open(folder) + + try { + def remotes = git.remoteList().call() + + if( remotes.empty ) { + throw new AbortOperationException("No remotes configured in git repository. 
Please add a remote or specify a repository with -repo parameter.") + } + + if( remotes.size() == 1 ) { + def remote = remotes[0] + def remoteUrl = remote.URIs[0].toString() + log.info "Using remote '${remote.name}': ${remoteUrl}" + return remoteUrl + } + + // Multiple remotes - ask user to choose + return selectRemoteFromUser(remotes) + } + finally { + git.close() + } + } + + private static String selectRemoteFromUser(List remotes) { + println "Multiple remotes found. Please select which remote to push to:" + + def remoteOptions = [:] + remotes.eachWithIndex { remote, index -> + def remoteUrl = remote.URIs[0].toString() + def remoteInfo = [name: remote.name, url: remoteUrl] + remoteOptions[index + 1] = remoteInfo + println " ${index + 1}. ${remote.name}: ${remoteUrl}" + } + + println " ${remotes.size() + 1}. Cancel" + + while( true ) { + print "Enter your choice [1-${remotes.size() + 1}]: " + def input = System.in.newReader().readLine()?.trim() + + try { + def choice = Integer.parseInt(input) + + if( choice == remotes.size() + 1 ) { + throw new AbortOperationException("Push operation cancelled by user.") + } + + if( choice >= 1 && choice <= remotes.size() ) { + def selected = remoteOptions[choice] + log.info "Selected remote '${selected['name']}': ${selected['url']}" + return selected['url'] + } + + println "Invalid choice. Please enter a number between 1 and ${remotes.size() + 1}." + } + catch( NumberFormatException ignored ) { + println "Invalid input. Please enter a number." 
+ } } } diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy index 6afda06942..b79ad26bf7 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/Launcher.groovy @@ -98,6 +98,7 @@ class Launcher { new CmdList(), new CmdLog(), new CmdPull(), + new CmdPush(), new CmdRun(), new CmdKubeRun(), new CmdDrop(), diff --git a/modules/nextflow/src/main/groovy/nextflow/scm/AssetManager.groovy b/modules/nextflow/src/main/groovy/nextflow/scm/AssetManager.groovy index f432b87fcc..ecf70d61e5 100644 --- a/modules/nextflow/src/main/groovy/nextflow/scm/AssetManager.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/scm/AssetManager.groovy @@ -112,6 +112,14 @@ class AssetManager { build(pipelineName, config) } + AssetManager(File path, String pipelineName, HubOptions cliOpts = null ) { + assert path + assert pipelineName + // build the object + def config = ProviderConfig.getDefault() + build(path, pipelineName, config, cliOpts) + } + /** * Build the asset manager internal data structure * @@ -135,6 +143,22 @@ class AssetManager { return this } + @PackageScope + AssetManager build( File path, String pipelineName, Map config = null, HubOptions cliOpts = null ) { + + this.providerConfigs = ProviderConfig.createFromMap(config) + + this.project = resolveName(pipelineName) + this.localPath = path + this.hub = checkHubProvider(cliOpts) + this.provider = createHubProvider(hub) + setupCredentials(cliOpts) + validateProjectDir() + + return this + } + + @PackageScope File getLocalGitConfig() { localPath ? 
new File(localPath,'.git/config') : null @@ -685,6 +709,45 @@ class AssetManager { } + /** + * Upload a pipeline to a remote repository + * + * @param revision The revision/branch to upload + * @param remoteName The name of the remote (default: origin) + * @param isNewRepo Whether this is a new repository initialization + * @result A message representing the operation result + */ + String upload(String revision, String remoteName = "origin", boolean isNewRepo = false) { + assert project + assert localPath + + // Create and checkout branch if it doesn't exist + try { + git.checkout().setName(revision).call() + } + catch( Exception ignored ) { + // Branch doesn't exist, create it + git.checkout() + .setCreateBranch(true) + .setName(revision) + .call() + } + + + def pushCommand = git.push() + .setRemote(remoteName) + + pushCommand.add(revision) + + if( provider.hasCredentials() ) + pushCommand.setCredentialsProvider( provider.getGitCredentials() ) + + def result = pushCommand.call() + return "pushed to ${remoteName} (${revision})" + } + + + /** * Clone a pipeline from a remote pipeline repository to the specified folder * diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3BaseConnection.java b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3BaseConnection.java index 429f07298b..798f02421a 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3BaseConnection.java +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3BaseConnection.java @@ -56,16 +56,11 @@ public S3BaseConnection(TransportS3 transport) { .region(credentials.getRegion()) .credentialsProvider(credentials.getAwsCredentialsProvider()) .build(); - try { - loadRefsMap(); - }catch (IOException e){ - final String message = String.format("Unable to get refs from remote s3://%s/%s", bucket, key); - throw new RuntimeException(message, e); - } + loadRefsMap(); log.trace("Created S3 Connection for s3://{}/{}", bucket, key); } - private String getDefaultBranchRef() 
throws IOException { + protected String getDefaultBranchRef() throws IOException { GetObjectRequest getObjectRequest = GetObjectRequest.builder() .bucket(bucket) @@ -79,13 +74,28 @@ private String getDefaultBranchRef() throws IOException { - private void loadRefsMap() throws IOException { - final String defaultBranch = getDefaultBranchRef(); - addRefs("heads"); - addRefs("tags"); - Ref target = advertisedRefs.get(defaultBranch); - if (target != null) - advertisedRefs.put(Constants.HEAD, new SymbolicRef(Constants.HEAD, target)); + private void loadRefsMap() { + try { + addRefs("heads"); + } catch (Exception e){ + log.debug("No heads found for s3://{}/{}: {}", bucket, key, e.getMessage()); + } + try { + addRefs("tags"); + } catch (Exception e){ + log.debug("No tags found for s3://{}/{}: {}", bucket, key, e.getMessage()); + } + try { + final String defaultBranch = getDefaultBranchRef(); + Ref target = advertisedRefs.get(defaultBranch); + if (target != null) + advertisedRefs.put(Constants.HEAD, new SymbolicRef(Constants.HEAD, target)); + } catch (Exception e){ + log.debug("No default refs found for s3://{}/{}: {}", bucket, key, e.getMessage()); + } + + + } private void addRefs(String refType) { diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3PushConnection.java b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3PushConnection.java index 49aa4dbc65..8f56da8566 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3PushConnection.java +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3PushConnection.java @@ -17,25 +17,21 @@ package nextflow.cloud.aws.scm.jgit; import org.eclipse.jgit.errors.TransportException; -import org.eclipse.jgit.lib.NullProgressMonitor; -import org.eclipse.jgit.lib.ObjectId; -import org.eclipse.jgit.lib.ProgressMonitor; -import org.eclipse.jgit.lib.Ref; +import org.eclipse.jgit.lib.*; import org.eclipse.jgit.revwalk.RevCommit; import org.eclipse.jgit.revwalk.RevWalk; import 
org.eclipse.jgit.transport.BundleWriter; import org.eclipse.jgit.transport.PushConnection; import org.eclipse.jgit.transport.RemoteRefUpdate; import org.eclipse.jgit.util.FileUtils; -import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; -import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; -import software.amazon.awssdk.services.s3.model.PutObjectRequest; -import software.amazon.awssdk.services.s3.model.S3Object; +import software.amazon.awssdk.core.sync.RequestBody; +import software.amazon.awssdk.services.s3.model.*; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.lang.reflect.Field; +import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.List; @@ -74,6 +70,7 @@ public void push(ProgressMonitor monitor, Map refUpdate } private void pushBranch(Map.Entry entry, Path tmpdir) throws IOException { + log.debug("Pushing {} reference", entry.getKey()); final Ref ref = transport.getLocal().findRef(entry.getKey()); if( ref == null || ref.getObjectId() == null) { throw new IllegalStateException("Branch ${branch} not found"); @@ -106,6 +103,25 @@ private void pushBranch(Map.Entry entry, Path tmpdir) t ); } setUpdateStatus(entry.getValue(), RemoteRefUpdate.Status.OK); + if ( getRef(Constants.HEAD) == null) { + updateRemoteHead(entry.getKey()); + } + } + + private void updateRemoteHead(String ref) { + try { + s3.headObject(HeadObjectRequest.builder() + .bucket(bucket) + .key(key + "/HEAD") + .build()); + } catch (NoSuchKeyException e) { + log.debug("No remote default branch. 
Setting to {}.", ref); + s3.putObject(PutObjectRequest.builder() + .bucket(bucket) + .key(key + "/HEAD") + .build(), + RequestBody.fromBytes(ref.getBytes(StandardCharsets.UTF_8))); + } } private boolean isSameObjectId(S3Object s3object, ObjectId commitId){ @@ -158,7 +174,7 @@ public void push(ProgressMonitor monitor, Map refUpdate private Path bundle(Ref ref, Path tmpdir) throws IOException { final BundleWriter writer = new BundleWriter(transport.getLocal()); - Path bundleFile = tmpdir.resolve(ref.getObjectId() +".bundle"); + Path bundleFile = tmpdir.resolve(ref.getObjectId().name() +".bundle"); writer.include(ref); try (OutputStream out = new FileOutputStream(bundleFile.toFile())) { writer.writeBundle(NullProgressMonitor.INSTANCE, out); From 514791fb25dbafa632ac6e77e0ab377554916a61 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 19 Sep 2025 19:10:14 +0200 Subject: [PATCH 06/20] fix pull issue and other changes in pull command Signed-off-by: jorgee --- .../main/groovy/nextflow/cli/CmdPush.groovy | 64 ++++++++++++------- .../cloud/aws/scm/jgit/S3FetchConnection.java | 19 +++--- 2 files changed, 51 insertions(+), 32 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy index 6ecdf4a7c2..cd36c70ac7 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy @@ -212,37 +212,55 @@ class CmdPush extends CmdBase implements HubOptions { private void checkFileSizes(File folder) { def maxSizeBytes = maxSizeMB * 1024 * 1024 - List> largeFiles = [] - - folder.eachFileRecurse { file -> - if( file.isFile() && !file.absolutePath.contains('/.git/') ) { - if( file.length() > maxSizeBytes ) { - Map fileEntry = [:] - fileEntry.file = file - fileEntry.sizeMB = file.length() / (1024 * 1024) + def git = Git.open(folder) + + try { + // Get Git status to find files that would be committed + def status = 
git.status().call() + def filesToBeCommitted = [] + + // Add untracked files + filesToBeCommitted.addAll(status.untracked) + // Add modified files + filesToBeCommitted.addAll(status.modified) + // Add added files + filesToBeCommitted.addAll(status.added) + + def largeFiles = [] + + filesToBeCommitted.each { relativePath -> + def file = new File(folder, relativePath as String) + if( file.exists() && file.isFile() && file.length() > maxSizeBytes ) { + def fileEntry = [ + file: file, + relativePath: relativePath, + sizeMB: file.length() / (1024 * 1024) + ] largeFiles.add(fileEntry) } } - } - if( largeFiles ) { - log.warn "Found ${largeFiles.size()} large files:" - largeFiles.each { entry -> - def fileInfo = entry.file as File - def sizeMB = entry.sizeMB as Double - log.warn " ${fileInfo.name}: ${String.format('%.1f', sizeMB)} MB" - } + if( largeFiles ) { + log.warn "Found ${largeFiles.size()} large files that would be committed:" + largeFiles.each { entry -> + def sizeMB = entry['sizeMB'] as Double + log.warn " ${entry['relativePath']}: ${String.format('%.1f', sizeMB)} MB" + } - print "Do you want to push these large files? [y/N]: " - def response = System.in.newReader().readLine()?.trim()?.toLowerCase() + print "Do you want to push these large files? 
[y/N]: " + def response = System.in.newReader().readLine()?.trim()?.toLowerCase() - if( response != 'y' && response != 'yes' ) { - // Add large files to .gitignore - def fileNames = largeFiles.collect { entry -> (entry.file as File).name } - addToGitignore(folder, fileNames) - println "Files have been added to .gitignore" + if( response != 'y' && response != 'yes' ) { + // Add large files to .gitignore + def relativePaths = largeFiles.collect { entry -> entry['relativePath'] as String } + addToGitignore(folder, relativePaths) + println "Files have been added to .gitignore" + } } } + finally { + git.close() + } } private void addToGitignore(File folder, List filenames) { diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3FetchConnection.java b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3FetchConnection.java index 7120bb3b0a..77233ba1c2 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3FetchConnection.java +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3FetchConnection.java @@ -52,7 +52,7 @@ public void fetch(ProgressMonitor monitor, Collection want, Set h try { tmpdir = Files.createTempDirectory("s3-remote-git-"); for (Ref r : want) { - downloadBundle(r, tmpdir, monitor); + downloadBundle(r, have, tmpdir, monitor); } }catch (IOException e){ throw new TransportException(transport.getURI(), "Exception fetching branches", e); @@ -67,7 +67,7 @@ public void fetch(ProgressMonitor monitor, Collection want, Set h } - private void downloadBundle(Ref r, Path tmpdir, ProgressMonitor monitor) throws IOException{ + private void downloadBundle(Ref r, Set have, Path tmpdir, ProgressMonitor monitor) throws IOException{ log.debug("Fetching {} in {}", r.getName(), tmpdir); final List list = s3.listObjectsV2(ListObjectsV2Request.builder() .bucket(bucket) @@ -92,16 +92,17 @@ private void downloadBundle(Ref r, Path tmpdir, ProgressMonitor monitor) throws GetObjectRequest.builder().bucket(bucket).key(key).build(), 
localBundle ); - parseBundle( localBundle, monitor); + parseBundle(r, have, localBundle, monitor); } - private void parseBundle( Path localBundle, ProgressMonitor monitor) throws TransportException { - try { - List specs = new ArrayList<>(); - specs.add(new RefSpec().setForceUpdate(true).setSourceDestination(Constants.R_REFS + '*', Constants.R_REFS + '*')); - Transport.open( transport.getLocal(), new URIish( localBundle.toUri().toString() ) ).fetch(monitor, specs); - + private void parseBundle(Ref r, Set have, Path localBundle, ProgressMonitor monitor) throws TransportException { + List specs = new ArrayList<>(); + List refs = new ArrayList<>(); + refs.add(r); + specs.add(new RefSpec().setForceUpdate(true).setSourceDestination(Constants.R_REFS + '*', Constants.R_REFS + '*')); + try(FetchConnection c = Transport.open(transport.getLocal(), new URIish(localBundle.toUri().toString())).openFetch(specs)){ + c.fetch(monitor, refs, have); } catch (IOException | RuntimeException | URISyntaxException err) { close(); throw new TransportException(transport.getURI(), err.getMessage(), err); From e5b7863c29bd558392458382f79ff8d25e015e21 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 9 Oct 2025 14:32:09 +0200 Subject: [PATCH 07/20] moving Plugins init before getting script file Signed-off-by: jorgee --- modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy index 5deb4d5942..0ad6c8c0b0 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdRun.groovy @@ -324,6 +324,7 @@ class CmdRun extends CmdBase implements HubOptions { printBanner() // -- resolve main script + Plugins.init() final scriptFile = getScriptFile(pipeline) // -- load command line params @@ -359,7 +360,6 @@ class CmdRun extends CmdBase implements 
HubOptions { Map configParams = builder.getConfigParams() // -- Load plugins (may register secret providers) - Plugins.init() Plugins.load(config) // -- Initialize real secrets system From 73e7964e8c4e90d6479eaec5f19ebf477183ba24 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 9 Oct 2025 20:09:04 +0200 Subject: [PATCH 08/20] Add unit and integration test in aws Signed-off-by: jorgee --- .../main/groovy/nextflow/cli/CmdPush.groovy | 26 +- .../groovy/nextflow/cli/CmdPushTest.groovy | 297 ++++++++++++++++++ .../cloud/aws/scm/S3ProviderConfig.groovy | 5 +- .../cloud/aws/scm/S3ProviderConfigTest.groovy | 245 +++++++++++++++ .../aws/scm/S3RepositoryFactoryTest.groovy | 227 +++++++++++++ .../aws/scm/S3RepositoryProviderTest.groovy | 202 ++++++++++++ validation/awsbatch.sh | 2 + validation/s3-remote-test-repo/main.nf | 18 ++ .../s3-remote-test-repo/nextflow.config | 1 + validation/test-s3-git-remote.sh | 66 ++++ 10 files changed, 1066 insertions(+), 23 deletions(-) create mode 100644 modules/nextflow/src/test/groovy/nextflow/cli/CmdPushTest.groovy create mode 100644 plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy create mode 100644 plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3RepositoryFactoryTest.groovy create mode 100644 plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3RepositoryProviderTest.groovy create mode 100644 validation/s3-remote-test-repo/main.nf create mode 100644 validation/s3-remote-test-repo/nextflow.config create mode 100644 validation/test-s3-git-remote.sh diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy index cd36c70ac7..fc5a9f0ba8 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy @@ -26,6 +26,8 @@ import nextflow.util.TestOnly import org.eclipse.jgit.api.Git import org.eclipse.jgit.errors.RepositoryNotFoundException import 
org.eclipse.jgit.transport.RemoteConfig +import org.eclipse.jgit.transport.URIish + import java.io.FileFilter /** @@ -63,7 +65,7 @@ class CmdPush extends CmdBase implements HubOptions { @Override void run() { - if( !args && args.size() > 1){ + if( !args || args.size() != 1){ throw new AbortOperationException('Incorrect folder argument') } @@ -204,7 +206,7 @@ class CmdPush extends CmdBase implements HubOptions { // Add remote origin git.remoteAdd() .setName("origin") - .setUri(new org.eclipse.jgit.transport.URIish(repo)) + .setUri(new URIish(repo)) .call() git.close() @@ -329,30 +331,12 @@ class CmdPush extends CmdBase implements HubOptions { // Check for the default Nextflow work directory def workDir = new File(folder, 'work') if( workDir.exists() && workDir.isDirectory() ) { - // Check if it looks like a Nextflow work directory - if( isNextflowWorkDirectory(workDir) ) { - workDirs.add('work') - } + workDirs.add('work') } return workDirs } - private boolean isNextflowWorkDirectory(File dir) { - // Check for typical Nextflow work directory structure - // Work directories contain subdirectories with hexadecimal names - def subDirs = dir.listFiles({ File f -> f.isDirectory() } as FileFilter) - if( !subDirs || subDirs.length == 0 ) { - return false - } - - // Check if at least some subdirectories have hex-like names (Nextflow task hashes) - def hexPattern = /^[0-9a-f]{2}$/ - def hexDirs = subDirs.findAll { it.name.matches(hexPattern) } - - return hexDirs.size() >= Math.min(3, (int)(subDirs.length * 0.5)) - } - private List promptForWorkDirectories(List workDirs, List currentGitignore) { List toAdd = [] diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdPushTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdPushTest.groovy new file mode 100644 index 0000000000..4b8d20eb7d --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdPushTest.groovy @@ -0,0 +1,297 @@ +/* + * Copyright 2013-2024, Seqera Labs + * + * Licensed under the Apache 
License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.cli + +import org.eclipse.jgit.transport.URIish + +import java.nio.file.Files + +import nextflow.exception.AbortOperationException +import nextflow.plugin.Plugins +import org.eclipse.jgit.api.Git +import spock.lang.IgnoreIf +import spock.lang.Specification + +/** + * Tests for CmdPush command + * + * @author Jorge Ejarque + */ +@IgnoreIf({System.getenv('NXF_SMOKE')}) +class CmdPushTest extends Specification { + + def cleanup() { + Plugins.stop() + } + + def 'should fail with no arguments'() { + given: + def cmd = new CmdPush() + + when: + cmd.run() + + then: + thrown(AbortOperationException) + } + + def 'should fail when folder does not exist'() { + given: + def cmd = new CmdPush(args: ['/nonexistent/folder'], repository: 'https://github.com/test/repo.git') + + when: + cmd.run() + + then: + def e = thrown(AbortOperationException) + e.message.contains('Folder does not exist') + } + + def 'should fail when path is not a directory'() { + given: + def tempFile = Files.createTempFile('test', '.txt').toFile() + def cmd = new CmdPush(args: [tempFile.absolutePath], repository: 'https://github.com/test/repo.git') + + when: + cmd.run() + + then: + def e = thrown(AbortOperationException) + e.message.contains('Path is not a directory') + + cleanup: + tempFile?.delete() + } + + def 'should fail when no repository specified and no git repo exists'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + def cmd = 
new CmdPush(args: [tempDir.absolutePath]) + + when: + cmd.run() + + then: + def e = thrown(AbortOperationException) + e.message.contains('No git repository found') + + cleanup: + tempDir?.deleteDir() + } + + def 'should normalize repository URLs correctly'() { + given: + def cmd = new CmdPush() + + expect: + cmd.normalizeRepoUrl('https://github.com/user/repo.git') == 'https://github.com/user/repo' + cmd.normalizeRepoUrl('https://github.com/user/repo') == 'https://github.com/user/repo' + cmd.normalizeRepoUrl('HTTPS://GITHUB.COM/USER/REPO.GIT') == 'https://github.com/user/repo' + cmd.normalizeRepoUrl('https://github.com/user/repo/') == 'https://github.com/user/repo' + } + + def 'should add files to gitignore'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + def gitignoreFile = new File(tempDir, '.gitignore') + def cmd = new CmdPush() + + when: + cmd.addToGitignore(tempDir, ['file1.txt', 'file2.txt']) + + then: + gitignoreFile.exists() + def content = gitignoreFile.text + content.contains('file1.txt') + content.contains('file2.txt') + + cleanup: + tempDir?.deleteDir() + } + + def 'should not duplicate entries in gitignore'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + def gitignoreFile = new File(tempDir, '.gitignore') + gitignoreFile.text = 'existing.txt\n' + def cmd = new CmdPush() + + when: + cmd.addToGitignore(tempDir, ['existing.txt', 'new.txt']) + + then: + def lines = gitignoreFile.readLines() + lines.count { it == 'existing.txt' } == 1 + lines.contains('new.txt') + + cleanup: + tempDir?.deleteDir() + } + + def 'should find work directories'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + def workDir = new File(tempDir, 'work') + workDir.mkdirs() + + def cmd = new CmdPush() + + when: + def result = cmd.findWorkDirectories(tempDir) + + then: + result.size() == 1 + result[0] == 'work' + + cleanup: + tempDir?.deleteDir() + } + + def 'should not find work directories when none exist'() { 
+ given: + def tempDir = Files.createTempDirectory('test').toFile() + def cmd = new CmdPush() + + when: + def result = cmd.findWorkDirectories(tempDir) + + then: + result.isEmpty() + + cleanup: + tempDir?.deleteDir() + } + + def 'should fail when existing repo has wrong remote'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + + // Initialize a git repo with a different remote + def git = Git.init().setDirectory(tempDir).call() + git.remoteAdd() + .setName('origin') + .setUri(new URIish('https://github.com/wrong/repo.git')) + .call() + git.close() + + def cmd = new CmdPush( + args: [tempDir.absolutePath], + repository: 'https://github.com/correct/repo.git' + ) + + when: + cmd.run() + + then: + def e = thrown(AbortOperationException) + e.message.contains('Repository URL not found in remotes') + + cleanup: + tempDir?.deleteDir() + } + + def 'should fail when repo is in detached HEAD state'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + + // Initialize repo and create a commit + def git = Git.init().setDirectory(tempDir).call() + git.remoteAdd() + .setName('origin') + .setUri(new URIish('https://github.com/test/repo.git')) + .call() + + // Create a test file and commit + new File(tempDir, 'test.txt').text = 'test content' + git.add().addFilepattern('.').call() + def commit = git.commit().setMessage('initial commit').call() + + // Checkout to detached HEAD + git.checkout().setName(commit.name()).call() + git.close() + + def cmd = new CmdPush( + args: [tempDir.absolutePath], + repository: 'https://github.com/test/repo.git' + ) + + when: + cmd.run() + + then: + def e = thrown(AbortOperationException) + e.message.contains('detached HEAD state') + + cleanup: + tempDir?.deleteDir() + } + + def 'should fail when current branch does not match requested branch'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + + // Initialize repo + def git = Git.init().setDirectory(tempDir).call() + git.remoteAdd() + 
.setName('origin') + .setUri(new URIish('https://github.com/test/repo.git')) + .call() + + // Create initial commit on main branch + new File(tempDir, 'test.txt').text = 'test content' + git.add().addFilepattern('.').call() + git.commit().setMessage('initial commit').call() + + // Create and checkout dev branch + git.checkout().setCreateBranch(true).setName('dev').call() + git.close() + + def cmd = new CmdPush( + args: [tempDir.absolutePath], + repository: 'https://github.com/test/repo.git', + revision: 'main' + ) + + when: + cmd.run() + + then: + def e = thrown(AbortOperationException) + e.message.contains("Current branch 'dev' does not match requested branch 'main'") + + cleanup: + tempDir?.deleteDir() + } + + def 'should get command name'() { + given: + def cmd = new CmdPush() + + expect: + cmd.getName() == 'push' + } + + def 'should have default parameter values'() { + given: + def cmd = new CmdPush() + + expect: + cmd.revision == 'main' + cmd.maxSizeMB == 10 + cmd.message == 'Push from nextflow' + } +} diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy index 2d66a28d03..30369005ed 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy @@ -41,7 +41,7 @@ class S3ProviderConfig extends ProviderConfig { private AwsCredentialsProvider awsCredentialsProvider = DefaultCredentialsProvider.builder().build() S3ProviderConfig(String name, Map values) { - super(name, values) + super(name, [ server: "s3://$name"] + values) setDefaultsFromAwsConfig() // Override with scm repo attributes setValuesFromMap(values) @@ -59,8 +59,9 @@ class S3ProviderConfig extends ProviderConfig { } } private void setValuesFromMap(Map values){ - if( values.region ) + if( values.region ) { region = Region.of(values.region as String) + } if( values.accessKey && values.secretKey 
){ awsCredentialsProvider = StaticCredentialsProvider.create( AwsBasicCredentials.builder() diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy new file mode 100644 index 0000000000..6e7c8a24c6 --- /dev/null +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy @@ -0,0 +1,245 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package nextflow.cloud.aws.scm + +import nextflow.Global +import nextflow.Session +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider +import software.amazon.awssdk.regions.Region +import spock.lang.Specification + +/** + * Tests for S3ProviderConfig + * + * @author Jorge Ejarque + */ +class S3ProviderConfigTest extends Specification { + + def cleanup() { + Global.session = null + } + + def 'should create S3 provider config with name only'() { + when: + def config = new S3ProviderConfig('my-bucket') + + then: + config.name == 'my-bucket' + config.platform == 's3' + config.server == 's3://my-bucket' + config.region == Region.US_EAST_1 + config.awsCredentialsProvider != null + } + + def 'should create S3 provider config with values map'() { + given: + def values = [ + platform: 's3', + server: 's3://test-bucket', + region: 'us-west-2' + ] + + when: + def config = new S3ProviderConfig('test-bucket', values) + + then: + config.name == 'test-bucket' + config.platform == 's3' + config.server == 's3://test-bucket' + config.region == Region.US_WEST_2 + } + + def 'should set region from config map'() { + given: + def values = [ + platform: 's3', + region: 'eu-west-1' + ] + + when: + def config = new S3ProviderConfig('my-bucket', values) + + then: + config.region == Region.EU_WEST_1 + } + + def 'should set AWS credentials from config map'() { + given: + def values = [ + platform: 's3', + accessKey: 'AKIAIOSFODNN7EXAMPLE', + secretKey: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' + ] + + when: + def config = new S3ProviderConfig('my-bucket', values) + + then: + config.awsCredentialsProvider instanceof StaticCredentialsProvider + def credentials = config.awsCredentialsProvider.resolveCredentials() + credentials.accessKeyId() == 'AKIAIOSFODNN7EXAMPLE' + credentials.secretAccessKey() == 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' + } + + def 'should set defaults from Global session AWS 
config'() { + given: + def sessionConfig = [ + aws: [ + region: 'ap-southeast-1', + accessKey: 'ASIA123456789EXAMPLE', + secretKey: 'testSecretKey123' + ] + ] + Global.session = Mock(Session) { + getConfig() >> sessionConfig + } + + when: + def config = new S3ProviderConfig('my-bucket') + + then: + config.region == Region.AP_SOUTHEAST_1 + config.awsCredentialsProvider instanceof StaticCredentialsProvider + } + + def 'should override session config with provider values'() { + given: + def sessionConfig = [ + aws: [ + region: 'us-east-1', + accessKey: 'ASIA111111111EXAMPLE', + secretKey: 'sessionSecret' + ] + ] + Global.session = Mock(Session) { + getConfig() >> sessionConfig + } + def values = [ + platform: 's3', + region: 'eu-central-1', + accessKey: 'AKIAIOSFODNN7EXAMPLE', + secretKey: 'providerSecret' + ] + + when: + def config = new S3ProviderConfig('my-bucket', values) + + then: + config.region == Region.EU_CENTRAL_1 + def credentials = config.awsCredentialsProvider.resolveCredentials() + credentials.accessKeyId() == 'AKIAIOSFODNN7EXAMPLE' + credentials.secretAccessKey() == 'providerSecret' + } + + def 'should resolve project name correctly'() { + given: + def config = new S3ProviderConfig('my-bucket') + + when: + def result = config.resolveProjectName('path/to/project') + + then: + result == 'my-bucket/path/to/project' + } + + def 'should handle project name with nested paths'() { + given: + def config = new S3ProviderConfig('test-bucket') + + when: + def result = config.resolveProjectName('org/team/project') + + then: + result == 'test-bucket/org/team/project' + } + + def 'should use default credentials provider when no credentials specified'() { + when: + def config = new S3ProviderConfig('my-bucket') + + then: + config.awsCredentialsProvider != null + // DefaultCredentialsProvider is used by default + } + + def 'should handle different AWS regions'() { + expect: + new S3ProviderConfig('bucket', [platform: 's3', region: REGION]).region == EXPECTED + + where: 
+ REGION | EXPECTED + 'us-east-1' | Region.US_EAST_1 + 'us-west-2' | Region.US_WEST_2 + 'eu-west-1' | Region.EU_WEST_1 + 'ap-northeast-1' | Region.AP_NORTHEAST_1 + 'sa-east-1' | Region.SA_EAST_1 + } + + def 'should handle accessKey without secretKey'() { + given: + def values = [ + platform: 's3', + accessKey: 'AKIAIOSFODNN7EXAMPLE' + ] + + when: + def config = new S3ProviderConfig('my-bucket', values) + + then: + // Should not set static credentials provider if secretKey is missing + config.awsCredentialsProvider != null + } + + def 'should handle secretKey without accessKey'() { + given: + def values = [ + platform: 's3', + secretKey: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' + ] + + when: + def config = new S3ProviderConfig('my-bucket', values) + + then: + // Should not set static credentials provider if accessKey is missing + config.awsCredentialsProvider != null + } + + def 'should handle empty config map'() { + when: + def config = new S3ProviderConfig('my-bucket') + + then: + config.name == 'my-bucket' + config.platform == 's3' + config.server == 's3://my-bucket' + config.region == Region.US_EAST_1 + } + + def 'should handle null Global session'() { + given: + Global.session = null + + when: + def config = new S3ProviderConfig('my-bucket') + + then: + noExceptionThrown() + config.region == Region.US_EAST_1 + } +} diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3RepositoryFactoryTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3RepositoryFactoryTest.groovy new file mode 100644 index 0000000000..17bd669a60 --- /dev/null +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3RepositoryFactoryTest.groovy @@ -0,0 +1,227 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.cloud.aws.scm + +import nextflow.scm.GitUrl +import nextflow.scm.ProviderConfig +import spock.lang.Specification + +/** + * Tests for S3RepositoryFactory + * + * @author Jorge Ejarque + */ +class S3RepositoryFactoryTest extends Specification { + + def 'should create S3 provider instance when platform is s3'() { + given: + def factory = new S3RepositoryFactory() + def config = new S3ProviderConfig('test-bucket') + + when: + def provider = factory.createProviderInstance(config, 'test-bucket/project') + + then: + provider instanceof S3RepositoryProvider + provider.project == 'test-bucket/project' + } + + def 'should return null when platform is not s3'() { + given: + def factory = new S3RepositoryFactory() + def config = new ProviderConfig('github', [platform: 'github']) + + when: + def provider = factory.createProviderInstance(config, 'user/repo') + + then: + provider == null + } + + def 'should register TransportS3 on first provider creation'() { + given: + def factory = new S3RepositoryFactory() + def config = new S3ProviderConfig('test-bucket') + + when: + def provider1 = factory.createProviderInstance(config, 'test-bucket/project1') + def provider2 = factory.createProviderInstance(config, 'test-bucket/project2') + + then: + provider1 instanceof S3RepositoryProvider + provider2 instanceof S3RepositoryProvider + // TransportS3.register() should be called only once + } + + def 'should get config for s3 URL'() { + given: + def factory = new S3RepositoryFactory() + def url = new GitUrl('s3://my-bucket/path/to/project') + def 
providers = [] + + when: + def config = factory.getConfig(providers, url) + + then: + config instanceof S3ProviderConfig + config.name == 'my-bucket' + config.platform == 's3' + } + + def 'should return existing config when domain matches'() { + given: + def factory = new S3RepositoryFactory() + def existingConfig = new S3ProviderConfig('my-bucket', [ + region: 'eu-west-1', + accessKey: 'test-key', + secretKey: 'test-secret', + platform: 's3' + ]) + def providers = [existingConfig] + def url = new GitUrl('s3://my-bucket/path/to/project') + + when: + def config = factory.getConfig(providers, url) + + then: + config.name == 'my-bucket' + config.region.id() == 'eu-west-1' + } + + def 'should return null for non-s3 protocol'() { + given: + def factory = new S3RepositoryFactory() + def url = new GitUrl('https://github.com/user/repo') + def providers = [] + + when: + def config = factory.getConfig(providers, url) + + then: + config == null + } + + def 'should create config instance when platform is s3'() { + given: + def factory = new S3RepositoryFactory() + def attrs = [ + platform: 's3', + region: 'us-west-2', + accessKey: 'AKIAIOSFODNN7EXAMPLE', + secretKey: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' + ] + + when: + def config = factory.createConfigInstance('my-bucket', attrs) + + then: + config instanceof S3ProviderConfig + config.name == 'my-bucket' + config.platform == 's3' + config.region.id() == 'us-west-2' + } + + def 'should return null when creating config for non-s3 platform'() { + given: + def factory = new S3RepositoryFactory() + def attrs = [ + platform: 'github', + server: 'https://github.com' + ] + + when: + def config = factory.createConfigInstance('github', attrs) + + then: + config == null + } + + def 'should handle multiple buckets'() { + given: + def factory = new S3RepositoryFactory() + def config1 = new S3ProviderConfig('bucket1') + def config2 = new S3ProviderConfig('bucket2') + + when: + def provider1 = factory.createProviderInstance(config1, 
'bucket1/project1') + def provider2 = factory.createProviderInstance(config2, 'bucket2/project2') + + then: + provider1.project == 'bucket1/project1' + provider2.project == 'bucket2/project2' + provider1.getEndpointUrl() == 's3://bucket1/project1' + provider2.getEndpointUrl() == 's3://bucket2/project2' + } + + def 'should handle URL with nested paths'() { + given: + def factory = new S3RepositoryFactory() + def url = new GitUrl('s3://my-bucket/org/team/project') + def providers = [] + + when: + def config = factory.getConfig(providers, url) + + then: + config instanceof S3ProviderConfig + config.name == 'my-bucket' + } + + def 'should create new config when no matching provider exists'() { + given: + def factory = new S3RepositoryFactory() + def existingConfig = new S3ProviderConfig('other-bucket') + def providers = [existingConfig] + def url = new GitUrl('s3://my-bucket/project') + + when: + def config = factory.getConfig(providers, url) + + then: + config instanceof S3ProviderConfig + config != existingConfig + config.name == 'my-bucket' + } + + def 'should not modify original attrs map'() { + given: + def factory = new S3RepositoryFactory() + def attrs = [ + platform: 's3', + region: 'us-east-1' + ] + def originalSize = attrs.size() + + when: + factory.createConfigInstance('test-bucket', attrs) + + then: + attrs.size() == originalSize + attrs.platform == 's3' + } + + def 'should handle different S3 URL formats'() { + given: + def factory = new S3RepositoryFactory() + def providers = [] + + expect: + factory.getConfig(providers, new GitUrl('s3://bucket/project')) instanceof S3ProviderConfig + factory.getConfig(providers, new GitUrl('s3://my-bucket/path/to/project')) instanceof S3ProviderConfig + + } +} diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3RepositoryProviderTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3RepositoryProviderTest.groovy new file mode 100644 index 0000000000..6ca3dbb68a --- /dev/null +++ 
b/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3RepositoryProviderTest.groovy @@ -0,0 +1,202 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.cloud.aws.scm + +import nextflow.cloud.aws.scm.jgit.S3GitCredentialsProvider +import nextflow.scm.ProviderConfig +import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider +import software.amazon.awssdk.regions.Region +import spock.lang.Specification + +/** + * Tests for S3RepositoryProvider + * + * @author Jorge Ejarque + */ +class S3RepositoryProviderTest extends Specification { + + def 'should create S3 repository provider'() { + given: + def config = new S3ProviderConfig('test-bucket') + + when: + def provider = new S3RepositoryProvider('test-bucket/project', config) + + then: + provider.project == 'test-bucket/project' + provider.config == config + } + + def 'should assert config is S3ProviderConfig'() { + given: + def config = Mock(ProviderConfig) + + when: + new S3RepositoryProvider('test-project', config) + + then: + thrown(AssertionError) + } + + def 'should return correct name'() { + given: + def config = new S3ProviderConfig('test-bucket') + def provider = new S3RepositoryProvider('test-bucket/project', config) + + when: + def name = provider.getName() + + then: + name == 'test-bucket/project' + } + + def 'should return correct endpoint URL'() { 
+ given: + def config = new S3ProviderConfig('test-bucket') + def provider = new S3RepositoryProvider('test-bucket/project', config) + + when: + def url = provider.getEndpointUrl() + + then: + url == 's3://test-bucket/project' + } + + def 'should return correct clone URL'() { + given: + def config = new S3ProviderConfig('test-bucket') + def provider = new S3RepositoryProvider('test-bucket/project', config) + + when: + def url = provider.getCloneUrl() + + then: + url == 's3://test-bucket/project' + } + + def 'should return correct repository URL'() { + given: + def config = new S3ProviderConfig('test-bucket') + def provider = new S3RepositoryProvider('test-bucket/project', config) + + when: + def url = provider.getRepositoryUrl() + + then: + url == 's3://test-bucket/project' + } + + def 'should indicate has credentials'() { + given: + def config = new S3ProviderConfig('test-bucket') + def provider = new S3RepositoryProvider('test-bucket/project', config) + + when: + def hasCredentials = provider.hasCredentials() + + then: + hasCredentials + } + + def 'should throw UnsupportedOperationException for getContentUrl'() { + given: + def config = new S3ProviderConfig('test-bucket') + def provider = new S3RepositoryProvider('test-bucket/project', config) + + when: + provider.getContentUrl('path/to/file') + + then: + thrown(UnsupportedOperationException) + } + + def 'should get Git credentials with region'() { + given: + def config = new S3ProviderConfig('test-bucket', [platform: 's3', region: 'eu-west-1']) + def provider = new S3RepositoryProvider('test-bucket/project', config) + + when: + def credentials = provider.getGitCredentials() + then: + credentials instanceof S3GitCredentialsProvider + def awsCredentials = credentials as S3GitCredentialsProvider + awsCredentials.region == Region.EU_WEST_1 + awsCredentials.awsCredentialsProvider instanceof DefaultCredentialsProvider + } + + def 'should get Git credentials with AWS credentials provider'() { + given: + def config = 
new S3ProviderConfig('test-bucket', [ + platform: 's3', + region: 'us-west-2', + accessKey: 'AKIAIOSFODNN7EXAMPLE', + secretKey: 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' + ]) + def provider = new S3RepositoryProvider('test-bucket/project', config) + + when: + def credentials = provider.getGitCredentials() + + then: + credentials instanceof S3GitCredentialsProvider + def awsCredentials = credentials as S3GitCredentialsProvider + awsCredentials.region == Region.US_WEST_2 + awsCredentials.awsCredentialsProvider instanceof StaticCredentialsProvider + } + + def 'should memoize Git credentials'() { + given: + def config = new S3ProviderConfig('test-bucket') + def provider = new S3RepositoryProvider('test-bucket/project', config) + + when: + def credentials1 = provider.getGitCredentials() + def credentials2 = provider.getGitCredentials() + + then: + credentials1.is(credentials2) // Should return same instance due to @Memoized + } + + def 'should validate repo without throwing exception'() { + given: + def config = new S3ProviderConfig('test-bucket') + def provider = new S3RepositoryProvider('test-bucket/project', config) + + when: + provider.validateRepo() + + then: + noExceptionThrown() + } + + def 'should handle different project names'() { + given: + def config = new S3ProviderConfig('my-bucket') + + expect: + new S3RepositoryProvider(PROJECT, config).getName() == PROJECT + + where: + PROJECT << [ + 'my-bucket/simple', + 'my-bucket/org/team/project', + 'test-bucket/user/repo', + 'bucket/a/b/c/d' + ] + } +} diff --git a/validation/awsbatch.sh b/validation/awsbatch.sh index b73571cbd6..b4da1787a4 100644 --- a/validation/awsbatch.sh +++ b/validation/awsbatch.sh @@ -7,6 +7,8 @@ get_abs_filename() { export NXF_CMD=${NXF_CMD:-$(get_abs_filename ../launch.sh)} +bash test-s3-git-remote.sh 's3://nextflow-ci/work' + # Execution should fail ignoring $NXF_CMD run test-aws-unstage-fail.nf -c awsbatch-unstage-fail.config || true [[ `grep -c "Error executing process > 'test (1)'" 
.nextflow.log` == 1 ]] || false diff --git a/validation/s3-remote-test-repo/main.nf b/validation/s3-remote-test-repo/main.nf new file mode 100644 index 0000000000..a4bb63ee51 --- /dev/null +++ b/validation/s3-remote-test-repo/main.nf @@ -0,0 +1,18 @@ +#!/usr/bin/env nextflow + +process sayHello { + input: + val x + + output: + stdout + + script: + """ + echo 'Hey! ${x} world!' + """ +} + +workflow { + Channel.of('Bonjour', 'Ciao', 'Hello', 'Hola') | sayHello | view +} diff --git a/validation/s3-remote-test-repo/nextflow.config b/validation/s3-remote-test-repo/nextflow.config new file mode 100644 index 0000000000..b90e8610ae --- /dev/null +++ b/validation/s3-remote-test-repo/nextflow.config @@ -0,0 +1 @@ +process.container = 'quay.io/nextflow/bash' diff --git a/validation/test-s3-git-remote.sh b/validation/test-s3-git-remote.sh new file mode 100644 index 0000000000..85edc058fd --- /dev/null +++ b/validation/test-s3-git-remote.sh @@ -0,0 +1,66 @@ +#!/bin/bash +set -e + +S3_REPO_BASE=$1 + +get_abs_filename() { + echo "$(cd "$(dirname "$1")" && pwd)/$(basename "$1")" +} + + +export NXF_CMD=${NXF_CMD:-$(get_abs_filename ../launch.sh)} + +## Test S3 git remote integration: push and run +echo "Testing S3 git remote integration" + +# Create timestamp for unique test repo +TIMESTAMP=$(date +%s) +TEST_REPO="test-repo-${TIMESTAMP}" +S3_REPO="$S3_REPO_BASE/${TEST_REPO}" +TEMP_DIR=$(mktemp -d -t nf-s3-test-XXXXXX) +## Test S3 git remote integration: push and run +echo "Testing S3 git remote integration in ${TEMP_DIR}" + +remove() { + echo "Removing ${S3_REPO}" + $NXF_CMD fs rm "${S3_REPO}" + echo "Removing ${TEMP_DIR}" + rm -rf "${TEMP_DIR}" +} + +# Copy test pipeline to temp directory +cp -r s3-remote-test-repo "${TEMP_DIR}/" + +# Copy config +cp awsbatch.config "${TEMP_DIR}/nextflow.config" + +# define trap to remove when exit +trap remove EXIT + +cd ${TEMP_DIR} +echo "Pushing pipeline to ${S3_REPO}" +$NXF_CMD push s3-remote-test-repo -repo "${S3_REPO}" -r main + +echo 
"Running pipeline from S3 remote" +$NXF_CMD -q run "${S3_REPO}" -r main | tee stdout1 +[[ `grep -c "Hey! Bonjour world!" stdout1` == 1 ]] || false +[[ `grep -c "Hey! Ciao world!" stdout1` == 1 ]] || false +[[ `grep -c "Hey! Hello world!" stdout1` == 1 ]] || false +[[ `grep -c "Hey! Hola world!" stdout1` == 1 ]] || false + +echo "Modifying pipeline message" +sed -i "s/Hey!/Hey there!/g" s3-remote-test-repo/main.nf + +echo "Pushing modified pipeline to ${S3_REPO}" +$NXF_CMD push s3-remote-test-repo -m "Update greeting message" + +echo "Running modified pipeline from S3 remote" +$NXF_CMD -q run "${S3_REPO}" -r main -latest | tee stdout2 +[[ `grep -c "Hey there! Bonjour world!" stdout2` == 1 ]] || false +[[ `grep -c "Hey there! Ciao world!" stdout2` == 1 ]] || false +[[ `grep -c "Hey there! Hello world!" stdout2` == 1 ]] || false +[[ `grep -c "Hey there! Hola world!" stdout2` == 1 ]] || false + +echo "S3 git remote integration test completed successfully" + + From f79f12e951437a53bf40c75ce50a16376e0954c3 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 9 Oct 2025 20:10:32 +0200 Subject: [PATCH 09/20] Fix unit test Signed-off-by: jorgee --- .../src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy | 1 - 1 file changed, 1 deletion(-) diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy index 6e7c8a24c6..a1e6875206 100644 --- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy @@ -18,7 +18,6 @@ package nextflow.cloud.aws.scm import nextflow.Global import nextflow.Session -import software.amazon.awssdk.auth.credentials.AwsBasicCredentials import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider import software.amazon.awssdk.regions.Region import spock.lang.Specification From dbf02020a8edbfb2287a7cdd2b141c02cbb389fc Mon Sep 17 
00:00:00 2001 From: jorgee Date: Thu, 9 Oct 2025 20:57:58 +0200 Subject: [PATCH 10/20] update implementation to master Signed-off-by: jorgee --- .../src/resources/META-INF/extensions.idx | 22 ------------------- 1 file changed, 22 deletions(-) delete mode 100644 plugins/nf-amazon/src/resources/META-INF/extensions.idx diff --git a/plugins/nf-amazon/src/resources/META-INF/extensions.idx b/plugins/nf-amazon/src/resources/META-INF/extensions.idx deleted file mode 100644 index cff26a8e31..0000000000 --- a/plugins/nf-amazon/src/resources/META-INF/extensions.idx +++ /dev/null @@ -1,22 +0,0 @@ -# -# Copyright 2013-2024, Seqera Labs -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -nextflow.cloud.aws.batch.AwsBatchExecutor -nextflow.cloud.aws.util.S3PathSerializer -nextflow.cloud.aws.util.S3PathFactory -nextflow.cloud.aws.fusion.AwsFusionEnv -nextflow.cloud.aws.mail.AwsMailProvider -nextflow.cloud.aws.scm.S3RepositoryFactory From 04d389674c84d0f55f60240179e8cb74e44e8df6 Mon Sep 17 00:00:00 2001 From: jorgee Date: Thu, 9 Oct 2025 21:02:19 +0200 Subject: [PATCH 11/20] update implementation to master Signed-off-by: jorgee --- plugins/nf-amazon/build.gradle | 1 + .../nextflow/cloud/aws/scm/S3RepositoryProvider.groovy | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/plugins/nf-amazon/build.gradle b/plugins/nf-amazon/build.gradle index 0d635d1f5e..8fffb8662b 100644 --- a/plugins/nf-amazon/build.gradle +++ b/plugins/nf-amazon/build.gradle @@ -32,6 +32,7 @@ nextflowPlugin { 'nextflow.cloud.aws.mail.AwsMailProvider', 'nextflow.cloud.aws.util.S3PathFactory', 'nextflow.cloud.aws.util.S3PathSerializer', + 'nextflow.cloud.aws.scm.S3RepositoryFactory' ] } diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy index f5c8465fbe..f7d7f42be9 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy @@ -124,7 +124,12 @@ class S3RepositoryProvider extends RepositoryProvider { } } - /** {@inheritDoc} **/ + @Override + List listDirectory(String path, int depth) { + throw new UnsupportedOperationException("S3-git-remote does not support 'listDirectory' operation") + } + +/** {@inheritDoc} **/ // called by AssetManager @Override void validateRepo() { From 671875ef16d3d74f09f235362d567894c6d88a01 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 10 Oct 2025 09:16:44 +0200 Subject: [PATCH 12/20] Get default AWS region with the DefaultAwsRegionProvideChain Signed-off-by: jorgee --- 
.../cloud/aws/scm/S3ProviderConfig.groovy | 3 ++- .../cloud/aws/scm/S3ProviderConfigTest.groovy | 18 ++++++------------ 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy index 30369005ed..134953f17a 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy @@ -26,6 +26,7 @@ import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider import software.amazon.awssdk.regions.Region +import software.amazon.awssdk.regions.providers.DefaultAwsRegionProviderChain /** * Implements a provider config for git-remote-s3 repositories @@ -36,7 +37,7 @@ import software.amazon.awssdk.regions.Region @CompileStatic class S3ProviderConfig extends ProviderConfig { - private Region region = Region.US_EAST_1 + private Region region = DefaultAwsRegionProviderChain.builder().build().region private AwsCredentialsProvider awsCredentialsProvider = DefaultCredentialsProvider.builder().build() diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy index a1e6875206..927a599c34 100644 --- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy @@ -18,6 +18,7 @@ package nextflow.cloud.aws.scm import nextflow.Global import nextflow.Session +import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider import software.amazon.awssdk.regions.Region import 
spock.lang.Specification @@ -35,6 +36,7 @@ class S3ProviderConfigTest extends Specification { def 'should create S3 provider config with name only'() { when: + System.setProperty('aws.region', 'us-east-1') // Force Default Region provider to get the us-east-1 def config = new S3ProviderConfig('my-bucket') then: @@ -173,7 +175,7 @@ class S3ProviderConfigTest extends Specification { then: config.awsCredentialsProvider != null - // DefaultCredentialsProvider is used by default + config.awsCredentialsProvider instanceof DefaultCredentialsProvider } def 'should handle different AWS regions'() { @@ -202,6 +204,7 @@ class S3ProviderConfigTest extends Specification { then: // Should not set static credentials provider if secretKey is missing config.awsCredentialsProvider != null + config.awsCredentialsProvider instanceof DefaultCredentialsProvider } def 'should handle secretKey without accessKey'() { @@ -217,21 +220,12 @@ class S3ProviderConfigTest extends Specification { then: // Should not set static credentials provider if accessKey is missing config.awsCredentialsProvider != null - } - - def 'should handle empty config map'() { - when: - def config = new S3ProviderConfig('my-bucket') - - then: - config.name == 'my-bucket' - config.platform == 's3' - config.server == 's3://my-bucket' - config.region == Region.US_EAST_1 + config.awsCredentialsProvider instanceof DefaultCredentialsProvider } def 'should handle null Global session'() { given: + System.setProperty('aws.region', 'us-east-1') // Force Default Region provider to get the us-east-1 Global.session = null when: From 2ac83f371bb9cf9e3e2b7e39e4e3c4a22f5e1940 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 10 Oct 2025 11:02:19 +0200 Subject: [PATCH 13/20] fix get region defaults form AwsConfig Signed-off-by: jorgee --- .../cloud/aws/config/AwsConfig.groovy | 3 ++ .../cloud/aws/scm/S3ProviderConfig.groovy | 43 +++++++++++-------- .../cloud/aws/scm/jgit/S3BaseConnection.java | 3 +- 
.../cloud/aws/scm/S3ProviderConfigTest.groovy | 21 ++++++++- .../aws/scm/S3RepositoryFactoryTest.groovy | 15 ------- 5 files changed, 49 insertions(+), 36 deletions(-) diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsConfig.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsConfig.groovy index f5719c1af0..106f671b6d 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsConfig.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/config/AwsConfig.groovy @@ -130,6 +130,9 @@ class AwsConfig implements ConfigScope { if( region ) return region.toString() } + if( env && env.AWS_REGION ) { + return env.AWS_REGION.toString() + } if( env && env.AWS_DEFAULT_REGION ) { return env.AWS_DEFAULT_REGION.toString() diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy index 134953f17a..5538e2cc49 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy @@ -19,11 +19,14 @@ package nextflow.cloud.aws.scm import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import nextflow.Global +import nextflow.cloud.aws.AwsClientFactory +import nextflow.cloud.aws.config.AwsConfig import nextflow.exception.AbortOperationException import nextflow.scm.ProviderConfig import software.amazon.awssdk.auth.credentials.AwsBasicCredentials import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider +import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider import software.amazon.awssdk.regions.Region import software.amazon.awssdk.regions.providers.DefaultAwsRegionProviderChain @@ -37,38 +40,44 @@ import 
software.amazon.awssdk.regions.providers.DefaultAwsRegionProviderChain @CompileStatic class S3ProviderConfig extends ProviderConfig { - private Region region = DefaultAwsRegionProviderChain.builder().build().region + private Region region private AwsCredentialsProvider awsCredentialsProvider = DefaultCredentialsProvider.builder().build() S3ProviderConfig(String name, Map values) { super(name, [ server: "s3://$name"] + values) - setDefaultsFromAwsConfig() - // Override with scm repo attributes - setValuesFromMap(values) + setValues(values) } S3ProviderConfig(String name){ super(name,[ platform: 's3', server: "s3://$name"]) - setDefaultsFromAwsConfig() + setValues() } - private void setDefaultsFromAwsConfig() { - final config = Global.session?.config?.aws as Map - if( config ) { - setValuesFromMap(config) - } - } - private void setValuesFromMap(Map values){ - if( values.region ) { - region = Region.of(values.region as String) + private void setValues(Map values = Map.of()) { + //Get sessions config if exists + def session = Global.session?.config?.aws as Map ?: Map.of() + + //Merge with scm values and convert to AwsConfg to unify SysEnv fallback and profile management + final config = new AwsConfig(session + values) + if( config.region ) { + region = Region.of(config.region) + }else { + // fallback to default region provider + region = DefaultAwsRegionProviderChain.builder().build().region } - if( values.accessKey && values.secretKey ){ + if( config.accessKey && config.secretKey ){ awsCredentialsProvider = StaticCredentialsProvider.create( AwsBasicCredentials.builder() - .accessKeyId(values.accessKey as String) - .secretAccessKey(values.secretKey as String) + .accessKeyId(config.accessKey as String) + .secretAccessKey(config.secretKey as String) .build()) + } else if( config.profile ){ + // Get credentials from profile + awsCredentialsProvider = ProfileCredentialsProvider.builder().profileName(config.profile).build() + } else { + // fallback to default credentials 
provider + awsCredentialsProvider = DefaultCredentialsProvider.builder().build() } } diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3BaseConnection.java b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3BaseConnection.java index 798f02421a..a21c77762c 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3BaseConnection.java +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3BaseConnection.java @@ -167,8 +167,7 @@ private BranchData(String type, String simpleName, ObjectId objectId){ } public static BranchData fromKey(String key){ String[] parts = key.split("/"); - if (parts.length < 5) throw new RuntimeException("Incorrect key parts"); - // Expect: repo-path/refs///.bundle + if (parts.length < 5) throw new RuntimeException("Incorrect s3 key parts inside the S3-git-remote repository. Key should include the following parts: repo-path/refs///.bundle"); final String type = parts[parts.length - 3]; final String rBranch = parts[parts.length - 2]; final String sha = parts[parts.length - 1].replace(".bundle", ""); diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy index 927a599c34..c1286bb50e 100644 --- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy +++ b/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3ProviderConfigTest.groovy @@ -18,7 +18,9 @@ package nextflow.cloud.aws.scm import nextflow.Global import nextflow.Session +import nextflow.SysEnv import software.amazon.awssdk.auth.credentials.DefaultCredentialsProvider +import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider import software.amazon.awssdk.regions.Region import spock.lang.Specification @@ -36,7 +38,7 @@ class S3ProviderConfigTest extends Specification { def 'should create S3 provider config with name only'() 
{ when: - System.setProperty('aws.region', 'us-east-1') // Force Default Region provider to get the us-east-1 + SysEnv.push(['AWS_DEFAULT_REGION': 'us-east-1']) // Force Default Region provider to get the us-east-1 def config = new S3ProviderConfig('my-bucket') then: @@ -45,6 +47,9 @@ class S3ProviderConfigTest extends Specification { config.server == 's3://my-bucket' config.region == Region.US_EAST_1 config.awsCredentialsProvider != null + + cleanup: + SysEnv.pop() } def 'should create S3 provider config with values map'() { @@ -178,6 +183,15 @@ class S3ProviderConfigTest extends Specification { config.awsCredentialsProvider instanceof DefaultCredentialsProvider } + def 'should use profile credentials provider when profile is specified'() { + when: + def config = new S3ProviderConfig('my-bucket',[platform: 's3', profile: 'profileA']) + + then: + config.awsCredentialsProvider != null + config.awsCredentialsProvider instanceof ProfileCredentialsProvider + } + def 'should handle different AWS regions'() { expect: new S3ProviderConfig('bucket', [platform: 's3', region: REGION]).region == EXPECTED @@ -225,7 +239,7 @@ class S3ProviderConfigTest extends Specification { def 'should handle null Global session'() { given: - System.setProperty('aws.region', 'us-east-1') // Force Default Region provider to get the us-east-1 + SysEnv.push(['AWS_DEFAULT_REGION': 'us-east-1']) // Force Default Region provider to get the us-east-1 Global.session = null when: @@ -234,5 +248,8 @@ class S3ProviderConfigTest extends Specification { then: noExceptionThrown() config.region == Region.US_EAST_1 + + cleanup: + SysEnv.pop() } } diff --git a/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3RepositoryFactoryTest.groovy b/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3RepositoryFactoryTest.groovy index 17bd669a60..01ed6e58ac 100644 --- a/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3RepositoryFactoryTest.groovy +++ 
b/plugins/nf-amazon/src/test/nextflow/cloud/aws/scm/S3RepositoryFactoryTest.groovy @@ -52,21 +52,6 @@ class S3RepositoryFactoryTest extends Specification { provider == null } - def 'should register TransportS3 on first provider creation'() { - given: - def factory = new S3RepositoryFactory() - def config = new S3ProviderConfig('test-bucket') - - when: - def provider1 = factory.createProviderInstance(config, 'test-bucket/project1') - def provider2 = factory.createProviderInstance(config, 'test-bucket/project2') - - then: - provider1 instanceof S3RepositoryProvider - provider2 instanceof S3RepositoryProvider - // TransportS3.register() should be called only once - } - def 'should get config for s3 URL'() { given: def factory = new S3RepositoryFactory() From d6818bb49fc995684a487d6387d557527a8593f2 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 10 Oct 2025 12:14:21 +0200 Subject: [PATCH 14/20] adding comment why reflection is used Signed-off-by: jorgee --- .../cloud/aws/scm/jgit/S3PushConnection.java | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3PushConnection.java b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3PushConnection.java index 8f56da8566..3c63cfc6cb 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3PushConnection.java +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/jgit/S3PushConnection.java @@ -128,13 +128,26 @@ private boolean isSameObjectId(S3Object s3object, ObjectId commitId){ return BranchData.fromKey(s3object.key()).getObjectId().name().equals(commitId.name()); } - private void setUpdateStatus(RemoteRefUpdate update, RemoteRefUpdate.Status status) { + /** + * Sets the status on a RemoteRefUpdate using reflection. + * This is necessary because RemoteRefUpdate.status is package-private and JGit + * doesn't provide a public API to set it. 
It also uses JAR signing verification, which + * prevents implementations of this class from living in the org.eclipse.jgit.transport package. + * Custom transport implementations like this S3 transport need to update the + * RemoteRefUpdate.status to inform callers of push results. + */ + private void setUpdateStatus(RemoteRefUpdate update, RemoteRefUpdate.Status status) { try { Field statusField = RemoteRefUpdate.class.getDeclaredField("status"); statusField.setAccessible(true); statusField.set(update, status); + } catch (NoSuchFieldException e) { + throw new RuntimeException("JGit API changed: RemoteRefUpdate.status field not found. " + + "This may require updating the transport implementation.", e); + } catch (IllegalAccessException e) { + throw new RuntimeException("Unable to access RemoteRefUpdate.status field", e); } catch (Exception e) { - throw new RuntimeException("Unable to set status on RemoteRefUpdate", e); + throw new RuntimeException("Unexpected error setting status on RemoteRefUpdate", e); } } From 8c5e48beb669713fcc70e37932918729a4103e5a Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 10 Oct 2025 14:01:51 +0200 Subject: [PATCH 15/20] change from folder-first to repo-first and address claude comment Signed-off-by: jorgee --- .../main/groovy/nextflow/cli/CmdPush.groovy | 20 +++-- .../groovy/nextflow/cli/CmdPushTest.groovy | 23 +++-- .../cloud/aws/scm/S3RepositoryProvider.groovy | 84 +++++++++++++++---- validation/test-s3-git-remote.sh | 10 ++- 4 files changed, 96 insertions(+), 41 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy index fc5a9f0ba8..7de170c55d 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy @@ -24,11 +24,9 @@ import nextflow.plugin.Plugins import nextflow.scm.AssetManager import nextflow.util.TestOnly import org.eclipse.jgit.api.Git -import
org.eclipse.jgit.errors.RepositoryNotFoundException import org.eclipse.jgit.transport.RemoteConfig import org.eclipse.jgit.transport.URIish -import java.io.FileFilter /** * CLI sub-command Push @@ -42,11 +40,11 @@ class CmdPush extends CmdBase implements HubOptions { static final public NAME = 'push' - @Parameter(description = 'Path to push', arity = 1) + @Parameter(description = 'Repository URL to push to (optional if already configured as git remote)') List args - @Parameter(names=['-repo'], description = 'Defines the repository to push to') - String repository + @Parameter(names=['-d', '-directory'], description = 'Local directory to push (default: current directory)') + String directory @Parameter(names=['-r','-revision'], description = 'Revision of the project to run (either a git branch, tag or commit SHA number)') String revision = 'main' @@ -65,11 +63,17 @@ class CmdPush extends CmdBase implements HubOptions { @Override void run() { - if( !args || args.size() != 1){ - throw new AbortOperationException('Incorrect folder argument') + if( args && args.size() > 1){ + throw new AbortOperationException('Incorrect number of arguments') } - def folder = new File(args[0]).getAbsoluteFile() + // Get repository from args (optional) + def repository = args && args.size() == 1 ? args[0] : null + + // Folder defaults to current working directory if not specified + def folder = directory + ? 
new File(directory).getAbsoluteFile() + : new File(System.getProperty('user.dir')).getAbsoluteFile() if( !folder.exists() ) throw new AbortOperationException("Folder does not exist: ${folder.absolutePath}") diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdPushTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdPushTest.groovy index 4b8d20eb7d..a16fb762f5 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdPushTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdPushTest.groovy @@ -1,5 +1,5 @@ /* - * Copyright 2013-2024, Seqera Labs + * Copyright 2013-2025, Seqera Labs * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,6 @@ import java.nio.file.Files import nextflow.exception.AbortOperationException import nextflow.plugin.Plugins import org.eclipse.jgit.api.Git -import spock.lang.IgnoreIf import spock.lang.Specification /** @@ -31,7 +30,7 @@ import spock.lang.Specification * * @author Jorge Ejarque */ -@IgnoreIf({System.getenv('NXF_SMOKE')}) + class CmdPushTest extends Specification { def cleanup() { @@ -51,7 +50,7 @@ class CmdPushTest extends Specification { def 'should fail when folder does not exist'() { given: - def cmd = new CmdPush(args: ['/nonexistent/folder'], repository: 'https://github.com/test/repo.git') + def cmd = new CmdPush(args: ['https://github.com/test/repo.git'], directory: '/nonexistent/folder') when: cmd.run() @@ -64,7 +63,7 @@ class CmdPushTest extends Specification { def 'should fail when path is not a directory'() { given: def tempFile = Files.createTempFile('test', '.txt').toFile() - def cmd = new CmdPush(args: [tempFile.absolutePath], repository: 'https://github.com/test/repo.git') + def cmd = new CmdPush(args: ['https://github.com/test/repo.git'], directory: tempFile.absolutePath) when: cmd.run() @@ -80,7 +79,7 @@ class CmdPushTest extends Specification { def 'should fail when no repository 
specified and no git repo exists'() { given: def tempDir = Files.createTempDirectory('test').toFile() - def cmd = new CmdPush(args: [tempDir.absolutePath]) + def cmd = new CmdPush(directory: tempDir.absolutePath) when: cmd.run() @@ -189,8 +188,8 @@ class CmdPushTest extends Specification { git.close() def cmd = new CmdPush( - args: [tempDir.absolutePath], - repository: 'https://github.com/correct/repo.git' + args: ['https://github.com/correct/repo.git'], + directory: tempDir.absolutePath ) when: @@ -225,8 +224,8 @@ class CmdPushTest extends Specification { git.close() def cmd = new CmdPush( - args: [tempDir.absolutePath], - repository: 'https://github.com/test/repo.git' + args: ['https://github.com/test/repo.git'], + directory: tempDir.absolutePath ) when: @@ -261,8 +260,8 @@ class CmdPushTest extends Specification { git.close() def cmd = new CmdPush( - args: [tempDir.absolutePath], - repository: 'https://github.com/test/repo.git', + args: ['https://github.com/test/repo.git'], + directory: tempDir.absolutePath, revision: 'main' ) diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy index f7d7f42be9..575be17338 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3RepositoryProvider.groovy @@ -25,9 +25,12 @@ import nextflow.scm.ProviderConfig import nextflow.scm.RepositoryProvider import org.eclipse.jgit.api.Git import org.eclipse.jgit.api.errors.TransportException +import org.eclipse.jgit.lib.Constants +import org.eclipse.jgit.lib.Ref import org.eclipse.jgit.transport.CredentialsProvider import java.nio.file.Files +import java.nio.file.Path /** @@ -96,32 +99,79 @@ class S3RepositoryProvider extends RepositoryProvider { @Override String getRepositoryUrl() { getEndpointUrl() } - /** {@inheritDoc} **/ - // called by AssetManager - // called by 
RepositoryProvider.readText() + /** + * {@inheritDoc} + * + * Note: S3 git-remote stores repositories as Git bundles in S3 (one bundle per branch). + * Reading a single file requires downloading and unpacking the entire bundle for that branch. + * When no revision is specified, we determine the default branch from the remote HEAD + * to avoid downloading unnecessary branches. + */ @Override byte[] readBytes( String path ) { - log.debug("Reading $path") - //Not possible to get a single file requires to clone the branch and get the file - final tmpDir = Files.createTempDirectory("s3-git-remote") - final command = Git.cloneRepository() - .setURI(getEndpointUrl()) - .setDirectory(tmpDir.toFile()) - .setCredentialsProvider(getGitCredentials()) - if( revision ) - command.setBranch(revision) + log.debug("Reading $path from S3 git-remote") + Path tmpDir = null try { + tmpDir = Files.createTempDirectory("s3-git-remote-") + + // Determine which branch to clone + def branchToClone = revision + if (!branchToClone) { + // No revision specified - fetch only the default branch + // This avoids downloading unnecessary branch bundles + branchToClone = getDefaultBranch() + log.debug("No revision specified, using default branch: $branchToClone") + } + + final command = Git.cloneRepository() + .setURI(getEndpointUrl()) + .setDirectory(tmpDir.toFile()) + .setCredentialsProvider(getGitCredentials()) + .setCloneAllBranches(false) // Only clone the specified branch + .setBranch(branchToClone) + command.call() final file = tmpDir.resolve(path) - return file.getBytes() + return Files.exists(file) ? 
Files.readAllBytes(file) : null } catch (Exception e) { - log.debug(" unable to retrieve file: $path from repo: $project", e) + log.debug("Unable to retrieve file: $path from repo: $project", e) return null } - finally{ - tmpDir.deleteDir() + finally { + if (tmpDir != null && Files.exists(tmpDir)) { + tmpDir.toFile().deleteDir() + } + } + } + + /** + * Get the default branch from the S3 git-remote repository by querying remote refs. + * Uses Git's lsRemote to fetch the HEAD symbolic ref, which points to the default branch. + * + * @return The default branch name + */ + @Memoized + String getDefaultBranch() { + // Fetch remote refs using Git's lsRemote + final refs = fetchRefs() + if (!refs){ + throw new Exception("No remote references found") + } + // Find the HEAD symbolic ref + final headRef = refs.find { it.name == Constants.HEAD } + + if (!headRef ) + throw new Exception("No remote HEAD ref found ") + + if( !headRef.isSymbolic() ) + throw new Exception("Incorrect HEAD ref. Not a symbolic ref.") + + final target = headRef.target.name + if( target.startsWith('refs/heads/') ) { + return target.substring('refs/heads/'.length()) } + return target } @Override @@ -129,7 +179,7 @@ class S3RepositoryProvider extends RepositoryProvider { throw new UnsupportedOperationException("S3-git-remote does not support 'listDirectory' operation") } -/** {@inheritDoc} **/ + /** {@inheritDoc} **/ // called by AssetManager @Override void validateRepo() { diff --git a/validation/test-s3-git-remote.sh b/validation/test-s3-git-remote.sh index 85edc058fd..6986ec5a78 100644 --- a/validation/test-s3-git-remote.sh +++ b/validation/test-s3-git-remote.sh @@ -38,8 +38,8 @@ cp awsbatch.config "${TEMP_DIR}/nextflow.config" trap remove EXIT cd ${TEMP_DIR} -echo "Pushing pipeline to ${S3_REPO}" -$NXF_CMD push s3-remote-test-repo -repo "${S3_REPO}" -r main +echo "Pushing pipeline to ${S3_REPO} (with explicit repo URL)" +$NXF_CMD push "${S3_REPO}" -d s3-remote-test-repo -r main echo "Running pipeline 
from S3 remote" $NXF_CMD -q run "${S3_REPO}" -r main | tee stdout1 @@ -51,8 +51,10 @@ $NXF_CMD -q run "${S3_REPO}" -r main | tee stdout1 echo "Modifying pipeline message" sed -i "s/Hey!/Hey there!/g" s3-remote-test-repo/main.nf -echo "Pushing modified pipeline to ${S3_REPO}" -$NXF_CMD push s3-remote-test-repo -m "Update greeting message" +echo "Pushing modified pipeline to ${S3_REPO} (auto-detect from git remote)" +cd s3-remote-test-repo +$NXF_CMD push -m "Update greeting message" +cd .. echo "Running modified pipeline from S3 remote" $NXF_CMD -q run "${S3_REPO}" -r main -latest | tee stdout2 From 03a340f05d8203da04bf94b06ce14a4ab15564ca Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 10 Oct 2025 15:07:00 +0200 Subject: [PATCH 16/20] fix defaults fallback fix errors Signed-off-by: jorgee --- .../main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy index 5538e2cc49..7f9ff05e25 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/scm/S3ProviderConfig.groovy @@ -40,7 +40,7 @@ import software.amazon.awssdk.regions.providers.DefaultAwsRegionProviderChain @CompileStatic class S3ProviderConfig extends ProviderConfig { - private Region region + private Region region = Region.US_EAST_1 private AwsCredentialsProvider awsCredentialsProvider = DefaultCredentialsProvider.builder().build() @@ -62,9 +62,6 @@ class S3ProviderConfig extends ProviderConfig { final config = new AwsConfig(session + values) if( config.region ) { region = Region.of(config.region) - }else { - // fallback to default region provider - region = DefaultAwsRegionProviderChain.builder().build().region } if( config.accessKey && config.secretKey ){ awsCredentialsProvider = StaticCredentialsProvider.create( @@ 
-75,9 +72,6 @@ class S3ProviderConfig extends ProviderConfig { } else if( config.profile ){ // Get credentials from profile awsCredentialsProvider = ProfileCredentialsProvider.builder().profileName(config.profile).build() - } else { - // fallback to default credentials provider - awsCredentialsProvider = DefaultCredentialsProvider.builder().build() } } From a0d2cfe814076edabddaf567af14824a92c2fa85 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Wed, 5 Nov 2025 18:33:32 +0100 Subject: [PATCH 17/20] Update command description [ci fast] Signed-off-by: Paolo Di Tommaso --- modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy index 7de170c55d..767f1240b6 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy @@ -35,7 +35,7 @@ import org.eclipse.jgit.transport.URIish */ @Slf4j @CompileStatic -@Parameters(commandDescription = "Pushes a local implementation to a remote repository") +@Parameters(commandDescription = "Upload a local project to a remote Git repository") class CmdPush extends CmdBase implements HubOptions { static final public NAME = 'push' From 5709f1bd9f9973d9bf73376902c1a81c27c5ca71 Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 7 Nov 2025 22:12:35 +0100 Subject: [PATCH 18/20] add commit option and remove directory option Signed-off-by: jorgee --- .../main/groovy/nextflow/cli/CmdPush.groovy | 386 +----------------- .../groovy/nextflow/scm/PushManager.groovy | 373 +++++++++++++++++ 2 files changed, 382 insertions(+), 377 deletions(-) create mode 100644 modules/nextflow/src/main/groovy/nextflow/scm/PushManager.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy index 
767f1240b6..b2eb64091f 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy @@ -21,11 +21,8 @@ import groovy.transform.CompileStatic import groovy.util.logging.Slf4j import nextflow.exception.AbortOperationException import nextflow.plugin.Plugins -import nextflow.scm.AssetManager +import nextflow.scm.PushManager import nextflow.util.TestOnly -import org.eclipse.jgit.api.Git -import org.eclipse.jgit.transport.RemoteConfig -import org.eclipse.jgit.transport.URIish /** @@ -35,7 +32,7 @@ import org.eclipse.jgit.transport.URIish */ @Slf4j @CompileStatic -@Parameters(commandDescription = "Upload a local project to a remote Git repository") +@Parameters(commandDescription = "Pushes a local implementation to a remote repository") class CmdPush extends CmdBase implements HubOptions { static final public NAME = 'push' @@ -43,8 +40,8 @@ class CmdPush extends CmdBase implements HubOptions { @Parameter(description = 'Repository URL to push to (optional if already configured as git remote)') List args - @Parameter(names=['-d', '-directory'], description = 'Local directory to push (default: current directory)') - String directory + @Parameter(names=['-c', '-commit'], description = 'Add and commit changes in the current directory (default false)') + boolean commit = false @Parameter(names=['-r','-revision'], description = 'Revision of the project to run (either a git branch, tag or commit SHA number)') String revision = 'main' @@ -69,11 +66,7 @@ class CmdPush extends CmdBase implements HubOptions { // Get repository from args (optional) def repository = args && args.size() == 1 ? args[0] : null - - // Folder defaults to current working directory if not specified - def folder = directory - ? 
new File(directory).getAbsoluteFile() - : new File(System.getProperty('user.dir')).getAbsoluteFile() + def folder = new File(System.getProperty('user.dir')).getAbsoluteFile() if( !folder.exists() ) throw new AbortOperationException("Folder does not exist: ${folder.absolutePath}") @@ -85,379 +78,18 @@ class CmdPush extends CmdBase implements HubOptions { Plugins.init() try { + final manager = new PushManager(folder, commit, maxSizeMB) def resolvedRepo = repository if( !resolvedRepo ) { - resolvedRepo = resolveRepository(folder) + resolvedRepo = manager.resolveRepository() } - log.info "Pushing folder ${folder.absolutePath} to repository ${resolvedRepo}" - pushFolder(folder, resolvedRepo, revision) + log.info "Pushing folder ${folder.absolutePath} to repository ${resolvedRepo} (commit: $commit)" + manager.push(resolvedRepo, revision) } catch( Exception e ) { throw new AbortOperationException("Failed to push folder: ${e.message}", e) } } - private void pushFolder(File folder, String repo, String rev) { - def gitDir = new File(folder, '.git') - def remoteName = "origin" - def isNewRepo = false - - if( gitDir.exists() ) { - log.debug "Found existing git repository in ${folder.absolutePath}" - remoteName = validateExistingRepo(folder, repo) - checkCurrentBranch(folder, rev) - } else { - log.debug "No git repository found, initializing new one" - initializeRepo(folder, repo, rev) - isNewRepo = true - } - - checkFileSizes(folder) - manageNextflowGitignore(folder) - stageAndCommitFiles(folder) - def manager = new AssetManager(folder, repo, this) - manager.upload(rev, remoteName, isNewRepo) - log.info "Successfully pushed to ${repo} (revision: ${rev})" - } - - private String validateExistingRepo(File folder, String expectedRepo) { - def git = Git.open(folder) - - try { - def remotes = git.remoteList().call() - - // Find all remotes and check if any matches the expected repo - def matchingRemote = null - - for( RemoteConfig remote : remotes ) { - if( remote.URIs ) { - def 
remoteUrl = remote.URIs[0].toString() - def normalizedRemote = normalizeRepoUrl(remoteUrl) - def normalizedExpected = normalizeRepoUrl(expectedRepo) - - if( normalizedRemote == normalizedExpected ) { - matchingRemote = remote.name - break - } - } - } - - if( !matchingRemote ) { - def remotesList = remotes.collect { remote -> - def url = remote.URIs ? remote.URIs[0].toString() : 'no URL' - " ${remote.name}: ${url}" - }.join('\n') - - throw new AbortOperationException( - "Repository URL not found in remotes!\n" + - " Expected repository: ${expectedRepo}\n" + - " Available remotes:\n${remotesList}\n" + - "Please add the repository as a remote or specify the correct repository." - ) - } - - return matchingRemote - } - finally { - git.close() - } - } - - private String normalizeRepoUrl(String url) { - return url?.toLowerCase()?.replaceAll(/\.git$/, '')?.replaceAll(/\/$/, '') - } - - private void checkCurrentBranch(File folder, String requestedBranch) { - def git = Git.open(folder) - - try { - def head = git.getRepository().findRef("HEAD") - if( !head ) { - log.debug "No HEAD found, assuming new repository" - git.close() - return - } - - def currentBranch = null - if( head.isSymbolic() ) { - currentBranch = git.getRepository().getBranch() - } else { - log.debug "HEAD is not symbolic (detached state)" - git.close() - throw new AbortOperationException("Repository is in detached HEAD state. Please checkout to a branch before pushing.") - } - - if( currentBranch && currentBranch != requestedBranch ) { - git.close() - throw new AbortOperationException( - "Current branch '${currentBranch}' does not match requested branch '${requestedBranch}'.\n" + - "Please checkout to branch '${requestedBranch}' before pushing or specify the correct branch with -r option." 
- ) - } - - log.debug "Current branch '${currentBranch}' matches requested branch '${requestedBranch}'" - } - finally { - git.close() - } - } - - private void initializeRepo(File folder, String repo, String rev) { - log.debug "Initializing git repository in ${folder.absolutePath}" - def git = Git.init().setDirectory(folder).call() - - // Add remote origin - git.remoteAdd() - .setName("origin") - .setUri(new URIish(repo)) - .call() - - git.close() - } - - private void checkFileSizes(File folder) { - def maxSizeBytes = maxSizeMB * 1024 * 1024 - def git = Git.open(folder) - - try { - // Get Git status to find files that would be committed - def status = git.status().call() - def filesToBeCommitted = [] - - // Add untracked files - filesToBeCommitted.addAll(status.untracked) - // Add modified files - filesToBeCommitted.addAll(status.modified) - // Add added files - filesToBeCommitted.addAll(status.added) - - def largeFiles = [] - - filesToBeCommitted.each { relativePath -> - def file = new File(folder, relativePath as String) - if( file.exists() && file.isFile() && file.length() > maxSizeBytes ) { - def fileEntry = [ - file: file, - relativePath: relativePath, - sizeMB: file.length() / (1024 * 1024) - ] - largeFiles.add(fileEntry) - } - } - - if( largeFiles ) { - log.warn "Found ${largeFiles.size()} large files that would be committed:" - largeFiles.each { entry -> - def sizeMB = entry['sizeMB'] as Double - log.warn " ${entry['relativePath']}: ${String.format('%.1f', sizeMB)} MB" - } - - print "Do you want to push these large files? 
[y/N]: " - def response = System.in.newReader().readLine()?.trim()?.toLowerCase() - - if( response != 'y' && response != 'yes' ) { - // Add large files to .gitignore - def relativePaths = largeFiles.collect { entry -> entry['relativePath'] as String } - addToGitignore(folder, relativePaths) - println "Files have been added to .gitignore" - } - } - } - finally { - git.close() - } - } - - private void addToGitignore(File folder, List filenames) { - def gitignoreFile = new File(folder, '.gitignore') - def content = [] - - if( gitignoreFile.exists() ) { - content = gitignoreFile.readLines() - } - - filenames.each { filename -> - if( !content.contains(filename) ) { - content.add(filename) - } - } - - gitignoreFile.text = content.join('\n') + '\n' - log.info "Added ${filenames.size()} large files to .gitignore" - } - - private void manageNextflowGitignore(File folder) { - def gitignoreFile = new File(folder, '.gitignore') - List content = [] - - if( gitignoreFile.exists() ) { - content = gitignoreFile.readLines() - } - - // Default Nextflow entries to add - def nextflowEntries = [ - '.nextflow', - '.nextflow.log*' - ] - - def added = [] - nextflowEntries.each { entry -> - if( !content.contains(entry) ) { - content.add(entry) - added.add(entry) - } - } - - // Check for work directory - def workDirs = findWorkDirectories(folder) - if( workDirs ) { - def workEntriesToAdd = promptForWorkDirectories(workDirs, content) - workEntriesToAdd.each { workDir -> - if( !content.contains(workDir) ) { - content.add(workDir) - added.add(workDir) - } - } - } - - if( added ) { - gitignoreFile.text = content.join('\n') + '\n' - log.info "Added ${added.size()} Nextflow entries to .gitignore: ${added.join(', ')}" - } else { - log.debug "All Nextflow entries already present in .gitignore" - } - } - - private List findWorkDirectories(File folder) { - List workDirs = [] - - // Check for the default Nextflow work directory - def workDir = new File(folder, 'work') - if( workDir.exists() && 
workDir.isDirectory() ) { - workDirs.add('work') - } - - return workDirs - } - - private List promptForWorkDirectories(List workDirs, List currentGitignore) { - List toAdd = [] - - workDirs.each { workDir -> - // Check if already in .gitignore - if( currentGitignore.contains(workDir) ) { - log.debug "Work directory '${workDir}' already in .gitignore" - return // Skip this directory - } - - println "Found Nextflow work directory: ${workDir}" - print "Do you want to add '${workDir}' to .gitignore? [Y/n]: " - def response = System.in.newReader().readLine()?.trim()?.toLowerCase() - - // Default to 'yes' if empty response or 'y'/'yes' - if( !response || response == 'y' || response == 'yes' ) { - toAdd.add(workDir) - log.info "Will add '${workDir}' to .gitignore" - } else { - log.info "Skipping '${workDir}'" - } - } - - return toAdd - } - - private void stageAndCommitFiles(File folder) { - def git = Git.open(folder) - - // Add all files - git.add().addFilepattern(".").call() - - // Check if there are any changes to commit - def status = git.status().call() - if( status.clean ) { - log.info "No changes to commit" - git.close() - return - } - - // Commit changes - git.commit() - .setMessage(message) - .call() - - log.debug "Committed changes with message: ${message}" - git.close() - } - - private String resolveRepository(File folder) { - def gitDir = new File(folder, '.git') - - if( !gitDir.exists() ) { - throw new AbortOperationException("No git repository found and no repository URL provided. Please specify a repository with -repo parameter.") - } - - def git = Git.open(folder) - - try { - def remotes = git.remoteList().call() - - if( remotes.empty ) { - throw new AbortOperationException("No remotes configured in git repository. 
Please add a remote or specify a repository with -repo parameter.") - } - - if( remotes.size() == 1 ) { - def remote = remotes[0] - def remoteUrl = remote.URIs[0].toString() - log.info "Using remote '${remote.name}': ${remoteUrl}" - return remoteUrl - } - - // Multiple remotes - ask user to choose - return selectRemoteFromUser(remotes) - } - finally { - git.close() - } - } - - private static String selectRemoteFromUser(List remotes) { - println "Multiple remotes found. Please select which remote to push to:" - - def remoteOptions = [:] - remotes.eachWithIndex { remote, index -> - def remoteUrl = remote.URIs[0].toString() - def remoteInfo = [name: remote.name, url: remoteUrl] - remoteOptions[index + 1] = remoteInfo - println " ${index + 1}. ${remote.name}: ${remoteUrl}" - } - - println " ${remotes.size() + 1}. Cancel" - - while( true ) { - print "Enter your choice [1-${remotes.size() + 1}]: " - def input = System.in.newReader().readLine()?.trim() - - try { - def choice = Integer.parseInt(input) - - if( choice == remotes.size() + 1 ) { - throw new AbortOperationException("Push operation cancelled by user.") - } - - if( choice >= 1 && choice <= remotes.size() ) { - def selected = remoteOptions[choice] - log.info "Selected remote '${selected['name']}': ${selected['url']}" - return selected['url'] - } - - println "Invalid choice. Please enter a number between 1 and ${remotes.size() + 1}." - } - catch( NumberFormatException ignored ) { - println "Invalid input. Please enter a number." 
- } - } - } - - } diff --git a/modules/nextflow/src/main/groovy/nextflow/scm/PushManager.groovy b/modules/nextflow/src/main/groovy/nextflow/scm/PushManager.groovy new file mode 100644 index 0000000000..932bf9ff91 --- /dev/null +++ b/modules/nextflow/src/main/groovy/nextflow/scm/PushManager.groovy @@ -0,0 +1,373 @@ +package nextflow.scm + +import groovy.transform.CompileStatic +import groovy.util.logging.Slf4j +import nextflow.exception.AbortOperationException +import org.eclipse.jgit.api.Git +import org.eclipse.jgit.api.Status +import org.eclipse.jgit.transport.RemoteConfig +import org.eclipse.jgit.transport.URIish + +/** + * Manage the push of a folder in to a git repository + * + * @author Jorge Ejarque + */ +@Slf4j +@CompileStatic +class PushManager { + private static final String DEFAULT_BRANCH = 'main' + File folder + boolean commit + int maxSizeMB + + PushManager(File folder, boolean commit, int maxSizeMB){ + this.folder = folder + this.commit = commit + this.maxSizeMB = maxSizeMB + } + + boolean isLocalGit(){ + final gitDir = new File(folder, '.git') + return gitDir.exists() + } + + String push(String repo, String requestedBranch){ + def remoteName = "origin" + def isNewRepo = false + def revision = DEFAULT_BRANCH + if( isLocalGit() ) { + log.debug "Found existing git repository in ${folder.absolutePath}" + remoteName = validateExistingRepo(repo) + def currentBranch = getCurrentBranch() + if( requestedBranch && currentBranch && currentBranch != requestedBranch ) { + throw new AbortOperationException( + "Current branch '${currentBranch}' does not match requested branch '${requestedBranch}'.\n" + + "Please checkout to branch '${requestedBranch}' before pushing or specify the correct branch with -r option." 
+ ) + } + revision = requestedBranch ?: currentBranch ?: revision + } else if (commit){ + revision = requestedBranch ?: revision + log.debug "No git repository found in ${folder.absolutePath}, initializing a git repo with remote $repo and branch ${revision}" + initializeRepo(repo) + isNewRepo = true + } else { + throw new AbortOperationException( "No git repository found in ${folder.absolutePath} - Select the 'commit' option initialize and commit current files") + } + if (commit) { + checkFileSizes() + manageNextflowGitignore() + stageAndCommitFiles() + } + def manager = new AssetManager(folder, repo) + manager.upload(revision, remoteName, isNewRepo) + log.info "Successfully pushed to ${repo} (revision: ${revision})" + return revision + } + + private String validateExistingRepo(String expectedRepo) { + def git = Git.open(folder) + + try { + def remotes = git.remoteList().call() + + // Find all remotes and check if any matches the expected repo + def matchingRemote = null + + for( RemoteConfig remote : remotes ) { + if( remote.URIs ) { + def remoteUrl = remote.URIs[0].toString() + def normalizedRemote = normalizeRepoUrl(remoteUrl) + def normalizedExpected = normalizeRepoUrl(expectedRepo) + + if( normalizedRemote == normalizedExpected ) { + matchingRemote = remote.name + break + } + } + } + + if( !matchingRemote ) { + def remotesList = remotes.collect { remote -> + def url = remote.URIs ? remote.URIs[0].toString() : 'no URL' + " ${remote.name}: ${url}" + }.join('\n') + + throw new AbortOperationException( + "Repository URL not found in remotes!\n" + + " Expected repository: ${expectedRepo}\n" + + " Available remotes:\n${remotesList}\n" + + "Please add the repository as a remote or specify the correct repository." 
+                )
+            }
+
+            return matchingRemote
+        }
+        finally {
+            git.close()
+        }
+    }
+
+    /**
+     * Reduce a repository URL to a canonical form for comparison: lower-cased,
+     * without trailing slashes and without a trailing {@code .git} suffix.
+     *
+     * @param url the URL to normalise, may be {@code null}
+     * @return the normalised URL, or {@code null} when {@code url} is {@code null}
+     */
+    private String normalizeRepoUrl(String url) {
+        if( url == null )
+            return null
+
+        // Locale.ROOT keeps the result independent of the JVM default locale
+        def result = url.toLowerCase(Locale.ROOT)
+        // Strip slashes before and after the suffix so that 'repo.git/' and
+        // 'repo/.git' both reduce to 'repo'
+        result = result.replaceAll(/\/+$/, '')
+        result = result.replaceAll(/\.git$/, '')
+        result = result.replaceAll(/\/+$/, '')
+        return result
+    }
+
+    /**
+     * Read the branch currently checked out in the local repository.
+     *
+     * @return the branch reported by the repository, or {@code null} when no HEAD ref is found
+     * @throws AbortOperationException when HEAD is not a symbolic ref (detached HEAD)
+     */
+    private String getCurrentBranch() {
+        def git = Git.open(folder)
+
+        try {
+            def head = git.getRepository().findRef("HEAD")
+            if( !head ) {
+                log.debug "No HEAD found, assuming new repository. Returning default"
+                return null
+            }
+
+            if( !head.isSymbolic() ) {
+                log.debug "HEAD is not symbolic (detached state)"
+                throw new AbortOperationException("Repository is in detached HEAD state. Please checkout to a branch before pushing.")
+            }
+            return git.getRepository().getBranch()
+        } finally {
+            git.close()
+        }
+    }
+
+    /**
+     * Create a git repository in the managed folder and register {@code repo}
+     * as its {@code origin} remote.
+     *
+     * @param repo URL of the remote repository
+     */
+    private void initializeRepo(String repo) {
+        log.debug "Initializing git repository in ${folder.absolutePath}"
+        def git = Git.init().setDirectory(folder).call()
+
+        // Release the repository handle even when the URL is malformed or
+        // the remote cannot be added
+        try {
+            // Add remote origin
+            git.remoteAdd()
+                .setName("origin")
+                .setUri(new URIish(repo))
+                .call()
+        }
+        finally {
+            git.close()
+        }
+    }
+
+    /**
+     * Warn about untracked, modified or added files larger than the configured
+     * limit and ask on standard input whether they should be pushed. Any answer
+     * other than 'y'/'yes' (including no answer) adds them to .gitignore.
+     */
+    private void checkFileSizes() {
+        // Long arithmetic: with an int limit the product would wrap around for
+        // values of 2048 MB and above (field type is declared outside this view)
+        def maxSizeBytes = this.maxSizeMB * 1024L * 1024L
+        def git = Git.open(folder)
+
+        try {
+            // Get Git status to find files that would be committed
+            def status = git.status().call()
+            def filesToBeCommitted = []
+
+            // Add untracked files
+            filesToBeCommitted.addAll(status.untracked)
+            // Add modified files
+            filesToBeCommitted.addAll(status.modified)
+            // Add added files
+            filesToBeCommitted.addAll(status.added)
+
+            def largeFiles = []
+
+            filesToBeCommitted.each { relativePath ->
+                def file = new File(folder, relativePath as String)
+                if( file.exists() && file.isFile() && file.length() > maxSizeBytes ) {
+                    def fileEntry = [
+                        file: file,
+                        relativePath: relativePath,
+                        sizeMB: file.length() / (1024 * 1024)
+                    ]
+                    largeFiles.add(fileEntry)
+                }
+            }
+
+            if( largeFiles ) {
+                log.warn "Found ${largeFiles.size()} large files that would be committed:"
+                largeFiles.each { entry ->
+                    def sizeMB = entry['sizeMB'] as Double
+                    log.warn " ${entry['relativePath']}: ${String.format('%.1f', sizeMB)} MB"
+                }
+
+                print "Do you want to push these large files? [y/N]: "
+                def response = System.in.newReader().readLine()?.trim()?.toLowerCase()
+
+                if( response != 'y' && response != 'yes' ) {
+                    // Add large files to .gitignore
+                    def relativePaths = largeFiles.collect { entry -> entry['relativePath'] as String }
+                    addToGitignore(relativePaths)
+                    println "Files have been added to .gitignore"
+                }
+            }
+        }
+        finally {
+            git.close()
+        }
+    }
+
+    /**
+     * Append the given paths to the folder's .gitignore, skipping entries that
+     * are already present, and rewrite the file.
+     *
+     * @param filenames paths to ignore
+     */
+    private void addToGitignore(List filenames) {
+        def gitignoreFile = new File(folder, '.gitignore')
+        def content = []
+
+        if( gitignoreFile.exists() ) {
+            content = gitignoreFile.readLines()
+        }
+
+        filenames.each { filename ->
+            if( !content.contains(filename) ) {
+                content.add(filename)
+            }
+        }
+
+        gitignoreFile.text = content.join('\n') + '\n'
+        log.info "Added ${filenames.size()} large files to .gitignore"
+    }
+
+    /**
+     * Make sure .gitignore lists the default Nextflow entries ('.nextflow*' and
+     * 'work'). The file is only rewritten when an entry was missing.
+     */
+    private void manageNextflowGitignore() {
+        def gitignoreFile = new File(folder, '.gitignore')
+        List content = []
+
+        if( gitignoreFile.exists() ) {
+            content = gitignoreFile.readLines()
+        }
+
+        // Default Nextflow entries to add
+        def nextflowEntries = [
+            '.nextflow*',
+            'work'
+        ]
+
+        def added = []
+        nextflowEntries.each { entry ->
+            if( !content.contains(entry) ) {
+                content.add(entry)
+                added.add(entry)
+            }
+        }
+
+        if( added ) {
+            gitignoreFile.text = content.join('\n') + '\n'
+            log.info "Added ${added.size()} Nextflow entries to .gitignore: ${added.join(', ')}"
+        } else {
+            log.debug "All Nextflow entries already present in .gitignore"
+        }
+    }
+
+    /**
+     * Stage everything under the folder and, unless the working tree is clean
+     * or the user declines the confirmation prompt, commit it.
+     *
+     * @param message commit message
+     */
+    private void stageAndCommitFiles(String message='Push from nextflow') {
+        def git = Git.open(folder)
+
+        try {
+            // Add all files
+            git.add().addFilepattern(".").call()
+
+            // Check if there are any changes to commit
+            def status = git.status().call()
+            if( status.clean ) {
+                log.info "No changes to commit"
+                return
+            }
+
+            // Throws AbortOperationException when the user declines
+            showAndConfirmStagedFiles(status, git)
+
+            // Commit changes
+            git.commit()
+                .setMessage(message)
+                .call()
+
+            log.debug "Committed changes with message: ${message}"
+        }
+        finally {
+            git.close()
+        }
+    }
+
+    /**
+     * Print the added and changed files and ask on standard input whether they
+     * should be committed. An empty answer counts as 'yes'.
+     *
+     * @param status status snapshot taken after staging
+     * @param git open handle used to unstage the files when the user declines
+     * @throws AbortOperationException when the user answers anything other than 'y'/'yes'
+     */
+    private void showAndConfirmStagedFiles(Status status, Git git) {
+        def stagedFiles = []
+        stagedFiles.addAll(status.added)
+        stagedFiles.addAll(status.changed)
+
+        if( stagedFiles ) {
+            println "\nFiles to be committed:"
+            stagedFiles.each { file ->
+                println " ${file}"
+            }
+
+            print "\nDo you want to commit these files? [Y/n]: "
+            def response = System.in.newReader().readLine()?.trim()?.toLowerCase()
+
+            // Default to 'yes' if empty response or 'y'/'yes'
+            if( response && response != 'y' && response != 'yes' ) {
+                log.info "Commit cancelled by user"
+
+                // Unstage all files
+                git.reset().call()
+                log.info "Files have been unstaged"
+
+                throw new AbortOperationException("Commit cancelled by user")
+            }
+        }
+    }
+
+    /**
+     * Work out the remote URL to push to from the remotes of the existing local
+     * repository. A single remote is used directly; with several remotes the
+     * user is asked to choose.
+     *
+     * @return URL of the selected remote
+     * @throws AbortOperationException when the folder has no .git directory or no remotes
+     */
+    String resolveRepository() {
+        def gitDir = new File(folder, '.git')
+
+        if( !gitDir.exists() ) {
+            throw new AbortOperationException("No git repository found and no repository URL provided. Please specify a repository with -repo parameter.")
+        }
+
+        def git = Git.open(folder)
+
+        try {
+            def remotes = git.remoteList().call()
+
+            if( remotes.empty ) {
+                throw new AbortOperationException("No remotes configured in git repository. Please add a remote or specify a repository with -repo parameter.")
+            }
+
+            if( remotes.size() == 1 ) {
+                def remote = remotes[0]
+                def remoteUrl = remote.URIs[0].toString()
+                log.info "Using remote '${remote.name}': ${remoteUrl}"
+                return remoteUrl
+            }
+
+            // Multiple remotes - ask user to choose
+            return selectRemoteFromUser(remotes)
+        }
+        finally {
+            git.close()
+        }
+    }
+
+    /**
+     * List the remotes as a numbered menu (the last entry is 'Cancel') and read
+     * the user's choice from standard input, repeating until it is valid.
+     *
+     * @param remotes remotes configured in the local repository
+     * @return URL of the chosen remote
+     * @throws AbortOperationException when the user picks 'Cancel' or standard input is exhausted
+     */
+    private static String selectRemoteFromUser(List remotes) {
+        println "Multiple remotes found. Please select which remote to push to:"
+
+        def remoteOptions = [:]
+        remotes.eachWithIndex { remote, index ->
+            def remoteUrl = remote.URIs[0].toString()
+            def remoteInfo = [name: remote.name, url: remoteUrl]
+            remoteOptions[index + 1] = remoteInfo
+            println " ${index + 1}. ${remote.name}: ${remoteUrl}"
+        }
+
+        println " ${remotes.size() + 1}. Cancel"
+
+        // One reader for the whole dialogue: a fresh buffered reader per prompt
+        // could swallow input already buffered by the previous one (piped stdin)
+        def reader = System.in.newReader()
+
+        while( true ) {
+            print "Enter your choice [1-${remotes.size() + 1}]: "
+            def input = reader.readLine()?.trim()
+
+            // readLine() returns null at end of input; without this check the
+            // parse error below would be retried forever
+            if( input == null ) {
+                throw new AbortOperationException("No input available to select a remote. Please specify a repository with -repo parameter.")
+            }
+
+            try {
+                def choice = Integer.parseInt(input)
+
+                if( choice == remotes.size() + 1 ) {
+                    throw new AbortOperationException("Push operation cancelled by user.")
+                }
+
+                if( choice >= 1 && choice <= remotes.size() ) {
+                    def selected = remoteOptions[choice]
+                    log.info "Selected remote '${selected['name']}': ${selected['url']}"
+                    return selected['url']
+                }
+
+                println "Invalid choice. Please enter a number between 1 and ${remotes.size() + 1}."
+            }
+            catch( NumberFormatException ignored ) {
+                println "Invalid input. Please enter a number."
+            }
+        }
+    }
+}
From e506588d2f0720963e4498fef97dc332a3a9ffdd Mon Sep 17 00:00:00 2001 From: jorgee Date: Fri, 7 Nov 2025 22:42:56 +0100 Subject: [PATCH 19/20] fix tests Signed-off-by: jorgee --- .../main/groovy/nextflow/cli/CmdPush.groovy | 6 +- .../groovy/nextflow/cli/CmdPushTest.groovy | 230 +-------------- .../nextflow/scm/PushManagerTest.groovy | 275 ++++++++++++++++++ 3 files changed, 280 insertions(+), 231 deletions(-) create mode 100644 modules/nextflow/src/test/groovy/nextflow/scm/PushManagerTest.groovy diff --git a/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy b/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy index b2eb64091f..fc8cd195f9 100644 --- a/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/cli/CmdPush.groovy @@ -44,7 +44,7 @@ class CmdPush extends CmdBase implements HubOptions { boolean commit = false @Parameter(names=['-r','-revision'], description = 'Revision of the project to run (either a git branch, tag or commit SHA number)') - String revision = 'main' + String revision @Parameter(names=['-max-size'], description = 'Maximum file size in MB to push without confirmation (default: 10)') int maxSizeMB = 10 @@ -56,7 +56,7 @@ class CmdPush extends CmdBase implements HubOptions { final String 
getName() { NAME } @TestOnly - protected File root + protected File rootFolder @Override void run() { @@ -66,7 +66,7 @@ class CmdPush extends CmdBase implements HubOptions { // Get repository from args (optional) def repository = args && args.size() == 1 ? args[0] : null - def folder = new File(System.getProperty('user.dir')).getAbsoluteFile() + def folder = rootFolder ?: new File(System.getProperty('user.dir')).getAbsoluteFile() if( !folder.exists() ) throw new AbortOperationException("Folder does not exist: ${folder.absolutePath}") diff --git a/modules/nextflow/src/test/groovy/nextflow/cli/CmdPushTest.groovy b/modules/nextflow/src/test/groovy/nextflow/cli/CmdPushTest.groovy index a16fb762f5..f7964519b1 100644 --- a/modules/nextflow/src/test/groovy/nextflow/cli/CmdPushTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/cli/CmdPushTest.groovy @@ -16,13 +16,10 @@ package nextflow.cli -import org.eclipse.jgit.transport.URIish - import java.nio.file.Files import nextflow.exception.AbortOperationException import nextflow.plugin.Plugins -import org.eclipse.jgit.api.Git import spock.lang.Specification /** @@ -37,240 +34,18 @@ class CmdPushTest extends Specification { Plugins.stop() } - def 'should fail with no arguments'() { - given: - def cmd = new CmdPush() - - when: - cmd.run() - - then: - thrown(AbortOperationException) - } - - def 'should fail when folder does not exist'() { - given: - def cmd = new CmdPush(args: ['https://github.com/test/repo.git'], directory: '/nonexistent/folder') - - when: - cmd.run() - - then: - def e = thrown(AbortOperationException) - e.message.contains('Folder does not exist') - } - - def 'should fail when path is not a directory'() { - given: - def tempFile = Files.createTempFile('test', '.txt').toFile() - def cmd = new CmdPush(args: ['https://github.com/test/repo.git'], directory: tempFile.absolutePath) - - when: - cmd.run() - - then: - def e = thrown(AbortOperationException) - e.message.contains('Path is not a directory') - - 
cleanup: - tempFile?.delete() - } - def 'should fail when no repository specified and no git repo exists'() { given: def tempDir = Files.createTempDirectory('test').toFile() - def cmd = new CmdPush(directory: tempDir.absolutePath) - - when: - cmd.run() - - then: - def e = thrown(AbortOperationException) - e.message.contains('No git repository found') - - cleanup: - tempDir?.deleteDir() - } - - def 'should normalize repository URLs correctly'() { - given: - def cmd = new CmdPush() - - expect: - cmd.normalizeRepoUrl('https://github.com/user/repo.git') == 'https://github.com/user/repo' - cmd.normalizeRepoUrl('https://github.com/user/repo') == 'https://github.com/user/repo' - cmd.normalizeRepoUrl('HTTPS://GITHUB.COM/USER/REPO.GIT') == 'https://github.com/user/repo' - cmd.normalizeRepoUrl('https://github.com/user/repo/') == 'https://github.com/user/repo' - } - - def 'should add files to gitignore'() { - given: - def tempDir = Files.createTempDirectory('test').toFile() - def gitignoreFile = new File(tempDir, '.gitignore') - def cmd = new CmdPush() - - when: - cmd.addToGitignore(tempDir, ['file1.txt', 'file2.txt']) - - then: - gitignoreFile.exists() - def content = gitignoreFile.text - content.contains('file1.txt') - content.contains('file2.txt') - - cleanup: - tempDir?.deleteDir() - } - - def 'should not duplicate entries in gitignore'() { - given: - def tempDir = Files.createTempDirectory('test').toFile() - def gitignoreFile = new File(tempDir, '.gitignore') - gitignoreFile.text = 'existing.txt\n' def cmd = new CmdPush() - - when: - cmd.addToGitignore(tempDir, ['existing.txt', 'new.txt']) - - then: - def lines = gitignoreFile.readLines() - lines.count { it == 'existing.txt' } == 1 - lines.contains('new.txt') - - cleanup: - tempDir?.deleteDir() - } - - def 'should find work directories'() { - given: - def tempDir = Files.createTempDirectory('test').toFile() - def workDir = new File(tempDir, 'work') - workDir.mkdirs() - - def cmd = new CmdPush() - - when: - def result = 
cmd.findWorkDirectories(tempDir) - - then: - result.size() == 1 - result[0] == 'work' - - cleanup: - tempDir?.deleteDir() - } - - def 'should not find work directories when none exist'() { - given: - def tempDir = Files.createTempDirectory('test').toFile() - def cmd = new CmdPush() - - when: - def result = cmd.findWorkDirectories(tempDir) - - then: - result.isEmpty() - - cleanup: - tempDir?.deleteDir() - } - - def 'should fail when existing repo has wrong remote'() { - given: - def tempDir = Files.createTempDirectory('test').toFile() - - // Initialize a git repo with a different remote - def git = Git.init().setDirectory(tempDir).call() - git.remoteAdd() - .setName('origin') - .setUri(new URIish('https://github.com/wrong/repo.git')) - .call() - git.close() - - def cmd = new CmdPush( - args: ['https://github.com/correct/repo.git'], - directory: tempDir.absolutePath - ) + cmd.rootFolder = tempDir when: cmd.run() then: def e = thrown(AbortOperationException) - e.message.contains('Repository URL not found in remotes') - - cleanup: - tempDir?.deleteDir() - } - - def 'should fail when repo is in detached HEAD state'() { - given: - def tempDir = Files.createTempDirectory('test').toFile() - - // Initialize repo and create a commit - def git = Git.init().setDirectory(tempDir).call() - git.remoteAdd() - .setName('origin') - .setUri(new URIish('https://github.com/test/repo.git')) - .call() - - // Create a test file and commit - new File(tempDir, 'test.txt').text = 'test content' - git.add().addFilepattern('.').call() - def commit = git.commit().setMessage('initial commit').call() - - // Checkout to detached HEAD - git.checkout().setName(commit.name()).call() - git.close() - - def cmd = new CmdPush( - args: ['https://github.com/test/repo.git'], - directory: tempDir.absolutePath - ) - - when: - cmd.run() - - then: - def e = thrown(AbortOperationException) - e.message.contains('detached HEAD state') - - cleanup: - tempDir?.deleteDir() - } - - def 'should fail when current branch 
does not match requested branch'() { - given: - def tempDir = Files.createTempDirectory('test').toFile() - - // Initialize repo - def git = Git.init().setDirectory(tempDir).call() - git.remoteAdd() - .setName('origin') - .setUri(new URIish('https://github.com/test/repo.git')) - .call() - - // Create initial commit on main branch - new File(tempDir, 'test.txt').text = 'test content' - git.add().addFilepattern('.').call() - git.commit().setMessage('initial commit').call() - - // Create and checkout dev branch - git.checkout().setCreateBranch(true).setName('dev').call() - git.close() - - def cmd = new CmdPush( - args: ['https://github.com/test/repo.git'], - directory: tempDir.absolutePath, - revision: 'main' - ) - - when: - cmd.run() - - then: - def e = thrown(AbortOperationException) - e.message.contains("Current branch 'dev' does not match requested branch 'main'") + e.message.contains('No git repository found') cleanup: tempDir?.deleteDir() @@ -289,7 +64,6 @@ class CmdPushTest extends Specification { def cmd = new CmdPush() expect: - cmd.revision == 'main' cmd.maxSizeMB == 10 cmd.message == 'Push from nextflow' } diff --git a/modules/nextflow/src/test/groovy/nextflow/scm/PushManagerTest.groovy b/modules/nextflow/src/test/groovy/nextflow/scm/PushManagerTest.groovy new file mode 100644 index 0000000000..8e00b93085 --- /dev/null +++ b/modules/nextflow/src/test/groovy/nextflow/scm/PushManagerTest.groovy @@ -0,0 +1,275 @@ +/* + * Copyright 2013-2025, Seqera Labs + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package nextflow.scm + +import nextflow.exception.AbortOperationException +import org.eclipse.jgit.api.Git +import org.eclipse.jgit.transport.URIish +import spock.lang.Specification + +import java.nio.file.Files + +/** + * Tests for PushManager + * + * @author Jorge Ejarque + */ +class PushManagerTest extends Specification { + + def 'should normalize repository URLs correctly'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + def manager = new PushManager(tempDir, false, 10) + + expect: + manager.normalizeRepoUrl('https://github.com/user/repo.git') == 'https://github.com/user/repo' + manager.normalizeRepoUrl('https://github.com/user/repo') == 'https://github.com/user/repo' + manager.normalizeRepoUrl('HTTPS://GITHUB.COM/USER/REPO.GIT') == 'https://github.com/user/repo' + manager.normalizeRepoUrl('https://github.com/user/repo/') == 'https://github.com/user/repo' + + cleanup: + tempDir?.deleteDir() + } + + def 'should add files to gitignore'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + def gitignoreFile = new File(tempDir, '.gitignore') + def manager = new PushManager(tempDir, false, 10) + + when: + manager.addToGitignore(['file1.txt', 'file2.txt']) + + then: + gitignoreFile.exists() + def content = gitignoreFile.text + content.contains('file1.txt') + content.contains('file2.txt') + + cleanup: + tempDir?.deleteDir() + } + + def 'should not duplicate entries in gitignore'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + def gitignoreFile = new File(tempDir, '.gitignore') + gitignoreFile.text = 'existing.txt\n' + def manager = new PushManager(tempDir, false, 10) + + when: + manager.addToGitignore(['existing.txt', 'new.txt']) + + then: + def lines = gitignoreFile.readLines() + lines.count { it == 'existing.txt' } == 1 + lines.contains('new.txt') + + cleanup: + tempDir?.deleteDir() 
+ } + + def 'should detect existing git repository'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + + when: + def manager = new PushManager(tempDir, false, 10) + def result1 = manager.isLocalGit() + + then: + !result1 + + when: + Git.init().setDirectory(tempDir).call().close() + def result2 = manager.isLocalGit() + + then: + result2 + + cleanup: + tempDir?.deleteDir() + } + + def 'should fail when existing repo has wrong remote'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + + // Initialize a git repo with a different remote + def git = Git.init().setDirectory(tempDir).call() + git.remoteAdd() + .setName('origin') + .setUri(new URIish('https://github.com/wrong/repo.git')) + .call() + git.close() + + def manager = new PushManager(tempDir, false, 10) + + when: + manager.push('https://github.com/correct/repo.git', null) + + then: + def e = thrown(AbortOperationException) + e.message.contains('Repository URL not found in remotes') + + cleanup: + tempDir?.deleteDir() + } + + def 'should fail when repo is in detached HEAD state'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + + // Initialize repo and create a commit + def git = Git.init().setDirectory(tempDir).call() + git.remoteAdd() + .setName('origin') + .setUri(new URIish('https://github.com/test/repo.git')) + .call() + + // Create a test file and commit + new File(tempDir, 'test.txt').text = 'test content' + git.add().addFilepattern('.').call() + def commit = git.commit().setMessage('initial commit').call() + + // Checkout to detached HEAD + git.checkout().setName(commit.name()).call() + git.close() + + def manager = new PushManager(tempDir, false, 10) + + when: + manager.push('https://github.com/test/repo.git', null) + + then: + def e = thrown(AbortOperationException) + e.message.contains('detached HEAD state') + + cleanup: + tempDir?.deleteDir() + } + + def 'should fail when current branch does not match requested branch'() { + given: + 
def tempDir = Files.createTempDirectory('test').toFile() + + // Initialize repo + def git = Git.init().setDirectory(tempDir).call() + git.remoteAdd() + .setName('origin') + .setUri(new URIish('https://github.com/test/repo.git')) + .call() + + // Create initial commit on main branch + new File(tempDir, 'test.txt').text = 'test content' + git.add().addFilepattern('.').call() + git.commit().setMessage('initial commit').call() + + // Create and checkout dev branch + git.checkout().setCreateBranch(true).setName('dev').call() + git.close() + + def manager = new PushManager(tempDir, false, 10) + + when: + manager.push('https://github.com/test/repo.git', 'main') + + then: + def e = thrown(AbortOperationException) + e.message.contains("Current branch 'dev' does not match requested branch 'main'") + + cleanup: + tempDir?.deleteDir() + } + + def 'should fail when no git repository found and commit is false'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + def manager = new PushManager(tempDir, false, 10) + + when: + manager.push('https://github.com/test/repo.git', null) + + then: + def e = thrown(AbortOperationException) + e.message.contains('No git repository found') + e.message.contains('commit') + + cleanup: + tempDir?.deleteDir() + } + + def 'should resolve repository from single remote'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + + // Initialize git repo with single remote + def git = Git.init().setDirectory(tempDir).call() + git.remoteAdd() + .setName('origin') + .setUri(new URIish('https://github.com/test/repo.git')) + .call() + git.close() + + def manager = new PushManager(tempDir, false, 10) + + when: + def repo = manager.resolveRepository() + + then: + repo == 'https://github.com/test/repo.git' + + cleanup: + tempDir?.deleteDir() + } + + def 'should fail to resolve repository when no git repo exists'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + def manager = new PushManager(tempDir, 
false, 10) + + when: + manager.resolveRepository() + + then: + def e = thrown(AbortOperationException) + e.message.contains('No git repository found') + + cleanup: + tempDir?.deleteDir() + } + + def 'should fail to resolve repository when no remotes configured'() { + given: + def tempDir = Files.createTempDirectory('test').toFile() + + // Initialize git repo without remotes + Git.init().setDirectory(tempDir).call().close() + + def manager = new PushManager(tempDir, false, 10) + + when: + manager.resolveRepository() + + then: + def e = thrown(AbortOperationException) + e.message.contains('No remotes configured') + + cleanup: + tempDir?.deleteDir() + } +} From 18b2e8488f05caf41c883f82f8bc58a8ac0ef458 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Tue, 11 Nov 2025 17:01:22 +0100 Subject: [PATCH 20/20] Add ADR for S3 Git Repository Provider [ci skip] Signed-off-by: Paolo Di Tommaso --- adr/20251111-s3-git-repository-provider.md | 355 +++++++++++++++++++++ 1 file changed, 355 insertions(+) create mode 100644 adr/20251111-s3-git-repository-provider.md diff --git a/adr/20251111-s3-git-repository-provider.md b/adr/20251111-s3-git-repository-provider.md new file mode 100644 index 0000000000..4fde44089c --- /dev/null +++ b/adr/20251111-s3-git-repository-provider.md @@ -0,0 +1,355 @@ +# S3 Git Repository Provider + +- Authors: Jorge Ejarque +- Status: accepted +- Date: 2025-11-11 +- Tags: scm, aws, s3, git, plugins + +## Summary + +Implements a Git repository provider that uses AWS S3 as a storage backend instead of traditional Git hosting services (GitHub, GitLab, etc.), enabling self-hosted workflow distribution without requiring dedicated Git server infrastructure. + +## Problem Statement + +Nextflow workflows are typically distributed through Git hosting platforms like GitHub, which require internet access, authentication management, and impose rate limits. 
Organizations seeking self-hosted alternatives must deploy and maintain dedicated Git servers (GitLab, Gitea, etc.). + +The goal is to provide a lightweight alternative that leverages existing S3 infrastructure for workflow distribution, avoiding the operational complexity of Git server hosting while maintaining compatibility with Nextflow's Git-based asset management system. The solution must not depend on external tools or libraries that require separate installation or system-level configuration. + +## Goals or Decision Drivers + +- **Infrastructure Simplicity**: Leverage existing S3 buckets without additional server deployments +- **Standard Git Compatibility**: Support standard Git operations (clone, fetch, push) through JGit +- **AWS Integration**: Seamless integration with AWS credential management and IAM policies +- **Existing Architecture Compatibility**: Fit within Nextflow's RepositoryProvider abstraction +- **Performance**: Minimize unnecessary data transfer when fetching specific branches or files +- **Ephemeral Execution Support**: Remove friction for ephemeral tasks and pipeline executions with remote executors by eliminating dependencies on external Git hosting services +- **Access Constraints**: Provide a solution for users that are constrained on the use of public Git providers due to organizational policies, security requirements, or network restrictions + +## Non-goals + +- Supporting Git features beyond basic clone/fetch/push operations (no git-lfs, submodules, sparse checkout) +- Optimizing for repositories with hundreds of branches or very large commit histories +- Providing directory traversal API (listDirectory operation) +- Replacing traditional Git hosting for collaborative development workflows +- Replacing GitHub as the preferred Git provider for Nextflow pipelines - this solution is intended as an alternative for specific use cases, not as a general replacement for GitHub-based workflow distribution + +## Considered Options + +### 
Option 1: Standalone git-remote-helper + +Implement a standalone git-remote-helper executable that git invokes for s3:// URLs. + +- **Pro**: Standard Git integration pattern used by git-remote-s3, git-remote-gcrypt, etc. +- **Pro**: Works with any git client without code changes +- **Con**: Requires separate executable distribution and PATH management +- **Con**: Complex subprocess communication protocol between git and helper +- **Con**: Difficult integration with Nextflow's existing AWS credential management +- **Con**: Limited error handling and debugging capabilities + +### Option 2: JGit Custom Transport Protocol + +Extend JGit's Transport interface to implement S3 operations as a native transport protocol. + +- **Pro**: Direct integration with Nextflow's existing JGit usage +- **Pro**: Unified credential management through AWS SDK +- **Pro**: Better error handling and debugging within the JVM +- **Pro**: No external dependencies or PATH configuration +- **Con**: Only works within JGit-based applications +- **Con**: Requires implementing low-level JGit transport interfaces + +### Option 3: S3-backed Git Server + +Deploy a lightweight HTTP server that translates Git smart protocol to S3 operations. + +- **Pro**: Works with any git client +- **Pro**: Could support more advanced Git features +- **Con**: Requires server deployment and management +- **Con**: Defeats the purpose of avoiding Git server infrastructure +- **Con**: Additional complexity for authentication and authorization + +## Solution + +Implement a JGit custom transport protocol (Option 2) integrated as a plugin within the nf-amazon module. 
+ +## Rationale & Discussion + +### Core Architecture Decision: JGit Transport Extension + +The solution extends JGit's Transport abstraction by implementing: + +- **TransportS3**: Custom transport protocol that registers the "s3://" scheme with JGit +- **S3FetchConnection**: Handles clone and fetch operations +- **S3PushConnection**: Handles push operations +- **S3BaseConnection**: Common functionality for ref management and S3 interactions + +```mermaid +graph TB + subgraph "Nextflow Core" + AM[AssetManager] + RP[RepositoryProvider] + end + + subgraph "nf-amazon Plugin" + S3RP[S3RepositoryProvider] + S3RF[S3RepositoryFactory] + S3PC[S3ProviderConfig] + + subgraph "JGit Integration" + TS3[TransportS3] + S3FC[S3FetchConnection] + S3PC2[S3PushConnection] + S3BC[S3BaseConnection] + end + end + + subgraph "External Systems" + JGIT[JGit Library] + S3[AWS S3] + end + + AM --> RP + RP --> S3RP + S3RF --> S3RP + S3RP --> S3PC + S3RP --> JGIT + JGIT --> TS3 + TS3 --> S3FC + TS3 --> S3PC2 + S3FC --> S3BC + S3PC2 --> S3BC + S3BC --> S3 +``` + +### Storage Model: Git Bundles on S3 + +**Key Decision**: Store each branch as a Git bundle file rather than individual Git objects. + +**S3 Object Key Structure**: +``` +bucket/ + repo-path/ + HEAD # Default branch reference + refs/ + heads/ + main/ + <sha>.bundle # Bundle for main branch + feature-x/ + <sha>.bundle # Bundle for feature-x branch + tags/ + v1.0.0/ + <sha>.bundle # Bundle for tag v1.0.0 +``` + +**Rationale**: +1. **S3 is Object Storage, not a Filesystem**: S3 lacks true directory semantics and atomic operations. Git's native format relies on filesystem assumptions that don't map well to S3. + +2. **Atomic Branch Updates**: A single bundle file per branch provides atomic updates - either the old or new bundle exists, never a partially written state. + +3. **Minimal Transfer for Single-Branch Operations**: Fetching a specific branch only requires downloading one bundle file, not the entire repository. + +4. 
**Standard Git Format**: Bundles are a standard Git format that JGit handles natively, requiring minimal custom code. + +### Fetch Operation Flow + +```mermaid +sequenceDiagram + participant NF as Nextflow + participant S3RP as S3RepositoryProvider + participant S3FC as S3FetchConnection + participant S3 as AWS S3 + participant JGit as JGit Bundle Transport + + NF->>S3RP: readBytes(path) or clone() + S3RP->>S3FC: fetch(refs) + S3FC->>S3: List objects (refs/heads/*, refs/tags/*) + S3-->>S3FC: Available refs with commit SHAs + + loop For each requested ref + S3FC->>S3: GetObject(refs/heads/branch/sha.bundle) + S3-->>S3FC: Bundle file + S3FC->>JGit: Parse bundle into local repository + JGit-->>S3FC: Objects and refs imported + end + + S3FC-->>S3RP: Fetch complete + S3RP-->>NF: Repository ready +``` + +**Key Implementation Details**: + +1. **Ref Discovery**: Lists S3 objects under `refs/heads/` and `refs/tags/` prefixes to build the advertised refs map. The commit SHA is extracted from the bundle filename. + +2. **Bundle Download**: Each requested branch's bundle is downloaded to a temporary directory. + +3. **Bundle Import**: JGit's bundle transport imports the bundle into the local repository, handling all Git object parsing and ref updates. + +4. **Default Branch Optimization**: When no revision is specified, only the default branch is fetched by querying the HEAD reference first, avoiding unnecessary branch downloads. 
+ +### Push Operation Flow + +```mermaid +sequenceDiagram + participant NF as Nextflow Push Command + participant PM as PushManager + participant S3PC as S3PushConnection + participant S3 as AWS S3 + participant JGit as JGit Bundle Writer + + NF->>PM: push(repo, branch) + PM->>PM: Validate/initialize local git + PM->>PM: Stage and commit files + PM->>S3PC: push(refs) + + loop For each ref to push + S3PC->>S3: List objects (refs/heads/branch/) + S3-->>S3PC: Existing bundle (if any) + + alt Existing bundle found + S3PC->>S3PC: Check if commit in history + alt Commit not in history + S3PC-->>PM: REJECTED_REMOTE_CHANGED + end + alt Same commit + S3PC-->>PM: UP_TO_DATE + end + end + + S3PC->>JGit: Create bundle for ref + JGit-->>S3PC: Bundle file in temp directory + S3PC->>S3: PutObject(new bundle) + S3-->>S3PC: Upload complete + + opt Old bundle exists + S3PC->>S3: DeleteObject(old bundle) + end + + opt First push to repo + S3PC->>S3: PutObject(HEAD reference) + end + + S3PC-->>PM: OK + end + + PM-->>NF: Push complete +``` + +**Key Implementation Details**: + +1. **Conflict Detection**: Before pushing, checks if the existing bundle's commit is an ancestor of the new commit. Rejects the push if it would create a non-fast-forward update. + +2. **Bundle Generation**: Uses JGit's BundleWriter to create a complete bundle containing all objects reachable from the branch tip. + +3. **Atomic Update**: Uploads the new bundle before deleting the old one, ensuring the branch is never in an invalid state visible to other clients. + +4. **HEAD Management**: If no HEAD reference exists (first push to repo), creates one pointing to the pushed branch as the default. + +### Authentication and Credentials + +**Key Decision**: Use AWS SDK credential providers instead of Git username/password authentication. + +The S3GitCredentialsProvider implements JGit's CredentialsProvider interface but bridges to AWS SDK credential resolution: + +1. 
**Configuration Priority**: + - Explicit `accessKey`/`secretKey` in SCM config + - AWS profile specified in SCM config + - Session-level AWS configuration + - DefaultCredentialsProvider (environment variables, instance profiles, etc.) + +2. **Region Resolution**: + - Explicit region in SCM config + - Session-level AWS configuration + - DefaultAwsRegionProviderChain (environment, config files, instance metadata) + +This approach provides seamless integration with existing AWS infrastructure while maintaining Nextflow's configuration patterns. + +### Reflection-based Status Updates + +**Technical Challenge**: JGit's RemoteRefUpdate class stores the push status in a package-private field without public setters. Custom transport implementations need to update this status to communicate results. + +**Solution**: Use reflection to access and modify the status field. + +**Rationale**: +1. **JAR Signing Restrictions**: JGit's JAR is signed, preventing custom classes from being placed in the org.eclipse.jgit.transport package where they would have package-private access. + +2. **No Public API**: JGit provides no public API for transport implementations to set push status. + +3. **Essential Functionality**: Without status updates, push operations cannot communicate success/failure/rejection to callers. + +4. **Stability**: The RemoteRefUpdate.status field has been stable across JGit versions, minimizing fragility risk. + +### Limitations and Trade-offs + +**Unsupported Operations**: +- **Directory Traversal**: The `listDirectory` method throws UnsupportedOperationException because it would require cloning the entire repository just to list files. + +**Performance Characteristics**: +- **Single-File Access**: Reading a single file requires downloading and unpacking the entire branch bundle. Optimized by only fetching the default branch when no revision is specified. + +- **Multiple Branches**: Fetching multiple branches requires downloading one bundle per branch. 
This is more efficient than traditional Git for repositories with many branches when only a few are needed. + +- **Large Histories**: Bundle size grows with commit history. This works well for typical workflow repositories (dozens to hundreds of commits) but is not suitable for repositories with thousands of commits per branch. + +**Operational Considerations**: +- **S3 Consistency**: Relies on S3's strong read-after-write consistency for new objects. +- **Concurrent Pushes**: There is no distributed locking mechanism. Concurrent pushes to the same branch may result in one being rejected (similar to traditional Git). +- **Storage Costs**: Each branch update creates a new bundle file. Old bundles are deleted immediately, so storage cost is proportional to the number of active branches, not the update frequency. + +## Consequences + +### Positive + +- **Zero Server Infrastructure**: Organizations can distribute workflows using existing S3 buckets without deploying Git servers +- **IAM Integration**: Leverages AWS IAM for access control rather than managing separate Git authentication +- **Transparent Integration**: Works seamlessly with the existing Nextflow commands (pull, run) and the new push command +- **Plugin Architecture**: Isolated in the nf-amazon plugin, optional for users not needing this functionality +- **Selective Branch Fetch**: Only downloads requested branches, which is efficient for repositories with many branches + +### Negative + +- **Bundle Overhead**: Each fetch downloads the complete branch history even for single-file access +- **JGit Dependency**: The solution is specific to JGit and doesn't benefit non-Nextflow Git clients +- **Limited Git Features**: No support for advanced Git operations (partial clone, shallow fetch, etc.) 
+- **Directory Traversal**: Cannot list repository contents without cloning +- **Reflection Fragility**: The status update mechanism depends on JGit internal implementation details + +### Neutral + +- **Storage Format Compatibility**: Uses a git-remote-s3-compatible storage format, but this is an implementation detail rather than a requirement +- **AWS-Specific**: Targets AWS S3; other S3-compatible services (MinIO, etc.) work only if they provide compatible APIs +- **Credential Complexity**: Configuration requires understanding the AWS credential chain, which may be unfamiliar to some users + +## Implementation Notes + +### Plugin Registration + +The S3RepositoryFactory uses @Priority(-10) to ensure it's checked after standard providers, preventing conflicts with other repository types. + +The TransportS3 protocol is registered lazily on first use via an AtomicBoolean guard, ensuring the JGit transport registry is extended exactly once. + +### Error Handling + +- **Missing Bundles**: Treated as transport errors with descriptive messages +- **Multiple Bundles**: The implementation should never produce this state, but it is still explicitly checked for and rejected +- **Invalid Refs**: Gracefully handled by returning empty ref lists for missing heads/tags + +### New Commands + +Introduces the `nextflow push` command to upload local directories to S3-backed Git repositories, with options for: +- Automatic git initialization and commit +- Branch specification +- File size validation +- .gitignore management + +## Links + +- Git Bundle Format: https://git-scm.com/docs/git-bundle +- JGit Transport API: https://wiki.eclipse.org/JGit/User_Guide#Transports +- git-remote-s3 project: https://github.com/bgahagan/git-remote-s3 (inspiration for storage format) + +## More Information + +This implementation enables workflow distribution patterns such as: +- Private workflow libraries within AWS organizations +- Air-gapped environments with S3-compatible storage +- Cost-effective alternatives to 
GitHub private repositories +- Integration with existing S3 bucket policies and lifecycle rules \ No newline at end of file