diff --git a/.github/workflows/build_test.yaml b/.github/workflows/build_test.yaml index 7a4f0a2b..cfaa467e 100644 --- a/.github/workflows/build_test.yaml +++ b/.github/workflows/build_test.yaml @@ -64,6 +64,7 @@ jobs: run: | mkdir docker_build git clone git@github.com:allenai/helios.git docker_build/helios + git -C docker_build/helios checkout f0a63f190b0f99d9c503249daf7e3e47bbd4792a echo "helios @ /opt/rslearn_projects/docker_build/helios/" >> requirements-extra.txt # Same thing for olmoearth_run repository. @@ -74,7 +75,7 @@ jobs: - name: Clone olmoearth_run and update requirements-extra.txt. run: | git clone git@github.com:allenai/olmoearth_run.git docker_build/olmoearth_run - echo "earth-system-run @ /opt/rslearn_projects/docker_build/olmoearth_run/" >> requirements-extra.txt + echo "olmoearth_run @ /opt/rslearn_projects/docker_build/olmoearth_run/" >> requirements-extra.txt - name: Build and push Docker image id: build-push diff --git a/esrun_data/sample/esrun.yaml b/esrun_data/sample/esrun.yaml deleted file mode 100644 index 4981d328..00000000 --- a/esrun_data/sample/esrun.yaml +++ /dev/null @@ -1,38 +0,0 @@ -partition_strategies: - partition_request_geometry: - class_path: esrun.runner.tools.partitioners.noop_partitioner.NoopPartitioner - init_args: - - prepare_window_geometries: - class_path: esrun.runner.tools.partitioners.fixed_window_partitioner.FixedWindowPartitioner - init_args: - window_size: 128 # intended to be a pixel value - -postprocessing_strategies: - process_dataset: - class_path: esrun.runner.tools.postprocessors.noop_raster.NoopRaster - - process_partition: - class_path: esrun.runner.tools.postprocessors.noop_raster.NoopRaster - - process_window: - class_path: esrun.runner.tools.postprocessors.noop_raster.NoopRaster - -window_prep: - sampler: - class_path: esrun.runner.tools.samplers.noop_sampler.NoopSampler - labeled_window_preparer: - class_path: esrun.runner.tools.labeled_window_preparers.point_to_pixel_window_preparer.PointToPixelWindowPreparer - init_args: - window_resolution: 10.0 - data_splitter: - class_path: esrun.runner.tools.data_splitters.random_data_splitter.RandomDataSplitter - init_args: - train_prop: 0.8 - val_prop: 0.2 - test_prop: 0.0 - seed: 42 - label_layer: "labels" - label_property: "category" - group_name: "post_random_split" - split_property: "split" diff --git a/esrun_data/nandi/README.md b/olmoearth_run_data/nandi/README.md similarity index 71% rename from esrun_data/nandi/README.md rename to olmoearth_run_data/nandi/README.md index ed455987..f708c612 100644 --- a/esrun_data/nandi/README.md +++ b/olmoearth_run_data/nandi/README.md @@ -1,4 +1,4 @@ -Local esrun for Nandi: +Local olmoearth_run for Nandi: ```bash export EXTRA_FILES_PATH=/weka/dfive-default/helios/checkpoints export DATASET_PATH=/weka/dfive-default/yawenz/datasets/scratch_v5/dataset_0 @@ -7,6 +7,6 @@ export TRAINER_DATA_PATH=/weka/dfive-default/yawenz/test/nandi export WANDB_PROJECT=2025_10_03_nandi_crop_type export WANDB_NAME=nandi_crop_type_segment_helios_base_S2_S1_ts_ws4_ps1_bs8_add_annotations_2 export WANDB_ENTITY=eai-ai2 -python -m rslp.main esrun esrun --config_path esrun_data/nandi/ --scratch_path /weka/dfive-default/yawenz/datasets/scratch_v5/ --checkpoint_path /weka/dfive-default/yawenz/test/checkpoints/last_rewritten.ckpt +python -m rslp.main olmoearth_run olmoearth_run --config_path olmoearth_run_data/nandi/ --scratch_path /weka/dfive-default/yawenz/datasets/scratch_v5/ --checkpoint_path /weka/dfive-default/yawenz/test/checkpoints/last_rewritten.ckpt ``` Note that 
the original `task_name` has been converted from `crop_type_classification` to `class` in the checkpoint, and this checkpoint is also available at `gs://earth-system-run-dev/models/4edd1efb-b645-44c3-8d7a-5cc2abbbcc46/stage_0/checkpoint.ckpt`. diff --git a/esrun_data/nandi/dataset.json b/olmoearth_run_data/nandi/dataset.json similarity index 100% rename from esrun_data/nandi/dataset.json rename to olmoearth_run_data/nandi/dataset.json diff --git a/esrun_data/nandi/model.yaml b/olmoearth_run_data/nandi/model.yaml similarity index 100% rename from esrun_data/nandi/model.yaml rename to olmoearth_run_data/nandi/model.yaml diff --git a/esrun_data/nandi/esrun.yaml b/olmoearth_run_data/nandi/olmoearth_run.yaml similarity index 75% rename from esrun_data/nandi/esrun.yaml rename to olmoearth_run_data/nandi/olmoearth_run.yaml index 684877a5..f49c27e2 100644 --- a/esrun_data/nandi/esrun.yaml +++ b/olmoearth_run_data/nandi/olmoearth_run.yaml @@ -37,12 +37,12 @@ inference_results_config: partition_strategies: partition_request_geometry: - class_path: esrun.runner.tools.partitioners.grid_partitioner.GridPartitioner + class_path: olmoearth_run.runner.tools.partitioners.grid_partitioner.GridPartitioner init_args: grid_size: 0.25 prepare_window_geometries: - class_path: esrun.runner.tools.partitioners.grid_partitioner.GridPartitioner + class_path: olmoearth_run.runner.tools.partitioners.grid_partitioner.GridPartitioner init_args: grid_size: 64 output_projection: @@ -55,10 +55,10 @@ partition_strategies: postprocessing_strategies: process_dataset: - class_path: esrun.runner.tools.postprocessors.combine_geotiff.CombineGeotiff + class_path: olmoearth_run.runner.tools.postprocessors.combine_geotiff.CombineGeotiff process_partition: - class_path: esrun.runner.tools.postprocessors.combine_geotiff.CombineGeotiff + class_path: olmoearth_run.runner.tools.postprocessors.combine_geotiff.CombineGeotiff process_window: - class_path: esrun.runner.tools.postprocessors.noop_raster.NoopRaster + class_path: olmoearth_run.runner.tools.postprocessors.noop_raster.NoopRaster diff --git a/esrun_data/nandi/prediction_request_geometry.geojson b/olmoearth_run_data/nandi/prediction_request_geometry.geojson similarity index 100% rename from esrun_data/nandi/prediction_request_geometry.geojson rename to olmoearth_run_data/nandi/prediction_request_geometry.geojson diff --git a/esrun_data/sample/.gitignore b/olmoearth_run_data/sample/.gitignore similarity index 100% rename from esrun_data/sample/.gitignore rename to olmoearth_run_data/sample/.gitignore diff --git a/esrun_data/sample/README.md b/olmoearth_run_data/sample/README.md similarity index 53% rename from esrun_data/sample/README.md rename to olmoearth_run_data/sample/README.md index cc98d6a3..3a94a73b 100644 --- a/esrun_data/sample/README.md +++ b/olmoearth_run_data/sample/README.md @@ -1,35 +1,35 @@ # ES Runner Local Development Guide -## What is esrunner? +## What is olmoearth_runner? 
-ESRunner provides: +OlmoEarthRunner provides: -- the [EsPredictRunner](https://github.com/allenai/earth-system-run/blob/josh/esrunner/src/esrun/runner/local/predict_runner.py) -- the [EsFineTuneRunner](https://github.com/allenai/earth-system-run/blob/josh/esrunner/src/esrun/runner/local/fine_tune_runner.py) +- the [OlmoEarthRunPredictRunner](https://github.com/allenai/olmoearth_run/blob/develop/src/olmoearth_run/runner/local/predict_runner.py) +- the [OlmoEarthRunFineTuneRunner](https://github.com/allenai/olmoearth_run/blob/develop/src/olmoearth_run/runner/local/fine_tune_runner.py) -classes, which can be used to run prediction and fine-tuning pipelines outside of the esrun service architecture +classes, which can be used to run prediction and fine-tuning pipelines outside of the olmoearth_run service architecture. ## Setting up your environment -- Install `esrunner` (earth-system-run) in your development environment. +- Install `olmoearth_run` in your development environment. ``` - pip install earth-system-run @ git+https://github.com/allenai/earth-system-run.git + pip install "olmoearth_run @ git+https://github.com/allenai/olmoearth_run.git" ``` -- Following the project structure below, create a directory in the `rslearn-projects/esrun_data/` directory. This directory will contain all the necessary files for your prediction or fine-tuning pipeline. +- Following the project structure below, create a directory in the `rslearn_projects/olmoearth_run_data/` directory. This directory will contain all the necessary files for your prediction or fine-tuning pipeline. ## Project Structure - `checkpoint.ckpt`: This is the model checkpoint file. It is required for running inference. If you are only building datasets, this file is not required. Note: You probably don't want to check this file into git repository. - `dataset.json`: This is the rslearn dataset definition file. -- `esrun.yaml`: This file defines the behavior of the esrunner including partitioning, postprocessing, training window prep, etc.. +- `olmoearth_run.yaml`: This file defines the behavior of the olmoearth_runner including partitioning, postprocessing, training window prep, etc. - `model.yaml`: This is the rslearn (pytorch) model definition file. - `annotation_features.geojson`: Labeled annotation feature collection, exported from Studio. Only required for labeled window prep. - `annotation_task_features.geojson`: Studio tasks for the annotation features, also exported from Studio. Only required for labeled window prep. -- `prediction/test-request1.geojson`: This directory contains the prediction requests in GeoJSON format. Each file represents a set of prediction requests for a specific region or time period. Many different prediction requests can be defined within a single file as separate features in the feature collection. The esrunner will partition these requests into smaller tasks based on the partition strategies defined in `esrun.yaml#partition_strategies` +- `prediction/test-request1.geojson`: This directory contains the prediction requests in GeoJSON format. Each file represents a set of prediction requests for a specific region or time period. Many different prediction requests can be defined within a single file as separate features in the feature collection.
The olmoearth_runner will partition these requests into smaller tasks based on the partition strategies defined in `olmoearth_run.yaml#partition_strategies` ## Fine-Tuning -Fine-tuning is encapsulated in the Fine Tuning Workflow, accessible through `EsFineTuningRunner`. It currently only exposes a method for preparing labeled RSLearn windows from geojson feature collections exported through Earth System Studio. Using it requires your `esrun.yaml` to define the following data processing pipeline: +Fine-tuning is encapsulated in the Fine Tuning Workflow, accessible through `OlmoEarthRunFineTuneRunner`. It currently only exposes a method for preparing labeled RSLearn windows from geojson feature collections exported through Earth System Studio. Using it requires your `olmoearth_run.yaml` to define the following data processing pipeline: ```yaml window_prep: @@ -44,7 +44,7 @@ Technically optional, defaulting to `NoopSampler`. These classes receive a `list ### labeled_window_preparer -Transforms individual `AnnotationTask` instances to `list[LabeledWindow[LabeledSTGeometry]]` or `list[LabeledWindow[ndarray]]` depending on whether vector or raster label output layers are desired. +Transforms individual `AnnotationTask` instances to `list[LabeledWindow[LabeledSTGeometry]]` or `LabeledWindowPreparer[list[RasterLabel]]` depending on whether vector or raster label output layers are desired, respectively. Available window preparers: - `PointToPixelWindowPreparer` - Converts each annotation feature in a Studio task to a 1x1pixel window with a vector class label @@ -59,12 +59,12 @@ Available data splitters: ### Run a pipeline end-to-end -A fully functional `esrun.yaml` and set of `.geojson` files is available in `esrun_data/sample` as a reference example. +A fully functional `olmoearth_run.yaml` and set of `.geojson` files is available in `olmoearth_run_data/sample` as a reference example. Exercise it via: ``` -python -m rslp.main esrun prepare_labeled_windows \ - --project_path esrun_data/sample \ +python -m rslp.main olmoearth_run prepare_labeled_windows \ + --project_path olmoearth_run_data/sample \ --scratch_path /tmp/scratch ``` @@ -82,27 +82,27 @@ format via the "Export Annotations" tab. This will create the required data file ### Writing Your Own Samplers -You may supply your own data samplers by creating a new class that implements the `SamplerInterface` class in the `esrun.runner.tools.samplers.sampler_interface` module. You can then specify your custom sampler in the `esrun.yaml` file. This -class must be importable via your PYTHONPATH. Include it as code in this repository or as a new implementation in earth-system-run.git. +You may supply your own data samplers by creating a new class that implements the `SamplerInterface` class in the `olmoearth_run.runner.tools.samplers.sampler_interface` module. You can then specify your custom sampler in the `olmoearth_run.yaml` file. This +class must be importable via your PYTHONPATH. Include it as code in this repository or as a new implementation in olmoearth_run.git. ### Writing Your Own LabeledWindowPreparers You may supply new implementations for converting raw Studio Tasks + Annotations into LabeledWindows. To do so, implement -either `esrun.runner.tools.labeled_window_preparers.labeled_window_preparer.RasterLabelsWindowPreparer` (for rasterized targets) or `esrun.runner.tools.labeled_window_preparers.labeled_window_preparer.VectorLabelsWindowPreparer` (for vector targets). 
As with Samplers, these must be importable from your PYTHONPATH and can be referenced by class path in `esrun.yaml`. Include as code in this repository or as a new implementation in earth-system-run.git. +either `olmoearth_run.runner.tools.labeled_window_preparers.labeled_window_preparer.RasterLabelsWindowPreparer` (for rasterized targets) or `olmoearth_run.runner.tools.labeled_window_preparers.labeled_window_preparer.VectorLabelsWindowPreparer` (for vector targets). As with Samplers, these must be importable from your PYTHONPATH and can be referenced by class path in `olmoearth_run.yaml`. Include as code in this repository or contribute directly to olmoearth_run.git. ### Writing Your Own DataPartitioners -You may supply your own data partitioners to determine test/eval/train split assignment for a LabeledWindow. To do so, implement `esrun.runner.tools.data_splitter.data_splitter_interface.DataSplitterInterface`. +You may supply your own data partitioners to determine test/eval/train split assignment for a LabeledWindow. To do so, implement `olmoearth_run.runner.tools.data_splitter.data_splitter_interface.DataSplitterInterface`. ## Inference -Inference is encapsulated in the Prediction Workflow, accessible through `EsPredictRunner`. It requires your `esrun.yaml` define: +Inference is encapsulated in the Prediction Workflow, accessible through `OlmoEarthRunPredictRunner`. It requires your `olmoearth_run.yaml` to define: - partitioning strategy - post-processing strategy ### Partitioning Strategies -These stanzas defines how esrunner will break the inference request into multiple request geometries for compute parallelization (equivalent to rslearn window groups) and prediction window geometries. +These stanzas define how olmoearth_runner will break the inference request into multiple request geometries for compute parallelization (equivalent to rslearn window groups) and prediction window geometries. Partitioning strategies can be mixed and matched for flexible development. - partition_request_geometry @@ -113,14 +113,14 @@ Available partitioners: - `GridPartitioner` - Given a grid size, this partitioner will create partitions based on the grid cells that intersect with the prediction request. - NoopPartitioner - Does not partition the prediction request. This is useful for testing or when you want to run the entire prediction request as a single task. -Example `esrun.yaml`. This will leave the original input as a single partition, but will create individual windows of size 128x128 pixels for each feature. +Example `olmoearth_run.yaml`. This will leave the original input as a single partition, but will create individual windows of size 128x128 pixels for each feature. ```yaml partition_request_geometry: - class_path: esrun.tools.partitioners.noop_partitioner.NoopPartitioner + class_path: olmoearth_run.runner.tools.partitioners.noop_partitioner.NoopPartitioner init_args: prepare_window_geometries: - class_path: esrun.tools.partitioners.fixed_window_partitioner.FixedWindowPartitioner + class_path: olmoearth_run.runner.tools.partitioners.fixed_window_partitioner.FixedWindowPartitioner init_args: window_size: 128 # intended to be a pixel value ``` @@ -135,20 +135,20 @@ There are 3 different stages to postprocessing: #### Run a pipeline end-to-end -The simplest way to run a pipeline is to use the `esrun-local-predict` CLI command. This command will run the entire pipeline end-to-end including partitioning, dataset building, inference, post-processing, and combining the final outputs.
+The simplest way to run a pipeline is to use the `olmoearth-run-local-predict` CLI command. This command will run the entire pipeline end-to-end including partitioning, dataset building, inference, post-processing, and combining the final outputs. ``` -$ esrun-local-predict +$ olmoearth-run-local-predict ``` -If you want more flexibility, you can use the `EsPredictRunner` class directly. The following example shows how to run the entire pipeline end-to-end using the `EsPredictRunner` class. Note: This example may become out of date very quickly due to ongoing changes in the EsPredictRunner class. Refer to the esrun repo for the most up-to-date information. +If you want more flexibility, you can use the `OlmoEarthRunPredictRunner` class directly. The following example shows how to run the entire pipeline end-to-end using the `OlmoEarthRunPredictRunner` class. Note: This example may become out of date very quickly due to ongoing changes in the OlmoEarthRunPredictRunner class. Refer to the olmoearth_run repo for the most up-to-date information. ```python file=run_pipeline.py from pathlib import Path -from esrun.runner.local.predict_runner import EsPredictRunner +from olmoearth_run.runner.local.predict_runner import OlmoEarthRunPredictRunner config_path = Path(__file__).parent -runner = EsPredictRunner( +runner = OlmoEarthRunPredictRunner( project_path=config_path, scratch_path=config_path / "scratch", ) @@ -164,11 +164,11 @@ runner.combine(partitions) #### Run dataset building for the entire prediction request. ```python file=run_dataset_building.py from pathlib import Path -from esrun.runner.local.predict_runner import EsPredictRunner +from olmoearth_run.runner.local.predict_runner import OlmoEarthRunPredictRunner config_path = Path(__file__).parent -runner = EsPredictRunner( +runner = OlmoEarthRunPredictRunner( project_path=config_path, scratch_path=config_path / "scratch", ) @@ -181,11 +181,11 @@ for partition_id in runner.partition(): (Assumes you have an existing materialized dataset for the partition.) ```python file=run_inference_single_partition.py from pathlib import Path -from esrun.runner.local.predict_runner import EsPredictRunner +from olmoearth_run.runner.local.predict_runner import OlmoEarthRunPredictRunner config_path = Path(__file__).parent -runner = EsPredictRunner( +runner = OlmoEarthRunPredictRunner( project_path=config_path, scratch_path=config_path / "scratch", ) @@ -196,13 +196,13 @@ runner.run_inference(partition_id) #### Run inference for a single window. Since we don't expose window-level inference via the runner API, you can configure your partitioners to produce limited sets of partitions and windows. -```yaml file=esrun.yaml +```yaml file=olmoearth_run.yaml partition_request_geometry: - class_path: esrun.runner.tools.partitioners.noop_partitioner.NoopPartitioner + class_path: olmoearth_run.runner.tools.partitioners.noop_partitioner.NoopPartitioner init_args: prepare_window_geometries: - class_path: esrun.runner.tools.partitioners.fixed_window_partitioner.FixedWindowPartitioner + class_path: olmoearth_run.runner.tools.partitioners.fixed_window_partitioner.FixedWindowPartitioner init_args: window_size: 128 # intended to be a pixel value limit: 1 # This will limit window generation to a single window per large partition, effectively allowing you to run inference on a single window. 
@@ -210,11 +210,11 @@ prepare_window_geometries: ``` ```python file=run_inference_single_window.py from pathlib import Path -from esrun.runner.local.predict_runner import EsPredictRunner +from olmoearth_run.runner.local.predict_runner import OlmoEarthRunPredictRunner config_path = Path(__file__).parent -runner = EsPredictRunner( +runner = OlmoEarthRunPredictRunner( project_path=config_path, scratch_path=config_path / "scratch", ) @@ -225,18 +225,18 @@ for partition_id in partitions: ``` ### Writing Your Own Partitioners -You may supply your own partitioners by creating a new class that implements the ` PartitionInterface` class in the `esrun.runner.tools.partitioners.partition_interface` module. You can then specify your custom partitioner in the `esrun.yaml` file. This class must exist on your PYTHONPATH and be importable by the esrunner. As such we recommend you place your custom partitioner in the `rslp/common/partitioners` directory of this repository to ensure it gets installed into the final Dockerimage artifact. +You may supply your own partitioners by creating a new class that implements the `PartitionInterface` class in the `olmoearth_run.runner.tools.partitioners.partition_interface` module. You can then specify your custom partitioner in the `olmoearth_run.yaml` file. This class must exist on your PYTHONPATH and be importable by the olmoearth_runner. As such, we recommend you place your custom partitioner in the `rslp/common/partitioners` directory of this repository to ensure it gets installed into the final Docker image artifact. ### Writing your own post-processing strategies -You may supply your own post-processing strategies by creating a new class that implements the `PostprocessInterface` class in the `esrun.runner.tools.postprocessors.postprocess_inferface` module. You can then specify your custom post-processing strategy in the `postprocessing_strategies.yaml` file. This class must exist on your `PYTHONPATH` and be importable by the esrunner. As such we recommend you place your custom post-processing strategy in the `rslp/common/postprocessing` directory of this repository to ensure it gets installed into the final Docker image artifact. +You may supply your own post-processing strategies by creating a new class that implements the `PostprocessInterface` class in the `olmoearth_run.runner.tools.postprocessors.postprocess_inferface` module. You can then specify your custom post-processing strategy under the `postprocessing_strategies` stanza of the `olmoearth_run.yaml` file. This class must exist on your `PYTHONPATH` and be importable by the olmoearth_runner. As such, we recommend you place your custom post-processing strategy in the `rslp/common/postprocessing` directory of this repository to ensure it gets installed into the final Docker image artifact. #### Testing Partitioner & Post-Processing Implementations -See the [earth-system-run](https://github.com/allenai/earth-system-run) repository for tests covering existing [partitioner](https://github.com/allenai/earth-system-run/tree/v1-develop/tests/unit/runner/tools/partitioners) and [post-processor](https://github.com/allenai/earth-system-run/tree/v1-develop/tests/unit/runner/tools/postprocessors) implementations.
+See the [olmoearth_run](https://github.com/allenai/olmoearth_run) repository for tests covering existing [partitioner](https://github.com/allenai/olmoearth_run/tree/develop/tests/unit/olmoearth_run/runner/tools/partitioners) and [post-processor](https://github.com/allenai/olmoearth_run/tree/develop/tests/unit/olmoearth_run/runner/tools/postprocessors) implementations. ## Longer Term Vision / Model Development Workflow 1. ML folk will create the requisite configs in a directory like this one. 2. Any additional or alternate requirements will be specified in a requirements.txt file in the same directory. 3. When a PR is created, CI will perform a docker build using the main Dockerfile in the root of the repo, but ensure any deviations from the main requirements.txt are merged into the main requirements.txt at build time so that the docker image is built with the correct requirements. This will allow developers to use this docker image for things like beaker runs or other executions (if needed.) -4. When the PR is merged, the docker build from above will be performed again, but the final image will be published to esrun as a new "model" (model version?) using the configurations in this directory. (TODO: Should we consider "versioning" models in esrun?) -5. Once the "model" has been published to esrun, fine-tuning can be performed using esrun. (Longer term I think we can use a standard versioned helios image for this, but for now we can use the bespoke images created in the previous step.) -6. (Presumably) Once the fine-tuning is complete, esrun will publish the final model (with weights) to esrun as a (new?) model (version?). Esrun can then be used to run predictions with this final model. +4. When the PR is merged, the docker build from above will be performed again, but the final image will be published to olmoearth_run as a new "model" (model version?) using the configurations in this directory. (TODO: Should we consider "versioning" models in olmoearth_run?) +5. Once the "model" has been published to olmoearth_run, fine-tuning can be performed using olmoearth_run. (Longer term I think we can use a standard versioned helios image for this, but for now we can use the bespoke images created in the previous step.) +6. (Presumably) Once the fine-tuning is complete, olmoearth_run will publish the final model (with weights) to olmoearth_run as a (new?) model (version?). OlmoEarth Run can then be used to run predictions with this final model. 
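The "Writing Your Own Partitioners" guidance above describes the extension point in prose only, so here is a minimal sketch of what such a class might look like. The `PartitionInterface` import path and the `rslp/common/partitioners` placement come from the README text in this diff; the `LimitPartitioner` class, the `partition` method name, and its signature are illustrative assumptions, not the confirmed olmoearth_run API — check the `partition_interface` module for the real abstract methods before copying this.

```python
# Hypothetical rslp/common/partitioners/limit_partitioner.py -- illustrative sketch only.
# NOTE: PartitionInterface's real abstract methods are not shown in this diff; the
# `partition` name and signature below are assumptions, not the library's contract.
from typing import Any

from olmoearth_run.runner.tools.partitioners.partition_interface import PartitionInterface


class LimitPartitioner(PartitionInterface):
    """Keep at most `limit` geometries in a single partition (handy for smoke tests)."""

    def __init__(self, limit: int = 1) -> None:
        self.limit = limit

    def partition(self, geometries: list[Any]) -> list[list[Any]]:
        # Return one partition containing at most `limit` geometries instead of
        # splitting the request across a grid.
        return [geometries[: self.limit]]
```

If the interface matches, such a class would be referenced from `olmoearth_run.yaml` the same way the built-in partitioners are, e.g. `class_path: rslp.common.partitioners.limit_partitioner.LimitPartitioner` with `init_args: {limit: 1}`.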
diff --git a/esrun_data/sample/annotation_features.geojson b/olmoearth_run_data/sample/annotation_features.geojson similarity index 100% rename from esrun_data/sample/annotation_features.geojson rename to olmoearth_run_data/sample/annotation_features.geojson diff --git a/esrun_data/sample/annotation_task_features.geojson b/olmoearth_run_data/sample/annotation_task_features.geojson similarity index 81% rename from esrun_data/sample/annotation_task_features.geojson rename to olmoearth_run_data/sample/annotation_task_features.geojson index 323414dc..c22f30cb 100644 --- a/esrun_data/sample/annotation_task_features.geojson +++ b/olmoearth_run_data/sample/annotation_task_features.geojson @@ -33,9 +33,9 @@ }, "id": null, "properties": { - "es_annotations_task_id": "164679b9-04ed-5b35-b438-9677104067fc", - "es_end_time": "2024-02-24 05:57:04+00:00", - "es_start_time": "2024-02-24 05:57:00+00:00" + "oe_annotations_task_id": "164679b9-04ed-5b35-b438-9677104067fc", + "oe_end_time": "2024-02-24 05:57:04+00:00", + "oe_start_time": "2024-02-24 05:57:00+00:00" }, "type": "Feature" } diff --git a/esrun_data/sample/dataset.json b/olmoearth_run_data/sample/dataset.json similarity index 100% rename from esrun_data/sample/dataset.json rename to olmoearth_run_data/sample/dataset.json diff --git a/esrun_data/sample/model.yaml b/olmoearth_run_data/sample/model.yaml similarity index 100% rename from esrun_data/sample/model.yaml rename to olmoearth_run_data/sample/model.yaml diff --git a/olmoearth_run_data/sample/olmoearth_run.yaml b/olmoearth_run_data/sample/olmoearth_run.yaml new file mode 100644 index 00000000..bd13a7be --- /dev/null +++ b/olmoearth_run_data/sample/olmoearth_run.yaml @@ -0,0 +1,38 @@ +partition_strategies: + partition_request_geometry: + class_path: olmoearth_run.runner.tools.partitioners.noop_partitioner.NoopPartitioner + init_args: + + prepare_window_geometries: + class_path: olmoearth_run.runner.tools.partitioners.fixed_window_partitioner.FixedWindowPartitioner + init_args: + window_size: 128 # intended to be a pixel value + +postprocessing_strategies: + process_dataset: + class_path: olmoearth_run.runner.tools.postprocessors.noop_raster.NoopRaster + + process_partition: + class_path: olmoearth_run.runner.tools.postprocessors.noop_raster.NoopRaster + + process_window: + class_path: olmoearth_run.runner.tools.postprocessors.noop_raster.NoopRaster + +window_prep: + sampler: + class_path: olmoearth_run.runner.tools.samplers.noop_sampler.NoopSampler + labeled_window_preparer: + class_path: olmoearth_run.runner.tools.labeled_window_preparers.point_to_pixel_window_preparer.PointToPixelWindowPreparer + init_args: + window_resolution: 10.0 + data_splitter: + class_path: olmoearth_run.runner.tools.data_splitters.random_data_splitter.RandomDataSplitter + init_args: + train_prop: 0.8 + val_prop: 0.2 + test_prop: 0.0 + seed: 42 + label_layer: "labels" + label_property: "category" + group_name: "post_random_split" + split_property: "split" diff --git a/esrun_data/sample/prediction_request_geometry.geojson b/olmoearth_run_data/sample/prediction_request_geometry.geojson similarity index 100% rename from esrun_data/sample/prediction_request_geometry.geojson rename to olmoearth_run_data/sample/prediction_request_geometry.geojson diff --git a/esrun_data/satlas/solar_farm/dataset.json b/olmoearth_run_data/satlas/solar_farm/dataset.json similarity index 100% rename from esrun_data/satlas/solar_farm/dataset.json rename to olmoearth_run_data/satlas/solar_farm/dataset.json diff --git 
a/esrun_data/satlas/solar_farm/model.yaml b/olmoearth_run_data/satlas/solar_farm/model.yaml similarity index 100% rename from esrun_data/satlas/solar_farm/model.yaml rename to olmoearth_run_data/satlas/solar_farm/model.yaml diff --git a/esrun_data/satlas/solar_farm/esrun.yaml b/olmoearth_run_data/satlas/solar_farm/olmoearth_run.yaml similarity index 59% rename from esrun_data/satlas/solar_farm/esrun.yaml rename to olmoearth_run_data/satlas/solar_farm/olmoearth_run.yaml index c4cdcf1f..c6da0750 100644 --- a/esrun_data/satlas/solar_farm/esrun.yaml +++ b/olmoearth_run_data/satlas/solar_farm/olmoearth_run.yaml @@ -1,6 +1,6 @@ partition_strategies: partition_request_geometry: - class_path: esrun.runner.tools.partitioners.grid_partitioner.GridPartitioner + class_path: olmoearth_run.runner.tools.partitioners.grid_partitioner.GridPartitioner init_args: grid_size: 0.15 output_projection: @@ -12,7 +12,7 @@ partition_strategies: use_utm: true prepare_window_geometries: - class_path: esrun.runner.tools.partitioners.grid_partitioner.GridPartitioner + class_path: olmoearth_run.runner.tools.partitioners.grid_partitioner.GridPartitioner init_args: grid_size: 2048 output_projection: @@ -25,10 +25,10 @@ partition_strategies: postprocessing_strategies: process_dataset: - class_path: esrun.runner.tools.postprocessors.combine_geotiff.CombineGeotiff + class_path: olmoearth_run.runner.tools.postprocessors.combine_geotiff.CombineGeotiff process_partition: - class_path: esrun.runner.tools.postprocessors.combine_geotiff.CombineGeotiff + class_path: olmoearth_run.runner.tools.postprocessors.combine_geotiff.CombineGeotiff process_window: - class_path: esrun.runner.tools.postprocessors.noop_raster.NoopRaster + class_path: olmoearth_run.runner.tools.postprocessors.noop_raster.NoopRaster diff --git a/esrun_data/satlas/solar_farm/prediction_request_geometry.geojson b/olmoearth_run_data/satlas/solar_farm/prediction_request_geometry.geojson similarity index 100% rename from esrun_data/satlas/solar_farm/prediction_request_geometry.geojson rename to olmoearth_run_data/satlas/solar_farm/prediction_request_geometry.geojson diff --git a/esrun_data/satlas/solar_farm_oe/README.md b/olmoearth_run_data/satlas/solar_farm_oe/README.md similarity index 100% rename from esrun_data/satlas/solar_farm_oe/README.md rename to olmoearth_run_data/satlas/solar_farm_oe/README.md diff --git a/esrun_data/satlas/solar_farm_oe/dataset.json b/olmoearth_run_data/satlas/solar_farm_oe/dataset.json similarity index 100% rename from esrun_data/satlas/solar_farm_oe/dataset.json rename to olmoearth_run_data/satlas/solar_farm_oe/dataset.json diff --git a/esrun_data/satlas/solar_farm_oe/model.yaml b/olmoearth_run_data/satlas/solar_farm_oe/model.yaml similarity index 100% rename from esrun_data/satlas/solar_farm_oe/model.yaml rename to olmoearth_run_data/satlas/solar_farm_oe/model.yaml diff --git a/esrun_data/satlas/solar_farm_oe/esrun.yaml b/olmoearth_run_data/satlas/solar_farm_oe/olmoearth_run.yaml similarity index 61% rename from esrun_data/satlas/solar_farm_oe/esrun.yaml rename to olmoearth_run_data/satlas/solar_farm_oe/olmoearth_run.yaml index 30931c42..c79d7639 100644 --- a/esrun_data/satlas/solar_farm_oe/esrun.yaml +++ b/olmoearth_run_data/satlas/solar_farm_oe/olmoearth_run.yaml @@ -1,6 +1,6 @@ partition_strategies: partition_request_geometry: - class_path: esrun.runner.tools.partitioners.grid_partitioner.GridPartitioner + class_path: olmoearth_run.runner.tools.partitioners.grid_partitioner.GridPartitioner init_args: grid_size: 0.15 output_projection: 
@@ -12,7 +12,7 @@ partition_strategies: use_utm: true prepare_window_geometries: - class_path: esrun.runner.tools.partitioners.grid_partitioner.GridPartitioner + class_path: olmoearth_run.runner.tools.partitioners.grid_partitioner.GridPartitioner init_args: grid_size: 2048 output_projection: @@ -25,13 +25,13 @@ partition_strategies: postprocessing_strategies: process_dataset: - class_path: esrun.runner.tools.postprocessors.combine_geotiff.CombineGeotiff + class_path: olmoearth_run.runner.tools.postprocessors.combine_geotiff.CombineGeotiff process_partition: - class_path: esrun.runner.tools.postprocessors.combine_geotiff.CombineGeotiff + class_path: olmoearth_run.runner.tools.postprocessors.combine_geotiff.CombineGeotiff process_window: - class_path: esrun.runner.tools.postprocessors.noop_raster.NoopRaster + class_path: olmoearth_run.runner.tools.postprocessors.noop_raster.NoopRaster inference_results_config: data_type: RASTER diff --git a/esrun_data/satlas/solar_farm_oe/prediction_request_geometry.geojson b/olmoearth_run_data/satlas/solar_farm_oe/prediction_request_geometry.geojson similarity index 100% rename from esrun_data/satlas/solar_farm_oe/prediction_request_geometry.geojson rename to olmoearth_run_data/satlas/solar_farm_oe/prediction_request_geometry.geojson diff --git a/requirements-helios.txt b/requirements-helios.txt index cbada3c7..275361fa 100644 --- a/requirements-helios.txt +++ b/requirements-helios.txt @@ -1 +1 @@ -helios @ git+https://github.com/allenai/helios.git@main +helios @ git+https://github.com/allenai/helios.git@f0a63f190b0f99d9c503249daf7e3e47bbd4792a diff --git a/requirements-olmoearth_run.txt b/requirements-olmoearth_run.txt index ce6678d3..79b7f386 100644 --- a/requirements-olmoearth_run.txt +++ b/requirements-olmoearth_run.txt @@ -1 +1 @@ -earth-system-run @ git+https://github.com/allenai/olmoearth_run.git@develop +olmoearth_run @ git+https://github.com/allenai/olmoearth_run.git@develop diff --git a/rslp/esrun/README.md b/rslp/esrun/README.md deleted file mode 100644 index 1d23d0cd..00000000 --- a/rslp/esrun/README.md +++ /dev/null @@ -1,35 +0,0 @@ -Here is example: - -``` -python -m rslp.main esrun esrun --config_path esrun_data/satlas/solar_farm/ --scratch_path /tmp/scratch/ -``` - -So in `esrun_data/satlas/solar_farm/` we have: - -- `dataset.json`: the rslearn dataset configuration file. -- `model.yaml`: the rslearn model configuration file. -- `esrun.yaml`: new YAML file containing esrun pre/post processing config. -- `prediction_request_geometry.geojson`: the GeoJSON input to the esrun partition and window generation. - - -In the `esrun_data/sample` directory, we can also run training window preparation, which -depends on: - -- `dataset.json`: the rslearn dataset configuration file. 
-- `esrun.ymal`: new YAML file containiner the window_prep config -- `annotation_features.geojson`: annotation geojson FeatureCollection exported from Studio -- `annotation_task_features.geojson`: the Studio task geojson Features corresponding to the above - -Run with: - -``` -python -m rslp.main esrun prepare_labeled_windows \ - --project_path esrun_data/sample \ - --scratch_path /tmp/scratch -``` - -to produce a new dataset at: - -``` -/tmp/scratch/dataset -``` diff --git a/rslp/esrun/__init__.py b/rslp/esrun/__init__.py deleted file mode 100644 index 6792e198..00000000 --- a/rslp/esrun/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -"""esrun pipeline.""" - -from .esrun import esrun, one_stage, prepare_labeled_windows - -workflows = { - "esrun": esrun, - "one_stage": one_stage, - "prepare_labeled_windows": prepare_labeled_windows, -} diff --git a/rslp/nandi/README.md b/rslp/nandi/README.md index f3a67089..914ecb10 100644 --- a/rslp/nandi/README.md +++ b/rslp/nandi/README.md @@ -98,4 +98,4 @@ Added AEF embeddings into `20250625` dataset. ### 2025-09-16 -Worked on ES-run inference, see `esrun_data/nandi` for more details. +Worked on olmoearth_run inference, see `olmoearth_run_data/nandi` for more details. diff --git a/rslp/olmoearth_run/README.md b/rslp/olmoearth_run/README.md new file mode 100644 index 00000000..0f1f4bef --- /dev/null +++ b/rslp/olmoearth_run/README.md @@ -0,0 +1,35 @@ +Here is an example: + +``` +python -m rslp.main olmoearth_run olmoearth_run --config_path olmoearth_run_data/satlas/solar_farm/ --scratch_path /tmp/scratch/ +``` + +So in `olmoearth_run_data/satlas/solar_farm/` we have: + +- `dataset.json`: the rslearn dataset configuration file. +- `model.yaml`: the rslearn model configuration file. +- `olmoearth_run.yaml`: new YAML file containing olmoearth_run pre/post processing config. +- `prediction_request_geometry.geojson`: the GeoJSON input to the olmoearth_run partition and window generation. + + +In the `olmoearth_run_data/sample` directory, we can also run training window preparation, which +depends on: + +- `dataset.json`: the rslearn dataset configuration file.
+- `olmoearth_run.yaml`: new YAML file containing the window_prep config +- `annotation_features.geojson`: annotation geojson FeatureCollection exported from Studio +- `annotation_task_features.geojson`: the Studio task geojson Features corresponding to the above + +Run with: + +``` +python -m rslp.main olmoearth_run prepare_labeled_windows \ + --project_path olmoearth_run_data/sample \ + --scratch_path /tmp/scratch +``` + +to produce a new dataset at: + +``` +/tmp/scratch/dataset +``` diff --git a/rslp/olmoearth_run/__init__.py b/rslp/olmoearth_run/__init__.py new file mode 100644 index 00000000..9a96a238 --- /dev/null +++ b/rslp/olmoearth_run/__init__.py @@ -0,0 +1,9 @@ +"""olmoearth_run pipeline.""" + +from .olmoearth_run import olmoearth_run, one_stage, prepare_labeled_windows + +workflows = { + "olmoearth_run": olmoearth_run, + "one_stage": one_stage, + "prepare_labeled_windows": prepare_labeled_windows, +} diff --git a/rslp/esrun/esrun.py b/rslp/olmoearth_run/olmoearth_run.py similarity index 73% rename from rslp/esrun/esrun.py rename to rslp/olmoearth_run/olmoearth_run.py index e5b17766..d1529525 100644 --- a/rslp/esrun/esrun.py +++ b/rslp/olmoearth_run/olmoearth_run.py @@ -1,4 +1,4 @@ -"""Run EsPredictRunner inference pipeline.""" +"""Run OlmoEarthRunPredictRunner inference pipeline.""" import hashlib import shutil @@ -7,8 +7,8 @@ from pathlib import Path import fsspec -from esrun.runner.local.fine_tune_runner import EsFineTuneRunner -from esrun.runner.local.predict_runner import EsPredictRunner +from olmoearth_run.runner.local.fine_tune_runner import OlmoEarthRunFineTuneRunner +from olmoearth_run.runner.local.predict_runner import OlmoEarthRunPredictRunner from upath import UPath from rslp.log_utils import get_logger @@ -35,7 +35,7 @@ def get_local_checkpoint(checkpoint_path: UPath) -> Path: local_upath = ( UPath(tempfile.gettempdir()) / "rslearn_cache" - / "esrun_checkpoints" + / "olmoearth_run_checkpoints" / f"{cache_id}.ckpt" ) @@ -50,9 +50,9 @@ def get_local_checkpoint(checkpoint_path: UPath) -> Path: def prepare_labeled_windows(project_path: Path, scratch_path: Path) -> None: - """Run EsFineTuneRunner's prepare_windows pipeline.""" - logger.info("Loading EsFineTuneRunner") - runner = EsFineTuneRunner( + """Run OlmoEarthRunFineTuneRunner's prepare_windows pipeline.""" + logger.info("Loading OlmoEarthRunFineTuneRunner") + runner = OlmoEarthRunFineTuneRunner( + project_path=project_path, scratch_path=scratch_path, ) @@ -60,7 +60,7 @@ def prepare_labeled_windows(project_path: Path, scratch_path: Path) -> None: runner.prepare_labeled_windows() -def esrun(config_path: Path, scratch_path: Path, checkpoint_path: str) -> None: +def olmoearth_run(config_path: Path, scratch_path: Path, checkpoint_path: str) -> None: """Run EsPredictRunner inference pipeline. Args: @@ -69,8 +69,8 @@ def esrun(config_path: Path, scratch_path: Path, checkpoint_path: str) -> None: scratch_path: directory to use for scratch space. checkpoint_path: path to the model checkpoint. """ - runner = EsPredictRunner( - # ESRun does not work with relative path, so make sure to convert to absolute here. + runner = OlmoEarthRunPredictRunner( + # OlmoEarth Run does not work with relative path, so make sure to convert to absolute here.
project_path=config_path.absolute(), scratch_path=scratch_path, checkpoint_path=get_local_checkpoint(UPath(checkpoint_path)), @@ -91,8 +91,8 @@ def esrun(config_path: Path, scratch_path: Path, checkpoint_path: str) -> None: runner.combine(partitions) -class EsrunStage(StrEnum): - """The stage of esrun pipeline to run. +class OlmoEarthRunStage(StrEnum): + """The stage of olmoearth_run pipeline to run. We always run the partition stage so that is not an option here. """ @@ -107,42 +107,42 @@ def one_stage( config_path: Path, scratch_path: Path, checkpoint_path: str, - stage: EsrunStage, + stage: OlmoEarthRunStage, partition_id: str | None = None, ) -> None: - """Run EsPredictRunner inference pipeline. + """Run OlmoEarthRunPredictRunner inference pipeline. Args: - config_path: see esrun. - scratch_path: see esrun. - checkpoint_path: see esrun. + config_path: see olmoearth_run. + scratch_path: see olmoearth_run. + checkpoint_path: see olmoearth_run. stage: which stage to run. partition_id: the partition to run the stage for. If not set, we run the stage for all partitions, except BUILD_DATASET and COMBINE, which happens across partitions. """ - if stage == EsrunStage.COMBINE and partition_id is not None: + if stage == OlmoEarthRunStage.COMBINE and partition_id is not None: raise ValueError("partition_id cannot be set for COMBINE stage") - runner = EsPredictRunner( - # ESRun does not work with relative path, so make sure to convert to absolute here. - project_path=config_path, + runner = OlmoEarthRunPredictRunner( + # OlmoEarth Run does not work with relative path, so make sure to convert to absolute here. + project_path=config_path.absolute(), scratch_path=scratch_path, checkpoint_path=get_local_checkpoint(UPath(checkpoint_path)), ) partitions = runner.partition() - if stage == EsrunStage.BUILD_DATASET: + if stage == OlmoEarthRunStage.BUILD_DATASET: runner.build_dataset(partitions) if stage in [ - EsrunStage.RUN_INFERENCE, - EsrunStage.POSTPROCESS, + OlmoEarthRunStage.RUN_INFERENCE, + OlmoEarthRunStage.POSTPROCESS, ]: fn = None - if stage == EsrunStage.RUN_INFERENCE: + if stage == OlmoEarthRunStage.RUN_INFERENCE: fn = runner.run_inference - elif stage == EsrunStage.POSTPROCESS: + elif stage == OlmoEarthRunStage.POSTPROCESS: fn = runner.postprocess else: assert False @@ -155,5 +155,5 @@ def one_stage( for partition_id in partitions: fn(partition_id) - elif stage == EsrunStage.COMBINE: + elif stage == OlmoEarthRunStage.COMBINE: runner.combine(partitions) diff --git a/tests/integration/esrun/test_esrun.py b/tests/integration/olmoearth_run/test_olmoearth_run.py similarity index 73% rename from tests/integration/esrun/test_esrun.py rename to tests/integration/olmoearth_run/test_olmoearth_run.py index a57fe19f..d6cfd517 100644 --- a/tests/integration/esrun/test_esrun.py +++ b/tests/integration/olmoearth_run/test_olmoearth_run.py @@ -1,4 +1,4 @@ -"""Test esrun pipeline.""" +"""Test olmoearth_run pipeline.""" import shutil from datetime import UTC, datetime @@ -15,18 +15,20 @@ from rslearn.utils.vector_format import GeojsonVectorFormat from upath import UPath -from rslp.esrun.esrun import esrun +from rslp.olmoearth_run.olmoearth_run import olmoearth_run -def test_esrun_solar_farm(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: - """Test ESRun pipeline by applying solar farm on small request geometry.""" +def test_olmoearth_run_solar_farm( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Test OlmoEarthRun pipeline by applying solar farm on small request geometry.""" # For 
now this is fixed but we should figure out how to have standardized path for # each application later, similar to RSLP_PREFIX. checkpoint_path = "gs://ai2-rslearn-projects-data/projects/2025_06_06_helios_finetuning/v2_satlas_solar_farm_128_ts_helios_per_mod_patchdisc_contrastive_fix_esrun/checkpoints/epoch=9999-step=99999.ckpt" # Copy the configuration files. We use the tmp_path as the config dir that we will # initialize from since we will customize the request geometry. - src_dir = Path("esrun_data/satlas/solar_farm_oe/") + src_dir = Path("olmoearth_run_data/satlas/solar_farm_oe/") config_dir = tmp_path / "config" config_dir.mkdir(parents=True) @@ -53,22 +55,22 @@ def test_esrun_solar_farm(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> No feat = Feature( request_geometry, { - "es_start_time": datetime(2024, 12, 1, tzinfo=UTC).isoformat(), - "es_end_time": datetime(2025, 7, 1, tzinfo=UTC).isoformat(), + "oe_start_time": datetime(2024, 12, 1, tzinfo=UTC).isoformat(), + "oe_end_time": datetime(2025, 7, 1, tzinfo=UTC).isoformat(), }, ) GeojsonVectorFormat().encode_to_file( UPath(config_dir / "prediction_request_geometry.geojson"), [feat] ) - # We also customize the esrun.yaml since we want to use a single window. - with (src_dir / "esrun.yaml").open() as f: - esrun_config = yaml.safe_load(f) - esrun_config["partition_strategies"]["partition_request_geometry"] = { - "class_path": "esrun.runner.tools.partitioners.noop_partitioner.NoopPartitioner", + # We also customize the olmoearth_run.yaml since we want to use a single window. + with (src_dir / "olmoearth_run.yaml").open() as f: + olmoearth_run_config = yaml.safe_load(f) + olmoearth_run_config["partition_strategies"]["partition_request_geometry"] = { + "class_path": "olmoearth_run.runner.tools.partitioners.noop_partitioner.NoopPartitioner", } - esrun_config["partition_strategies"]["prepare_window_geometries"] = { - "class_path": "esrun.runner.tools.partitioners.reprojection_partitioner.ReprojectionPartitioner", + olmoearth_run_config["partition_strategies"]["prepare_window_geometries"] = { + "class_path": "olmoearth_run.runner.tools.partitioners.reprojection_partitioner.ReprojectionPartitioner", "init_args": { "output_projection": { "class_path": "rslearn.utils.geometry.Projection", @@ -81,8 +83,8 @@ def test_esrun_solar_farm(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> No "use_utm": True, }, } - with (config_dir / "esrun.yaml").open("w") as f: - yaml.safe_dump(esrun_config, f) + with (config_dir / "olmoearth_run.yaml").open("w") as f: + yaml.safe_dump(olmoearth_run_config, f) # We customize the model.yaml to use smaller batch size. # Because in CI we have small system memory. @@ -100,7 +102,7 @@ def test_esrun_solar_farm(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> No ) scratch_dir = tmp_path / "scratch" - esrun( + olmoearth_run( config_path=config_dir, scratch_path=scratch_dir, checkpoint_path=checkpoint_path,