Update the figure manifest to reflect the local figure directory and upload any
new blobs to the GitHub release.
The local figure directory is treated as the source of truth for adds and
updates: every file under it is hashed and recorded in the manifest. Blobs are
content-addressed --- each unique hash is uploaded at most once and never
overwritten, so updates by different collaborators do not clobber each other
on the release. Manifest changes are written to disk for the user to commit.
By default, entries already present in the manifest but absent from the local
figure directory are left in place (additive behaviour). Pass prune=True
to drop those entries from the manifest --- the upstream blobs remain on the
release for any past commit that still references them.
Functions:
- push_figures – Update the manifest from the local figure directory and upload any new blobs to the GitHub release.
Attributes:
DEFAULT_UPLOAD_WORKERS = 8
push_figures(
tag: str = FIGURE_GITHUB_RELEASE_TAG,
repo_name: str = GH_REPO_NAME,
fig_dir: Path = FIGURE_DIRECTORY,
manifest_path: Path = FIGURE_MANIFEST_PATH,
title: str = FIGURES_ARCHIVE_TITLE,
prune: bool = False,
figure_tasks: Sequence[Task] = ALL_FIGURE_TASKS,
max_workers: int = DEFAULT_UPLOAD_WORKERS,
)
Update the manifest from the local figure directory and upload any new
blobs to the GitHub release.
Source code in mecfs_bio/figures/key_scripts/push_figures.py
def push_figures(
    tag: str = FIGURE_GITHUB_RELEASE_TAG,
    repo_name: str = GH_REPO_NAME,
    fig_dir: Path = FIGURE_DIRECTORY,
    manifest_path: Path = FIGURE_MANIFEST_PATH,
    title: str = FIGURES_ARCHIVE_TITLE,
    prune: bool = False,
    figure_tasks: Sequence[Task] = ALL_FIGURE_TASKS,
    max_workers: int = DEFAULT_UPLOAD_WORKERS,
):
    """
    Update the manifest from the local figure directory and upload any new
    blobs to the GitHub release.

    The manifest stored at ``manifest_path`` is merged with a fresh scan of
    ``fig_dir``, checked against ``figure_tasks``, and only then are the
    hashes that the release does not yet list uploaded. The merged manifest
    is written back to ``manifest_path`` as the last step, after any uploads.

    Args:
        tag: Release tag whose assets are listed and uploaded to.
        repo_name: Repository identifier handed to the release helpers.
        fig_dir: Local figure directory; created (with parents) if missing.
        manifest_path: Location the manifest is loaded from and saved to.
        title: Title handed to ``ensure_release_exists`` when blobs need
            uploading.
        prune: Forwarded to the manifest merge. Per the module description,
            ``True`` drops entries that are absent from ``fig_dir``.
        figure_tasks: Tasks the merged manifest is validated against.
        max_workers: Worker count forwarded to the parallel uploader.

    Raises:
        RuntimeError: If the merged manifest references a hash that is
            neither listed on the release nor present in the local scan.
    """
    fig_dir.mkdir(parents=True, exist_ok=True)

    previous = FigureManifest.load(manifest_path)
    scanned = scan_figure_dir(fig_dir)
    merged = _merge_manifests(old=previous, local=scanned, prune=prune)

    # Validate before touching the release: a manifest that references
    # files no task in figure_tasks produces would silently rot.
    validate_manifest_subset_of_tasks(
        manifest=merged, tasks=figure_tasks, fig_dir=fig_dir
    )

    already_released = list_release_asset_names(release_tag=tag, repo_name=repo_name)
    missing_from_release = merged.hashes() - already_released

    # The release is content-addressed, so keep a single (rel_path, src)
    # pair per hash: two figure paths sharing a blob upload it only once,
    # and the first path encountered in the scan wins.
    pending: dict[str, tuple[Path, Path]] = {}
    for rel_path, sha in scanned.figures.items():
        if sha not in missing_from_release or sha in pending:
            continue
        pending[sha] = (rel_path, fig_dir / rel_path)

    # Anything still missing has no local source file either, i.e. the
    # manifest already pointed at a blob that was never uploaded and that
    # the user holds no copy of.
    orphaned = missing_from_release - pending.keys()
    if orphaned:
        raise RuntimeError(
            f"Manifest references hashes that are neither on the release nor "
            f"available locally: {sorted(orphaned)}"
        )

    if pending:
        # Make sure the release exists once, up front, so the parallel
        # uploads can all use `gh release upload` and skip the per-blob
        # existence check.
        ensure_release_exists(release_tag=tag, repo_name=repo_name, title=title)
        _upload_blobs_in_parallel(
            uploads_by_sha=pending,
            tag=tag,
            repo_name=repo_name,
            max_workers=max_workers,
        )
    else:
        logger.debug("No new blobs to upload to release.")

    merged.save(manifest_path)
    logger.debug(f"Manifest written to {manifest_path}.")
    logger.debug("Push complete. Commit the manifest to record the change.")