Skip to content

mecfs_bio.build_system.task.extract_gzip_task

Task that decompresses a gzip-compressed file.

Classes:

Functions:

ExtractGzipTextFileTask

Bases: Task

Methods:

Attributes:

deps property

deps: list[Task]

meta property

meta: Meta

create_for_gwas_file classmethod

create_for_gwas_file(
    source_file_task: Task,
    asset_id: str,
    readspec: DataFrameReadSpec | None = None,
)
Source code in mecfs_bio/build_system/task/extract_gzip_task.py
@classmethod
def create_for_gwas_file(
    cls,
    source_file_task: Task,
    asset_id: str,
    readspec: DataFrameReadSpec | None = None,
):
    """Build an extraction task for a gzipped GWAS summary-statistics file.

    The new task's metadata is a ``GWASSummaryDataFileMeta`` that copies
    ``trait``, ``project`` and ``sub_dir`` from the source task's metadata,
    uses ``asset_id`` as its id, and sets ``project_path`` to ``None``.

    Args:
        source_file_task: Task producing the compressed file. Its ``meta``
            must be a ``GWASSummaryDataFileMeta``.
        asset_id: Identifier for the extracted asset.
        readspec: Read specification for the extracted file. When ``None``,
            the value returned by the source metadata's ``read_spec()`` is
            used instead.

    Raises:
        AssertionError: If the source task's metadata is not a
            ``GWASSummaryDataFileMeta``.
    """
    upstream_meta = source_file_task.meta
    assert isinstance(upstream_meta, GWASSummaryDataFileMeta)
    # Fall back to the source's own read spec only when none was supplied.
    effective_spec = upstream_meta.read_spec() if readspec is None else readspec
    return cls(
        meta=GWASSummaryDataFileMeta(
            id=AssetId(asset_id),
            trait=upstream_meta.trait,
            project=upstream_meta.project,
            sub_dir=upstream_meta.sub_dir,
            project_path=None,
            read_spec=effective_spec,
        ),
        source_file_task=source_file_task,
    )

create_for_reference_file classmethod

create_for_reference_file(
    source_file_task: Task, asset_id: str
)
Source code in mecfs_bio/build_system/task/extract_gzip_task.py
@classmethod
def create_for_reference_file(cls, source_file_task: Task, asset_id: str):
    """Build an extraction task for a gzipped reference file.

    The new task's metadata is a ``ReferenceFileMeta`` that copies ``group``,
    ``sub_group`` and ``filename`` from the source task's metadata, uses
    ``asset_id`` as its id, places the result in the ``"extracted"``
    sub-folder, and sets an empty extension.

    Args:
        source_file_task: Task producing the compressed file. Its ``meta``
            must be a ``ReferenceFileMeta``.
        asset_id: Identifier for the extracted asset.

    Raises:
        AssertionError: If the source task's metadata is not a
            ``ReferenceFileMeta``.
    """
    upstream_meta = source_file_task.meta
    assert isinstance(upstream_meta, ReferenceFileMeta)
    return cls(
        meta=ReferenceFileMeta(
            group=upstream_meta.group,
            sub_group=upstream_meta.sub_group,
            sub_folder=PurePath("extracted"),
            id=AssetId(asset_id),
            filename=upstream_meta.filename,
            extension="",
        ),
        source_file_task=source_file_task,
    )

execute

execute(scratch_dir: Path, fetch: Fetch, wf: WF) -> Asset
Source code in mecfs_bio/build_system/task/extract_gzip_task.py
def execute(self, scratch_dir: Path, fetch: Fetch, wf: WF) -> Asset:
    """Decompress the source asset into ``scratch_dir`` and return it.

    Args:
        scratch_dir: Directory in which the output file
            ``extracted_file`` is written.
        fetch: Callable used to obtain the source asset from
            ``self._source_file_id``.
        wf: Not used by this task.

    Returns:
        A ``FileAsset`` wrapping the path of the decompressed file.
    """
    destination = scratch_dir / "extracted_file"
    compressed_path = read_file_asset_path(fetch(self._source_file_id))
    apply_gzip(compressed_path, destination)
    return FileAsset(destination)

apply_gzip

apply_gzip(src: Path, dst: Path)
Source code in mecfs_bio/build_system/task/extract_gzip_task.py
def apply_gzip(src: Path, dst: Path):
    """Decompress the gzip file at ``src`` and write its contents to ``dst``.

    Despite the name, this function decompresses; it does not compress.
    Both files are handled in binary mode, so the payload is copied
    byte-for-byte without any text decoding.

    Args:
        src: Path of the gzip-compressed input file.
        dst: Path of the output file; opened with mode ``"wb"``.
    """
    with gzip.open(src, "rb") as compressed, open(dst, "wb") as extracted:
        shutil.copyfileobj(compressed, extracted)