Skip to content

mecfs_bio.build_system.task.magma.magma_gene_analysis_task

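Classes:

- MagmaGeneAnalysisTask

Functions:

- read_magma_gene_analysis_result

Attributes:

- DuplicateMode
- GENE_ANALYSIS_OUTPUT_STEM_NAME
- SynonymMode
- logger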

DuplicateMode module-attribute

DuplicateMode = Literal['first', 'last', 'error']

GENE_ANALYSIS_OUTPUT_STEM_NAME module-attribute

GENE_ANALYSIS_OUTPUT_STEM_NAME = 'gene_analysis_output'

SynonymMode module-attribute

SynonymMode = Literal['skip', 'drop', 'drop-dup']

logger module-attribute

logger = get_logger()

MagmaGeneAnalysisTask

Bases: Task

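Methods:

- create
- execute

Attributes:

- deps
- duplicate_mode
- ld_ref_file_stem
- magma_annotation_id
- magma_annotation_task
- magma_binary_id
- magma_binary_task
- magma_ld_ref_id
- magma_ld_ref_task
- magma_p_value_task
- meta
- p_value_id
- p_value_meta
- sample_size
- synonym_mode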

deps property

deps: list[Task]

duplicate_mode class-attribute instance-attribute

duplicate_mode: DuplicateMode | None = 'first'

ld_ref_file_stem instance-attribute

ld_ref_file_stem: str

magma_annotation_id property

magma_annotation_id: AssetId

magma_annotation_task instance-attribute

magma_annotation_task: Task

magma_binary_id property

magma_binary_id: AssetId

magma_binary_task instance-attribute

magma_binary_task: Task

magma_ld_ref_id property

magma_ld_ref_id: AssetId

magma_ld_ref_task instance-attribute

magma_ld_ref_task: Task

magma_p_value_task instance-attribute

magma_p_value_task: Task

meta property

meta: Meta

p_value_id property

p_value_id: AssetId

p_value_meta property

p_value_meta: Meta

sample_size instance-attribute

sample_size: int

synonym_mode class-attribute instance-attribute

synonym_mode: SynonymMode = 'drop-dup'

create classmethod

create(
    asset_id: str,
    magma_annotation_task: Task,
    magma_p_value_task: Task,
    magma_binary_task: Task,
    magma_ld_ref_task: Task,
    ld_ref_file_stem: str,
    sample_size: int,
)
Source code in mecfs_bio/build_system/task/magma/magma_gene_analysis_task.py
@classmethod
def create(
    cls,
    asset_id: str,
    magma_annotation_task: Task,
    magma_p_value_task: Task,
    magma_binary_task: Task,
    magma_ld_ref_task: Task,
    ld_ref_file_stem: str,
    sample_size: int,
):
    """Alternate constructor that derives the task's metadata.

    The new task's ``trait``, ``project`` and ``sub_dir`` are copied from the
    metadata of ``magma_annotation_task``; only the asset id is supplied by
    the caller.

    Args:
        asset_id: Identifier wrapped in ``AssetId`` for the new task's meta.
        magma_annotation_task: Task whose meta is the template for this
            task's meta; it must be a ``FilteredGWASDataMeta``.
        magma_p_value_task: Stored on the instance unchanged.
        magma_binary_task: Stored on the instance unchanged.
        magma_ld_ref_task: Stored on the instance unchanged.
        ld_ref_file_stem: Stored on the instance unchanged.
        sample_size: Stored on the instance unchanged.

    Returns:
        A new instance of ``cls``.

    Raises:
        AssertionError: If the annotation task's meta is not a
            ``FilteredGWASDataMeta``.
    """
    source_meta = magma_annotation_task.meta
    assert isinstance(source_meta, FilteredGWASDataMeta)

    new_id = AssetId(asset_id)
    output_meta = ProcessedGwasDataDirectoryMeta(
        id=new_id,
        trait=source_meta.trait,
        project=source_meta.project,
        sub_dir=PurePath(source_meta.sub_dir),
    )

    return cls(
        meta=output_meta,
        sample_size=sample_size,
        ld_ref_file_stem=ld_ref_file_stem,
        magma_ld_ref_task=magma_ld_ref_task,
        magma_binary_task=magma_binary_task,
        magma_p_value_task=magma_p_value_task,
        magma_annotation_task=magma_annotation_task,
    )

execute

execute(scratch_dir: Path, fetch: Fetch, wf: WF) -> Asset
Source code in mecfs_bio/build_system/task/magma/magma_gene_analysis_task.py
def execute(self, scratch_dir: Path, fetch: Fetch, wf: WF) -> Asset:
    """Run the MAGMA binary for gene analysis and return its output directory.

    The MAGMA binary, gene annotation file, p-value file and LD reference
    directory are obtained through ``fetch``. The binary is then invoked
    with ``--bfile``, ``--pval``, ``--gene-annot`` and ``--out``.

    Args:
        scratch_dir: Working directory; results are written beneath
            ``scratch_dir / "gene_analysis_dir"``.
        fetch: Callable that resolves an asset id to an asset.
        wf: Not used by this task.

    Returns:
        A ``DirectoryAsset`` wrapping the output directory.

    Raises:
        AssertionError: If a fetched asset is not of the expected kind
            (file for binary/p-values/annotation, directory for LD reference).
    """
    binary_asset = fetch(self.magma_binary_id)
    annotation_asset = fetch(self.magma_annotation_id)
    p_value_asset = fetch(self.p_value_id)
    ld_ref_asset = fetch(self.magma_ld_ref_id)
    assert isinstance(binary_asset, FileAsset)
    assert isinstance(p_value_asset, FileAsset)
    assert isinstance(annotation_asset, FileAsset)
    assert isinstance(ld_ref_asset, DirectoryAsset)

    out_dir = scratch_dir / "gene_analysis_dir"
    out_dir.mkdir(parents=True, exist_ok=True)
    out_base_path = out_dir / GENE_ANALYSIS_OUTPUT_STEM_NAME

    # ``key=value`` modifiers are placed right after the flag they qualify,
    # so each flag's arguments are assembled as a group.
    bfile_args = [
        "--bfile",
        str(ld_ref_asset.path / self.ld_ref_file_stem),
        f"synonym-dup={self.synonym_mode}",
    ]
    pval_args = ["--pval", str(p_value_asset.path)]
    # duplicate_mode may be None; in that case the modifier is left out so
    # the literal text "duplicate=None" is never passed to MAGMA.
    if self.duplicate_mode is not None:
        pval_args.append(f"duplicate={self.duplicate_mode}")
    pval_args.append(f"N={self.sample_size}")

    cmd = [
        str(binary_asset.path),
        *bfile_args,
        *pval_args,
        "--gene-annot",
        str(annotation_asset.path),
        "--out",
        str(out_base_path),
    ]
    logger.debug(f"Running command: {' '.join(cmd)}")
    execute_command(cmd)

    return DirectoryAsset(out_dir)

read_magma_gene_analysis_result

read_magma_gene_analysis_result(
    result_dir: Path,
) -> pd.DataFrame
Source code in mecfs_bio/build_system/task/magma/magma_gene_analysis_task.py
def read_magma_gene_analysis_result(result_dir: Path) -> pd.DataFrame:
    """Load a gene analysis result table into a pandas DataFrame.

    Args:
        result_dir: Directory containing the file
            ``GENE_ANALYSIS_OUTPUT_STEM_NAME + ".genes.out"``.

    Returns:
        The table read from that file, converted to pandas. The file is read
        with a whitespace-separated text format spec using ``#`` as the
        comment code.
    """
    genes_out_path = result_dir / f"{GENE_ANALYSIS_OUTPUT_STEM_NAME}.genes.out"
    text_format = DataFrameWhiteSpaceSepTextFormat(comment_code="#")
    scanned = scan_dataframe(genes_out_path, spec=DataFrameReadSpec(text_format))
    return scanned.collect().to_pandas()