Skip to content

mecfs_bio.build_system.task.mixer.bivariate_mixer_test_task

Classes:

  • BivariateMixerTestTask

    Task to run bivariate MiXeR test evaluation on a model produced by bivariate MiXeR fit

BivariateMixerTestTask

Bases: Task

Task to run bivariate MiXeR test evaluation on a model produced by bivariate MiXeR fit

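Methods:

  • create – classmethod constructor that builds the result-directory metadata and returns the task
  • execute – runs the MiXeR test2 step using the parameters produced by the fit task

Attributes:

  • bim_file_pattern, chr_to_use_arg, deps, extra_args, extract_file_pattern_gen, fit_task, ld_file_pattern, meta, reference_data_directory_task, rep, threads, trait_1_source, trait_2_source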

bim_file_pattern class-attribute instance-attribute

bim_file_pattern: str = (
    "1000G_EUR_Phase3_plink/1000G.EUR.QC.@.bim"
)

chr_to_use_arg class-attribute instance-attribute

chr_to_use_arg: str | None = None

deps property

deps: list[Task]

extra_args class-attribute instance-attribute

extra_args: Sequence[str] = tuple()

extract_file_pattern_gen class-attribute instance-attribute

extract_file_pattern_gen: Callable[[int], str] | None = None

fit_task instance-attribute

fit_task: BivariateMixerTask

ld_file_pattern class-attribute instance-attribute

ld_file_pattern: str = (
    "1000G_EUR_Phase3_plink/1000G.EUR.QC.@.run4.ld"
)

meta property

meta: Meta

reference_data_directory_task instance-attribute

reference_data_directory_task: Task

rep property

rep: int

threads class-attribute instance-attribute

threads: int = 4

trait_1_source instance-attribute

trait_1_source: (
    MixerDataSource | PreformattedMixerDataSource
)

trait_2_source instance-attribute

trait_2_source: (
    MixerDataSource | PreformattedMixerDataSource
)

create classmethod

create(
    asset_id: str,
    trait_1_source: MixerDataSource
    | PreformattedMixerDataSource,
    trait_2_source: MixerDataSource
    | PreformattedMixerDataSource,
    ref_data_directory_task: Task,
    fit_task: BivariateMixerTask,
    extra_args: Sequence[str] = tuple(),
    chr_to_use_arg: str | None = None,
    ld_file_pattern: str = "1000G_EUR_Phase3_plink/1000G.EUR.QC.@.run4.ld",
    bim_file_pattern: str = "1000G_EUR_Phase3_plink/1000G.EUR.QC.@.bim",
    threads: int = 4,
    extract_file_pattern_gen: Callable[[int], str]
    | None = None,
)
Source code in mecfs_bio/build_system/task/mixer/bivariate_mixer_test_task.py
@classmethod
def create(
    cls,
    asset_id: str,
    trait_1_source: MixerDataSource | PreformattedMixerDataSource,
    trait_2_source: MixerDataSource | PreformattedMixerDataSource,
    ref_data_directory_task: Task,
    fit_task: BivariateMixerTask,
    extra_args: Sequence[str] = tuple(),
    chr_to_use_arg: str | None = None,
    ld_file_pattern: str = "1000G_EUR_Phase3_plink/1000G.EUR.QC.@.run4.ld",
    bim_file_pattern: str = "1000G_EUR_Phase3_plink/1000G.EUR.QC.@.bim",
    threads: int = 4,
    extract_file_pattern_gen: Callable[[int], str] | None = None,
):
    """Construct the task together with its result-directory metadata.

    A ``ResultDirectoryMeta`` is created with ``asset_id`` as its id, the
    fixed trait ``"multi_trait"``, the fixed project ``"polygenic_overlap"``
    and the sub-directory ``analysis/bivariate_mixer``.  Every other argument
    is forwarded unchanged to the class constructor; note that
    ``ref_data_directory_task`` is stored under the field name
    ``reference_data_directory_task``.

    Returns:
        A new instance of ``cls``.
    """
    # All tasks created here share the same location inside the result tree.
    result_sub_dir = PurePath("analysis") / "bivariate_mixer"
    result_meta = ResultDirectoryMeta(
        id=asset_id,
        trait="multi_trait",
        project="polygenic_overlap",
        sub_dir=result_sub_dir,
    )
    return cls(
        meta=result_meta,
        # Upstream tasks / data sources.
        fit_task=fit_task,
        reference_data_directory_task=ref_data_directory_task,
        trait_1_source=trait_1_source,
        trait_2_source=trait_2_source,
        # Command-line configuration.
        ld_file_pattern=ld_file_pattern,
        bim_file_pattern=bim_file_pattern,
        chr_to_use_arg=chr_to_use_arg,
        extract_file_pattern_gen=extract_file_pattern_gen,
        extra_args=extra_args,
        threads=threads,
    )

execute

execute(scratch_dir: Path, fetch: Fetch, wf: WF) -> Asset
Source code in mecfs_bio/build_system/task/mixer/bivariate_mixer_test_task.py
def execute(self, scratch_dir: Path, fetch: Fetch, wf: WF) -> Asset:
    """Run MiXeR ``test2`` with the parameters produced by the fit task.

    Steps:

    1. Fetch the reference-data directory and the fit-task output directory.
    2. Copy the bivariate fit JSON for this repetition (``self.rep``) into a
       temporary working directory created under the current working
       directory, and prepare both trait input files there.
    3. Invoke ``mixer test2`` with the reference directory mounted at
       ``CONTAINER_REF_DIR``.
    4. Move the resulting ``.json`` and ``.log`` files into ``scratch_dir``.

    Args:
        scratch_dir: Directory that receives the test outputs and backs the
            returned asset.
        fetch: Callable resolving an asset id to a materialised asset.
        wf: Not referenced by this method.

    Returns:
        A ``DirectoryAsset`` pointing at ``scratch_dir``.

    Raises:
        AssertionError: If a fetched asset is not a ``DirectoryAsset`` or an
            expected output file is missing after the MiXeR run.
    """
    # NOTE(review): ``self.extra_args`` is not referenced anywhere in this
    # method, so values supplied through ``extra_args`` do not reach the
    # ``test2`` command line — confirm whether they should be appended.
    chr_args = (
        ["--chr2use", self.chr_to_use_arg]
        if self.chr_to_use_arg is not None
        else []
    )
    reference_dir_asset = fetch(self.reference_data_directory_task.asset_id)
    assert isinstance(reference_dir_asset, DirectoryAsset)
    ref_mounts = {reference_dir_asset.path.resolve(): CONTAINER_REF_DIR}

    # The "@" placeholder in each pattern is replaced by the repetition index.
    rep = self.rep
    bivariate_fit_prefix = MIXER_BIVARIATE_FIT_PREFIX_PATTERN.replace("@", str(rep))
    bivariate_fit_json = bivariate_fit_prefix + ".json"
    bivariate_test_prefix = MIXER_BIVARIATE_TEST_PREFIX_PATTERN.replace(
        "@", str(rep)
    )

    with tempfile.TemporaryDirectory(dir=os.getcwd()) as tempdir:
        # Work with a path relative to the current working directory.
        # NOTE(review): presumably required so the paths are valid for
        # ``invoke_mixer`` — confirm.
        tmp_path = Path(tempdir).relative_to(os.getcwd())

        fit_asset = fetch(self.fit_task.asset_id)
        assert isinstance(fit_asset, DirectoryAsset)
        bivariate_fit_json_path = tmp_path / bivariate_fit_json
        shutil.copy(fit_asset.path / bivariate_fit_json, bivariate_fit_json_path)

        trait_1_stats_path = prepare_mixer_trait_input_file(
            source=self.trait_1_source,
            fetch=fetch,
            temp_dir=tmp_path,
        )
        trait_2_stats_path = prepare_mixer_trait_input_file(
            source=self.trait_2_source,
            fetch=fetch,
            temp_dir=tmp_path,
        )
        common_args = [
            "--ld-file",
            str(CONTAINER_REF_DIR / self.ld_file_pattern),
            "--bim-file",
            str(CONTAINER_REF_DIR / self.bim_file_pattern),
            "--threads",
            str(self.threads),
        ]

        bivar_test_out_str = str(tmp_path / bivariate_test_prefix)

        extra_test_args = []
        if self.extract_file_pattern_gen is not None:
            extra_test_args.extend(
                get_mixer_extract_args(
                    extract_file_pattern_gen=self.extract_file_pattern_gen,
                    rep=rep,
                    reference_dir_path=reference_dir_asset.path,
                )
            )
        invoke_mixer(
            ["test2"]
            + common_args
            + chr_args
            + extra_test_args
            + [
                "--trait1-file",
                str(trait_1_stats_path),
                "--trait2-file",
                str(trait_2_stats_path),
                "--load-params",
                str(bivariate_fit_json_path),
                "--out",
                bivar_test_out_str,
            ],
            extra_mounts=ref_mounts,
        )
        test_out_json_path = Path(bivar_test_out_str + ".json")
        test_out_log_path = Path(bivar_test_out_str + ".log")
        assert test_out_json_path.exists(), f"missing {test_out_json_path}"
        assert test_out_log_path.exists(), f"missing {test_out_log_path}"

        # Persist the outputs in scratch_dir: the temporary directory is
        # deleted when this ``with`` block exits, and the returned asset
        # points at scratch_dir.  shutil.move is used instead of Path.rename
        # so the transfer also works when scratch_dir is on another
        # filesystem.
        scratch_dir.mkdir(parents=True, exist_ok=True)
        for produced in (test_out_json_path, test_out_log_path):
            shutil.move(str(produced), str(scratch_dir / produced.name))
        return DirectoryAsset(scratch_dir)