Skip to content

mecfs_bio.build_system.task.upset_plot_task

Create an UpSet plot describing the intersections of sets represented as dataframe columns. See: https://en.wikipedia.org/wiki/UpSet_plot

Classes:

Functions:

Attributes:

SetSource module-attribute

SetSource = FileSetSource | DirSetSource

logger module-attribute

logger = get_logger()

DirSetSource

Attributes:

col_name instance-attribute

col_name: str

file_in_dir instance-attribute

file_in_dir: PurePath

name instance-attribute

name: str

pipe class-attribute instance-attribute

pipe: DataProcessingPipe = IdentityPipe()

read_spec instance-attribute

read_spec: DataFrameReadSpec

task instance-attribute

task: Task

FileSetSource

Attributes:

col_name instance-attribute

col_name: str

name instance-attribute

name: str

pipe class-attribute instance-attribute

pipe: DataProcessingPipe = IdentityPipe()

task instance-attribute

task: Task

UpSetPlotTask

Bases: Task

Create an UpSet plot describing the intersections of sets represented as dataframe columns. See: https://en.wikipedia.org/wiki/UpSet_plot

Methods:

Attributes:

deps property

deps: list[Task]

meta property

meta: Meta

set_sources instance-attribute

set_sources: Sequence[SetSource]

create classmethod

create(asset_id: str, set_sources: Sequence[SetSource])
Source code in mecfs_bio/build_system/task/upset_plot_task.py
@classmethod
def create(cls, asset_id: str, set_sources: Sequence[SetSource]):
    """
    Build a task whose metadata is derived from the first set source.

    The trait and project are copied from the meta of the first source's
    task, which must be a ResultDirectoryMeta. The plot meta is created with
    a ".png" extension and the given asset id.

    Raises ValueError when the first source's meta is of any other type.
    """
    assert len(set_sources) >= 1
    first_meta = set_sources[0].task.meta
    # Guard clause: only result-directory metadata is understood here.
    if not isinstance(first_meta, ResultDirectoryMeta):
        raise ValueError(f"Unknown source meta {first_meta}")
    plot_meta = GWASPlotFileMeta(
        trait=first_meta.trait,
        project=first_meta.project,
        extension=".png",
        id=AssetId(asset_id),
    )
    return cls(meta=plot_meta, set_sources=set_sources)

execute

execute(scratch_dir: Path, fetch: Fetch, wf: WF) -> Asset
Source code in mecfs_bio/build_system/task/upset_plot_task.py
def execute(self, scratch_dir: Path, fetch: Fetch, wf: WF) -> Asset:
    """
    Load every set source, build the membership table and render the plot.

    Each source is loaded via load_contents and keyed by its name. When the
    resulting table has no rows, a blank placeholder image is written to
    "sets.png" in scratch_dir and returned instead of a plot. Otherwise the
    current matplotlib figure is handed to write_plots_to_dir under the key
    "upset" and the asset at "upset.png" in scratch_dir is returned.
    """
    contents_by_name = {}
    for source in self.set_sources:
        contents_by_name[source.name] = load_contents(source, fetch=fetch)

    membership = from_contents(contents_by_name)

    # Nothing to plot: emit a placeholder image so an asset still exists.
    if len(membership) == 0:
        placeholder = scratch_dir / "sets.png"
        write_blank_png(placeholder)
        logger.debug(
            "No sets to intersect.  Writing a blank png file as a placeholder."
        )
        return FileAsset(placeholder)

    upset = UpSet(membership, show_counts=True)
    upset.plot()
    # plot() was given no figure, so the current figure is the one to save.
    figures = {"upset": plt.gcf()}
    write_plots_to_dir(scratch_dir, figures)
    return FileAsset(scratch_dir / "upset.png")

load_contents

load_contents(
    set_source: SetSource, fetch: Fetch
) -> list[str]
Source code in mecfs_bio/build_system/task/upset_plot_task.py
def load_contents(set_source: SetSource, fetch: Fetch) -> list[str]:
    """
    Return the values of the configured column of a set source as a list.

    A FileSetSource is scanned directly from its fetched asset. A
    DirSetSource is scanned from `file_in_dir` inside its fetched directory
    asset, using the source's read spec. In both cases the source's pipe is
    applied before the frame is collected and converted to pandas.

    Raises ValueError for any other kind of set source.
    """
    if isinstance(set_source, FileSetSource):
        file_asset = fetch(set_source.task.asset_id)
        lazy_frame = scan_dataframe_asset(file_asset, meta=set_source.task.meta)
    elif isinstance(set_source, DirSetSource):
        dir_asset = fetch(set_source.task.asset_id)
        assert isinstance(dir_asset, DirectoryAsset)
        target = dir_asset.path / set_source.file_in_dir
        lazy_frame = scan_dataframe(path=target, spec=set_source.read_spec)
    else:
        raise ValueError("Unknown set source")

    # Shared tail: apply the pipe, materialise, and pull out the column.
    processed = set_source.pipe.process(lazy_frame)
    frame = processed.collect().to_pandas()
    return frame[set_source.col_name].tolist()

write_blank_png

write_blank_png(pth: Path)
Source code in mecfs_bio/build_system/task/upset_plot_task.py
def write_blank_png(pth: Path):
    """
    Write a 200x100 all-white 8-bit greyscale PNG to `pth`.

    Used as a placeholder image when there is nothing to plot.
    """
    width, height = 200, 100
    # One row of white (255) pixels per image line.
    rows = [array.array("B", [255] * width) for _ in range(height)]
    writer = png.Writer(width, height, greyscale=True, bitdepth=8)

    with open(pth, "wb") as out:
        writer.write(out, rows)