match.lib.plinkscorefiles

This module contains the PlinkScoreFiles class, which represents one or more scoring files ready to be used with plink2 --score.

Attributes

logger

Classes

PlinkScoreFiles

Represents a sequence of scoring files written by MatchResults

Module Contents

class match.lib.plinkscorefiles.PlinkScoreFiles(*elements)

Represents a sequence of scoring files written by MatchResults

merge(directory)

Merge scoring files without recomputing matches

Assumes the standard file naming scheme was used: dataset_chrom_effecttype_n (for example, goodmatch_1_additive_0.scorefile.gz)

>>> import tempfile, os, glob
>>> from ._config import Config
>>> from .variantframe import VariantFrame
>>> from .scoringfileframe import ScoringFileFrame, match_variants
>>> from .matchresult import MatchResult, MatchResults
>>> fout = tempfile.NamedTemporaryFile(delete=False)
>>> target_path = Config.ROOT_DIR / "tests" / "data" / "good_match.pvar"
>>> score_path =  Config.ROOT_DIR / "tests" / "data" / "good_match_scorefile.txt"
>>> target = VariantFrame(target_path, dataset="goodmatch")
>>> scorefile = ScoringFileFrame(score_path)
>>> with target as target_df, scorefile as score_df:
...     results = match_variants(score_df=score_df, target_df=target_df, target=target)
...     _ = results.collect(outfile=fout.name)
>>> x = MatchResult.from_ipc(fout.name, dataset="goodmatch")
>>> foutdir = tempfile.mkdtemp()
>>> with scorefile as score_df:
...     _ = MatchResults(x).write_scorefiles(directory=foutdir, split=True, score_df=score_df)
>>> plink_files = (pathlib.Path(foutdir) / x for x in os.listdir(foutdir))
>>> psf = PlinkScoreFiles(*plink_files)
>>> psf
PlinkScoreFiles([PosixPath('.../goodmatch_1_additive_0.scorefile.gz'), ...])
>>> psf.merge(foutdir)
>>> combined_paths = sorted(glob.glob(foutdir + "/*ALL*"),  key=lambda x: pathlib.Path(x).stem)
>>> len(combined_paths)
3
>>> combined_paths
['.../goodmatch_ALL_additive_0.scorefile.gz', '.../goodmatch_ALL_dominant_0.scorefile.gz', '.../goodmatch_ALL_recessive_0.scorefile.gz']
match.lib.plinkscorefiles.logger