Source code for scalr.utils.test_file_utils

"""This is a test file for file_utils.py"""

import os
from os import path
import shutil

import numpy as np

from scalr.utils import generate_dummy_anndata
from scalr.utils import read_data
from scalr.utils import write_chunkwise_data
from scalr.utils import write_data


def test_write_chunkwise_data():
    """Tests `write_chunkwise_data()`, `write_data()` & `read_data()`.

    A dummy anndata of 25 samples x 5 features is written to
    './tmp/fulldata.h5ad', read back, and re-written to './tmp/chunked_data/'
    in chunks of 5 samples. The test then checks the shape of every chunk file
    and the total number of chunk files.

    Raises:
        AssertionError: If a chunk has an unexpected shape, or the number of
            chunk files found differs from the expected count.
    """
    os.makedirs('./tmp', exist_ok=True)

    # The cleanup lives in `finally` so that a failing assertion does not leave
    # './tmp' behind, where stale chunk files could affect a later run.
    try:
        # Generating dummy anndata.
        adata = generate_dummy_anndata(n_samples=25, n_features=5)

        # Path to write full data.
        fulldata_path = './tmp/fulldata.h5ad'
        write_data(adata, fulldata_path)

        # sample_chunksize to store full data in chunks.
        sample_chunksize = 5

        # Path to store chunked data.
        dirpath = './tmp/chunked_data/'

        # Writing fulldata in chunks.
        full_data = read_data(fulldata_path)
        write_chunkwise_data(full_data,
                             sample_chunksize=sample_chunksize,
                             dirpath=dirpath)

        # Iterating over stored chunked data to assert shape. Chunk files are
        # looked up as '0.h5ad', '1.h5ad', ...; the scan stops at the first
        # missing index.
        expected_shape = (sample_chunksize, len(adata.var_names))
        observed_n_chunks = 0
        for i in range(len(os.listdir(dirpath))):
            chunk_path = path.join(dirpath, f'{i}.h5ad')
            if not path.exists(chunk_path):
                break

            # NOTE(review): the chunk is opened with backed='r' and is never
            # explicitly closed — confirm this is fine before the rmtree below.
            chunked_data = read_data(chunk_path, backed='r')
            assert chunked_data.shape == expected_shape, (
                f"There is some issue with chunk-{i}. Please check!")
            observed_n_chunks += 1

        # Checking the number of chunks stored.
        expected_n_chunks = np.ceil(adata.shape[0] /
                                    sample_chunksize).astype(int)
        assert observed_n_chunks == expected_n_chunks, (
            f"There is mismatch of observed_n_chunks - {observed_n_chunks} "
            f"with expected_n_chunks - {expected_n_chunks}.")
    finally:
        shutil.rmtree('./tmp', ignore_errors=True)