Source code for scalr.nn.dataloader.test_simple_dataloader
"""This is a test file for simpledataloader."""
from scalr.nn.dataloader import build_dataloader
from scalr.utils import generate_dummy_anndata
[docs]
def test_metadataloader():
"""This function tests features shape returned by simpledataloader for the below 2 cases.
1. #features are consistent with feature_subsetsize. No padding is required.
2. #features are less than feature_subsetsize. This case needs padding.
"""
# Generating dummy anndata.
n_samples = 30
n_features = 13
adata = generate_dummy_anndata(n_samples=n_samples, n_features=n_features)
# Generating mappings for anndata obs columns.
mappings = {}
for column_name in adata.obs.columns:
mappings[column_name] = {}
id2label = []
id2label += adata.obs[column_name].astype(
'category').cat.categories.tolist()
label2id = {id2label[i]: i for i in range(len(id2label))}
mappings[column_name]['id2label'] = id2label
mappings[column_name]['label2id'] = label2id
# Test case 1
# Expected features shape after dataloading is (batch_size, 13).
# So no padding is required as adata n_features=13. But we can pass
# `padding=feature_subsetsize` in dataloader_config.
## Defining required parameters for simpledataloader.
feature_subsetsize = 13
dataloader_config = {
'name': 'SimpleDataLoader',
'params': {
'batch_size': 10,
'padding': feature_subsetsize,
}
}
dataloader, _ = build_dataloader(dataloader_config=dataloader_config,
adata=adata,
target='celltype',
mappings=mappings)
## Comparing expecting features shape after using metadatloader.
for feature, _ in dataloader:
assert feature.shape[
1] == feature_subsetsize, f"There is some issue in simpledataloader."\
f" Expected #features({n_features}) != Observed #features({feature.shape[1]}). Please check!"
# Breaking, as checking only the first batch is enough.
break
# Test case 2
# Expected features shape after dataloading is (batch_size, 20).
# So padding is required as adata n_features=13. Hence 7 columns should be padded in dataloader.
## Defining required parameters for simpledataloader.
feature_subsetsize = 20
dataloader_config = {
'name': 'SimpleDataLoader',
'params': {
'batch_size': 10,
'padding': feature_subsetsize,
}
}
dataloader, _ = build_dataloader(dataloader_config=dataloader_config,
adata=adata,
target='celltype',
mappings=mappings)
## Comparing expected features shape after using metadatloader.
for feature, _ in dataloader:
assert feature.shape[
1] == feature_subsetsize, f"There is some issue in simpledataloader."\
f" Expected #features({feature_subsetsize}) != Observed #features({feature.shape[1]}). Please check!"
# Breaking, as checking only the first batch is enough.
break