@@ -53,7 +53,7 @@ def count_alleles(g: ArrayLike, _: ArrayLike, out: ArrayLike) -> None:
53
53
54
54
55
55
def count_call_alleles (
56
- ds : Dataset , * , call_genotype : str = " call_genotype" , merge : bool = True
56
+ ds : Dataset , * , call_genotype : str = variables . call_genotype , merge : bool = True
57
57
) -> Dataset :
58
58
"""Compute per sample allele counts from genotype calls.
59
59
@@ -64,7 +64,7 @@ def count_call_alleles(
64
64
:func:`sgkit.create_genotype_call_dataset`.
65
65
call_genotype
66
66
Input variable name holding call_genotype as defined by
67
- :data:`sgkit.variables.call_genotype `
67
+ :data:`sgkit.variables.call_genotype_spec `
68
68
merge
69
69
If True (the default), merge the input dataset and the computed
70
70
output variables into a single dataset, otherwise return only
@@ -104,14 +104,14 @@ def count_call_alleles(
104
104
[[2, 0],
105
105
[2, 0]]], dtype=uint8)
106
106
"""
107
- variables .validate (ds , {call_genotype : variables .call_genotype })
107
+ variables .validate (ds , {call_genotype : variables .call_genotype_spec })
108
108
n_alleles = ds .dims ["alleles" ]
109
109
G = da .asarray (ds [call_genotype ])
110
110
shape = (G .chunks [0 ], G .chunks [1 ], n_alleles )
111
111
N = da .empty (n_alleles , dtype = np .uint8 )
112
112
new_ds = Dataset (
113
113
{
114
- " call_allele_count" : (
114
+ variables . call_allele_count : (
115
115
("variants" , "samples" , "alleles" ),
116
116
da .map_blocks (
117
117
count_alleles , G , N , chunks = shape , drop_axis = 2 , new_axis = 2
@@ -123,7 +123,7 @@ def count_call_alleles(
123
123
124
124
125
125
def count_variant_alleles (
126
- ds : Dataset , * , call_genotype : str = " call_genotype" , merge : bool = True
126
+ ds : Dataset , * , call_genotype : str = variables . call_genotype , merge : bool = True
127
127
) -> Dataset :
128
128
"""Compute allele count from genotype calls.
129
129
@@ -134,7 +134,7 @@ def count_variant_alleles(
134
134
:func:`sgkit.create_genotype_call_dataset`.
135
135
call_genotype
136
136
Input variable name holding call_genotype as defined by
137
- :data:`sgkit.variables.call_genotype `
137
+ :data:`sgkit.variables.call_genotype_spec `
138
138
merge
139
139
If True (the default), merge the input dataset and the computed
140
140
output variables into a single dataset, otherwise return only
@@ -169,10 +169,10 @@ def count_variant_alleles(
169
169
"""
170
170
new_ds = Dataset (
171
171
{
172
- " variant_allele_count" : (
172
+ variables . variant_allele_count : (
173
173
("variants" , "alleles" ),
174
174
count_call_alleles (ds , call_genotype = call_genotype )[
175
- " call_allele_count"
175
+ variables . call_allele_count
176
176
].sum (dim = "samples" ),
177
177
)
178
178
}
@@ -222,28 +222,30 @@ def allele_frequency(
222
222
data_vars : Dict [Hashable , Any ] = {}
223
223
# only compute variant allele count if not already in dataset
224
224
if variant_allele_count is not None :
225
- variables .validate (ds , {variant_allele_count : variables .variant_allele_count })
225
+ variables .validate (
226
+ ds , {variant_allele_count : variables .variant_allele_count_spec }
227
+ )
226
228
AC = ds [variant_allele_count ]
227
229
else :
228
230
AC = count_variant_alleles (ds , merge = False , call_genotype = call_genotype )[
229
- " variant_allele_count"
231
+ variables . variant_allele_count
230
232
]
231
- data_vars [" variant_allele_count" ] = AC
233
+ data_vars [variables . variant_allele_count ] = AC
232
234
233
235
M = ds [call_genotype_mask ].stack (calls = ("samples" , "ploidy" ))
234
236
AN = (~ M ).sum (dim = "calls" ) # type: ignore
235
237
assert AN .shape == (ds .dims ["variants" ],)
236
238
237
- data_vars [" variant_allele_total" ] = AN
238
- data_vars [" variant_allele_frequency" ] = AC / AN
239
+ data_vars [variables . variant_allele_total ] = AN
240
+ data_vars [variables . variant_allele_frequency ] = AC / AN
239
241
return Dataset (data_vars )
240
242
241
243
242
244
def variant_stats (
243
245
ds : Dataset ,
244
246
* ,
245
- call_genotype_mask : str = " call_genotype_mask" ,
246
- call_genotype : str = " call_genotype" ,
247
+ call_genotype_mask : str = variables . call_genotype_mask ,
248
+ call_genotype : str = variables . call_genotype ,
247
249
variant_allele_count : Optional [str ] = None ,
248
250
merge : bool = True ,
249
251
) -> Dataset :
@@ -256,13 +258,13 @@ def variant_stats(
256
258
:func:`sgkit.create_genotype_call_dataset`.
257
259
call_genotype
258
260
Input variable name holding call_genotype.
259
- Defined by :data:`sgkit.variables.call_genotype `.
261
+ Defined by :data:`sgkit.variables.call_genotype_spec `.
260
262
call_genotype_mask
261
263
Input variable name holding call_genotype_mask.
262
- Defined by :data:`sgkit.variables.call_genotype_mask `
264
+ Defined by :data:`sgkit.variables.call_genotype_mask_spec `
263
265
variant_allele_count
264
266
Optional name of the input variable holding variant_allele_count,
265
- as defined by :data:`sgkit.variables.variant_allele_count `.
267
+ as defined by :data:`sgkit.variables.variant_allele_count_spec `.
266
268
merge
267
269
If True (the default), merge the input dataset and the computed
268
270
output variables into a single dataset, otherwise return only
@@ -273,30 +275,30 @@ def variant_stats(
273
275
-------
274
276
A dataset containing the following variables:
275
277
276
- - :data:`sgkit.variables.variant_n_called ` (variants):
278
+ - :data:`sgkit.variables.variant_n_called_spec ` (variants):
277
279
The number of samples with called genotypes.
278
- - :data:`sgkit.variables.variant_call_rate ` (variants):
280
+ - :data:`sgkit.variables.variant_call_rate_spec ` (variants):
279
281
The fraction of samples with called genotypes.
280
- - :data:`sgkit.variables.variant_n_het ` (variants):
282
+ - :data:`sgkit.variables.variant_n_het_spec ` (variants):
281
283
The number of samples with heterozygous calls.
282
- - :data:`sgkit.variables.variant_n_hom_ref ` (variants):
284
+ - :data:`sgkit.variables.variant_n_hom_ref_spec ` (variants):
283
285
The number of samples with homozygous reference calls.
284
- - :data:`sgkit.variables.variant_n_hom_alt ` (variants):
286
+ - :data:`sgkit.variables.variant_n_hom_alt_spec ` (variants):
285
287
The number of samples with homozygous alternate calls.
286
- - :data:`sgkit.variables.variant_n_non_ref ` (variants):
288
+ - :data:`sgkit.variables.variant_n_non_ref_spec ` (variants):
287
289
The number of samples that are not homozygous reference calls.
288
- - :data:`sgkit.variables.variant_allele_count ` (variants, alleles):
290
+ - :data:`sgkit.variables.variant_allele_count_spec ` (variants, alleles):
289
291
The number of occurrences of each allele.
290
- - :data:`sgkit.variables.variant_allele_total ` (variants):
292
+ - :data:`sgkit.variables.variant_allele_total_spec ` (variants):
291
293
The number of occurrences of all alleles.
292
- - :data:`sgkit.variables.variant_allele_frequency ` (variants, alleles):
294
+ - :data:`sgkit.variables.variant_allele_frequency_spec ` (variants, alleles):
293
295
The frequency of occurrence of each allele.
294
296
"""
295
297
variables .validate (
296
298
ds ,
297
299
{
298
- call_genotype : variables .call_genotype ,
299
- call_genotype_mask : variables .call_genotype_mask ,
300
+ call_genotype : variables .call_genotype_spec ,
301
+ call_genotype_mask : variables .call_genotype_mask_spec ,
300
302
},
301
303
)
302
304
new_ds = xr .merge (
0 commit comments