Skip to content

Commit 5c45ed3

Browse files
gegnewzbjornson
authored and committed
bug(comp): several bug fixes...
* Channels could be out of order after compensating
* Allow comp matrix to have a subset of the file's columns in Compensation.apply
* Verify that a file's stored events were retrieved with the same kwargs provided to Compensation.apply

Additionally cleans up use of FCS files in tests.
1 parent b07acb0 commit 5c45ed3

File tree

9 files changed

+391
-34
lines changed

9 files changed

+391
-34
lines changed

cellengine/resources/compensation.py

Lines changed: 27 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -100,36 +100,46 @@ def dataframe_as_html(self):
100100
"""Return the compensation matrix dataframe as HTML."""
101101
return self.dataframe._repr_html_()
102102

103-
def apply(self, file: "FcsFile", inplace: bool = True, **kwargs):
    """Compensate an FcsFile's data.

    Args:
        file (FcsFile): The FcsFile to compensate.
        inplace (bool): If True, additionally store the compensated events
            on the file. The compensated events are returned either way.
        kwargs (Dict):
            All arguments accepted by `FcsFile.get_events` are accepted here.
            If the file's events have already been retrieved with the same
            kwargs provided here, those stored events will be used.
            Otherwise, the file's events will be retrieved from CellEngine.

    Returns:
        DataFrame: A copy of the file's events in which the columns that
            match this compensation's channels hold compensated values.
            All other columns are left untouched.

    Raises:
        IndexError: If none of this compensation's channels is a column of
            the file's events.
    """
    # Reuse the stored events only when they were fetched with these kwargs.
    if kwargs == file._events_kwargs:
        data = file.events
    else:
        data = file.get_events(inplace=inplace, destination=None, **kwargs)

    # Channels of the compensation that exist in the file, in the
    # compensation's own order.
    columns = data.columns
    matched = [
        channel for channel in self.channels if channel and channel in columns
    ]
    if not matched:
        raise IndexError(
            "No channels from this file match those in the compensation."
        )

    # Work on a copy so the events handed in are never mutated.
    compensated = data.copy()

    # spill -> comp by inverting
    # NOTE(review): the whole matrix is inverted even if only some of its
    # channels matched; in that case the dot product below fails on a shape
    # mismatch -- confirm whether a partial match should be supported.
    inverse = numpy.linalg.inv(self.dataframe)
    comped = compensated[matched].dot(inverse)  # type: ignore
    comped.columns = matched
    # `.iloc[0]`: `dtypes` is indexed by column label, so a bare `[0]` relies
    # on deprecated positional fallback.
    compensated.update(comped.astype(comped.dtypes.iloc[0]))

    if inplace:
        file._events = compensated
    return compensated

cellengine/resources/fcs_file.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
from __future__ import annotations
2+
from cellengine.utils.parse_fcs_file import parse_fcs_file
23
from cellengine.utils.dataclass_mixin import DataClassMixin, ReadOnly
34
from dataclasses import dataclass, field
45
from dataclasses_json import config
56
from typing import Any, Dict, List, Optional, Union
6-
from fcsparser.api import FCSParser
7-
import pandas
87
from pandas.core.frame import DataFrame
98

109
import cellengine as ce
@@ -14,16 +13,18 @@
1413

1514
@dataclass
1615
class FcsFile(DataClassMixin):
17-
_annotations: str = field(metadata=config(field_name="annotations"))
1816
filename: str
19-
is_control: str
2017
panel_name: str
2118
deleted: Optional[bool]
2219
panel: List[Dict[str, Any]]
2320
_id: str = field(
2421
metadata=config(field_name="_id"), default=ReadOnly()
2522
) # type: ignore
2623
compensation: Optional[int] = None
24+
is_control: Optional[bool] = None
25+
_annotations: Optional[List[str]] = field(
26+
metadata=config(field_name="annotations"), default=None
27+
)
2728
crc32c: str = field(default=ReadOnly()) # type: ignore
2829
event_count: int = field(default=ReadOnly()) # type: ignore
2930
experiment_id: str = field(default=ReadOnly()) # type: ignore
@@ -39,6 +40,9 @@ class FcsFile(DataClassMixin):
3940
def __repr__(self):
4041
return f"FcsFile(_id='{self._id}', name='{self.name}')"
4142

43+
def __post_init__(self):
44+
self._events_kwargs = {}
45+
4246
@property
4347
def name(self):
4448
"""Alias for `filename`."""
@@ -276,13 +280,13 @@ def get_events(
276280
If destination is a string, saves file to the destination and returns None.
277281
"""
278282

279-
fresp = ce.APIClient().download_fcs_file(self.experiment_id, self._id, **kwargs)
280-
if destination:
281-
with open(destination, "wb") as file:
282-
file.write(fresp)
283-
return
284-
parser = FCSParser.from_data(fresp)
285-
events = pandas.DataFrame(parser.data, columns=parser.channel_names_n)
283+
if inplace is True:
284+
self._events_kwargs = kwargs
285+
286+
fresp = parse_fcs_file(
287+
ce.APIClient().download_fcs_file(self.experiment_id, self._id, **kwargs),
288+
destination=destination,
289+
)
286290
if inplace:
287-
self._events = events
288-
return events
291+
self.events = fresp
292+
return fresp

cellengine/utils/parse_fcs_file.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from typing import Optional

from fcsparser.api import FCSParser
from pandas.core.frame import DataFrame
3+
4+
5+
def parse_fcs_file(
    file: bytes, destination: Optional[str] = None
) -> Optional[DataFrame]:
    """Parse the raw bytes of an FCS file to a DataFrame, or save them to disk.

    Args:
        file (bytes): The raw contents of an FCS file.
        destination (str, optional): If given, the bytes are written
            unparsed to this path instead of being parsed.

    Returns:
        DataFrame: The parsed events, with columns named by the parser's
            ``channel_names_n``, when no ``destination`` is given.
            None when the bytes were written to ``destination``.
    """
    if destination:
        with open(destination, "wb") as loc:
            loc.write(file)
        return None

    parser = FCSParser.from_data(file)
    return DataFrame(parser.data, columns=parser.channel_names_n)

tests/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
"fixtures.api-statistics",
3939
"fixtures.api-events",
4040
"fixtures.spillstring",
41+
"fixtures.compensated_events",
4142
]
4243

4344

tests/data/acea.json

Lines changed: 153 additions & 0 deletions
Large diffs are not rendered by default.

tests/data/acea_compensated.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"FSC-H":{"0":138757,"1":2265460,"2":1764200,"3":1141736,"4":1971316},"SSC-H":{"0":12964,"1":278241,"2":113642,"3":101310,"4":110507},"BL1-H":{"0":1564.7786865234,"1":-874.6184692383,"2":-713.8847045898,"3":1384.0349121094,"4":-4269.4731445312},"BL2-H":{"0":159.2403869629,"1":40.1197471619,"2":413.5137329102,"3":413.7367248535,"4":-494.6390991211},"BL4-H":{"0":358.1521911621,"1":767.3947143555,"2":94748.734375,"3":1309.9842529297,"4":-367.4680786133},"BL5-H":{"0":507.7327880859,"1":3116.5961914062,"2":22717.23046875,"3":22469.728515625,"4":45046.3359375},"RL1-H":{"0":258.6846923828,"1":630.6443481445,"2":-1535.5026855469,"3":-363.8207092285,"4":353.9656677246},"RL2-H":{"0":-46.4186859131,"1":917.6150512695,"2":71390.8515625,"3":23751.76953125,"4":126923.3671875},"VL1-H":{"0":308.7546081543,"1":21288.693359375,"2":17380.46484375,"3":6990.4873046875,"4":22613.78125},"VL2-H":{"0":4096.4223632812,"1":37982.15625,"2":14175.427734375,"3":11279.310546875,"4":12162.017578125},"VL3-H":{"0":-599.0392456055,"1":-167.088104248,"2":-659.5164794922,"3":-660.2655029297,"4":806.4384155273},"FSC-A":{"0":41612,"1":1125353,"2":690129,"3":457694,"4":761981},"SSC-A":{"0":3886,"1":111465,"2":38985,"3":34435,"4":38346},"BL1-A":{"0":2623.0192871094,"1":-5131.4921875,"2":2718.5319824219,"3":6351.1533203125,"4":-5807.2319335938},"BL2-A":{"0":252.2498016357,"1":126.8422698975,"2":1621.1063232422,"3":1894.4182128906,"4":-490.734161377},"BL4-A":{"0":476.7016296387,"1":687.8566894531,"2":150272.640625,"3":3924.1550292969,"4":-251.9639129639},"BL5-A":{"0":800.4254760742,"1":5294.1884765625,"2":37090.51953125,"3":35231.73828125,"4":70414.75},"RL1-A":{"0":586.5928955078,"1":2220.6091308594,"2":2739.4243164062,"3":-969.1270141602,"4":1738.6413574219},"RL2-A":{"0":-783.5219726562,"1":1646.0065917969,"2":125483.03125,"3":42060.16796875,"4":224123.875},"VL1-A":{"0":634.5665893555,"1":38083.5703125,"2":29632.32421875,"3":12616.3515625,"4":37282.18359375},"VL2-A":{"0":6043.6430664062,"1":60756.7109375,"2
":24508.138671875,"3":22079.921875,"4":19058.962890625},"VL3-A":{"0":-1048.1103515625,"1":680.421875,"2":-2519.341796875,"3":-2800.7143554688,"4":1013.7341308594},"Width":{"0":45,"1":82,"2":75,"3":73,"4":75},"TIME":{"0":42574.0,"1":42839.00390625,"2":44266.0,"3":48457.0,"4":50867.0}}

tests/fixtures/compensated_events.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
from cellengine.resources.fcs_file import FcsFile
2+
import json
3+
from cellengine.utils.parse_fcs_file import parse_fcs_file
4+
from cellengine.resources.compensation import Compensation
5+
import pytest
6+
from pandas import read_json, DataFrame
7+
8+
9+
@pytest.fixture(scope="function")
def acea_events() -> DataFrame:
    """Real events from 'Acea - Novocyte.fcs', cast to float."""
    # Use a context manager so the FCS file handle is closed after reading.
    with open("tests/data/Acea - Novocyte.fcs", "rb") as events_body:
        events = parse_fcs_file(events_body.read())
    return events.astype(float)
15+
16+
17+
@pytest.fixture(scope="function")
def acea_fcs_file(acea_events):
    """FcsFile built from tests/data/acea.json with `acea_events` attached."""
    with open("tests/data/acea.json", "r") as handle:
        attributes = json.load(handle)
    fcs_file = FcsFile.from_dict(attributes)
    fcs_file.events = acea_events
    return fcs_file
24+
25+
26+
@pytest.fixture(scope="function")
def acea_compensation(acea_fcs_file):
    """Compensation created from the spill string of `acea_fcs_file`."""
    spill_string = acea_fcs_file.spill_string
    return Compensation.from_spill_string(spill_string)
29+
30+
31+
@pytest.fixture(scope="function")
def acea_events_compensated(acea_events):
    """File-internal compensated events from 'Acea - Novocyte.fcs'.

    Loaded from tests/data/acea_compensated.json and cast to float.
    """
    compensated = read_json("tests/data/acea_compensated.json")
    return compensated.astype(float)

tests/unit/resources/test_compensation.py

Lines changed: 80 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,26 @@
1+
from cellengine.utils.parse_fcs_file import parse_fcs_file
2+
from cellengine.resources.fcs_file import FcsFile
13
import json
2-
from numpy import array
34
import pytest
45
import responses
56
from pandas import DataFrame
7+
from pandas.testing import assert_frame_equal
8+
from numpy import identity
69
from cellengine.resources.compensation import Compensation
710

811

912
EXP_ID = "5d38a6f79fae87499999a74b"
1013

1114

12-
@pytest.fixture(scope="module")
13-
def compensation(ENDPOINT_BASE, client, compensations):
15+
@pytest.fixture(scope="function")
def fcs_file(ENDPOINT_BASE, client, fcs_files):
    """FcsFile built from the first `fcs_files` entry, placed in EXP_ID."""
    attributes = fcs_files[0]
    attributes.update({"experimentId": EXP_ID})
    return FcsFile.from_dict(attributes)
20+
21+
22+
@pytest.fixture(scope="function")
def compensation(ENDPOINT_BASE, client, fcs_file, compensations):
    """Compensation built from the first `compensations` entry, in EXP_ID."""
    attributes = compensations[0]
    attributes.update({"experimentId": EXP_ID})
    return Compensation.from_dict(attributes)
@@ -98,3 +108,70 @@ def test_create_from_spill_string(spillstring):
98108
"Qdot655-A",
99109
"Qdot705-A",
100110
]
111+
112+
113+
@responses.activate
def test_apply_comp_errors_for_nonmatching_channels(
    client, ENDPOINT_BASE, compensation, fcs_file
):
    """Applying a compensation that shares no channels with the file raises."""
    # Keep the file open until the mocked download has consumed it, then
    # close it instead of leaking the handle.
    with open("tests/data/Acea - Novocyte.fcs", "rb") as events_body:
        responses.add(
            responses.GET,
            f"{ENDPOINT_BASE}/experiments/{EXP_ID}/fcsfiles/{fcs_file._id}.fcs",
            body=events_body,
        )
        events = parse_fcs_file(client.download_fcs_file(EXP_ID, fcs_file._id))
    fcs_file.events = events

    with pytest.raises(IndexError):
        compensation.apply(fcs_file)
128+
129+
130+
@responses.activate
def test_apply_compensation_to_fcs_file_with_matching_kwargs(
    client, ENDPOINT_BASE, compensation, fcs_file
):
    """Stored events are reused when `apply` gets the kwargs they were fetched with."""
    # Given: a Compensation with channels as a subset of the FcsFile events
    # The file must stay open until the mocked request has been served.
    with open("tests/data/Acea - Novocyte.fcs", "rb") as events_body:
        responses.add(
            responses.GET,
            f"{ENDPOINT_BASE}/experiments/{EXP_ID}/fcsfiles/{fcs_file._id}.fcs",
            body=events_body,
        )
        events = fcs_file.get_events(inplace=True, testKwarg="foo")
    assert fcs_file._events_kwargs == {"testKwarg": "foo"}

    ix = list(events.columns)
    compensation.dataframe = DataFrame(identity(24), index=ix, columns=ix)
    compensation.channels = ix

    # When: a Compensation is applied
    results = compensation.apply(fcs_file, testKwarg="foo")

    # Then: events should be compensated
    # `all(results == events)` only iterates the column labels and is always
    # truthy; reduce over both axes to actually compare the values.
    assert (results == events).all().all()
    assert (
        responses.assert_call_count(
            f"{ENDPOINT_BASE}/experiments/{EXP_ID}/fcsfiles/{fcs_file._id}.fcs?testKwarg=foo",
            1,
        )
        is True
    )
159+
160+
161+
@responses.activate
def test_apply_comp_compensates_values(
    acea_events_compensated, acea_fcs_file, acea_compensation
):
    """Compare a file-internal compensation computed by the Python toolkit
    with the same compensation computed by CellEngine.

    The fixtures used here are defined in
    tests/fixtures/compensated_events.py.
    """
    # Given: a file-internal compensation and an FcsFile holding
    # uncompensated events (both provided by fixtures)

    # When: the Compensation is applied to the file
    compensated = acea_compensation.apply(acea_fcs_file, inplace=False)

    # Then: the first rows match the reference values
    expected = acea_events_compensated.head(5)
    assert_frame_equal(compensated.head(5), expected)

tests/unit/resources/test_fcsfile.py

Lines changed: 65 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import os
22
import json
3+
from fcsparser.api import FCSParser
4+
from pandas.core.frame import DataFrame
35
import responses
46
from io import BufferedReader, BytesIO
57

@@ -77,11 +79,55 @@ def test_gets_file_internal_compensation(ENDPOINT_BASE, client, fcs_files, spill
7779
assert type(comp) == Compensation
7880

7981

82+
def test_parse_fcs_file():
    """An FCS file parses into a DataFrame whose columns are the channel names."""
    # Close the file handle once its bytes have been read.
    with open("tests/data/Acea - Novocyte.fcs", "rb") as events_body:
        parser = FCSParser.from_data(events_body.read())

    events = DataFrame(parser.data, columns=parser.channel_names_n)
    assert isinstance(events, DataFrame)
    assert tuple(events.columns) == parser.channel_names_n
89+
90+
91+
@responses.activate
def test_parses_fcs_file_events(ENDPOINT_BASE, client, fcs_files):
    """`get_events` returns the downloaded events as a DataFrame."""
    file_data = fcs_files[0]
    file = FcsFile.from_dict(file_data)
    # Keep the file open until the mocked request has been served.
    with open("tests/data/Acea - Novocyte.fcs", "rb") as events_body:
        responses.add(
            responses.GET,
            f"{ENDPOINT_BASE}/experiments/{EXP_ID}/fcsfiles/{file._id}.fcs",
            body=events_body,
        )

        # When:
        data = file.get_events()

    # Then:
    # The original expression had no `assert`, so nothing was checked.
    assert isinstance(data, DataFrame)
107+
108+
109+
@responses.activate
def test_parses_fcs_file_events_inplace(ENDPOINT_BASE, client, fcs_files):
    """`get_events(inplace=True)` stores the events on the file as a DataFrame."""
    file_data = fcs_files[0]
    file = FcsFile.from_dict(file_data)
    # Keep the file open until the mocked request has been served.
    with open("tests/data/Acea - Novocyte.fcs", "rb") as events_body:
        responses.add(
            responses.GET,
            f"{ENDPOINT_BASE}/experiments/{EXP_ID}/fcsfiles/{file._id}.fcs",
            body=events_body,
        )

        # When:
        file.get_events(inplace=True)

    # Then:
    # The original expression had no `assert`, so nothing was checked.
    assert isinstance(file.events, DataFrame)
125+
126+
80127
@responses.activate
81128
def test_save_events_to_file(ENDPOINT_BASE, client, fcs_files):
82129
file_data = fcs_files[0]
83130
file = FcsFile.from_dict(file_data)
84-
events_body = open("tests/data/Acea - Novocyte.fcs")
85131
responses.add(
86132
responses.GET,
87133
f"{ENDPOINT_BASE}/experiments/{EXP_ID}/fcsfiles/{file._id}.fcs",
@@ -95,3 +141,21 @@ def test_save_events_to_file(ENDPOINT_BASE, client, fcs_files):
95141
with open("test.fcs", "r") as events:
96142
assert events.readline() == "test"
97143
os.remove("test.fcs")
144+
145+
146+
@responses.activate
def test_get_events_save_kwargs(ENDPOINT_BASE, client, fcs_files):
    """`get_events(inplace=True)` records the kwargs it was called with."""
    file_data = fcs_files[0]
    file = FcsFile.from_dict(file_data)
    # Keep the file open until the mocked request has been served, then
    # close it instead of leaking the handle.
    with open("tests/data/Acea - Novocyte.fcs", "rb") as events_body:
        responses.add(
            responses.GET,
            f"{ENDPOINT_BASE}/experiments/{EXP_ID}/fcsfiles/{file._id}.fcs",
            body=events_body,
        )

        # When:
        file.get_events(inplace=True, compensatedQ=False, seed=10)

    # Then:
    assert file._events_kwargs == {"compensatedQ": False, "seed": 10}

0 commit comments

Comments
 (0)