Skip to content

Commit d36380d

Browse files
committed
Workaround for Pandas null conversion bug
1 parent 2707c71 commit d36380d

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

common/ontology.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
4 4

5 5
from ontoma.interface import OnToma
6 6
from pandarallel import pandarallel
7+
from pyspark.sql.functions import col, when
7 8

8 9
ONTOMA_MAX_ATTEMPTS = 3
9 10
pandarallel.initialize()
@@ -58,7 +59,14 @@ def add_efo_mapping(evidence_strings, spark_instance, ontoma_cache_dir=None):
58 59
disease_info_to_map = disease_info_to_map.explode('diseaseFromSourceMappedId')
59 60

60 61
logging.info('Join the resulting information into the evidence strings.')
61-
disease_info_df = spark_instance.createDataFrame(disease_info_to_map.astype(str))
62+
disease_info_df = (
63+
spark_instance
64+
.createDataFrame(disease_info_to_map.astype(str))
65+
.withColumn(
66+
'diseaseFromSourceMappedId',
67+
when(col('diseaseFromSourceMappedId') != 'nan', col('diseaseFromSourceMappedId'))
68+
)
69+
)
62 70
return evidence_strings.join(
63 71
disease_info_df,
64 72
on=['diseaseFromSource', 'diseaseFromSourceId'],

0 commit comments

Comments
 (0)