diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index 6b84ddaf2a..292f203fd5 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 4.9.8 + 4.10.0-SNAPSHOT ../pom.xml diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/transform/Main.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/transform/Main.java deleted file mode 100644 index d0027b15f9..0000000000 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/transform/Main.java +++ /dev/null @@ -1,26 +0,0 @@ -package org.opencb.cellbase.app.transform; - -import org.apache.log4j.ConsoleAppender; -import org.apache.log4j.Level; -import org.apache.log4j.LogManager; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Created by fjlopez on 10/01/17. - */ -public class Main { - public static void main(String[] args) { - org.apache.log4j.Logger rootLogger = LogManager.getRootLogger(); - ConsoleAppender stderr = (ConsoleAppender) rootLogger.getAppender("stderr"); - stderr.setThreshold(Level.toLevel("warn")); - - Logger logger = LoggerFactory.getLogger(Main.class); - - logger.debug("Hello world debug"); - logger.info("Hello world info"); - logger.warn("Hello world warn"); - logger.error("Hello world error"); - - } -} diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/transform/clinical/variant/ClinVarIndexer.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/transform/clinical/variant/ClinVarIndexer.java index ff618ddc69..739ede4b20 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/transform/clinical/variant/ClinVarIndexer.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/transform/clinical/variant/ClinVarIndexer.java @@ -66,6 +66,8 @@ public class ClinVarIndexer extends ClinicalIndexer { private int numberGermlineRecords = 0; private int numberNoDiseaseTrait = 0; private int numberMultipleInheritanceModels = 0; + private static final String RCVIDS = "rcvIds"; + private static final String SCVIDS = "scvIds"; private static final Set DOMINANT_TERM_SET = new HashSet<>(Arrays.asList(ModeOfInheritance.monoallelic, ModeOfInheritance.monoallelic_maternally_imprinted, @@ -213,7 +215,7 @@ private boolean updateRocksDB(AlleleLocationData alleleLocationData, PublicSetTy String mateVariantString, Map traitsToEfoTermsMap) throws RocksDBException, IOException { - // More than one variant being returned from the normalisatio process would mean it's and MNV which has been + // More than one variant being returned from the normalisation process would mean it's and MNV which has been // decomposed List normalisedVariantStringList = getNormalisedVariantString( alleleLocationData.getSequenceLocation().getChromosome(), @@ -232,8 +234,11 @@ private boolean updateRocksDB(AlleleLocationData alleleLocationData, PublicSetTy clinicalHaplotypeString = StringUtils.join(normalisedVariantStringList, HAPLOTYPE_STRING_SEPARATOR); } + // get VCV ID + String vcvId = getVcvId(publicSet); + // parse RCVs - String accession = publicSet.getReferenceClinVarAssertion().getClinVarAccession().getAcc(); + String rcvAccession = publicSet.getReferenceClinVarAssertion().getClinVarAccession().getAcc(); String clinicalSignficanceDescription = publicSet.getReferenceClinVarAssertion() .getClinicalSignificance() .getDescription(); @@ -241,12 +246,14 @@ private boolean updateRocksDB(AlleleLocationData alleleLocationData, PublicSetTy .getReviewStatus().name(); List getObservedIn = publicSet.getReferenceClinVarAssertion().getObservedIn(); addNewEntries(variantAnnotation, publicSet, alleleLocationData.getAlleleId(), mateVariantString, - clinicalHaplotypeString, traitsToEfoTermsMap, accession, clinicalSignficanceDescription, - reviewStatusName, getObservedIn); + clinicalHaplotypeString, traitsToEfoTermsMap, rcvAccession, clinicalSignficanceDescription, + reviewStatusName, getObservedIn, vcvId); + + List scvAccessions = new ArrayList<>(); // parse SCVs for (MeasureTraitType measureTraitType : publicSet.getClinVarAssertion()) { - accession = measureTraitType.getClinVarAccession().getAcc(); + String scvAccession = measureTraitType.getClinVarAccession().getAcc(); clinicalSignficanceDescription = StringUtils.join(measureTraitType.getClinicalSignificance().getDescription(), CLINICAL_SIGNIFICANCE_SEPARATOR); @@ -254,10 +261,15 @@ private boolean updateRocksDB(AlleleLocationData alleleLocationData, PublicSetTy reviewStatusName = getReviewStatusIfPresent(measureTraitType); getObservedIn = measureTraitType.getObservedIn(); addNewEntries(variantAnnotation, publicSet, alleleLocationData.getAlleleId(), mateVariantString, - clinicalHaplotypeString, traitsToEfoTermsMap, accession, clinicalSignficanceDescription, - reviewStatusName, getObservedIn); + clinicalHaplotypeString, traitsToEfoTermsMap, scvAccession, clinicalSignficanceDescription, + reviewStatusName, getObservedIn, vcvId); + scvAccessions.add(scvAccession); } + if (StringUtils.isNotEmpty(vcvId)) { + // add SCVs and RCVs to VCV entry + addAdditionalProperties(variantAnnotation, vcvId, rcvAccession, scvAccessions); + } rdb.put(normalisedVariantString.getBytes(), jsonObjectWriter.writeValueAsBytes(variantAnnotation)); } return true; @@ -266,6 +278,46 @@ private boolean updateRocksDB(AlleleLocationData alleleLocationData, PublicSetTy return false; } + private String getVcvId(PublicSetType publicSet) { + if (publicSet.getReferenceClinVarAssertion() == null || publicSet.getReferenceClinVarAssertion().getMeasureSet() == null + || publicSet.getReferenceClinVarAssertion().getMeasureSet().getID() == null) { + return null; + } + return publicSet.getReferenceClinVarAssertion().getMeasureSet().getID().toString(); + } + + private void addAdditionalProperties(VariantAnnotation variantAnnotation, String vcvId, String rcvAccession, + List scvAccessions) { + List properties = getTraitAssociation(variantAnnotation, vcvId).getAdditionalProperties(); + boolean hasRCVIds = false; + boolean hasSCVIds = false; + for (Property property : properties) { + if (RCVIDS.equals(property.getName())) { + hasRCVIds = true; + property.setValue(property.getValue() + "," + rcvAccession); + } + if (SCVIDS.equals(property.getName())) { + hasSCVIds = true; + property.setValue(property.getValue() + "," + String.join(",", scvAccessions)); + } + } + if (!hasRCVIds) { + properties.add(new Property(null, RCVIDS, rcvAccession)); + } + if (!hasSCVIds) { + properties.add(new Property(null, SCVIDS, String.join(",", scvAccessions))); + } + } + + private EvidenceEntry getTraitAssociation(VariantAnnotation variantAnnotation, String vcvId) { + for (EvidenceEntry evidenceEntry: variantAnnotation.getTraitAssociation()) { + if (vcvId.equals(evidenceEntry.getId())) { + return evidenceEntry; + } + } + return null; + } + private String getReviewStatusIfPresent(MeasureTraitType measureTraitType) { if (measureTraitType.getClinicalSignificance().getReviewStatus() != null) { return measureTraitType.getClinicalSignificance().getReviewStatus().name(); @@ -355,10 +407,13 @@ private void addNewEntries(VariantAnnotation variantAnnotation, PublicSetType pu String mateVariantString, String clinicalHaplotypeString, Map traitsToEfoTermsMap, String accession, String clinicalSignficanceDescription, String reviewStatusName, - List getObservedIn) + List getObservedIn, String vcvId) throws JsonProcessingException { - List additionalProperties = new ArrayList<>(3); + List additionalProperties = new ArrayList<>(); + if (StringUtils.isNotEmpty(vcvId)) { + additionalProperties.add(new Property(null, "vcvIds", vcvId)); + } EvidenceSource evidenceSource = new EvidenceSource(EtlCommons.CLINVAR_DATA, null, null); // String accession = publicSet.getReferenceClinVarAssertion().getClinVarAccession().getAcc(); diff --git a/cellbase-app/src/test/java/org/opencb/cellbase/app/transform/clinical/variant/ClinicalVariantParserTest.java b/cellbase-app/src/test/java/org/opencb/cellbase/app/transform/clinical/variant/ClinicalVariantParserTest.java index bf40f142b8..48c51fc924 100644 --- a/cellbase-app/src/test/java/org/opencb/cellbase/app/transform/clinical/variant/ClinicalVariantParserTest.java +++ b/cellbase-app/src/test/java/org/opencb/cellbase/app/transform/clinical/variant/ClinicalVariantParserTest.java @@ -63,6 +63,8 @@ private void initGrch38() throws Exception { (new ClinicalVariantParser(clinicalVariantFolder, true, genomeSequenceFilePath, "GRCh38", serializer)).parse(); } + + @Test public void testUnexpectedAccession() throws Exception { cleanUp(); @@ -80,8 +82,31 @@ public void testUnexpectedAccession() throws Exception { assertEquals("G", variant.getReference()); assertEquals("A", variant.getAlternate()); + // variant should have list of SCVs and RCVs and VCVs + EvidenceEntry evidenceEntry = getEvidenceEntryByAccession(variant, "RCV000007529"); + assertEquals(4, evidenceEntry.getAdditionalProperties().size()); + assertEquals("7109", getValueByName(evidenceEntry, "vcvIds")); + + evidenceEntry = getEvidenceEntryByAccession(variant, "SCV000053488"); + assertEquals(4, evidenceEntry.getAdditionalProperties().size()); + assertEquals("7109", getValueByName(evidenceEntry, "vcvIds")); + + evidenceEntry = getEvidenceEntryByAccession(variant, "7109"); + assertEquals(4, evidenceEntry.getAdditionalProperties().size()); + assertEquals("RCV000007529", getValueByName(evidenceEntry, "rcvIds")); + assertEquals("SCV000053488", getValueByName(evidenceEntry, "scvIds")); + } + + private String getValueByName(EvidenceEntry evidenceEntry, String name) { + for (Property property : evidenceEntry.getAdditionalProperties()) { + if (property.getName().equals(name)) { + return property.getValue(); + } + } + return null; } + @Test public void testReallyLongVariant() throws Exception { cleanUp(); diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index 515c995a44..a88b8d64cd 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 4.9.8 + 4.10.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index bb01260ae0..61ab0fec36 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 4.9.8 + 4.10.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/annotation/VariantAnnotationCalculator.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/annotation/VariantAnnotationCalculator.java index 47e4646177..d4930000ec 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/annotation/VariantAnnotationCalculator.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/annotation/VariantAnnotationCalculator.java @@ -697,7 +697,8 @@ private void adjustPhasedConsequenceTypes(Object[] variantArray) { consequenceType3.setSequenceOntologyTerms(soTerms); // Flag these transcripts as already updated for this variant - flagTranscriptAnnotationUpdated(variant2, consequenceType1.getEnsemblTranscriptId()); + flagTranscriptAnnotationUpdated(variant2, consequenceType1.getEnsemblTranscriptId(), + Arrays.asList(variant0, variant1)); variant2DisplayCTNeedsUpdate = true; @@ -745,8 +746,8 @@ private void adjustPhasedConsequenceTypes(Object[] variantArray) { consequenceType2.setSequenceOntologyTerms(soTerms); // Flag these transcripts as already updated for this variant - flagTranscriptAnnotationUpdated(variant0, consequenceType1.getEnsemblTranscriptId()); - flagTranscriptAnnotationUpdated(variant1, consequenceType1.getEnsemblTranscriptId()); + flagTranscriptAnnotationUpdated(variant0, consequenceType1.getEnsemblTranscriptId(), Arrays.asList((variant1))); + flagTranscriptAnnotationUpdated(variant1, consequenceType1.getEnsemblTranscriptId(), Arrays.asList((variant0))); variant0DisplayCTNeedsUpdate = true; variant1DisplayCTNeedsUpdate = true; @@ -771,24 +772,25 @@ private void adjustPhasedConsequenceTypes(Object[] variantArray) { } } - private void flagTranscriptAnnotationUpdated(Variant variant, String ensemblTranscriptId) { + private void flagTranscriptAnnotationUpdated(Variant variant, String ensemblTranscriptId, List phasedVariants) { Map additionalAttributesMap = variant.getAnnotation().getAdditionalAttributes(); if (additionalAttributesMap == null) { additionalAttributesMap = new HashMap<>(); AdditionalAttribute additionalAttribute = new AdditionalAttribute(); Map transcriptsSet = new HashMap<>(); - transcriptsSet.put(ensemblTranscriptId, null); + transcriptsSet.put(ensemblTranscriptId, VariantAnnotationUtils.buildVariantIds(phasedVariants)); additionalAttribute.setAttribute(transcriptsSet); additionalAttributesMap.put("phasedTranscripts", additionalAttribute); variant.getAnnotation().setAdditionalAttributes(additionalAttributesMap); } else if (additionalAttributesMap.get("phasedTranscripts") == null) { AdditionalAttribute additionalAttribute = new AdditionalAttribute(); Map transcriptsSet = new HashMap<>(); - transcriptsSet.put(ensemblTranscriptId, null); + transcriptsSet.put(ensemblTranscriptId, VariantAnnotationUtils.buildVariantIds(phasedVariants)); additionalAttribute.setAttribute(transcriptsSet); additionalAttributesMap.put("phasedTranscripts", additionalAttribute); } else { - additionalAttributesMap.get("phasedTranscripts").getAttribute().put(ensemblTranscriptId, null); + additionalAttributesMap.get("phasedTranscripts").getAttribute().put(ensemblTranscriptId, + VariantAnnotationUtils.buildVariantIds(phasedVariants)); } } diff --git a/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/annotation/VariantAnnotationUtils.java b/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/annotation/VariantAnnotationUtils.java index 8a8555b0a0..c462d87ada 100644 --- a/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/annotation/VariantAnnotationUtils.java +++ b/cellbase-core/src/main/java/org/opencb/cellbase/core/variant/annotation/VariantAnnotationUtils.java @@ -601,12 +601,33 @@ public static SequenceOntologyTerm newSequenceOntologyTerm(String name) throws S return new SequenceOntologyTerm(ConsequenceTypeMappings.getSoAccessionString(name), name); } + public static String buildVariantId(Variant variant) { + if (variant == null) { + return null; + } + return buildVariantId(variant.getChromosome(), variant.getStart(), variant.getReference(), variant.getAlternate()); + } + + public static String buildVariantIds(List variants) { + StringBuilder variantIds = new StringBuilder(); + for (Variant variant : variants) { + if (variant == null) { + continue; + } + variantIds.append(buildVariantId(variant.getChromosome(), variant.getStart(), variant.getReference(), variant.getAlternate())); + } + if (variantIds == null) { + return null; + } + return variantIds.toString(); + } + public static String buildVariantId(String chromosome, int start, String reference, String alternate) { StringBuilder stringBuilder = new StringBuilder(); appendChromosome(chromosome, stringBuilder) .append(SEPARATOR_CHAR) - .append(StringUtils.leftPad(Integer.toString(start), 10, " ")) + .append(start) .append(SEPARATOR_CHAR); // if (reference.length() > Variant.SV_THRESHOLD) { diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 74834b7b0e..41f213f754 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 4.9.8 + 4.10.0-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/VariantAnnotationCalculatorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/VariantAnnotationCalculatorTest.java index b8c32f2562..bf23164443 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/VariantAnnotationCalculatorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/VariantAnnotationCalculatorTest.java @@ -24,7 +24,9 @@ import org.junit.Before; import org.junit.Test; import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.VariantBuilder; import org.opencb.biodata.models.variant.avro.*; +import org.opencb.cellbase.core.variant.AnnotationBasedPhasedQueryManager; import org.opencb.cellbase.core.variant.annotation.VariantAnnotationCalculator; import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest; import org.opencb.commons.datastore.core.QueryOptions; @@ -794,6 +796,8 @@ public void testQueryResultGroupingDecomposedMNVs() throws Exception { + + } @Test @@ -869,14 +873,14 @@ public void testPopulationFrequencies() throws Exception { } - @Test - public void testAnnotationGrch37() throws Exception { - // threw a NULL pointer exception - QueryOptions queryOptions = new QueryOptions("useCache", false); - QueryResult queryResult = variantAnnotationCalculator - .getAnnotationByVariant(new Variant("9:34648830:A:N"), queryOptions); - assertEquals(1, queryResult.getNumTotalResults()); - } +// @Test +// public void testAnnotationGrch37() throws Exception { +// // threw a NULL pointer exception +// QueryOptions queryOptions = new QueryOptions("useCache", false); +// QueryResult queryResult = variantAnnotationCalculator +// .getAnnotationByVariant(new Variant("9:34648830:A:N"), queryOptions); +// assertEquals(1, queryResult.getNumTotalResults()); +// } @Test public void testHgvsAnnotationGrch37() throws Exception { @@ -1981,8 +1985,7 @@ public void testMissingClinVar() throws Exception { QueryResult queryResult = variantAnnotationCalculator.getAnnotationByVariant(variant1, queryOptions); - List hgvs = queryResult.getResult().get(0).getHgvs(); - assertEquals("test", hgvs); + assertEquals(14,new HashSet(queryResult.getResult().get(0).getHgvs()).size()); } @@ -2083,6 +2086,66 @@ public void testCheckAminoAcidChange() throws Exception { assertTrue(hasClinVar(variantTraitAssociation, "251327")); } + private static final String PHASE_DATA_URL_SEPARATOR = "\\+"; + private static final String VARIANT_STRING_FORMAT = "\\+"; + + @Test + public void testMNVAdditionalProperties() throws Exception { + +// chr19:13025339:C:A+1|1+999 - synonymous +// chr19:13025341:G:T+1|1+999 - synonymous + + initGrch38(); + + VariantBuilder variantBuilder = new VariantBuilder("19", + 13025339, + 13025339, + "C", + "A"); + variantBuilder.setFormat(Arrays.asList("PS", "GT")); + variantBuilder.setSamplesData(Collections.singletonList(Arrays.asList("999", "1|1"))); + Variant variant1 = variantBuilder.build(); + + variantBuilder = new VariantBuilder("19", + 13025341, + 13025341, + "G", + "T"); + variantBuilder.setFormat(Arrays.asList("PS", "GT")); + variantBuilder.setSamplesData(Collections.singletonList(Arrays.asList("999", "1|1"))); + Variant variant2 = variantBuilder.build(); + + List variantList = new ArrayList<>(); + variantList.add(variant1); + variantList.add(variant2); + + QueryOptions queryOptions = new QueryOptions("useCache", false); + queryOptions.put("include", "consequenceType, reference, alternate, clinical"); + queryOptions.put("normalize", true); + queryOptions.put("skipDecompose", false); + queryOptions.put("checkAminoAcidChange", false); + queryOptions.put("imprecise", true); + queryOptions.put("phased", true); + + List> queryResult = variantAnnotationCalculator.getAnnotationByVariantList(variantList, + queryOptions); + + assertEquals(2, queryResult.size()); + + VariantAnnotation v1 = queryResult.get(0).getResult().get(0); + VariantAnnotation v2 = queryResult.get(1).getResult().get(0); + + assertEquals("missense_variant", v1.getDisplayConsequenceType()); + assertEquals("missense_variant", v2.getDisplayConsequenceType()); + + Map additionalAttributes1 = (Map) v1.getAdditionalAttributes().get("phasedTranscripts").get("attribute"); + Map additionalAttributes2 = (Map) v1.getAdditionalAttributes().get("phasedTranscripts").get("attribute"); + + System.out.println(additionalAttributes1); + + assertEquals(12, additionalAttributes1.size()); + assertEquals(12, additionalAttributes2.size()); + } private boolean hasClinVarAccession(List traitAssociation, String accession) { for (EvidenceEntry evidenceEntry : traitAssociation) { diff --git a/cellbase-lib/src/test/resources/variant-annotation/grch38/gene.test.json.gz b/cellbase-lib/src/test/resources/variant-annotation/grch38/gene.test.json.gz index e24b8c102f..f51c72e5f3 100644 Binary files a/cellbase-lib/src/test/resources/variant-annotation/grch38/gene.test.json.gz and b/cellbase-lib/src/test/resources/variant-annotation/grch38/gene.test.json.gz differ diff --git a/cellbase-lib/src/test/resources/variant-annotation/grch38/genome_sequence.test.json.gz b/cellbase-lib/src/test/resources/variant-annotation/grch38/genome_sequence.test.json.gz index 9aea072914..d93f114d83 100644 Binary files a/cellbase-lib/src/test/resources/variant-annotation/grch38/genome_sequence.test.json.gz and b/cellbase-lib/src/test/resources/variant-annotation/grch38/genome_sequence.test.json.gz differ diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index 8978f0c996..3c893a7356 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 4.9.8 + 4.10.0-SNAPSHOT ../pom.xml diff --git a/cellbase-test/pom.xml b/cellbase-test/pom.xml index 8a962ef904..50f0397ae4 100644 --- a/cellbase-test/pom.xml +++ b/cellbase-test/pom.xml @@ -22,7 +22,7 @@ org.opencb.cellbase cellbase-test - 4.9.8 + 4.10.0-SNAPSHOT pom diff --git a/pom.xml b/pom.xml index 0440732be6..46bfbec467 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 4.9.8 + 4.10.0-SNAPSHOT pom CellBase project @@ -22,7 +22,7 @@ - 4.9.8 + 4.10.0-SNAPSHOT 1.8 3.7.5 1.5.5 @@ -109,6 +109,10 @@ biodata-tools ${biodata.version} + + log4j + log4j + slf4j-simple org.slf4j