diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml index a5ed481a99..292f203fd5 100644 --- a/cellbase-app/pom.xml +++ b/cellbase-app/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 4.9.5 + 4.10.0-SNAPSHOT ../pom.xml diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CliOptionsParser.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CliOptionsParser.java index 5f223dc77e..f9382d3974 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CliOptionsParser.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CliOptionsParser.java @@ -447,6 +447,10 @@ public class VariantAnnotationCommandOptions { arity = 0) public boolean checkAminoAcidChange; + @Parameter(names = {"--filter"}, description = "string indicating the FILTER label that variants must have to be annotated. " + + "Only variants with this label will be written in the output.", required = false, arity = 1) + public String filter = null; + @DynamicParameter(names = "-D", description = "Dynamic parameters. Available parameters: " + "{population-frequencies=for internal purposes mainly. Full path to a json file containing Variant " + "documents that include lists of population frequencies objects. Will allow annotating the input file " diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutor.java index 64c3b80c11..487550f72a 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutor.java @@ -392,7 +392,7 @@ private List> for (int i = 0; i < numThreads; i++) { List variantAnnotatorList = createAnnotators(); - variantAnnotatorTaskList.add(new VariantAnnotatorTask(variantAnnotatorList)); + variantAnnotatorTaskList.add(new VariantAnnotatorTask(variantAnnotatorList, serverQueryOptions)); } return variantAnnotatorTaskList; } @@ -573,7 +573,7 @@ private void checkParameters() throws IOException { leftAlign = !variantAnnotationCommandOptions.skipLeftAlign; // Update serverQueryOptions serverQueryOptions.put("checkAminoAcidChange", variantAnnotationCommandOptions.checkAminoAcidChange); - + serverQueryOptions.put("filter", variantAnnotationCommandOptions.filter); // output file if (variantAnnotationCommandOptions.output != null) { output = Paths.get(variantAnnotationCommandOptions.output); diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/variant/annotation/VariantAnnotatorTask.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/variant/annotation/VariantAnnotatorTask.java index 2c19fcea79..6c5e4687e9 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/variant/annotation/VariantAnnotatorTask.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/variant/annotation/VariantAnnotatorTask.java @@ -1,14 +1,19 @@ package org.opencb.cellbase.app.cli.variant.annotation; import org.opencb.biodata.models.variant.Variant; +import org.opencb.biodata.models.variant.avro.FileEntry; +import org.opencb.biodata.models.variant.avro.StudyEntry; import org.opencb.biodata.models.variant.avro.VariantType; import org.opencb.cellbase.core.variant.annotation.VariantAnnotator; +import org.opencb.commons.datastore.core.QueryOptions; import org.opencb.commons.run.ParallelTaskRunner; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; +import java.util.Map; /** * Created by fjlopez on 11/02/16. @@ -18,9 +23,16 @@ public class VariantAnnotatorTask implements private final Logger logger = LoggerFactory.getLogger(this.getClass()); private List variantAnnotatorList; + private QueryOptions serverQueryOptions; + private static final String FILTER_PARAM = "filter"; public VariantAnnotatorTask(List variantAnnotatorList) { + this(variantAnnotatorList, new QueryOptions()); + } + + public VariantAnnotatorTask(List variantAnnotatorList, QueryOptions serverQueryOptions) { this.variantAnnotatorList = variantAnnotatorList; + this.serverQueryOptions = serverQueryOptions; } public void pre() { @@ -30,18 +42,42 @@ public void pre() { } public List apply(List batch) throws Exception { - List variantListToAnnotate = filterReferenceBlocksOut(batch); + List variantListToAnnotate = filter(batch); for (VariantAnnotator variantAnnotator : variantAnnotatorList) { variantAnnotator.run(variantListToAnnotate); } return variantListToAnnotate; } - private List filterReferenceBlocksOut(List variantList) { + private List filter(List variantList) { List filteredVariantList = new ArrayList<>(variantList.size()); + String queryOptionsFilterValue = null; + if (serverQueryOptions != null && serverQueryOptions.containsKey(FILTER_PARAM)) { + queryOptionsFilterValue = (String) serverQueryOptions.get(FILTER_PARAM); + } for (Variant variant : variantList) { + // true when we find a FILTER match. to prevent variant being added twice. + boolean variantFound = false; + // filter out reference blocks if (!VariantType.NO_VARIATION.equals(variant.getType())) { - filteredVariantList.add(variant); + // if FILTER param set, VCF line must match or it's skipped + if (queryOptionsFilterValue != null) { + Iterator studyIterator = variant.getImpl().getStudies().listIterator(); + while (studyIterator.hasNext() && !variantFound) { + for (FileEntry fileEntry : studyIterator.next().getFiles()) { + Map attributes = fileEntry.getAttributes(); + String vcfFilterValue = attributes.get("FILTER"); + if (vcfFilterValue != null && vcfFilterValue.equalsIgnoreCase(queryOptionsFilterValue)) { + // matched, variant added. we are done here. + filteredVariantList.add(variant); + variantFound = true; + break; + } + } + } + } else { + filteredVariantList.add(variant); + } } } diff --git a/cellbase-app/src/test/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutorTest.java b/cellbase-app/src/test/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutorTest.java index 8e731f4909..dfca6b25ed 100644 --- a/cellbase-app/src/test/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutorTest.java +++ b/cellbase-app/src/test/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutorTest.java @@ -45,6 +45,7 @@ public VariantAnnotationCommandExecutorTest() throws URISyntaxException { jsonObjectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); } + @Test public void proteinChangeMatchTest() throws IOException, URISyntaxException { // Remove database content @@ -123,7 +124,7 @@ public void indexedVariantWithoutRequiredAttributeTest() throws IOException, URI "FAKEATTRIBUTE", null, null, - -1 + -1, null )); variantAnnotationCommandExecutor.loadCellBaseConfiguration(); variantAnnotationCommandExecutor.execute(); @@ -156,7 +157,7 @@ public void twoCustomFilesTest() throws IOException, URISyntaxException { "GN,AF,AC,AN,MAF,HWE,AN_Cancer,AN_SRv3,AN_RD,AN_SRv4,AC_Cancer,AC_SRv3,AC_RD,AC_SRv4,AF_Cancer,AF_SRv3,AF_RD,AF_SRv4,MAF_Cancer,MAF_SRv3,MAF_RD,MAF_SRv4,HWE_Cancer,HWE_SRv3,HWE_RD,HWE_SRv4:GN,AF,AC,AN,MAF,HWE,AN_Cancer,AN_SRv3,AN_RD,AN_SRv4,AC_Cancer,AC_SRv3,AC_RD,AC_SRv4,AF_Cancer,AF_SRv3,AF_RD,AF_SRv4,MAF_Cancer,MAF_SRv3,MAF_RD,MAF_SRv4,HWE_Cancer,HWE_SRv3,HWE_RD,HWE_SRv4", null, null, - 100 + 100, null )); variantAnnotationCommandExecutor.loadCellBaseConfiguration(); @@ -201,7 +202,7 @@ public void alreadyIndexedTest() throws IOException, URISyntaxException { "GN,AF,AC,AN,MAF,HWE,AN_Cancer,AN_SRv3,AN_RD,AN_SRv4,AC_Cancer,AC_SRv3,AC_RD,AC_SRv4,AF_Cancer,AF_SRv3,AF_RD,AF_SRv4,MAF_Cancer,MAF_SRv3,MAF_RD,MAF_SRv4,HWE_Cancer,HWE_SRv3,HWE_RD,HWE_SRv4", null, null, - 100 + 100, null )); variantAnnotationCommandExecutor.loadCellBaseConfiguration(); @@ -244,7 +245,7 @@ public void maxFilesCustomAnnotationTest() throws IOException, URISyntaxExceptio "GN,AF,AC,AN,MAF,HWE,AN_Cancer,AN_SRv3,AN_RD,AN_SRv4,AC_Cancer,AC_SRv3,AC_RD,AC_SRv4,AF_Cancer,AF_SRv3,AF_RD,AF_SRv4,MAF_Cancer,MAF_SRv3,MAF_RD,MAF_SRv4,HWE_Cancer,HWE_SRv3,HWE_RD,HWE_SRv4", null, null, - 100 + 100, null )); variantAnnotationCommandExecutor.loadCellBaseConfiguration(); variantAnnotationCommandExecutor.execute(); @@ -323,7 +324,7 @@ public void phasedCustomAnnotationTest() throws IOException, URISyntaxException "GN,AF,AC,AN,MAF,HWE,AN_Cancer,AN_SRv3,AN_RD,AN_SRv4,AC_Cancer,AC_SRv3,AC_RD,AC_SRv4,AF_Cancer,AF_SRv3,AF_RD,AF_SRv4,MAF_Cancer,MAF_SRv3,MAF_RD,MAF_SRv4,HWE_Cancer,HWE_SRv3,HWE_RD,HWE_SRv4", null, null, - -1 + -1, null )); variantAnnotationCommandExecutor.loadCellBaseConfiguration(); variantAnnotationCommandExecutor.execute(); @@ -383,7 +384,7 @@ public void additionalPopulationFrequencyUnphasedAnnotationTest() throws Excepti .resolve("commandExecutor/additionalPopulationFrequency/chr1.2017-12-27_01_12.hgva.freq.cellbase.test.json.gz") .toString(), true, - -1 + -1, null )); variantAnnotationCommandExecutor.loadCellBaseConfiguration(); variantAnnotationCommandExecutor.execute(); @@ -481,6 +482,9 @@ public void additionalPopulationFrequencyUnphasedAnnotationTest() throws Excepti } + + + @Test public void additionalPopulationFrequencyPhasedAnnotationTest() throws Exception { cleanUp(); @@ -495,7 +499,7 @@ public void additionalPopulationFrequencyPhasedAnnotationTest() throws Exception .resolve("commandExecutor/additionalPopulationFrequency/chr1.2017-12-27_01_12.hgva.freq.cellbase.test.json.gz") .toString(), true, - -1)); + -1, null)); variantAnnotationCommandExecutor.loadCellBaseConfiguration(); variantAnnotationCommandExecutor.execute(); List variantList = loadResult(); @@ -725,6 +729,52 @@ public void additionalPopulationFrequencyPhasedAnnotationTest() throws Exception } + + @Test + public void testFilter() throws Exception { + cleanUp(); + + // Set up annotation CLI options: NOTE checkAminoAcidChange is NOT enabled + CliOptionsParser.VariantAnnotationCommandOptions variantAnnotationCommandOptions + = new CliOptionsParser().getVariantAnnotationCommandOptions(); + variantAnnotationCommandOptions.assembly = "GRCh37"; + variantAnnotationCommandOptions.commonOptions.conf = resourcesFolder.resolve("commandExecutor/configuration.json").toString(); + variantAnnotationCommandOptions.input + = resourcesFolder.resolve("commandExecutor/proteinChangeMatch/proband.duprem.atomic.left.split.vcf.gz").toString(); + variantAnnotationCommandOptions.output = OUTPUT_FILENAME; + variantAnnotationCommandOptions.local = true; + variantAnnotationCommandOptions.species = "hsapiens"; + variantAnnotationCommandOptions.filter = "PASS"; + // Annotate + VariantAnnotationCommandExecutor variantAnnotationCommandExecutor + = new VariantAnnotationCommandExecutor(variantAnnotationCommandOptions); + variantAnnotationCommandExecutor.loadCellBaseConfiguration(); + variantAnnotationCommandExecutor.execute(); + // Load annotated variants + List variantList = loadResult(); + + // one variant has the PASS filter + assertEquals(1, variantList.size()); + + variantAnnotationCommandOptions.filter = "BAD FILTER"; + variantAnnotationCommandExecutor = new VariantAnnotationCommandExecutor(variantAnnotationCommandOptions); + variantAnnotationCommandExecutor.loadCellBaseConfiguration(); + variantAnnotationCommandExecutor.execute(); + variantList = loadResult(); + + // one variant has the PASS filter. there should be no results! + assertEquals(0, variantList.size()); + + variantAnnotationCommandOptions.filter = null; + variantAnnotationCommandExecutor = new VariantAnnotationCommandExecutor(variantAnnotationCommandOptions); + variantAnnotationCommandExecutor.loadCellBaseConfiguration(); + variantAnnotationCommandExecutor.execute(); + variantList = loadResult(); + + // no filter 1 results + assertEquals(1, variantList.size()); + } + private List getPopulationFrequency(List populationFrequencyList, PopulationFrequency populationFrequency) { List populationFrequencyList1 = new ArrayList<>(1); @@ -800,7 +850,8 @@ private void cleanUp() throws IOException { String customFileFields, String populationFrequencyFilename, Boolean completeInputPopulation, - int maxOpenFiles) { + int maxOpenFiles, + String filter) { CliOptionsParser.VariantAnnotationCommandOptions variantAnnotationCommandOptions = new CliOptionsParser().getVariantAnnotationCommandOptions(); @@ -828,6 +879,7 @@ private void cleanUp() throws IOException { variantAnnotationCommandOptions.maxOpenFiles = maxOpenFiles; variantAnnotationCommandOptions.noImprecision = true; variantAnnotationCommandOptions.buildParams = (new HashMap<>(1)); + variantAnnotationCommandOptions.filter = filter; if (populationFrequencyFilename != null) { variantAnnotationCommandOptions.buildParams.put("population-frequencies", populationFrequencyFilename); diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml index dc83bbc7c5..a88b8d64cd 100644 --- a/cellbase-client/pom.xml +++ b/cellbase-client/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 4.9.5 + 4.10.0-SNAPSHOT ../pom.xml diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml index e927b30bbf..61ab0fec36 100644 --- a/cellbase-core/pom.xml +++ b/cellbase-core/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 4.9.5 + 4.10.0-SNAPSHOT ../pom.xml diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml index 0295473177..41f213f754 100644 --- a/cellbase-lib/pom.xml +++ b/cellbase-lib/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 4.9.5 + 4.10.0-SNAPSHOT ../pom.xml diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml index c8dcdb56b6..3c893a7356 100644 --- a/cellbase-server/pom.xml +++ b/cellbase-server/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 4.9.5 + 4.10.0-SNAPSHOT ../pom.xml diff --git a/cellbase-test/pom.xml b/cellbase-test/pom.xml index ed6e2558c5..50f0397ae4 100644 --- a/cellbase-test/pom.xml +++ b/cellbase-test/pom.xml @@ -22,7 +22,7 @@ org.opencb.cellbase cellbase-test - 4.9.5 + 4.10.0-SNAPSHOT pom diff --git a/pom.xml b/pom.xml index c36c35e837..54062d5dfe 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ org.opencb.cellbase cellbase - 4.9.5 + 4.10.0-SNAPSHOT pom CellBase project @@ -22,7 +22,7 @@ - 4.9.5 + 4.10.0-SNAPSHOT 1.8 3.7.5 1.5.5