diff --git a/cellbase-app/pom.xml b/cellbase-app/pom.xml
index a5ed481a99..292f203fd5 100644
--- a/cellbase-app/pom.xml
+++ b/cellbase-app/pom.xml
@@ -6,7 +6,7 @@
org.opencb.cellbase
cellbase
- 4.9.5
+ 4.10.0-SNAPSHOT
../pom.xml
diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CliOptionsParser.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CliOptionsParser.java
index 5f223dc77e..f9382d3974 100644
--- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CliOptionsParser.java
+++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/CliOptionsParser.java
@@ -447,6 +447,10 @@ public class VariantAnnotationCommandOptions {
arity = 0)
public boolean checkAminoAcidChange;
+ @Parameter(names = {"--filter"}, description = "string indicating the FILTER label that variants must have to be annotated. "
+ + "Only variants with this label will be written in the output.", required = false, arity = 1)
+ public String filter = null;
+
@DynamicParameter(names = "-D", description = "Dynamic parameters. Available parameters: "
+ "{population-frequencies=for internal purposes mainly. Full path to a json file containing Variant "
+ "documents that include lists of population frequencies objects. Will allow annotating the input file "
diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutor.java
index 64c3b80c11..487550f72a 100644
--- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutor.java
+++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutor.java
@@ -392,7 +392,7 @@ private List>
for (int i = 0; i < numThreads; i++) {
List variantAnnotatorList = createAnnotators();
- variantAnnotatorTaskList.add(new VariantAnnotatorTask(variantAnnotatorList));
+ variantAnnotatorTaskList.add(new VariantAnnotatorTask(variantAnnotatorList, serverQueryOptions));
}
return variantAnnotatorTaskList;
}
@@ -573,7 +573,7 @@ private void checkParameters() throws IOException {
leftAlign = !variantAnnotationCommandOptions.skipLeftAlign;
// Update serverQueryOptions
serverQueryOptions.put("checkAminoAcidChange", variantAnnotationCommandOptions.checkAminoAcidChange);
-
+ serverQueryOptions.put("filter", variantAnnotationCommandOptions.filter);
// output file
if (variantAnnotationCommandOptions.output != null) {
output = Paths.get(variantAnnotationCommandOptions.output);
diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/variant/annotation/VariantAnnotatorTask.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/variant/annotation/VariantAnnotatorTask.java
index 2c19fcea79..6c5e4687e9 100644
--- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/variant/annotation/VariantAnnotatorTask.java
+++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/variant/annotation/VariantAnnotatorTask.java
@@ -1,14 +1,19 @@
package org.opencb.cellbase.app.cli.variant.annotation;
import org.opencb.biodata.models.variant.Variant;
+import org.opencb.biodata.models.variant.avro.FileEntry;
+import org.opencb.biodata.models.variant.avro.StudyEntry;
import org.opencb.biodata.models.variant.avro.VariantType;
import org.opencb.cellbase.core.variant.annotation.VariantAnnotator;
+import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.commons.run.ParallelTaskRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.List;
+import java.util.Map;
/**
* Created by fjlopez on 11/02/16.
@@ -18,9 +23,16 @@ public class VariantAnnotatorTask implements
private final Logger logger = LoggerFactory.getLogger(this.getClass());
private List variantAnnotatorList;
+ private QueryOptions serverQueryOptions;
+ private static final String FILTER_PARAM = "filter";
public VariantAnnotatorTask(List variantAnnotatorList) {
+ this(variantAnnotatorList, new QueryOptions());
+ }
+
+ public VariantAnnotatorTask(List variantAnnotatorList, QueryOptions serverQueryOptions) {
this.variantAnnotatorList = variantAnnotatorList;
+ this.serverQueryOptions = serverQueryOptions;
}
public void pre() {
@@ -30,18 +42,42 @@ public void pre() {
}
public List apply(List batch) throws Exception {
- List variantListToAnnotate = filterReferenceBlocksOut(batch);
+ List variantListToAnnotate = filter(batch);
for (VariantAnnotator variantAnnotator : variantAnnotatorList) {
variantAnnotator.run(variantListToAnnotate);
}
return variantListToAnnotate;
}
- private List filterReferenceBlocksOut(List variantList) {
+ private List filter(List variantList) {
List filteredVariantList = new ArrayList<>(variantList.size());
+ String queryOptionsFilterValue = null;
+ if (serverQueryOptions != null && serverQueryOptions.containsKey(FILTER_PARAM)) {
+ queryOptionsFilterValue = (String) serverQueryOptions.get(FILTER_PARAM);
+ }
for (Variant variant : variantList) {
+ // Set to true on the first FILTER match so the variant is added only once and the study scan stops.
+ boolean variantFound = false;
+ // filter out reference blocks
if (!VariantType.NO_VARIATION.equals(variant.getType())) {
- filteredVariantList.add(variant);
+ // if FILTER param set, VCF line must match or it's skipped
+ if (queryOptionsFilterValue != null) {
+ Iterator studyIterator = variant.getImpl().getStudies().listIterator();
+ while (studyIterator.hasNext() && !variantFound) {
+ for (FileEntry fileEntry : studyIterator.next().getFiles()) {
+ Map attributes = fileEntry.getAttributes();
+ String vcfFilterValue = attributes.get("FILTER");
+ if (vcfFilterValue != null && vcfFilterValue.equalsIgnoreCase(queryOptionsFilterValue)) {
+ // matched, variant added. we are done here.
+ filteredVariantList.add(variant);
+ variantFound = true;
+ break;
+ }
+ }
+ }
+ } else {
+ filteredVariantList.add(variant);
+ }
}
}
diff --git a/cellbase-app/src/test/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutorTest.java b/cellbase-app/src/test/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutorTest.java
index 8e731f4909..dfca6b25ed 100644
--- a/cellbase-app/src/test/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutorTest.java
+++ b/cellbase-app/src/test/java/org/opencb/cellbase/app/cli/VariantAnnotationCommandExecutorTest.java
@@ -45,6 +45,7 @@ public VariantAnnotationCommandExecutorTest() throws URISyntaxException {
jsonObjectMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL);
}
+
@Test
public void proteinChangeMatchTest() throws IOException, URISyntaxException {
// Remove database content
@@ -123,7 +124,7 @@ public void indexedVariantWithoutRequiredAttributeTest() throws IOException, URI
"FAKEATTRIBUTE",
null,
null,
- -1
+ -1, null
));
variantAnnotationCommandExecutor.loadCellBaseConfiguration();
variantAnnotationCommandExecutor.execute();
@@ -156,7 +157,7 @@ public void twoCustomFilesTest() throws IOException, URISyntaxException {
"GN,AF,AC,AN,MAF,HWE,AN_Cancer,AN_SRv3,AN_RD,AN_SRv4,AC_Cancer,AC_SRv3,AC_RD,AC_SRv4,AF_Cancer,AF_SRv3,AF_RD,AF_SRv4,MAF_Cancer,MAF_SRv3,MAF_RD,MAF_SRv4,HWE_Cancer,HWE_SRv3,HWE_RD,HWE_SRv4:GN,AF,AC,AN,MAF,HWE,AN_Cancer,AN_SRv3,AN_RD,AN_SRv4,AC_Cancer,AC_SRv3,AC_RD,AC_SRv4,AF_Cancer,AF_SRv3,AF_RD,AF_SRv4,MAF_Cancer,MAF_SRv3,MAF_RD,MAF_SRv4,HWE_Cancer,HWE_SRv3,HWE_RD,HWE_SRv4",
null,
null,
- 100
+ 100, null
));
variantAnnotationCommandExecutor.loadCellBaseConfiguration();
@@ -201,7 +202,7 @@ public void alreadyIndexedTest() throws IOException, URISyntaxException {
"GN,AF,AC,AN,MAF,HWE,AN_Cancer,AN_SRv3,AN_RD,AN_SRv4,AC_Cancer,AC_SRv3,AC_RD,AC_SRv4,AF_Cancer,AF_SRv3,AF_RD,AF_SRv4,MAF_Cancer,MAF_SRv3,MAF_RD,MAF_SRv4,HWE_Cancer,HWE_SRv3,HWE_RD,HWE_SRv4",
null,
null,
- 100
+ 100, null
));
variantAnnotationCommandExecutor.loadCellBaseConfiguration();
@@ -244,7 +245,7 @@ public void maxFilesCustomAnnotationTest() throws IOException, URISyntaxExceptio
"GN,AF,AC,AN,MAF,HWE,AN_Cancer,AN_SRv3,AN_RD,AN_SRv4,AC_Cancer,AC_SRv3,AC_RD,AC_SRv4,AF_Cancer,AF_SRv3,AF_RD,AF_SRv4,MAF_Cancer,MAF_SRv3,MAF_RD,MAF_SRv4,HWE_Cancer,HWE_SRv3,HWE_RD,HWE_SRv4",
null,
null,
- 100
+ 100, null
));
variantAnnotationCommandExecutor.loadCellBaseConfiguration();
variantAnnotationCommandExecutor.execute();
@@ -323,7 +324,7 @@ public void phasedCustomAnnotationTest() throws IOException, URISyntaxException
"GN,AF,AC,AN,MAF,HWE,AN_Cancer,AN_SRv3,AN_RD,AN_SRv4,AC_Cancer,AC_SRv3,AC_RD,AC_SRv4,AF_Cancer,AF_SRv3,AF_RD,AF_SRv4,MAF_Cancer,MAF_SRv3,MAF_RD,MAF_SRv4,HWE_Cancer,HWE_SRv3,HWE_RD,HWE_SRv4",
null,
null,
- -1
+ -1, null
));
variantAnnotationCommandExecutor.loadCellBaseConfiguration();
variantAnnotationCommandExecutor.execute();
@@ -383,7 +384,7 @@ public void additionalPopulationFrequencyUnphasedAnnotationTest() throws Excepti
.resolve("commandExecutor/additionalPopulationFrequency/chr1.2017-12-27_01_12.hgva.freq.cellbase.test.json.gz")
.toString(),
true,
- -1
+ -1, null
));
variantAnnotationCommandExecutor.loadCellBaseConfiguration();
variantAnnotationCommandExecutor.execute();
@@ -481,6 +482,9 @@ public void additionalPopulationFrequencyUnphasedAnnotationTest() throws Excepti
}
+
+
+
@Test
public void additionalPopulationFrequencyPhasedAnnotationTest() throws Exception {
cleanUp();
@@ -495,7 +499,7 @@ public void additionalPopulationFrequencyPhasedAnnotationTest() throws Exception
.resolve("commandExecutor/additionalPopulationFrequency/chr1.2017-12-27_01_12.hgva.freq.cellbase.test.json.gz")
.toString(),
true,
- -1));
+ -1, null));
variantAnnotationCommandExecutor.loadCellBaseConfiguration();
variantAnnotationCommandExecutor.execute();
List variantList = loadResult();
@@ -725,6 +729,52 @@ public void additionalPopulationFrequencyPhasedAnnotationTest() throws Exception
}
+
+ @Test
+ public void testFilter() throws Exception {
+ cleanUp();
+
+ // Set up annotation CLI options; only the FILTER option is under test here (checkAminoAcidChange is left unset)
+ CliOptionsParser.VariantAnnotationCommandOptions variantAnnotationCommandOptions
+ = new CliOptionsParser().getVariantAnnotationCommandOptions();
+ variantAnnotationCommandOptions.assembly = "GRCh37";
+ variantAnnotationCommandOptions.commonOptions.conf = resourcesFolder.resolve("commandExecutor/configuration.json").toString();
+ variantAnnotationCommandOptions.input
+ = resourcesFolder.resolve("commandExecutor/proteinChangeMatch/proband.duprem.atomic.left.split.vcf.gz").toString();
+ variantAnnotationCommandOptions.output = OUTPUT_FILENAME;
+ variantAnnotationCommandOptions.local = true;
+ variantAnnotationCommandOptions.species = "hsapiens";
+ variantAnnotationCommandOptions.filter = "PASS";
+ // Annotate with filter = "PASS"
+ VariantAnnotationCommandExecutor variantAnnotationCommandExecutor
+ = new VariantAnnotationCommandExecutor(variantAnnotationCommandOptions);
+ variantAnnotationCommandExecutor.loadCellBaseConfiguration();
+ variantAnnotationCommandExecutor.execute();
+ // Load annotated variants
+ List variantList = loadResult();
+
+ // Case 1: filter = "PASS" - exactly one variant is expected in the output
+ assertEquals(1, variantList.size());
+
+ variantAnnotationCommandOptions.filter = "BAD FILTER";
+ variantAnnotationCommandExecutor = new VariantAnnotationCommandExecutor(variantAnnotationCommandOptions);
+ variantAnnotationCommandExecutor.loadCellBaseConfiguration();
+ variantAnnotationCommandExecutor.execute();
+ variantList = loadResult();
+
+ // Case 2: filter = "BAD FILTER" - no variant is expected to match, so the output must be empty
+ assertEquals(0, variantList.size());
+
+ variantAnnotationCommandOptions.filter = null;
+ variantAnnotationCommandExecutor = new VariantAnnotationCommandExecutor(variantAnnotationCommandOptions);
+ variantAnnotationCommandExecutor.loadCellBaseConfiguration();
+ variantAnnotationCommandExecutor.execute();
+ variantList = loadResult();
+
+ // Case 3: filter = null (filtering disabled) - one variant is expected in the output
+ assertEquals(1, variantList.size());
+ }
+
private List getPopulationFrequency(List populationFrequencyList,
PopulationFrequency populationFrequency) {
List populationFrequencyList1 = new ArrayList<>(1);
@@ -800,7 +850,8 @@ private void cleanUp() throws IOException {
String customFileFields,
String populationFrequencyFilename,
Boolean completeInputPopulation,
- int maxOpenFiles) {
+ int maxOpenFiles,
+ String filter) {
CliOptionsParser.VariantAnnotationCommandOptions variantAnnotationCommandOptions
= new CliOptionsParser().getVariantAnnotationCommandOptions();
@@ -828,6 +879,7 @@ private void cleanUp() throws IOException {
variantAnnotationCommandOptions.maxOpenFiles = maxOpenFiles;
variantAnnotationCommandOptions.noImprecision = true;
variantAnnotationCommandOptions.buildParams = (new HashMap<>(1));
+ variantAnnotationCommandOptions.filter = filter;
if (populationFrequencyFilename != null) {
variantAnnotationCommandOptions.buildParams.put("population-frequencies", populationFrequencyFilename);
diff --git a/cellbase-client/pom.xml b/cellbase-client/pom.xml
index dc83bbc7c5..a88b8d64cd 100644
--- a/cellbase-client/pom.xml
+++ b/cellbase-client/pom.xml
@@ -6,7 +6,7 @@
org.opencb.cellbase
cellbase
- 4.9.5
+ 4.10.0-SNAPSHOT
../pom.xml
diff --git a/cellbase-core/pom.xml b/cellbase-core/pom.xml
index e927b30bbf..61ab0fec36 100644
--- a/cellbase-core/pom.xml
+++ b/cellbase-core/pom.xml
@@ -6,7 +6,7 @@
org.opencb.cellbase
cellbase
- 4.9.5
+ 4.10.0-SNAPSHOT
../pom.xml
diff --git a/cellbase-lib/pom.xml b/cellbase-lib/pom.xml
index 0295473177..41f213f754 100644
--- a/cellbase-lib/pom.xml
+++ b/cellbase-lib/pom.xml
@@ -6,7 +6,7 @@
org.opencb.cellbase
cellbase
- 4.9.5
+ 4.10.0-SNAPSHOT
../pom.xml
diff --git a/cellbase-server/pom.xml b/cellbase-server/pom.xml
index c8dcdb56b6..3c893a7356 100644
--- a/cellbase-server/pom.xml
+++ b/cellbase-server/pom.xml
@@ -6,7 +6,7 @@
org.opencb.cellbase
cellbase
- 4.9.5
+ 4.10.0-SNAPSHOT
../pom.xml
diff --git a/cellbase-test/pom.xml b/cellbase-test/pom.xml
index ed6e2558c5..50f0397ae4 100644
--- a/cellbase-test/pom.xml
+++ b/cellbase-test/pom.xml
@@ -22,7 +22,7 @@
org.opencb.cellbase
cellbase-test
- 4.9.5
+ 4.10.0-SNAPSHOT
pom
diff --git a/pom.xml b/pom.xml
index c36c35e837..54062d5dfe 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
org.opencb.cellbase
cellbase
- 4.9.5
+ 4.10.0-SNAPSHOT
pom
CellBase project
@@ -22,7 +22,7 @@
- 4.9.5
+ 4.10.0-SNAPSHOT
1.8
3.7.5
1.5.5