diff --git a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSource.java b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSource.java index 79e5fd12d..312e2de96 100644 --- a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSource.java +++ b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSource.java @@ -78,25 +78,24 @@ public String getAnnotation(long requestedCpAsLong, ChrPosition requestedCp) { * lets see if there are any records that match on ref and alt */ return getAnnotationsFromCurrentRecords(requestedCp); + } - } else { - int matchWithNextCP = Long.compare(requestedCpAsLong, nextCPAsLong); - if (nextCPAsLong > -1 && matchWithNextCP < 0) { - /* - * requestedCp is "less than" next CP - * return empty list here - */ - } else { -// logger.debug(reader.getFile().getName() + ": getting next record. requestedCp: " + (null != requestedCp ? requestedCp.toIGVString() : null) + ", currentCP: " + (null != currentCP ? currentCP.toIGVString() : null)); - getNextRecord(requestedCpAsLong, matchWithNextCP); - if (requestedCpAsLong == currentCPAsLong) { - return getAnnotationsFromCurrentRecords(requestedCp); - } - /* - * requestedCP and currentCP are not equal - */ - } + int matchWithNextCP = Long.compare(requestedCpAsLong, nextCPAsLong); + if (nextCPAsLong > -1 && matchWithNextCP < 0) { + /* + * requestedCp is "less than" next CP + * return empty list here + */ + return annotationToReturn(null); + } + + getNextRecord(requestedCpAsLong, matchWithNextCP); + if (requestedCpAsLong == currentCPAsLong) { + return getAnnotationsFromCurrentRecords(requestedCp); } + /* + * requestedCP and currentCP are not equal + */ return annotationToReturn(null); } diff --git a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCF.java b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCF.java index 005a0875f..e4982f5df 100644 --- a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCF.java +++ b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCF.java @@ -62,8 +62,6 @@ public AnnotationSourceSnpEffVCF(RecordReader reader, int chrPositionInR @Override public String getAnnotation(long requestedCpAsLong, ChrPosition requestedCp) { -// logger.debug(reader.getFile().getName() + ": requestedCp is " + (null != requestedCp ? requestedCp.toIGVString() : null) + ", currentCP: " + (null != currentCP ? currentCP.toIGVString() : null) + ", nextCP: " + (null != nextCP ? nextCP.toIGVString() : null)); - /* * check to see if the records we currently have stored are a match */ @@ -73,70 +71,21 @@ public String getAnnotation(long requestedCpAsLong, ChrPosition requestedCp) { * we match on position * lets see if there are any records that match on ref and alt */ -// return getAnnotationsFromRecords(requestedCp); - if (requestedCp instanceof ChrPositionRefAlt reqCpRefAlt) { - String reqRef = reqCpRefAlt.getRef(); - String reqAlt = reqCpRefAlt.getAlt(); - for (String rec : currentRecords) { - String[] recArray = TabTokenizer.tokenize(rec, DEFAULT_DELIMITER); - String recRef = recArray[refPositionInFile]; - String recAlt = recArray[altPositionInFile]; - - if (recAlt.contains(",")) { - String[] recAltArray = recAlt.split(","); - for (String recAltValue : recAltArray) { - if (reqRef.equals(recRef) && reqAlt.equals(recAltValue)) { - return annotationToReturnWithAlt(rec, recAltValue); - } - } - } else { - if (reqRef.equals(recRef) && reqAlt.equals(recAlt)) { - return annotationToReturnWithAlt(rec, recAlt); - } - } - } - } - + return getAnnotationsFromRecords(requestedCp); } else { int matchWithNextCP = Long.compare(requestedCpAsLong, nextCPAsLong); if (nextCPAsLong > -1 && matchWithNextCP < 0) { } else { -// logger.debug(reader.getFile().getName() + ": getting next record. requestedCp: " + (null != requestedCp ? requestedCp.toIGVString() : null) + ", currentCP: " + (null != currentCP ? currentCP.toIGVString() : null)); getNextRecord(requestedCpAsLong, matchWithNextCP); if (requestedCpAsLong == currentCPAsLong) { /* * we match on position * lets see if there are any records that match on ref and alt */ - if (requestedCp instanceof ChrPositionRefAlt reqCpRefAlt) { - String reqRef = reqCpRefAlt.getRef(); - String reqAlt = reqCpRefAlt.getAlt(); - for (String rec : currentRecords) { - String[] recArray = TabTokenizer.tokenize(rec, DEFAULT_DELIMITER); - String recRef = recArray[refPositionInFile]; - String recAlt = recArray[altPositionInFile]; - - if (recAlt.contains(",")) { - String[] recAltArray = recAlt.split(","); - for (String recAltValue : recAltArray) { - if (reqRef.equals(recRef) && reqAlt.equals(recAltValue)) { - return annotationToReturnWithAlt(rec, recAltValue); - } - } - } else { - if (reqRef.equals(recRef) && reqAlt.equals(recAlt)) { - return annotationToReturnWithAlt(rec, recAlt); - } - } - } - } -// return getAnnotationsFromRecords(requestedCp); + return getAnnotationsFromRecords(requestedCp); } - /* - * requestedCP and currentCP are not equal - */ } } return annotationToReturn(null); @@ -151,7 +100,7 @@ private String getAnnotationsFromRecords(ChrPosition requestedCp){ String recRef = recArray[refPositionInFile]; String recAlt = recArray[altPositionInFile]; - if (recAlt.contains(",")) { + if (recAlt.indexOf(',') >= 0) { String[] recAltArray = recAlt.split(","); for (String recAltValue : recAltArray) { if (reqRef.equals(recRef) && reqAlt.equals(recAltValue)) { @@ -177,7 +126,6 @@ public String annotationToReturn(String[] record) { * dealing with a vcf file and assuming that the required annotation fields are in the INFO field * so get that and go from there. */ -// String[] recordArray = record.split("\t"); String info = record[7]; String alt = record[4]; @@ -221,32 +169,31 @@ public static String extractFieldsFromInfoField(String info, List fields if (StringUtils.isNullOrEmpty(worstConsequence)) { return emptyInfoFieldResult; } - /* - * we have our consequence - * split by pipe and then get our fields + * we have our consequences (comma-delimited) + * split by comma into consequences, then by pipe into fields */ - String[] consequenceArray = TabTokenizer.tokenize(worstConsequence, '|'); + String[] consequences = worstConsequence.split(","); for (String af : fields) { if (!StringUtils.isNullOrEmpty(af)) { - /* - * get position from map - */ String aflc = af.toLowerCase(); Integer arrayPosition = SNP_EFF_ANNOTATION_FIELDS_AND_POSITIONS.get(aflc); - if (null != arrayPosition && arrayPosition >= 0 && arrayPosition < consequenceArray.length) { - /* - * good - */ - String annotation = consequenceArray[arrayPosition]; - dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + af + "=" + annotation : af + "=" + annotation); - } else { -// System.out.println("Could not find field [" + af + "] in SNP_EFF_ANNOTATION_FIELDS_AND_POSITIONS map!"); -// System.out.println("arrayPosition.intValue(): " + arrayPosition.intValue() + ", consequenceArray.length: " + consequenceArray.length); - } + if (null != arrayPosition) { + StringBuilder fieldValues = new StringBuilder(); + for (String consequence : consequences) { + String[] consequenceArray = TabTokenizer.tokenize(consequence, '|'); + if (arrayPosition >= 0 && arrayPosition < consequenceArray.length) { + String annotation = consequenceArray[arrayPosition]; + fieldValues.append(fieldValues.isEmpty() ? annotation : "|" + annotation); + } + } + dataToReturn.append((!dataToReturn.isEmpty()) + ? FIELD_DELIMITER_TAB + af + "=" + fieldValues + : af + "=" + fieldValues); + } } } return (dataToReturn.isEmpty()) ? emptyInfoFieldResult : dataToReturn.toString(); @@ -283,19 +230,27 @@ public static String getWorstConsequence(String info, String alt) { * Pick the first one as that is the one with the highest effect as decreed by snpEff */ int annoIndex = info.indexOf("ANN="); + if (annoIndex < 0) { + return ""; + } int end = info.indexOf(FIELD_DELIMITER_SEMI_COLON, annoIndex); String ann = info.substring(annoIndex + 4, end == -1 ? info.length() : end); String[] annArray = ann.split(","); - String worstConsequence = ""; + Map worstByGene = new java.util.LinkedHashMap<>(); for (String aa : annArray) { if (aa.startsWith(alt)) { - worstConsequence = aa; - break; + String[] parts = TabTokenizer.tokenize(aa, '|'); + if (parts.length > 3) { + String gene = parts[3]; + if (!StringUtils.isNullOrEmpty(gene) && !worstByGene.containsKey(gene)) { + worstByGene.put(gene, aa); + } + } } } - return worstConsequence; + return String.join(",", worstByGene.values()); } @Override diff --git a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java index 3c050499c..8d91add62 100644 --- a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java +++ b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java @@ -19,6 +19,9 @@ public class AnnotationSourceTSV extends AnnotationSource { List headerLines; Map headerNameAndPosition; + private String[] fieldNames; + private int[] fieldPositions; + public AnnotationSourceTSV(RecordReader reader, int chrPositionInRecord, int positionPositionInRecord, int refPositionInFile, int altPositionInFile, String fieldNames, boolean chrStartsWithChr) { super(reader, chrPositionInRecord, positionPositionInRecord, refPositionInFile, altPositionInFile, chrStartsWithChr); @@ -46,6 +49,12 @@ public AnnotationSourceTSV(RecordReader reader, int chrPositionInRecord, if (headerNameAndPosition.isEmpty()) { throw new IllegalArgumentException("Could not find requested fields (" + fieldNames + ") in header: " + headerLine); } + // precompute arrays for fast extraction + this.fieldNames = headerNameAndPosition.keySet().toArray(new String[0]); + this.fieldPositions = new int[this.fieldNames.length]; + for (int i = 0; i < this.fieldNames.length; i++) { + this.fieldPositions[i] = headerNameAndPosition.get(this.fieldNames[i]); + } } /* @@ -96,20 +105,18 @@ public String annotationToReturn(String[] record) { /* * entries in the INFO field are delimited by ';' */ - return extractFieldsFromRecord(record, headerNameAndPosition); + return extractFieldsFromRecord(record, fieldNames, fieldPositions); } - public static String extractFieldsFromRecord(String[] record, Map fields) { + public static String extractFieldsFromRecord(String[] record, String[] fieldNames, int[] fieldPositions) { StringBuilder dataToReturn = new StringBuilder(); int recordLength = null != record ? record.length : 0; - if ( recordLength > 0 && null != fields) { -// String [] recordArray = TabTokenizer.tokenize(record); - for (Entry entry : fields.entrySet()) { - /* - * make sure that array length is not shorter than entry value - */ - if (recordLength > entry.getValue()) { - dataToReturn.append(( ! dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB : "").append(entry.getKey()).append("=").append(record[entry.getValue()]); + if (recordLength > 0 && null != fieldNames && null != fieldPositions) { + for (int i = 0; i < Math.min(fieldNames.length, fieldPositions.length); i++) { + int pos = fieldPositions[i]; + if (recordLength > pos) { + dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB : "") + .append(fieldNames[i]).append("=").append(record[pos]); } } } diff --git a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceVCF.java b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceVCF.java index dc15768ab..874c4a317 100644 --- a/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceVCF.java +++ b/qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceVCF.java @@ -2,7 +2,9 @@ import java.io.IOException; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.stream.Collectors; import org.qcmg.common.string.StringUtils; @@ -11,7 +13,8 @@ public class AnnotationSourceVCF extends AnnotationSource { public static final String FIELD_DELIMITER_SEMI_COLON = ";"; - + private static final int INFO_LENGTH_PARSE_THRESHOLD = 2000; + private static final int FIELDS_PARSE_THRESHOLD = 3; List annotationFields; @@ -52,24 +55,52 @@ public String annotationToReturn(String [] record) { public static String extractFieldsFromInfoField(String info, List fields, String emptyInfoFieldResult) { - if (StringUtils.isNullOrEmptyOrMissingData(info)) { + if (StringUtils.isNullOrEmptyOrMissingData(info) || fields == null) { return emptyInfoFieldResult; } - StringBuilder dataToReturn = new StringBuilder(); - for (String af : fields) { - if ( ! StringUtils.isNullOrEmpty(af)) { - int start = info.indexOf(af + "="); - if (start > -1) { - int end = info.indexOf(FIELD_DELIMITER_SEMI_COLON, start); - if (end == -1) { - dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + info.substring(start) : info.substring(start)); + boolean parseOnce = (fields.size() > FIELDS_PARSE_THRESHOLD) || info.length() > INFO_LENGTH_PARSE_THRESHOLD; + if ( ! parseOnce) { + StringBuilder dataToReturn = new StringBuilder(); + for (String af : fields) { + if (!StringUtils.isNullOrEmpty(af)) { + int start = info.indexOf(af + "="); + if (start > -1) { + int end = info.indexOf(FIELD_DELIMITER_SEMI_COLON, start); + if (end == -1) { + dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + info.substring(start) : info.substring(start)); + } else { + dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + info.substring(start, end) : info.substring(start, end)); + } } else { - dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + info.substring(start, end) : info.substring(start, end)); + dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + af + "=" : af + "="); } - } else { - dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + af + "=" : af + "="); } } + return (dataToReturn.isEmpty()) ? emptyInfoFieldResult : dataToReturn.toString(); + } + Map infoMap = new HashMap<>(); + int start = 0; + while (start <= info.length()) { + int end = info.indexOf(FIELD_DELIMITER_SEMI_COLON, start); + if (end == -1) end = info.length(); + + String token = info.substring(start, end); + int eq = token.indexOf('='); + if (eq > -1) { + infoMap.put(token.substring(0, eq), token.substring(eq + 1)); + } else if (!token.isEmpty()) { + infoMap.put(token, ""); + } + + start = end + 1; + } + StringBuilder dataToReturn = new StringBuilder(); + for (String af : fields) { + if (!StringUtils.isNullOrEmpty(af)) { + String value = infoMap.get(af); + String entry = (value != null) ? af + "=" + value : af + "="; + dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB : "").append(entry); + } } return (dataToReturn.isEmpty()) ? emptyInfoFieldResult : dataToReturn.toString(); } diff --git a/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCFTest.java b/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCFTest.java index 0921607aa..9ed45e930 100644 --- a/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCFTest.java +++ b/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceSnpEffVCFTest.java @@ -2,14 +2,25 @@ import static org.junit.Assert.assertEquals; +import java.io.File; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.util.Arrays; import java.util.List; +import org.junit.Rule; import org.junit.Test; import au.edu.qimr.qannotate.nanno.AnnotationSourceSnpEffVCF; +import org.junit.rules.TemporaryFolder; +import org.qcmg.common.model.ChrPositionRefAlt; +import org.qcmg.common.util.ChrPositionUtils; +import org.qcmg.qio.record.StringFileReader; public class AnnotationSourceSnpEffVCFTest { + + @Rule + public final TemporaryFolder testFolder = new TemporaryFolder(); @Test public void extractFieldsFromInfoField() { @@ -47,4 +58,192 @@ public void extractFieldsFromInfoField2() { assertEquals("cdna_position=", AnnotationSourceSnpEffVCF.extractFieldsFromInfoField(info, List.of("cdna_position"), ".", alt)); } + @Test + public void getWorstConsequenceMultipleGenesCommaDelimited() { + String info = "AC=2;AF=1.00;AN=2;ANN=" + + "G|missense_variant|MODERATE|GENE1|ID1|transcript|TR1|protein_coding|1/1|c.1A>G|||||10|," + + "G|synonymous_variant|LOW|GENE1|ID1|transcript|TR2|protein_coding|1/1|c.1A>G|||||10|," + + "G|stop_gained|HIGH|GENE2|ID2|transcript|TR3|protein_coding|1/1|c.1A>G|||||10|," + + "T|synonymous_variant|LOW|GENE3|ID3|transcript|TR4|protein_coding|1/1|c.1A>T|||||10|"; + + String alt = "G"; + String expected = "G|missense_variant|MODERATE|GENE1|ID1|transcript|TR1|protein_coding|1/1|c.1A>G|||||10|," + + "G|stop_gained|HIGH|GENE2|ID2|transcript|TR3|protein_coding|1/1|c.1A>G|||||10|"; + + assertEquals(expected, AnnotationSourceSnpEffVCF.getWorstConsequence(info, alt)); + } + + @Test + public void getAnnotationMultiGene() throws Exception { + File vcf = testFolder.newFile("snpeff.vcf"); + + String line = "chr1\t100\t.\tA\tG,T\t.\t.\tANN=G|downstream_gene_variant|MODIFIER|NADK|ENSG00000008130.15|transcript|ENST00000341426.9|protein_coding||c.*4307G>C|||||2635|," + + "G|intergenic_region|MODIFIER|CDK11A-NADK|ENSG00000008128.23-ENSG00000008130.15|intergenic_region|ENSG00000008128.23-ENSG00000008130.15|||n.1748597C>G||||||"; + + Files.write(vcf.toPath(), List.of(line), StandardCharsets.UTF_8); + + try (StringFileReader reader = new StringFileReader(vcf)) { + AnnotationSourceSnpEffVCF source = new AnnotationSourceSnpEffVCF( + reader, + 1, // chrPositionInRecord (1-based) + 2, // positionPositionInRecord (1-based) + 4, // refPositionInFile (1-based) + 5, // altPositionInFile (1-based) + "effect,gene_name", + true + ); + + ChrPositionRefAlt cp = new ChrPositionRefAlt("chr1", 100, 100, "A", "G"); + long cpAsLong = ChrPositionUtils.convertContigAndPositionToLong("1", 100); + + String first = source.getAnnotation(cpAsLong, cp); + String second = source.getAnnotation(cpAsLong, cp); + + assertEquals("effect=downstream_gene_variant|intergenic_region\tgene_name=NADK|CDK11A-NADK", first); + assertEquals("effect=downstream_gene_variant|intergenic_region\tgene_name=NADK|CDK11A-NADK", second); + } + } + + @Test + public void getAnnotationUsesSameLogicForCurrentAndNext() throws Exception { + File vcf = testFolder.newFile("snpeff.vcf"); + + String line = "chr1\t100\t.\tA\tG,T\t.\t.\tANN=" + + "G|missense_variant|MODERATE|GENE|ID|transcript|TR|protein_coding|1/1|c.1A>G|||||10|," + + "T|synonymous_variant|LOW|GENE|ID|transcript|TR|protein_coding|1/1|c.1A>T|||||10|"; + + Files.write(vcf.toPath(), List.of(line), StandardCharsets.UTF_8); + + try (StringFileReader reader = new StringFileReader(vcf)) { + AnnotationSourceSnpEffVCF source = new AnnotationSourceSnpEffVCF( + reader, + 1, // chrPositionInRecord (1-based) + 2, // positionPositionInRecord (1-based) + 4, // refPositionInFile (1-based) + 5, // altPositionInFile (1-based) + "effect", + true + ); + + ChrPositionRefAlt cp = new ChrPositionRefAlt("chr1", 100, 100, "A", "T"); + long cpAsLong = ChrPositionUtils.convertContigAndPositionToLong("1", 100); + + String first = source.getAnnotation(cpAsLong, cp); + String second = source.getAnnotation(cpAsLong, cp); + + assertEquals("effect=synonymous_variant", first); + assertEquals("effect=synonymous_variant", second); + + cp = new ChrPositionRefAlt("chr1", 100, 100, "A", "G"); + + first = source.getAnnotation(cpAsLong, cp); + second = source.getAnnotation(cpAsLong, cp); + + assertEquals("effect=missense_variant", first); + assertEquals("effect=missense_variant", second); + } + } + + @Test + public void getAnnotationNoMatchReturnsEmpty() throws Exception { + File vcf = testFolder.newFile("snpeff-no-match.vcf"); + + String line = "chr1\t100\t.\tA\tG,T\t.\t.\tANN=" + + "G|missense_variant|MODERATE|GENE|ID|transcript|TR|protein_coding|1/1|c.1A>G|||||10|," + + "T|synonymous_variant|LOW|GENE|ID|transcript|TR|protein_coding|1/1|c.1A>T|||||10|"; + + Files.write(vcf.toPath(), List.of(line), StandardCharsets.UTF_8); + + try (StringFileReader reader = new StringFileReader(vcf)) { + AnnotationSourceSnpEffVCF source = new AnnotationSourceSnpEffVCF( + reader, + 1, // chrPositionInRecord (1-based) + 2, // positionPositionInRecord (1-based) + 4, // refPositionInFile (1-based) + 5, // altPositionInFile (1-based) + "effect", + true + ); + + // same position, but alt does not exist in the record + ChrPositionRefAlt cp = new ChrPositionRefAlt("chr1", 100, 100, "A", "C"); + long cpAsLong = ChrPositionUtils.convertContigAndPositionToLong("1", 100); + + assertEquals("effect=", source.getAnnotation(cpAsLong, cp)); + } + } + + @Test + public void getAnnotationNoPositionMatchReturnsEmpty() throws Exception { + File vcf = testFolder.newFile("snpeff-no-pos.vcf"); + + String line = "chr1\t100\t.\tA\tG\t.\t.\tANN=" + + "G|missense_variant|MODERATE|GENE|ID|transcript|TR|protein_coding|1/1|c.1A>G|||||10|"; + + Files.write(vcf.toPath(), List.of(line), StandardCharsets.UTF_8); + + try (StringFileReader reader = new StringFileReader(vcf)) { + AnnotationSourceSnpEffVCF source = new AnnotationSourceSnpEffVCF( + reader, + 1, // chrPositionInRecord (1-based) + 2, // positionPositionInRecord (1-based) + 4, // refPositionInFile (1-based) + 5, // altPositionInFile (1-based) + "effect", + true + ); + + // different position (no match) + ChrPositionRefAlt cp = new ChrPositionRefAlt("chr1", 101, 101, "A", "G"); + long cpAsLong = ChrPositionUtils.convertContigAndPositionToLong("1", 101); + + assertEquals("effect=", source.getAnnotation(cpAsLong, cp)); + } + } + + @Test + public void getWorstConsequenceSkipsDuplicateGeneUsesFirst() { + String info = "ANN=" + + "G|missense_variant|MODERATE|GENE1|ID1|transcript|TR1|protein_coding|1/1|c.1A>G|||||10|," + + "G|stop_gained|HIGH|GENE1|ID1|transcript|TR2|protein_coding|1/1|c.1A>G|||||10|"; + + String alt = "G"; + String expected = "G|missense_variant|MODERATE|GENE1|ID1|transcript|TR1|protein_coding|1/1|c.1A>G|||||10|"; + + assertEquals(expected, AnnotationSourceSnpEffVCF.getWorstConsequence(info, alt)); + } + + @Test + public void getWorstConsequenceSkipsEmptyGene() { + String info = "ANN=" + + "G|missense_variant|MODERATE||ID1|transcript|TR1|protein_coding|1/1|c.1A>G|||||10|," + + "G|stop_gained|HIGH|GENE2|ID2|transcript|TR2|protein_coding|1/1|c.1A>G|||||10|"; + + String alt = "G"; + String expected = "G|stop_gained|HIGH|GENE2|ID2|transcript|TR2|protein_coding|1/1|c.1A>G|||||10|"; + + assertEquals(expected, AnnotationSourceSnpEffVCF.getWorstConsequence(info, alt)); + } + + @Test + public void extractFieldsFromInfoFieldMultiGeneAltNotPresent() { + String info = "ANN=" + + "G|missense_variant|MODERATE|GENE1|ID1|transcript|TR1|protein_coding|1/1|c.1A>G|||||10|," + + "G|stop_gained|HIGH|GENE2|ID2|transcript|TR2|protein_coding|1/1|c.1A>G|||||10|"; + + assertEquals("effect=", AnnotationSourceSnpEffVCF.extractFieldsFromInfoField(info, List.of("effect"), "effect=", "T")); + } + + @Test + public void extractFieldsFromInfoFieldMultiGeneOrderIsStable() { + // SnpEff already orders consequences by severity; we preserve first-seen order. + String info = "ANN=" + + "G|missense_variant|MODERATE|GENE_B|ID1|transcript|TR1|protein_coding|1/1|c.1A>G|||||10|," + + "G|stop_gained|HIGH|GENE_A|ID2|transcript|TR2|protein_coding|1/1|c.1A>G|||||10|"; + + String result = AnnotationSourceSnpEffVCF.extractFieldsFromInfoField(info, List.of("effect", "gene_name"), "effect=\tgene_name=", "G"); + + assertEquals("effect=missense_variant|stop_gained\tgene_name=GENE_B|GENE_A", result); + } + } diff --git a/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java b/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java index c1cc590a1..b45dc0562 100644 --- a/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java +++ b/qannotate/test/au/edu/qimr/qannotate/nanno/AnnotationSourceTSVTest.java @@ -14,28 +14,37 @@ public class AnnotationSourceTSVTest { @Test public void extractFieldsFromRecord() { - assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(null, null)); - assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{}, null)); - assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"blah"}, null)); - assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(null, new HashMap<>())); - assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{}, new HashMap<>())); - Map fields = new HashMap<>(); - fields.put("foo", 0); - assertEquals("foo=short_record", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"short_record"}, fields)); - assertEquals("foo=slightly_longer", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer","record"}, fields)); - fields.put("foo", 10); - assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer","record"}, fields)); - assertEquals("foo=", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", ""}, fields)); - assertEquals("foo=bar", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fields)); - fields.put("foo2", 2); - assertEquals("foo=bar\tfoo2=", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fields)); - fields.put("foo2", 1); - assertEquals("foo=bar\tfoo2=record", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fields)); - fields.put("foo2", 11); - assertEquals("foo=bar", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fields)); - fields.put("foo", 100); - assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer","record","","","","","","","","","bar"}, fields)); + assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(null, null, null)); + assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{}, null, null)); + assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"blah"}, null, null)); + assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(null, new String[]{}, new int[]{})); + assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{}, new String[]{}, new int[]{})); + String[] fieldNames = new String[]{"foo"}; + int[] fieldPositions = new int[]{0}; + assertEquals("foo=short_record", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"short_record"}, fieldNames, fieldPositions)); + assertEquals("foo=slightly_longer", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer","record"}, fieldNames, fieldPositions)); + fieldPositions = new int[]{10}; + assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer","record"}, fieldNames, fieldPositions)); + assertEquals("foo=", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", ""}, fieldNames, fieldPositions)); + assertEquals("foo=bar", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fieldNames, fieldPositions)); + fieldNames = new String[]{"foo", "foo2"}; + fieldPositions = new int[]{10, 2}; + assertEquals("foo=bar\tfoo2=", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fieldNames, fieldPositions)); + fieldPositions = new int[]{10, 1}; + assertEquals("foo=bar\tfoo2=record", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fieldNames, fieldPositions)); + fieldPositions = new int[]{10, 11}; + assertEquals("foo=bar", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fieldNames, fieldPositions)); + fieldPositions = new int[]{100, 11}; + assertEquals("", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer","record","","","","","","","","","bar"}, fieldNames, fieldPositions)); + // mismatched array lengths - fieldNames longer than fieldPositions + fieldNames = new String[]{"foo", "foo2", "foo3"}; + fieldPositions = new int[]{10}; + assertEquals("foo=bar", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fieldNames, fieldPositions)); + // mismatched array lengths - fieldPositions longer than fieldNames + fieldNames = new String[]{"foo"}; + fieldPositions = new int[]{10, 1, 0}; + assertEquals("foo=bar", AnnotationSourceTSV.extractFieldsFromRecord(new String[]{"slightly_longer", "record", "", "", "", "", "", "", "", "", "bar"}, fieldNames, fieldPositions)); } @Test