Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 16 additions & 17 deletions qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSource.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,25 +78,24 @@ public String getAnnotation(long requestedCpAsLong, ChrPosition requestedCp) {
* lets see if there are any records that match on ref and alt
*/
return getAnnotationsFromCurrentRecords(requestedCp);
}

} else {
int matchWithNextCP = Long.compare(requestedCpAsLong, nextCPAsLong);
if (nextCPAsLong > -1 && matchWithNextCP < 0) {
/*
* requestedCp is "less than" next CP
* return empty list here
*/
} else {
// logger.debug(reader.getFile().getName() + ": getting next record. requestedCp: " + (null != requestedCp ? requestedCp.toIGVString() : null) + ", currentCP: " + (null != currentCP ? currentCP.toIGVString() : null));
getNextRecord(requestedCpAsLong, matchWithNextCP);
if (requestedCpAsLong == currentCPAsLong) {
return getAnnotationsFromCurrentRecords(requestedCp);
}
/*
* requestedCP and currentCP are not equal
*/
}
int matchWithNextCP = Long.compare(requestedCpAsLong, nextCPAsLong);
if (nextCPAsLong > -1 && matchWithNextCP < 0) {
/*
* requestedCp is "less than" next CP
* return empty list here
*/
return annotationToReturn(null);
}

getNextRecord(requestedCpAsLong, matchWithNextCP);
if (requestedCpAsLong == currentCPAsLong) {
return getAnnotationsFromCurrentRecords(requestedCp);
}
/*
* requestedCP and currentCP are not equal
*/
return annotationToReturn(null);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ public AnnotationSourceSnpEffVCF(RecordReader<String> reader, int chrPositionInR
@Override
public String getAnnotation(long requestedCpAsLong, ChrPosition requestedCp) {

// logger.debug(reader.getFile().getName() + ": requestedCp is " + (null != requestedCp ? requestedCp.toIGVString() : null) + ", currentCP: " + (null != currentCP ? currentCP.toIGVString() : null) + ", nextCP: " + (null != nextCP ? nextCP.toIGVString() : null));

/*
* check to see if the records we currently have stored are a match
*/
Expand All @@ -73,70 +71,21 @@ public String getAnnotation(long requestedCpAsLong, ChrPosition requestedCp) {
* we match on position
* lets see if there are any records that match on ref and alt
*/
// return getAnnotationsFromRecords(requestedCp);
if (requestedCp instanceof ChrPositionRefAlt reqCpRefAlt) {
String reqRef = reqCpRefAlt.getRef();
String reqAlt = reqCpRefAlt.getAlt();
for (String rec : currentRecords) {
String[] recArray = TabTokenizer.tokenize(rec, DEFAULT_DELIMITER);
String recRef = recArray[refPositionInFile];
String recAlt = recArray[altPositionInFile];

if (recAlt.contains(",")) {
String[] recAltArray = recAlt.split(",");
for (String recAltValue : recAltArray) {
if (reqRef.equals(recRef) && reqAlt.equals(recAltValue)) {
return annotationToReturnWithAlt(rec, recAltValue);
}
}
} else {
if (reqRef.equals(recRef) && reqAlt.equals(recAlt)) {
return annotationToReturnWithAlt(rec, recAlt);
}
}
}
}

return getAnnotationsFromRecords(requestedCp);
} else {
int matchWithNextCP = Long.compare(requestedCpAsLong, nextCPAsLong);
if (nextCPAsLong > -1 && matchWithNextCP < 0) {

} else {

// logger.debug(reader.getFile().getName() + ": getting next record. requestedCp: " + (null != requestedCp ? requestedCp.toIGVString() : null) + ", currentCP: " + (null != currentCP ? currentCP.toIGVString() : null));
getNextRecord(requestedCpAsLong, matchWithNextCP);
if (requestedCpAsLong == currentCPAsLong) {
/*
* we match on position
* lets see if there are any records that match on ref and alt
*/
if (requestedCp instanceof ChrPositionRefAlt reqCpRefAlt) {
String reqRef = reqCpRefAlt.getRef();
String reqAlt = reqCpRefAlt.getAlt();
for (String rec : currentRecords) {
String[] recArray = TabTokenizer.tokenize(rec, DEFAULT_DELIMITER);
String recRef = recArray[refPositionInFile];
String recAlt = recArray[altPositionInFile];

if (recAlt.contains(",")) {
String[] recAltArray = recAlt.split(",");
for (String recAltValue : recAltArray) {
if (reqRef.equals(recRef) && reqAlt.equals(recAltValue)) {
return annotationToReturnWithAlt(rec, recAltValue);
}
}
} else {
if (reqRef.equals(recRef) && reqAlt.equals(recAlt)) {
return annotationToReturnWithAlt(rec, recAlt);
}
}
}
}
// return getAnnotationsFromRecords(requestedCp);
return getAnnotationsFromRecords(requestedCp);
}
/*
* requestedCP and currentCP are not equal
*/
}
}
return annotationToReturn(null);
Expand All @@ -151,7 +100,7 @@ private String getAnnotationsFromRecords(ChrPosition requestedCp){
String recRef = recArray[refPositionInFile];
String recAlt = recArray[altPositionInFile];

if (recAlt.contains(",")) {
if (recAlt.indexOf(',') >= 0) {
String[] recAltArray = recAlt.split(",");
for (String recAltValue : recAltArray) {
if (reqRef.equals(recRef) && reqAlt.equals(recAltValue)) {
Expand All @@ -177,7 +126,6 @@ public String annotationToReturn(String[] record) {
* dealing with a vcf file and assuming that the required annotation fields are in the INFO field
* so get that and go from there.
*/
// String[] recordArray = record.split("\t");
String info = record[7];
String alt = record[4];

Expand Down Expand Up @@ -221,32 +169,31 @@ public static String extractFieldsFromInfoField(String info, List<String> fields
if (StringUtils.isNullOrEmpty(worstConsequence)) {
return emptyInfoFieldResult;
}

/*
* we have our consequence
* split by pipe and then get our fields
* we have our consequences (comma-delimited)
* split by comma into consequences, then by pipe into fields
*/
String[] consequenceArray = TabTokenizer.tokenize(worstConsequence, '|');
String[] consequences = worstConsequence.split(",");

for (String af : fields) {
if (!StringUtils.isNullOrEmpty(af)) {

/*
* get position from map
*/
String aflc = af.toLowerCase();
Integer arrayPosition = SNP_EFF_ANNOTATION_FIELDS_AND_POSITIONS.get(aflc);
if (null != arrayPosition && arrayPosition >= 0 && arrayPosition < consequenceArray.length) {
/*
* good
*/
String annotation = consequenceArray[arrayPosition];
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + af + "=" + annotation : af + "=" + annotation);
} else {
// System.out.println("Could not find field [" + af + "] in SNP_EFF_ANNOTATION_FIELDS_AND_POSITIONS map!");
// System.out.println("arrayPosition.intValue(): " + arrayPosition.intValue() + ", consequenceArray.length: " + consequenceArray.length);
}

if (null != arrayPosition) {
StringBuilder fieldValues = new StringBuilder();
for (String consequence : consequences) {
String[] consequenceArray = TabTokenizer.tokenize(consequence, '|');
if (arrayPosition >= 0 && arrayPosition < consequenceArray.length) {
String annotation = consequenceArray[arrayPosition];
fieldValues.append(fieldValues.isEmpty() ? annotation : "|" + annotation);
}
}
dataToReturn.append((!dataToReturn.isEmpty())
? FIELD_DELIMITER_TAB + af + "=" + fieldValues
: af + "=" + fieldValues);
}
}
}
return (dataToReturn.isEmpty()) ? emptyInfoFieldResult : dataToReturn.toString();
Expand Down Expand Up @@ -283,19 +230,27 @@ public static String getWorstConsequence(String info, String alt) {
* Pick the first one as that is the one with the highest effect as decreed by snpEff
*/
int annoIndex = info.indexOf("ANN=");
if (annoIndex < 0) {
return "";
}
int end = info.indexOf(FIELD_DELIMITER_SEMI_COLON, annoIndex);
String ann = info.substring(annoIndex + 4, end == -1 ? info.length() : end);


String[] annArray = ann.split(",");
String worstConsequence = "";
Map<String, String> worstByGene = new java.util.LinkedHashMap<>();
for (String aa : annArray) {
if (aa.startsWith(alt)) {
Copy link

Copilot AI Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getWorstConsequence uses aa.startsWith(alt) to select consequences for an allele. This can incorrectly match alleles where alt is a prefix (e.g., requested alt A will also match an ANN entry starting with AA|...). To avoid mis-annotating multi-allelic records, match the allele token exactly (e.g., aa.startsWith(alt + "|") or parse up to the first '|').

Suggested change
if (aa.startsWith(alt)) {
int pipeIndex = aa.indexOf('|');
if (pipeIndex <= 0) {
// Malformed ANN entry or missing allele token; skip
continue;
}
String alleleToken = aa.substring(0, pipeIndex);
if (alleleToken.equals(alt)) {

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Coplit please implement your suggestion

worstConsequence = aa;
break;
String[] parts = TabTokenizer.tokenize(aa, '|');
if (parts.length > 3) {
String gene = parts[3];
if (!StringUtils.isNullOrEmpty(gene) && !worstByGene.containsKey(gene)) {
worstByGene.put(gene, aa);
}
}
}
}
return worstConsequence;
return String.join(",", worstByGene.values());
}

@Override
Expand Down
27 changes: 17 additions & 10 deletions qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceTSV.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ public class AnnotationSourceTSV extends AnnotationSource {
List<String> headerLines;
Map<String, Integer> headerNameAndPosition;

private String[] fieldNames;
private int[] fieldPositions;

public AnnotationSourceTSV(RecordReader<String> reader, int chrPositionInRecord, int positionPositionInRecord,
int refPositionInFile, int altPositionInFile, String fieldNames, boolean chrStartsWithChr) {
super(reader, chrPositionInRecord, positionPositionInRecord, refPositionInFile, altPositionInFile, chrStartsWithChr);
Expand Down Expand Up @@ -46,6 +49,12 @@ public AnnotationSourceTSV(RecordReader<String> reader, int chrPositionInRecord,
if (headerNameAndPosition.isEmpty()) {
throw new IllegalArgumentException("Could not find requested fields (" + fieldNames + ") in header: " + headerLine);
}
// precompute arrays for fast extraction
this.fieldNames = headerNameAndPosition.keySet().toArray(new String[0]);
this.fieldPositions = new int[this.fieldNames.length];
for (int i = 0; i < this.fieldNames.length; i++) {
this.fieldPositions[i] = headerNameAndPosition.get(this.fieldNames[i]);
}
Comment on lines +52 to +57
Copy link

Copilot AI Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The precomputed fieldNames array is built from headerNameAndPosition.keySet(), which has non-deterministic iteration order for a HashMap. This can cause extracted annotations to be emitted in an arbitrary order that may not match the user-requested field order (and can diverge from emptyRecordResult, which preserves the input order). Consider building fieldNames/fieldPositions by iterating over the requested fieldNames string (split by ',') or using an order-preserving map (e.g., LinkedHashMap).

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot open a new pull request to apply changes based on this feedbacl

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot open a new pull request to apply changes based on this feedback

}

/*
Expand Down Expand Up @@ -96,20 +105,18 @@ public String annotationToReturn(String[] record) {
/*
* entries in the INFO field are delimited by ';'
*/
return extractFieldsFromRecord(record, headerNameAndPosition);
return extractFieldsFromRecord(record, fieldNames, fieldPositions);
}

public static String extractFieldsFromRecord(String[] record, Map<String, Integer> fields) {
public static String extractFieldsFromRecord(String[] record, String[] fieldNames, int[] fieldPositions) {
StringBuilder dataToReturn = new StringBuilder();
int recordLength = null != record ? record.length : 0;
if ( recordLength > 0 && null != fields) {
// String [] recordArray = TabTokenizer.tokenize(record);
for (Entry<String, Integer> entry : fields.entrySet()) {
/*
* make sure that array length is not shorter than entry value
*/
if (recordLength > entry.getValue()) {
dataToReturn.append(( ! dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB : "").append(entry.getKey()).append("=").append(record[entry.getValue()]);
if (recordLength > 0 && null != fieldNames && null != fieldPositions) {
for (int i = 0; i < Math.min(fieldNames.length, fieldPositions.length); i++) {
int pos = fieldPositions[i];
if (recordLength > pos) {
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB : "")
Comment on lines +111 to +118
Copy link

Copilot AI Feb 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

extractFieldsFromRecord iterates over fieldNames.length but indexes into fieldPositions[i] without verifying the arrays are the same length. If a caller passes mismatched arrays, this will throw ArrayIndexOutOfBoundsException. Consider validating lengths up front (and returning "" / throwing IllegalArgumentException), or iterating up to Math.min(fieldNames.length, fieldPositions.length).

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot open a new pull request to apply changes based on this feedback

.append(fieldNames[i]).append("=").append(record[pos]);
}
}
}
Expand Down
57 changes: 44 additions & 13 deletions qannotate/src/au/edu/qimr/qannotate/nanno/AnnotationSourceVCF.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import org.qcmg.common.string.StringUtils;
Expand All @@ -11,7 +13,8 @@
public class AnnotationSourceVCF extends AnnotationSource {

public static final String FIELD_DELIMITER_SEMI_COLON = ";";

private static final int INFO_LENGTH_PARSE_THRESHOLD = 2000;
private static final int FIELDS_PARSE_THRESHOLD = 3;


List<String> annotationFields;
Expand Down Expand Up @@ -52,24 +55,52 @@ public String annotationToReturn(String [] record) {


public static String extractFieldsFromInfoField(String info, List<String> fields, String emptyInfoFieldResult) {
if (StringUtils.isNullOrEmptyOrMissingData(info)) {
if (StringUtils.isNullOrEmptyOrMissingData(info) || fields == null) {
return emptyInfoFieldResult;
}
StringBuilder dataToReturn = new StringBuilder();
for (String af : fields) {
if ( ! StringUtils.isNullOrEmpty(af)) {
int start = info.indexOf(af + "=");
if (start > -1) {
int end = info.indexOf(FIELD_DELIMITER_SEMI_COLON, start);
if (end == -1) {
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + info.substring(start) : info.substring(start));
boolean parseOnce = (fields.size() > FIELDS_PARSE_THRESHOLD) || info.length() > INFO_LENGTH_PARSE_THRESHOLD;
if ( ! parseOnce) {
StringBuilder dataToReturn = new StringBuilder();
for (String af : fields) {
if (!StringUtils.isNullOrEmpty(af)) {
int start = info.indexOf(af + "=");
if (start > -1) {
int end = info.indexOf(FIELD_DELIMITER_SEMI_COLON, start);
if (end == -1) {
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + info.substring(start) : info.substring(start));
} else {
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + info.substring(start, end) : info.substring(start, end));
}
} else {
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + info.substring(start, end) : info.substring(start, end));
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + af + "=" : af + "=");
}
} else {
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB + af + "=" : af + "=");
}
}
return (dataToReturn.isEmpty()) ? emptyInfoFieldResult : dataToReturn.toString();
}
Map<String, String> infoMap = new HashMap<>();
int start = 0;
while (start <= info.length()) {
int end = info.indexOf(FIELD_DELIMITER_SEMI_COLON, start);
if (end == -1) end = info.length();

String token = info.substring(start, end);
int eq = token.indexOf('=');
if (eq > -1) {
infoMap.put(token.substring(0, eq), token.substring(eq + 1));
} else if (!token.isEmpty()) {
infoMap.put(token, "");
}

start = end + 1;
}
StringBuilder dataToReturn = new StringBuilder();
for (String af : fields) {
if (!StringUtils.isNullOrEmpty(af)) {
String value = infoMap.get(af);
String entry = (value != null) ? af + "=" + value : af + "=";
dataToReturn.append((!dataToReturn.isEmpty()) ? FIELD_DELIMITER_TAB : "").append(entry);
}
}
return (dataToReturn.isEmpty()) ? emptyInfoFieldResult : dataToReturn.toString();
}
Expand Down
Loading