Skip to content

Commit

Permalink
Handle GAF lines missing trailing tabs. Fixes #11. (#12)
Browse files Browse the repository at this point in the history
  • Loading branch information
balhoff authored Jul 9, 2019
1 parent 813430f commit 088b860
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 10 deletions.
23 changes: 13 additions & 10 deletions src/main/scala/org/geneontology/gaferencer/Gaferencer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -102,16 +102,19 @@ object Gaferencer extends LazyLogging {

/**
  * Parses a single tab-separated GAF line into a set of (term-with-taxon, extended annotation) pairs.
  *
  * Columns are read by zero-based index:
  *  - 4: the annotated term; the "GO:" prefix is rewritten to `GOPrefix`
  *  - 8: the aspect, looked up in `AspectToGAFRelation` to obtain the relation
  *  - 12: the taxon column; only the first '|'-separated entry is used, with "taxon:" rewritten to `TaxonPrefix`
  *  - 15: the optional annotation-extension column; '|' separates conjunctions and ',' separates
  *    the links inside a conjunction
  *
  * Lines with fewer than 13 columns produce an empty set instead of failing on an out-of-range index.
  * Lines that lack column 15 (for example because trailing tabs were stripped) are handled as if that
  * column were empty. Rows whose first taxon entry is blank are logged and skipped.
  *
  * @param line           one raw GAF row
  * @param propertyByName passed through to `parseLink` for each extension entry
  * @param curieUtil      passed through to `parseLink` for each extension entry
  * @return one pair per conjunction in the extension column, or an empty set if the row is skipped
  */
def processLine(line: String, propertyByName: Map[String, OWLObjectProperty], curieUtil: MultiCurieUtil): Set[(TermWithTaxon, ExtendedAnnotation)] = {
  // Limit -1 keeps trailing empty fields, so items.size reflects the real column count.
  val items = line.split("\t", -1)
  // All column access must stay behind this guard: indices 4, 8 and 12 are read unconditionally below.
  if (items.size < 13) Set.empty
  else {
    val taxonField = items(12)
    // split(..., -1) always returns at least one element, so headOption alone can never be empty;
    // the nonEmpty filter is what makes a blank taxon column actually count as "badly formatted".
    val maybeTaxon = taxonField.split(raw"\|", -1).headOption
      .map(_.trim)
      .filter(_.nonEmpty)
      .map(_.replaceAllLiterally("taxon:", TaxonPrefix))
      .map(Class(_))
    if (maybeTaxon.isEmpty) logger.warn(s"Skipping row with badly formatted taxon: $taxonField")
    val aspect = items(8).trim
    val relation = AspectToGAFRelation(aspect)
    val term = Class(items(4).trim.replaceAllLiterally("GO:", GOPrefix))
    // A missing extension column is treated as an empty one: a single conjunction of "".
    val extensionField = if (items.size > 15) items(15) else ""
    (for {
      taxon <- maybeTaxon.toIterable
      conjunction <- extensionField.split("\\|", -1)
      links = conjunction.split(",", -1).toSet[String].map(_.trim).flatMap(parseLink(_, propertyByName, curieUtil).toSet)
    } yield (TermWithTaxon(term, taxon), ExtendedAnnotation(Link(relation, term), taxon, links))).toSet
  }
}

def parseLink(text: String, propertyByName: Map[String, OWLObjectProperty], curieUtil: MultiCurieUtil): Option[Link] = text match {
Expand Down
6 changes: 6 additions & 0 deletions src/test/scala/org/geneontology/gaferencer/TestParsing.scala
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ object TestParsing extends TestSuite {
// line2-line4: rows with an annotation-extension column (index 15) followed by a trailing tab.
// line2 has a single extension link.
val line2 = "FOO\tFOO:2\tfoo2\t\tGO:0006412\tTEST:1\tIDA\t\tP\t\t\tgene\ttaxon:10090\t20100209\tFOO\tregulates(GO:0051594)\t"
// line3 has two comma-separated links in one conjunction.
val line3 = "FOO\tFOO:2\tfoo2\t\tGO:0006412\tTEST:1\tIDA\t\tP\t\t\tgene\ttaxon:10090\t20100209\tFOO\tregulates(GO:0051594),occurs_in(GO:0005739)\t"
// line4 has two '|'-separated conjunctions, each with two links.
val line4 = "FOO\tFOO:2\tfoo2\t\tGO:0006412\tTEST:1\tIDA\t\tP\t\t\tgene\ttaxon:10090\t20100209\tFOO\tregulates(GO:0051594),occurs_in(GO:0005739)|occurs_in(GO:0005739),negatively_regulates(GO:0051594)\t"
// line5 stops after column index 14: no extension column and no trailing tabs (15 fields).
val line5 = "FOO\tFOO:1\tfoo1\t\tGO:0006412\tTEST:1\tIDA\t\tP\t\t\tgene\ttaxon:10090\t20100209\tFOO"
// line6 is truncated after the term column (5 fields), so it has no aspect or taxon column.
val line6 = "FOO\tFOO:1\tfoo1\t\tGO:0006412"
val properties = Map(
"regulates" -> Regulates,
"negatively_regulates" -> NegativelyRegulates,
Expand All @@ -41,6 +43,10 @@ object TestParsing extends TestSuite {
assert(res4.size == 2)
assert(res4(TermWithTaxon(GOTerm, Mouse) -> ExtendedAnnotation(Link(relation, GOTerm), Mouse, Set(Link(Regulates, Class("http://purl.obolibrary.org/obo/GO_0051594")), Link(OccursIn, Class("http://purl.obolibrary.org/obo/GO_0005739"))))))
assert(res4(TermWithTaxon(GOTerm, Mouse) -> ExtendedAnnotation(Link(relation, GOTerm), Mouse, Set(Link(NegativelyRegulates, Class("http://purl.obolibrary.org/obo/GO_0051594")), Link(OccursIn, Class("http://purl.obolibrary.org/obo/GO_0005739"))))))
// A row without the extension column must still yield exactly one annotation.
val res5 = Gaferencer.processLine(line5, properties, cu)
assert(res5.size == 1)
// A row too short to contain the taxon column must yield no annotations rather than throw.
val res6 = Gaferencer.processLine(line6, properties, cu)
assert(res6.size == 0)
}

}
Expand Down

0 comments on commit 088b860

Please sign in to comment.