diff --git a/app/models/concerns/has_edam_terms.rb b/app/models/concerns/has_terms_and_synonyms.rb similarity index 62% rename from app/models/concerns/has_edam_terms.rb rename to app/models/concerns/has_terms_and_synonyms.rb index 4c8169f11..96848841a 100644 --- a/app/models/concerns/has_edam_terms.rb +++ b/app/models/concerns/has_terms_and_synonyms.rb @@ -1,17 +1,17 @@ -module HasEdamTerms +module HasTermsAndSynonyms extend ActiveSupport::Concern def scientific_topics_and_synonyms - edam_term_names_and_synonyms(scientific_topics) + term_names_and_synonyms(scientific_topics) end def operations_and_synonyms - edam_term_names_and_synonyms(operations) + term_names_and_synonyms(operations) end private - def edam_term_names_and_synonyms(terms) + def term_names_and_synonyms(terms) terms.map do |term| [term.preferred_label] + term.has_exact_synonym + term.has_narrow_synonym end.flatten.uniq diff --git a/app/models/event.rb b/app/models/event.rb index 910b5dbd5..571acf006 100644 --- a/app/models/event.rb +++ b/app/models/event.rb @@ -17,7 +17,7 @@ class Event < ApplicationRecord include HasFriendlyId include FuzzyDictionaryMatch include WithTimezone - include HasEdamTerms + include HasTermsAndSynonyms include HasLanguage include InSpace include HasPeople diff --git a/app/models/material.rb b/app/models/material.rb index 9663b35fb..f4ac0cb11 100644 --- a/app/models/material.rb +++ b/app/models/material.rb @@ -19,7 +19,7 @@ class Material < ApplicationRecord include IdentifiersDotOrg include HasFriendlyId include HasDifficultyLevel - include HasEdamTerms + include HasTermsAndSynonyms include InSpace include HasPeople diff --git a/app/models/ontology_term_link.rb b/app/models/ontology_term_link.rb index f376fb3aa..d55c7d448 100644 --- a/app/models/ontology_term_link.rb +++ b/app/models/ontology_term_link.rb @@ -2,10 +2,11 @@ class OntologyTermLink < ApplicationRecord belongs_to :resource, polymorphic: true def ontology_term - ontology.lookup(term_uri) + ontology&.lookup(term_uri) end def ontology - Edam::Ontology.instance + @ontology ||= Ontology.subclasses.map(&:instance).\ + find { |ontology| ontology.term_uri_matches?(term_uri) } end end diff --git a/app/models/workflow.rb b/app/models/workflow.rb index bd135facc..b212a49ff 100644 --- a/app/models/workflow.rb +++ b/app/models/workflow.rb @@ -10,7 +10,7 @@ class Workflow < ApplicationRecord include HasFriendlyId include CurationQueue include HasDifficultyLevel - include HasEdamTerms + include HasTermsAndSynonyms include InSpace include HasPeople diff --git a/app/ontologies/edam/ontology.rb b/app/ontologies/edam/ontology.rb index 30d35c466..b02342852 100644 --- a/app/ontologies/edam/ontology.rb +++ b/app/ontologies/edam/ontology.rb @@ -31,5 +31,13 @@ def scoped_lookup_by_name(name, subset = :_) result = graph.query(query).first lookup(result.u) if result end + + def scoped_lookup_by_name_or_synonym(name, subset = :_) + out = scoped_lookup_by_name(name, subset) + return out unless out.blank? + out = find_by(OBO.hasExactSynonym, name) + return out unless out.blank? + find_by(OBO.hasNarrowSynonym, name) + end end end diff --git a/app/ontologies/ontology.rb b/app/ontologies/ontology.rb index 649f489d9..f7117c2ea 100644 --- a/app/ontologies/ontology.rb +++ b/app/ontologies/ontology.rb @@ -6,6 +6,20 @@ def initialize(filename, term_class = OntologyTerm) @query_cache = {} end + def uri + # Must implement in subclass ... + raise NotImplementedError + end + + def scoped_lookup_by_name_or_synonym(name, subset = :_) + # Must implement in subclass ... + raise NotImplementedError + end + + def term_uri_matches?(uri) + uri.starts_with?(self.uri) + end + def lookup(uri) @term_cache[RDF::URI(uri)] ||= fetch(uri) end diff --git a/lib/has_ontology_terms.rb b/lib/has_ontology_terms.rb index 55db7b553..94a71594f 100644 --- a/lib/has_ontology_terms.rb +++ b/lib/has_ontology_terms.rb @@ -22,7 +22,22 @@ def self.included(mod) end module ClassMethods - def has_ontology_terms(association_name, ontology: Edam::Ontology.instance, branch: :_) # :_ is essentially a wildcard, meaning it will match any branch. + def has_ontology_terms(association_name, + ontology: nil, + branch: nil, + ontologies: nil) + unless ontologies + ontology ||= Edam::Ontology.instance + # :_ is essentially a wildcard, meaning it will match any branch. + branch ||= :_ + else + # ontologies is an array of hashes with keys :ontology and :branch + ontologies = ontologies.map do |ontology_specification| + { ontology: ontology_specification[:ontology] || Edam::Ontology.instance, + branch: ontology_specification[:branch] || :_ } + end + end + method = association_name.to_s singular = association_name.to_s.singularize links_method = "#{singular}_links" @@ -43,9 +58,21 @@ def has_ontology_terms(association_name, ontology: Edam::Ontology.instance, bran dependent: :destroy, inverse_of: :resource - cattr_accessor :ontology_term_fields - self.ontology_term_fields ||= [] - self.ontology_term_fields << method.to_sym + # Previously used cattr_accessor, which uses "@@" variables that mess with inheritance. + # So we do this instead (use "@" class vars), while explicitly merging inherited + # values so STI subclasses still see ontology term fields declared on parent classes. + def self.ontology_term_fields + inherited_fields = superclass.respond_to?(:ontology_term_fields) ? superclass.ontology_term_fields : [] + own_fields = @ontology_term_fields ||= [] + (inherited_fields + own_fields).uniq + end + + def self.add_ontology_term_field(field) + @ontology_term_fields ||= [] + @ontology_term_fields << field unless ontology_term_fields.include?(field) + end + + self.add_ontology_term_field(method.to_sym) define_method "#{links_method}=" do |links| send(links_method).reset @@ -69,7 +96,7 @@ def has_ontology_terms(association_name, ontology: Edam::Ontology.instance, bran # OntologyTerm objects define_method method do - send(links_method).map(&:ontology_term).uniq + send(links_method)&.compact&.map(&:ontology_term)&.uniq&.compact end define_method "#{method}=" do |terms| @@ -78,29 +105,44 @@ def has_ontology_terms(association_name, ontology: Edam::Ontology.instance, bran # Names/Labels define_method names_method do - send(method).map(&:preferred_label).uniq + send(method)&.compact&.map(&:preferred_label)&.uniq end define_method "#{names_method}=" do |names| terms = [] [names].flatten.each do |name| unless name.blank? - st = [ontology.scoped_lookup_by_name(name, branch)].compact # FIXME: This is probably too EDAM specific - st = ontology.find_by(OBO.hasExactSynonym, name) if st.empty? - st = ontology.find_by(OBO.hasNarrowSynonym, name) if st.empty? + st = if ontologies + # TODO: if name is found in first ontology, should it skip others? + ontologies.map do |ontology_specification| + [ontology_specification[:ontology].\ + scoped_lookup_by_name_or_synonym(name, + ontology_specification[:branch])] + end + else + [ontology.scoped_lookup_by_name_or_synonym(name, branch)] + end terms += st end end - send("#{method}=", terms.uniq) + send("#{method}=", terms.flatten.compact.uniq) end # URIs define_method uris_method do - send(method).map(&:uri).uniq + send(method)&.compact&.map(&:uri)&.uniq end define_method "#{uris_method}=" do |uris| - send("#{method}=", uris.map { |uri| ontology.lookup(uri) }) + terms = if ontologies + ontologies.map do |ontology_specification| + uris.map { |uri| ontology_specification[:ontology].lookup(uri) } + end.flatten + else + uris.map { |uri| ontology.lookup(uri) } + end + + send("#{method}=", terms) end end end diff --git a/test/unit/has_ontology_terms_test.rb b/test/unit/has_ontology_terms_test.rb new file mode 100644 index 000000000..7f30e17e7 --- /dev/null +++ b/test/unit/has_ontology_terms_test.rb @@ -0,0 +1,188 @@ +require 'test_helper' + +class HasOntologyTermsTest < ActiveSupport::TestCase + # Summary: we create attributes 'test_topics' and 'multi_test_topics' for + # the fake model DummyMaterial. 'test_topics' uses DummyOntology, + # 'multi_test_topics' uses both DummyOntology and Edam::Ontology + + teardown do + DummyMaterial.clear_index! + end + + class DummyTerm + attr_reader :label, :uri + def initialize(term) + @label = term + @uri = "http://dummy/#{term}" + end + alias_method :preferred_label, :label + end + + class DummyOntology < ::Ontology + # A very permissive ontology: it allows any term as long as it doesn't have Chemistry in it + include Singleton + + def initialize + end + + def uri + 'http://dummy/' + end + + def scoped_lookup_by_name(term, subset = :_) + return DummyTerm.new(term) unless term =~ /chemistry/i + end + alias_method :scoped_lookup_by_name_or_synonym, :scoped_lookup_by_name + + def lookup(uri) + term = uri[/http:\/\/dummy\/(.*)/,1] + return DummyTerm.new(term) unless term.blank? + end + end + + class DummyMaterial < ::Material + has_ontology_terms(:test_topics, ontology: DummyOntology.instance) + has_ontology_terms(:multi_test_topics, + ontologies: [{ ontology: Edam::Ontology.instance, + branch: EDAM.topics}, + { ontology: DummyOntology.instance}]) + + # TODO: see similar tests with model subclasses, maybe can be in a module? + def self.index + (@index ||= Hash.new).values.flatten.uniq + end + + def self.add_to_index(m) + index + @index[m.id] = m.reload.collections.to_a + end + + def self.clear_index! + @index = Hash.new + end + + def solr_index + self.class.add_to_index(self) + end + end + + test 'can create an attribute with terms from a single ontology' do + # See the Event/Material model tests for many examples of this ... + dummy = materials(:good_material).becomes(DummyMaterial) + + # This is found in the ontology ... + dummy.test_topic_names = ['Bioinformatics'] + dummy.save! + assert_equal dummy.test_topics.count, 1 + assert_equal dummy.test_topic_names, ['Bioinformatics'] + assert_equal dummy.test_topic_uris, ['http://dummy/Bioinformatics'] + + # This is not + dummy.test_topic_names = ['Biochemistry'] + dummy.save! + assert_equal dummy.test_topics.count, 0 + assert_equal dummy.test_topic_names, [] + assert_equal dummy.test_topic_uris, [] + end + + test 'can create an attribute with terms from multiple ontologies' do + dummy = materials(:good_material).becomes(DummyMaterial) + + # This is found in both ontologies ... + dummy.multi_test_topic_names = ['Bioinformatics'] + dummy.save! + assert_equal dummy.multi_test_topics.count, 2 + assert_equal Set.new(dummy.multi_test_topic_uris), + Set.new(['http://edamontology.org/topic_0091', 'http://dummy/Bioinformatics']) + # The two exact names collapse into one ... + assert_equal dummy.multi_test_topic_names, ['Bioinformatics'] + + # This is found in only in Edam ... + dummy.multi_test_topic_names = ['Biochemistry'] + dummy.save! + assert_equal dummy.multi_test_topics.count, 1 + assert_equal dummy.multi_test_topic_names, ['Biochemistry'] + assert_equal dummy.multi_test_topic_uris, ['http://edamontology.org/topic_3292'] + + # This is found only in DummyOntology ... + dummy.multi_test_topic_names = ['Poodles'] + dummy.save! + assert_equal dummy.multi_test_topics.count, 1 + assert_equal dummy.multi_test_topic_names, ['Poodles'] + assert_equal dummy.multi_test_topic_uris, ['http://dummy/Poodles'] + + # This is found in neither ... + dummy.multi_test_topic_names = ['Poodle Chemistry'] + dummy.save! + assert_equal dummy.multi_test_topics.count, 0 + assert_equal dummy.multi_test_topic_names, [] + assert_equal dummy.multi_test_topic_uris, [] + + # Set via URIs + dummy.multi_test_topic_uris = ['http://dummy/Poodles', + 'http://edamontology.org/topic_3292'] + dummy.save! + assert_equal dummy.multi_test_topics.count, 2 + assert_equal Set.new(dummy.multi_test_topic_names), + Set.new(['Biochemistry', 'Poodles']) + assert_equal Set.new(dummy.multi_test_topic_uris), + Set.new(['http://dummy/Poodles', + 'http://edamontology.org/topic_3292']) + assert_equal dummy.ontology_term_links.map(&:field), ["multi_test_topics", + "multi_test_topics"] + assert_equal Set.new(dummy.ontology_term_links.map(&:term_uri)), + Set.new(['http://dummy/Poodles', + 'http://edamontology.org/topic_3292']) + end + + test "Ignores attributes that don't come from any ontology" do + dummy = materials(:good_material).becomes(DummyMaterial) + dummy.ontology_term_links.create(field: :test_topics, term_uri: 'http://not-a-term.com') + dummy.ontology_term_links.create(field: :multi_test_topics, term_uri: 'http://also-not-a-term.com') + + assert_equal dummy.ontology_term_links.count, 2 + + assert_equal dummy.test_topics, [] + assert_equal dummy.test_topic_names, [] + assert_equal dummy.test_topic_uris, [] + + assert_equal dummy.multi_test_topics, [] + assert_equal dummy.multi_test_topic_names, [] + assert_equal dummy.multi_test_topic_uris, [] + + # Setting URI manually wipes out the ontology_term_links + dummy.test_topic_uris = ['http://not-a-term.com'] + dummy.multi_test_topic_uris = ['http://also-not-a-term.com'] + + assert_equal dummy.ontology_term_links.count, 0 + + # What if there is a term in here already, plus a bogus term link? + # (perhaps bogus because a previous ontology was take out). + dummy.test_topic_names = ['Bioinformatics'] + dummy.multi_test_topic_names = ['Biochemistry', 'Bioinformatics', 'Poodles'] + assert_equal dummy.ontology_term_links.count, 5 + assert_equal dummy.test_topic_links.count, 1 + assert_equal dummy.multi_test_topic_links.count, 4 + + dummy.ontology_term_links.create(field: :test_topics, term_uri: 'http://not-a-term.com') + dummy.ontology_term_links.create(field: :multi_test_topics, term_uri: 'http://also-not-a-term.com') + assert_equal dummy.ontology_term_links.count, 7 + assert_equal dummy.test_topic_links.count, 2 + assert_equal dummy.multi_test_topic_links.count, 5 + + # Terms with bogus URIs don't appear here + assert_equal dummy.test_topics.count, 1 + assert_equal dummy.test_topic_names, ['Bioinformatics'] + assert_equal dummy.test_topic_uris, ['http://dummy/Bioinformatics'] + + assert_equal dummy.multi_test_topics.count, 4 + # Bioinformatics is in both ontologies + assert_equal Set.new(dummy.multi_test_topic_names), Set.new(['Biochemistry', 'Bioinformatics', 'Poodles']) + assert_equal Set.new(dummy.multi_test_topic_uris), + Set.new(['http://edamontology.org/topic_3292', + 'http://edamontology.org/topic_0091', + 'http://dummy/Bioinformatics', + 'http://dummy/Poodles']) + end + +end