Skip to content

Commit

Permalink
Replace Creator model with named reference (#23)
Browse files (browse the repository at this point in the history)
  • Loading branch information
cthoyt authored Mar 7, 2025
1 parent eab3a30 commit 5f2a45b
Show file tree
Hide file tree
Showing 10 changed files with 34 additions and 40 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ dependencies = [
"typing_extensions",
"zenodo_client",
"ssslm",
"curies>=0.10.6",
]

[project.optional-dependencies]
Expand Down
4 changes: 2 additions & 2 deletions src/semra/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import requests
from bioontologies.obograph import write_warned
from bioontologies.robot import write_getter_warnings
from curies.vocabulary import charlie
from tqdm.auto import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm
from zenodo_client import Creator, Metadata, ensure_zenodo
Expand All @@ -23,7 +24,6 @@
write_pickle,
write_sssom,
)
from semra.rules import CHARLIE_NAME, CHARLIE_ORCID
from semra.sources import SOURCE_RESOLVER
from semra.sources.wikidata import get_wikidata_mappings_by_prefix

Expand Down Expand Up @@ -187,7 +187,7 @@ def main(include_wikidata: bool):
f"Note that primary mappings are marked with the license of their source (when available). "
f"Inferred mappings are distributed under the CC0 license.",
creators=[
Creator(name=CHARLIE_NAME, orcid=CHARLIE_ORCID.identifier),
Creator(name=charlie.name, orcid=charlie.identifier),
],
)
res = ensure_zenodo(
Expand Down
4 changes: 2 additions & 2 deletions src/semra/landscape/anatomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

import click
import pystow
from curies.vocabulary import charlie
from pyobo.sources.mesh import get_mesh_category_curies

import semra
from semra.pipeline import CREATOR_CHARLIE

__all__ = [
"CONFIGURATION",
Expand Down Expand Up @@ -35,7 +35,7 @@
CONFIGURATION = semra.Configuration(
name="SeMRA Anatomy Mappings Database",
description="Supports the analysis of the landscape of anatomy nomenclature resources.",
creators=[CREATOR_CHARLIE],
creators=[charlie],
inputs=[
semra.Input(source="biomappings"),
semra.Input(source="gilda"),
Expand Down
5 changes: 3 additions & 2 deletions src/semra/landscape/cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@

import click
import pystow
from curies.vocabulary import charlie

from semra.api import project, str_source_target_counts
from semra.io import write_sssom
from semra.pipeline import CREATOR_CHARLIE, Configuration, Input, Mutation, get_mappings_from_config
from semra.pipeline import Configuration, Input, Mutation, get_mappings_from_config

__all__ = [
"CONFIGURATION",
Expand Down Expand Up @@ -51,7 +52,7 @@
description="Originally a reproduction of the EFO/Cellosaurus/DepMap/CCLE scenario posed in "
"the Biomappings paper, this configuration imports several different cell and cell line "
"resources and identifies mappings between them.",
creators=[CREATOR_CHARLIE],
creators=[charlie],
inputs=[
Input(source="biomappings"),
Input(source="gilda"),
Expand Down
6 changes: 3 additions & 3 deletions src/semra/landscape/complexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

import click
import pystow
from curies.vocabulary import charlie

from semra.pipeline import Configuration, Creator, Input, Mutation
from semra.rules import CHARLIE_NAME, CHARLIE_ORCID
from semra.pipeline import Configuration, Input, Mutation

__all__ = [
"CONFIGURATION",
Expand All @@ -31,7 +31,7 @@
name="SeMRA Protein Complex Landscape Analysis",
description="Analyze the landscape of protein complex nomenclature "
"resources, species-agnostic.",
creators=[Creator(orcid=CHARLIE_ORCID.identifier, name=CHARLIE_NAME)],
creators=[charlie],
inputs=[
Input(source="gilda"),
Input(source="biomappings"),
Expand Down
5 changes: 3 additions & 2 deletions src/semra/landscape/diseases.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
import bioregistry
import click
import pystow
from curies.vocabulary import charlie
from pyobo.sources.mesh import get_mesh_category_curies

from semra.pipeline import CREATOR_CHARLIE, Configuration, Input, Mutation
from semra.pipeline import Configuration, Input, Mutation

__all__ = [
"CONFIGURATION",
Expand Down Expand Up @@ -47,7 +48,7 @@
CONFIGURATION = Configuration(
name="SeMRA Disease Mappings Database",
description="Supports the analysis of the landscape of disease nomenclature resources.",
creators=[CREATOR_CHARLIE],
creators=[charlie],
inputs=[
Input(source="biomappings"),
Input(source="gilda"),
Expand Down
5 changes: 3 additions & 2 deletions src/semra/landscape/genes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

import click
import pystow
from curies.vocabulary import charlie

from semra.pipeline import CREATOR_CHARLIE, Configuration, Input, Mutation
from semra.pipeline import Configuration, Input, Mutation

__all__ = [
"CONFIGURATION",
Expand Down Expand Up @@ -32,7 +33,7 @@
CONFIGURATION = Configuration(
name="SeMRA Gene Mapping Database",
description="Analyze the landscape of gene nomenclature resources, species-agnostic.",
creators=[CREATOR_CHARLIE],
creators=[charlie],
inputs=[
Input(prefix="hgnc", source="pyobo", confidence=0.99),
Input(prefix="mgi", source="pyobo", confidence=0.99),
Expand Down
21 changes: 5 additions & 16 deletions src/semra/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import Any, Literal

import requests
from curies import NamedReference
from pydantic import BaseModel, Field, root_validator
from tqdm.auto import tqdm

Expand Down Expand Up @@ -36,7 +37,7 @@
write_pickle,
write_sssom,
)
from semra.rules import CHARLIE_NAME, CHARLIE_ORCID, DB_XREF, EXACT_MATCH, IMPRECISE
from semra.rules import DB_XREF, EXACT_MATCH, IMPRECISE
from semra.sources import SOURCE_RESOLVER
from semra.sources.biopragmatics import (
from_biomappings_negative,
Expand All @@ -51,13 +52,10 @@
import zenodo_client

__all__ = [
# Configuration model
"Configuration",
"Creator",
"Input",
"Mutation",
"SubsetConfiguration",
# Functions
"get_mappings_from_config",
"get_raw_mappings",
"process",
Expand Down Expand Up @@ -87,24 +85,14 @@ class Mutation(BaseModel):
SubsetConfiguration = t.Mapping[str, t.Collection[str]]


class Creator(BaseModel):
"""A model describing a creator."""

name: str
orcid: str


CREATOR_CHARLIE = Creator(name=CHARLIE_NAME, orcid=CHARLIE_ORCID.identifier)


class Configuration(BaseModel):
"""Represents the steps taken during mapping assembly."""

name: str = Field(description="The name of the mapping set configuration")
description: str | None = Field(
None, description="An explanation of the purpose of the mapping set configuration"
)
creators: list[Creator] = Field(
creators: list[NamedReference] = Field(
default_factory=list, description="A list of the ORCID identifiers for creators"
)
inputs: list[Input] = Field(..., description="A list of sources of mappings")
Expand Down Expand Up @@ -263,8 +251,9 @@ def _get_zenodo_metadata(self) -> zenodo_client.Metadata:
title=self.name,
description=self.description,
creators=[
zenodo_client.Creator(name=creator.name, orcid=creator.orcid)
zenodo_client.Creator(name=creator.name, orcid=creator.identifier)
for creator in self.creators
if creator.prefix == "orcid"
],
)

Expand Down
15 changes: 7 additions & 8 deletions src/semra/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
from __future__ import annotations

from curies import Reference
from curies import vocabulary as v

EXACT_MATCH = Reference(prefix="skos", identifier="exactMatch")
BROAD_MATCH = Reference(prefix="skos", identifier="broadMatch")
NARROW_MATCH = Reference(prefix="skos", identifier="narrowMatch")
CLOSE_MATCH = Reference(prefix="skos", identifier="closeMatch")
DB_XREF = Reference(prefix="oboinowl", identifier="hasDbXref")
EXACT_MATCH = v.exact_match
BROAD_MATCH = v.broad_match
NARROW_MATCH = v.narrow_match
CLOSE_MATCH = v.close_match
DB_XREF = v.has_dbxref
EQUIVALENT_TO = Reference(prefix="owl", identifier="equivalentTo")
REPLACED_BY = Reference(prefix="iao", identifier="0100001")
REPLACED_BY = v.term_replaced_by

RELATIONS = [
EXACT_MATCH,
Expand Down Expand Up @@ -51,6 +52,4 @@
CHAIN_MAPPING = Reference.from_curie("semapv:MappingChaining")
KNOWLEDGE_MAPPING = Reference.from_curie("semapv:BackgroundKnowledgeBasedMatching")

CHARLIE_ORCID = Reference.from_curie("orcid:0000-0003-4423-4370")
CHARLIE_NAME = "Charles Tapley Hoyt"
BEN_ORCID = Reference.from_curie("orcid:0000-0001-9439-5346")
8 changes: 5 additions & 3 deletions tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
import unittest
from pathlib import Path

from curies.vocabulary import charlie

from semra import EXACT_MATCH, Mapping, MappingSet, Reference, SimpleEvidence
from semra.io import write_sssom
from semra.pipeline import Configuration, Input, get_raw_mappings
from semra.rules import CHARLIE_ORCID, MANUAL_MAPPING
from semra.rules import MANUAL_MAPPING
from semra.sources import SOURCE_RESOLVER

TEST_MAPPING_SET = MappingSet(
Expand All @@ -23,7 +25,7 @@
SimpleEvidence(
justification=MANUAL_MAPPING,
mapping_set=TEST_MAPPING_SET,
author=CHARLIE_ORCID,
author=charlie,
)
],
)
Expand Down Expand Up @@ -53,7 +55,7 @@ def assert_test_mappings(self, mappings):
ev = mapping.evidence[0]
self.assertIsInstance(ev, SimpleEvidence)
self.assertEqual(MANUAL_MAPPING, ev.justification)
self.assertEqual(CHARLIE_ORCID, ev.author)
self.assertEqual(charlie.pair, ev.author.pair)
self.assertIsNotNone(ev.mapping_set)
self.assertEqual("test", ev.mapping_set.name)
self.assertEqual(1.0, ev.mapping_set.confidence)
Expand Down

0 comments on commit 5f2a45b

Please sign in to comment.