Source code for mgnipy.V2.proxies

from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    Any,
    AsyncIterator,
    Callable,
    ClassVar,
    Iterator,
    Literal,
    Optional,
)

from mgnipy._models.config import AuthMGnipyConfig
from mgnipy._models.CONSTANTS import (
    SupportedEndpoints,
)
from mgnipy.V2.core import MGnifier
from mgnipy.V2.endpoints import (
    BETWEEN_RESOURCE_RELATIONSHIPS,
    PARENT_CHILD_RESOURCES,
    WITHIN_RESOURCE_RELATIONSHIPS,
)
from mgnipy.V2.mixins import BiomesTreeMixin

if TYPE_CHECKING:
    from mgnipy.V2.query_set import QuerySet

# Plural resource names; used below as the type of ``MGnifyList.RESOURCE``.
ListResource = Literal[
    "biomes",
    "studies",
    "samples",
    "runs",
    "analyses",
    "genomes",
    "assemblies",
    "publications",
    "catalogues",
    "private_studies",
]

# Singular resource names; used below as the type of ``MGnifyDetail.RESOURCE``.
DetailResource = Literal[
    "biome",
    "study",
    "sample",
    "run",
    "analysis",
    "genome",
    "assembly",
    "publication",
    "catalogue",
]



[docs]
class MGnifyList(MGnifier):
    """
    Proxy for a list-style resource (e.g. "studies", "samples").

    Concrete subclasses pin the resource through the ``RESOURCE`` class
    attribute. The base class can also be instantiated directly when a
    ``resource=...`` keyword argument is supplied.
    """

    RESOURCE: ClassVar[Optional[ListResource]] = None

    def __init__(
        self,
        *,
        config: Optional[dict] = None,
        params: Optional[dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Resolve the resource name and initialise the underlying ``MGnifier``.

        Parameters
        ----------
        config : Optional[dict]
            Configuration forwarded unchanged to ``MGnifier``.
        params : Optional[dict[str, Any]]
            Query parameters forwarded unchanged to ``MGnifier``.
        **kwargs
            Remaining keyword arguments forwarded to ``MGnifier``. A
            ``resource`` entry is consumed here and never forwarded as-is.

        Raises
        ------
        TypeError
            If neither ``RESOURCE`` nor a ``resource`` keyword yields a resource.
        ValueError
            If ``resource`` is passed and disagrees with the class ``RESOURCE``.
        """
        # "resource" is tolerated in kwargs but deliberately kept out of the signature
        requested = kwargs.pop("resource", None)
        class_resource = self.RESOURCE
        effective = class_resource or requested

        if effective is None:
            raise TypeError(
                "`resource` is required for base MGnifyList; "
                "use a concrete subclass like Analyses/Runs/... "
                f"or pass a resource param: {ListResource!r}"
            )

        if class_resource is not None:
            if requested not in (None, class_resource):
                raise ValueError(
                    f"Conflicting resource: expected {class_resource!r}, got {requested!r}"
                )

        super().__init__(
            resource=effective,
            params=params,
            config=config,
            **kwargs,
        )
        # Looked up once at construction time; None when the resource has no entry
        self.child_resource: str = PARENT_CHILD_RESOURCES.get(self.resource, None)

    def __call__(self, **kwargs) -> "MGnifyList":
        """
        Allow calling the list proxy to create a new instance with updated params.
        This is useful for refining the query with new parameters without having to re-specify the resource or config.

        Examples
        -------
        # Example 1: Using call to update params
        gut_studies = MG.studies(search="gut")
        # Example 2: Using params
        cancer_studies = MG.studies(params={"search": "cancer"})

        Note
        ----
        if "params" is included in kwargs, it will be used as the new params.
        Otherwise, all kwargs will be treated as params.
        """
        params = kwargs.pop("params", None) or {}
        # Merge with params, giving precedence to kwargs
        params.update(kwargs)

        return self.__class__(config=self.config.model_dump(mode="json"), params=params)

    @property
    def _next_rel_module(self) -> Callable:
        """
        Get the next relationship module for the child resource.
        This is used to determine which API endpoint the child proxy should use.
        """
        # check
        if len(self.list_relationships()) == 0:
            raise AttributeError(f"{self.resource} does not have any linked resources.")

        # quick check
        assert (
            len(self.list_relationships()) == 1
            and self.child_resource.value == self.list_relationships()[0]
        ), (
            "Should only be be parent to detail endpoint: "
            f"{self.child_resource!r}, but got {self.list_relationships()[0]!r}"
        )

        detail_endpoint = WITHIN_RESOURCE_RELATIONSHIPS[self.resource][
            self.child_resource
        ]
        return detail_endpoint


[docs]
    def iter_details(self, fetch: bool = True) -> Iterator["QuerySet"]:
        """
        Yield one child detail proxy per entry in ``self.results_ids``.

        Each id is converted with ``_resolve_id_param`` and handed to
        ``get_detail``. Nothing is produced when ``results_ids`` is empty or None.

        Parameters
        ----------
        fetch : bool
            Passed through to ``get_detail``; when True each detail is fetched
            as soon as its proxy is created.

        Returns
        -------
        Iterator of QuerySet
            A lazy iterator over child detail proxies.

        Example
        -------
        for sample in samples.iter_details():
            sample.get()
        """
        identifiers = self.results_ids or []
        for identifier in identifiers:
            access_param = self._resolve_id_param(identifier)
            yield self.get_detail(access_param, fetch=fetch)



[docs]
    def collect_details(
        self,
        *,
        fetch: bool = True,
        by_id: bool = False,
    ) -> list["QuerySet"] | dict[str, "QuerySet"]:
        """
        Collect child detail proxies into a list or dict.

        Parameters
        ----------
        fetch : bool
            Whether to immediately fetch the details after creating the proxies.
        by_id : bool
            Whether to return a dict keyed by identifier instead of a list.

        Returns
        -------
        list of QuerySet or dict of str to QuerySet
            A list or dict of child detail proxies.

        Example
        -------
        sample_detail = samples.collect_details(fetch=True, by_id=True)
        """

        items: list["QuerySet"] = []
        for item in self.iter_details(fetch=fetch):
            items.append(item)

        if by_id:
            return {x.identifier: x for x in items if x.identifier is not None}
        return items


    def __iter__(self) -> Iterator["QuerySet"]:
        return self.iter_details()

    async def __aiter__(self) -> AsyncIterator["QuerySet"]:
        """
        Async version of __iter__.

        Examples
        -------
        async for sample in samples:
            await sample.aget()

        """
        async for item in self.aiter_details():
            yield item


[docs]
    async def aiter_details(self, fetch: bool = True) -> AsyncIterator["QuerySet"]:
        """
        Async counterpart of ``iter_details``.

        Walks ``self.results_ids`` in order and awaits ``aget_detail`` for one
        id at a time. Nothing is produced when ``results_ids`` is empty or None.

        Parameters
        ----------
        fetch : bool
            Passed through to ``aget_detail``; when True each detail is fetched
            as soon as its proxy is created.

        Returns
        -------
        AsyncIterator of QuerySet
            An async iterator that yields child detail proxies.
        """
        identifiers = self.results_ids or []
        for identifier in identifiers:
            access_param = self._resolve_id_param(identifier)
            detail = await self.aget_detail(access_param, fetch=fetch)
            yield detail



[docs]
    async def acollect_details(
        self,
        *,
        fetch: bool = True,
        by_id: bool = False,
        concurrency: Optional[int] = None,
        hide_progress: bool = False,
    ) -> list["QuerySet"] | dict[str, "QuerySet"]:
        acc_params = [self._resolve_id_param(acc) for acc in (self.results_ids or [])]

        async def _worker(access_param):
            child = await self.aget_detail(access_param, fetch=fetch)
            return child

        items = await self.exec.map_with_concurrency(
            items=acc_params,
            worker=_worker,
            concurrency=concurrency,
            hide_progress=hide_progress,
        )

        if by_id:
            return {
                x.identifier: x
                for x in items
                if x is not None and x.identifier is not None
            }
        return items


    def __getitem__(self, key: int | str) -> "QuerySet":
        """
        Allow index or accession-based access to child details.
        Default is not lazy and will fetch immediately, but can be configured to return proxies without fetching.
        """
        return self.get_detail(
            self._resolve_id_param(key),
            fetch=True,
        )


[docs]
    def get_detail(
        self,
        access_param: dict[str, str],
        fetch: bool = True,
    ) -> "QuerySet":
        """
        Get detail proxy for a specific accession/pubmed_id/catalogue_id.

        The detail class is looked up in ``V2_ENDPOINT_DETAIL_PROXIES`` using
        ``self.child_resource``. The child is created with this proxy's
        configuration so that it does not fall back to default settings.

        Parameters
        ----------
        access_param : dict[str, str]
            A dictionary containing the necessary parameter to identify the detail resource,
            such as {"accession": "MGYS00001234"} or {"biome_lineage": "root"}.
        fetch : bool
            Whether to immediately fetch the detail after creating the proxy.

        Returns
        -------
        QuerySet
            A proxy for the child detail resource.

        Raises
        ------
        ValueError
            If no detail proxy class is registered for ``self.child_resource``.
        AttributeError, AssertionError
            Propagated from ``_next_rel_module`` when the relationship lookup fails.

        Examples
        -------
        sample = samples.get_detail({"accession": "MGYS00001234"})
        """

        detail_cls = V2_ENDPOINT_DETAIL_PROXIES.get(self.child_resource)
        if not detail_cls:
            raise ValueError(
                f"Unsupported child resource for detail: {self.child_resource}"
            )

        # Hand the parent's config to the child, the same way ``__call__``
        # re-creates list proxies; otherwise the child silently uses defaults.
        child = detail_cls(
            config=self.config.model_dump(mode="json"),
            **access_param,
        )
        child.endpoint_module = self._next_rel_module
        if fetch:
            child.get(safety=False)
        return child



[docs]
    async def aget_detail(
        self,
        access_param: dict[str, str],
        fetch: bool = True,
    ) -> "QuerySet":
        """
        Async version of get_detail.
        Get detail proxy for a specific accession/pubmed_id/catalogue_id.

        The detail class is looked up in ``V2_ENDPOINT_DETAIL_PROXIES`` using
        ``self.child_resource``. The child is created with this proxy's
        configuration so that it does not fall back to default settings.

        Parameters
        ----------
        access_param : dict[str, str]
            A dictionary containing the necessary parameter to identify the detail resource,
            such as {"accession": "MGYS00001234"} or {"biome_lineage": "root"}.
        fetch : bool
            Whether to immediately await ``aget`` on the child after creating it.

        Returns
        -------
        QuerySet
            A proxy for the child detail resource.

        Raises
        ------
        ValueError
            If no detail proxy class is registered for ``self.child_resource``.
        AttributeError, AssertionError
            Propagated from ``_next_rel_module`` when the relationship lookup fails.

        Examples
        -------
        sample = await samples.aget_detail({"accession": "MGYS00001234"})
        """
        detail_cls = V2_ENDPOINT_DETAIL_PROXIES.get(self.child_resource)
        if not detail_cls:
            raise ValueError(
                f"Unsupported child resource for detail: {self.child_resource}"
            )
        # Hand the parent's config to the child, the same way ``__call__``
        # re-creates list proxies; otherwise the child silently uses defaults.
        child = detail_cls(
            config=self.config.model_dump(mode="json"),
            **access_param,
        )
        child.endpoint_module = self._next_rel_module
        if fetch:
            await child.aget(safety=False)
        return child





[docs]
class MGnifyDetail(MGnifier):
    """
    Proxy for a single-item (detail) resource such as "study" or "sample".

    Concrete subclasses pin the resource through the ``RESOURCE`` class
    attribute. The base class can also be instantiated directly when a
    ``resource=...`` keyword argument is supplied.
    """

    RESOURCE: ClassVar[Optional[DetailResource]] = None

    def __init__(
        self,
        id: str,
        config: Optional[dict] = None,
        **kwargs,
    ):
        """
        Resolve the resource name, initialise ``MGnifier`` and record the id.

        Parameters
        ----------
        id : str
            Identifier of the item; stored in ``self._params`` under
            ``self.id_param_key`` after the base initialiser has run.
        config : Optional[dict]
            Configuration forwarded unchanged to ``MGnifier``.
        **kwargs
            Remaining keyword arguments forwarded to ``MGnifier``. A
            ``resource`` entry is consumed here and never forwarded as-is.

        Raises
        ------
        TypeError
            If neither ``RESOURCE`` nor a ``resource`` keyword yields a resource.
        ValueError
            If ``resource`` is passed and disagrees with the class ``RESOURCE``.
        """
        requested = kwargs.pop("resource", None)
        class_resource = self.RESOURCE
        effective = class_resource or requested

        if effective is None:
            raise TypeError(
                "`resource` is required for base MGnifyDetail; "
                "init a concrete subclass like Biome/Study/Sample... "
                f"or pass as a resource param: {DetailResource!r}"
            )

        if class_resource is not None:
            if requested not in (None, class_resource):
                raise ValueError(
                    f"Conflicting resource: expected {class_resource!r}, got {requested!r}"
                )

        # The base class is initialised without the id ...
        super().__init__(resource=effective, config=config, **kwargs)
        # ... which is then written into the params under the resource's id key
        id_entry = {self.id_param_key: id}
        self._params.update(id_entry)

    def _next_rel_module(self, name: str) -> SupportedEndpoints:
        """
        Get the next resource name based on the relationship name
        """
        if name in self.list_relationships():
            return BETWEEN_RESOURCE_RELATIONSHIPS[self.resource][
                SupportedEndpoints.validate(name)
            ]

        raise AttributeError(f"{self.resource} does not have linked resource: {name!r}")

    def __getattr__(self, name: str):
        # if is a supported relationship
        if name in self.list_relationships():

            access_param = self._resolve_id_param(self.identifier)

            return self.get_list(
                resource=name,
                access_param=access_param,
                fetch=False,
            )


[docs]
    def get_list(
        self,
        resource: Literal[
            "biomes",
            "studies",
            "samples",
            "runs",
            "genomes",
            "analyses",
            "assemblies",
            "publications",
            "catalogues",
        ],
        access_param: dict[str, str],
        fetch: bool = True,
        explain: bool = False,
    ) -> "QuerySet":
        """
        Build the list proxy for a resource linked to this detail.

        Parameters
        ----------
        resource : str
            Valid child resource name e.g. in list_relationships(), such as "samples" for a study detail, or "analyses" for a run detail.
        access_param : dict[str, str]
            A dictionary containing the necessary parameter to identify the detail resource,
            such as {"accession": "MGYS00001234"} or {"biome_lineage": "root"}.
            It is passed to the list proxy as keyword arguments.
        fetch : bool
            Whether to immediately call ``get`` on the list proxy after creating it.
        explain : bool
            Whether to call ``explain()`` on the list proxy before any fetch.

        Returns
        -------
        QuerySet
            A proxy for the linked list resource, created with this detail's config.

        Raises
        ------
        ValueError
            If no list proxy class is registered for ``resource``.
        AttributeError
            Propagated from ``_next_rel_module`` when ``resource`` is not a
            relationship of this detail.

        Examples
        -------
        samples = study.get_list("samples", {"accession": "MGYS00001234"})
        """
        endpoint = SupportedEndpoints.validate(resource)
        list_cls = V2_ENDPOINT_LIST_PROXIES.get(endpoint)
        if not list_cls:
            raise ValueError(f"Unsupported resource: {resource}")

        parent_config = self.config.model_dump(mode="json")
        proxy = list_cls(config=parent_config, **access_param)
        proxy.endpoint_module = self._next_rel_module(resource)

        if explain:
            proxy.explain()
        if fetch:
            proxy.get(safety=False)
        return proxy



[docs]
    async def aget_list(
        self,
        resource: Literal[
            "biomes",
            "studies",
            "samples",
            "runs",
            "genomes",
            "analyses",
            "assemblies",
            "publications",
            "catalogues",
        ],
        access_param: dict[str, str],
        fetch: bool = True,
        explain: bool = False,
    ) -> "QuerySet":
        """
        Async version of get_list: build the list proxy for a linked resource.

        Parameters
        ----------
        resource : str
            Valid child resource name e.g. in list_relationships(), such as "samples" for a study detail, or "analyses" for a run detail.
        access_param : dict[str, str]
            A dictionary containing the necessary parameter to identify the detail resource,
            such as {"accession": "MGYS00001234"} or {"biome_lineage": "root"}.
            It is passed to the list proxy as keyword arguments.
        fetch : bool
            Whether to immediately await ``aget`` on the list proxy after creating it.
        explain : bool
            Whether to call ``explain()`` on the list proxy before any fetch.

        Returns
        -------
        QuerySet
            A proxy for the linked list resource, created with this detail's config.

        Raises
        ------
        ValueError
            If no list proxy class is registered for ``resource``.
        AttributeError
            Propagated from ``_next_rel_module`` when ``resource`` is not a
            relationship of this detail.

        Examples
        -------
        samples = await study.aget_list("samples", {"accession": "MGYS00001234"})
        """
        endpoint = SupportedEndpoints.validate(resource)
        list_cls = V2_ENDPOINT_LIST_PROXIES.get(endpoint)
        if not list_cls:
            raise ValueError(f"Unsupported resource: {resource}")

        parent_config = self.config.model_dump(mode="json")
        proxy = list_cls(config=parent_config, **access_param)
        proxy.endpoint_module = self._next_rel_module(resource)

        if explain:
            proxy.explain()
        if fetch:
            await proxy.aget(safety=False)
        return proxy





[docs]
class Analyses(MGnifyList):
    """List proxy pinned to the "analyses" resource."""

    RESOURCE: ClassVar[Literal["analyses"]] = "analyses"

    def __init__(
        self,
        *,
        params: Optional[dict[str, Any]] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # No extra behaviour: everything is delegated to MGnifyList
        super().__init__(config=config, params=params, **kwargs)




[docs]
class Runs(MGnifyList):
    """List proxy pinned to the "runs" resource."""

    RESOURCE: ClassVar[Literal["runs"]] = "runs"

    def __init__(
        self,
        *,
        params: Optional[dict[str, Any]] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # No extra behaviour: everything is delegated to MGnifyList
        super().__init__(config=config, params=params, **kwargs)




[docs]
class Samples(MGnifyList):
    """List proxy pinned to the "samples" resource."""

    RESOURCE: ClassVar[Literal["samples"]] = "samples"

    def __init__(
        self,
        *,
        params: Optional[dict[str, Any]] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # No extra behaviour: everything is delegated to MGnifyList
        super().__init__(config=config, params=params, **kwargs)




[docs]
class Studies(MGnifyList):
    """List proxy pinned to the "studies" resource."""

    RESOURCE: ClassVar[Literal["studies"]] = "studies"

    def __init__(
        self,
        *,
        params: Optional[dict[str, Any]] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # No extra behaviour: everything is delegated to MGnifyList
        super().__init__(config=config, params=params, **kwargs)




[docs]
class PrivateStudies(MGnifyList):
    """
    List proxy pinned to the "private_studies" resource.

    After the regular ``MGnifyList`` initialisation, ``self.config`` is
    replaced by an ``AuthMGnipyConfig`` built from ``config`` and
    ``resolve_auth_token()`` is called on it.
    """

    RESOURCE: ClassVar[Literal["private_studies"]] = "private_studies"

    def __init__(
        self,
        *,
        params: Optional[dict[str, Any]] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        """
        Parameters
        ----------
        params : Optional[dict[str, Any]]
            Query parameters forwarded to ``MGnifyList``.
        config : Optional[dict]
            Keyword arguments for ``AuthMGnipyConfig``; also forwarded to
            ``MGnifyList``. May be omitted.
        **kwargs
            Forwarded to ``MGnifyList``.
        """
        super().__init__(params=params, config=config, **kwargs)

        # ``config`` defaults to None, and ``**None`` raises TypeError, so an
        # omitted config is treated as an empty mapping.
        self.config = AuthMGnipyConfig(**(config or {}))
        self.config.resolve_auth_token()




[docs]
class Biomes(MGnifyList, BiomesTreeMixin):
    """List proxy pinned to the "biomes" resource, with ``BiomesTreeMixin`` mixed in."""

    RESOURCE: ClassVar[Literal["biomes"]] = "biomes"

    def __init__(
        self,
        *,
        params: Optional[dict[str, Any]] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # No extra behaviour here: arguments are passed straight up the MRO
        super().__init__(config=config, params=params, **kwargs)




[docs]
class Assemblies(MGnifyList):
    """List proxy pinned to the "assemblies" resource."""

    RESOURCE: ClassVar[Literal["assemblies"]] = "assemblies"

    def __init__(
        self,
        *,
        params: Optional[dict[str, Any]] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # No extra behaviour: everything is delegated to MGnifyList
        super().__init__(config=config, params=params, **kwargs)




[docs]
class Genomes(MGnifyList):
    """List proxy pinned to the "genomes" resource."""

    RESOURCE: ClassVar[Literal["genomes"]] = "genomes"

    def __init__(
        self,
        *,
        params: Optional[dict[str, Any]] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # No extra behaviour: everything is delegated to MGnifyList
        super().__init__(config=config, params=params, **kwargs)




[docs]
class Publications(MGnifyList):
    """List proxy pinned to the "publications" resource."""

    RESOURCE: ClassVar[Literal["publications"]] = "publications"

    def __init__(
        self,
        *,
        params: Optional[dict[str, Any]] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # No extra behaviour: everything is delegated to MGnifyList
        super().__init__(config=config, params=params, **kwargs)




[docs]
class Catalogues(MGnifyList):
    """List proxy pinned to the "catalogues" resource."""

    RESOURCE: ClassVar[Literal["catalogues"]] = "catalogues"

    def __init__(
        self,
        *,
        params: Optional[dict[str, Any]] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # No extra behaviour: everything is delegated to MGnifyList
        super().__init__(config=config, params=params, **kwargs)




[docs]
class StudyDetail(MGnifyDetail):
    """Detail proxy pinned to the "study" resource."""

    RESOURCE: ClassVar[Literal["study"]] = "study"

    def __init__(
        self,
        id: Optional[str] = None,
        *,
        accession: Optional[str] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # A truthy ``id`` wins; otherwise ``accession`` is used as the identifier
        identifier = id if id else accession
        super().__init__(id=identifier, config=config, **kwargs)




[docs]
class SampleDetail(MGnifyDetail):
    """Detail proxy pinned to the "sample" resource."""

    RESOURCE: ClassVar[Literal["sample"]] = "sample"

    def __init__(
        self,
        id: Optional[str] = None,
        *,
        accession: Optional[str] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # A truthy ``id`` wins; otherwise ``accession`` is used as the identifier
        identifier = id if id else accession
        super().__init__(id=identifier, config=config, **kwargs)




[docs]
class RunDetail(MGnifyDetail):
    """Detail proxy pinned to the "run" resource."""

    RESOURCE: ClassVar[Literal["run"]] = "run"

    def __init__(
        self,
        id: Optional[str] = None,
        *,
        accession: Optional[str] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # A truthy ``id`` wins; otherwise ``accession`` is used as the identifier
        identifier = id if id else accession
        super().__init__(id=identifier, config=config, **kwargs)




[docs]
class AnalysisDetail(MGnifyDetail):
    """Detail proxy pinned to the "analysis" resource."""

    RESOURCE: ClassVar[Literal["analysis"]] = "analysis"

    def __init__(
        self,
        id: Optional[str] = None,
        *,
        accession: Optional[str] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # A truthy ``id`` wins; otherwise ``accession`` is used as the identifier
        identifier = id if id else accession
        super().__init__(id=identifier, config=config, **kwargs)




[docs]
class GenomeDetail(MGnifyDetail):
    """Detail proxy pinned to the "genome" resource."""

    RESOURCE: ClassVar[Literal["genome"]] = "genome"

    def __init__(
        self,
        id: Optional[str] = None,
        *,
        accession: Optional[str] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # A truthy ``id`` wins; otherwise ``accession`` is used as the identifier
        identifier = id if id else accession
        super().__init__(id=identifier, config=config, **kwargs)




[docs]
class AssemblyDetail(MGnifyDetail):
    """Detail proxy pinned to the "assembly" resource."""

    RESOURCE: ClassVar[Literal["assembly"]] = "assembly"

    def __init__(
        self,
        id: Optional[str] = None,
        *,
        accession: Optional[str] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # A truthy ``id`` wins; otherwise ``accession`` is used as the identifier
        identifier = id if id else accession
        super().__init__(id=identifier, config=config, **kwargs)




[docs]
class BiomeDetail(MGnifyDetail, BiomesTreeMixin):
    """Detail proxy pinned to the "biome" resource, with ``BiomesTreeMixin`` mixed in."""

    RESOURCE: ClassVar[Literal["biome"]] = "biome"

    def __init__(
        self,
        id: Optional[str] = None,
        *,
        biome_lineage: Optional[str] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # A truthy ``id`` wins; otherwise ``biome_lineage`` is used as the identifier
        identifier = id if id else biome_lineage
        super().__init__(id=identifier, config=config, **kwargs)




[docs]
class PublicationDetail(MGnifyDetail):
    """Detail proxy pinned to the "publication" resource."""

    RESOURCE: ClassVar[Literal["publication"]] = "publication"

    def __init__(
        self,
        id: Optional[str] = None,
        *,
        accession: Optional[str] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # A truthy ``id`` wins; otherwise ``accession`` is used as the identifier
        identifier = id if id else accession
        super().__init__(id=identifier, config=config, **kwargs)




[docs]
class CatalogueDetail(MGnifyDetail):
    """Detail proxy pinned to the "catalogue" resource."""

    RESOURCE: ClassVar[Literal["catalogue"]] = "catalogue"

    def __init__(
        self,
        id: Optional[str] = None,
        *,
        accession: Optional[str] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        # A truthy ``id`` wins; otherwise ``accession`` is used as the identifier
        identifier = id if id else accession
        super().__init__(id=identifier, config=config, **kwargs)



# Registry mapping each list endpoint to its proxy class.
# ``MGnifyDetail.get_list`` / ``aget_list`` look classes up here after
# validating the resource name with ``SupportedEndpoints.validate``.
V2_ENDPOINT_LIST_PROXIES = {
    SupportedEndpoints.ANALYSES: Analyses,
    SupportedEndpoints.RUNS: Runs,
    SupportedEndpoints.SAMPLES: Samples,
    SupportedEndpoints.STUDIES: Studies,
    SupportedEndpoints.BIOMES: Biomes,
    SupportedEndpoints.ASSEMBLIES: Assemblies,
    SupportedEndpoints.GENOMES: Genomes,
    SupportedEndpoints.PUBLICATIONS: Publications,
    SupportedEndpoints.CATALOGUES: Catalogues,
    SupportedEndpoints.PRIVATE_STUDIES: PrivateStudies,
}

# Registry mapping each detail endpoint to its proxy class.
# ``MGnifyList.get_detail`` / ``aget_detail`` look classes up here by
# ``child_resource``; a None entry makes them raise ValueError.
V2_ENDPOINT_DETAIL_PROXIES = {
    SupportedEndpoints.ANALYSIS: AnalysisDetail,
    SupportedEndpoints.RUN: RunDetail,
    SupportedEndpoints.SAMPLE: SampleDetail,
    SupportedEndpoints.STUDY: StudyDetail,
    SupportedEndpoints.BIOME: BiomeDetail,
    SupportedEndpoints.ASSEMBLY: AssemblyDetail,
    SupportedEndpoints.GENOME: GenomeDetail,
    SupportedEndpoints.PUBLICATION: PublicationDetail,
    SupportedEndpoints.CATALOGUE: CatalogueDetail,
    # NOTE(review): no proxy class registered yet; the trailing "MGazine"
    # comment looks like a placeholder for a future class — confirm.
    SupportedEndpoints.ANNOTATIONS: None,  # "MGazine",
}