Source code for mgnipy.V2.mixins

from __future__ import annotations

import inspect
import os
from itertools import chain
from typing import (
    TYPE_CHECKING,
    Any,
    Literal,
    Optional,
)
from urllib.parse import urlencode

import pandas as pd
import polars as pl
from bigtree import (
    Tree,
)

from mgnipy._shared_helpers.docstring_parser import (
    get_docstring,
    parse_docstring,
)

if TYPE_CHECKING:
    pass



[docs]
class ResultsHandlerMixin:

    @property
    def data(self) -> dict[int, list[dict[str, Any]]]:
        """
        results based on the current resource.
        """
        return getattr(self, "_results", {}) or {}

    # helpers
    def _df_expand_nested(
        self, df: pd.DataFrame, cols: list[str] = None
    ) -> pd.DataFrame:
        """
        Expand nested structures in the DataFrame into separate columns.

        Parameters
        ----------
        df : pd.DataFrame
            The DataFrame to expand.
        cols : list of str
            List of column names to expand.

        Returns
        -------
        pd.DataFrame
            The expanded DataFrame.
        """

        cols = cols or ["metadata"]

        new_df = df.copy()
        for c in cols:
            if c in new_df.columns:
                attr_df = pd.json_normalize(new_df[c])
                new_df = pd.concat([new_df.drop(columns=[c]), attr_df], axis=1)
        return new_df

    def _unpageinate_results(self, data: Optional[dict] = None) -> chain:
        """
        Unpaginate the results by flattening the dictionary of pages into a single list of records.

        Returns
        -------
        chain
            An iterator that yields individual metadata records from all pages.
        """
        _data = data or self.data

        def _page_to_records(page):
            if page is None:
                return []
            if isinstance(page, list):
                return page
            if isinstance(page, dict):
                return [page]
            return [page]

        return chain.from_iterable(_page_to_records(v) for v in _data.values())

    # viewing the retrieved

[docs]
    def to_df(
        self,
        data: Optional[dict[int, list[dict]]] = None,
        expand_nested_dicts: Optional[list[str] | bool] = False,
        rename_columns: Optional[dict[str, str]] = None,
        **kwargs,
    ) -> pd.DataFrame:
        """
        Convert the current or provided metadata to a pandas DataFrame.

        Parameters
        ----------
        data : list of dict, optional
            List of records to convert. If None, uses self._results or self._previewed_page.
        expand_nested_dicts : list of str, optional
            List of keys to expand into separate columns.
        rename_columns : dict of str to str, optional
            A dictionary mapping old column names to new column names.
        **kwargs
            Additional keyword arguments passed to pd.DataFrame.

        Returns
        -------
        pd.DataFrame | None
            DataFrame containing the metadata.

        Raises
        ------
        RuntimeError
            If no data is available to convert.
        """

        _data = data or self.data

        _rename_columns = rename_columns or {"lineage": "biome_lineage"}

        if _data == {} or _data is None:
            return None

        as_pandas = pd.DataFrame(self._unpageinate_results(_data), **kwargs).rename(
            columns=_rename_columns
        )

        if expand_nested_dicts is None or expand_nested_dicts is False:
            return as_pandas

        if isinstance(expand_nested_dicts, list):
            return self._df_expand_nested(
                as_pandas,
                cols=expand_nested_dicts,
            )
        if expand_nested_dicts is True:  # TODO
            return self._df_expand_nested(as_pandas)



[docs]
    def to_list(
        self, data: Optional[dict[int, list[dict]]] = None
    ) -> list[dict[str, Any]]:
        """
        Convert the current or provided metadata to a list of dictionaries.

        Parameters
        ----------
        data : dict of int to list of dict, optional
            The paginated data to convert. If None, uses self.data.

        Returns
        -------
        list of dict | None
            A list of metadata records as dictionaries, or None if no data is available .

        Raises
        ------
        RuntimeError
            If no data is available to convert.
        """
        _data = data or self.data

        if _data == {} or _data is None:
            return None

        return list(self._unpageinate_results(_data))



[docs]
    def to_json(
        self,
        data: Optional[dict[int, list[dict]]] = None,
        orient: str = "records",
        lines: bool = True,
        **json_kwargs,
    ) -> str:
        """
        Convert the current metadata to a JSON string or save it to a file.

        Parameters
        ----------
        data : dict of int to list of dict, optional
            The paginated data to convert. If None, uses self._results.
        **json_kwargs
            Additional keyword arguments passed to the JSON serialization function.

        Returns
        -------
        str or None
            The JSON string representation of the metadata, or None if no data is available.

        Raises
        ------
        RuntimeError
            If no data is available to convert.
        """
        return self.to_df(data, expand_nested_dicts=False).to_json(
            orient=orient, lines=lines, **json_kwargs
        )



[docs]
    def to_polars(
        self, data: Optional[dict[int, list[dict]]] = None, **polars_kwargs
    ) -> pl.DataFrame:
        """
        Convert the current metadata to a Polars DataFrame.

        Parameters
        ----------
        data : dict of int to list of dict, optional
            The paginated data to convert. If None, uses self._results.
        **polars_kwargs
            Additional keyword arguments passed to pl.DataFrame.

        Returns
        -------
        pl.DataFrame
            A Polars DataFrame containing the metadata.

        Raises
        ------
        RuntimeError
            If no data is available to convert.
        """

        _data = data or self.data

        if _data == {} or _data is None:
            return None

        return pl.DataFrame(self._unpageinate_results(_data), **polars_kwargs)





[docs]
class BiomesTreeMixin:

    @property
    def lineages(self) -> list[str]:
        return getattr(self, "results_ids", []) or []

    @property
    def tree(self) -> Tree:
        """
        Build a tree from the lineage strings of this object.

        Returns
        -------
        Tree
            The result of ``Tree.from_list`` applied to ``self.lineages``
            with ``":"`` as the path separator.
        """
        # TODO generate nodes first
        lineage_paths = self.lineages
        return Tree.from_list(lineage_paths, sep=":")


[docs]
    def show_tree(
        self,
        method: Literal[
            "compact",
            "show",
            "print",
            "horizontal",
            "hshow",
            "h",
            "hprint",
            "vertical",
            "vshow",
            "v",
            "vprint",
        ] = "compact",
    ):
        if method in ["compact", "show", "print"]:
            # TODO print_tree(self._tree)
            self.tree.show()
        elif method in ["horizontal", "hshow", "h", "hprint"]:
            self.tree.hshow()
        elif method in ["vertical", "vshow", "v", "vprint"]:
            self.tree.vshow()
        else:
            raise ValueError(
                f"Invalid method: {method}. "
                "Supported methods: 'compact', 'show', 'print', "
                "'horizontal', 'hshow', 'h', 'hprint', "
                "'vertical', 'vshow', 'v', 'vprint'."
            )





[docs]
class DescribeEmgapiMixin:


[docs]
    def endpoint_module(self):
        return getattr(self, "endpoint_module", None)



[docs]
    def list_supported_params(self) -> list[str]:
        """
        Lists supported keyword arguments for the endpoint module.

        Returns
        -------
        list of str
            List of supported keyword argument names.
        """
        sig = inspect.signature(self.endpoint_module._get_kwargs)
        return list(sig.parameters.keys())



[docs]
    def validate_endpoint_kwargs(self, **kwargs) -> dict[str, Any]:
        """
        Validates the provided keyword arguments against the supported parameters of the endpoint module.

        Parameters
        ----------
        **kwargs
            Keyword arguments to validate.

        Returns
        -------
        dict of str to Any
            The validated keyword arguments.

        Raises
        ------
        ValueError
            If any provided keyword argument is not supported by the endpoint module.
        """
        return self.endpoint_module._get_kwargs(**kwargs)


    @property
    def emgapi_resource(self) -> Optional[str]:
        """
        Retrieves the name of the endpoint resource based on the endpoint module.

        Returns
        -------
        str or None
            The name of the endpoint resource, or None if the endpoint module is not set.
        """
        return os.path.basename(os.path.dirname(self.endpoint_module.__file__))


[docs]
    def sub_url(self, **kwargs) -> Optional[str]:
        """
        Constructs the sub-URL for the endpoint based on the current parameters.

        Returns
        -------
        str or None
            The constructed sub-URL, or None if the endpoint module is not set.
        """
        _kwargs = self.validate_endpoint_kwargs(**kwargs)
        _end_url: str = _kwargs.get(
            "url", f"/metagenomics/api/v2/{self.emgapi_resource}/"
        ).strip("/")

        return _end_url



[docs]
    def resolve_query_string(self, **kwargs) -> str:
        """
        Resolves the query string for the endpoint based on the current parameters.

        Parameters
        ----------
        **kwargs
            Keyword arguments to validate and include in the query string.

        Returns
        -------
        str
            The resolved query string.
        """
        _kwargs = self.validate_endpoint_kwargs(**kwargs)

        # get validated params if any
        params = _kwargs.get("params", {})

        # encode params for url
        return urlencode(params, doseq=True)



[docs]
    def url_path(self, **kwargs) -> str:
        """
        Constructs the full URL path for the endpoint based on the current parameters.

        Parameters
        ----------
        **kwargs
            Keyword arguments to validate and include in the URL construction.

        Returns
        -------
        str
            The constructed URL path.
        """
        _end_url = self.sub_url(**kwargs)
        query_string = self.resolve_query_string(**kwargs)

        return f"{_end_url}?{query_string}" if query_string else _end_url


    @property
    def emgapi_docs(self) -> str:
        """
        Docstring text for the endpoint module.

        Returns
        -------
        str
            The result of ``get_docstring(self.endpoint_module, "sync")``.
        """
        module = self.endpoint_module
        return get_docstring(module, "sync")


[docs]
    def describe_endpoint(self, as_dict: bool = False) -> dict[str, str] | None:
        """
        Parse the endpoint documentation held in ``self.emgapi_docs``.

        Parameters
        ----------
        as_dict : bool, default False
            Forwarded to ``parse_docstring``.

        Returns
        -------
        dict of str to str or None
            Whatever ``parse_docstring`` returns for the endpoint docstring.
        """
        endpoint_doc = self.emgapi_docs
        return parse_docstring(endpoint_doc, as_dict=as_dict)