mgnipy.V2.core module

mgnipy.V2.core module#

class mgnipy.V2.core.MGnifier(resource, *, config=None, params=None, **param_kwargs)[source]#

Bases: QuerySet, ResultsHandler

MGnifier is a class that provides an interface for querying the MGnify API. It allows users to specify a resource and query parameters, and then fetch results in a paginated manner. The class also includes methods for fetching specific pages, performing bulk fetches, and planning API calls with a dry run.

Parameters:

resource (str ) – The MGnify resource to query (e.g., “studies”, “samples”).
config (MGnipyConfig or dict , optional) – Configuration for MGnipy, either as an MGnipyConfig instance or a dictionary of configuration parameters (default is None).
params (dict , optional) – Query filter parameters (default is None).
**param_kwargs – Additional parameters treated as query filters.

TODO#

async abulk_fetch(*args, **kwargs)[source]#

Asynchronously fetch a large collection of results efficiently.

Parameters:

*args – Positional arguments forwarded to executor.
**kwargs – Keyword arguments forwarded to executor.

Returns:

All fetched results.

Return type:

dict

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> results = await query.abulk_fetch(limit=100)

async aget()[source]#

Asynchronously fetch all pages of results.

Returns:: All result data.
Return type:: dict

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> results = await query.aget()

async apage(*args, **kwargs)[source]#

Asynchronously fetch a specific page or range of pages.

Parameters:

*args – Positional arguments forwarded to executor.
**kwargs – Keyword arguments forwarded to executor.

Returns:

The requested page(s) of results.

Return type:

dict

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> page_data = await query.apage(1)

property base_url: str #

bulk_fetch(*args, **kwargs)[source]#

Fetch a large collection of results efficiently.

Parameters:

*args – Positional arguments forwarded to executor.
**kwargs – Keyword arguments forwarded to executor.

Returns:

All fetched results.

Return type:

dict

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> results = query.bulk_fetch(limit=100)

property cache_dir: Path | None #

clear_cache()#: Clear the cached results for the current resource and parameters. This will delete any cached files associated with the current query parameters.

config: MGnipyConfig#

continue_iterator(*args, **kwargs)[source]#

Continue iteration from a specific page. This is a facade over the underlying QueryExecutor.continue_iterator, allowing users to resume iteration after an interruption or to jump to a specific page.

Parameters:

*args – Positional arguments forwarded to executor.
**kwargs – Keyword arguments forwarded to executor.

Return type:

None

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> query.continue_iterator(start_page=5)

count: int | None #

property data: chain[dict [str , Any ]]#: results based on the current resource.

describe_endpoint(**kwargs)[source]#

Retrieve documentation about the endpoint.

Returns:: Endpoint documentation, or None if unavailable.
Return type:: dict [str , str ] or None

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> docs = query.describe_endpoint()

describe_relationships()[source]#

Describe the related resources and their relationships.

Return type:: None

Note

This method is not yet implemented.

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> query.describe_relationships()

dry_run()[source]#

Plan the API call by validating parameters and estimating the number of pages and records available. Prints the plan details for the user to review before executing the full data retrieval. This method can be called before get() to ensure that the parameters are valid and to understand the scope of the data retrieval.

Return type:: None

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies", params={"search": "gut"})
>>> query.dry_run()

emgapi_handler: DescribeEmgapiModule#

property endpoint_module: Callable #

explain(head=None)[source]#

Print example API URLs that would be called.

Parameters:: head (int , optional) – Maximum number of URLs to print. If None, prints all.
Return type:: None

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> query.explain(head=3)

filter(**filters)#

Update the parameters for the API call to filter results.

Parameters:: **filters – Keyword arguments corresponding to the supported parameters for the current resource. These will be used to filter the results returned by the API.
Returns:: A new QuerySet instance with updated parameters for filtering results.
Return type:: QuerySet

first()[source]#

Get the first record from the query results.

Executes the query and returns the first metadata record.

Returns:: The first record as a dictionary, or None if unavailable.
Return type:: dict or None

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> first_record = query.first()

get()[source]#

Fetch all pages of results.

Returns:: All result data.
Return type:: dict

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> results = query.get()

property id_param_key: str #

Get the parameter name used to identify this resource.

Returns:: The identifier parameter (e.g., “accession”, “biome_lineage”).
Return type:: str

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> key = query.id_param_key

property last_successful_page: int | None #

Get the last successfully retrieved page number.

Returns:: The last successful page number, or None if no pages have been retrieved yet.
Return type:: int or None

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> query.get()
>>> print(query.last_successful_page)

list_relationships()[source]#

Get the names of related resources available from this resource.

Returns:: Names of related resource types (e.g., [“samples”, “analyses”]).
Return type:: list [str ]

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> relationships = query.list_relationships()

list_supported_params()[source]#

Get the valid query filter parameters for this resource.

Returns:: Supported parameter names.
Return type:: list [str ]

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> params = query.list_supported_params()

list_urls()#

Generate and return a list of URLs for all the API requests that would be made to retrieve the data based on the current parameters. This allows the user to see exactly which endpoints and query parameters will be used in the API calls before executing them.

Returns:: A list of URLs corresponding to each API request that would be made.
Return type:: list of str

num_requests: int | None #

page(*args, **kwargs)[source]#

Fetch a specific page or range of pages.

Parameters:

*args – Positional arguments forwarded to executor.
**kwargs – Keyword arguments forwarded to executor.

Returns:

The requested page(s) of results.

Return type:

dict

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> page_data = query.page(1)

property params: dict [str , Any ]#

preview()[source]#

Get a DataFrame preview of the first page of results.

Quickly check the structure and content of the data without retrieving all pages.

Returns:: DataFrame containing the first page of metadata.
Return type:: pd.DataFrame

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> df = query.preview()

property progress#

Get the progress of the current query execution as a percentage.

Returns:: Progress percentage and counts (e.g., “75.00% (150/200 pages)”).
Return type:: str

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> print(query.progress)

queries(**httpx_kwargs)#

Generate a list of query parameter dictionaries for each API request that would be made based on the current parameters. This allows the user to see the specific query parameters for each request before executing them.

Returns:: A list of dictionaries, each containing the query parameters for a corresponding API request.
Return type:: list of dict

property records: chain | None #

Get an iterator of individual metadata records from the retrieved results, if available. This property provides a convenient way to access the metadata records without needing to handle pagination.

Returns:: An iterator that yields individual metadata records if results are available, otherwise None.
Return type:: chain or None

property request_url: str #

Get the URL for the API request based on the current resource and parameters. This is a single URL that represents the request for the current page of results.

Returns:: The constructed URL for the API request.
Return type:: str

reset_iterator()[source]#

Reset the pagination state to the beginning.

Return type:: None

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> query.reset_iterator()

property resource: SupportedEndpoints#

property results: dict [int , list [dict ]]#: Get the retrieved metadata results, if available. Results are stored in a dictionary with request number (e.g. page number) as keys.

property results_ids: list [str ] | None #

Get the list of identifiers from the current results.

Returns:: List of identifiers (accessions, etc.), or None if no results.
Return type:: list [str ] or None

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> query.get()
>>> ids = query.results_ids

resume()[source]#

This is a facade over QueryExecutor.resume, allowing users to easily continue fetching results after an interruption.

Examples

>>> from mgnipy.V2.core import MGnifier
>>> query = MGnifier("studies")
>>> query.resume()

to_df(data=None, expand_nested_dicts=False, rename_columns=None, **kwargs)#

Convert the current or provided metadata to a pandas DataFrame.

Parameters:

data (list of dict , optional) – List of records to convert. If None, uses the data property.
expand_nested_dicts (list of str or bool , optional) – List of keys to expand into separate columns, or True to expand defaults.
rename_columns (dict of str to str, optional) – A dictionary mapping old column names to new column names.
**kwargs – Additional keyword arguments passed to pd.DataFrame.

Returns:

DataFrame containing the metadata or None when no data is available.

Return type:

pd.DataFrame or None

Examples

>>> handler = ResultsHandler(data=[{"a": 1, "b": 2}])
>>> df = handler.to_df()
>>> list(df.columns)
['a', 'b']
>>> df.iloc[0]['a']
np.int64(1)

to_json(data=None, orient='records', lines=True, **json_kwargs)#

Convert the current metadata to a JSON string or save it to a file.

Parameters:

data (dict of int to list of dict , optional) – The paginated data to convert. If None, uses self.qs._results.
**json_kwargs – Additional keyword arguments passed to the JSON serialization function.
orient (str )
lines (bool )

Returns:

The JSON string representation of the metadata, or None if no data is available.

Return type:

str or None

Raises:

RuntimeError – If no data is available to convert.

to_list(data=None)#

Convert the current or provided metadata to a list of dictionaries.

Parameters:: data (optional) – The paginated data to convert. If None, uses the data property.
Returns:: A list of metadata records as dictionaries, or None if no data is available.
Return type:: list or None

Examples

>>> handler = ResultsHandler(data=[{"x": 10}])
>>> handler.to_list()
[{'x': 10}]

to_polars(data=None, expand_nested_dicts=False, rename_columns=None, **polars_kwargs)#

Convert the current metadata to a Polars DataFrame.

Parameters:

data (dict of int to list of dict , optional) – The paginated data to convert. If None, uses self.qs._results.
**polars_kwargs – Additional keyword arguments passed to pl.DataFrame.
expand_nested_dicts (list [str ] | bool | None)
rename_columns (dict [str , str ] | None)

Returns:

A Polars DataFrame containing the metadata.

Return type:

pl.DataFrame

Raises:

RuntimeError – If no data is available to convert.

mgnipy.V2.core module

Contents

mgnipy.V2.core module#