Module pyaurorax.search.data_products
Use the AuroraX search engine to search and upload data product records.
Note that all functions and classes from the submodules are imported at this level of the data_products module. They can be referenced from here instead of digging deeper into the submodules.
Expand source code
# Copyright 2024 University of Calgary
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Use the AuroraX search engine to search and upload data product records.
Note that all functions and classes from submodules are imported
at this level of the data_products module. They can be referenced from
here instead of digging in deeper to the submodules.
"""
import datetime
from typing import Optional, Dict, List
from .classes.data_product import DataProductData
from .classes.search import DataProductSearch
from ..sources.classes.data_source import DataSource
from ._data_products import search as func_search
from ._data_products import upload as func_upload
from ._data_products import delete as func_delete
from ._data_products import delete_urls as func_delete_urls
from ._data_products import describe as func_describe
from ._data_products import get_request_url as func_get_request_url
__all__ = ["DataProductsManager"]
class DataProductsManager:
    """
    Entry point for searching, uploading, and deleting AuroraX data product records.

    A DataProductsManager is initialized within every PyAuroraX object. It keeps a
    reference to that object and passes it as the first argument to the underlying
    `_data_products` functions, so the parent's configuration information is
    carried over to every call.
    """

    # Default time to wait between polling attempts (after the initial sleep time), in seconds
    __STANDARD_POLLING_SLEEP_TIME: float = 1.0

    # Default number of data product records to upload at a time
    __UPLOAD_CHUNK_SIZE = 500

    def __init__(self, aurorax_obj):
        # Parent object; forwarded to every wrapped function below
        self.__aurorax_obj = aurorax_obj

    def search(
        self,
        start: datetime.datetime,
        end: datetime.datetime,
        programs: Optional[List[str]] = None,
        platforms: Optional[List[str]] = None,
        instrument_types: Optional[List[str]] = None,
        data_product_types: Optional[List[str]] = None,
        metadata_filters: Optional[List[Dict]] = None,
        metadata_filters_logical_operator: Optional[str] = None,
        response_format: Optional[Dict] = None,
        poll_interval: float = __STANDARD_POLLING_SLEEP_TIME,
        return_immediately: bool = False,
        verbose: bool = False,
    ) -> DataProductSearch:
        """
        Search for data product records.

        By default, this function blocks until the request completes and all
        data has been downloaded. To avoid waiting, set `return_immediately` to
        True: the search object is then returned right after the search has been
        started, and its helper functions can be used to get the data once it is
        ready.

        Note: at least one search criterion from programs, platforms, or
        instrument_types must be specified.

        Args:
            start: start timestamp of the search (inclusive)
            end: end timestamp of the search (inclusive)
            programs: list of programs to search through, defaults to None
            platforms: list of platforms to search through, defaults to None
            instrument_types: list of instrument types to search through, defaults to None
            data_product_types: list of data product type names to filter on,
                e.g. "keogram", defaults to None. Options are in the
                pyaurorax.data_products module, or at the top level using the
                pyaurorax.DATA_PRODUCT_TYPE* variables.
            metadata_filters: list of dictionaries describing metadata keys and
                values to filter on, defaults to None

                Example:

                    [{
                        "key": "nbtrace_region",
                        "operator": "in",
                        "values": ["north polar cap"]
                    }]
            metadata_filters_logical_operator: the logical operator to use when
                evaluating metadata filters (either 'AND' or 'OR'). The parameter
                defaults to None, which is documented as being treated as "AND".
            response_format: JSON representation of desired data response format
            poll_interval: time in seconds to wait between polling attempts,
                defaults to 1.0
            return_immediately: initiate the search and return without waiting for
                data to be received, defaults to False
            verbose: output poll times and other progress messages, defaults to False

        Returns:
            a `pyaurorax.search.DataProductSearch` object
        """
        # Arguments are forwarded positionally, in the same order as this signature
        search_request = func_search(
            self.__aurorax_obj, start, end,
            programs, platforms, instrument_types, data_product_types,
            metadata_filters, metadata_filters_logical_operator,
            response_format, poll_interval, return_immediately, verbose,
        )
        return search_request

    def upload(
        self,
        identifier: int,
        records: List[DataProductData],
        validate_source: bool = False,
        chunk_size: int = __UPLOAD_CHUNK_SIZE,
    ) -> int:
        """
        Upload data product records to AuroraX.

        Args:
            identifier: the AuroraX data source ID
            records: data product records to upload
            validate_source: validate all records before uploading, defaults to False
            chunk_size: number of records to upload in a single call, defaults to 500

        Returns:
            0 for success, raises exception on error

        Raises:
            pyaurorax.exceptions.AuroraXUploadError: upload error
            pyaurorax.exceptions.AuroraXError: data source validation error
        """
        upload_status = func_upload(self.__aurorax_obj, identifier, records, validate_source, chunk_size)
        return upload_status

    def delete_urls(
        self,
        data_source: DataSource,
        urls: List[str],
    ) -> int:
        """
        Delete data products by URL.

        The API processes this request asynchronously, so this method returns
        immediately, whether or not the data has already been deleted.

        Args:
            data_source: data source associated with the data product records (note
                that identifier, program, platform, and instrument_type are required)
            urls: URLs of data product records to delete

        Returns:
            0 on success

        Raises:
            pyaurorax.exceptions.AuroraXAPIError: an API error was encountered
            pyaurorax.exceptions.AuroraXUnauthorizedError: invalid API key for this operation
        """
        delete_status = func_delete_urls(self.__aurorax_obj, data_source, urls)
        return delete_status

    def delete(
        self,
        data_source: DataSource,
        start: datetime.datetime,
        end: datetime.datetime,
        data_product_types: Optional[List[str]] = None,
    ) -> int:
        """
        Delete data products associated with a data source within a date range.

        The API processes this request asynchronously, so this method returns
        immediately, whether or not the data has already been deleted.

        Args:
            data_source: data source associated with the data product records (note
                that identifier, program, platform, and instrument_type are required)
            start: timestamp marking beginning of range to delete records for, inclusive
            end: timestamp marking end of range to delete records for, inclusive
            data_product_types: specific types of data product to delete, e.g.
                ["keogram", "movie"]. If omitted, all data product types will be deleted.

        Returns:
            1 on success

        Raises:
            pyaurorax.exceptions.AuroraXNotFoundError: source not found
            pyaurorax.exceptions.AuroraXUnauthorizedError: invalid API key for this operation
        """
        # NOTE(review): this docstring promises 1 on success, whereas delete_urls and
        # upload document 0 -- confirm against the wrapped _data_products.delete.
        delete_status = func_delete(self.__aurorax_obj, data_source, start, end, data_product_types)
        return delete_status

    def describe(
        self,
        search_obj: Optional[DataProductSearch] = None,
        query_dict: Optional[Dict] = None,
    ) -> str:
        """
        Describe a data product search as an "SQL-like" string.

        Either a DataProductSearch object can be supplied, or a dictionary of the
        raw JSON query.

        Args:
            search_obj: the data product search to describe, optional
            query_dict: the data product search query represented as a raw
                dictionary, optional

        Returns:
            the "SQL-like" string describing the data product search object
        """
        description = func_describe(self.__aurorax_obj, search_obj, query_dict)
        return description

    def get_request_url(self, request_id: str) -> str:
        """
        Get the data product search request URL for a given request ID.

        This URL can be used for subsequent pyaurorax.requests function calls.
        Primarily this method facilitates delving into details about a set of
        already-submitted data product searches.

        Args:
            request_id: the request identifier

        Returns:
            the request URL
        """
        request_url = func_get_request_url(self.__aurorax_obj, request_id)
        return request_url
Sub-modules
pyaurorax.search.data_products.classes
-
Separated classes and functions used by the data_products module …
Classes
class DataProductsManager (aurorax_obj)
-
The DataProductsManager object is initialized within every PyAuroraX object. It acts as a way to access the submodules and carry over configuration information in the super class.
Expand source code
class DataProductsManager:
    """
    Entry point for searching, uploading, and deleting AuroraX data product records.

    A DataProductsManager is initialized within every PyAuroraX object. It keeps a
    reference to that object and passes it as the first argument to the underlying
    `_data_products` functions, so the parent's configuration information is
    carried over to every call.
    """

    # Default time to wait between polling attempts (after the initial sleep time), in seconds
    __STANDARD_POLLING_SLEEP_TIME: float = 1.0

    # Default number of data product records to upload at a time
    __UPLOAD_CHUNK_SIZE = 500

    def __init__(self, aurorax_obj):
        # Parent object; forwarded to every wrapped function below
        self.__aurorax_obj = aurorax_obj

    def search(
        self,
        start: datetime.datetime,
        end: datetime.datetime,
        programs: Optional[List[str]] = None,
        platforms: Optional[List[str]] = None,
        instrument_types: Optional[List[str]] = None,
        data_product_types: Optional[List[str]] = None,
        metadata_filters: Optional[List[Dict]] = None,
        metadata_filters_logical_operator: Optional[str] = None,
        response_format: Optional[Dict] = None,
        poll_interval: float = __STANDARD_POLLING_SLEEP_TIME,
        return_immediately: bool = False,
        verbose: bool = False,
    ) -> DataProductSearch:
        """
        Search for data product records.

        By default, this function blocks until the request completes and all
        data has been downloaded. To avoid waiting, set `return_immediately` to
        True: the search object is then returned right after the search has been
        started, and its helper functions can be used to get the data once it is
        ready.

        Note: at least one search criterion from programs, platforms, or
        instrument_types must be specified.

        Args:
            start: start timestamp of the search (inclusive)
            end: end timestamp of the search (inclusive)
            programs: list of programs to search through, defaults to None
            platforms: list of platforms to search through, defaults to None
            instrument_types: list of instrument types to search through, defaults to None
            data_product_types: list of data product type names to filter on,
                e.g. "keogram", defaults to None. Options are in the
                pyaurorax.data_products module, or at the top level using the
                pyaurorax.DATA_PRODUCT_TYPE* variables.
            metadata_filters: list of dictionaries describing metadata keys and
                values to filter on, defaults to None

                Example:

                    [{
                        "key": "nbtrace_region",
                        "operator": "in",
                        "values": ["north polar cap"]
                    }]
            metadata_filters_logical_operator: the logical operator to use when
                evaluating metadata filters (either 'AND' or 'OR'). The parameter
                defaults to None, which is documented as being treated as "AND".
            response_format: JSON representation of desired data response format
            poll_interval: time in seconds to wait between polling attempts,
                defaults to 1.0
            return_immediately: initiate the search and return without waiting for
                data to be received, defaults to False
            verbose: output poll times and other progress messages, defaults to False

        Returns:
            a `pyaurorax.search.DataProductSearch` object
        """
        # Arguments are forwarded positionally, in the same order as this signature
        search_request = func_search(
            self.__aurorax_obj, start, end,
            programs, platforms, instrument_types, data_product_types,
            metadata_filters, metadata_filters_logical_operator,
            response_format, poll_interval, return_immediately, verbose,
        )
        return search_request

    def upload(
        self,
        identifier: int,
        records: List[DataProductData],
        validate_source: bool = False,
        chunk_size: int = __UPLOAD_CHUNK_SIZE,
    ) -> int:
        """
        Upload data product records to AuroraX.

        Args:
            identifier: the AuroraX data source ID
            records: data product records to upload
            validate_source: validate all records before uploading, defaults to False
            chunk_size: number of records to upload in a single call, defaults to 500

        Returns:
            0 for success, raises exception on error

        Raises:
            pyaurorax.exceptions.AuroraXUploadError: upload error
            pyaurorax.exceptions.AuroraXError: data source validation error
        """
        upload_status = func_upload(self.__aurorax_obj, identifier, records, validate_source, chunk_size)
        return upload_status

    def delete_urls(
        self,
        data_source: DataSource,
        urls: List[str],
    ) -> int:
        """
        Delete data products by URL.

        The API processes this request asynchronously, so this method returns
        immediately, whether or not the data has already been deleted.

        Args:
            data_source: data source associated with the data product records (note
                that identifier, program, platform, and instrument_type are required)
            urls: URLs of data product records to delete

        Returns:
            0 on success

        Raises:
            pyaurorax.exceptions.AuroraXAPIError: an API error was encountered
            pyaurorax.exceptions.AuroraXUnauthorizedError: invalid API key for this operation
        """
        delete_status = func_delete_urls(self.__aurorax_obj, data_source, urls)
        return delete_status

    def delete(
        self,
        data_source: DataSource,
        start: datetime.datetime,
        end: datetime.datetime,
        data_product_types: Optional[List[str]] = None,
    ) -> int:
        """
        Delete data products associated with a data source within a date range.

        The API processes this request asynchronously, so this method returns
        immediately, whether or not the data has already been deleted.

        Args:
            data_source: data source associated with the data product records (note
                that identifier, program, platform, and instrument_type are required)
            start: timestamp marking beginning of range to delete records for, inclusive
            end: timestamp marking end of range to delete records for, inclusive
            data_product_types: specific types of data product to delete, e.g.
                ["keogram", "movie"]. If omitted, all data product types will be deleted.

        Returns:
            1 on success

        Raises:
            pyaurorax.exceptions.AuroraXNotFoundError: source not found
            pyaurorax.exceptions.AuroraXUnauthorizedError: invalid API key for this operation
        """
        # NOTE(review): this docstring promises 1 on success, whereas delete_urls and
        # upload document 0 -- confirm against the wrapped _data_products.delete.
        delete_status = func_delete(self.__aurorax_obj, data_source, start, end, data_product_types)
        return delete_status

    def describe(
        self,
        search_obj: Optional[DataProductSearch] = None,
        query_dict: Optional[Dict] = None,
    ) -> str:
        """
        Describe a data product search as an "SQL-like" string.

        Either a DataProductSearch object can be supplied, or a dictionary of the
        raw JSON query.

        Args:
            search_obj: the data product search to describe, optional
            query_dict: the data product search query represented as a raw
                dictionary, optional

        Returns:
            the "SQL-like" string describing the data product search object
        """
        description = func_describe(self.__aurorax_obj, search_obj, query_dict)
        return description

    def get_request_url(self, request_id: str) -> str:
        """
        Get the data product search request URL for a given request ID.

        This URL can be used for subsequent pyaurorax.requests function calls.
        Primarily this method facilitates delving into details about a set of
        already-submitted data product searches.

        Args:
            request_id: the request identifier

        Returns:
            the request URL
        """
        request_url = func_get_request_url(self.__aurorax_obj, request_id)
        return request_url
Methods
def delete(self, data_source: DataSource, start: datetime.datetime, end: datetime.datetime, data_product_types: Optional[List[str]] = None) ‑> int
-
Delete data products associated with a data source within a date range.
The API processes this request asynchronously, so this method will return immediately whether or not the data has already been deleted.
Args
data_source
- data source associated with the data product records (note that identifier, program, platform, and instrument_type are required)
start
- timestamp marking beginning of range to delete records for, inclusive
end
- timestamp marking end of range to delete records for, inclusive
data_product_types
- specific types of data product to delete, e.g. ["keogram", "movie"]. If omitted, all data product types will be deleted.
Returns
1 on success
Raises
AuroraXNotFoundError
- source not found
AuroraXUnauthorizedError
- invalid API key for this operation
Expand source code
def delete(
    self,
    data_source: DataSource,
    start: datetime.datetime,
    end: datetime.datetime,
    data_product_types: Optional[List[str]] = None,
) -> int:
    """
    Delete data products associated with a data source within a date range.

    The API processes this request asynchronously, so this method returns
    immediately, whether or not the data has already been deleted.

    Args:
        data_source: data source associated with the data product records (note
            that identifier, program, platform, and instrument_type are required)
        start: timestamp marking beginning of range to delete records for, inclusive
        end: timestamp marking end of range to delete records for, inclusive
        data_product_types: specific types of data product to delete, e.g.
            ["keogram", "movie"]. If omitted, all data product types will be deleted.

    Returns:
        1 on success

    Raises:
        pyaurorax.exceptions.AuroraXNotFoundError: source not found
        pyaurorax.exceptions.AuroraXUnauthorizedError: invalid API key for this operation
    """
    # NOTE(review): this docstring promises 1 on success, whereas delete_urls and
    # upload document 0 -- confirm against the wrapped _data_products.delete.
    delete_status = func_delete(self.__aurorax_obj, data_source, start, end, data_product_types)
    return delete_status
def delete_urls(self, data_source: DataSource, urls: List[str]) ‑> int
-
Delete data products by URL.
The API processes this request asynchronously, so this method will return immediately whether or not the data has already been deleted.
Args
data_source
- data source associated with the data product records (note that identifier, program, platform, and instrument_type are required)
urls
- URLs of data product records to delete
Returns
0 on success
Raises
AuroraXAPIError
- An API error was encountered
AuroraXUnauthorizedError
- invalid API key for this operation
Expand source code
def delete_urls(
    self,
    data_source: DataSource,
    urls: List[str],
) -> int:
    """
    Delete data products by URL.

    The API processes this request asynchronously, so this method returns
    immediately, whether or not the data has already been deleted.

    Args:
        data_source: data source associated with the data product records (note
            that identifier, program, platform, and instrument_type are required)
        urls: URLs of data product records to delete

    Returns:
        0 on success

    Raises:
        pyaurorax.exceptions.AuroraXAPIError: an API error was encountered
        pyaurorax.exceptions.AuroraXUnauthorizedError: invalid API key for this operation
    """
    delete_status = func_delete_urls(self.__aurorax_obj, data_source, urls)
    return delete_status
def describe(self, search_obj: Optional[DataProductSearch] = None, query_dict: Optional[Dict] = None) ‑> str
-
Describe a data product search as an "SQL-like" string. Either a DataProductSearch object can be supplied, or a dictionary of the raw JSON query.
Args
search_obj
- the data product search to describe, optional
query_dict
- the data product search query represented as a raw dictionary, optional
Returns
the "SQL-like" string describing the data product search object
Expand source code
def describe(
    self,
    search_obj: Optional[DataProductSearch] = None,
    query_dict: Optional[Dict] = None,
) -> str:
    """
    Describe a data product search as an "SQL-like" string.

    Either a DataProductSearch object can be supplied, or a dictionary of the
    raw JSON query.

    Args:
        search_obj: the data product search to describe, optional
        query_dict: the data product search query represented as a raw
            dictionary, optional

    Returns:
        the "SQL-like" string describing the data product search object
    """
    description = func_describe(self.__aurorax_obj, search_obj, query_dict)
    return description
def get_request_url(self, request_id: str) ‑> str
-
Get the data product search request URL for a given request ID. This URL can be used for subsequent pyaurorax.requests function calls. Primarily this method facilitates delving into details about a set of already-submitted data product searches.
Args
request_id
- the request identifier
Returns
the request URL
Expand source code
def get_request_url(self, request_id: str) -> str:
    """
    Get the data product search request URL for a given request ID.

    This URL can be used for subsequent pyaurorax.requests function calls.
    Primarily this method facilitates delving into details about a set of
    already-submitted data product searches.

    Args:
        request_id: the request identifier

    Returns:
        the request URL
    """
    request_url = func_get_request_url(self.__aurorax_obj, request_id)
    return request_url
def search(self, start: datetime.datetime, end: datetime.datetime, programs: Optional[List[str]] = None, platforms: Optional[List[str]] = None, instrument_types: Optional[List[str]] = None, data_product_types: Optional[List[str]] = None, metadata_filters: Optional[List[Dict]] = None, metadata_filters_logical_operator: Optional[str] = None, response_format: Optional[Dict] = None, poll_interval: float = 1.0, return_immediately: bool = False, verbose: bool = False) ‑> DataProductSearch
-
Search for data product records
By default, this function will block and wait until the request completes and all data is downloaded. If you don't want to wait, set the `return_immediately` value to True. The Search object will be returned right after the search has been started, and you can use the helper functions as part of that object to get the data when it's done.
Note: At least one search criterion from programs, platforms, or instrument_types must be specified.
Args
start
- start timestamp of the search (inclusive)
end
- end timestamp of the search (inclusive)
programs
- list of programs to search through, defaults to None
platforms
- list of platforms to search through, defaults to None
instrument_types
- list of instrument types to search through, defaults to None
data_product_types
- list of data product type names (strings) to filter on, e.g. "keogram", defaults to None. Options are in the pyaurorax.data_products module, or at the top level using the pyaurorax.DATA_PRODUCT_TYPE* variables.
metadata_filters
-
list of dictionaries describing metadata keys and values to filter on, defaults to None
Example:
[{ "key": "nbtrace_region", "operator": "in", "values": ["north polar cap"] }]
metadata_filters_logical_operator
- the logical operator to use when evaluating metadata filters (either 'AND' or 'OR'), defaults to "AND"
response_format
- JSON representation of desired data response format
poll_interval
- time in seconds to wait between polling attempts, defaults to pyaurorax.requests.STANDARD_POLLING_SLEEP_TIME
return_immediately
- initiate the search and return without waiting for data to be received, defaults to False
verbose
- output poll times and other progress messages, defaults to False
Returns
a
DataProductSearch
object
Expand source code
def search(
    self,
    start: datetime.datetime,
    end: datetime.datetime,
    programs: Optional[List[str]] = None,
    platforms: Optional[List[str]] = None,
    instrument_types: Optional[List[str]] = None,
    data_product_types: Optional[List[str]] = None,
    metadata_filters: Optional[List[Dict]] = None,
    metadata_filters_logical_operator: Optional[str] = None,
    response_format: Optional[Dict] = None,
    poll_interval: float = __STANDARD_POLLING_SLEEP_TIME,
    return_immediately: bool = False,
    verbose: bool = False,
) -> DataProductSearch:
    """
    Search for data product records.

    By default, this function blocks until the request completes and all
    data has been downloaded. To avoid waiting, set `return_immediately` to
    True: the search object is then returned right after the search has been
    started, and its helper functions can be used to get the data once it is
    ready.

    Note: at least one search criterion from programs, platforms, or
    instrument_types must be specified.

    Args:
        start: start timestamp of the search (inclusive)
        end: end timestamp of the search (inclusive)
        programs: list of programs to search through, defaults to None
        platforms: list of platforms to search through, defaults to None
        instrument_types: list of instrument types to search through, defaults to None
        data_product_types: list of data product type names to filter on,
            e.g. "keogram", defaults to None. Options are in the
            pyaurorax.data_products module, or at the top level using the
            pyaurorax.DATA_PRODUCT_TYPE* variables.
        metadata_filters: list of dictionaries describing metadata keys and
            values to filter on, defaults to None

            Example:

                [{
                    "key": "nbtrace_region",
                    "operator": "in",
                    "values": ["north polar cap"]
                }]
        metadata_filters_logical_operator: the logical operator to use when
            evaluating metadata filters (either 'AND' or 'OR'). The parameter
            defaults to None, which is documented as being treated as "AND".
        response_format: JSON representation of desired data response format
        poll_interval: time in seconds to wait between polling attempts,
            defaults to 1.0
        return_immediately: initiate the search and return without waiting for
            data to be received, defaults to False
        verbose: output poll times and other progress messages, defaults to False

    Returns:
        a `pyaurorax.search.DataProductSearch` object
    """
    # Arguments are forwarded positionally, in the same order as this signature
    search_request = func_search(
        self.__aurorax_obj, start, end,
        programs, platforms, instrument_types, data_product_types,
        metadata_filters, metadata_filters_logical_operator,
        response_format, poll_interval, return_immediately, verbose,
    )
    return search_request
def upload(self, identifier: int, records: List[DataProductData], validate_source: bool = False, chunk_size: int = 500) ‑> int
-
Upload data product records to AuroraX
Args
identifier
- the AuroraX data source ID
records
- data product records to upload
validate_source
- validate all records before uploading, defaults to False
chunk_size
- number of records to upload in a single call, defaults to 500
Returns
0 for success, raises exception on error
Raises
AuroraXUploadError
- upload error
AuroraXError
- data source validation error
Expand source code
def upload(
    self,
    identifier: int,
    records: List[DataProductData],
    validate_source: bool = False,
    chunk_size: int = __UPLOAD_CHUNK_SIZE,
) -> int:
    """
    Upload data product records to AuroraX.

    Args:
        identifier: the AuroraX data source ID
        records: data product records to upload
        validate_source: validate all records before uploading, defaults to False
        chunk_size: number of records to upload in a single call, defaults to 500

    Returns:
        0 for success, raises exception on error

    Raises:
        pyaurorax.exceptions.AuroraXUploadError: upload error
        pyaurorax.exceptions.AuroraXError: data source validation error
    """
    upload_status = func_upload(self.__aurorax_obj, identifier, records, validate_source, chunk_size)
    return upload_status