# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""Extract Data Origin in VOTable.
References
----------
DataOrigin is a vocabulary described in the IVOA note: https://www.ivoa.net/documents/DataOrigin/
Notes
-----
This API retrieves metadata from INFO elements in a VOTable.
The information can be found at different levels in a VOTable:
- global level
- resource level
- table level
Contents
--------
- Query information: Each element is considered to be unique in the VOTable.
Information concerns publisher, date of execution, contact, request, etc.
- Dataset origin : basic provenance information.
Examples
--------
For more information, please see :ref:`DataOrigin documentation <astropy-io-votable-dataorigin>`.
"""
import astropy.io.votable.tree
from astropy.utils.decorators import deprecated_attribute
# Names exported by ``from <module> import *``.
# NOTE(review): "extract_data_origin" is listed here but its definition is not
# in the part of the file visible during this review -- confirm it exists.
__all__ = [
    "DataOrigin",
    "DatasetOrigin",
    "QueryOrigin",
    "add_data_origin_info",
    "extract_data_origin",
]
# INFO names describing the query that produced the VOTable.
# Each name is also an attribute of ``QueryOrigin`` and holds a single value.
DATAORIGIN_QUERY_INFO = (
    "service_ivoid",  # IVOID of the service that produced the VOTable
    "publisher",  # data centre that produced the VOTable
    "server_software",  # software version
    "service_protocol",  # IVOID of the protocol used to retrieve the data
    "request",  # full request URL including a query string
    "query",  # input query in a formal language (e.g. ADQL)
    "request_date",  # query execution date
    "contact",  # email or URL to contact the publisher
)
# INFO names describing the basic provenance of a dataset.
# Each name is also an attribute of ``DatasetOrigin`` and holds a list of
# values (the same INFO name may be repeated in a VOTable element).
DATAORIGIN_INFO = (
    "data_ivoid",  # IVOID of the underlying data collection
    "citation",  # dataset identifier usable for citation
    "reference_url",  # dataset landing page
    "resource_version",  # dataset version
    "rights_uri",  # licence URI
    "rights",  # licence or copyright text
    "creator",  # person(s) mainly involved in the creation of the resource
    "journal",  # editor name of the reference article
    "article",  # bibcode or DOI of a reference article
    "cites",  # identifier (ivoid, DOI, bibcode) of a second resource
    "is_derived_from",  # identifier (ivoid, DOI, bibcode) of a second resource
    "original_date",  # date of the original resource this one derives from
    "publication_date",  # date of first publication in the data centre
    "last_update_date",  # last data centre update
)
class QueryOrigin:
    """Data class storing query execution information that generated the VOTable.

    Notes
    -----
    The Query information should be unique in the whole VOTable.
    It includes reproducibility information to execute the query again.

    Parameters
    ----------
    votable_element : astropy.io.votable.tree.Element, optional
        Accepted for signature compatibility; it is not used by this class.

    Attributes
    ----------
    service_ivoid : str
        IVOID of the service that produced the VOTable (default: None)
    publisher : str
        Data centre that produced the VOTable (default: None)
    server_software : str
        Software version (default: None)
    service_protocol : str
        IVOID of the protocol through which the data was retrieved (default: None)
    request : str
        Full request URL including a query string (default: None)
    query : str
        An input query in a formal language (e.g, ADQL) (default: None)
    request_date : str
        Query execution date (default: None)
    contact : str
        Email or URL to contact publisher (default: None)
    infos : list[astropy.io.votable.tree.Info]
        list of ``<INFO>`` used by DataOrigin (default: empty list)
    """

    _INFO_MAPPING = ("standardid",)  # DALI INFO

    def __init__(self, votable_element: astropy.io.votable.tree.Element = None):
        self.service_ivoid = None
        self.publisher = None
        self.server_software = None
        self.service_protocol = None
        self.request = None
        self.query = None
        self.request_date = None
        self.contact = None
        self.infos = []

    @property
    def standardID(self) -> str | None:
        """Compatibility with previous version: alias of ``service_protocol``."""
        return self.service_protocol

    @standardID.setter
    def standardID(self, value: str) -> None:
        """Compatibility with previous version.

        The value is only used as a fallback: an already filled
        ``service_protocol`` is never overwritten.
        """
        if not self.service_protocol:
            self.service_protocol = value

    # INFO names are matched in lower case (see ``_INFO_MAPPING``) and then
    # assigned with ``setattr(query, "standardid", value)``.  Without this
    # lower-case alias the value would end up in a stray instance attribute
    # and ``service_protocol`` / ``standardID`` would stay empty.
    standardid = standardID

    def __str__(self) -> str:
        # One "name: value" line per filled attribute, in vocabulary order.
        return "\n".join(
            f"{name}: {value}"
            for name in DATAORIGIN_QUERY_INFO
            if (value := getattr(self, name))
        )
class DatasetOrigin:
    """Data class storing the basic provenance for a Dataset.

    Notes
    -----
    DatasetOrigin is dedicated to a specific Element in a VOTable.
    These ``<INFO>`` Elements describe a Resource, a TableElement or are Global.

    Attributes
    ----------
    data_ivoid : list
        IVOID of underlying data collection (default: None)
    citation : list
        Dataset identifier that can be used for citation (default: None)
    reference_url : list
        Dataset landing page (default: None)
    resource_version : list
        Dataset version (default: None)
    rights_uri : list
        Licence URI (default: None)
    rights : list
        Licence or Copyright text (default: None)
    creator : list
        The person(s) mainly involved in the creation of the resource (default: None)
    journal : list
        Editor name of the reference article (default: None)
    article : list
        Bibcode or DOI of a reference article (default: None)
    cites : list
        An Identifier (ivoid, DOI, bibcode) of second resource (default: None)
    is_derived_from : list
        An Identifier (ivoid, DOI, bibcode) of second resource (default: None)
    original_date : list
        Date of the original resource from which the present resource is derived (default: None)
    publication_date : list
        Date of first publication in the data centre (default: None)
    last_update_date : list
        Last data centre update (default: None)
    infos : list[astropy.io.votable.tree.Info]
        list of ``<INFO>`` used by DataOrigin (default: empty list)
    ivoid : list
        Deprecated since 8.0, use ``data_ivoid``.
    editor : list
        Deprecated since 8.0, use ``journal``.
    """

    _INFO_MAPPING = ("editor", "ivoid")  # obsolete INFO

    # ``deprecated_attribute`` returns a ``property``.  A property only acts
    # as a descriptor when it is stored on the class; assigned on an instance
    # it is just a value, so ``origin.ivoid`` would return the property object
    # itself and never emit a deprecation warning.
    ivoid = deprecated_attribute(name="ivoid", alternative="data_ivoid", since="8.0")
    editor = deprecated_attribute(name="editor", alternative="journal", since="8.0")

    def __init__(self, votable_element: astropy.io.votable.tree.Element = None):
        """
        Constructor

        Parameters
        ----------
        votable_element: astropy.io.votable.tree.Element, optional
            indicates the VOTable element
        """
        self.data_ivoid = None
        self.citation = None
        self.reference_url = None
        self.resource_version = None
        self.rights_uri = None
        self.rights = None
        self.creator = None
        self.journal = None
        self.article = None
        self.cites = None
        self.is_derived_from = None
        self.original_date = None
        self.publication_date = None
        self.last_update_date = None
        self.__vo_elt = votable_element
        self.infos = []

    def get_votable_element(self) -> astropy.io.votable.tree.Element:
        """
        Get the VOTable element

        Returns
        -------
        astropy.io.votable.tree.Element
            The element given to the constructor (may be None).
        """
        return self.__vo_elt

    def __str__(self) -> str:
        # One "name: v1,v2,..." line per filled attribute, in vocabulary order.
        return "\n".join(
            f"{name}: {','.join(values)}"
            for name in DATAORIGIN_INFO
            if (values := getattr(self, name))
        )

    def is_empty(self) -> bool:
        """Check whether no DataOrigin information is stored.

        Returns
        -------
        bool
            True when every attribute listed in ``DATAORIGIN_INFO`` is None.
        """
        return all(getattr(self, name) is None for name in DATAORIGIN_INFO)
class DataOrigin:
    """Class parsing a VOTable and storing both information about query execution
    and basic provenances.

    Parameters
    ----------
    vot_element : VOTableFile, Resource or TableElement, optional
        VOTable element to explore. When provided, ``parse`` is called by
        the constructor.

    Attributes
    ----------
    query : QueryOrigin
        Query execution information.
    origin : list[DatasetOrigin]
        One entry per VOTable element carrying dataset provenance.
    """

    def __init__(self, vot_element=None):
        self.query = QueryOrigin()
        self.origin = []
        self.__it = None  # cursor of the legacy ``__next__`` protocol
        self.__vot_element = vot_element
        if vot_element:
            self.parse()

    def __str__(self) -> str:
        origins = "\n\n".join(str(origin) for origin in self.origin)
        return str(self.query) + "\n\n" + origins

    def __iter__(self):
        # Hand out an independent iterator: a cursor shared on ``self`` makes
        # nested loops over the same object corrupt each other.
        # The cursor is still reset so that ``iter(obj); next(obj)`` keeps
        # working for code written against the previous behaviour.
        self.__it = -1
        return iter(self.origin)

    def __next__(self):
        # Legacy protocol (``next(obj)``); loops use the iterator returned
        # by ``__iter__`` instead.
        if self.__it is None:
            self.__it = -1
        self.__it += 1
        if self.__it >= len(self.origin):
            raise StopIteration
        return self.origin[self.__it]

    def __extract_generic_info(
        self, vo_fragment: astropy.io.votable.tree.Element, infos: list
    ):
        """(internal) extract info and populate DataOrigin

        Parameters
        ----------
        vo_fragment : astropy.io.votable.tree.Element
            VOTable element (votable, resource or table)
        infos : list[astropy.io.votable.tree.Info]
            list of ``<INFO>``
        """
        if not infos:
            return

        dataset_origin = DatasetOrigin(vo_fragment)
        dataset_origin_terms = DATAORIGIN_INFO + DatasetOrigin._INFO_MAPPING
        query_origin_terms = DATAORIGIN_QUERY_INFO + QueryOrigin._INFO_MAPPING

        for info in infos:
            if not info.name:
                # an INFO without a name cannot be a DataOrigin term
                continue
            # INFO names are matched case-insensitively
            info_name = info.name.lower()

            if info_name in dataset_origin_terms:
                dataset_origin.infos.append(info)
                att = getattr(dataset_origin, info_name)
                # Dataset terms are multi-valued: start a list on first use.
                # A bare ``property`` object is treated as "not set yet".
                if att is None or isinstance(att, property):
                    setattr(dataset_origin, info_name, [info.value])
                else:
                    att.append(info.value)

            if info_name in query_origin_terms:
                # Query terms are single-valued: the last one wins.
                self.query.infos.append(info)
                setattr(self.query, info_name, info.value)

        if not dataset_origin.is_empty():
            self.origin.append(dataset_origin)

    def __extract_info_from_table(self, table: astropy.io.votable.tree.TableElement):
        """(internal) extract and populate dataOrigin from astropy.io.votable.tree.TableElement

        Parameters
        ----------
        table : astropy.io.votable.tree.TableElement
            Table to explore.
        """
        self.__extract_generic_info(table, table.infos)

    def __extract_info_from_resource(
        self,
        resource: astropy.io.votable.tree.Resource,
        recursive: bool = True,
    ):
        """(internal) extract and populate dataOrigin from astropy.io.votable.tree.Resource

        Parameters
        ----------
        resource : astropy.io.votable.tree.Resource
            Resource to explore.
        recursive : bool, optional
            also explore the tables of the resource (default: True)
        """
        self.__extract_generic_info(resource, resource.infos)
        if recursive:
            for table in resource.tables:
                self.__extract_info_from_table(table)

    def __extract_info_from_votable(
        self,
        votable: astropy.io.votable.tree.VOTableFile,
        recursive: bool = True,
    ):
        """(internal) extract and populate dataOrigin from astropy.io.votable.tree.VOTableFile

        Parameters
        ----------
        votable : astropy.io.votable.tree.VOTableFile
            VOTableFile to explore.
        recursive : bool, optional
            also explore the resources of the VOTable (default: True)
        """
        self.__extract_generic_info(votable, votable.infos)
        if recursive:
            for resource in votable.resources:
                self.__extract_info_from_resource(resource)

    def parse(self) -> None:
        """Extract DataOrigin in a VO element

        Raises
        ------
        TypeError
            input ``vot_element`` type is not supported
        """
        element = self.__vot_element
        if isinstance(element, astropy.io.votable.tree.VOTableFile):
            self.__extract_info_from_votable(element)
        elif isinstance(element, astropy.io.votable.tree.Resource):
            self.__extract_info_from_resource(element)
        elif isinstance(element, astropy.io.votable.tree.TableElement):
            self.__extract_info_from_table(element)
        else:
            raise TypeError("input vot_element type is not supported.")

    @staticmethod
    def __clean_votable_info(vot_element: astropy.io.votable.tree.Element) -> None:
        """(internal) Clean existing DataOrigin INFO in the VOTable Element

        The same elements as in ``parse`` are visited (VOTable -> resources
        -> tables), so everything that was read is removed before being
        written again, and nothing that was never read is dropped.

        Parameters
        ----------
        vot_element : astropy.io.votable.tree.Element
            VOTable Element where to remove the INFO
        """
        known_names = (
            DATAORIGIN_QUERY_INFO
            + DATAORIGIN_INFO
            + QueryOrigin._INFO_MAPPING
            + DatasetOrigin._INFO_MAPPING
        )
        # iterate over a copy: items are removed from the live list
        for info in list(vot_element.infos):
            # same case-insensitive matching as the extraction
            if info.name and info.name.lower() in known_names:
                vot_element.infos.remove(info)

        if isinstance(vot_element, astropy.io.votable.tree.Resource):
            children = vot_element.tables
        elif isinstance(vot_element, astropy.io.votable.tree.VOTableFile):
            children = vot_element.resources
        else:
            children = ()
        for child in children:
            DataOrigin.__clean_votable_info(child)

    @staticmethod
    def __append_votable_info(
        vot_element: astropy.io.votable.tree.Element,
        name: str,
        value: str | list,
        content: str | None = None,
        unique: bool = False,
    ) -> None:
        """(internal) add new DATAOrigin info in the VOTable Element

        Parameters
        ----------
        vot_element : astropy.io.votable.tree.Element
            VOTable Element where to add a new INFO
        name : str
            INFO name
        value: str | list
            the INFO value (one ``<INFO>`` is added per item of a list)
        content: str, optional
            INFO description (default: None)
        unique: bool, optional
            the INFO element is unique: nothing is added when an INFO with
            the same name already exists (default: False)

        Raises
        ------
        TypeError
            ``vot_element`` is not a VOTableFile, Resource or TableElement
        """
        if not isinstance(
            vot_element,
            (
                astropy.io.votable.tree.VOTableFile,
                astropy.io.votable.tree.Resource,
                astropy.io.votable.tree.TableElement,
            ),
        ):
            raise TypeError("input vot_element type is not supported.")

        existing_values = [info.value for info in vot_element.infos if info.name == name]
        if unique and existing_values:
            return

        values = value if isinstance(value, list) else [value]
        for val in values:
            # compare item by item: comparing an INFO value with the whole
            # list never matches, so duplicates would never be detected
            if val in existing_values:
                continue
            new_info = astropy.io.votable.tree.Info(name=name, value=val)
            if content:
                new_info.content = content
            vot_element.infos.extend([new_info])

    def update_votable(self):
        """Update the VOTable fragment with DataOrigin <INFO>

        Existing DataOrigin ``<INFO>`` are removed, then rebuilt from
        ``query`` (on the parsed element) and from each item of ``origin``
        (on the element it was read from).

        Returns
        -------
        astropy.io.votable.tree.Element

        Raises
        ------
        ValueError
            no VOTable element is attached to this object
        """
        if not self.__vot_element:
            raise ValueError("VOTable not parsed yet (please call parse method first)")

        # clean existing DataOrigin info
        DataOrigin.__clean_votable_info(self.__vot_element)

        for item in DATAORIGIN_QUERY_INFO:
            att = getattr(self.query, item)
            if att:
                DataOrigin.__append_votable_info(
                    self.__vot_element, name=item, value=att, unique=True
                )

        for origin_info in self.origin:
            # an origin that is not bound to an element is written on the
            # element handled by this object
            vot_fragment = origin_info.get_votable_element()
            if not vot_fragment:
                vot_fragment = self.__vot_element
            for item in DATAORIGIN_INFO:
                att = getattr(origin_info, item)
                if att:
                    DataOrigin.__append_votable_info(vot_fragment, name=item, value=att)

        return self.__vot_element
def add_data_origin_info(
    vot_element: astropy.io.votable.tree.VOTableFile
    | astropy.io.votable.tree.Resource
    | astropy.io.votable.tree.TableElement,
    info_name: str,
    info_value: str,
    content: str | None = None,
) -> None:
    """Add an ``<INFO>`` of the DataOrigin vocabulary to a VOTable element.

    Notes
    -----
    The information name is validated first, then a new ``<INFO>`` is
    appended to ``vot_element.infos``.
    Dataset names (``DATAORIGIN_INFO``) are accepted on a VOTableFile, a
    Resource or a TableElement. Query names (``DATAORIGIN_QUERY_INFO``) are
    accepted on a VOTableFile only, and only once.

    Parameters
    ----------
    vot_element : astropy.io.votable.tree.VOTableFile | astropy.io.votable.tree.Resource | astropy.io.votable.tree.TableElement
        VOTable element where to add the information
    info_name : str
        Attribute name (see DATAORIGIN_INFO, DATAORIGIN_QUERY_INFO)
    info_value : str
        value
    content : str, optional
        Content in ``<INFO>`` (default: None)

    Raises
    ------
    TypeError
        ``vot_element`` has a type that is not supported for ``info_name``
    ValueError
        ``info_name`` is a query information that already exists in ``vot_element``
    ValueError
        ``info_name`` is an unknown DataOrigin name.
    """
    tree = astropy.io.votable.tree

    is_dataset_name = info_name in DATAORIGIN_INFO
    if not is_dataset_name and info_name not in DATAORIGIN_QUERY_INFO:
        raise ValueError("Unknown DataOrigin info name.")

    if is_dataset_name:
        supported_types = (tree.VOTableFile, tree.Resource, tree.TableElement)
        if not isinstance(vot_element, supported_types):
            raise TypeError("Unsupported vot_element type.")
    else:
        # query information lives at the VOTable level and must stay unique
        if not isinstance(vot_element, tree.VOTableFile):
            raise TypeError(
                "Bad type of vot_element: this information needs VOTableFile."
            )
        if any(True for _ in vot_element.get_infos_by_name(info_name)):
            raise ValueError(f"QueryOrigin {info_name} already exists")

    added = tree.Info(name=info_name, value=info_value)
    if content:
        added.content = content
    vot_element.infos.extend([added])