#!/usr/bin/env python
# encoding: utf-8
#
# @Author: Brian Cherinka, José Sánchez-Gallego, and Brett Andrews
# @Filename: core.py
# @License: BSD 3-Clause
# @Copyright: Brian Cherinka, José Sánchez-Gallego, and Brett Andrews
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
import abc
import distutils
import os
import re
import six
import warnings
import time
import astropy.io.fits
from brain.core.exceptions import BrainError
import marvin
import marvin.api.api
from marvin.core import marvin_pickle
from marvin.core.exceptions import MarvinUserWarning, MarvinError
from marvin.core.exceptions import MarvinMissingDependency, MarvinBreadCrumb
from marvin.utils.db import testDbConnection
from marvin.utils.general import mangaid2plateifu, get_nsa_data, get_dapall_file, map_dapall
try:
from sdss_access.path import Path
except ImportError:
Path = None
try:
from sdss_access import RsyncAccess
except ImportError:
RsyncAccess = None
# NOTE(review): Python only honours the lowercase ``__all__`` when resolving
# ``from module import *``; as spelled here this list has no effect on star
# imports. Confirm whether restricting the public API was intended.
__ALL__ = ['MarvinToolsClass']
def kwargsGet(kwargs, key, replacement):
    """Return ``kwargs[key]``, or ``replacement`` if it is missing or ``None``.

    Parameters:
        kwargs (dict):
            The dictionary of keyword arguments to look into.
        key:
            The key to retrieve.
        replacement:
            The value returned when ``key`` is absent from ``kwargs`` or when
            the stored value is ``None``.

    Returns:
        The stored value if it is present and not ``None``; otherwise
        ``replacement``.

    """

    # A missing key and an explicit ``None`` are treated the same way.
    value = kwargs.get(key)
    return replacement if value is None else value
# Module-level breadcrumb instance; ``MarvinToolsClass.__init__`` calls
# ``breadcrumb.drop`` on it every time a tool object is initialised.
breadcrumb = MarvinBreadCrumb()
class MarvinToolsClass(object, six.with_metaclass(abc.ABCMeta)):
    """Marvin tools main base class.

    This super class implements the :ref:`decision tree <marvin-dma>`
    for using local files, database, or remote connection when
    initialising a Marvin tools object.

    Parameters:
        input (str):
            A string that can be a filename, plate-ifu, or mangaid. It will be
            automatically identified based on its unique format. This argument
            is always the first one, so it can be defined without the keyword
            for convenience.
        filename (str):
            The path of the file containing the file to load. If set,
            ``input`` is ignored.
        mangaid (str):
            The mangaid of the file to load. If set, ``input`` is ignored.
        plateifu (str):
            The plate-ifu of the data cube to load. If set, ``input`` is
            ignored.
        mode ({'local', 'remote', 'auto'}):
            The load mode to use. See :ref:`mode-decision-tree`.
        data (:class:`~astropy.io.fits.HDUList`, SQLAlchemy object, or None):
            An astropy ``HDUList`` or a SQLAlchemy object, to be used for
            initialisation. If ``None``, the :ref:`normal <marvin-dma>` mode
            will be used.
        release (str):
            The MPL/DR version of the data to use.
        drpall (str):
            The path to the
            `drpall <https://trac.sdss.org/wiki/MANGA/TRM/TRM_MPL-5/metadata#DRP:DRPall>`_
            file to use. If not set it will use the default path for the file
            based on the ``release``
        download (bool):
            If ``True``, the data will be downloaded on instantiation. See
            :ref:`marvin-download-objects`.

    Attributes:
        data (:class:`~astropy.io.fits.HDUList`, SQLAlchemy object, or dict):
            Depending on the access mode, ``data`` is populated with the
            |HDUList| from the FITS file, a
            `SQLAlchemy <http://www.sqlalchemy.org>`_ object, or a dictionary
            of values returned by an API call.
        datamodel:
            A datamodel object, whose type depends on the subclass that
            initialises the datamodel.
        data_origin ({'file', 'db', 'api'}):
            Indicates the origin of the data, either from a file, the DB, or
            an API call.
        filename (str):
            The path of the file used, if any.
        mangaid (str):
            The mangaid of the target.
        plateifu:
            The plateifu of the target

    """

    def __init__(self, input=None, filename=None, mangaid=None, plateifu=None,
                 mode=None, data=None, release=None, drpall=None, download=None):

        self.data = data
        self.data_origin = None

        self.filename = filename
        self.mangaid = mangaid
        self.plateifu = plateifu

        # Explicit arguments win; otherwise fall back to the global config.
        self.mode = mode if mode is not None else marvin.config.mode

        self._release = release if release is not None else marvin.config.release

        self._drpver, self._dapver = marvin.config.lookUpVersions(release=self._release)
        self._drpall = marvin.config._getDrpAllPath(self._drpver) if drpall is None else drpall

        self._forcedownload = download if download is not None else marvin.config.download

        # Sets filename, plateifu, and mangaid depending on the values of the
        # input parameters.
        self._determine_inputs(input)

        self.datamodel = None
        self._set_datamodel()

        # drop breadcrumb
        breadcrumb.drop(message='Initializing MarvinTool {0}'.format(self.__class__),
                        category=self.__class__)

        assert self.mode in ['auto', 'local', 'remote']
        assert self.filename is not None or self.plateifu is not None, 'no inputs set.'

        if self.mode == 'local':
            self._doLocal()
        elif self.mode == 'remote':
            self._doRemote()
        elif self.mode == 'auto':
            try:
                self._doLocal()
            except Exception:
                if self.filename:
                    # If the input contains a filename we don't want to go into
                    # remote mode. A bare ``raise`` keeps the original traceback.
                    raise
                else:
                    warnings.warn('local mode failed. Trying remote now.', MarvinUserWarning)
                    self._doRemote()

        # Sanity check to make sure data_origin has been properly set.
        assert self.data_origin in ['file', 'db', 'api'], 'data_origin is not properly set.'

    def _determine_inputs(self, input):
        """Determines what inputs to use in the decision tree.

        If ``input`` is given, it is classified with `_parse_input` and stored
        as ``plateifu``, ``_plate``, ``mangaid``, or ``filename``. The resulting
        combination of attributes is then validated.

        Raises:
            MarvinError:
                If none of ``filename``, ``mangaid``, or ``plateifu`` is
                defined, or if a filename is used with ``mode='remote'``.
            AssertionError:
                If ``input`` is combined with other identifiers, is not a
                string, or if the filename does not exist.

        """

        if input:

            assert self.filename is None and self.plateifu is None and self.mangaid is None, \
                'if input is set, filename, plateifu, and mangaid cannot be set.'

            assert isinstance(input, six.string_types), 'input must be a string.'

            input_dict = self._parse_input(input)

            if input_dict['plate'] is not None and input_dict['ifu'] is not None:
                self.plateifu = input
            elif input_dict['plate'] is not None and input_dict['ifu'] is None:
                self._plate = input
            elif input_dict['mangaid'] is not None:
                self.mangaid = input
            else:
                # Assumes the input must be a filename
                self.filename = input

        if self.filename is None and self.mangaid is None and self.plateifu is None:
            raise MarvinError('no inputs defined.')

        if self.filename:
            # A filename takes precedence over any other identifier.
            self.mangaid = None
            self.plateifu = None

            if self.mode == 'remote':
                raise MarvinError('filename not allowed in remote mode.')

            assert os.path.exists(self.filename), \
                'filename {} does not exist.'.format(str(self.filename))

        elif self.plateifu:
            assert not self.filename, 'invalid set of inputs.'

        elif self.mangaid:
            assert not self.filename, 'invalid set of inputs.'
            self.plateifu = mangaid2plateifu(self.mangaid,
                                             drpall=self._drpall,
                                             drpver=self._drpver)

        # ``_plate`` is only assigned when a plate-only input was parsed (or
        # by a subclass), so it may not exist on the instance at this point.
        elif getattr(self, '_plate', None):
            assert not self.filename, 'invalid set of inputs.'

    @staticmethod
    def _parse_input(value):
        """Parses an input and determines plate, ifu, and mangaid.

        Parameters:
            value (str):
                The string to classify.

        Returns:
            dict:
                A dictionary with keys ``'plate'``, ``'ifu'``, and
                ``'mangaid'``. Entries that could not be determined are
                ``None``; if all of them are ``None`` the value matched none
                of the known patterns (or matched the plate-ifu pattern with
                an invalid IFU designation).

        """

        # Number of IFUs per size
        n_ifus = {19: 2, 37: 4, 61: 4, 91: 2, 127: 5, 7: 12}

        return_dict = {'plate': None, 'ifu': None, 'mangaid': None}

        plateifu_pattern = re.compile(r'([0-9]{4,5})-([0-9]{4,9})')
        ifu_pattern = re.compile('(7|127|[0-9]{2})([0-9]{2})')
        mangaid_pattern = re.compile(r'[0-9]{1,3}-[0-9]+')
        plateid_pattern = re.compile('([0-9]{4,})(?!-)(?<!-)')

        plateid_match = re.match(plateid_pattern, value)
        plateifu_match = re.match(plateifu_pattern, value)
        mangaid_match = re.match(mangaid_pattern, value)

        # Check whether the input value matches the plateifu pattern
        if plateifu_match is not None:
            plate, ifu = plateifu_match.groups(0)

            # If the value matches a plateifu, checks that the ifu is a valid one.
            ifu_match = re.match(ifu_pattern, ifu)
            if ifu_match is not None:
                ifu_size, ifu_id = map(int, ifu_match.groups(0))
                # An IFU size that is not in the table (e.g. 55) is simply not
                # a valid designation; it must not raise a KeyError.
                if ifu_size in n_ifus and ifu_id <= n_ifus[ifu_size]:
                    return_dict['plate'] = plate
                    return_dict['ifu'] = ifu

        # Check whether this is a mangaid
        elif mangaid_match is not None:
            return_dict['mangaid'] = value

        # Check whether this is a plate
        elif plateid_match is not None:
            return_dict['plate'] = value

        return return_dict

    def _doLocal(self):
        """Tests if it's possible to load the data locally.

        On success sets ``mode='local'`` and ``data_origin`` to ``'file'`` or
        ``'db'``; ``filename`` is also updated when the file is located in the
        tree or downloaded.

        Raises:
            MarvinError:
                If the input file does not exist, or if the data are neither
                in the DB nor on disk and downloading is not enabled.

        """

        if self.filename:

            if os.path.exists(self.filename):
                self.mode = 'local'
                self.data_origin = 'file'
            else:
                raise MarvinError('input file {0} not found'.format(self.filename))

        elif self.plateifu:

            testDbConnection(marvin.marvindb.session)

            if marvin.marvindb.db:
                self.mode = 'local'
                self.data_origin = 'db'
            else:
                fullpath = self._getFullPath()

                if fullpath and os.path.exists(fullpath):
                    self.mode = 'local'
                    self.filename = fullpath
                    self.data_origin = 'file'
                else:
                    if self._forcedownload:
                        self.download()
                        # The data now live in a local file, so record the
                        # mode as local like every other successful branch.
                        self.mode = 'local'
                        self.data_origin = 'file'
                    else:
                        raise MarvinError('this is the end of the road. '
                                          'Try using some reasonable inputs.')

    def _doRemote(self):
        """Tests if remote connection is possible.

        Raises:
            MarvinError:
                If a filename has been defined, since files cannot be used in
                remote mode.

        """

        if self.filename:
            raise MarvinError('filename not allowed in remote mode.')
        else:
            self.mode = 'remote'
            self.data_origin = 'api'

    @abc.abstractmethod
    def _set_datamodel(self):
        """Sets the datamodel for this object. Must be overridden by each subclass."""
        pass

    def download(self, pathType=None, **pathParams):
        """Download using sdss_access Rsync.

        Parameters:
            pathType (str):
                The sdss_access path name passed to ``RsyncAccess.add``.
            pathParams (dict):
                Additional keyword arguments passed to ``RsyncAccess.add``.

        Raises:
            MarvinError:
                If sdss_access is not installed or if no path is returned
                after the download.

        """

        if not RsyncAccess:
            raise MarvinError('sdss_access is not installed')
        else:
            rsync_access = RsyncAccess()
            rsync_access.remote()
            rsync_access.add(pathType, **pathParams)
            rsync_access.set_stream()
            rsync_access.commit()
            paths = rsync_access.get_paths()

            # adding a millisecond pause for download to finish and file
            # existence to register
            time.sleep(0.001)

            if not paths:
                raise MarvinError('sdss_access did not return any downloaded path.')

            self.filename = paths[0]  # doing this for single files, may need to change

    @abc.abstractmethod
    def _getFullPath(self, pathType=None, url=None, **pathParams):
        """Returns the full path of the file in the tree.

        This method must be overridden by each subclass.

        Parameters:
            pathType (str):
                The sdss_access path name to resolve.
            url (bool):
                If set, the URL of the file is returned (``Path.url``) instead
                of its location on disk (``Path.full``).
            pathParams (dict):
                Additional keyword arguments passed to sdss_access.

        Returns:
            The path or URL, or ``None`` if sdss_access failed to build it (a
            `MarvinUserWarning` is issued in that case).

        Raises:
            MarvinMissingDependency:
                If sdss_access is not installed.

        """

        if not Path:
            raise MarvinMissingDependency('sdss_access is not installed')
        else:
            try:
                if url:
                    fullpath = Path().url(pathType, **pathParams)
                else:
                    fullpath = Path().full(pathType, **pathParams)
            except Exception as ee:
                warnings.warn('sdss_access was not able to retrieve the full path of the file. '
                              'Error message is: {0}'.format(str(ee)), MarvinUserWarning)
                fullpath = None

        return fullpath

    def _toolInteraction(self, url, params=None):
        """Runs an Interaction and passes self._release.

        The release is only sent when ``params`` is empty or ``None``; if
        ``params`` is provided it is forwarded unchanged.

        """

        params = params or {'release': self._release}
        return marvin.api.api.Interaction(url, params=params)

    @staticmethod
    def _check_file(header, data, objtype):
        """Check the file input to ensure correct tool.

        Parameters:
            header:
                The header of the file, checked for the ``binkey``,
                ``bintype``, and ``dapfrmt`` keywords.
            data:
                The container checked for an ``EMLINE_GFLUX`` entry.
            objtype (str):
                The tool opening the file; ``'Maps'``, ``'ModelCube'``, and
                ``'Cube'`` are checked, any other value is ignored.

        Raises:
            MarvinError:
                If the file does not correspond to ``objtype``.

        """

        # get/check various header keywords
        bininhdr = ('binkey' in header) or ('bintype' in header)
        dapinhdr = 'dapfrmt' in header
        dapfrmt = header['DAPFRMT'] if dapinhdr else None

        # check the file
        if objtype == 'Maps' or objtype == 'ModelCube':
            # get indices in daptype
            daptype = ['MAPS', 'LOGCUBE']
            dapindex = daptype.index("MAPS") if objtype == 'Maps' else daptype.index("LOGCUBE")
            altdap = 1 - dapindex

            # check for emline_gflux extension
            gfluxindata = 'EMLINE_GFLUX' in data
            wronggflux = (gfluxindata and objtype == 'ModelCube') or \
                (not gfluxindata and objtype == 'Maps')

            if not bininhdr:
                raise MarvinError('Trying to open a non DAP file with Marvin {0}'.format(objtype))
            else:
                if (dapfrmt and dapfrmt != daptype[dapindex]) or (wronggflux):
                    raise MarvinError('Trying to open a DAP {0} with Marvin {1}'
                                      .format(daptype[altdap], objtype))

        elif objtype == 'Cube':
            if bininhdr or dapinhdr:
                raise MarvinError('Trying to open a DAP file with Marvin Cube')

    def __getstate__(self):
        """Returns the instance dictionary without ``data`` for pickling."""

        if self.data_origin == 'db':
            raise MarvinError('objects with data_origin=\'db\' cannot be saved.')

        odict = self.__dict__.copy()
        del odict['data']

        return odict

    def __setstate__(self, idict):
        """Restores the state, reopening the FITS file for file-based objects."""

        data = None
        if idict['data_origin'] == 'file':
            try:
                data = astropy.io.fits.open(idict['filename'])
            except Exception as ee:
                warnings.warn('there was a problem reloading the FITS object: {0}. '
                              'The object has been unpickled but not all the functionality '
                              'will be available.'.format(str(ee)), MarvinUserWarning)

        self.__dict__.update(idict)
        self.data = data

    def save(self, path=None, overwrite=False):
        """Pickles the object.

        If ``path=None``, uses the default location of the file in the tree
        but changes the extension of the file to ``.mpf``. Returns the path
        of the saved pickle file.

        Parameters:
            path (str):
                Path of saved file. Default is ``None``.
            overwrite (bool):
                If ``True``, overwrite existing file. Default is ``False``.

        Returns:
            str:
                Path of saved file.

        """

        return marvin_pickle.save(self, path=path, overwrite=overwrite)

    @classmethod
    def restore(cls, path, delete=False):
        """Restores a MarvinToolsClass object from a pickled file.

        If ``delete=True``, the pickled file will be removed after it has been
        unpickled. Note that, for objects with ``data_origin='file'``, the
        original file must exist and be in the same path as when the object
        was first created.

        """

        return marvin_pickle.restore(path, delete=delete)

    @property
    def release(self):
        """Returns the release."""

        return self._release

    @release.setter
    def release(self, value):
        """Fails when trying to set the release after instantiation."""

        raise MarvinError('the release cannot be changed once the object has been instantiated.')

    @property
    def plate(self):
        """Returns the plate id."""

        return int(self.plateifu.split('-')[0])

    @property
    def ifu(self):
        """Returns the IFU."""

        return int(self.plateifu.split('-')[1])

    def __del__(self):
        """Destructor for closing FITS files."""

        # ``__del__`` may run on an instance whose ``__init__`` never assigned
        # these attributes, so they are read defensively.
        data_origin = getattr(self, 'data_origin', None)
        data = getattr(self, 'data', None)

        if data_origin == 'file' and isinstance(data, astropy.io.fits.HDUList):
            try:
                data.close()
            except Exception as ee:
                warnings.warn('failed to close FITS instance: {0}'.format(ee), MarvinUserWarning)
class NSAMixIn(object):
    """A mixin that provides access to NSA parameters.

    Must be used in combination with `.MarvinToolsClass` and initialised
    before `~.NSAMixIn.nsa` can be called.

    Parameters:
        nsa_source ({'auto', 'drpall', 'nsa'}):
            Defines how the NSA data for this object should loaded when
            ``.nsa`` is first called. If ``drpall``, the drpall file will
            be used (note that this will only contain a subset of all the NSA
            information); if ``nsa``, the full set of data from the DB will be
            retrieved. If the drpall file or a database are not available, a
            remote API call will be attempted. If ``nsa_source='auto'``, the
            source will depend on how the parent object has been
            instantiated. If the parent has ``data_origin='file'``,
            the drpall file will be used (as it is more likely that the user
            has that file in their system). Otherwise, ``nsa_source='nsa'``
            will be assumed. This behaviour can be modified during runtime by
            modifying the ``nsa_source`` attribute with one of the valid values.

    """

    def __init__(self, nsa_source='auto'):

        self._nsa = None
        self.nsa_source = nsa_source

        assert self.nsa_source in ['auto', 'nsa', 'drpall'], \
            'nsa_source must be one of auto, nsa, or drpall'

    @property
    def nsa(self):
        """Returns the contents of the NSA catalogue for this target.

        The result is cached in ``_nsa`` after the first successful call. If
        the data cannot be loaded a `MarvinUserWarning` is issued and ``None``
        is returned.

        """

        # Both attributes are read defensively so that the property behaves
        # the same way whether or not ``NSAMixIn.__init__`` has been called.
        cached_nsa = getattr(self, '_nsa', None)
        if cached_nsa is not None:
            return cached_nsa

        nsa_source = getattr(self, 'nsa_source', None)
        if nsa_source is None:
            nsa_source = 'auto'

        if nsa_source == 'auto':
            # File-based objects use the drpall file; everything else uses
            # the full NSA catalogue.
            if self.data_origin == 'file':
                nsa_source = 'drpall'
            else:
                nsa_source = 'nsa'

        try:
            self._nsa = get_nsa_data(self.mangaid, mode='auto',
                                     source=nsa_source,
                                     drpver=self._drpver,
                                     drpall=self._drpall)
        except (MarvinError, BrainError):
            warnings.warn('cannot load NSA information for mangaid={!r}.'
                          .format(self.mangaid), MarvinUserWarning)
            return None

        return self._nsa
class DAPallMixIn(object):
    """A mixin that provides access to DAPall parameters.

    Must be used in combination with `.MarvinToolsClass` and initialised
    before `~.DAPallMixIn.dapall` can be called.

    `DAPallMixIn` uses the `.MarvinToolsClass.data_origin` of the object to
    determine how to obtain the DAPall information. However, if the object
    contains a ``dapall`` attribute with the path to a DAPall file, that file
    will be used.

    """

    # NOTE(review): this relies on the ``distutils.version`` submodule being
    # loaded, while the module only does ``import distutils`` -- confirm that
    # the submodule is always imported before this class is defined.
    __min_dapall_version__ = distutils.version.StrictVersion('2.1.0')

    @property
    def dapall(self):
        """Returns the contents of the DAPall data for this target.

        The data are retrieved from the DAPall file, the DB, or the API
        depending on ``data_origin`` and cached in ``_dapall``. For file-based
        objects, a remote request is attempted if the DAPall file cannot be
        found.

        Raises:
            MarvinError:
                If the DAP version is not set or is older than
                ``__min_dapall_version__``.

        """

        if (not self._dapver or
                distutils.version.StrictVersion(self._dapver) < self.__min_dapall_version__):
            raise MarvinError('DAPall is not available for versions before MPL-6.')

        if hasattr(self, '_dapall') and self._dapall is not None:
            return self._dapall

        if self.data_origin == 'file':
            try:
                dapall_data = self._get_dapall_from_file()
            except (IOError, MarvinError):
                # ``_get_dapall_from_file`` reports a missing file with a
                # MarvinError, so both exception types trigger the fallback.
                warnings.warn('cannot find DAPall file. Trying remote request.',
                              MarvinUserWarning)
                dapall_data = self._get_dapall_from_api()
        elif self.data_origin == 'db':
            dapall_data = self._get_dapall_from_db()
        else:
            dapall_data = self._get_dapall_from_api()

        self._dapall = dapall_data

        return self._dapall

    def _get_dapall_from_file(self):
        """Uses DAPAll file to retrieve information.

        Raises:
            MarvinError:
                If the DAPall file does not exist in the system.
            AssertionError:
                If the path to the file cannot be built or if there is not
                exactly one row matching this target.

        """

        daptype = self.bintype.name + '-' + self.template.name

        dapall_path = get_dapall_file(self._drpver, self._dapver)

        assert dapall_path is not None, 'cannot build DAPall file.'

        if not os.path.exists(dapall_path):
            raise MarvinError('cannot find DAPall file in the system.')

        # The context manager guarantees the FITS file is closed, also when
        # the assertion below fails.
        with astropy.io.fits.open(dapall_path) as dapall_hdu:

            header = dapall_hdu[0].header
            dapall_table = dapall_hdu[-1].data

            dapall_row = dapall_table[(dapall_table['PLATEIFU'] == self.plateifu) &
                                      (dapall_table['DAPTYPE'] == daptype)]

            assert len(dapall_row) == 1, 'cannot find matching row in DAPall.'

            return map_dapall(header, dapall_row[0])

    def _get_dapall_from_db(self):
        """Uses the DB to retrieve the DAPAll data.

        Returns:
            dict:
                The columns of the matching ``DapAll`` row, excluding the
                primary and foreign key columns.

        Raises:
            MarvinError:
                If there is no DB connection or no matching row is found.

        """

        dapall_data = {}

        daptype = self.bintype.name + '-' + self.template.name

        mdb = marvin.marvindb

        if not mdb.isdbconnected:
            raise MarvinError('No DB connected')

        datadb = mdb.datadb
        dapdb = mdb.dapdb

        dapall_row = mdb.session.query(dapdb.DapAll).join(
            dapdb.File, datadb.PipelineInfo, datadb.PipelineVersion).filter(
                mdb.datadb.PipelineVersion.version == self._dapver,
                dapdb.DapAll.plateifu == self.plateifu,
                dapdb.DapAll.daptype == daptype).first()

        if dapall_row is None:
            raise MarvinError('cannot find a DAPall match for this target in the DB.')

        for col in dapall_row.__table__.columns.keys():
            # Skip the primary key and any foreign key column.
            if col != 'pk' and '_pk' not in col:
                dapall_data[col] = getattr(dapall_row, col)

        return dapall_data

    def _get_dapall_from_api(self):
        """Uses the API to retrieve the DAPall data.

        Raises:
            MarvinError:
                If the request fails or the response reports an error.

        """

        url = marvin.config.urlmap['api']['dapall']['url']

        url_full = url.format(name=self.plateifu,
                              bintype=self.bintype.name,
                              template=self.template.name)

        try:
            response = self._toolInteraction(url_full)
        except Exception as ee:
            raise MarvinError('found a problem while getting DAPall: {0}'.format(str(ee)))

        if response.results['error'] is not None:
            raise MarvinError('found a problem while getting DAPall: {}'
                              .format(str(response.results['error'])))

        data = response.getData()

        return data['dapall_data']