ivs.catalogs.mast

1 # -*- coding: utf-8 -*- 2 """ 3 Interface to the MAST archive. 4 5 Because the MAST archive is very inhomegeneous, this module is very limited in 6 use, and sometimes confusing. It is probably best to check the data or retrieve 7 the data manually from the archive. 8 """ 9 import urllib 10 import socket 11 import logging 12 import os 13 import ConfigParser 14 import shutil 15 16 import numpy as np 17 import astropy.io.fits as pf 18 19 from ivs.sed import filters 20 from ivs.aux import xmlparser 21 from ivs.aux import loggers 22 from ivs.aux import numpy_ext 23 from ivs.aux.decorators import retry_http 24 from ivs.inout import ascii 25 from ivs.inout import http 26 from ivs.units import conversions 27 from ivs.catalogs import vizier 28 from ivs.catalogs import sesame 29 30 31 logger = logging.getLogger("CAT.MAST") 32 logger.addHandler(loggers.NullHandler()) 33 34 basedir = os.path.dirname(os.path.abspath(__file__)) 35 36 #-- read in catalog information 37 cat_info = ConfigParser.ConfigParser() 38 cat_info.optionxform = str # make sure the options are case sensitive 39 cat_info.readfp(open(os.path.join(basedir,'mast_cats_phot.cfg'))) 40 41 42

43 -def _get_URI(name,ID=None,ra=None,dec=None,radius=5.,filetype='CSV', 44 out_max=100000,resolver='SIMBAD',coord='dec',**kwargs):

45 """ 46 Build MAST URI from available options. 47 48 Filetype should be one of: 49 50 CSV,SSV,PSV... 51 52 kwargs are to catch unused arguments. 53 54 @param name: name of a MAST mission catalog (e.g. 'hst') or search type 55 ('images','spectra') 56 @type name: str 57 @keyword ID: target name 58 @type ID: str 59 @param filetype: type of the retrieved file 60 @type filetype: str (one of 'CSV','PSV'... see MAST site) 61 @keyword radius: search radius (arcseconds) 62 @type radius: float 63 @param ra: target's right ascension 64 @type ra: float 65 @param dec: target's declination 66 @type dec: float 67 @param coord: coordinate output format 68 @type coord: str: one of 'sex',dec','dechr' 69 @param out_max: maximum number of rows 70 @type out_max: integer 71 @return: url 72 @rtype: str 73 """ 74 base_url = 'http://archive.stsci.edu/' 75 76 if name == 'images': 77 base_url += 'siap/search2.php?' 78 elif name == 'spectra': 79 base_url += 'ssap/search2.php?' 80 #elif name == 'galex': 81 # base_url = 'http://galex.stsci.edu/search.php?action=Search' 82 else: 83 base_url += '%s/search.php?action=Search'%(name) 84 85 #-- only use the ID if ra and dec are not given 86 if ID is not None and (ra is None and dec is None): 87 base_url += '&target=%s'%(ID) 88 89 if ra is not None and dec is not None: 90 base_url += '&radius=%s'%(radius/60.) 91 if ra is not None: base_url += '&RA=%s'%(ra) 92 if dec is not None: base_url += '&DEC=%s'%(dec) 93 elif radius is not None: 94 base_url += '&SIZE=%s'%(radius/60.) 95 96 for kwarg in kwargs: 97 base_url += '&%s=%s'%(kwarg,kwargs[kwarg]) 98 99 if name not in ['spectra','images']: 100 base_url += '&outputformat=%s'%(filetype) 101 base_url += '&resolver=%s'%(resolver) 102 #base_url += '&max_records=%d'%(out_max) 103 base_url += '&coordformat=%s'%(coord) 104 logger.debug('url: %s'%(base_url)) 105 return base_url

106 107 108

109 -def galex(**kwargs):

110 """ 111 Cone search for Galex targets. 112 """ 113 ID = kwargs.pop('ID',None) 114 if ID is None: 115 ra,dec = kwargs.pop('ra'),kwargs.pop('dec') 116 else: 117 info = sesame.search(ID,db='A') 118 if not 'jradeg' in info: return None,None,None 119 ra,dec = info['jradeg'],info['jdedeg'] 120 radius = 0.1 121 #radius = radius/60. 122 base_url = 'http://galex.stsci.edu/gxws/conesearch/conesearch.asmx/ConeSearchToXml?ra={0:f}&dec={1:f}&sr={2:f}&verb=1'.format(ra,dec,radius) 123 #base_url = 'http://galex.stsci.edu/GR4/?page=searchresults&RA={ra:f}&DEC={dec:f}&query=no'.format(ra=ra,dec=dec) 124 url = urllib.URLopener() 125 filen,msg = url.retrieve(base_url,filename=None) 126 fuv_flux,e_fuv_flux = None,None 127 columns = ['_r','ra','dec','fuv_flux','fuv_fluxerr','nuv_flux','nuv_fluxerr'] 128 values = [np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan] 129 130 #flux in microJy 131 units = dict(fuv_flux='muJy',nuv_flux='muJy') 132 133 got_target = False 134 with open(filen,'r') as ff: 135 for line in ff.readlines(): 136 for i,col in enumerate(columns): 137 if col+'>' in line: 138 values[i] = np.float(line.split('>')[1].split('<')[0]) 139 got_target = (col=='fuv_fluxerr') 140 if got_target: 141 break 142 143 values[0] = np.sqrt( (values[1]-ra)**2 + (values[2]-dec)**2)*3600 144 columns[1] = '_RAJ2000' 145 columns[2] = '_DEJ2000' 146 147 results = np.rec.fromarrays(np.array([values]).T,names=columns) 148 if np.all(np.isnan(np.array(values))): 149 results = None 150 151 return results,units,None

152 153 154 155

156 -def search(catalog,**kwargs):

157 """ 158 Search and retrieve information from a MAST catalog. 159 160 Two ways to search for data within a catalog C{name}: 161 162 1. You're looking for info on B{one target}, then give the target's 163 C{ID} or coordinates (C{ra} and C{dec}), and a search C{radius}. 164 165 2. You're looking for information of B{a whole field}, then give the 166 field's coordinates (C{ra} and C{dec}), and C{radius}. 167 168 If you have a list of targets, you need to loop this function. 169 170 If you supply a filename, the results will be saved to that path, and you 171 will get the filename back as received from urllib.URLopener (should be the 172 same as the input name, unless something went wrong). 173 174 If you don't supply a filename, you should leave C{filetype} to the default 175 C{tsv}, and the results will be saved to a temporary 176 file and deleted after the function is finished. The content of the file 177 will be read into a dictionary, as well as the units (two separate 178 dictionaries with the same keys, depending on the colum names in the 179 catalog). The entries in the dictionary are of type C{ndarray}, and will 180 be converted to a float-array if possible. If not, the array will consist 181 of strings. The comments are also returned as a list of strings. 182 183 184 @param catalog: name of a MAST mission catalog 185 @type catalog: str 186 @keyword filename: name of the file to write the results to (no extension) 187 @type filename: str 188 @return: filename / catalog data columns, units, comments 189 @rtype: str/ record array, dict, list of str 190 """ 191 filename = kwargs.pop('filename',None) # remove filename from kwargs 192 filetype = kwargs.setdefault('filetype','CSV') 193 if filename is not None and '.' in os.path.basename(filename): 194 filetype = os.path.splitext(filename)[1][1:] 195 elif filename is not None: 196 filename = '%s.%s'%(filename,filetype) 197 198 #-- gradually build URI 199 base_url = _get_URI(catalog,**kwargs) 200 #-- prepare to open URI 201 202 url = urllib.URLopener() 203 filen,msg = url.retrieve(base_url,filename=filename) 204 # maybe we are just interest in the file, not immediately in the content 205 if filename is not None: 206 logger.info('Querying MAST source %s and downloading to %s'%(catalog,filename)) 207 url.close() 208 return filen 209 210 # otherwise, we read everything into a dictionary 211 212 if filetype=='CSV' and not filename: 213 try: 214 results,units,comms = csv2recarray(filen) 215 #-- raise an exception when multiple catalogs were specified 216 except ValueError: 217 url.close() 218 #raise ValueError, "failed to read %s, perhaps multiple catalogs specified (e.g. III/168 instead of III/168/catalog)"%(catalog) 219 results,units,comms = None,None,None 220 url.close() 221 logger.info('Querying MAST source %s (%d)'%(catalog,(results is not None and len(results) or 0))) 222 return results,units,comms 223 else: 224 return filename

225 226

227 -def mast2phot(source,results,units,master=None,extra_fields=None):

228 """ 229 Convert/combine MAST record arrays to measurement record arrays. 230 231 Every line in the combined array represents a measurement in a certain band. 232 233 This is probably only useful if C{results} contains only information on 234 one target (or you have to give 'ID' as an extra field, maybe). 235 236 The standard columns are: 237 238 1. C{meas}: containing the photometric measurement 239 2. C{e_meas}: the error on the photometric measurement 240 3. C{flag}: an optional quality flag 241 4. C{unit}: the unit of the measurement 242 5. C{photband}: the photometric passband (FILTER.BAND) 243 6. C{source}: name of the source catalog 244 245 You can add extra information from the Mast catalog via the list of keys 246 C{extra_fields}. 247 248 If you give a C{master}, the information will be added to a previous 249 record array. If not, a new master will be created. 250 251 The result is a record array with each row a measurement. 252 253 @param source: name of the Mast source 254 @type source: str 255 @param results: results from Mast C{search} 256 @type results: record array 257 @param units: header of Mast catalog with key name the column name and 258 key value the units of that column 259 @type units: dict 260 @param master: master record array to add information to 261 @type master: record array 262 @param extra_fields: any extra columns you want to add information from 263 @type extra_fields: list of str 264 @return: array with each row a measurement 265 @rtype: record array 266 """ 267 e_flag = 'e_' 268 q_flag = 'q_' 269 #-- basic dtypes 270 dtypes = [('meas','f8'),('e_meas','f8'),('flag','a20'), 271 ('unit','a30'),('photband','a30'),('source','a50')] 272 273 #-- MAST has no unified terminology: 274 translations = {'kepler/kgmatch':{'_r':"Ang Sep (')", 275 '_RAJ2000':'KIC RA (J2000)', 276 '_DEJ2000':'KIC Dec (J2000)'}} 277 278 #-- extra can be added: 279 names = list(results.dtype.names) 280 if extra_fields is not None: 281 if source in translations: 282 translation = translations[source] 283 else: 284 translation = {'_r':'_r','_RAJ2000':'_RAJ2000','_DEJ2000':'_DEJ2000'} 285 for e_dtype in extra_fields: 286 try: 287 dtypes.append((e_dtype,results.dtype[names.index(translation[e_dtype])].str)) 288 except ValueError: 289 if e_dtype != '_r': raise 290 dtypes.append((e_dtype,results.dtype[names.index('AngSep')].str)) 291 292 293 #-- create empty master if not given 294 newmaster = False 295 if master is None or len(master)==0: 296 master = np.rec.array([tuple([('f' in dt[1]) and np.nan or 'none' for dt in dtypes])],dtype=dtypes) 297 newmaster = True 298 299 #-- add fluxes and magnitudes to the record array 300 cols_added = 0 301 for key in cat_info.options(source): 302 if key[:2] in [e_flag,q_flag] or '_unit' in key or key=='bibcode': 303 continue 304 photband = cat_info.get(source,key) 305 #-- contains measurement, error, quality, units, photometric band and 306 # source 307 cols = [results[key][:1], 308 (e_flag+key in cat_info.options(source) and results[cat_info.get(source,e_flag+key)][:1] or np.ones(len(results[:1]))*np.nan), 309 (q_flag+key in cat_info.options(source) and results[cat_info.get(source,q_flag+key)][:1] or np.ones(len(results[:1]))*np.nan), 310 np.array(len(results[:1])*[units[key]],str), 311 np.array(len(results[:1])*[photband],str), 312 np.array(len(results[:1])*[source],str)] 313 #-- add any extra fields if desired. 314 if extra_fields is not None: 315 for e_dtype in extra_fields: 316 cols += [e_dtype in results.dtype.names and results[e_dtype][:1] or np.ones(len(results[:1]))*np.nan] 317 #-- add to the master 318 rows = [] 319 for i in range(len(cols[0])): 320 rows.append(tuple([col[i] for col in cols])) 321 master = np.core.records.fromrecords(master.tolist()+rows,dtype=dtypes) 322 cols_added += 1 323 324 #-- fix colours: we have to run through it two times to be sure to have 325 # all the colours 326 N = len(master)-cols_added 327 master_ = vizier._breakup_colours(master[N:]) 328 master_ = vizier._breakup_colours(master_) 329 #-- combine and return 330 master = np.core.records.fromrecords(master.tolist()[:N]+master_.tolist(),dtype=dtypes) 331 332 #-- skip first line from building 333 if newmaster: master = master[1:] 334 return master

335 336 337

338 -def csv2recarray(filename):

339 """ 340 Read a MAST csv (comma-sep) file into a record array. 341 342 @param filename: name of the TCSV file 343 @type filename: str 344 @return: catalog data columns, units, comments 345 @rtype: record array, dict, list of str 346 """ 347 data,comms = ascii.read2array(filename,dtype=np.str,splitchar=',',return_comments=True) 348 results = None 349 units = {} 350 #-- retrieve the data and put it into a record array 351 if len(data)>1: 352 #-- now convert this thing into a nice dictionary 353 data = np.array(data) 354 #-- retrieve the format of the columns. They are given in the 355 # Fortran format. In rare cases, columns contain multiple values 356 # themselves (so called vectors). In those cases, we interpret 357 # the contents as a long string 358 formats = np.zeros_like(data[0]) 359 for i,fmt in enumerate(data[1]): 360 if 'string' in fmt or fmt=='datetime': formats[i] = 'a100' 361 if fmt=='integer': formats[i] = 'f8' 362 if fmt=='ra': formats[i] = 'f8' 363 if fmt=='dec': formats[i] = 'f8' 364 if fmt=='float': formats[i] = 'f8' 365 #-- define dtypes for record array 366 dtypes = np.dtype([(i,j) for i,j in zip(data[0],formats)]) 367 #-- remove spaces or empty values 368 cols = [] 369 for i,key in enumerate(data[0]): 370 col = data[2:,i] 371 #-- fill empty values with nan 372 cols.append([(row.isspace() or not row) and np.nan or row for row in col]) 373 #-- fix unit name 374 for source in cat_info.sections(): 375 if cat_info.has_option(source,data[0,i]+'_unit'): 376 units[key] = cat_info.get(source,data[0,i]+'_unit') 377 break 378 else: 379 units[key] = 'nan' 380 #-- define columns for record array and construct record array 381 cols = [np.cast[dtypes[i]](cols[i]) for i in range(len(cols))] 382 results = np.rec.array(cols, dtype=dtypes) 383 else: 384 results = None 385 units = {} 386 return results,units,comms

387 388 389

390 -def get_photometry(ID=None,extra_fields=['_r','_RAJ2000','_DEJ2000'],**kwargs):

391 """ 392 Download all available photometry from a star to a record array. 393 394 For extra kwargs, see L{_get_URI} and L{mast2phot} 395 """ 396 to_units = kwargs.pop('to_units','erg/s/cm2/AA') 397 master_ = kwargs.get('master',None) 398 master = None 399 #-- retrieve all measurements 400 for source in cat_info.sections(): 401 if source=='galex': 402 results,units,comms = galex(ID=ID,**kwargs) 403 else: 404 results,units,comms = search(source,ID=ID,**kwargs) 405 406 if results is not None: 407 master = mast2phot(source,results,units,master,extra_fields=extra_fields) 408 409 #-- convert the measurement to a common unit. 410 if to_units and master is not None: 411 #-- prepare columns to extend to basic master 412 dtypes = [('cwave','f8'),('cmeas','f8'),('e_cmeas','f8'),('cunit','a50')] 413 cols = [[],[],[],[]] 414 #-- forget about 'nan' errors for the moment 415 no_errors = np.isnan(master['e_meas']) 416 master['e_meas'][no_errors] = 0. 417 #-- extend basic master 418 zp = filters.get_info(master['photband']) 419 for i in range(len(master)): 420 try: 421 value,e_value = conversions.convert(master['unit'][i],to_units,master['meas'][i],master['e_meas'][i],photband=master['photband'][i]) 422 except ValueError: # calibrations not available 423 value,e_value = np.nan,np.nan 424 except AssertionError: # postive flux and errors! 425 value,e_value = np.nan,np.nan 426 try: 427 eff_wave = filters.eff_wave(master['photband'][i]) 428 except IOError: 429 eff_wave = np.nan 430 cols[0].append(eff_wave) 431 cols[1].append(value) 432 cols[2].append(e_value) 433 cols[3].append(to_units) 434 master = numpy_ext.recarr_addcols(master,cols,dtypes) 435 #-- reset errors 436 master['e_meas'][no_errors] = np.nan 437 master['e_cmeas'][no_errors] = np.nan 438 439 if master_ is not None and master is not None: 440 master = numpy_ext.recarr_addrows(master_,master.tolist()) 441 elif master_ is not None: 442 master = master_ 443 444 #-- and return the results 445 return master

446 447 448 #@retry_http(3)

449 -def get_dss_image(ID,ra=None,dec=None,width=5,height=5):

450 """ 451 Retrieve an image from DSS 452 453 plot with 454 455 >>> data,coords,size = mast.get_image('HD21389') 456 >>> pl.imshow(data[::-1],extent=[coords[0]-size[0]/2,coords[0]+size[0]/2, 457 coords[1]-size[1]/2,coords[1]+size[1]/2]) 458 """ 459 #-- set a reasonable timeout 460 timeout = socket.getdefaulttimeout() 461 socket.setdefaulttimeout(30.) 462 if ra is None or dec is None: 463 info = sesame.search(ID) 464 ra,dec = info['jradeg'],info['jdedeg'] 465 url = urllib.URLopener() 466 myurl = "http://archive.stsci.edu/cgi-bin/dss_search?ra=%s&dec=%s&equinox=J2000&height=%s&generation=%s&width=%s&format=FITS"%(ra,dec,height,'2i',width) 467 out = url.retrieve(myurl) 468 data1 = pf.getdata(out[0]) 469 #-- reset timeout to original value 470 socket.setdefaulttimeout(timeout) 471 return data1,(ra,dec),(width/60.,height/60.)

472 473 474

475 -def get_FUSE_spectra(ID=None,directory=None,cat_info=False,select=['ano']):

476 """ 477 Get Fuse spectrum. 478 479 select can be ['ano','all'] 480 481 # what about MDRS aperture? 482 """ 483 direc = (directory is None) and os.getcwd() or directory 484 if not os.path.isdir(direc): 485 os.mkdir(direc) 486 data = search('fuse',ID=ID) 487 download_link = 'https://archive.stsci.edu/pub/fuse/data/{folder}/{filename}' 488 #-- maybe no information was found 489 if data[0] is None: 490 return None 491 elif cat_info: 492 return data 493 output = [] 494 for spectrum in data[0]: 495 folder = spectrum['Data ID'][:8] 496 aperture = spectrum['Aperture'] 497 noexp = int(spectrum['No. Exps']) 498 an = dict(MDWRS=2,LWRS=4) 499 if not aperture in an: 500 continue 501 an = an[aperture] 502 for exp in range(noexp): 503 data_id = folder+'{0:03d}00'.format(exp) 504 for type in select: 505 filename = '{data_id}{type}{an}ttagfcal.fit.gz'.format(data_id=data_id,type=type,an=an) 506 link = download_link.format(folder=folder,filename=filename) 507 link = link.lower() 508 filename = os.path.join(direc,filename) 509 myfile = http.download(link,filename=filename) 510 #-- if the file is too small to be a science file, it is 511 # actually an HTML file stating that the URL does not exist. 512 if os.path.getsize(myfile)<1000: 513 os.unlink(myfile) 514 continue 515 logger.info('FUSE spectrum %s to %s'%(link,direc)) 516 output.append(myfile) 517 return output

518 #https://archive.stsci.edu/pub/fuse/data/a0010101/a001010100000all4ttagfcal.fit.gz 519 520 if __name__=="__main__": 521 #get_FUSE_spectrum(ID='hd163296') 522 print get_FUSE_spectrum(ID='hd163296').dtype.names 523 524 raise SystemExit 525 mission = 'fuse' 526 base_url = _get_URI(mission,ID='hd163296') 527 print base_url 528 url = urllib.URLopener() 529 filen,msg = url.retrieve(base_url,filename='%s.test'%(mission)) 530 url.close() 531 raise SystemExit 532 533 missions = ['hst',# - Hubble Space Telescope 534 'kepler',# - Kepler Data search form 535 'kepler/kepler_fov',# - Kepler Target search form 536 'kepler/kic10',# - Kepler Input Catalog search form 537 'kepler/kgmatch',# - Kepler/Galex Cross match 538 'iue',# - International Ultraviolet Explorer 539 'hut',# - Hopkins Ultraviolet Telescope 540 'euve',# - Extreme Ultraviolet Explorer 541 'fuse',# - Far-UV Spectroscopic Explorer 542 'uit',# - Ultraviolet Imageing Telescope 543 'wuppe',# - Wisconsin UV Photo-Polarimeter Explorer 544 'befs',# - Berkeley Extreme and Far-UV Spectrometer 545 'tues',# - Tübingen Echelle Spectrograph 546 'imaps',# - Interstellar Medium Absorption Profile Spectrograph 547 'hlsp',# - High Level Science Products 548 'pointings',# - HST Image Data grouped by position 549 'copernicus',# - Copernicus Satellite 550 'hpol',# - ground based spetropolarimeter 551 'vlafirst',# - VLA Faint Images of the Radio Sky (21-cm) 552 'xmm-om']# - X-ray Multi-Mirror Telescope Optical Monitor 553 554 #search('euve',ID='hd46149') 555 #filename = search('kepler/kgmatch',ID='KIC3955868',filename='kepler.test') 556 557 #results,units,comms = search('kepler/kgmatch',ID='T CrB') 558 #sys.exit() 559 out = search('kepler/kepler_fov',ID='TYC3134-165-1',filename='kepler_fov.test',radius=1.) 560 out = search('kepler/kgmatch',ID='3749404',filename='kgmatch.test',radius=1.) 561 #results,units,comms = search('kepler/kgmatch',ID='3749404') 562 master = mast2phot('kepler/kgmatch',results,units,master=None,extra_fields=None) 563 print master 564 #data,units,comms = search('spectra',ID='hd46149',filename='ssap.test') 565 sys.exit() 566 567 for mission in missions: 568 base_url = _get_URI(mission,ID='hd46149') 569 #ff = urllib.urlopen(base_url) 570 try: 571 url = urllib.URLopener() 572 filen,msg = url.retrieve(base_url,filename='%s.test'%(mission)) 573 url.close() 574 except IOError: 575 print 'failed' 576 continue 577

Source Code for Module ivs.catalogs.mast