1
2 """
3 Interface to the MAST archive.
4
5 Because the MAST archive is very inhomegeneous, this module is very limited in
6 use, and sometimes confusing. It is probably best to check the data or retrieve
7 the data manually from the archive.
8 """
9 import urllib
10 import socket
11 import logging
12 import os
13 import ConfigParser
14 import shutil
15
16 import numpy as np
17 import astropy.io.fits as pf
18
19 from ivs.sed import filters
20 from ivs.aux import xmlparser
21 from ivs.aux import loggers
22 from ivs.aux import numpy_ext
23 from ivs.aux.decorators import retry_http
24 from ivs.inout import ascii
25 from ivs.inout import http
26 from ivs.units import conversions
27 from ivs.catalogs import vizier
28 from ivs.catalogs import sesame
29
30
31 logger = logging.getLogger("CAT.MAST")
32 logger.addHandler(loggers.NullHandler())
33
34 basedir = os.path.dirname(os.path.abspath(__file__))
35
36
37 cat_info = ConfigParser.ConfigParser()
38 cat_info.optionxform = str
39 cat_info.readfp(open(os.path.join(basedir,'mast_cats_phot.cfg')))
40
41
42
43 -def _get_URI(name,ID=None,ra=None,dec=None,radius=5.,filetype='CSV',
44 out_max=100000,resolver='SIMBAD',coord='dec',**kwargs):
45 """
46 Build MAST URI from available options.
47
48 Filetype should be one of:
49
50 CSV,SSV,PSV...
51
52 kwargs are to catch unused arguments.
53
54 @param name: name of a MAST mission catalog (e.g. 'hst') or search type
55 ('images','spectra')
56 @type name: str
57 @keyword ID: target name
58 @type ID: str
59 @param filetype: type of the retrieved file
60 @type filetype: str (one of 'CSV','PSV'... see MAST site)
61 @keyword radius: search radius (arcseconds)
62 @type radius: float
63 @param ra: target's right ascension
64 @type ra: float
65 @param dec: target's declination
66 @type dec: float
67 @param coord: coordinate output format
68 @type coord: str: one of 'sex',dec','dechr'
69 @param out_max: maximum number of rows
70 @type out_max: integer
71 @return: url
72 @rtype: str
73 """
74 base_url = 'http://archive.stsci.edu/'
75
76 if name == 'images':
77 base_url += 'siap/search2.php?'
78 elif name == 'spectra':
79 base_url += 'ssap/search2.php?'
80
81
82 else:
83 base_url += '%s/search.php?action=Search'%(name)
84
85
86 if ID is not None and (ra is None and dec is None):
87 base_url += '&target=%s'%(ID)
88
89 if ra is not None and dec is not None:
90 base_url += '&radius=%s'%(radius/60.)
91 if ra is not None: base_url += '&RA=%s'%(ra)
92 if dec is not None: base_url += '&DEC=%s'%(dec)
93 elif radius is not None:
94 base_url += '&SIZE=%s'%(radius/60.)
95
96 for kwarg in kwargs:
97 base_url += '&%s=%s'%(kwarg,kwargs[kwarg])
98
99 if name not in ['spectra','images']:
100 base_url += '&outputformat=%s'%(filetype)
101 base_url += '&resolver=%s'%(resolver)
102
103 base_url += '&coordformat=%s'%(coord)
104 logger.debug('url: %s'%(base_url))
105 return base_url
106
107
108
110 """
111 Cone search for Galex targets.
112 """
113 ID = kwargs.pop('ID',None)
114 if ID is None:
115 ra,dec = kwargs.pop('ra'),kwargs.pop('dec')
116 else:
117 info = sesame.search(ID,db='A')
118 if not 'jradeg' in info: return None,None,None
119 ra,dec = info['jradeg'],info['jdedeg']
120 radius = 0.1
121
122 base_url = 'http://galex.stsci.edu/gxws/conesearch/conesearch.asmx/ConeSearchToXml?ra={0:f}&dec={1:f}&sr={2:f}&verb=1'.format(ra,dec,radius)
123
124 url = urllib.URLopener()
125 filen,msg = url.retrieve(base_url,filename=None)
126 fuv_flux,e_fuv_flux = None,None
127 columns = ['_r','ra','dec','fuv_flux','fuv_fluxerr','nuv_flux','nuv_fluxerr']
128 values = [np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan]
129
130
131 units = dict(fuv_flux='muJy',nuv_flux='muJy')
132
133 got_target = False
134 with open(filen,'r') as ff:
135 for line in ff.readlines():
136 for i,col in enumerate(columns):
137 if col+'>' in line:
138 values[i] = np.float(line.split('>')[1].split('<')[0])
139 got_target = (col=='fuv_fluxerr')
140 if got_target:
141 break
142
143 values[0] = np.sqrt( (values[1]-ra)**2 + (values[2]-dec)**2)*3600
144 columns[1] = '_RAJ2000'
145 columns[2] = '_DEJ2000'
146
147 results = np.rec.fromarrays(np.array([values]).T,names=columns)
148 if np.all(np.isnan(np.array(values))):
149 results = None
150
151 return results,units,None
152
153
154
155
157 """
158 Search and retrieve information from a MAST catalog.
159
160 Two ways to search for data within a catalog C{name}:
161
162 1. You're looking for info on B{one target}, then give the target's
163 C{ID} or coordinates (C{ra} and C{dec}), and a search C{radius}.
164
165 2. You're looking for information of B{a whole field}, then give the
166 field's coordinates (C{ra} and C{dec}), and C{radius}.
167
168 If you have a list of targets, you need to loop this function.
169
170 If you supply a filename, the results will be saved to that path, and you
171 will get the filename back as received from urllib.URLopener (should be the
172 same as the input name, unless something went wrong).
173
174 If you don't supply a filename, you should leave C{filetype} to the default
175 C{tsv}, and the results will be saved to a temporary
176 file and deleted after the function is finished. The content of the file
177 will be read into a dictionary, as well as the units (two separate
178 dictionaries with the same keys, depending on the colum names in the
179 catalog). The entries in the dictionary are of type C{ndarray}, and will
180 be converted to a float-array if possible. If not, the array will consist
181 of strings. The comments are also returned as a list of strings.
182
183
184 @param catalog: name of a MAST mission catalog
185 @type catalog: str
186 @keyword filename: name of the file to write the results to (no extension)
187 @type filename: str
188 @return: filename / catalog data columns, units, comments
189 @rtype: str/ record array, dict, list of str
190 """
191 filename = kwargs.pop('filename',None)
192 filetype = kwargs.setdefault('filetype','CSV')
193 if filename is not None and '.' in os.path.basename(filename):
194 filetype = os.path.splitext(filename)[1][1:]
195 elif filename is not None:
196 filename = '%s.%s'%(filename,filetype)
197
198
199 base_url = _get_URI(catalog,**kwargs)
200
201
202 url = urllib.URLopener()
203 filen,msg = url.retrieve(base_url,filename=filename)
204
205 if filename is not None:
206 logger.info('Querying MAST source %s and downloading to %s'%(catalog,filename))
207 url.close()
208 return filen
209
210
211
212 if filetype=='CSV' and not filename:
213 try:
214 results,units,comms = csv2recarray(filen)
215
216 except ValueError:
217 url.close()
218
219 results,units,comms = None,None,None
220 url.close()
221 logger.info('Querying MAST source %s (%d)'%(catalog,(results is not None and len(results) or 0)))
222 return results,units,comms
223 else:
224 return filename
225
226
227 -def mast2phot(source,results,units,master=None,extra_fields=None):
228 """
229 Convert/combine MAST record arrays to measurement record arrays.
230
231 Every line in the combined array represents a measurement in a certain band.
232
233 This is probably only useful if C{results} contains only information on
234 one target (or you have to give 'ID' as an extra field, maybe).
235
236 The standard columns are:
237
238 1. C{meas}: containing the photometric measurement
239 2. C{e_meas}: the error on the photometric measurement
240 3. C{flag}: an optional quality flag
241 4. C{unit}: the unit of the measurement
242 5. C{photband}: the photometric passband (FILTER.BAND)
243 6. C{source}: name of the source catalog
244
245 You can add extra information from the Mast catalog via the list of keys
246 C{extra_fields}.
247
248 If you give a C{master}, the information will be added to a previous
249 record array. If not, a new master will be created.
250
251 The result is a record array with each row a measurement.
252
253 @param source: name of the Mast source
254 @type source: str
255 @param results: results from Mast C{search}
256 @type results: record array
257 @param units: header of Mast catalog with key name the column name and
258 key value the units of that column
259 @type units: dict
260 @param master: master record array to add information to
261 @type master: record array
262 @param extra_fields: any extra columns you want to add information from
263 @type extra_fields: list of str
264 @return: array with each row a measurement
265 @rtype: record array
266 """
267 e_flag = 'e_'
268 q_flag = 'q_'
269
270 dtypes = [('meas','f8'),('e_meas','f8'),('flag','a20'),
271 ('unit','a30'),('photband','a30'),('source','a50')]
272
273
274 translations = {'kepler/kgmatch':{'_r':"Ang Sep (')",
275 '_RAJ2000':'KIC RA (J2000)',
276 '_DEJ2000':'KIC Dec (J2000)'}}
277
278
279 names = list(results.dtype.names)
280 if extra_fields is not None:
281 if source in translations:
282 translation = translations[source]
283 else:
284 translation = {'_r':'_r','_RAJ2000':'_RAJ2000','_DEJ2000':'_DEJ2000'}
285 for e_dtype in extra_fields:
286 try:
287 dtypes.append((e_dtype,results.dtype[names.index(translation[e_dtype])].str))
288 except ValueError:
289 if e_dtype != '_r': raise
290 dtypes.append((e_dtype,results.dtype[names.index('AngSep')].str))
291
292
293
294 newmaster = False
295 if master is None or len(master)==0:
296 master = np.rec.array([tuple([('f' in dt[1]) and np.nan or 'none' for dt in dtypes])],dtype=dtypes)
297 newmaster = True
298
299
300 cols_added = 0
301 for key in cat_info.options(source):
302 if key[:2] in [e_flag,q_flag] or '_unit' in key or key=='bibcode':
303 continue
304 photband = cat_info.get(source,key)
305
306
307 cols = [results[key][:1],
308 (e_flag+key in cat_info.options(source) and results[cat_info.get(source,e_flag+key)][:1] or np.ones(len(results[:1]))*np.nan),
309 (q_flag+key in cat_info.options(source) and results[cat_info.get(source,q_flag+key)][:1] or np.ones(len(results[:1]))*np.nan),
310 np.array(len(results[:1])*[units[key]],str),
311 np.array(len(results[:1])*[photband],str),
312 np.array(len(results[:1])*[source],str)]
313
314 if extra_fields is not None:
315 for e_dtype in extra_fields:
316 cols += [e_dtype in results.dtype.names and results[e_dtype][:1] or np.ones(len(results[:1]))*np.nan]
317
318 rows = []
319 for i in range(len(cols[0])):
320 rows.append(tuple([col[i] for col in cols]))
321 master = np.core.records.fromrecords(master.tolist()+rows,dtype=dtypes)
322 cols_added += 1
323
324
325
326 N = len(master)-cols_added
327 master_ = vizier._breakup_colours(master[N:])
328 master_ = vizier._breakup_colours(master_)
329
330 master = np.core.records.fromrecords(master.tolist()[:N]+master_.tolist(),dtype=dtypes)
331
332
333 if newmaster: master = master[1:]
334 return master
335
336
337
339 """
340 Read a MAST csv (comma-sep) file into a record array.
341
342 @param filename: name of the TCSV file
343 @type filename: str
344 @return: catalog data columns, units, comments
345 @rtype: record array, dict, list of str
346 """
347 data,comms = ascii.read2array(filename,dtype=np.str,splitchar=',',return_comments=True)
348 results = None
349 units = {}
350
351 if len(data)>1:
352
353 data = np.array(data)
354
355
356
357
358 formats = np.zeros_like(data[0])
359 for i,fmt in enumerate(data[1]):
360 if 'string' in fmt or fmt=='datetime': formats[i] = 'a100'
361 if fmt=='integer': formats[i] = 'f8'
362 if fmt=='ra': formats[i] = 'f8'
363 if fmt=='dec': formats[i] = 'f8'
364 if fmt=='float': formats[i] = 'f8'
365
366 dtypes = np.dtype([(i,j) for i,j in zip(data[0],formats)])
367
368 cols = []
369 for i,key in enumerate(data[0]):
370 col = data[2:,i]
371
372 cols.append([(row.isspace() or not row) and np.nan or row for row in col])
373
374 for source in cat_info.sections():
375 if cat_info.has_option(source,data[0,i]+'_unit'):
376 units[key] = cat_info.get(source,data[0,i]+'_unit')
377 break
378 else:
379 units[key] = 'nan'
380
381 cols = [np.cast[dtypes[i]](cols[i]) for i in range(len(cols))]
382 results = np.rec.array(cols, dtype=dtypes)
383 else:
384 results = None
385 units = {}
386 return results,units,comms
387
388
389
390 -def get_photometry(ID=None,extra_fields=['_r','_RAJ2000','_DEJ2000'],**kwargs):
391 """
392 Download all available photometry from a star to a record array.
393
394 For extra kwargs, see L{_get_URI} and L{mast2phot}
395 """
396 to_units = kwargs.pop('to_units','erg/s/cm2/AA')
397 master_ = kwargs.get('master',None)
398 master = None
399
400 for source in cat_info.sections():
401 if source=='galex':
402 results,units,comms = galex(ID=ID,**kwargs)
403 else:
404 results,units,comms = search(source,ID=ID,**kwargs)
405
406 if results is not None:
407 master = mast2phot(source,results,units,master,extra_fields=extra_fields)
408
409
410 if to_units and master is not None:
411
412 dtypes = [('cwave','f8'),('cmeas','f8'),('e_cmeas','f8'),('cunit','a50')]
413 cols = [[],[],[],[]]
414
415 no_errors = np.isnan(master['e_meas'])
416 master['e_meas'][no_errors] = 0.
417
418 zp = filters.get_info(master['photband'])
419 for i in range(len(master)):
420 try:
421 value,e_value = conversions.convert(master['unit'][i],to_units,master['meas'][i],master['e_meas'][i],photband=master['photband'][i])
422 except ValueError:
423 value,e_value = np.nan,np.nan
424 except AssertionError:
425 value,e_value = np.nan,np.nan
426 try:
427 eff_wave = filters.eff_wave(master['photband'][i])
428 except IOError:
429 eff_wave = np.nan
430 cols[0].append(eff_wave)
431 cols[1].append(value)
432 cols[2].append(e_value)
433 cols[3].append(to_units)
434 master = numpy_ext.recarr_addcols(master,cols,dtypes)
435
436 master['e_meas'][no_errors] = np.nan
437 master['e_cmeas'][no_errors] = np.nan
438
439 if master_ is not None and master is not None:
440 master = numpy_ext.recarr_addrows(master_,master.tolist())
441 elif master_ is not None:
442 master = master_
443
444
445 return master
446
447
448
450 """
451 Retrieve an image from DSS
452
453 plot with
454
455 >>> data,coords,size = mast.get_image('HD21389')
456 >>> pl.imshow(data[::-1],extent=[coords[0]-size[0]/2,coords[0]+size[0]/2,
457 coords[1]-size[1]/2,coords[1]+size[1]/2])
458 """
459
460 timeout = socket.getdefaulttimeout()
461 socket.setdefaulttimeout(30.)
462 if ra is None or dec is None:
463 info = sesame.search(ID)
464 ra,dec = info['jradeg'],info['jdedeg']
465 url = urllib.URLopener()
466 myurl = "http://archive.stsci.edu/cgi-bin/dss_search?ra=%s&dec=%s&equinox=J2000&height=%s&generation=%s&width=%s&format=FITS"%(ra,dec,height,'2i',width)
467 out = url.retrieve(myurl)
468 data1 = pf.getdata(out[0])
469
470 socket.setdefaulttimeout(timeout)
471 return data1,(ra,dec),(width/60.,height/60.)
472
473
474
476 """
477 Get Fuse spectrum.
478
479 select can be ['ano','all']
480
481 # what about MDRS aperture?
482 """
483 direc = (directory is None) and os.getcwd() or directory
484 if not os.path.isdir(direc):
485 os.mkdir(direc)
486 data = search('fuse',ID=ID)
487 download_link = 'https://archive.stsci.edu/pub/fuse/data/{folder}/{filename}'
488
489 if data[0] is None:
490 return None
491 elif cat_info:
492 return data
493 output = []
494 for spectrum in data[0]:
495 folder = spectrum['Data ID'][:8]
496 aperture = spectrum['Aperture']
497 noexp = int(spectrum['No. Exps'])
498 an = dict(MDWRS=2,LWRS=4)
499 if not aperture in an:
500 continue
501 an = an[aperture]
502 for exp in range(noexp):
503 data_id = folder+'{0:03d}00'.format(exp)
504 for type in select:
505 filename = '{data_id}{type}{an}ttagfcal.fit.gz'.format(data_id=data_id,type=type,an=an)
506 link = download_link.format(folder=folder,filename=filename)
507 link = link.lower()
508 filename = os.path.join(direc,filename)
509 myfile = http.download(link,filename=filename)
510
511
512 if os.path.getsize(myfile)<1000:
513 os.unlink(myfile)
514 continue
515 logger.info('FUSE spectrum %s to %s'%(link,direc))
516 output.append(myfile)
517 return output
518
519
520 if __name__=="__main__":
521
522 print get_FUSE_spectrum(ID='hd163296').dtype.names
523
524 raise SystemExit
525 mission = 'fuse'
526 base_url = _get_URI(mission,ID='hd163296')
527 print base_url
528 url = urllib.URLopener()
529 filen,msg = url.retrieve(base_url,filename='%s.test'%(mission))
530 url.close()
531 raise SystemExit
532
533 missions = ['hst',
534 'kepler',
535 'kepler/kepler_fov',
536 'kepler/kic10',
537 'kepler/kgmatch',
538 'iue',
539 'hut',
540 'euve',
541 'fuse',
542 'uit',
543 'wuppe',
544 'befs',
545 'tues',
546 'imaps',
547 'hlsp',
548 'pointings',
549 'copernicus',
550 'hpol',
551 'vlafirst',
552 'xmm-om']
553
554
555
556
557
558
559 out = search('kepler/kepler_fov',ID='TYC3134-165-1',filename='kepler_fov.test',radius=1.)
560 out = search('kepler/kgmatch',ID='3749404',filename='kgmatch.test',radius=1.)
561
562 master = mast2phot('kepler/kgmatch',results,units,master=None,extra_fields=None)
563 print master
564
565 sys.exit()
566
567 for mission in missions:
568 base_url = _get_URI(mission,ID='hd46149')
569
570 try:
571 url = urllib.URLopener()
572 filen,msg = url.retrieve(base_url,filename='%s.test'%(mission))
573 url.close()
574 except IOError:
575 print 'failed'
576 continue
577