1   
  2  """ 
  3  Interface to the MAST archive. 
  4   
  5  Because the MAST archive is very inhomegeneous, this module is very limited in 
  6  use, and sometimes confusing. It is probably best to check the data or retrieve 
  7  the data manually from the archive. 
  8  """ 
  9  import urllib 
 10  import socket 
 11  import logging 
 12  import os 
 13  import ConfigParser 
 14  import shutil 
 15   
 16  import numpy as np 
 17  import astropy.io.fits as pf 
 18   
 19  from ivs.sed import filters 
 20  from ivs.aux import xmlparser 
 21  from ivs.aux import loggers 
 22  from ivs.aux import numpy_ext 
 23  from ivs.aux.decorators import retry_http 
 24  from ivs.inout import ascii 
 25  from ivs.inout import http 
 26  from ivs.units import conversions 
 27  from ivs.catalogs import vizier 
 28  from ivs.catalogs import sesame 
 29   
 30   
 31  logger = logging.getLogger("CAT.MAST") 
 32  logger.addHandler(loggers.NullHandler()) 
 33   
 34  basedir = os.path.dirname(os.path.abspath(__file__)) 
 35   
 36   
 37  cat_info = ConfigParser.ConfigParser() 
 38  cat_info.optionxform = str  
 39  cat_info.readfp(open(os.path.join(basedir,'mast_cats_phot.cfg'))) 
 40   
 41   
 42   
 43 -def _get_URI(name,ID=None,ra=None,dec=None,radius=5.,filetype='CSV', 
 44               out_max=100000,resolver='SIMBAD',coord='dec',**kwargs): 
  45      """ 
 46      Build MAST URI from available options. 
 47       
 48      Filetype should be one of: 
 49       
 50      CSV,SSV,PSV... 
 51       
 52      kwargs are to catch unused arguments. 
 53       
 54      @param name: name of a MAST mission catalog (e.g. 'hst') or search type 
 55      ('images','spectra') 
 56      @type name: str 
 57      @keyword ID: target name 
 58      @type ID: str 
 59      @param filetype: type of the retrieved file  
 60      @type filetype: str (one of 'CSV','PSV'... see MAST site) 
 61      @keyword radius: search radius (arcseconds) 
 62      @type radius: float 
 63      @param ra: target's right ascension 
 64      @type ra: float 
 65      @param dec: target's declination 
 66      @type dec: float 
 67      @param coord: coordinate output format 
 68      @type coord: str: one of 'sex',dec','dechr' 
 69      @param out_max: maximum number of rows 
 70      @type out_max: integer 
 71      @return: url 
 72      @rtype: str 
 73      """ 
 74      base_url = 'http://archive.stsci.edu/' 
 75       
 76      if name == 'images': 
 77          base_url += 'siap/search2.php?' 
 78      elif name == 'spectra': 
 79          base_url += 'ssap/search2.php?' 
 80       
 81       
 82      else: 
 83          base_url += '%s/search.php?action=Search'%(name) 
 84       
 85       
 86      if ID is not None and (ra is None and dec is None): 
 87          base_url += '&target=%s'%(ID) 
 88       
 89      if ra is not None and dec is not None: 
 90          base_url += '&radius=%s'%(radius/60.) 
 91          if ra is not None: base_url += '&RA=%s'%(ra) 
 92          if dec is not None: base_url += '&DEC=%s'%(dec) 
 93      elif radius is not None: 
 94          base_url += '&SIZE=%s'%(radius/60.) 
 95       
 96      for kwarg in kwargs: 
 97          base_url += '&%s=%s'%(kwarg,kwargs[kwarg]) 
 98       
 99      if name not in ['spectra','images']: 
100          base_url += '&outputformat=%s'%(filetype) 
101          base_url += '&resolver=%s'%(resolver) 
102           
103          base_url += '&coordformat=%s'%(coord) 
104      logger.debug('url: %s'%(base_url)) 
105      return base_url 
 106   
107   
108   
110      """ 
111      Cone search for Galex targets. 
112      """ 
113      ID = kwargs.pop('ID',None) 
114      if ID is None: 
115          ra,dec = kwargs.pop('ra'),kwargs.pop('dec') 
116      else: 
117          info = sesame.search(ID,db='A') 
118          if not 'jradeg' in info: return None,None,None 
119          ra,dec = info['jradeg'],info['jdedeg'] 
120      radius = 0.1 
121       
122      base_url = 'http://galex.stsci.edu/gxws/conesearch/conesearch.asmx/ConeSearchToXml?ra={0:f}&dec={1:f}&sr={2:f}&verb=1'.format(ra,dec,radius) 
123       
124      url = urllib.URLopener() 
125      filen,msg = url.retrieve(base_url,filename=None) 
126      fuv_flux,e_fuv_flux = None,None 
127      columns = ['_r','ra','dec','fuv_flux','fuv_fluxerr','nuv_flux','nuv_fluxerr'] 
128      values = [np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan] 
129       
130       
131      units = dict(fuv_flux='muJy',nuv_flux='muJy') 
132       
133      got_target = False 
134      with open(filen,'r') as ff: 
135          for line in ff.readlines(): 
136              for i,col in enumerate(columns): 
137                  if col+'>' in line: 
138                      values[i] = np.float(line.split('>')[1].split('<')[0]) 
139                      got_target = (col=='fuv_fluxerr') 
140              if got_target:  
141                  break 
142       
143      values[0] = np.sqrt( (values[1]-ra)**2 + (values[2]-dec)**2)*3600 
144      columns[1] = '_RAJ2000' 
145      columns[2] = '_DEJ2000' 
146       
147      results = np.rec.fromarrays(np.array([values]).T,names=columns) 
148      if np.all(np.isnan(np.array(values))): 
149          results = None 
150       
151      return results,units,None 
 152       
153   
154   
155   
157      """ 
158      Search and retrieve information from a MAST catalog. 
159       
160      Two ways to search for data within a catalog C{name}: 
161           
162          1. You're looking for info on B{one target}, then give the target's 
163          C{ID} or coordinates (C{ra} and C{dec}), and a search C{radius}. 
164       
165          2. You're looking for information of B{a whole field}, then give the 
166          field's coordinates (C{ra} and C{dec}), and C{radius}. 
167       
168      If you have a list of targets, you need to loop this function. 
169       
170      If you supply a filename, the results will be saved to that path, and you 
171      will get the filename back as received from urllib.URLopener (should be the 
172      same as the input name, unless something went wrong). 
173       
174      If you don't supply a filename, you should leave C{filetype} to the default 
175      C{tsv}, and the results will be saved to a temporary 
176      file and deleted after the function is finished. The content of the file 
177      will be read into a dictionary, as well as the units (two separate 
178      dictionaries with the same keys, depending on the colum names in the 
179      catalog). The entries in the dictionary are of type C{ndarray}, and will 
180      be converted to a float-array if possible. If not, the array will consist 
181      of strings. The comments are also returned as a list of strings. 
182           
183       
184      @param catalog: name of a MAST mission catalog 
185      @type catalog: str 
186      @keyword filename: name of the file to write the results to (no extension) 
187      @type filename: str 
188      @return: filename / catalog data columns, units, comments 
189      @rtype: str/ record array, dict, list of str 
190      """ 
191      filename = kwargs.pop('filename',None)  
192      filetype = kwargs.setdefault('filetype','CSV') 
193      if filename is not None and '.' in os.path.basename(filename): 
194          filetype = os.path.splitext(filename)[1][1:] 
195      elif filename is not None: 
196          filename = '%s.%s'%(filename,filetype) 
197       
198       
199      base_url = _get_URI(catalog,**kwargs) 
200       
201       
202      url = urllib.URLopener() 
203      filen,msg = url.retrieve(base_url,filename=filename) 
204       
205      if filename is not None: 
206          logger.info('Querying MAST source %s and downloading to %s'%(catalog,filename)) 
207          url.close() 
208          return filen 
209       
210       
211       
212      if filetype=='CSV' and not filename: 
213          try: 
214              results,units,comms = csv2recarray(filen) 
215           
216          except ValueError: 
217              url.close() 
218               
219              results,units,comms = None,None,None 
220          url.close() 
221          logger.info('Querying MAST source %s (%d)'%(catalog,(results is not None and len(results) or 0))) 
222          return results,units,comms 
223      else: 
224          return filename 
 225   
226   
227 -def mast2phot(source,results,units,master=None,extra_fields=None): 
 228      """ 
229      Convert/combine MAST record arrays to measurement record arrays. 
230       
231      Every line in the combined array represents a measurement in a certain band. 
232       
233      This is probably only useful if C{results} contains only information on 
234      one target (or you have to give 'ID' as an extra field, maybe). 
235       
236      The standard columns are: 
237       
238          1. C{meas}: containing the photometric measurement 
239          2. C{e_meas}: the error on the photometric measurement 
240          3. C{flag}: an optional quality flag 
241          4. C{unit}: the unit of the measurement 
242          5. C{photband}: the photometric passband (FILTER.BAND) 
243          6. C{source}: name of the source catalog 
244       
245      You can add extra information from the Mast catalog via the list of keys 
246      C{extra_fields}. 
247       
248      If you give a C{master}, the information will be added to a previous 
249      record array. If not, a new master will be created. 
250       
251      The result is a record array with each row a measurement. 
252       
253      @param source: name of the Mast source 
254      @type source: str 
255      @param results: results from Mast C{search} 
256      @type results: record array 
257      @param units: header of Mast catalog with key name the column name and 
258      key value the units of that column 
259      @type units: dict 
260      @param master: master record array to add information to 
261      @type master: record array 
262      @param extra_fields: any extra columns you want to add information from 
263      @type extra_fields: list of str 
264      @return: array with each row a measurement 
265      @rtype: record array 
266      """ 
267      e_flag = 'e_' 
268      q_flag = 'q_' 
269       
270      dtypes = [('meas','f8'),('e_meas','f8'),('flag','a20'), 
271                    ('unit','a30'),('photband','a30'),('source','a50')] 
272       
273       
274      translations = {'kepler/kgmatch':{'_r':"Ang Sep (')", 
275                                        '_RAJ2000':'KIC RA (J2000)', 
276                                        '_DEJ2000':'KIC Dec (J2000)'}} 
277       
278       
279      names = list(results.dtype.names) 
280      if extra_fields is not None: 
281          if source in translations: 
282              translation = translations[source] 
283          else: 
284              translation = {'_r':'_r','_RAJ2000':'_RAJ2000','_DEJ2000':'_DEJ2000'} 
285          for e_dtype in extra_fields: 
286              try: 
287                  dtypes.append((e_dtype,results.dtype[names.index(translation[e_dtype])].str)) 
288              except ValueError: 
289                  if e_dtype != '_r': raise 
290                  dtypes.append((e_dtype,results.dtype[names.index('AngSep')].str)) 
291                   
292       
293       
294      newmaster = False 
295      if master is None or len(master)==0: 
296          master = np.rec.array([tuple([('f' in dt[1]) and np.nan or 'none' for dt in dtypes])],dtype=dtypes) 
297          newmaster = True 
298       
299       
300      cols_added = 0 
301      for key in cat_info.options(source): 
302          if key[:2] in [e_flag,q_flag] or '_unit' in key or key=='bibcode': 
303              continue 
304          photband = cat_info.get(source,key) 
305           
306           
307          cols = [results[key][:1], 
308                  (e_flag+key in cat_info.options(source) and results[cat_info.get(source,e_flag+key)][:1] or np.ones(len(results[:1]))*np.nan), 
309                  (q_flag+key in cat_info.options(source) and results[cat_info.get(source,q_flag+key)][:1] or np.ones(len(results[:1]))*np.nan), 
310                  np.array(len(results[:1])*[units[key]],str), 
311                  np.array(len(results[:1])*[photband],str), 
312                  np.array(len(results[:1])*[source],str)] 
313           
314          if extra_fields is not None: 
315              for e_dtype in extra_fields: 
316                  cols += [e_dtype in results.dtype.names and results[e_dtype][:1] or np.ones(len(results[:1]))*np.nan] 
317           
318          rows = [] 
319          for i in range(len(cols[0])): 
320              rows.append(tuple([col[i] for col in cols])) 
321          master = np.core.records.fromrecords(master.tolist()+rows,dtype=dtypes) 
322          cols_added += 1 
323       
324       
325       
326      N = len(master)-cols_added 
327      master_ = vizier._breakup_colours(master[N:]) 
328      master_ = vizier._breakup_colours(master_) 
329       
330      master = np.core.records.fromrecords(master.tolist()[:N]+master_.tolist(),dtype=dtypes) 
331       
332       
333      if newmaster: master = master[1:] 
334      return master 
 335   
336   
337   
339      """ 
340      Read a MAST csv (comma-sep) file into a record array. 
341       
342      @param filename: name of the TCSV file 
343      @type filename: str 
344      @return: catalog data columns, units, comments 
345      @rtype: record array, dict, list of str 
346      """ 
347      data,comms = ascii.read2array(filename,dtype=np.str,splitchar=',',return_comments=True) 
348      results = None 
349      units = {} 
350       
351      if len(data)>1: 
352           
353          data = np.array(data) 
354           
355           
356           
357           
358          formats = np.zeros_like(data[0]) 
359          for i,fmt in enumerate(data[1]): 
360              if 'string' in fmt or fmt=='datetime': formats[i] = 'a100' 
361              if fmt=='integer': formats[i] = 'f8' 
362              if fmt=='ra': formats[i] = 'f8' 
363              if fmt=='dec': formats[i] = 'f8' 
364              if fmt=='float': formats[i] = 'f8' 
365           
366          dtypes = np.dtype([(i,j) for i,j in zip(data[0],formats)]) 
367           
368          cols = [] 
369          for i,key in enumerate(data[0]): 
370               col = data[2:,i] 
371                
372               cols.append([(row.isspace() or not row) and np.nan or row for row in col]) 
373                
374               for source in cat_info.sections(): 
375                  if cat_info.has_option(source,data[0,i]+'_unit'): 
376                      units[key] = cat_info.get(source,data[0,i]+'_unit') 
377                      break 
378               else:   
379                  units[key] = 'nan' 
380           
381          cols = [np.cast[dtypes[i]](cols[i]) for i in range(len(cols))] 
382          results = np.rec.array(cols, dtype=dtypes) 
383      else: 
384          results = None 
385          units = {} 
386      return results,units,comms 
 387   
388   
389   
390 -def get_photometry(ID=None,extra_fields=['_r','_RAJ2000','_DEJ2000'],**kwargs): 
 391      """ 
392      Download all available photometry from a star to a record array. 
393       
394      For extra kwargs, see L{_get_URI} and L{mast2phot} 
395      """ 
396      to_units = kwargs.pop('to_units','erg/s/cm2/AA') 
397      master_ = kwargs.get('master',None) 
398      master = None 
399       
400      for source in cat_info.sections(): 
401          if source=='galex': 
402              results,units,comms = galex(ID=ID,**kwargs) 
403          else: 
404              results,units,comms = search(source,ID=ID,**kwargs) 
405           
406          if results is not None: 
407              master = mast2phot(source,results,units,master,extra_fields=extra_fields) 
408       
409       
410      if to_units and master is not None: 
411           
412          dtypes = [('cwave','f8'),('cmeas','f8'),('e_cmeas','f8'),('cunit','a50')] 
413          cols = [[],[],[],[]] 
414           
415          no_errors = np.isnan(master['e_meas']) 
416          master['e_meas'][no_errors] = 0. 
417           
418          zp = filters.get_info(master['photband']) 
419          for i in range(len(master)): 
420              try: 
421                  value,e_value = conversions.convert(master['unit'][i],to_units,master['meas'][i],master['e_meas'][i],photband=master['photband'][i]) 
422              except ValueError:  
423                  value,e_value = np.nan,np.nan 
424              except AssertionError:  
425                  value,e_value = np.nan,np.nan 
426              try: 
427                  eff_wave = filters.eff_wave(master['photband'][i]) 
428              except IOError: 
429                  eff_wave = np.nan 
430              cols[0].append(eff_wave) 
431              cols[1].append(value) 
432              cols[2].append(e_value) 
433              cols[3].append(to_units) 
434          master = numpy_ext.recarr_addcols(master,cols,dtypes) 
435           
436          master['e_meas'][no_errors] = np.nan 
437          master['e_cmeas'][no_errors] = np.nan 
438       
439      if master_ is not None and master is not None: 
440          master = numpy_ext.recarr_addrows(master_,master.tolist()) 
441      elif master_ is not None: 
442          master = master_ 
443       
444       
445      return master     
 446   
447   
448   
450      """ 
451      Retrieve an image from DSS 
452       
453      plot with 
454       
455      >>> data,coords,size = mast.get_image('HD21389') 
456      >>> pl.imshow(data[::-1],extent=[coords[0]-size[0]/2,coords[0]+size[0]/2, 
457                                      coords[1]-size[1]/2,coords[1]+size[1]/2]) 
458      """ 
459       
460      timeout = socket.getdefaulttimeout() 
461      socket.setdefaulttimeout(30.) 
462      if ra is None or dec is None: 
463          info = sesame.search(ID) 
464          ra,dec = info['jradeg'],info['jdedeg'] 
465      url  = urllib.URLopener() 
466      myurl = "http://archive.stsci.edu/cgi-bin/dss_search?ra=%s&dec=%s&equinox=J2000&height=%s&generation=%s&width=%s&format=FITS"%(ra,dec,height,'2i',width) 
467      out = url.retrieve(myurl) 
468      data1 = pf.getdata(out[0]) 
469       
470      socket.setdefaulttimeout(timeout) 
471      return data1,(ra,dec),(width/60.,height/60.) 
 472   
473   
474   
476      """ 
477      Get Fuse spectrum. 
478       
479      select can be ['ano','all'] 
480       
481      # what about MDRS aperture? 
482      """ 
483      direc = (directory is None) and os.getcwd() or directory 
484      if not os.path.isdir(direc): 
485          os.mkdir(direc) 
486      data = search('fuse',ID=ID) 
487      download_link = 'https://archive.stsci.edu/pub/fuse/data/{folder}/{filename}' 
488       
489      if data[0] is None: 
490          return None 
491      elif cat_info: 
492          return data 
493      output = [] 
494      for spectrum in data[0]: 
495          folder = spectrum['Data ID'][:8] 
496          aperture = spectrum['Aperture'] 
497          noexp = int(spectrum['No. Exps']) 
498          an = dict(MDWRS=2,LWRS=4) 
499          if not aperture in an: 
500              continue 
501          an = an[aperture] 
502          for exp in range(noexp): 
503              data_id = folder+'{0:03d}00'.format(exp) 
504              for type in select: 
505                  filename = '{data_id}{type}{an}ttagfcal.fit.gz'.format(data_id=data_id,type=type,an=an) 
506                  link = download_link.format(folder=folder,filename=filename) 
507                  link = link.lower() 
508                  filename = os.path.join(direc,filename) 
509                  myfile = http.download(link,filename=filename) 
510                   
511                   
512                  if os.path.getsize(myfile)<1000: 
513                      os.unlink(myfile) 
514                      continue 
515                  logger.info('FUSE spectrum %s to %s'%(link,direc)) 
516                  output.append(myfile) 
517      return output 
 518       
519   
520  if __name__=="__main__": 
521       
522      print get_FUSE_spectrum(ID='hd163296').dtype.names 
523       
524      raise SystemExit 
525      mission = 'fuse' 
526      base_url = _get_URI(mission,ID='hd163296') 
527      print base_url 
528      url = urllib.URLopener() 
529      filen,msg = url.retrieve(base_url,filename='%s.test'%(mission)) 
530      url.close() 
531      raise SystemExit 
532       
533      missions = ['hst', 
534                  'kepler', 
535                  'kepler/kepler_fov', 
536                  'kepler/kic10', 
537                  'kepler/kgmatch', 
538                  'iue', 
539                  'hut', 
540                  'euve', 
541                  'fuse', 
542                  'uit', 
543                  'wuppe', 
544                  'befs', 
545                  'tues', 
546                  'imaps', 
547                  'hlsp', 
548                  'pointings', 
549                  'copernicus', 
550                  'hpol', 
551                  'vlafirst', 
552                  'xmm-om'] 
553       
554       
555       
556       
557       
558       
559      out = search('kepler/kepler_fov',ID='TYC3134-165-1',filename='kepler_fov.test',radius=1.) 
560      out = search('kepler/kgmatch',ID='3749404',filename='kgmatch.test',radius=1.) 
561       
562      master = mast2phot('kepler/kgmatch',results,units,master=None,extra_fields=None) 
563      print master 
564       
565      sys.exit() 
566       
567      for mission in missions: 
568          base_url = _get_URI(mission,ID='hd46149') 
569           
570          try: 
571              url = urllib.URLopener() 
572              filen,msg = url.retrieve(base_url,filename='%s.test'%(mission)) 
573              url.close() 
574          except IOError: 
575              print 'failed' 
576              continue 
577