
Source Code for Module ivs.inout.ascii

# -*- coding: utf-8 -*-
"""
Read and write ASCII files.
"""
import gzip
import logging
import os
import re
import numpy as np

logger = logging.getLogger("IO.ASCII")
logger.addHandler(logging.NullHandler())

#{ General Input

def read2list(filename,**kwargs):
    """
    Load an ASCII file to a list of lists.
    
    The comments and data go to two different lists.
    
    Also opens gzipped files.
    
    @param filename: name of file with the data
    @type filename: string
    @keyword commentchar: character(s) denoting comment lines
    @type commentchar: list of str
    @keyword splitchar: character separating entries in a row (default: whitespace)
    @type splitchar: str or None
    @keyword skip_empty: skip empty lines
    @type skip_empty: bool
    @keyword skip_lines: number of lines to skip (including comment and empty lines)
    @type skip_lines: integer
    @return: list of lists (data rows),
    list of lists (comment lines without commentchar)
    @rtype: (list,list)
    """
    commentchar = kwargs.get('commentchar',['#'])
    splitchar = kwargs.get('splitchar',None)
    skip_empty = kwargs.get('skip_empty',True)
    skip_lines = kwargs.get('skip_lines',0)
    
    if os.path.splitext(filename)[1] == '.gz':
        ff = gzip.open(filename)
    else:
        ff = open(filename)
    
    data = []  # data
    comm = []  # comments
    
    line_nr = -1
    
    #-- fixed-width split or character split?
    if splitchar is None or isinstance(splitchar,str):
        fixwidth = False
    else:
        fixwidth = True
    
    while 1:  # might call read several times for a file
        line = ff.readline()
        if not line: break  # end of file
        line_nr += 1
        if line_nr < skip_lines:
            continue
        
        #-- skip empty lines if requested
        if skip_empty and line.isspace():
            continue  # empty line
        
        #-- remove return characters
        line = line.replace('\n','')
        #-- when reading a comment line
        if line[0] in commentchar:
            comm.append(line[1:])
            continue  # treat next line
        
        #-- when reading data, split the line
        if not fixwidth:
            data.append(line.split(splitchar))
        else:
            data.append(fw2python(line,splitchar))
    ff.close()
    
    #-- report that the file has been read
    logger.debug('Data file %s read'%(filename))
    
    #-- and return the contents
    return data,comm

def read2array(filename,**kwargs):
    """
    Load an ASCII file to a numpy array.
    
    For a list of extra keyword arguments, see C{<read2list>}.
    
    If you want to return a list of the columns instead of rows, just do
    
    C{>>> col1,col2,col3 = ascii.read2array(myfile).T}
    
    @param filename: name of file with the data
    @type filename: string
    @keyword dtype: type of numpy array (default: float)
    @type dtype: numpy dtype
    @keyword return_comments: flag to return comments (default: False)
    @type return_comments: bool
    @return: data array (, list of comments)
    @rtype: ndarray (, list)
    """
    dtype = kwargs.get('dtype',np.float)
    return_comments = kwargs.get('return_comments',False)
    data,comm = read2list(filename,**kwargs)
    data = np.array(data,dtype=dtype)
    return return_comments and (data,comm) or data

def read2recarray(filename,**kwargs):
    """
    Load an ASCII file to a numpy record array.
    
    For a list of extra keyword arguments, see C{<read2list>}.
    
    If C{dtype} is None, we have some room to automatically detect the contents
    of the columns. This is not fully implemented yet.
    
    The keyword 'dtype' should be equal to a list of tuples, e.g.
    
    C{dtype = [('col1','a10'),('col2','>f4'),..]}
    
    @param filename: name of file with the data
    @type filename: string
    @keyword dtype: dtypes of record array
    @type dtype: list of tuples
    @keyword return_comments: flag to return comments (default: False)
    @type return_comments: bool
    @return: data array (, list of comments)
    @rtype: ndarray (, list)
    """
    dtype = kwargs.get('dtype',None)
    return_comments = kwargs.get('return_comments',False)
    splitchar = kwargs.get('splitchar',None)
    
    #-- first read in as a normal array
    data,comm = read2list(filename,**kwargs)
    
    #-- if dtype is None, we have some room to automatically detect the contents
    #   of the columns. This is not fully implemented yet, and works only
    #   if the second-to-last and last comment lines give the column
    #   names and dtypes, respectively
    if dtype is None:
        data = np.array(data,dtype=str).T
        header = comm[-2].replace('|',' ').split()
        types = comm[-1].replace('|','').split()
        dtype = [(head,typ) for head,typ in zip(header,types)]
        dtype = np.dtype(dtype)
    elif isinstance(dtype,list):
        data = np.array(data,dtype=str).T
        dtype = np.dtype(dtype)
    #-- if splitchar is a list, assume the file has fixed-width columns
    elif isinstance(splitchar,list):
        types,lengths = fws2info(splitchar)
        dtype = []
        names = range(300)
        for i,(fmt,lng) in enumerate(zip(types,lengths)):
            if fmt.__name__=='str':
                dtype.append((str(names[i]),(fmt,lng)))
            else:
                dtype.append((str(names[i]),fmt))
        dtype = np.dtype(dtype)
    
    #-- cast all columns to the specified type
    data = [np.cast[dtype[i]](data[i]) for i in range(len(data))]
    
    #-- and build the record array
    data = np.rec.array(data, dtype=dtype)
    return return_comments and (data,comm) or data

#}

#{ General Output

def write_array(data, filename, **kwargs):
    """
    Save a numpy array to an ASCII file.
    
    Add comments via keyword comments (a list of strings denoting every comment
    line). By default, the comment lines will be preceded by the C{commentchar}.
    If you want to override this behaviour, set C{commentchar=''}.
    
    If you give a record array, you can simply set C{header} to C{True} to write
    the header, instead of specifying a list of strings.
    
    @keyword header: optional header for column names
    @type header: list of str (or boolean for record arrays)
    @keyword comments: comment lines
    @type comments: list of str
    @keyword commentchar: comment character
    @type commentchar: str
    @keyword sep: separator for the columns and header names
    @type sep: str
    @keyword axis0: string denoting the orientation of the matrix. If you gave
    a list of columns, set C{axis0='cols'}, otherwise C{axis0='rows'} (default).
    @type axis0: str, one of C{cols}, C{rows}
    @keyword mode: file mode (a for appending, w for (over)writing...)
    @type mode: char (one of 'a','w'...)
    @keyword auto_width: automatically determine the width of the columns
    @type auto_width: bool
    @keyword formats: formats to use to write each column
    @type formats: list of string formatters
    """
    header = kwargs.get('header',None)
    comments = kwargs.get('comments',None)
    commentchar = kwargs.get('commentchar','#')
    sep = kwargs.get('sep',' ')
    axis0 = kwargs.get('axis0','rows')
    mode = kwargs.get('mode','w')
    auto_width = kwargs.get('auto_width',False)
    formats = kwargs.get('formats',None)
    # use '%g', '%f' or '%e' for writing floats automatically from record arrays with auto width
    use_float = kwargs.get('use_float','%f')
    
    #-- switch to rows first if a list of columns is given
    if not isinstance(data,np.ndarray):
        data = np.array(data)
    if not 'row' in axis0.lower():
        data = data.T
    
    if formats is None:
        try:
            formats = [('S' in str(data[col].dtype) and '%s' or use_float) for col in data.dtype.names]
        except TypeError:
            formats = [('S' in str(col.dtype) and '%s' or '%s') for col in data.T]
    #-- determine width of columns: also take the header label into account
    col_widths = []
    #-- for record arrays
    if auto_width is True and header==True:
        for fmt,head in zip(formats,data.dtype.names):
            col_widths.append(max([len('%s'%(fmt)%(el)) for el in data[head]]+[len(head)]))
    #-- for normal arrays and specified header
    elif auto_width is True and header is not None:
        for i,head in enumerate(header):
            col_widths.append(max([len('%s'%(formats[i])%(el)) for el in data[:,i]]+[len(head)]))
    #-- for normal arrays without header
    elif auto_width is True and header is None:
        for i in range(data.shape[1]):
            col_widths.append(max([len('%s'%(formats[i])%(el)) for el in data[:,i]]))
    
    if header is True:
        col_fmts = [str(data.dtype[i]) for i in range(len(data.dtype))]
        header = data.dtype.names
    else:
        col_fmts = None
    
    ff = open(filename,mode)
    if comments is not None:
        ff.write('\n'.join(comments)+'\n')
    
    #-- WRITE HEADER
    #-- when header is desired and automatic width
    if header is not None and col_widths:
        ff.write('#'+sep.join(['%%%s%ss'%(('s' in fmt and '-' or ''),cw)%(head) for fmt,head,cw in zip(formats,header,col_widths)])+'\n')
    #-- when header is desired
    elif header is not None:
        ff.write('#'+sep.join(header)+'\n')
    
    #-- WRITE COLUMN FORMATS
    if col_fmts is not None and col_widths:
        ff.write('#'+sep.join(['%%%s%ss'%(('s' in fmt and '-' or ''),cw)%(colfmt) for fmt,colfmt,cw in zip(formats,col_fmts,col_widths)])+'\n')
    elif col_fmts is not None:
        ff.write('#'+sep.join(['%%%ss'%('s' in fmt and '-' or '')%(colfmt) for fmt,colfmt in zip(formats,col_fmts)])+'\n')
    
    #-- WRITE DATA
    #-- with automatic width
    if col_widths:
        for row in data:
            ff.write(' '+sep.join(['%%%s%s%s'%(('s' in fmt and '-' or ''),cw,fmt[1:])%(col) for col,cw,fmt in zip(row,col_widths,formats)])+'\n')
    #-- without automatic width
    else:
        for row in data:
            ff.write(sep.join(['%s'%(col) for col in row])+'\n')
    ff.close()
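
#-- Sketch of write_array for a list of columns; the arrays and file name are
#   hypothetical, and axis0='cols' tells the writer the data are columns:
#
#   >>> x = np.linspace(0,1,100)
#   >>> write_array([x,x**2],'output.txt',axis0='cols',
#   ...             header=['x','x2'],comments=['#-- generated example'])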

#}

#{ Helper functions

def fw2info(fixwidth):
    """
    Convert fixed-width information to python information.
    
    >>> fw2info('F13.4')
    ([float], [13])
    >>> fw2info('I2')
    ([int], [2])
    >>> fw2info('6I2')
    ([int, int, int, int, int, int], [2, 2, 2, 2, 2, 2])
    >>> fw2info('5F12.3')
    ([float, float, float, float, float], [12, 12, 12, 12, 12])
    """
    translator = {}
    translator['I'] = int
    translator['F'] = float
    translator['A'] = str
    
    numbers = re.compile(r'[\d]*[\d\.*]+',re.IGNORECASE)
    formats = re.compile('[a-z]+',re.IGNORECASE)
    
    numbers = numbers.findall(fixwidth)
    formats = formats.findall(fixwidth)
    if len(numbers)==1:
        numbers = ['1'] + numbers
    widths = int(numbers[0])*[int(nr.split('.')[0]) for nr in numbers[-1:]]
    formats = int(numbers[0])*formats
    formats = [translator[fmt] for fmt in formats]
    return formats,widths

def fws2info(fixwidths):
    """
    Convert a list of fixed-width specifications into column types and the
    cumulative column boundaries.
    """
    info = [fw2info(fw) for fw in fixwidths]
    types = []
    for iinfo in info: types += iinfo[0]
    length = []
    for iinfo in info: length += iinfo[1]
    length = [0]+[sum(length[:i+1]) for i in range(len(length))]
    return types,length

def fw2python(line,fixwidths,missing_value=0):
    """
    Convert a fixed-width line to python objects.
    """
    types,length = fws2info(fixwidths)
    processed_line = []
    for i,itype in enumerate(types):
        number = line[length[i]:length[i+1]]
        if number.isspace():
            processed_line.append(itype(missing_value))
        else:
            processed_line.append(itype(line[length[i]:length[i+1]]))
    return processed_line

#}

#{ Source specific

def read_harps(filename):
    """
    Return spectrum and header from a HARPS normalised file.
    
    @param filename: name of the file
    @type filename: str
    @return: wavelengths,flux,header
    @rtype: array,array,dict
    """
    data,comments = read2array(filename,return_comments=True)
    wave,flux = data[:,0],data[:,1]
    header = {'instrument':'harps'}
    for line in comments:
        line = line.split()
        if line and line[0]=='HJD':
            header['HJD'] = float(line[-1])
    return wave,flux,header

def read_mad(infile,add_dp=False,**kwargs):
    """
    Reads .phot from MADROT or .nad from MAD.
    
    MAD is the nonadiabatic pulsation code from Marc-Antoine Dupret.
    
    MADROT is the extension for slowly rotating stars in the traditional
    approximation.
    
    This function serves as a generic read function: it reads *.phot and *.nad
    files, both from MAD and MADROT.
    
    Returns a list of dicts.
    
    @param add_dp: add period spacings information
    @type add_dp: boolean
    @return: star info, blocks
    """
    try:
        if os.path.splitext(infile)[1]=='.phot':
            star_info,blocks = read_photrot(infile,**kwargs)
        elif os.path.splitext(infile)[1]=='.nad':
            star_info,blocks = read_nad(infile,**kwargs)
        else:
            logger.error('Cannot interpret file "%s"'%(infile))
    except:
        logger.error('Error encountered in file %s'%(infile))
        raise
    #-- add period spacings
    if add_dp:
        add_period_spacings(star_info,blocks)
    return star_info,blocks

def read_photrot(infile,teff=None,logg=None):
    """
    Reads .phot output file of MADROT.
    
    Returns a list of dictionaries.
    
    For quick reading, you can already give a range on teff and logg, and skip
    the rest of the file.
    
    If there is a NAD file with the same name (but extension .nad instead of .phot),
    we read in the header of this file too for information on the star::
    
        # M/M_sun = 1.40 Teff = 6510.2 Log (L/L_sun) = 0.5168
        # Log g = 4.2748 R/R_sun = 1.4283 age (y) = 2.3477E+08
        # X = 0.70 Z = 0.020 alphaconv = 1.80 overshoot = 0.40
    
    If the filename is given as C{M1.4_0.020_0.40_0.70_001-0097.phot}, we add
    information on mass, Z, overshoot, X and evolution number to star_info,
    except for those quantities already derived from a NAD file.
    
    One dictionary represents information about one pulsation mode.
    
    @param teff: range in effective temperature for file selection (value, sigma)
    @type teff: tuple
    @param logg: range in surface gravity for file selection (value, sigma)
    @type logg: tuple
    """
    dict_keys = ('l', 'm', 'par', 'n', 'f_cd_com', 'f_cd_obs', 'ltilde', 'fT',
                 'psiT', 'fg', 'psig', 'K', 'omega_puls', 'omega_rot')
    with open(infile,'r') as f:
        line = f.readline()
        #-- first line gives teff, logg, XYZ; remember this as global
        #   information about the star
        logTeff, logg_, Fe_H = line.split()
        logTeff, logg_, Fe_H = float(logTeff), float(logg_), float(Fe_H)
        star_info = dict(teff=10**logTeff,logg=logg_,Fe_H=Fe_H)
        
        #-- read the NAD file if it exists:
        nadfile = os.path.splitext(infile)[0]+'.nad'
        if os.path.isfile(nadfile):
            star_info.update(read_nad(nadfile,only_header=True)[0])
        else:
            pass
            # not implemented yet.
        
        #-- second line gives number of blocks
        line = f.readline()
        nr_b = int(line)
        blocks = []
        
        #-- check for fundamental parameters
        valid_funds = True
        if teff is not None:
            if not ((teff[0]-teff[1]) <= star_info['teff'] <= (teff[0]+teff[1])):
                valid_funds = False
        if logg is not None:
            if not ((logg[0]-logg[1]) <= star_info['logg'] <= (logg[0]+logg[1])):
                valid_funds = False
        logger.debug('%s %s %s'%(star_info['teff'],teff,valid_funds))
        
        #-- loop over all blocks and store them
        pos = 0
        fail = False
        nrfails = 0
        while 1 and valid_funds:
            line = f.readline()
            if not line: break
            # skip empty lines
            if len(line.strip())==0: continue
            elif pos==0:
                # wait for the line which gives the column info
                if line.strip()[0]=='l':
                    pos = 2
                continue
            elif pos==2:
                #-- we are now at the line which gives values for l,m,par,...
                #   create a dictionary and store the values
                d = {}
                for key_, val_ in zip(dict_keys,line.split()):
                    #-- sometimes values can be merged together
                    try:
                        d[key_] = float(val_)
                    except ValueError:
                        fail = True
                        nrfails += 1
                #-- add spin parameter if not failed
                if not fail:
                    d['s'] = 2*d['omega_rot']/d['omega_puls']
                
                for key_ in ('l', 'm', 'par', 'n'): d[key_] = int(d[key_])
                #-- create the lists to store B
                d['B'] = []
                d['l_B'] = []
                pos = 3
            elif pos==3:
                #-- read the 50 next lines which give vector B and associated l-values
                l_B_, B_ = line.split()
                d['B'].append(float(B_))
                d['l_B'].append(int(l_B_))
                if len(d['B'])==50:
                    d['B'] = np.array(d['B'])
                    d['l_B'] = np.array(d['l_B'])
                    if not fail: blocks.append(d)
                    pos = 0
                continue
    
    logger.debug("Read %d modes from MADROT %s (%d modes failed)"%(len(blocks),infile,nrfails))
    
    return star_info,blocks

def read_phot(photfile,teff=None,logg=None):
    """
    Reads .phot output file of MAD.
    
    Returns a list of dictionaries.
    
    One dictionary represents information about one pulsation mode.
    """
    dict_keys = ('l', 'fT','psiT', 'fg', 'psig')
    
    blocks = []
    
    with open(photfile,'r') as f:
        fc = f.readlines()
    #-- first line gives teff, logg, XYZ; remember this as global information
    #   about the star
    logTeff, logg = fc[0].split()[:2]
    logTeff, logg = float(logTeff), float(logg)
    star_info = dict(teff=10**logTeff,logg=logg)
    
    #-- second line gives number of blocks
    nr_b = int(fc[1])
    blocks = []
    
    #-- loop over all blocks and store them
    pos = 0
    fail = False
    nrfails = 0
    for line in fc[2:]:
        entries = line.strip().split()
        block = {}
        for i,key in enumerate(dict_keys):
            block[key] = entries[i]
        block['m'] = 0          # only one m-value
        block['n'] = 0          # no information on nodes
        block['f_cd_com'] = 0   # no information on frequency
        block['omega_rot'] = 0  # no rotation
        blocks.append(block)
    
    logger.debug("Read %d modes from MAD %s"%(len(blocks),photfile))
    return star_info,blocks

def read_nad(nadfile,only_header=False):
    """
    Reads .nad output file of MAD.
    
    Returns a list of dictionaries.
    One dictionary represents information about one pulsation mode.
    """
    
    dict_keys = {'l':'l',
                 'm':'m',
                 'n':'n',
                 'freq_com (c/d)':'f_cd_com',
                 'freq_in (c/d)':'f_cd_obs',
                 'freq (c/d)': 'f_cd_obs',    # for non rotating nad
                 'freq (micHz)': 'f_hz_obs',  # for non rotating nad
                 'ltilde':'l-tilde',
                 'l-tilde':'l-tilde',
                 'Im(omega)':'Im(omega)',
                 'Im(sigma) (micHz)':'Im(sigma)_hz',
                 'Re(omega)':'Re(omega)',
                 'Re_ad(omega)':'Re_ad(omega)',
                 'f_T':'fT',
                 'psi_T':'psiT',
                 'f_g':'fg',
                 'psi_g':'psig',
                 'K':'K',
                 'lifetime (d)':'lifetime',
                 'Q (d)':'Q'}                 # for non rotating nad
    star_info = {}
    
    blocks = []
    myheader = None
    lines = 0
    
    with open(nadfile,'r') as ff:
        
        while 1:
            lines += 1
            line = ff.readline()
            if not line: break           # end of file, quit
            if line.isspace(): continue  # blank line, continue
            
            line = line.strip()
            if line[0]=='#' and lines<=3:  # general header containing star info
                contents = [i for i in line[1:].split()]
                entry_name = None
                entry_value = None
                for i in range(len(contents)):
                    if not entry_name:
                        entry_name = contents[i]
                        continue
                    if entry_name and contents[i]=='=':
                        entry_value = 0.
                        continue
                    if entry_value is not None:
                        entry_value = float(contents[i])
                        star_info[entry_name.lower().replace(' ','')] = entry_value
                        entry_name = None
                        entry_value = None
                    else:
                        entry_name += contents[i]
            elif only_header and lines>3:  # we were only interested in the header
                break
            elif line[0]=='#':
                myheader = [head.strip() for head in line[1:].split('  ') if head]
            else:
                line = line.split()
                thismode = {}
                for i,key in enumerate(myheader):
                    thismode[dict_keys[key]] = float(line[i])
                #-- for nonrotating nad files, m is not available
                if not 'm' in myheader:
                    thismode['m'] = 0
                #-- for rotating AND nonrotating nad files, omega_rot is not available
                thismode['omega_rot'] = 0
                blocks.append(thismode)
    
    logger.debug("Read %d modes from MAD (nad) %s"%(len(blocks),nadfile))
    return star_info,blocks

#}