""" A module for plotting ALMA ATF PSI archive data using CASA. Use: From casapy, execfile 'psi_archive_plotter.py' example: # Set vadtxtb to be a tb tool filled with the contents of # ./VA--1-DTX----0x050-2007-09-26-test02.csv. The contents will be # fetched from ./VA--1-DTX----0x050-2007-09-26-test02.tb if it already # exists, and ./VA--1-DTX----0x050-2007-09-26-test02.csv otherwise. vadtxtb = filetotable('VA--1-DTX----0x050-2007-09-26-test02.csv') vadtxtb.colnames() # Plot V____DG_VOLTAGE_3.1_-_3.5V vs. time. simpleplot(vadtxtb, 'V____DG_VOLTAGE_3.1_-_3.5V') # Plot a list of voltages vs. time. simpleplot(vadtxtb, ['V____DG_VOLTAGE_3.1_-_3.5V', 'V____DG_VOLTAGE_4.8_-_5.2V']) # Plot two arbitrary variables vs. time. two_y_axes(vadtxtb, 'V____DG_VOLTAGE_4.8_-_5.2V', 'V____DG_VOLTAGE_3.1_-_3.5V') # Create a dictionary containing a tb for each .csv file in the # current directory (slow if there are a lot of new ones): pap = PSI_archive_plotter() # timeaxis() is just a helper function now, but Gene DuVall might # extend it to use minutes, hours, etc., and/or to supply a time axis # for customized plots. # tablename() returns the table's filename without any directory or # .csv. Sample directory: /groups/psi/Public_PSI/Data/Daily/2007/2007-10-05-ACU_02 """ __version__ = 0.02 __author__ = "Rob Reid, rreid@nrao.edu, starting from an example by D. Shepherd" import glob, os, re def truer_glob(patlist = '*'): """Unfortunately glob.glob('a b.*') doesn't work. This function returns a list of filenames matching the string patlist like it would in a shell (i.e. like perl's ). """ # I got the idea of using sum(nestedlist, []) to get a flattened copy of # nestedlist from Hans Meine's excellent page # http://kogs-www.informatik.uni-hamburg.de/~meine/python_tricks # and don't understand it yet, but it works. return sum([glob.glob(fn) for fn in patlist.split()], []) def uniquify_strlist(duplist): """Goes through the list of strings duplist, and appends a count number to any duplicates. i.e. given ['a', 'b', 'c', 'a', 'c', 'd', 'a', 'e'], it returns ['a_0', 'b', 'c_0', 'a_1', 'c_1', 'd', 'a_2', 'e']. Note that ['a', 'a', 'a_0'] would go to ['a_0', 'a_1', 'a_0']. If you have a list like that, you could run uniquify_strlist on the result. """ strlist = duplist[:] # Make a copy and leave duplist alone. strdict = {} for i in range(len(strlist)): s = strlist[i] if s in strdict: strdict[s].append(i) else: strdict[s] = [i] for s in strdict: if len(strdict[s]) > 1: for j in range(len(strdict[s])): strlist[strdict[s][j]] += "_%d" % j strdict[s] = [] return strlist def filetotable(fn, overwrite=false): """Returns a CASA table as read from the comma separated value file fn. Borrows heavily from an example by Debra Shepherd.""" tbname = fn.replace('.csv', '') # Do it this way instead of one tbname += '.tb' # replace in case fn doesn't end # in .csv. mytb = tbtool.create() # tb is persistently global! if os.path.exists(tbname) and not overwrite: mytb.open(tbname) return mytb ## Fill (load) the data into CASA. # autoheader=True instructs mytb to create its own headers # (called Column1, Column2, etc) because the headers cannot be # interpreted. mytb.fromascii(tablename = tbname, asciifile = fn, sep=',', autoheader = True, nomodify = false) mytb.close() # Probably NOT modifiable now. mytb.open(tbname, nomodify=false) # Yes, I want to modify it! # The first 3(?) rows are header info. Store them elsewhere and # remove those rows. boringcns = mytb.colnames() # Column1, Column2, ... TLAs = [mytb.getcell(bcn, 0) for bcn in boringcns] RCAs = [mytb.getcell(bcn, 1) for bcn in boringcns] # CASA doesn't like column names with spaces. colnames = [mytb.getcell(bcn, 2).replace(' ', '_') for bcn in boringcns] RCAs[0] = '' colnames[0] = TLAs[0] # Replace Names >>> with Seconds. colnames[1] = TLAs[1] # These would be blank otherwise. colnames[2] = TLAs[2] # Assigning to a slice doesn't seem to work. # This isn't just paranoia. colnames = uniquify_strlist(colnames) mytb.removerows([0, 1, 2]) for (bcn, ncn) in zip(boringcns, colnames): mytb.renamecol(bcn, ncn) # This sort of works, but upsets the browser. trdict = {} for i in range(len(colnames)): trdict['TLA'] = TLAs[i] trdict['RCA'] = RCAs[i] mytb.putcolkeywords(colnames[i], trdict) mytb.flush() # return mytb, TLAs, RCAs return mytb def timeaxis(datatb): """ Attempts to return a reasonable time axis and label for it from datatb. """ sec = datatb.getcol('Seconds').astype(float) t = sec # REMOVE when things get smarter! tlabel = "Time (s)" tint = sec[-1] - sec[0] if tint < 300.0: t = sec # Leave it in seconds tlabel = "Time (s)" return t, tlabel def tablename(datatb): """ Returns a slightly cleaned up name for datatb. """ tablab = datatb.name() tablab = re.sub(r"^.*/", '', tablab) tablab = re.sub(r"\.tb", '', tablab) return tablab def simpleplot(datatb, colnames): """ Graph datatb[colname] vs. datatb['time'] for each colname in colnames, on the same plot. """ t, tlabel = timeaxis(datatb) # clear plotter pl.clf() # turn interactive mode ON (allows automatic updates of plots) pl.ion() if isinstance(colnames, str): y = datatb.getcol(colnames) pl.plot(t, y) pl.ylabel(colnames + ', ' + datatb.getcolkeywords(colnames)['RCA']) elif len(colnames) == 1: y = datatb.getcol(colnames[0]) pl.plot(t, y) pl.ylabel(colnames[0] + ', ' + datatb.getcolkeywords(colnames[0])['RCA']) else: for colname in colnames: y = datatb.getcol(colname) pl.plot(t, y, label = colname + ', ' + datatb.getcolkeywords(colname)['RCA']) pl.legend(loc='best') pl.xlabel(tlabel) pl.title(tablename(datatb)) def two_y_axes(datatb, lcol, rcol): """ Graph datatb[lcol] and datatb[rcol] vs. datatb['time'] with lcol's y axis on the left, and rcol's y axis on the right, on the same plot. Unfortunately lcol and rcol cannot be lists like in simpleplot, because the second y axis essentially starts a new plot and resets the color cycler. """ t, tlabel = timeaxis(datatb) # clear plotter pl.clf() # turn interactive mode ON (allows automatic updates of plots) pl.ion() # Following the gospel of examples/two_scales.py ax1 = pl.subplot(111) y = datatb.getcol(lcol) pl.plot(t, y, 'b-', label = lcol) pl.ylabel(lcol + ", " + datatb.getcolkeywords(lcol)['RCA']) pl.legend(loc='upper left') ax2 = pl.twinx() y = datatb.getcol(rcol) pl.plot(t, y, 'r-', label=rcol) pl.ylabel(rcol + ", " + datatb.getcolkeywords(rcol)['RCA']) pl.legend(loc='upper right') pl.xlabel(tlabel) pl.title(tablename(datatb)) class PSI_archive_plotter(object): """Reads all directory/files and stores the data in a dictionary of dictionaries of dictionaries for each column: self.tables[tkey][colname][dtype], where tkey = filename with directory/ and .csv stripped off, colname = column name There is always a key 'colnames' with a list of column names in tkey as its value. dtype = data type """ def __init__(self, directory='.', files='*.csv'): self.directory = directory self.files = files self.tables = {} for fn in truer_glob(directory + '/' + files): # Shorten fn = ./AEC-3-ACU----0x000-2007-10-05-ACU_02.csv to # tkey = AEC-3-ACU----0x000-2007-10-05-ACU_02. It'd be nice to # have some way of doing completion on keys. tkey = fn.replace(directory + '/', '') tkey = tkey.replace('.csv', '') self.tables[tkey] = filetotable(fn) # Debra's example: ## a=tb.getcol('Column1') # the the entire column 1 (seconds) and put it ## # in the array called 'sec' ## sec=a[3:7] # Now ignore the header info in first 3 rows, just get ## +the data ## # note: the data in the browser shows up in row numbers 4-8, ## # BUT! CASA/python is always 0-based, so this translated into rows 3-7. ## sec=sec.astype('float') # identify data type as float ## b=tb.getcol('Column11') # get column with DG_TEMP 10-40 C ## dgtemp=b[3:7] ## dgtemp=dgtemp.astype('float') ## c=tb.getcol('Column19') # get column with 1.5V_B_VOLTSB ## volts1=c[3:7] ## volts1=volts1.astype('float') ## # Get a read out on the CASA prompt of what each array holds by typing the ## # names of each array (a, b, c, sec, dgtemp, volts1). ## ***** Problem, seconds has 12 significant digits with 2 digits after the ## +decimal point. ## Array output shows only 9 significant digits in floating format but ## +changes are ## all in the last 3 digits... So you cannot see if the data increments ## +properly. ## This may be causing problems in the plotting below... ## # To see all functions you can do on each array (numpy arrays), type, e.g. ## sec. # See all the options for manipulating the values ## # To see the plotting options, type: ## pl. ## hrs=sec/3600 ## pl.clf() # clear plotter ## pl.ion() # turn interactive mode ON (allows automatic updates of plots) ## # ## ****** matplotlib always takes the first number and subtracts all others so ## +both ## axes are of similar magnitude. Difficult to interpret but it ## +works. ## pl.plot(hrs,volts1) ## pl.axis([330784.209,330784.212,1.51,1.52]) ## # setting axes explicitly works but auto-scale is just fine. ## pl.xlabel('Time [sec]') ## pl.ylabel('1.5V_B_VOLTSB') ## # try plotting 2 values versus time: ## pl.clf ## pl.plot(hrs,volts1, hrs,dgtemp) ## # worked, yaxis scaled properly showing both volts1 & dgtemp ## pl.axis([330784.209,330784.212,1.0,19]) ## ######################################## ## Now, Gene wants to see if he can plot one value versus time downloaded ## in one file along with another value versus the same time that was ## output in another file. The files Gene gave me were collected at the ## same time so they have the same time-stamps. ## tb.close ## tb.fromascii(tablename='casaFLOOG.tb', ## asciifile='VA--1-FLOOG--0x032-2007-09-26-test02.csv', ## sep=',', autoheader=True) ## tb.browse ## d=tb.getcol('Column1') # get seconds col ## sec2=d[3:7] ## sec2=sec2.astype('float') ## hrs2=sec2/3600 ## # check: yes, hrs2=hrs, so we should be able to plot ## e=tb.getcol('Column5') # get Column 5: AMBSI Tmp = 44deg all the time. ## ambsitmp=e[3:7] ## ambsitmp=ambsitmp.astype('float') ## pl.clf ## pl.plot(hrs,volts1, hrs,dgtemp, hrs2,ambsitmp) ## # worked, yaxis scaled properly showing volts1, dgtemp & ambsitmp