import numpy as np
from numpy import *  # noqa: F401,F403 - kept so names this module re-exports stay available

try:
    # collapseBy no longer needs MDP; the import is retained (optionally) for
    # any other code in this module that still refers to ``mdp.utils``.
    import mdp.utils
except ImportError:
    mdp = None

# Written 26 Jan 2009 by Alex O. Holcombe, http://www.psych.usyd.edu.au/staff/alexh/


def _levels_in_order(column):
    """Return the distinct values of *column* in order of first appearance."""
    column = np.asarray(column)
    levels, first_seen = np.unique(column, return_index=True)
    # np.unique sorts its output; re-order by where each level first occurred.
    # NOTE(review): this mirrors what mdp.utils.uniq is understood to do
    # (de-duplicate while keeping input order) - confirm if axis order matters.
    return levels[np.argsort(first_seen)]


def collapseBy(data, DV, *factors):
    """Collapse *data* over every variable except the named *factors*.

    For each combination of factor levels, the rows of ``data`` matching that
    combination are selected and the dependent variable is summarised.

    Parameters
    ----------
    data
        Object indexable by column name (``data[name]``), e.g. a NumPy
        structured array such as the one ``loadtxt`` returns when given a
        dtype with field names. Must contain ``DV`` and every factor.
    DV
        Name of the dependent-variable column.
    *factors
        Names of the columns to keep as dimensions of the result.

    Returns
    -------
    avgs, stddevs, ns
        Three float arrays with one axis per factor (axis ``k`` has one entry
        per distinct value of ``factors[k]``, in order of first appearance).
        ``avgs`` holds the mean, ``stddevs`` the population standard deviation
        (``ddof=0``) and ``ns`` the number of rows in each cell. Cells with
        no matching rows have ``ns == 0`` and NaN mean/standard deviation.
        With no factors the results are 0-d arrays summarising every row.
    """
    dv_values = np.asarray(data[DV])
    n_rows = len(dv_values)

    # Distinct levels of each factor; their counts give the result shape.
    factorVals = [_levels_in_order(data[name]) for name in factors]
    dimSizes = tuple(len(levels) for levels in factorVals)

    # Start from NaN so that cells which never receive data are recognisable.
    avgs = np.full(dimSizes, np.nan)
    stddevs = np.full(dimSizes, np.nan)
    ns = np.full(dimSizes, np.nan)

    # One boolean row-mask per (factor, level), computed once rather than
    # once per combination.
    level_masks = []
    for name, levels in zip(factors, factorVals):
        column = np.asarray(data[name])
        level_masks.append([column == level for level in levels])

    # Walk every combination of level indices; a tuple index addresses exactly
    # one cell of the N-dimensional result arrays.
    for combo in np.ndindex(*dimSizes):
        selected = np.ones(n_rows, dtype=bool)
        for axis, level_idx in enumerate(combo):
            selected &= level_masks[axis][level_idx]

        cell = dv_values[selected]
        ns[combo] = cell.size
        if cell.size:
            # Boolean masking handles single-row cells, which previously
            # failed when a one-element index array was squeezed to 0-d.
            avgs[combo] = cell.mean()
            stddevs[combo] = cell.std()

    return avgs, stddevs, ns