"""
esps.py by Kyle Gorman (kgorman@ling.upenn.edu)

Python module for speech processing and acoustic analysis using ESPS

For more information on esps.py, or to obtain the latest version, visit
http://www.ling.upenn.edu/~kgorman/papers/esps.html

The statistics needed by zNormal(), linearFit() and tilt() are computed
inside this module, so no external statistics package is required.

If you get the following error:

    ImportError: No module named numpy

You need to install NumPy to use legendreFit() (the only function that
imports it; simply don't call it if you don't need it). NumPy can be
obtained here: http://numpy.scipy.org/
"""

import shutil
import subprocess
import tempfile
from bisect import bisect_left, bisect_right
from math import log, sqrt
# popen and system are no longer used below; they stay imported so that the
# names remain available to code doing "from esps import *"
from os import environ, path, popen, system

textgrid = """
### TEXTGRID FUNCTIONS ####
Manipulate Praat TextGrids

(n.b. on TextGrid IO: Each tier is represented as a Python list. Each entry
in the list, interval or point, is a tuple in the list, in proper temporal
order. If necessary, do a numeric sort by the first value of the tuple.
In the case of a point tier, each entry is a (time, string) tuple with the
time expressed as a float. In the case of an interval tier, each entry is a
(start, stop, string) tuple with start and stop times as floats.)

EXAMPLE(S) ~

from esps import read,write
grid = read('test.TextGrid')   # read in a TextGrid
write(grid,'new.TextGrid')     # write that TextGrid to a new file
"""


class _LineCursor(object):
    """Sequential reader over a list of lines (replaces repeated pop(0))."""

    def __init__(self, lines):
        self._lines = lines
        self._next = 0

    def peek(self):
        """Return the next line without consuming it (IndexError at end)."""
        return self._lines[self._next]

    def take(self, count=1):
        """Consume *count* lines and return the last one consumed."""
        self._next += count
        return self._lines[self._next - 1]


def _isIntervalTier(tier):
    """Tell whether a tier holds (start, stop, label) entries.

    An empty tier is treated as a point tier: it carries no entry to
    inspect, and a point tier is the only kind that can be written with
    zero entries without inventing an interval.
    """
    return bool(tier) and len(tier[0]) > 2


def read(inputFile):
    """
    def read(inputFile):
    Input: name of Praat TextGrid file (long or short text format)
    Output: List of tiers, which are lists of (time,string) tuples (in the
    case of point tiers), or (start,stop,string) tuples (in the case of
    interval tiers)
    Raises: IndexError if the file ends before the announced sizes are met
    """
    with open(inputFile, 'r') as handle:
        lines = [line.rstrip() for line in handle]
    # the first three lines are file type, object class and a blank line
    cursor = _LineCursor(lines[3:])
    longFormat = 'xmin' in cursor.peek()  # short files hold bare values

    def value(line):
        # split only once, so a label that itself contains ' = ' survives
        return line.split(' = ', 1)[1] if longFormat else line

    cursor.take(3)  # xmin, xmax, tiers?
    nTiers = int(value(cursor.take()))
    if longFormat:
        cursor.take()  # 'item []:'
    tiers = []
    for i in range(nTiers):
        if longFormat:
            cursor.take()  # 'item [n]:'
        interval = 'IntervalTier' in cursor.peek()
        cursor.take(4)  # class, tier name, xmin, xmax
        nEntries = int(value(cursor.take()))
        tier = []
        for j in range(nEntries):
            if longFormat:
                cursor.take()  # 'intervals [n]:' or 'points [n]:'
            if interval:
                start = float(value(cursor.take()))
                stop = float(value(cursor.take()))
                label = value(cursor.take()).strip('"')
                tier.append((start, stop, label))
            else:
                time = float(value(cursor.take()))
                label = value(cursor.take()).strip('"')
                tier.append((time, label))
        tiers.append(tier)
    return tiers


# write a Praat TextGrid
def write(list, outputFile, format=None):
    """
    def write(list,outputFile,format=None):
    Input: list of tiers, which are lists of (time,string) tuples (in the
    case of point tiers) in temporal order and/or lists of
    (start,stop,string) tuples (in the case of interval tiers), an output
    filename (,format i.e. the TextGrid is written in 'short' format if this
    is non-null, in long format otherwise)
    Output: none, but TextGrid is printed to output file

    Tiers are named "1", "2", ... in order. The grid-wide xmin/xmax are the
    earliest first time and the latest last time over all non-empty tiers
    (0.0 if every tier is empty).
    """
    tiers = list  # the parameter name shadows the builtin; use a clear alias
    starts = [tier[0][0] for tier in tiers if tier]
    stops = [tier[-1][1] if _isIntervalTier(tier) else tier[-1][0]
             for tier in tiers if tier]
    xmin = min(starts) if starts else 0.0
    xmax = max(stops) if stops else 0.0

    out = ['File type = "ooTextFile"\n', 'Object class = "TextGrid"\n', '\n']
    if format:  # nonnull, so short format: bare values, one per line
        out += [str(xmin) + '\n', str(xmax) + '\n', '<exists>\n',
                str(len(tiers)) + '\n']
        for number, tier in enumerate(tiers, 1):
            interval = _isIntervalTier(tier)
            out.append('"IntervalTier"\n' if interval else '"TextTier"\n')
            out += ['"' + str(number) + '"\n', str(xmin) + '\n',
                    str(xmax) + '\n', str(len(tier)) + '\n']
            for entry in tier:
                if interval:
                    out += [str(entry[0]) + '\n', str(entry[1]) + '\n',
                            '"' + entry[2] + '"\n']
                else:
                    out += [str(entry[0]) + '\n', '"' + entry[1] + '"\n']
    else:  # long format TextGrid
        out += ['xmin = ' + str(xmin) + '\n', 'xmax = ' + str(xmax) + '\n',
                'tiers? <exists>\n', 'size = ' + str(len(tiers)) + '\n',
                'item []:\n']
        for number, tier in enumerate(tiers, 1):
            interval = _isIntervalTier(tier)
            out.append('    item [' + str(number) + ']:\n')
            out.append('        class = "' +
                       ('IntervalTier' if interval else 'TextTier') + '"\n')
            out += ['        name = "' + str(number) + '"\n',
                    '        xmin = ' + str(xmin) + '\n',
                    '        xmax = ' + str(xmax) + '\n']
            kind = 'intervals' if interval else 'points'
            out.append('        ' + kind + ': size = ' + str(len(tier)) + '\n')
            for count, entry in enumerate(tier, 1):
                out.append('        ' + kind + ' [' + str(count) + ']:\n')
                if interval:
                    out += ['            xmin = ' + str(entry[0]) + '\n',
                            '            xmax = ' + str(entry[1]) + '\n',
                            '            text = "' + entry[2] + '"\n']
                else:
                    out += ['            time = ' + str(entry[0]) + '\n',
                            '            mark = "' + entry[1] + '"\n']
    with open(outputFile, 'w') as handle:  # closed even if writing fails
        handle.writelines(out)


def readMLF(inputFile):
    """
    def readMLF(inputFile):
    Input: name of HTK .mlf file created by issuing the command
    HVITE -o SM...
    Output: A list of tuples, one per utterance in the file. Each tuple is a
    (string,list) pair. The string is the base name (no folder, no
    extension) of the file named in the .mlf. The list is a grid, a list of
    tiers, always two items long: the first is the word tier, the second is
    the phone tier. Each tier is a list of (start,stop,string) tuples with
    times in seconds. By passing each grid to write(), you can create
    TextGrids for all the files in an .mlf.
    """
    with open(inputFile, 'r') as handle:
        lines = handle.readlines()
    sr = 10000000.0  # .mlf times are written in units of 100 ns
    grids = []
    name, word, words, phones = None, (), [], []
    for line in lines[1:]:  # the first line is the '#!MLF!#' header
        fields = line.split()
        if not fields:  # tolerate blank lines
            continue
        if line[0] == '"':  # a quoted file name opens a new utterance
            if name is not None:
                grids.append((name, [words, phones]))
            word, words, phones = (), [], []
            item = path.split(line.lstrip('"').rstrip().rstrip('"'))[1]
            name = path.splitext(item)[0]
        elif len(fields) == 4:  # start, stop, phone and a new word
            start, stop = float(fields[0]) / sr, float(fields[1]) / sr
            if len(word) == 2:  # the previous word ends where this starts
                words.append((word[0], start, word[1]))
            word = (start, fields[3])  # held until its end time is known
            phones.append((start, stop, fields[2]))
        elif len(fields) == 3:  # start, stop and phone only
            phones.append((float(fields[0]) / sr, float(fields[1]) / sr,
                           fields[2]))
        else:  # the closing '.': the pending word ends with the last phone
            if len(word) == 2 and phones:
                words.append((word[0], phones[-1][1], word[1]))
            word = ()
    if name is not None:  # the last utterance has no following file name
        grids.append((name, [words, phones]))
    return grids


arpabet = """
### ARPABET FUNCTIONS ###
Get ARPABET phones by features
"""


def monothongs():
    """Return the monophthong vowel symbols."""
    return ['IY', 'UW', 'IH', 'UH', 'EH', 'AH', 'AE', 'AO', 'AA']


def diphthongs():
    """Return the diphthong vowel symbols."""
    return ['EY', 'OW', 'OY', 'AY', 'AW']


def rhoticVowels():
    """Return the r-colored vowel symbols."""
    # NOTE(review): 'URH' breaks the vowel+'R' pattern of its neighbours and
    # may be a typo for 'UHR' -- confirm against the phone set in use
    return ['AOR', 'AAR', 'IHR', 'URH', 'EHR', 'ER']


def vowels():
    """Return all vowel symbols."""
    return monothongs() + diphthongs() + rhoticVowels()


def vlStops():
    """Return the voiceless stops."""
    return ['P', 'T', 'K']


def vdStops():
    """Return the voiced stops."""
    return ['B', 'D', 'G']


def stops():
    """Return all stops."""
    return vlStops() + vdStops()


def affricates():
    """Return the affricates."""
    return ['JH', 'CH']


def vlFricatives():
    """Return the voiceless fricatives."""
    return ['SH', 'TH', 'S', 'F', 'H']


def vdFricatives():
    """Return the voiced fricatives."""
    return ['DH', 'ZH', 'V', 'Z']


def fricatives():
    """Return all fricatives."""
    return vdFricatives() + vlFricatives()


def vlObstruents():
    """Return the voiceless obstruents."""
    return vlFricatives() + vlStops() + ['CH']


def vdObstruents():
    """Return the voiced obstruents."""
    return vdFricatives() + vdStops() + ['JH']


def obstruents():
    """Return all obstruents."""
    return vlObstruents() + vdObstruents()


def nasals():
    """Return the nasals."""
    return ['M', 'N', 'NG']


def approximants():
    """Return the approximants."""
    return ['R', 'Y', 'L', 'W']


def sonorants():
    """Return the sonorant consonants."""
    return nasals() + approximants()


def voiceless():
    """Return the voiceless consonants."""
    return vlFricatives() + vlStops() + ['CH']


def voiced():
    """Return the voiced consonants."""
    return vdFricatives() + vdStops() + ['JH'] + sonorants()


def consonants():
    """Return all consonants."""
    return voiceless() + voiced()


def arpabet():
    """Return every symbol (this function replaces the banner string)."""
    return vowels() + consonants()


get_f0 = """
### F0/RMS FUNCTIONS ###
Do F0 and/or RMS analysis

EXAMPLE(S) ~

import esps as E
f0s = E.F0('test.wav',0.002,100,450)  # extract F0 for male speaker
wf0s = E.whiskerSquash(f0s)           # squash whisker outliers
lf0s = E.logNormal(wf0s)              # log-normalize
slope,intercept = E.linearFit(lf0s)   # linear regression parameters
pf0s = E.percentileSquash(f0s)        # squash outside 10-90th percentile
zf0s = E.zNormal(pf0s)                # z-normalize
polys = E.legendreFit(zf0s,4)         # Legendre Polynomials (n=4) fitting
f0_sample = f0s[E.f0Sample(f0s,1.2)]  # get the f0 value nearest to 1.2 S
f0_rangle = E.f0Slice(f0s,0.8,2.2)    # get f0 samples between 0.8 and 2.2 S

HOW TO CITE ~

To cite get_f0, you can cite a paper by one of the authors (David Talkin)
on the RAPT algorithm used; it appeared in a volume in 1995:

@incollection{talkin1995,
    Author = {Talkin, David},
    Booktitle = {Speech Coding and Synthesis},
    Editor = {Kleijn, W.B. and Paliwal, K.K.},
    Publisher = {Elsevier},
    Title = {{A Robust Algorithm for Pitch Tracking (RAPT)}},
    Year = {1995}}

Or just the get_f0 manual:

@manual{get_f0,
    Author = {Talkin, David and Lin, Derek},
    Organization = {{Entropic Research Laboratory}},
    Title = {get_f0}}
"""


def _esps(args):
    """Run one ESPS command without a shell; return its stdout as lines.

    The argument list is handed to the program directly, so file names with
    spaces or shell metacharacters are safe. USE_ESPS_COMMON=off is set in
    the child's environment only. The exit status is not checked.
    """
    env = dict(environ, USE_ESPS_COMMON='off')
    process = subprocess.Popen([str(arg) for arg in args], env=env,
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    return process.communicate()[0].splitlines()


def _headerValue(item, espsFile):
    """Return one numeric header item of an ESPS file, read with hditem."""
    lines = _esps(['hditem', '-i', item, espsFile])
    if not lines:
        raise ValueError('hditem returned no %s for %s' % (item, espsFile))
    return float(lines[0])


def _getF0Frames(file, sr, limits=None):
    """Run get_f0 and return one (time, f0, rms) tuple per analysis frame.

    limits is None (get_f0's own F0 range) or a (min_f0, max_f0) pair
    passed to get_f0 through a parameter file. All intermediate files live
    in a private temporary folder that is always removed.
    """
    workdir = tempfile.mkdtemp(prefix='esps')
    try:
        command = ['get_f0', '-i', str(sr)]
        if limits is not None:
            params = path.join(workdir, 'params')
            with open(params, 'w') as handle:
                handle.write('float\tmin_f0\t= ' + str(limits[0]) + ';\n')
                handle.write('float\tmax_f0\t= ' + str(limits[1]) + ';\n')
            command += ['-P', params]
        out = path.join(workdir, 'track.f0')
        _esps(command + [file, out])
        offset = _headerValue('start_time', out)
        fstep = _headerValue('frame_step', out)
        frames = []
        for line in _esps(['pplain', out]):
            fields = line.split()
            if not fields:
                continue
            # the code reads field 0 as F0 and field 2 as RMS
            frames.append((offset, float(fields[0]), float(fields[2])))
            offset = offset + fstep
        return frames
    finally:
        shutil.rmtree(workdir, ignore_errors=True)


def F0(file, sr=0.01, xmin=100, xmax=500):
    """
    def F0(file,sr=0.01,xmin=100,xmax=500):
    Input: wav file name(,frame step in seconds,min f0,max f0)
    Output: List of (time,F0) tuples; frames whose F0 is not positive are
    left out
    """
    return [(time, f0)
            for (time, f0, rms) in _getF0Frames(file, sr, (xmin, xmax))
            if f0 > 0]


def RMS(file, sr=0.01):
    """
    def RMS(file,sr=0.01):
    Input: wav file name(,frame step in seconds)
    Output: List of (time,RMS) tuples, one per frame
    """
    return [(time, rms) for (time, f0, rms) in _getF0Frames(file, sr)]


def both(file, sr=0.01, xmin=100, xmax=500):
    """
    def both(file,sr=0.01,xmin=100,xmax=500):
    Input: wav file name(,frame step in seconds,min f0,max f0)
    Output: List of (time,F0) tuples (frames with positive F0 only), list
    of (time,RMS) tuples (every frame)
    """
    frames = _getF0Frames(file, sr, (xmin, xmax))
    f0s = [(time, f0) for (time, f0, rms) in frames if f0 > 0]
    rmss = [(time, rms) for (time, f0, rms) in frames]
    return f0s, rmss


def readPitchTier(inputFile):
    """
    def readPitchTier(inputFile):
    Input: Praat PitchTier file name (long or short text format)
    Output: List of (time,F0) tuples
    """
    with open(inputFile, 'r') as handle:
        lines = [line.strip() for line in handle]
    # skip file type, object class and the blank line; dropping every other
    # blank line makes a trailing newline at the end of the file harmless
    body = [line for line in lines[3:] if line]
    data = body[3:]  # after xmin, xmax and size
    f0s = []
    if body and 'xmin' in body[0]:  # long PitchTier
        for k in range(0, len(data), 3):  # 'points [n]:', time, value
            time = float(data[k + 1].split(' = ')[1])
            f0s.append((time, float(data[k + 2].split(' = ')[1])))
    else:  # short PitchTier: alternating time and value lines
        for k in range(0, len(data), 2):
            f0s.append((float(data[k]), float(data[k + 1])))
    return f0s


def _rankIndex(fraction, count):
    """Return the list index for a rank, rounded half up and kept in range."""
    return max(0, min(count - 1, int(fraction * count + 0.5)))


def percentileSquash(f0s, lo=.1, hi=.9):
    """
    def percentileSquash(f0s,lo=.1,hi=.9):
    Input: list of (time,F0) tuples(,low percentile, high percentile)
    Output: list of the (time,F0) tuples whose F0 lies strictly between the
    values found at the two percentile ranks (empty input gives [])
    """
    sF0s = sorted([f0 for (time, f0) in f0s])
    if not sF0s:
        return []
    xmin = sF0s[_rankIndex(lo, len(sF0s))]
    xmax = sF0s[_rankIndex(hi, len(sF0s))]
    return [(time, f0) for (time, f0) in f0s if xmin < f0 < xmax]


def whiskerSquash(f0s):
    """
    def whiskerSquash(f0s):
    Input: list of (time,F0) tuples
    Output: list of the (time,F0) tuples whose F0 lies strictly inside
    (Q1 - 1.5*IQR, Q3 + 1.5*IQR) (empty input gives [])
    """
    sF0s = sorted([f0 for (time, f0) in f0s])
    if not sF0s:
        return []
    Q1 = sF0s[_rankIndex(.25, len(sF0s))]
    Q3 = sF0s[_rankIndex(.75, len(sF0s))]
    xmin = Q1 - 1.5 * (Q3 - Q1)
    xmax = Q3 + 1.5 * (Q3 - Q1)
    return [(time, f0) for (time, f0) in f0s if xmin < f0 < xmax]


def zNormal(f0s):
    """
    def zNormal(f0s):
    Input: list of (time,F0) tuples
    Output: list of z-normalized (time,F0) tuples, using the mean and the
    sample (n-1) standard deviation of the F0 values
    Raises: ZeroDivisionError for fewer than two samples or a flat contour
    """
    nF0s = [float(f0) for (time, f0) in f0s]
    mu = sum(nF0s) / len(nF0s)
    sigma = sqrt(sum((f0 - mu) ** 2 for f0 in nF0s) / (len(nF0s) - 1))
    return [(time, (f0 - mu) / sigma) for (time, f0) in f0s]


def logNormal(f0s):
    """
    def logNormal(f0s):
    Input: list of (time,F0) tuples
    Output: list of log-normalized (time,F0) tuples: the minimum F0 maps to
    0 and the maximum to 1
    Raises: ZeroDivisionError if all F0 values are equal
    """
    nF0s = [f0 for (time, f0) in f0s]
    xmin = float(min(nF0s))  # float, so the ratios never truncate
    span = log(max(nF0s) / xmin)
    return [(time, log(f0 / xmin) / span) for (time, f0) in f0s]


def _linregress(xs, ys):
    """Return (slope, intercept) of the least-squares line of ys on xs."""
    count = float(len(xs))
    meanX = sum(xs) / count
    meanY = sum(ys) / count
    sxx = sum((x - meanX) ** 2 for x in xs)
    sxy = sum((x - meanX) * (y - meanY) for (x, y) in zip(xs, ys))
    slope = sxy / sxx
    return slope, meanY - slope * meanX


def linearFit(f0s):
    """
    def linearFit(f0s):
    Input: list of (time,F0) tuples
    Output: slope, corrected intercept of the regression of F0 on time; time
    is measured from the first sample, so the intercept is the fitted F0 at
    the start of the list
    Raises: ZeroDivisionError if the list is empty or all times are equal
    """
    times = [time - f0s[0][0] for (time, f0) in f0s]
    return _linregress(times, [f0 for (time, f0) in f0s])


def _fillGaps(f0s):
    """Return the F0 values with linearly interpolated values in the gaps.

    The frame step is taken to be the smallest positive spacing between
    consecutive samples; a spacing of k frame steps receives k-1 values on
    the straight line between its two neighbours.
    """
    spacings = [after[0] - before[0]
                for (before, after) in zip(f0s, f0s[1:])]
    positive = [spacing for spacing in spacings if spacing > 0]
    if not positive:
        return [f0 for (time, f0) in f0s]
    timeStep = min(positive)
    contour = []
    for (before, after, spacing) in zip(f0s, f0s[1:], spacings):
        contour.append(before[1])
        nFrames = int(round(spacing / timeStep))
        for k in range(1, nFrames):  # the missing samples
            contour.append(before[1] +
                           k * (after[1] - before[1]) / float(nFrames))
    contour.append(f0s[-1][1])  # last case, can't leave this off
    return contour


def legendreFit(f0s, n=3):
    """
    def legendreFit(f0s,n=3):
    Input: list of (time,F0) tuples, and the highest polynomial degree n
    (a positive integer)
    Output: list of n+1 length-normalized polynomial weights, for degrees
    0 to n
    Raises: ValueError if n is not a positive integer or f0s is empty
    """
    import numpy as N  # need numpy for matrix functions
    from numpy.polynomial import legendre
    if isinstance(n, bool) or not isinstance(n, int) or n < 1:
        raise ValueError('n must be a positive integer')
    if not f0s:
        raise ValueError('f0s must not be empty')
    interF0s = _fillGaps(f0s)  # step one: interpolate
    left = (max(interF0s) - min(interF0s)) / 2.0  # half the F0 range
    right = min(interF0s) + left  # centre of the F0 range
    if left:  # rescale F0 onto [-1, 1]
        interF0s = [(f0 - right) / left for f0 in interF0s]
    else:  # flat contour: nothing to scale
        interF0s = [0.0] * len(interF0s)
    length = len(interF0s)
    # column k holds the degree-k Legendre polynomial sampled on [-1, 1]
    basis = legendre.legvander(N.linspace(-1, 1, length), n)
    return (N.dot(interF0s, basis) / length).tolist()


def _nearestIndex(times, sampleTime):
    """Return the index of the entry of sorted *times* nearest sampleTime.

    Times outside the list map to the first or last index; when two
    neighbours are equally near, the later one wins.
    """
    if not times:
        raise IndexError('cannot sample an empty list')
    index = bisect_right(times, sampleTime)
    if index == 0:
        return 0
    if index == len(times):
        return index - 1
    if times[index] - sampleTime > sampleTime - times[index - 1]:
        return index - 1  # left side
    return index  # right side


def _sliceByTime(records, start, stop):
    """Return the records whose time t (first field) has start <= t < stop."""
    times = [record[0] for record in records]
    return records[bisect_left(times, start):bisect_left(times, stop)]


def f0Sample(f0s, sampleTime):
    """
    def f0Sample(f0s,sampleTime):
    Input: a list of (time,f0) tuples, a time to be sampled at
    Output: returns the (0-initial) index of the nearest sample
    """
    return _nearestIndex([sample[0] for sample in f0s], sampleTime)


def f0Slice(f0s, start, stop):
    """
    def f0Slice(f0s,start,stop):
    Input: a list of (time,f0) tuples, start time, stop time
    Output: a list of the tuples with start <= time < stop
    """
    return _sliceByTime(f0s, start, stop)


"""
### FORMANT FUNCTIONS ###
Get formant frequencies and bandwidths

EXAMPLE(S) ~

import esps as e
freqs,bands = e.formant('test.wav',0.001)  # extract formants at 1 ms timestep
region = e.formantSlice(freqs,1.2,2.0)     # grab region of a vowel, for instance
f1region = [(f1,f2) for (time,f1,f2,f3,f4) in region]  # f1/f2 during the vowel

HOW TO CITE ~

To cite formant, you can cite the paper on the algorithm used, from the
proceedings of ICASSP83:

@inproceedings{Secrest1983,
    Author = {Secrest, B.G. and Doddington, G.R.},
    Booktitle = {Proceedeings ICASSP83},
    Pages = {1352--1355},
    Title = {{An integrated pitch tracking algorithm for speech systems}},
    Year = {1983}}

You can also cite the formant manual:

@manual{formant,
    Author = {Talkin, David},
    Organization = {{Entropic Research Laboratory}},
    Title = {formant},
    Year = {1993}}
"""


def formant(file, sr=0.01):
    """
    def formant(file,sr=0.01):
    Input: a audio file name (, frame step in seconds)
    Output: list of (time,f1,f2,f3,f4) tuples, list of (time,b1,b2,b3,b4)
    tuples, all values floats (the first four values of each output record
    are taken as frequencies, the rest as bandwidths)
    """
    name = path.splitext(path.split(str(file))[1])[0]
    workdir = tempfile.mkdtemp(prefix='esps')  # private output folder
    try:
        _esps(['formant', '-B', '999999999', '-O', workdir, '-i', str(sr),
               file])
        track = path.join(workdir, name + '.fb')
        offset = _headerValue('start_time', track)
        fstep = 1 / _headerValue('record_freq', track)
        freqs, bands = [], []
        for line in _esps(['pplain', track]):
            values = [float(field) for field in line.split()]
            if not values:
                continue
            freqs.append(tuple([offset] + values[:4]))
            bands.append(tuple([offset] + values[4:]))
            offset = offset + fstep
    finally:
        # only this call's own files are removed
        shutil.rmtree(workdir, ignore_errors=True)
    return freqs, bands


def formantSample(formants, sampleTime):
    """
    def formantSample(formants,sampleTime):
    Input: a list of (time,f1,f2,f3,f4) tuples (or bandwidth tuples), a time
    to be sampled at
    Output: returns the (0-initial) index of the nearest sample
    """
    return _nearestIndex([record[0] for record in formants], sampleTime)


def formantSlice(formants, start, stop):
    """
    def formantSlice(formants,start,stop):
    Input: a list of (time,f1,f2,f3,f4) tuples (or bandwidth tuples), start
    time, stop time
    Output: a list of the tuples with start <= time < stop
    """
    return _sliceByTime(formants, start, stop)


sgram = """
SPECTROGRAM FUNCTIONS ~
Get spectral intensities

EXAMPLE(S) ~

import esps as e               # import module
ffts = e.FFT('test.wav')       # do the extraction
syl = e.FFTSlice(ffts,0.2,0.4) # get a region
avg = e.FFTAverage(syl)        # get the spectral avverage in that region
balance = e.balance(avg)       # get Sluitjer and van Heven spectral balance coefs
slope,intercept = e.tilt(avg)  # get Thiessen and Saffran slope and interncept

HOW TO CITE ~

The algorithm used is a version of the well-known Fast Fourier Transform,
originally discovered by Gauss. You can, however, cite the sgram manual:

@manual{sgram,
    Author = {Burton, David and Johnson, Rod and Shore, John},
    Organization = {{Entropic Research Laboratory}},
    Title = {sgram},
    Year = {1997}}

The balance technique is modified from Thiessen and Saffran 2004:

@article{Thiessen2004,
    Author = {Thiessen, Erik and Saffran, Jenny},
    Journal = {Perception \\& Psychophysics},
    Number = {5},
    Pages = {779--791},
    Title = {{Spectral tilt as a cue to word segmentation in infancy and adulthood}},
    Volume = {66},
    Year = {2004}}

The bands used for the spectral tilt calculation are as suggested by
Sluitjer and van Heven 1996:

@article{sluitjer1996,
    Author = {Sluijter, Agaath M.C. and van Heven, Vincent J.},
    Journal = {Journal of the Acoustical Society of America},
    Number = {4},
    Pages = {2471--2485},
    Title = {{Spectral balance as an acoustic correlate of lingustic stress}},
    Volume = {100},
    Year = {1996}}
"""


def FFT(file):
    """
    def FFT(file):
    Input: wav file name
    Output: a list of (time, list of energy bins) tuples; the first value
    of every output record is discarded and the rest are read as integers
    """
    workdir = tempfile.mkdtemp(prefix='esps')
    try:
        out = path.join(workdir, 'track.sg')
        _esps(['sgram', '-m', 'wb', file, out])
        offset = _headerValue('start_time', out)
        fstep = 1 / _headerValue('record_freq', out)
        slices = []
        for line in _esps(['pplain', out]):
            powers = line.split()
            if not powers:
                continue
            slices.append((offset, [int(power) for power in powers[1:]]))
            offset = offset + fstep
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
    return slices


def tilt(fft):
    """
    def tilt(fft):
    Input: a single (time, list of energies) tuple
    Output: the spectral tilt slope and intercept, defined by regression of
    the energy in bins 6 to 47 (between 500 Hz and 4 KHz, assuming CD-qual
    audio) on the bin position within that band
    """
    band = fft[1][6:48]
    return _linregress(range(len(band)), band)


def balance(fft):
    """
    def balance(fft):
    Input: a single (time, list of energies) tuple
    Output: a tuple of the log summed energies in the bands (0:500 Hz,
    500:1000 Hz, 1:2 KHz, 2-4 KHz)
    Raises: ValueError if a band sums to zero or less
    """
    energies = fft[1]
    bandOne = log(sum(energies[0:6]))    # f0, 0:500 Hz
    bandTwo = log(sum(energies[6:13]))   # f1, 500:1000 Hz
    bandThr = log(sum(energies[13:25]))  # f2, 1:2 KHz
    bandFor = log(sum(energies[25:48]))  # f3 and f4, 2:4 KHz
    return (bandOne, bandTwo, bandThr, bandFor)


def FFTSample(ffts, sampleTime):
    """
    def FFTSample(ffts,sampleTime):
    Input: a list of (time,list of energies) tuples, a time to be sampled at
    Output: returns the (0-initial) index of the nearest sample
    """
    return _nearestIndex([record[0] for record in ffts], sampleTime)


def FFTAverage(ffts):
    """
    def FFTAverage(ffts):
    Input: a list of (time, list of energies) tuples
    Output: a single (time,list of energies) tuple, averaging across inputs
    Raises: IndexError if ffts is empty
    """
    count = float(len(ffts))  # float, so integer energies are not truncated
    time, energies = 0, [0] * len(ffts[0][1])
    for fft in ffts:
        time = time + fft[0]
        for i, energy in enumerate(fft[1]):
            energies[i] = energies[i] + energy
    return (time / count, [energy / count for energy in energies])


def FFTSlice(ffts, start, stop):
    """
    def FFTSlice(ffts,start,stop):
    Input: a list of (time,list of energy) tuples, start time, stop time
    Output: a list of the tuples with start <= time < stop
    """
    return _sliceByTime(ffts, start, stop)