# Load two example datasets from the seewave package and compare them.
# No samplingRate is passed to compareSounds() here, unlike the calls on
# plain numeric vectors elsewhere in these examples.
data(list = c("orni", "peewit"), package = "seewave")
compareSounds(x = orni, y = peewit)
# Optionally look at and listen to each recording:
# spectrogram(orni); playme(orni)
# spectrogram(peewit); playme(peewit)
if (FALSE) {
  # Extended examples. The if (FALSE) guard keeps them from running when the
  # file is sourced; run the lines interactively to try them out.

  # Three synthetic vocalizations with different formants, plus white noise
  # (8000 samples, i.e. 0.5 s at the 16000 Hz rate used below).
  s1 <- soundgen(formants = "a", play = TRUE)
  s2 <- soundgen(formants = "ae", play = TRUE)
  s3 <- soundgen(formants = "eae", sylLen = 700, play = TRUE)
  s4 <- runif(8000, -1, 1) # white noise
  compareSounds(s1, s2, samplingRate = 16000)
  compareSounds(s1, s4, samplingRate = 16000)

  # the central section of s3 is more similar to s1 than is the beg/end of s3
  compareSounds(s1, s3, samplingRate = 16000, padDir = "left")
  compareSounds(s1, s3, samplingRate = 16000, padDir = "central")

  # padding with 0 penalizes differences in duration, whereas padding with NA
  # is like saying we only care about the overlapping part
  compareSounds(s1, s3, samplingRate = 16000, padWith = 0)
  compareSounds(s1, s3, samplingRate = 16000, padWith = NA)

  # comparing linear (Hz) vs mel-spectrograms produces quite different results
  compareSounds(s1, s3, samplingRate = 16000, specType = "linear")
  compareSounds(s1, s3, samplingRate = 16000, specType = "mel")

  # pass additional control parameters to dtw and melfcc
  compareSounds(
    s1, s3,
    samplingRate = 16000,
    specPars = list(nbands = 128),
    dtwPars = list(dist.method = "Manhattan")
  )

  # use feature matrices instead of spectrograms (time in columns, features
  # in rows)
  # Helper: turn the $detailed output of analyze() into a feature matrix.
  getFeatureMatrix <- function(sound, samplingRate = 16000) {
    m <- t(as.matrix(analyze(sound, samplingRate = samplingRate)$detailed))
    # Drop the first three rows (presumably non-acoustic columns of
    # analyze()'s output -- TODO confirm); drop = FALSE keeps a matrix even
    # if only one row remains.
    m <- m[-(1:3), , drop = FALSE]
    # Replace missing measurements with 0 before comparing.
    m[is.na(m)] <- 0
    m
  }
  a1 <- getFeatureMatrix(s1)
  a2 <- getFeatureMatrix(s2)
  a4 <- getFeatureMatrix(s4)
  compareSounds(a1, a2, method = c("cosine", "dtw"))
  compareSounds(a1, a4, method = c("cosine", "dtw"))

  # a demo for comparing different similarity metrics
  target <- soundgen(
    sylLen = 500,
    formants = "a",
    pitch = data.frame(
      time = c(0, 0.1, 0.9, 1),
      value = c(100, 150, 135, 100)
    ),
    temperature = 0.001
  )
  spec1 <- soundgen:::getMelSpec(target, samplingRate = 16000)

  # Four candidate settings: every combination of right/wrong formants and
  # right/wrong pitch contour relative to the target.
  parsToTry <- list(
    list(
      formants = "i", # wrong
      pitch = data.frame(
        time = c(0, 1), # wrong
        value = c(200, 300)
      )
    ),
    list(
      formants = "i", # wrong
      pitch = data.frame(
        time = c(0, 0.1, 0.9, 1), # right
        value = c(100, 150, 135, 100)
      )
    ),
    list(
      formants = "a", # right
      pitch = data.frame(
        time = c(0, 1), # wrong
        value = c(200, 300)
      )
    ),
    list(
      formants = "a", # right
      pitch = data.frame(
        time = c(0, 0.1, 0.9, 1), # right
        value = c(100, 150, 135, 100)
      )
    )
  )

  # Synthesize one sound per candidate, using the same temperature and
  # syllable length as the target.
  sounds <- lapply(parsToTry, function(pars) {
    do.call(soundgen, c(pars, list(temperature = 0.001, sylLen = 500)))
  })

  # Play each candidate in turn (called for the side effect only).
  for (snd in sounds) {
    playme(snd)
  }

  # One row per candidate, one column per similarity metric.
  method <- c("cor", "cosine", "diff", "dtw")
  df <- as.data.frame(matrix(
    NA_real_,
    nrow = length(parsToTry),
    ncol = length(method),
    dimnames = list(NULL, method)
  ))
  for (i in seq_len(nrow(df))) {
    # Column 2 of the compareSounds() output is stored, one value per method.
    df[i, ] <- compareSounds(
      x = spec1, # faster to calculate spec1 once
      y = sounds[[i]],
      samplingRate = 16000,
      method = method
    )[, 2]
  }
  df$av <- rowMeans(df, na.rm = TRUE)

  # row 1 = wrong pitch & formants, ..., row 4 = right pitch & formants
  df$formants <- c("wrong", "wrong", "right", "right")
  df$pitch <- c("wrong", "right", "wrong", "right")
  df
}
# Run the code above in your browser using DataLab