Set up the recording method using java script:
# all imports
from IPython.display import Javascript
from google.colab import output
from base64 import b64decode
RECORD = """
const sleep = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise(resolve => {
const reader = new FileReader()
reader.onloadend = e => resolve(e.srcElement.result)
reader.readAsDataURL(blob)
})
var record = time => new Promise(async resolve => {
stream = await navigator.mediaDevices.getUserMedia({ audio: true })
recorder = new MediaRecorder(stream)
chunks = []
recorder.ondataavailable = e => chunks.push(e.data)
recorder.start()
await sleep(time)
recorder.onstop = async ()=>{
blob = new Blob(chunks)
text = await b2text(blob)
resolve(text)
}
recorder.stop()
})
"""
def record(fn, sec):
display(Javascript(RECORD))
s = output.eval_js('record(%d)' % (sec*1000))
b = b64decode(s.split(',')[1])
with open(fn,'wb') as f:
f.write(b)
return fn
Record something:
filename = 'felixtest.wav'
record(filename, 5)
Play it back:
import IPython
IPython.display.Audio(filename)
install Google speechbrain
%%capture
!pip install speechbrain
import speechbrain as sb
Load the ASR nodel train on libri speech:
from speechbrain.pretrained import EncoderDecoderASR
asr_model = EncoderDecoderASR.from_hparams(source="speechbrain/asr-crdnn-rnnlm-librispeech", savedir="pretrained_model")
And get a transcript on your audio:
asr_model.transcribe_file(audio_file )