Add a bunch of test files
This commit is contained in:
parent
89cb732e42
commit
41bc6015bf
73
audio.py
Normal file
73
audio.py
Normal file
|
@ -0,0 +1,73 @@
|
||||||
|
from copy import copy
import numpy as np
import time
import whisper

# Side-effect marker: read_file.py hot-reloads this module, so this prints
# on every (re)import.
print('Loaded audio.py')

CHUNK_LENGTH = 24000 # 48000 Hz * 0.5 s
# NOTE(review): 24000 bytes of 16-bit mono 48 kHz PCM is 0.25 s, not 0.5 s,
# and this constant is not referenced in this module — confirm intent.
|
||||||
|
|
||||||
|
def process_pcm(audio_chunks, data):
    """Append one PCM packet to the rolling buffer, keeping at most 75 entries.

    audio_chunks: mutable list acting as a FIFO of raw PCM byte chunks.
    data: one packet of raw PCM bytes from pymumble.
    """
    # pymumble PCM is 16-bit 48000 Hz
    began = time.time()

    audio_chunks.append(data)
    # Evict from the front once the cap is exceeded (FIFO behaviour).
    while len(audio_chunks) > 75:
        del audio_chunks[0]

    #print('finished chunk in', time.time() - began, 's')
|
||||||
|
|
||||||
|
def process_stream(audio_chunks, model):
    """Low-pass filter, downsample and Whisper-transcribe the PCM buffer.

    audio_chunks: list of raw 16-bit little-endian 48 kHz PCM byte chunks,
                  maintained at 75 entries by process_pcm.
    model: a loaded whisper model exposing .transcribe().

    Prints the transcription; returns nothing.
    """
    # Only run once the buffer is exactly "full" (75 chunks).
    if len(audio_chunks) != 75:
        print('Skipping, bad length.')
        time.sleep(0.5)
        return

    start = time.time()
    # Shallow-copy first so the reader thread can keep mutating the list
    # while we join the snapshot.
    a = copy(audio_chunks)
    b = b''.join(a)
    c = np.frombuffer(b, np.int16)

    # Define a low-pass filter kernel (windowed-sinc FIR).
    fs = 48000
    cutoff_freq = fs / 6   # 8 kHz: the Nyquist frequency of the 16 kHz target rate
    nyquist_freq = fs / 2  # NOTE(review): computed but unused
    num_taps = 101
    taps = np.sinc(2 * cutoff_freq / fs * (np.arange(num_taps) - (num_taps - 1) / 2))
    taps *= np.blackman(num_taps)
    taps /= np.sum(taps)  # normalize for unity DC gain

    # Apply the filter kernel to audio_data using convolution
    filtered_audio_data = np.convolve(c, taps, mode='same')
    # Downsample filtered_audio_data by a factor of 3 using take (48 kHz -> 16 kHz)
    downsampled_audio_data = filtered_audio_data.take(np.arange(0, len(filtered_audio_data), 3)).flatten()

    # Scale 16-bit integer range into [-1.0, 1.0) float32 for Whisper.
    norm_audio = downsampled_audio_data.astype(np.float32) / 32768.0

    # Disabled silence-detection experiment, kept for reference.
    #abs_mean = np.mean(np.abs(downsampled_audio_data ** 3))
    #print('abs mean:', abs_mean)
    #if abs_mean < 0.0:
    #    print('silence detected, skipping')
    #    time.sleep(1)
    #    return

    # Pad/trim to the fixed-length window Whisper operates on.
    d = whisper.pad_or_trim(norm_audio)

    #print('processed audio in', time.time() - start, 's')

    start = time.time()
    e = model.transcribe(d, language='en')
    print('transcribed audio in', time.time() - start, 's')

    # Debug aid: if transcription was pathologically slow (> 10 s), dump the
    # offending audio and stall this thread so the dump isn't overwritten.
    if time.time() - start > 10:
        with open('downsampled.pcm', 'wb') as f:
            f.write(downsampled_audio_data.astype(np.int16).tobytes())

        print('wrote file, sleeping')
        #breakpoint()
        time.sleep(100)

    print(' ', e['text'])
|
79
audio_only_client.py
Normal file
79
audio_only_client.py
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
# A python script to do both listening and talking. This is the basic model
|
||||||
|
# for an audio-only mumble client.
|
||||||
|
|
||||||
|
# Usage:
|
||||||
|
|
||||||
|
# Install pyaudio (instructions: https://people.csail.mit.edu/hubert/pyaudio/#downloads)
|
||||||
|
# If `fatal error: 'portaudio.h' file not found` is encountered while installing
|
||||||
|
# pyaudio even after following the instruction, this solution might be of help:
|
||||||
|
# https://stackoverflow.com/questions/33513522/when-installing-pyaudio-pip-cannot-find-portaudio-h-in-usr-local-include
|
||||||
|
#
|
||||||
|
# Install dependencies for pymumble.
|
||||||
|
#
|
||||||
|
# Set up a mumber server. For testing purpose, you can use https://guildbit.com/
|
||||||
|
# to spin up a free server. Hard code the server details in this file.
|
||||||
|
#
|
||||||
|
# run `python3 ./listen_n_talk.py`. Now an audio-only mumble client is connected
|
||||||
|
# to the server.
|
||||||
|
#
|
||||||
|
# To test its functionality, in a separate device, use some official mumble
|
||||||
|
# client (https://www.mumble.com/mumble-download.php) to verbally communicate
|
||||||
|
# with this audio-only client.
|
||||||
|
#
|
||||||
|
# Works on MacOS. Does NOT work on RPi 3B+ (I cannot figure out why. Help will
|
||||||
|
# be much appreciated)
|
||||||
|
|
||||||
|
import pymumble_py3 as pymumble_py3
|
||||||
|
from pymumble_py3.callbacks import PYMUMBLE_CLBK_SOUNDRECEIVED as PCS
|
||||||
|
#import pyaudio
|
||||||
|
|
||||||
|
# Connection details for mumble server. Hardcoded for now, will have to be
|
||||||
|
# command line arguments eventually
|
||||||
|
pwd = "" # password
|
||||||
|
server = "protospace.ca" # server address
|
||||||
|
nick = "python"
|
||||||
|
port = 64738 # port number
|
||||||
|
|
||||||
|
|
||||||
|
# pyaudio set up
|
||||||
|
#CHUNK = 1024
|
||||||
|
#FORMAT = pyaudio.paInt16 # pymumble soundchunk.pcm is 16 bits
|
||||||
|
#CHANNELS = 1
|
||||||
|
#RATE = 48000 # pymumble soundchunk.pcm is 48000Hz
|
||||||
|
|
||||||
|
#p = pyaudio.PyAudio()
|
||||||
|
#stream = p.open(format=FORMAT,
|
||||||
|
# channels=CHANNELS,
|
||||||
|
# rate=RATE,
|
||||||
|
# input=True, # enable both talk
|
||||||
|
# output=True, # and listen
|
||||||
|
# frames_per_buffer=CHUNK)
|
||||||
|
|
||||||
|
|
||||||
|
# mumble client set up
def sound_received_handler(user, soundchunk):
    """Handle one PCM packet received from the mumble server.

    Playback through pyaudio is currently commented out, so this only logs
    the packet size.  `user` is the sending user (unused).
    """
    #stream.write(soundchunk.pcm)
    print(len(soundchunk.pcm))
|
||||||
|
|
||||||
|
|
||||||
|
# Spin up a client and connect to mumble server
mumble = pymumble_py3.Mumble(server, nick, password=pwd, port=port)
# set up callback called when PCS event occurs (once per received packet)
mumble.callbacks.set_callback(PCS, sound_received_handler)
mumble.set_receive_sound(1) # Enable receiving sound from mumble server
mumble.start()
mumble.is_ready() # Wait for client is ready (blocks until connected)
|
||||||
|
|
||||||
|
|
||||||
|
# constant capturing sound and sending it to mumble server
# NOTE(review): sending is disabled (pyaudio capture is commented out), so
# this loop only keeps the process alive — and it busy-spins with no sleep,
# burning a full CPU core.
while True:
    #data = stream.read(CHUNK, exception_on_overflow=False)
    #mumble.sound_output.add_sound(data)
    pass


# close the stream and pyaudio instance
# NOTE(review): unreachable (the loop above never breaks), and `stream` / `p`
# are never defined because the pyaudio setup block is commented out — these
# lines would raise NameError if ever reached.
stream.stop_stream()
stream.close()
p.terminate()
|
61
read_file.py
Normal file
61
read_file.py
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
import os
|
||||||
|
import importlib
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import whisper
|
||||||
|
import traceback
|
||||||
|
|
||||||
|
import audio
|
||||||
|
|
||||||
|
print('Loading whisper model...')
start = time.time()
# Shared by process_stream_thread below; loading can take a while.
model = whisper.load_model('medium')
print('Done after', time.time() - start, 's')


# array of audio chunks (rolling buffer; audio.process_pcm caps its length)
audio_chunks = [bytearray()]
|
||||||
|
|
||||||
|
def read_audio_thread():
    """Feed PCM from a test file into the shared buffer, simulating live audio.

    Loops over 'whispercppexample.pcm' forever, pushing 1920-byte reads
    through audio.process_pcm.  The 0.04 s sleep presumably paces reads to
    roughly real time — TODO confirm against the file's sample rate/width.
    """
    global audio_chunks

    while True:
        with open('whispercppexample.pcm', 'rb') as f:
            while True:
                data = f.read(1920)
                if not data:
                    break  # end of file: reopen and replay from the start
                audio.process_pcm(audio_chunks, data)
                time.sleep(0.04)
|
||||||
|
|
||||||
|
def process_stream_thread():
    """Run audio.process_stream in a loop, surviving errors in the module.

    audio.py is hot-reloaded by monitor_module, so a broken edit must not
    kill this worker: exceptions are logged and the loop pauses 5 s before
    retrying.
    """
    global audio_chunks

    while True:
        try:
            audio.process_stream(audio_chunks, model)
        except Exception:
            # Fix: was `except BaseException`, which also swallowed
            # SystemExit and KeyboardInterrupt, making shutdown impossible.
            # (The bound name `e` was unused and has been dropped.)
            print('exception')
            traceback.print_exc()
            print('sleeping...')
            time.sleep(5)
|
||||||
|
|
||||||
|
def monitor_module():
    """Poll audio.py's mtime once per second and hot-reload it on change."""
    last_seen = os.path.getmtime('audio.py')

    while True:
        current = os.path.getmtime('audio.py')
        if current > last_seen:
            last_seen = current
            print('Change detected, reloading.')
            importlib.reload(audio)
        time.sleep(1)
|
||||||
|
|
||||||
|
# Worker threads: file reader, transcriber, and audio.py hot-reloader.
t1 = threading.Thread(target=read_audio_thread)
t2 = threading.Thread(target=process_stream_thread)
t3 = threading.Thread(target=monitor_module)
t1.start()
t2.start()
t3.start()

# Keep the main thread alive.  Fix: `while True: pass` busy-spun at 100% CPU;
# sleeping is equivalent here since the loop body does nothing.
while True:
    time.sleep(1)
|
||||||
|
|
90
stream.py
Normal file
90
stream.py
Normal file
|
@ -0,0 +1,90 @@
|
||||||
|
import os
|
||||||
|
|
||||||
|
DEBUG = os.environ.get('DEBUG')
|
||||||
|
|
||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='[%(asctime)s] %(levelname)s %(module)s/%(funcName)s: - %(message)s',
|
||||||
|
level=logging.DEBUG if DEBUG else logging.INFO)
|
||||||
|
|
||||||
|
import pymumble_py3 as pymumble_py3
|
||||||
|
from pymumble_py3.callbacks import PYMUMBLE_CLBK_SOUNDRECEIVED as PCS
|
||||||
|
import whisper
|
||||||
|
from copy import copy
|
||||||
|
import numpy as np
|
||||||
|
import time
|
||||||
|
|
||||||
|
logging.info('Loading whisper model...')
|
||||||
|
model = whisper.load_model('medium')
|
||||||
|
logging.info('Done.')
|
||||||
|
|
||||||
|
# Connection details for mumble server. Hardcoded for now, will have to be
|
||||||
|
# command line arguments eventually
|
||||||
|
pwd = "" # password
|
||||||
|
server = "protospace.ca" # server address
|
||||||
|
nick = "python"
|
||||||
|
port = 64738 # port number
|
||||||
|
|
||||||
|
CHUNK_LENGTH = 24000 # 48000 Hz * 0.5 s
# NOTE(review): 24000 *bytes* of 16-bit mono 48 kHz PCM is 12000 samples =
# 0.25 s, not 0.5 s — confirm which duration was intended.


# rolling buffer of PCM chunks, most recent last (capped at 10 by the handler)
audio_chunks = [bytearray()]


def sound_received_handler(user, soundchunk):
    """Accumulate incoming mumble PCM into fixed-size chunks.

    Called by pymumble once per received sound packet.  `user` is the
    sending user (unused).  Fills the last chunk up to CHUNK_LENGTH bytes,
    then starts a new one, keeping at most 10 chunks.
    """
    # pymumble PCM is 16-bit 48000 Hz
    if len(audio_chunks[-1]) < CHUNK_LENGTH:
        audio_chunks[-1].extend(soundchunk.pcm)
    else:
        # Fix: the original appended an *empty* bytearray here and discarded
        # soundchunk.pcm entirely, silently dropping one packet per chunk
        # boundary.  Seed the new chunk with the packet instead.
        audio_chunks.append(bytearray(soundchunk.pcm))

    # Keep only the 10 most recent chunks.
    if len(audio_chunks) > 10:
        audio_chunks.pop(0)
|
||||||
|
|
||||||
|
|
||||||
|
# Spin up a client and connect to mumble server
mumble = pymumble_py3.Mumble(server, nick, password=pwd, port=port)
# set up callback called when PCS event occurs
mumble.callbacks.set_callback(PCS, sound_received_handler)
mumble.set_receive_sound(1) # Enable receiving sound from mumble server
mumble.start()
mumble.is_ready() # Wait for client is ready

# Main loop: repeatedly low-pass filter, downsample and transcribe the
# rolling buffer that sound_received_handler fills from the server.
while True:
    #data = stream.read(CHUNK, exception_on_overflow=False)
    #mumble.sound_output.add_sound(data)

    # Wait until the buffer is full (the handler caps it at 10 chunks).
    # NOTE(review): this spins without sleeping while the buffer fills,
    # burning a full CPU core.
    if len(audio_chunks) != 10:
        continue

    start = time.time()
    # Shallow-copy so the pymumble callback thread can keep appending.
    a = copy(audio_chunks)
    b = b''.join(a)
    c = np.frombuffer(b, np.int16)

    # Define a low-pass filter kernel (windowed-sinc FIR)
    fs = 48000
    cutoff_freq = fs / 6   # 8 kHz: the Nyquist frequency of the 16 kHz target rate
    nyquist_freq = fs / 2  # NOTE(review): computed but unused
    num_taps = 101
    taps = np.sinc(2 * cutoff_freq / fs * (np.arange(num_taps) - (num_taps - 1) / 2))
    taps *= np.blackman(num_taps)
    taps /= np.sum(taps)  # normalize for unity DC gain

    # Apply the filter kernel to audio_data using convolution
    filtered_audio_data = np.convolve(c, taps, mode='same')
    # Downsample filtered_audio_data by a factor of 3 using take (48 kHz -> 16 kHz)
    downsampled_audio_data = filtered_audio_data.take(np.arange(0, len(filtered_audio_data), 3))
    # Scale 16-bit integer range into [-1.0, 1.0) float32 for Whisper.
    downsampled_audio_data = downsampled_audio_data.flatten().astype(np.float32) / 32768.0

    # Pad/trim to the fixed-length window Whisper operates on.
    d = whisper.pad_or_trim(downsampled_audio_data)

    #print('processed audio in', time.time() - start, 's')

    e = model.transcribe(d)

    print(e['text'])
|
||||||
|
|
||||||
|
|
||||||
|
|
79
test.py
Normal file
79
test.py
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
import os
|
||||||
|
|
||||||
|
DEBUG = os.environ.get('DEBUG')
|
||||||
|
|
||||||
|
import logging
|
||||||
|
logging.basicConfig(
|
||||||
|
format='[%(asctime)s] %(levelname)s %(module)s/%(funcName)s: - %(message)s',
|
||||||
|
level=logging.DEBUG if DEBUG else logging.INFO)
|
||||||
|
logging.getLogger('aiohttp').setLevel(logging.DEBUG if DEBUG else logging.WARNING)
|
||||||
|
|
||||||
|
|
||||||
|
import ffmpeg
|
||||||
|
import whisper
|
||||||
|
import time
|
||||||
|
import asyncio
|
||||||
|
from aiohttp import web, ClientSession, ClientError
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
app = web.Application()
|
||||||
|
PORT = 3002
|
||||||
|
SAMPLE_RATE = 16000
|
||||||
|
|
||||||
|
logging.info('Loading whisper model...')
|
||||||
|
model = whisper.load_model('medium')
|
||||||
|
logging.info('Done.')
|
||||||
|
|
||||||
|
#start = time.time()
|
||||||
|
#result = model.transcribe('whisper-test.ogg')
|
||||||
|
#print('finished in', time.time() - start, 's')
|
||||||
|
#
|
||||||
|
#print(result['text'])
|
||||||
|
|
||||||
|
def load_audio(binary_file, sr = SAMPLE_RATE):
    """Decode an audio byte blob into mono float32 PCM at *sr* Hz.

    binary_file: raw bytes of any container/codec ffmpeg can read.
    Returns a 1-D float32 array scaled to [-1.0, 1.0).
    Raises RuntimeError if ffmpeg fails to decode the input.

    Adapted from https://github.com/ckaytev/tgisper/blob/main/tgisper/tgisperbot.py
    """
    # Build the decode pipeline: stdin -> mono s16le PCM at the target rate.
    # Requires the ffmpeg CLI and the `ffmpeg-python` package.
    decoder = (
        ffmpeg.input("pipe:", threads=0)
        .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=sr)
    )
    try:
        raw_pcm, _ = decoder.run(
            cmd="ffmpeg", capture_stdout=True, capture_stderr=True, input=binary_file
        )
    except ffmpeg.Error as e:
        raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e

    samples = np.frombuffer(raw_pcm, np.int16)
    return samples.flatten().astype(np.float32) / 32768.0
|
||||||
|
|
||||||
|
async def index(request):
    """Root endpoint; always responds with a fixed HTML body (health check)."""
    body = 'hello world'
    return web.Response(text=body, content_type='text/html')
|
||||||
|
|
||||||
|
async def post_whisper(request):
    """POST /whisper: transcribe an uploaded audio file.

    Expects multipart form data with an 'audio' file field; responds with
    whisper's full result dict serialized as JSON.
    """
    data = await request.post()
    audio = load_audio(data['audio'].file.read())

    logging.info('Starting audio transcription...')
    # NOTE(review): model.transcribe is a blocking, compute-heavy call inside
    # an async handler — it stalls the entire event loop for its duration.
    result = model.transcribe(audio)
    logging.info('Done.')

    return web.json_response(result)
|
||||||
|
|
||||||
|
async def run_webserver():
    """Start the aiohttp server on all interfaces and keep it alive forever."""
    logging.info('Starting webserver on port: %s', PORT)
    runner = web.AppRunner(app)
    await runner.setup()
    site = web.TCPSite(runner, '0.0.0.0', PORT)
    await site.start()

    # aiohttp serves in the background; this coroutine just has to stay alive.
    while True:
        await asyncio.sleep(10)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    app.router.add_get('/', index)
    app.router.add_post('/whisper', post_whisper)

    # NOTE(review): asyncio.get_event_loop() outside a running loop is
    # deprecated on modern Python; asyncio.run(run_webserver()) is the
    # current idiom — confirm target Python version before changing.
    loop = asyncio.get_event_loop()
    # Keep a reference so the server task isn't garbage-collected.
    a = loop.create_task(run_webserver())
    loop.run_forever()
|
||||||
|
|
142
transcribe_demo.py
Normal file
142
transcribe_demo.py
Normal file
|
@ -0,0 +1,142 @@
|
||||||
|
#! python3.7
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import speech_recognition as sr
|
||||||
|
import whisper
|
||||||
|
import torch
|
||||||
|
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from queue import Queue
|
||||||
|
from tempfile import NamedTemporaryFile
|
||||||
|
from time import sleep
|
||||||
|
from sys import platform
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Continuously transcribe microphone audio with Whisper.

    Records with SpeechRecognition in a background thread, accumulates raw
    audio between pauses, and re-transcribes the current phrase each loop,
    printing a live-updating transcript until Ctrl-C.
    """
    parser = argparse.ArgumentParser()
    # Fix: the original read args.model below without ever declaring the
    # argument, raising AttributeError on every run.
    parser.add_argument("--model", default="medium", type=str,
                        choices=["tiny", "base", "small", "medium", "large"],
                        help="Whisper model size to load.")
    if 'linux' in platform:
        parser.add_argument("--default_microphone", default='pulse',
                            help="Default microphone name for SpeechRecognition. "
                            "Run this with 'list' to view available Microphones.", type=str)
    args = parser.parse_args()

    # The last time a recording was retrieved from the queue.
    phrase_time = None
    # Current raw audio bytes.
    last_sample = bytes()
    # Thread safe Queue for passing data from the threaded recording callback.
    data_queue = Queue()
    # We use SpeechRecognizer to record our audio because it has a nice
    # feature where it can detect when speech ends.
    recorder = sr.Recognizer()
    recorder.energy_threshold = 1000
    # Definitely do this: dynamic energy compensation lowers the energy
    # threshold dramatically to the point where the SpeechRecognizer never
    # stops recording.
    recorder.dynamic_energy_threshold = False

    # Important for linux users.
    # Prevents permanent application hang and crash by using the wrong Microphone
    if 'linux' in platform:
        mic_name = args.default_microphone
        if not mic_name or mic_name == 'list':
            print("Available microphone devices are: ")
            for index, name in enumerate(sr.Microphone.list_microphone_names()):
                print(f"Microphone with name \"{name}\" found")
            return
        else:
            for index, name in enumerate(sr.Microphone.list_microphone_names()):
                if mic_name in name:
                    source = sr.Microphone(sample_rate=16000, device_index=index)
                    break
            else:
                # Fix: `source` was left unbound when no device matched,
                # crashing later with UnboundLocalError.
                print(f"No microphone matching \"{mic_name}\" found.")
                return
    else:
        source = sr.Microphone(sample_rate=16000)

    # Load / Download model
    model = args.model
    non_english = False
    # English-only model variants exist for every size except "large".
    if args.model != "large" and not non_english:
        model = model + ".en"
    audio_model = whisper.load_model(model)

    record_timeout = 2  # seconds per background recording chunk
    phrase_timeout = 3  # seconds of silence that start a new transcript line

    temp_file = NamedTemporaryFile().name
    transcription = ['']

    with source:
        recorder.adjust_for_ambient_noise(source)

    def record_callback(_, audio: sr.AudioData) -> None:
        """
        Threaded callback function to receive audio data when recordings finish.
        audio: An AudioData containing the recorded bytes.
        """
        # Grab the raw bytes and push it into the thread safe queue.
        data = audio.get_raw_data()
        data_queue.put(data)

    # Create a background thread that will pass us raw audio bytes.
    # We could do this manually but SpeechRecognizer provides a nice helper.
    recorder.listen_in_background(source, record_callback, phrase_time_limit=record_timeout)

    # Cue the user that we're ready to go.
    print("Model loaded.\n")

    while True:
        try:
            now = datetime.utcnow()
            # Pull raw recorded audio from the queue.
            if not data_queue.empty():
                phrase_complete = False
                # If enough time has passed between recordings, consider the
                # phrase complete.  Clear the current working audio buffer to
                # start over with the new data.
                if phrase_time and now - phrase_time > timedelta(seconds=phrase_timeout):
                    last_sample = bytes()
                    phrase_complete = True
                # This is the last time we received new audio data from the queue.
                phrase_time = now

                # Concatenate our current audio data with the latest audio data.
                while not data_queue.empty():
                    data = data_queue.get()
                    last_sample += data

                # Use AudioData to convert the raw data to wav data.
                audio_data = sr.AudioData(last_sample, source.SAMPLE_RATE, source.SAMPLE_WIDTH)
                wav_data = io.BytesIO(audio_data.get_wav_data())

                # Write wav data to the temporary file as bytes.
                with open(temp_file, 'w+b') as f:
                    f.write(wav_data.read())

                # Read the transcription.
                result = audio_model.transcribe(temp_file, fp16=torch.cuda.is_available())
                text = result['text'].strip()

                # If we detected a pause between recordings, add a new item to
                # our transcription.  Otherwise edit the existing one.
                if phrase_complete:
                    transcription.append(text)
                else:
                    transcription[-1] = text

                # Clear the console to reprint the updated transcription.
                os.system('cls' if os.name == 'nt' else 'clear')
                for line in transcription:
                    print(line)
                # Flush stdout.
                print('', end='', flush=True)

            # Infinite loops are bad for processors, must sleep.
            sleep(0.25)
        except KeyboardInterrupt:
            break

    print("\n\nTranscription:")
    for line in transcription:
        print(line)
|
33
write_file.py
Normal file
33
write_file.py
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
import pymumble_py3 as pymumble_py3
|
||||||
|
from pymumble_py3.callbacks import PYMUMBLE_CLBK_SOUNDRECEIVED as PCS
|
||||||
|
|
||||||
|
# Connection details for mumble server. Hardcoded for now, will have to be
|
||||||
|
# command line arguments eventually
|
||||||
|
pwd = "" # password
|
||||||
|
server = "protospace.ca" # server address
|
||||||
|
nick = "python"
|
||||||
|
port = 64738 # port number
|
||||||
|
|
||||||
|
audio_file = open('audio.wav', 'wb')
|
||||||
|
|
||||||
|
# mumble client set up
def sound_received_handler(user, soundchunk):
    """Append raw PCM received from the mumble server to audio_file.

    NOTE(review): despite the 'audio.wav' filename, these are headerless PCM
    bytes, not a valid WAV container.  `user` is the sending user (unused).
    """
    print(len(soundchunk.pcm))  # log packet size

    audio_file.write(soundchunk.pcm)
|
||||||
|
|
||||||
|
# Spin up a client and connect to mumble server
mumble = pymumble_py3.Mumble(server, nick, password=pwd, port=port)
# set up callback called when PCS event occurs
mumble.callbacks.set_callback(PCS, sound_received_handler)
mumble.set_receive_sound(1) # Enable receiving sound from mumble server
mumble.start()
mumble.is_ready() # Wait for client is ready

# Record until interrupted; the callback above does the actual writing.
# NOTE(review): the loop busy-spins with no sleep, burning a full CPU core.
try:
    while True:
        pass
finally:
    audio_file.close()
|
Loading…
Reference in New Issue
Block a user