手把手教你使用 Python 第三方库 PyAudio 打造一款录音工具

彭东成

2021/12/31 23:21

大家好，我是【🌑（这是月亮的背面）】。今天给大家分享 Python 使用 PyAudio 制作录音工具，文章目录如下：

应用平台
音频录制部分
音频播放部分
GUI 窗口所需属性值代码部分
pynput 监听键盘
总结

最近在使用屏幕录制软件录制桌面，使用过程中突发奇想：能不能用 Python 做一个屏幕录制工具，顺便锻炼一下自己的动手能力。接下来准备写一个用 Python 制作屏幕录制工具的系列文章：

录制屏幕制作视频
录制音频
合成视频，音频
基于 Pyqt5 制作可视化窗口

大概分为上述四个部分，希望自己能够尽快完善。上一篇文章利用 OpenCV 完成了屏幕录制部分，接下来继续更新本系列：使用 Python 录制音频。

应用平台

windows 10
python 3.7

音频录制部分

音频录制与视频录制相似，也是以数据帧的方式录制保存。这次使用强大的第三方包 PyAudio 和内置的 wave 模块编写主要部分代码。PyAudio 的安装命令为：pip install PyAudio

如果安装失败，可点击此处下载与环境对应的 .whl 文件：文件名中的 cp37 代表 Python 3.7 环境，64 代表 64 位操作系统。如果下载的 whl 包与当前环境不对应，同样会导致安装失败。下载完成后，在 cmd 窗口下进入 whl 文件所在目录，使用 pip install PyAudio-xx.whl 即可完成安装。音频录制主要代码：

import time

from pyaudio import PyAudio, paInt16, paContinue, paComplete

# Fixed recording parameters
chunk = 1024  # frames per buffer
format_sample = paInt16  # sample format (16-bit integers)
channels = 2  # 1 = mono, 2 = stereo
fps = 44100  # sampling rate in Hz

# Placeholder stop condition: set this to False (e.g. from a hotkey handler)
# when the recording should end.
keep_recording = True


# The audio is recorded through a stream callback
def callback(in_data, frame_count, time_info, status):
    """Recording callback: append the captured buffer to the wave writer.

    ``wf`` is the wave writer created with ``wave.open(..., 'wb')``; it has
    to exist before the stream is started.
    """
    wf.writeframes(in_data)
    if keep_recording:
        return in_data, paContinue
    return in_data, paComplete


# Instantiate PyAudio
p = PyAudio()
# NOTE: create the wave writer ``wf`` here, after ``p`` exists (its sample
# width is queried from ``p``) and before the stream starts calling back.
stream = p.open(format=format_sample,
                channels=channels,
                rate=fps,
                frames_per_buffer=chunk,
                input=True,
                input_device_index=None,  # input device index, None = default device
                stream_callback=callback,  # callback that receives the audio
                )
# Start recording
stream.start_stream()
# Poll until the callback reports completion
while stream.is_active():
    time.sleep(0.1)  # polling interval in seconds (responsiveness of the stop)
# Recording finished: close the stream and release PyAudio
stream.stop_stream()
stream.close()
p.terminate()

采用流式、回调函数的方式录制时，需要先定义用于保存音频的文件，即用 wave 模块新建一个二进制音频文件：

import wave

# Create the output file and describe the PCM data that will be written to it.
wf = wave.open('test.wav', 'wb')
wf.setnchannels(channels)
wf.setsampwidth(p.get_sample_size(format_sample))  # bytes per sample of the chosen format
wf.setframerate(fps)
# Remember to call wf.close() once recording has finished.

为了后续代码可以很好的与之结合复用，将上面的代码包装成类

from pyaudio import PyAudio


class AudioRecord(PyAudio):
    """Recorder/player built on top of PyAudio (skeleton only)."""

    def __init__(self):
        # Skeleton: the complete implementation is listed at the end of the
        # article; here only the base class is initialised.
        super().__init__()

源码于文末补充。

音频播放部分

播放部分代码与录制部分代码相差不大，核心部分：

wf = wave.open('test.wav', 'rb')


def callback(in_data, frame_count, time_info, status):
    """Playback callback: hand the stream the next ``frame_count`` frames."""
    # NOTE(review): once the file is exhausted readframes() returns a short
    # buffer, which is what ends the stream - see the PyAudio
    # stream_callback documentation.
    data = wf.readframes(frame_count)
    return data, paContinue


stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                output=True,
                output_device_index=output_device_index,  # output device index
                stream_callback=callback,  # callback that supplies the audio
                )
stream.start_stream()
# Poll until the whole file has been played
while stream.is_active():
    time.sleep(0.1)
# Release the stream and the wave reader once playback has finished
stream.stop_stream()
stream.close()
wf.close()

目前暂时测试了文件名后缀为 .wav 和 .mp3 时均可以正常录制及播放（注意：wave 模块写入和读取的始终是 WAV 格式的数据，后缀名并不会改变文件的实际编码）；其它类型格式的音频可以自行调用代码进行测试。

GUI 窗口所需属性值代码部分

考虑到 GUI 窗口能较为人性化的输出及输入值，编写该部分代码，内容含音频时长及获取输入设备及输出设备。

# Excerpt from a method of the PyAudio subclass (hence ``self``).

# Audio duration in seconds
duration = wf.getnframes() / wf.getframerate()

# Enumerate the input/output devices currently installed on the system
for i in range(self.get_device_count()):
    dev_info = self.get_device_info_by_index(i)
    default_rate = int(dev_info['defaultSampleRate'])
    # Keep only devices of host API 0 whose default rate equals ``fps``.
    # Names containing '映射器' are skipped - presumably the Windows
    # "sound mapper" pseudo devices on a Chinese-locale system; confirm.
    if not dev_info['hostApi'] and default_rate == fps and '映射器' not in dev_info['name']:
        if dev_info['maxInputChannels']:
            print('输入设备：', dev_info['name'])
        elif dev_info['maxOutputChannels']:
            print('输出设备：', dev_info['name'])

pynput 监听键盘

在这部分代码也暂时使用pynput监听键盘来对录音做中断处理。可以调用上一篇文章中的键盘监听代码。

def hotkey(self):
    """Listen for hotkeys; blocks until the keyboard listener stops."""
    with keyboard.Listener(on_press=self.on_press) as listener:
        listener.join()

def on_press(self, key):
    """Handle a key press reported by the keyboard listener.

    't' ends the recording and keeps the audio file; 'k' aborts the
    recording and marks the file for deletion. Any other key is ignored.

    Returns:
        False after a stop key, which tells the pynput listener to stop;
        None otherwise.
    """
    # Special keys (shift, ctrl, ...) carry no ``char`` attribute; treat them
    # as "no character" instead of raising and printing an error per key.
    char = getattr(key, 'char', None)
    if char == 't':  # 't': stop recording, keep the audio
        self.flag = True
        return False
    if char == 'k':  # 'k': abort recording, delete the file
        self.flag = True
        self.kill = True
        return False
    return None

功能与上一篇类似，不再赘述。

总结

大家好，我是【🌑（这是月亮的背面）】。以上就是使用 PyAudio 调用 windows 的音频设备进行录制及播放的内容了，这篇文章带大家整体学习了使用类及其继承相关知识，用法在这只是展示了冰山一角，还有更多的知识等待着我们一起去探索！

于二零二一年十二月二十日作

源码:

import time
import wave
from pathlib import Path
from threading import Thread

from pyaudio import PyAudio, paComplete, paContinue, paInt16
from pynput import keyboard  # pip install pynput


class AudioRecord(PyAudio):
    """Record audio to a WAV file, or play one back, via PyAudio callback streams.

    Recording is stopped with hotkeys handled by ``on_press``: 't' ends the
    recording and keeps the file, 'k' ends it and deletes the file. The
    hotkeys do not affect playback.

    The instance can be used for several runs in a row; call ``terminate()``
    (inherited from ``PyAudio``) when it is no longer needed.
    """

    def __init__(self, channels=2):
        super().__init__()
        self.chunk = 1024  # frames per buffer
        self.format_sample = paInt16  # sample format (16-bit integers)
        self.channels = channels  # 1 = mono, 2 = stereo
        self.fps = 44100  # sampling rate in Hz
        self.input_dict = None  # device name -> index, filled by get_in_out_devices()
        self.output_dict = None  # device name -> index, filled by get_in_out_devices()
        self.stream = None  # stream of the run in progress, if any
        self.wf = None  # wave reader/writer of the current run
        self.filename = '~test.wav'  # default recording target
        self.duration = 0  # duration of the last run, in seconds
        self.flag = False  # set by a hotkey: stop recording
        self.kill = False  # set by the 'k' hotkey: delete the recording

    def __call__(self, filename):
        """Set the file name used by the next recording."""
        self.filename = filename

    def callback_input(self, in_data, frame_count, time_info, status):
        """Recording callback: store the captured buffer, stop once ``flag`` is set."""
        self.wf.writeframes(in_data)
        if not self.flag:
            return in_data, paContinue
        return in_data, paComplete

    def callback_output(self, in_data, frame_count, time_info, status):
        """Playback callback: hand the stream the next ``frame_count`` frames."""
        # NOTE(review): once the file is exhausted readframes() returns a
        # short buffer, which is what ends the stream - see the PyAudio
        # stream_callback documentation.
        data = self.wf.readframes(frame_count)
        return data, paContinue

    def open_stream(self, name):
        """Open the recording stream on the input device called ``name``.

        A falsy ``name`` selects the default input device.
        """
        input_device_index = self.get_device_index(name, True) if name else None
        return self.open(format=self.format_sample,
                         channels=self.channels,
                         rate=self.fps,
                         frames_per_buffer=self.chunk,
                         input=True,
                         input_device_index=input_device_index,  # input device index
                         stream_callback=self.callback_input,
                         )

    def audio_record_run(self, name=None):
        """Record into ``self.filename`` until a hotkey sets ``flag``.

        Blocks until the recording has stopped, then stores and prints the
        recorded duration. The file is deleted when ``kill`` was set.
        """
        # Start from a clean state so that a stop requested during an earlier
        # run does not end this recording on its first buffer.
        self.flag = False
        self.kill = False
        self.wf = self.save_audio_file(self.filename)
        try:
            self.stream = self.open_stream(name)
            self.stream.start_stream()
            while self.stream.is_active():
                time.sleep(0.1)
            self.duration = self.get_duration(self.wf)
        finally:
            # Stop the stream first so the callback can no longer write,
            # then close the file even if opening the stream failed.
            try:
                self.terminate_run()
            finally:
                self.wf.close()
        if self.kill:
            Path(self.filename).unlink()
        print(self.duration)

    def run(self, filename=None, name=None, record=True):
        """Start recording (``record=True``) or playback in a background thread.

        Args:
            filename: file to record into (optional) or to play (required).
            name: device name as listed by ``get_in_out_devices``; a falsy
                value selects the default device.
            record: True to record, False to play back.

        Returns:
            The worker ``Thread``; ``join()`` it to wait for the run to end.

        Raises:
            ValueError: if playback is requested without a file name.
        """
        if record:
            if filename:
                self.filename = filename
            worker = Thread(target=self.audio_record_run, args=(name,))
        else:
            if not filename:
                raise ValueError('未输入音频文件名，不能播放，请输入后再试！')
            worker = Thread(target=self.read_audio, args=(filename, name))
        # Daemon thread: the hotkey listener must never keep the process alive.
        Thread(target=self.hotkey, daemon=True).start()
        worker.start()
        return worker

    def read_audio(self, filename, name=None):
        """Play ``filename`` on the output device called ``name``.

        Blocks until playback has finished, then prints the file's duration.
        A falsy ``name`` selects the default output device.
        """
        output_device_index = self.get_device_index(name, False) if name else None
        with wave.open(filename, 'rb') as self.wf:
            self.duration = self.get_duration(self.wf)
            try:
                self.stream = self.open(
                    format=self.get_format_from_width(self.wf.getsampwidth()),
                    channels=self.wf.getnchannels(),
                    rate=self.wf.getframerate(),
                    output=True,
                    output_device_index=output_device_index,  # output device index
                    stream_callback=self.callback_output,
                )
                self.stream.start_stream()
                while self.stream.is_active():
                    time.sleep(0.1)
            finally:
                # Close the stream before the wave reader goes away.
                self.terminate_run()
        print(self.duration)

    @staticmethod
    def get_duration(wf):
        """Return the duration of wave object ``wf`` in seconds, rounded to 2 places."""
        return round(wf.getnframes() / wf.getframerate(), 2)

    def get_in_out_devices(self):
        """Fill ``input_dict`` / ``output_dict`` with usable device names and indexes.

        Only devices of host API 0 whose default sample rate equals
        ``self.fps`` are kept. A device offering input channels is listed as
        an input device only.
        """
        self.input_dict = {}
        self.output_dict = {}
        for index in range(self.get_device_count()):
            dev_info = self.get_device_info_by_index(index)
            default_rate = int(dev_info['defaultSampleRate'])
            device_name = dev_info['name']
            # Names containing '映射器' are skipped - presumably the Windows
            # "sound mapper" pseudo devices on a Chinese-locale system; confirm.
            if dev_info['hostApi'] or default_rate != self.fps or '映射器' in device_name:
                continue
            if dev_info['maxInputChannels']:
                self.input_dict[device_name] = index
            elif dev_info['maxOutputChannels']:
                self.output_dict[device_name] = index

    def get_device_index(self, name, input_in=True):
        """Return the index of the device called ``name``.

        Returns -1 when the name is unknown, and None when the relevant
        dictionary is empty or ``get_in_out_devices`` has not been called.
        """
        if input_in and self.input_dict:
            return self.input_dict.get(name, -1)
        if not input_in and self.output_dict:
            return self.output_dict.get(name, -1)
        return None

    def save_audio_file(self, filename):
        """Open ``filename`` for writing and configure it for the recording format."""
        wf = wave.open(filename, 'wb')
        wf.setnchannels(self.channels)
        wf.setsampwidth(self.get_sample_size(self.format_sample))
        wf.setframerate(self.fps)
        return wf

    def terminate_run(self):
        """Stop and close the current stream, if any.

        PortAudio itself is left initialised so the instance can start
        another run; call ``terminate()`` to release it.
        """
        if self.stream is not None:
            self.stream.stop_stream()
            self.stream.close()
            self.stream = None

    def hotkey(self):
        """Listen for hotkeys; blocks until the keyboard listener stops."""
        with keyboard.Listener(on_press=self.on_press) as listener:
            listener.join()

    def on_press(self, key):
        """Handle a key press reported by the keyboard listener.

        't' ends the recording and keeps the audio file; 'k' aborts the
        recording and marks the file for deletion. Any other key is ignored.

        Returns:
            False after a stop key, which tells the pynput listener to stop;
            None otherwise.
        """
        # Special keys (shift, ctrl, ...) carry no ``char`` attribute; treat
        # them as "no character" instead of raising and printing per key.
        char = getattr(key, 'char', None)
        if char == 't':  # 't': stop recording, keep the audio
            self.flag = True
            return False
        if char == 'k':  # 'k': abort recording, delete the file
            self.flag = True
            self.kill = True
            return False
        return None


if __name__ == '__main__':
    audio_record = AudioRecord()
    try:
        audio_record.get_in_out_devices()
        # Record; the wave module always writes WAV data, so the file is
        # named accordingly. Wait for the recording to finish.
        print(audio_record.input_dict)
        audio_record.run('test.wav').join()
        # Play back, unless the recording was aborted and the file deleted
        print(audio_record.output_dict)
        if not audio_record.kill:
            audio_record.run('test.wav', record=False).join()
    finally:
        audio_record.terminate()

小伙伴们，快快动手实践一下吧！如果在学习过程中遇到任何问题，欢迎加我好友，我拉你进 Python 学习交流群共同探讨学习。

小伙伴们，快快动手实践一下吧！如果在学习过程中遇到任何问题，欢迎加我好友，我拉你进 Python 学习交流群共同探讨学习。

文章为作者独立观点，不代表BOSS直聘立场。未经账号授权，禁止随意转载。