I have recently been working on speech recognition. Before recognition can happen, the audio has to be recorded, so for various reasons I looked into three ways of recording: JavaScript, Java, and C#. The recording code for each is pasted below.
C# recording utility class
using Microsoft.DirectX.DirectSound;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Threading;
using System.Windows.Forms;
namespace SelfServiceMachines
{
public class SoundRecord {
#region Member data
private Capture mCapDev = null; // audio capture device
private CaptureBuffer mRecBuffer = null; // capture buffer object
private WaveFormat mWavFormat; // recording format
private int mNextCaptureOffset = 0; // start offset of the next read from the capture buffer
private int mSampleCount = 0; // number of data bytes recorded so far
private Notify mNotify = null; // notification object
public const int cNotifyNum = 16; // number of notification positions
private int mNotifySize = 0; // size of each notification segment
private int mBufferSize = 0; // total capture buffer size
private Thread mNotifyThread = null; // thread that handles buffer notifications
private AutoResetEvent mNotificationEvent = null; // notification event
private string mFileName = string.Empty; // path of the output file
private FileStream mWaveFile = null; // output file stream
private BinaryWriter mWriter = null; // binary writer for the file
#endregion
public SoundRecord() {
// initialize the audio capture device
InitCaptureDevice();
// set the recording format
mWavFormat = CreateWaveFormat();
}
#region Public operations
/// <summary>
/// Create the recording format: 16-bit, 16 kHz, mono PCM.
/// </summary>
private WaveFormat CreateWaveFormat() {
WaveFormat format = new WaveFormat();
format.FormatTag = WaveFormatTag.Pcm; // PCM
format.SamplesPerSecond = 16000; // sample rate: 16 kHz
format.BitsPerSample = 16; // sample size: 16 bits
format.Channels = 1; // channels: mono
format.BlockAlign = (short) ( format.Channels * ( format.BitsPerSample / 8 ) ); // bytes per sample frame
format.AverageBytesPerSecond = format.BlockAlign * format.SamplesPerSecond;
// with this format, one second of audio is 16000 * 2 = 32000 bytes (about 31 KiB)
return format;
}
/// <summary>
/// Set the file (including its path) that the recording is saved to.
/// </summary>
/// <param name="filename">path of the WAV file to save</param>
public void SetFileName(string filename) {
mFileName = filename;
}
/// <summary>
/// Start recording.
/// </summary>
public void RecStart() {
// create the output file
CreateSoundFile();
// create a capture buffer and start capturing
CreateCaptureBuffer();
// set up the notifications that fire as buffer segments fill
InitNotifications();
mRecBuffer.Start(true);
}
/// <summary>
/// Stop recording.
/// </summary>
public void RecStop() {
mRecBuffer.Stop(); // stop capturing
if (null != mNotificationEvent)
mNotificationEvent.Set(); // release the notification wait
mNotifyThread.Abort(); // terminate the notification thread
RecordCapturedData(); // flush the remaining buffered data to the file
// patch up the WAV header
mWriter.Seek(4, SeekOrigin.Begin);
mWriter.Write((int) ( mSampleCount + 36 )); // RIFF chunk size: data length + 36 header bytes
mWriter.Seek(40, SeekOrigin.Begin);
mWriter.Write(mSampleCount); // data chunk size
mWriter.Close();
mWaveFile.Close();
mWriter = null;
mWaveFile = null;
}
#endregion
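// Typical usage of this class (a sketch; the path below is an arbitrary example):
//   SoundRecord record = new SoundRecord();
//   record.SetFileName(@"C:\temp\record.wav");
//   record.RecStart();
//   // ... speak into the microphone ...
//   record.RecStop();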
#region Internal operations
/// <summary>
/// Initialize the capture device; the default (primary) capture device is used.
/// </summary>
/// <returns>true on success, false otherwise</returns>
private bool InitCaptureDevice() {
// enumerate the audio capture devices and take the default one
CaptureDevicesCollection devices = new CaptureDevicesCollection();
Guid deviceGuid = Guid.Empty;
if (devices.Count > 0)
deviceGuid = devices[0].DriverGuid;
else {
MessageBox.Show("系统中没有音频捕捉设备");
return false;
}
// create the Capture object for the selected device
try {
mCapDev = new Capture(deviceGuid);
} catch (Microsoft.DirectX.DirectXException) { // creating the capture device failed
return false;
}
return true;
}
/// <summary>
/// Create the capture buffer used for recording.
/// </summary>
private void CreateCaptureBuffer() {
// descriptor for the capture buffer
CaptureBufferDescription bufferdescription = new CaptureBufferDescription();
if (null != mNotify) {
mNotify.Dispose();
mNotify = null;
}
if (null != mRecBuffer) {
mRecBuffer.Dispose();
mRecBuffer = null;
}
// each notification segment covers 1/8 second of audio (at least 1024 bytes):
// 32000 B/s / 8 = 4000 bytes, aligned down to a whole number of sample frames
mNotifySize = ( 1024 > mWavFormat.AverageBytesPerSecond / 8 ) ? 1024 : ( mWavFormat.AverageBytesPerSecond / 8 );
mNotifySize -= mNotifySize % mWavFormat.BlockAlign;
// total buffer size: 16 segments, i.e. 2 seconds of audio
mBufferSize = mNotifySize * cNotifyNum;
// fill in the buffer description
bufferdescription.BufferBytes = mBufferSize;
bufferdescription.Format = mWavFormat; // recording format
// create the capture buffer
mRecBuffer = new CaptureBuffer(bufferdescription, mCapDev);
mNextCaptureOffset = 0;
}
/// <summary>
/// Set up the notification events: the buffer is split into 16 segments, with a
/// notification position at the end of each segment.
/// </summary>
/// <returns>whether setup succeeded</returns>
private bool InitNotifications() {
if (null == mRecBuffer) {
// MessageBox.Show("未创建录音缓冲区");
return false;
}
// create an event that is signaled each time a buffer segment fills up
mNotificationEvent = new AutoResetEvent(false);
// create a thread to handle the buffer notifications
if (null == mNotifyThread) {
mNotifyThread = new Thread(new ThreadStart(WaitThread));
mNotifyThread.Start();
}
// set the notification positions
BufferPositionNotify[] PositionNotify = new BufferPositionNotify[cNotifyNum + 1];
for (int i = 0; i < cNotifyNum; i++) {
PositionNotify[i].Offset = ( mNotifySize * i ) + mNotifySize - 1;
PositionNotify[i].EventNotifyHandle = mNotificationEvent.SafeWaitHandle.DangerousGetHandle();
}
mNotify = new Notify(mRecBuffer);
mNotify.SetNotificationPositions(PositionNotify, cNotifyNum);
return true;
}
/// <summary>
/// Thread procedure that waits for buffer-full notifications.
/// </summary>
private void WaitThread() {
while (true) {
// wait for a notification from the capture buffer
mNotificationEvent.WaitOne(Timeout.Infinite, true);
// copy the newly captured data to the file
RecordCapturedData();
}
}
/// <summary>
/// Write the captured data to the WAV file.
/// </summary>
private void RecordCapturedData() {
byte[] CaptureData = null;
int ReadPos = 0, CapturePos = 0, LockSize = 0;
mRecBuffer.GetCurrentPosition(out CapturePos, out ReadPos);
LockSize = ReadPos - mNextCaptureOffset;
// the buffer is circular, so the difference can be negative: the read pointer
// may have wrapped back to the first notification point while mNextCaptureOffset
// is still at the last one
if (LockSize < 0)
LockSize += mBufferSize;
LockSize -= ( LockSize % mNotifySize ); // align to segment boundaries (redundant given the setup above, but harmless)
if (0 == LockSize)
return;
// read the captured data out of the buffer
CaptureData = (byte[]) mRecBuffer.Read(mNextCaptureOffset, typeof(byte), LockFlag.None, LockSize);
// append it to the WAV file
mWriter.Write(CaptureData, 0, CaptureData.Length);
// update the count of data bytes written so far
mSampleCount += CaptureData.Length;
// advance the read offset: notifications only indicate that a position was
// reached, they do not track where the previous read ended
mNextCaptureOffset += CaptureData.Length;
mNextCaptureOffset %= mBufferSize; // Circular buffer
}
/// <summary>
/// Create the output WAV file and write the header.
/// </summary>
private void CreateSoundFile() {
// Open up the wave file for writing.
mWaveFile = new FileStream(mFileName, FileMode.Create);
mWriter = new BinaryWriter(mWaveFile);
/**************************************************************************
Here is where the file will be created. A
wave file is a RIFF file, which has chunks
of data that describe what the file contains.
A wave RIFF file is put together like this:
The 12 byte RIFF chunk is constructed like this:
Bytes 0 - 3 : 'R' 'I' 'F' 'F'
Bytes 4 - 7 : Length of file, minus the first 8 bytes of the RIFF description.
(4 bytes for "WAVE" + 24 bytes for format chunk length +
8 bytes for data chunk description + actual sample data size.)
Bytes 8 - 11: 'W' 'A' 'V' 'E'
The 24 byte FORMAT chunk is constructed like this:
Bytes 0 - 3 : 'f' 'm' 't' ' '
Bytes 4 - 7 : The format chunk length. This is always 16.
Bytes 8 - 9 : Format tag. Always 1 for PCM.
Bytes 10- 11: Number of channels. Either 1 for mono, or 2 for stereo.
Bytes 12- 15: Sample rate.
Bytes 16- 19: Number of bytes per second.
Bytes 20- 21: Block align (bytes per sample frame). 1 for 8 bit mono, 2 for 8 bit stereo or
16 bit mono, 4 for 16 bit stereo.
Bytes 22- 23: Number of bits per sample.
The DATA chunk is constructed like this:
Bytes 0 - 3 : 'd' 'a' 't' 'a'
Bytes 4 - 7 : Length of data, in bytes.
Bytes 8 -: Actual sample data.
***************************************************************************/
// Set up file with RIFF chunk info.
char[] ChunkRiff = { 'R', 'I', 'F', 'F' };
char[] ChunkType = { 'W', 'A', 'V', 'E' };
char[] ChunkFmt = { 'f', 'm', 't', ' ' };
char[] ChunkData = { 'd', 'a', 't', 'a' };
short shPad = 1; // format tag: 1 = PCM
int nFormatChunkLength = 0x10; // Format chunk length.
int nLength = 0; // File length, minus first 8 bytes of RIFF description. This will be filled in later.
short shBytesPerSample = 0; // Bytes per sample.
// bytes per sample frame (block align)
if (8 == mWavFormat.BitsPerSample && 1 == mWavFormat.Channels)
shBytesPerSample = 1;
else if (( 8 == mWavFormat.BitsPerSample && 2 == mWavFormat.Channels ) || ( 16 == mWavFormat.BitsPerSample && 1 == mWavFormat.Channels ))
shBytesPerSample = 2;
else if (16 == mWavFormat.BitsPerSample && 2 == mWavFormat.Channels)
shBytesPerSample = 4;
// RIFF chunk
mWriter.Write(ChunkRiff);
mWriter.Write(nLength);
mWriter.Write(ChunkType);
// fmt chunk
mWriter.Write(ChunkFmt);
mWriter.Write(nFormatChunkLength);
mWriter.Write(shPad);
mWriter.Write(mWavFormat.Channels);
mWriter.Write(mWavFormat.SamplesPerSecond);
mWriter.Write(mWavFormat.AverageBytesPerSecond);
mWriter.Write(shBytesPerSample);
mWriter.Write(mWavFormat.BitsPerSample);
// data chunk
mWriter.Write(ChunkData);
mWriter.Write((int) 0); // The sample length will be written in later.
}
#endregion
}
}
To use this utility class, the project must reference Microsoft.DirectX.dll and Microsoft.DirectX.DirectSound.dll.
Java recording
The Java recording utility class is as follows:
import javax.sound.sampled.*;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;

public class EngineeCore {
String filePath = "E:\\voice\\voice_cache.wav";
AudioFormat audioFormat;
TargetDataLine targetDataLine;
boolean flag = true;
private void stopRecognize() {
flag = false;
targetDataLine.stop();
targetDataLine.close();
}

private AudioFormat getAudioFormat() {
float sampleRate = 16000;
// 8000,11025,16000,22050,44100
int sampleSizeInBits = 16;
// 8,16
int channels = 1;
// 1,2
boolean signed = true;
// true,false
boolean bigEndian = false;
// true,false
return new AudioFormat(sampleRate, sampleSizeInBits, channels, signed, bigEndian);
}// end getAudioFormat
private void startRecognize() {
try {
// get the desired audio format
audioFormat = getAudioFormat();
DataLine.Info dataLineInfo = new DataLine.Info(TargetDataLine.class, audioFormat);
targetDataLine = (TargetDataLine) AudioSystem.getLine(dataLineInfo);
// Create a thread that captures the microphone data into an
// audio file and start it running. It runs until silence is
// detected (see CaptureThread below). This method returns
// right after starting the thread.
flag = true;
new CaptureThread().start();
} catch (Exception e) {
e.printStackTrace();
} // end catch
}// end captureAudio method
class CaptureThread extends Thread {
public void run() {
AudioFileFormat.Type fileType = null;
File audioFile = new File(filePath);
fileType = AudioFileFormat.Type.WAVE;
// amplitude threshold that counts as sound
int weight = 2;
// counter of consecutive quiet reads, used to decide when to stop
int downSum = 0;
ByteArrayInputStream bais = null;
ByteArrayOutputStream baos = new ByteArrayOutputStream();
AudioInputStream ais = null;
try {
targetDataLine.open(audioFormat);
targetDataLine.start();
byte[] fragment = new byte[1024];
ais = new AudioInputStream(targetDataLine);
while (flag) {
targetDataLine.read(fragment, 0, fragment.length);
// start buffering once the last byte exceeds weight (sound detected);
// after buffering has started, the baos.size() check keeps it going
if (Math.abs(fragment[fragment.length-1]) > weight || baos.size() > 0) {
baos.write(fragment);
System.out.println("head: " + fragment[0] + ", tail: " + fragment[fragment.length-1] + ", length: " + fragment.length);
// check whether the speech has stopped
if (Math.abs(fragment[fragment.length-1]) <= weight) {
downSum++;
} else {
System.out.println("resetting the silence counter");
downSum = 0;
}
// more than 20 consecutive quiet reads means no sound came in
// during this period (the threshold can be tuned)
if (downSum > 20) {
System.out.println("stopping capture");
break;
}
}
}
// build an audio input stream over the captured bytes
audioFormat = getAudioFormat();
byte audioData[] = baos.toByteArray();
bais = new ByteArrayInputStream(audioData);
ais = new AudioInputStream(bais, audioFormat, audioData.length / audioFormat.getFrameSize());
// write the audio out to the target file
System.out.println("writing the audio file");
AudioSystem.write(ais, AudioFileFormat.Type.WAVE, audioFile);
downSum = 0;
stopRecognize();
} catch (Exception e) {
e.printStackTrace();
} finally {
// close the streams
try {
ais.close();
bais.close();
baos.reset();
} catch (IOException e) {
e.printStackTrace();
}
}
}// end run
}// end inner class CaptureThread
}// end class EngineeCore
This example records on a background thread and stops automatically once speech is followed by silence.
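A minimal usage sketch (assuming startRecognize() is made public; in the listing above it is private):
public class RecordDemo {
    public static void main(String[] args) {
        EngineeCore recorder = new EngineeCore();
        // start the capture thread; it stops on its own after about 20
        // consecutive quiet reads and writes E:\voice\voice_cache.wav
        recorder.startRecognize();
        // the non-daemon capture thread keeps the JVM alive until it finishes
    }
}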
JavaScript recording
The code is as follows:
/* audio */
var recorder;
var audio = document.querySelector('audio');
/* start recording */
function startRecording() {
HZRecorder.get(function(rec) {
recorder = rec;
recorder.start();
});
}
/* fetch the recorded data */
function obtainRecord() {
var record = recorder.getBlob();
};
/* stop recording */
function stopRecord() {
recorder.stop();
};
Recording this way requires HZRecorder.js, which first has to be downloaded; it can be found online.
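If HZRecorder.js cannot be obtained, the standard MediaRecorder API is an alternative. A minimal sketch, reusing the recorder and audio variables above (note that MediaRecorder produces compressed audio such as WebM/Opus by default, not 16 kHz WAV, so the audio would still need conversion before recognition):
function startRecordingStd() {
    navigator.mediaDevices.getUserMedia({ audio: true }).then(function (stream) {
        var chunks = [];
        recorder = new MediaRecorder(stream);
        recorder.ondataavailable = function (e) { chunks.push(e.data); };
        recorder.onstop = function () {
            // play back what was captured in the <audio> element
            audio.src = URL.createObjectURL(new Blob(chunks));
        };
        recorder.start();
    });
}
// stopping works the same way: recorder.stop() triggers onstop above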
Problems encountered
The main problem I ran into was recognition failing because of the sample rate.
The sample rate used at recognition time must match the rate the audio was recorded at, otherwise recognition may fail.
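If an existing recording's sample rate does not match what the recognizer expects, it can be converted first. A minimal Java sketch (the file names are made-up examples; whether a given rate conversion is supported depends on the installed audio service providers, and an unsupported conversion throws IllegalArgumentException):
import javax.sound.sampled.*;
import java.io.File;

public class ResampleTo16k {
    public static void main(String[] args) throws Exception {
        AudioInputStream in = AudioSystem.getAudioInputStream(new File("input.wav"));
        // 16 kHz, 16-bit, mono, signed PCM, little-endian: the format used throughout this article
        AudioFormat target = new AudioFormat(16000f, 16, 1, true, false);
        if (!in.getFormat().matches(target)) {
            in = AudioSystem.getAudioInputStream(target, in); // convert to the target format
        }
        AudioSystem.write(in, AudioFileFormat.Type.WAVE, new File("input_16k.wav"));
        in.close();
    }
}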