摘要:语音通话已经是IM的基本功能了,QQ,MSN甚至连刚出来的百度HI都自带语音聊天的功能,大家可能觉得很炫,其实大家都是用的Windows平台上的API,懂了原理之后自己也可以做,再说了微软也提供了DirectSound的托管互操作程序集,使.NET开发人员也很容易地介入到这个领域,甚至你还可以写一个能跑在Windows Mobile上的语音电话,现在好多手机都支持WiFi,这样一个简单的WiFi电话就在你的手里诞生了。本帖来和大家一起看看如何来做网络电话。
思路:要想做一个网络电话,基本遵循以下步骤
1、一方实时的录音,把模拟信号转换成数字信号;
2、把声音实时压缩;
3、通过网络协议把压缩后的数据传输给接收方;
4、接收方解压缩接收到的音频数据;
5、实时的把接收到的数字信号转换成模拟信号并播放出来。
下面我们来看看每一步面临的挑战及其解决方案。
1、第一步,实时录音,DirectSound有录音方面的API,托管的类分别是Microsoft.DirectX.DirectSound.CaptureDevicesCollection,Microsoft.DirectX.DirectSound.Capture和Microsoft.DirectX.DirectSound.CaptureBuffer,CaptureDevicesCollection用来枚举本机的可用的录音设备,Capture则表示一个录音设备,CaptureBuffer是用来存放录音数据的缓冲区,我们开始录音后,音频数据会不断地写入到环形的流式缓冲区,然后我们定期从缓冲区中把录音数据取出来返回给上层应用层就可以了。关于环形的流式缓冲区,可以看参考链接部分。
2、声音的压缩是一个很难抉择的步骤,默认的DirectSound只能播放和录制PCM格式(WAV)的音频数据,但这种声音格式特别大。常用的声音压缩格式有G.723.1,GSM,AMR,G.711等等,各种压缩算法都有自己的码率和适用范围。因为我们做的是互联网的语音电话,不考虑慢速网络和无线连接下的情况,也不用考虑终端设备的CPU能不能支持我们选用的压缩算法,我们做的语音电话双方都是PC机,应该什么解压缩算法都不会引起什么性能上的问题,所以只要网络快一些,选择哪个压缩算法都无所谓了,网上有G.711的压缩算法,我打算就采用这个,它的码率是64Kbps(8kHz采样、每个采样压缩为8位),比CD音质的PCM(44.1kHz、16位、双声道,约1.41Mbps)要小得多;原文提到的1.544Mbps和2.048Mbps其实是T1/E1 PCM线路的总速率,并不是单路PCM语音的码率。然后我们进行了音频数据压缩后,还可以对字节流进行GZIP或者7ZIP压缩,前者用SharpZipLib,后者7zip的官方有C#的使用代码,大家可以测试一下这两个算法的性能后做出适合自己的决定。关于各种压缩格式的特性可以参考我做的PPT及提供的参考链接。
3、网络电话注重实时性,而把声音从网络上传输就要走IP网络,而IP网络不是一个等时系统,所以我们就要尽量的去模拟实时的语音传输,提到实时,肯定UDP比TCP要实时,因为TCP要保证传输的可靠性,有序性等,而专门用于实时传输有一个应用层协议是RTP协议,这个协议一般就是建立在UDP基础上的,它在每个包头提供了一些序列号、时间戳等信息,但UDP本身并不会使用这些信息,这时候就有一个RTCP协议来用这些信息进行流量控制和拥塞控制,比如说RTCP检测到网络拥挤,会告诉发送方变换一种低码率的语音压缩算法来传输数据。这些大多都需要自己去实现,本文的源码没有去实现这些,关于RTP和RTCP可以参考相关资料或者我做的PPT。
4、每个压缩算法都有相应的解压缩算法,呵呵。
5、播放声音肯定也需要用到DS,也需要用到StreamBuffer,大致流程如下
1)创建一个声音设备Microsoft.DirectX.DirectSound.Device dev = new Microsoft.DirectX.DirectSound.Device();
2)设置协调级别dev.SetCooperativeLevel(this, Microsoft.DirectX.DirectSound.CooperativeLevel.Normal);
3)创建声音格式、缓冲区描述、及辅助缓冲区;
4)给辅助缓冲区设定通知;
5)用声音数据填满缓冲区;
6)播放缓冲区的声音数据,播放到一定的通知点,通知填充线程,填充新的声音数据;
7)循环第6步,直到没有新的声音数据填充到缓冲区。
具体的过程参考PPT或者具体代码。
版权声明:
附件源代码里的CaptureSound,SoundPlayer和CircularBuffer类反编译自随意桌面的代码(注释是我加的),版权归作者所有。
PPT里的图片和一些文字选自一个叫做ch11-DxSound&Input2.ppt的文件,源链接已丢失,还有一些选自一个叫做“SIP之 穿越NAT.ppt”的文件,网上可以搜索到,版权均归原作者所有,原作者要是再引用别人的东西,我就不知道了。
下面看一些具体的代码
用于创建声音格式
/// <summary>
/// Factory helpers that build the PCM <c>WaveFormat</c> descriptions shared by the
/// capture and playback classes.
/// </summary>
public class DirectSoundManager
{
    /// <summary>
    /// Builds an uncompressed PCM wave format.
    /// </summary>
    /// <param name="hz">Sampling rate in Hz (typical values: 8000, 11025, 22050, 44100).</param>
    /// <param name="bits">Bits per sample; 8 or 16.</param>
    /// <param name="channels">Number of channels: 1 = mono, 2 = stereo.</param>
    /// <returns>
    /// A format whose <c>BlockAlign</c> and <c>AverageBytesPerSecond</c> are derived from
    /// the three arguments.
    /// </returns>
    public static WaveFormat CreateWaveFormat(int hz, short bits, short channels)
    {
        WaveFormat format = new WaveFormat();
        // Plain PCM is the only encoding this helper produces.
        format.FormatTag = WaveFormatTag.Pcm;
        format.SamplesPerSecond = hz;
        format.BitsPerSample = bits;
        format.Channels = channels;
        // Bytes per sample frame, i.e. one sample for every channel.
        format.BlockAlign = (short)(format.Channels * (format.BitsPerSample / 8));
        // Average data rate in bytes per second.
        format.AverageBytesPerSecond = format.BlockAlign * format.SamplesPerSecond;
        return format;
    }

    #region Properties

    /// <summary>Format used when the caller does not choose one: 8000 Hz, 8-bit, mono.</summary>
    public static WaveFormat DefaultFormat
    {
        get { return WaveFormat_8000_8_1; }
    }

    /// <summary>11025 Hz, 8-bit, mono.</summary>
    public static WaveFormat WaveFormat_11025_8_1
    {
        get { return CreateWaveFormat(11025, 8, 1); }
    }

    /// <summary>22050 Hz, 16-bit, stereo.</summary>
    public static WaveFormat WaveFormat_22050_16_2
    {
        get { return CreateWaveFormat(22050, 16, 2); }
    }

    /// <summary>44100 Hz, 16-bit, stereo.</summary>
    public static WaveFormat WaveFormat_44100_16_2
    {
        get { return CreateWaveFormat(44100, 16, 2); }
    }

    /// <summary>8000 Hz, 8-bit, mono.</summary>
    public static WaveFormat WaveFormat_8000_8_1
    {
        get { return CreateWaveFormat(8000, 8, 1); }
    }

    #endregion
}
用于播放流式声音
/// <summary>
/// Plays a continuous PCM stream through a looping DirectSound secondary buffer.
/// Callers push audio with <see cref="Write"/>; the data is queued in a circular buffer
/// and a background thread copies it into the playback buffer each time DirectSound
/// signals that another segment has been played.
/// </summary>
public class SoundPlayer : IDisposable
{
    #region Private members

    // Number of equally sized segments the playback buffer is divided into;
    // one notification position is registered at the end of each segment.
    private const int NumberRecordNotifications = 4;

    private readonly CircularBuffer circularBuffer;
    private readonly int m_BufferBytes;
    private readonly bool m_OwnsDevice;
    private readonly int notifySize;
    // Sized with one spare slot, as before; only the first NumberRecordNotifications
    // entries are passed to DirectSound.
    private readonly BufferPositionNotify[] positionNotify =
        new BufferPositionNotify[NumberRecordNotifications + 1];
    // Written by Stop()/Dispose() and read by the notification thread.
    private volatile bool isRunning;
    private SecondaryBuffer m_Buffer;
    private Device m_Device;
    private int nextWriteOffset;
    private AutoResetEvent notificationEvent;
    private Notify notify;
    private Thread notifyThread;

    #endregion

    #region Constructors

    /// <summary>Creates a player on a new default device.</summary>
    /// <param name="owner">Window that owns the device's cooperative level.</param>
    /// <param name="format">PCM format of the data that will be written.</param>
    public SoundPlayer(Control owner, WaveFormat format)
        : this(owner, null, format)
    {
    }

    /// <summary>Creates a player and starts the (initially silent) playback loop.</summary>
    /// <param name="owner">Window used for the cooperative level when a device is created here.</param>
    /// <param name="device">Existing device to play on, or null to create (and own) a new one.</param>
    /// <param name="format">PCM format of the data that will be written.</param>
    public SoundPlayer(Control owner, Device device, WaveFormat format)
    {
        m_Device = device;
        if (m_Device == null)
        {
            m_Device = new Device();
            m_Device.SetCooperativeLevel(owner, CooperativeLevel.Normal);
            m_OwnsDevice = true;
        }

        // One segment holds 1/8 second of audio (never less than 1024 bytes), rounded
        // down to a whole number of sample frames. With four segments the playback
        // buffer therefore holds about half a second.
        int segmentBytes = Math.Max(1024, format.AverageBytesPerSecond / 8);
        segmentBytes -= segmentBytes % format.BlockAlign;
        notifySize = segmentBytes;
        m_BufferBytes = notifySize * NumberRecordNotifications;

        BufferDescription desc = new BufferDescription(format);
        desc.ControlVolume = true;           // volume can be changed
        desc.ControlPositionNotify = true;   // position notifications are required for streaming
        desc.CanGetCurrentPosition = true;   // the play/write cursors are queried in Play()
        desc.Control3D = false;              // no 3D processing
        desc.ControlEffects = false;         // no effects processing
        desc.ControlFrequency = true;        // playback frequency can be changed
        desc.ControlPan = true;              // left/right pan can be changed
        // NOTE(review): global focus is understood to keep the buffer audible when the
        // owner window loses focus - confirm against the DirectSound documentation.
        desc.GlobalFocus = true;
        desc.BufferBytes = m_BufferBytes;

        m_Buffer = new SecondaryBuffer(desc, m_Device);
        // Queue between Write() callers and the playback thread; ten playback buffers deep.
        circularBuffer = new CircularBuffer(m_BufferBytes * 10);

        InitNotifications();
        m_Buffer.Play(0, BufferPlayFlags.Looping);
    }

    /// <summary>Creates a player on a new default device from raw format parameters.</summary>
    /// <param name="owner">Window that owns the device's cooperative level.</param>
    /// <param name="sr">Sampling rate in Hz.</param>
    /// <param name="bps">Bits per sample.</param>
    /// <param name="ch">Number of channels.</param>
    public SoundPlayer(Control owner, int sr, short bps, short ch)
        : this(owner, null, DirectSoundManager.CreateWaveFormat(sr, bps, ch))
    {
    }

    /// <summary>Creates a player on the given device from raw format parameters.</summary>
    /// <param name="owner">Window used for the cooperative level when a device is created here.</param>
    /// <param name="device">Existing device to play on, or null to create a new one.</param>
    /// <param name="sr">Sampling rate in Hz.</param>
    /// <param name="bps">Bits per sample.</param>
    /// <param name="ch">Number of channels.</param>
    public SoundPlayer(Control owner, Device device, int sr, short bps, short ch)
        : this(owner, device, DirectSoundManager.CreateWaveFormat(sr, bps, ch))
    {
    }

    #endregion

    #region Public properties

    /// <summary>Bits per sample of the playback buffer's format.</summary>
    public int BitsPerSample
    {
        get { return m_Buffer.Format.BitsPerSample; }
    }

    /// <summary>Channel count of the playback buffer's format.</summary>
    public int Channels
    {
        get { return m_Buffer.Format.Channels; }
    }

    /// <summary>The DirectSound device in use.</summary>
    public Device Device
    {
        get { return m_Device; }
    }

    /// <summary>Sampling rate (Hz) of the playback buffer's format.</summary>
    public int SamplingRate
    {
        get { return m_Buffer.Format.SamplesPerSecond; }
    }

    #endregion

    #region IDisposable Members

    /// <summary>
    /// Stops playback, waits briefly for the notification thread to finish and releases
    /// the DirectSound objects. The device is released only if this instance created it.
    /// Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>Releases resources.</summary>
    /// <param name="disposing">
    /// True when called from <see cref="Dispose()"/>; false when called from the finalizer,
    /// in which case other managed objects must not be touched.
    /// </param>
    protected virtual void Dispose(bool disposing)
    {
        if (!disposing)
        {
            // Finalizer path: the wrapped objects may already have been finalized,
            // so only flag the worker loop to end.
            isRunning = false;
            return;
        }

        Stop();

        // Give the worker a moment to leave Play() so the buffer is not disposed
        // while it is being written to.
        Thread worker = notifyThread;
        if (worker != null && worker != Thread.CurrentThread)
        {
            worker.Join(1000);
        }
        notifyThread = null;

        if (notify != null)
        {
            notify.Dispose();
            notify = null;
        }
        if (m_Buffer != null)
        {
            m_Buffer.Dispose();
            m_Buffer = null;
        }
        if (m_OwnsDevice && (m_Device != null))
        {
            m_Device.Dispose();
            m_Device = null;
        }
        if (notificationEvent != null)
        {
            notificationEvent.Close();
            notificationEvent = null;
        }
    }

    ~SoundPlayer()
    {
        Dispose(false);
    }

    #endregion

    #region Private methods

    /// <summary>
    /// Registers one notification position at the end of every buffer segment and starts
    /// the thread that refills the buffer when a notification fires.
    /// </summary>
    private void InitNotifications()
    {
        // The event must exist before the thread starts: the thread waits on it immediately.
        notificationEvent = new AutoResetEvent(false);
        notify = new Notify(m_Buffer);

        IntPtr eventHandle = notificationEvent.SafeWaitHandle.DangerousGetHandle();
        for (int i = 0; i < NumberRecordNotifications; i++)
        {
            // Last byte of segment i.
            positionNotify[i].Offset = (notifySize * (i + 1)) - 1;
            positionNotify[i].EventNotifyHandle = eventHandle;
        }
        notify.SetNotificationPositions(positionNotify, NumberRecordNotifications);
        nextWriteOffset = 0;

        isRunning = true;
        notifyThread = new Thread(NotifyThreadHandler);
        notifyThread.IsBackground = true;
        notifyThread.Start();
    }

    /// <summary>
    /// Worker loop: waits for a buffer notification (or a wake-up from <see cref="Stop"/>)
    /// and refills the played segments until the player is stopped.
    /// </summary>
    private void NotifyThreadHandler()
    {
        // Keep a local reference so the loop is unaffected when Dispose clears the field.
        AutoResetEvent wakeUp = notificationEvent;
        while (isRunning)
        {
            try
            {
                wakeUp.WaitOne(Timeout.Infinite, true);
                if (!isRunning)
                {
                    break;
                }
                Play();
            }
            catch (ObjectDisposedException)
            {
                // The event was closed underneath us: nothing left to wait for.
                return;
            }
            catch (Exception ex)
            {
                System.Diagnostics.Trace.WriteLine("SoundPlayer notification thread: " + ex);
            }
        }
    }

    /// <summary>
    /// Copies queued audio into the part of the playback buffer that has already been played.
    /// </summary>
    private void Play()
    {
        try
        {
            int playPosition;
            int writePosition;
            m_Buffer.GetCurrentPosition(out playPosition, out writePosition);

            // Bytes between our last write and the device's write cursor.
            int lockSize = writePosition - nextWriteOffset;
            if (lockSize < 0)
            {
                // The device cursor has wrapped to the start of the looping buffer
                // while our offset is still near the end.
                lockSize += m_BufferBytes;
            }

            // Only whole segments are refilled.
            lockSize -= lockSize % notifySize;
            if (lockSize == 0)
            {
                return;
            }

            byte[] data = new byte[lockSize];
            if (circularBuffer.Read(data) > 0)
            {
                m_Buffer.Write(nextWriteOffset, data, LockFlag.None);
                // The buffer loops, so the write offset wraps as well.
                nextWriteOffset = (nextWriteOffset + lockSize) % m_BufferBytes;
            }
        }
        catch (Exception ex)
        {
            // Best effort: a failed refill must not kill the playback thread.
            System.Diagnostics.Trace.WriteLine("SoundPlayer.Play failed: " + ex);
        }
    }

    #endregion

    #region Public methods

    /// <summary>
    /// Stops playback and lets the notification thread terminate.
    /// </summary>
    public void Stop()
    {
        isRunning = false;
        if (m_Buffer != null)
        {
            m_Buffer.Stop();
        }

        // A stopped buffer raises no more notifications, so wake the worker explicitly;
        // otherwise it would stay blocked on the event forever.
        AutoResetEvent wakeUp = notificationEvent;
        if (wakeUp != null)
        {
            wakeUp.Set();
        }
    }

    /// <summary>
    /// Queues PCM data for playback. Failures are logged, not thrown.
    /// </summary>
    /// <param name="data">Audio bytes in the player's format; null is ignored.</param>
    public void Write(byte[] data)
    {
        if (data == null)
        {
            return;
        }

        try
        {
            Console.WriteLine("播放声音:{0}", data.Length);
            circularBuffer.Write(data);
        }
        catch (Exception ex)
        {
            System.Diagnostics.Trace.WriteLine("SoundPlayer.Write failed: " + ex);
        }
    }

    #endregion
}
用于录制声音
/// <summary>
/// Records PCM audio from a DirectSound capture device. Captured data is delivered
/// through the <see cref="BufferData"/> event on a background thread and, when
/// <see cref="FileName"/> has been set, is also written to a WAV file.
/// </summary>
public class CaptureSound
{
    #region Private members

    // Number of equally sized segments the capture buffer is divided into;
    // one notification position is registered at the end of each segment.
    private const int NumberRecordNotifications = 4;

    // Sized with one spare slot, as before; only the first NumberRecordNotifications
    // entries are passed to DirectSound.
    private readonly BufferPositionNotify[] positionNotify =
        new BufferPositionNotify[NumberRecordNotifications + 1];
    private Capture selectedDevice;
    private CaptureBuffer buffer;
    private int captureBufferSize;
    private string fileName = string.Empty;
    // Written by Start()/Stop() and read by the notification thread.
    private volatile bool isRecording;
    private int nextCaptureOffset;
    private AutoResetEvent notificationEvent;
    private Notify notify;
    private int notifySize;
    // The current worker; a worker that finds another value here terminates.
    private volatile Thread notifyThread;
    private long sampleCount;
    private WaveFormat selectedFormat;
    private FileStream waveFile;
    private BinaryWriter writer;

    #endregion

    #region Constructors

    /// <summary>Uses the first capture device and the default wave format.</summary>
    /// <exception cref="ArgumentException">No capture device is available.</exception>
    public CaptureSound()
    {
        InitializeDeviceSelector();
        InitializeWaveFormatSelector();
        Initialize();
    }

    /// <summary>Uses the given capture device and the default wave format.</summary>
    /// <param name="device">Capture device to record from.</param>
    /// <exception cref="ArgumentException"><paramref name="device"/> is null.</exception>
    public CaptureSound(Capture device)
    {
        selectedDevice = device;
        InitializeWaveFormatSelector();
        Initialize();
    }

    /// <summary>Uses the first capture device and the given wave format.</summary>
    /// <param name="waveFormat">PCM format to record in.</param>
    /// <exception cref="ArgumentException">No capture device is available.</exception>
    public CaptureSound(WaveFormat waveFormat)
    {
        selectedFormat = waveFormat;
        InitializeDeviceSelector();
        Initialize();
    }

    /// <summary>Uses the given capture device and wave format.</summary>
    /// <param name="device">Capture device to record from.</param>
    /// <param name="waveFormat">PCM format to record in.</param>
    public CaptureSound(Capture device, WaveFormat waveFormat)
    {
        selectedDevice = device;
        selectedFormat = waveFormat;
        Initialize();
    }

    #endregion

    #region Public properties

    /// <summary>Size of the capture buffer in bytes.</summary>
    public int BufferBytes
    {
        get { return captureBufferSize; }
    }

    /// <summary>
    /// Path of the WAV file to record into. Setting it creates the file and writes the
    /// header immediately; a file opened by an earlier assignment is closed first.
    /// </summary>
    public string FileName
    {
        get { return fileName; }
        set
        {
            fileName = value;
            CreateRIFF();
        }
    }

    /// <summary>
    /// Number of bytes captured since recording started (the counter is advanced by the
    /// length of each captured byte array). Reset to zero by <see cref="Stop"/>.
    /// </summary>
    public long SampleCount
    {
        get { return sampleCount; }
    }

    /// <summary>The wave format used for capture.</summary>
    public WaveFormat SelectedFormat
    {
        get { return selectedFormat; }
    }

    #endregion

    #region Public events

    /// <summary>
    /// Raised with each chunk of captured data. Handlers run on whichever thread read
    /// the data: normally the background notification thread, but the thread calling
    /// <see cref="Stop"/> for the final flush.
    /// </summary>
    public event DirectSoundBufferDataEventHandler BufferData;

    #endregion

    #region Private methods

    /// <summary>
    /// (Re)creates the capture buffer for the selected format and registers its
    /// notification positions. Does nothing when the format has no channels.
    /// </summary>
    private void CreateCaptureBuffer()
    {
        CaptureBufferDescription desc = new CaptureBufferDescription();

        if (notify != null)
        {
            notify.Dispose();
            notify = null;
        }
        if (buffer != null)
        {
            buffer.Dispose();
            buffer = null;
        }
        if (selectedFormat.Channels == 0)
        {
            return;
        }

        // One segment holds 1/8 second of audio (never less than 1024 bytes), rounded
        // down to a whole number of sample frames.
        int segmentBytes = Math.Max(1024, selectedFormat.AverageBytesPerSecond / 8);
        segmentBytes -= segmentBytes % selectedFormat.BlockAlign;
        notifySize = segmentBytes;

        captureBufferSize = notifySize * NumberRecordNotifications;
        desc.BufferBytes = captureBufferSize;
        selectedFormat.FormatTag = WaveFormatTag.Pcm;
        desc.Format = selectedFormat;
        buffer = new CaptureBuffer(desc, selectedDevice);
        nextCaptureOffset = 0;
        InitNotifications();
    }

    /// <summary>
    /// Creates the file named by <see cref="FileName"/> and writes a 44-byte PCM WAV
    /// header whose two size fields are placeholders; they are patched when recording stops.
    /// </summary>
    private void CreateRIFF()
    {
        // Release a file opened by an earlier FileName assignment so its handle is not leaked.
        BinaryWriter previous = writer;
        writer = null;
        waveFile = null;
        if (previous != null)
        {
            previous.Close();
        }

        FileStream stream = new FileStream(FileName, FileMode.Create);
        try
        {
            BinaryWriter header = new BinaryWriter(stream);

            const short pcmFormatTag = 1;
            const int fmtChunkSize = 16;
            const int sizePlaceholder = 0;
            // Bytes per sample frame. Matches the values previously hard-coded for
            // 8/16-bit mono/stereo and also covers other channel counts.
            short blockAlign = (short)(selectedFormat.Channels * (selectedFormat.BitsPerSample / 8));

            header.Write(new char[] { 'R', 'I', 'F', 'F' });
            header.Write(sizePlaceholder);                       // RIFF chunk size (offset 4)
            header.Write(new char[] { 'W', 'A', 'V', 'E' });
            header.Write(new char[] { 'f', 'm', 't', ' ' });
            header.Write(fmtChunkSize);
            header.Write(pcmFormatTag);
            header.Write(selectedFormat.Channels);
            header.Write(selectedFormat.SamplesPerSecond);
            header.Write(selectedFormat.AverageBytesPerSecond);
            header.Write(blockAlign);
            header.Write(selectedFormat.BitsPerSample);
            header.Write(new char[] { 'd', 'a', 't', 'a' });
            header.Write(sizePlaceholder);                       // data chunk size (offset 40)

            // Publish the writer only once the header is complete, so the capture thread
            // cannot append audio in front of it.
            waveFile = stream;
            writer = header;
        }
        catch
        {
            stream.Close();
            throw;
        }
    }

    /// <summary>Creates the initial capture buffer.</summary>
    private void Initialize()
    {
        CreateCaptureBuffer();
    }

    /// <summary>Selects the first enumerated capture device.</summary>
    /// <exception cref="ArgumentException">No capture device is available.</exception>
    private void InitializeDeviceSelector()
    {
        CaptureDevicesCollection devices = new CaptureDevicesCollection();
        if (devices.Count > 0)
        {
            selectedDevice = new Capture(devices[0].DriverGuid);
        }
        else
        {
            throw new ArgumentException("无法初始化声音设备");
        }
    }

    /// <summary>Selects the default wave format.</summary>
    /// <exception cref="ArgumentException">No capture device has been selected yet.</exception>
    private void InitializeWaveFormatSelector()
    {
        if (selectedDevice == null)
        {
            throw new ArgumentException("尚未設定音訊裝置,無法選擇輸出格式。");
        }
        selectedFormat = DirectSoundManager.DefaultFormat;
    }

    /// <summary>
    /// Starts the notification thread if none is running and registers one notification
    /// position at the end of every capture-buffer segment.
    /// </summary>
    private void InitNotifications()
    {
        if (buffer == null)
        {
            throw new InvalidOperationException("The capture buffer has not been created.");
        }

        if (notifyThread == null)
        {
            // The event must exist before the thread starts: the thread waits on it immediately.
            notificationEvent = new AutoResetEvent(false);
            isRecording = true;
            Thread worker = new Thread(WaitThread);
            worker.IsBackground = true;
            notifyThread = worker;
            worker.Start();
        }

        IntPtr eventHandle = notificationEvent.SafeWaitHandle.DangerousGetHandle();
        for (int i = 0; i < NumberRecordNotifications; i++)
        {
            // Last byte of segment i.
            positionNotify[i].Offset = (notifySize * (i + 1)) - 1;
            positionNotify[i].EventNotifyHandle = eventHandle;
        }

        notify = new Notify(buffer);
        notify.SetNotificationPositions(positionNotify, NumberRecordNotifications);
    }

    /// <summary>Raises <see cref="BufferData"/> if anyone is subscribed.</summary>
    private void OnBufferData(object sender, DirectSoundBufferDataEventArgs e)
    {
        // Copy the delegate so an unsubscribe on another thread cannot null it between
        // the check and the call.
        DirectSoundBufferDataEventHandler handler = BufferData;
        if (handler != null)
        {
            handler(sender, e);
        }
    }

    /// <summary>
    /// Reads every complete segment captured since the last call, raises
    /// <see cref="BufferData"/> with it and appends it to the WAV file if one is open.
    /// Failures are logged, not thrown.
    /// </summary>
    private void RecordCapturedData()
    {
        try
        {
            int capturePosition;
            int readPosition;
            // NOTE(review): the second value is taken to be the position up to which data
            // is safe to read - confirm against the DirectSound documentation.
            buffer.GetCurrentPosition(out capturePosition, out readPosition);

            int lockSize = readPosition - nextCaptureOffset;
            if (lockSize < 0)
            {
                // The device cursor has wrapped to the start of the circular buffer.
                lockSize += captureBufferSize;
            }

            // Only whole segments are consumed.
            lockSize -= lockSize % notifySize;
            if (lockSize == 0)
            {
                return;
            }

            byte[] data = (byte[])buffer.Read(nextCaptureOffset, typeof(byte), LockFlag.None, new int[] { lockSize });
            OnBufferData(this, new DirectSoundBufferDataEventArgs(data));

            BinaryWriter output = writer;
            if (output != null)
            {
                output.Write(data, 0, data.Length);
            }

            sampleCount += data.Length;
            nextCaptureOffset = (nextCaptureOffset + data.Length) % captureBufferSize;
        }
        catch (Exception ex)
        {
            // Best effort: a failed read or a throwing subscriber must not kill the capture thread.
            System.Diagnostics.Trace.WriteLine("CaptureSound.RecordCapturedData failed: " + ex);
        }
    }

    /// <summary>
    /// Worker loop: waits for a buffer notification (or a wake-up from <see cref="Stop"/>)
    /// and drains the captured data until recording stops or this thread is replaced.
    /// </summary>
    private void WaitThread()
    {
        // Each worker keeps its own event; a later Stop()/Start() cycle replaces the field.
        AutoResetEvent wakeUp = notificationEvent;
        Thread self = Thread.CurrentThread;

        while (isRecording)
        {
            try
            {
                wakeUp.WaitOne(Timeout.Infinite, true);
                // Stop() clears notifyThread, and a restart installs a new worker;
                // in both cases this thread is finished.
                if (!isRecording || notifyThread != self)
                {
                    break;
                }
                RecordCapturedData();
            }
            catch (Exception ex)
            {
                System.Diagnostics.Trace.WriteLine("CaptureSound notification thread: " + ex);
            }
        }
    }

    /// <summary>
    /// Starts capturing into a fresh buffer, or stops capturing, flushes the remaining
    /// data and finalizes the WAV file.
    /// </summary>
    /// <param name="StartRecording">True to start, false to stop.</param>
    private void StartOrStopRecord(bool StartRecording)
    {
        if (StartRecording)
        {
            isRecording = true;
            CreateCaptureBuffer();
            buffer.Start(true);
            return;
        }

        isRecording = false;
        buffer.Stop();
        // Pick up whatever was captured after the last notification.
        RecordCapturedData();

        if (writer != null)
        {
            // Both size fields in the header are 32-bit. Writing the long counter
            // directly would emit 8 bytes and overwrite the first 4 bytes of audio.
            writer.Seek(4, SeekOrigin.Begin);
            writer.Write((int)(sampleCount + 36L));   // RIFF size = header remainder (36) + data
            writer.Seek(40, SeekOrigin.Begin);
            writer.Write((int)sampleCount);           // data chunk size
            writer.Close();
            writer = null;
            waveFile = null;
        }
    }

    #endregion

    #region Public methods

    /// <summary>Suspends capture without discarding the buffer.</summary>
    public void Pause()
    {
        buffer.Stop();
    }

    /// <summary>Resumes capture after <see cref="Pause"/>.</summary>
    public void Resume()
    {
        buffer.Start(true);
    }

    /// <summary>Starts recording into a fresh capture buffer.</summary>
    public void Start()
    {
        StartOrStopRecord(true);
    }

    /// <summary>
    /// Stops recording, finalizes the WAV file if one is open, ends the notification
    /// thread and resets the counters.
    /// </summary>
    public void Stop()
    {
        StartOrStopRecord(false);

        // Detach the worker, then wake it: a stopped buffer raises no more
        // notifications, so without the wake-up the thread would stay blocked forever.
        AutoResetEvent wakeUp = notificationEvent;
        notifyThread = null;
        if (wakeUp != null)
        {
            wakeUp.Set();
        }

        nextCaptureOffset = 0;
        sampleCount = 0L;
    }

    #endregion
}
程序下载地址如下(自己反射看源码,因为程序只是用于演示,所以考虑很不周全,不足以效仿)
https://files.cnblogs.com/onlytiancai/wawaim.zip
PPT下载地址如下
https://files.cnblogs.com/onlytiancai/p2p语音.zip
参考链接如下:
ch11-DxSound&Input2.ppt:建立DirectSound 声音的播放与控制 使用3D音效
SIP之 穿越NAT.ppt
DirectX技术实现视频会议中的音频通信
http://www.ctiforum.com/forum/2008/03/forum08_0357.htm
C#中使用DirectSound录音
http://blog.donews.com/uplook/archive/2005/12/14/657145.aspx
在C#下利用DirectSound实现声音播放
http://www.cnblogs.com/yangbeibei/archive/2006/08/30/490270.html
隨意桌面,數位溝通
http://cuteofdragon.blogspot.com/2007/05/blog-post_9694.html
用DirectX Audio和DirectShow播放声音和音乐
http://www.cppblog.com/lovedday/archive/2007/09/24/32815.html