Analyzing silence in media takes a few steps:
- extract the audio track from the media;
- convert the audio to raw PCM data;
- analyze the PCM data and output the silence segments.
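For reference, steps 1 and 2 together correspond to a single FFmpeg invocation. An equivalent desktop command line for the conversion we will build below (file names here are placeholders) would be:

ffmpeg -y -i a.mp4 -f s16le -ar 16000 -ac 1 a.pcm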
Adding FFmpeg
We can use FFmpeg to extract the audio from the media. In Flutter, FFmpeg is available through the ffmpeg_kit_flutter_new package:
flutter pub add ffmpeg_kit_flutter_new
Alternatively, add it to pubspec.yaml:
dependencies:
  flutter:
    sdk: flutter
  ……
  ffmpeg_kit_flutter_new: ^3.1.0
Then run flutter pub get.
After that, classes such as FFmpegKit are ready to use. For example, the following code extracts the audio from the video file a.mp4 into a.mp3:
import 'package:ffmpeg_kit_flutter_new/ffmpeg_kit.dart';
import 'package:ffmpeg_kit_flutter_new/return_code.dart';
import 'package:logging/logging.dart';

final _logger = Logger('AudioExtraction');

// -y overwrites the output; FFmpeg infers the mp3 target from the extension
final command = '-y -i a.mp4 a.mp3';
try {
  final session = await FFmpegKit.execute(command);
  final returnCode = await session.getReturnCode();
  if (!ReturnCode.isSuccess(returnCode)) {
    final logs = await session.getLogsAsString();
    _logger.warning(
        'FFmpeg execution failed for a.mp4. Return code: $returnCode\n$logs');
  }
} catch (e, s) {
  _logger.severe('Error during audio processing for a.mp4', e, s);
}
Pipes
The code above does extract the audio from the media, but only into a file. For an embedded device, that kind of frequent I/O is hard on the hardware.
We can use a pipe instead: FFmpeg writes into the pipe while a separate asynchronous method reads the data from it and analyzes it.
Using a pipe with FFmpeg is very simple: obtain a pipe path with FFmpegKitConfig.registerNewFFmpegPipe(), then pass it to FFmpegKit.execute() like an ordinary file path. For example:
// Register a pipe
final pipePath = await FFmpegKitConfig.registerNewFFmpegPipe();
if (pipePath == null) {
  _logger.severe('Failed to create an FFmpeg pipe.');
  return null;
}
// s16le: decode to 16-bit little-endian integers
// -ar 16000: 16000 samples per second, -ac 1: a single channel
// The pipe path goes at the end, where the output file normally sits
final command = '-y -i a.mp4 -f s16le -ar 16000 -ac 1 $pipePath';
try {
  // onData is a separate asynchronous method, used later in this post.
  // It must start reading before execute(), otherwise FFmpeg blocks
  // waiting for a reader on the pipe.
  final processingFuture = onData(File(pipePath).openRead(), duration);
  final session = await FFmpegKit.execute(command);
  final returnCode = await session.getReturnCode();
  if (ReturnCode.isSuccess(returnCode)) {
    return await processingFuture;
  } else {
    final logs = await session.getLogsAsString();
    _logger.warning(
        'FFmpeg execution failed for a.mp4. Return code: $returnCode\n$logs');
  }
} catch (e, s) {
  _logger.severe('Error during audio processing for a.mp4', e, s);
} finally {
  // Release the pipe when we are done
  FFmpegKitConfig.closeFFmpegPipe(pipePath);
}
Detecting silence
FFmpeg now gives us the media's audio data as 16000 samples per second, each sample a 16-bit integer. With the decibel formula for sound we can decide, sample by sample, what counts as silence.
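The formula is worth spelling out. The level of a 16-bit sample relative to full scale (dBFS), where 32767 is the loudest possible value, is 20 * log10(|sample| / 32767). A minimal sketch in Dart (sampleToDbfs is our own name, not part of any package):

import 'dart:math';

/// Level of a 16-bit PCM sample in dBFS; 0 dBFS corresponds to 32767.
double sampleToDbfs(int sample) {
  if (sample == 0) return double.negativeInfinity; // no signal at all
  // dart:math has no log10, so divide the natural log by ln10
  return 20 * (log(sample.abs() / 32767) / ln10);
}

Rather than evaluating a logarithm for every sample, the code below inverts the formula once: pow(10, silenceThreshold / 20) * 32767 is the linear sample value at the threshold, so a plain comparison per sample is enough.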
// Silence threshold, provisionally set to -30 dB:
// samples whose level falls below it count as silent
final silenceThreshold = -30;
// Minimum silence duration of 2 seconds:
// anything shorter is ignored
final minSilenceDuration = Duration(seconds: 2);
// The 16-bit linear sample value corresponding to the dB threshold
final linearThreshold = pow(10, silenceThreshold / 20) * 32767;
// The duration threshold expressed as a sample count
final minSilenceSamples =
    (minSilenceDuration.inMilliseconds / 1000.0 * 16000).round();
// Total sample count, derived from the audio duration
final totalSamples =
    (audioDuration.inMilliseconds / 1000.0 * 16000).round();
int samplesProcessed = 0;
int silenceStartSample = -1;
await for (var chunk in pcmStream) {
  // Reinterpret the raw bytes as 16-bit samples
  // (assumes whole samples per chunk and a little-endian host)
  final samples = Int16List.view(Uint8List.fromList(chunk).buffer);
  for (int i = 0; i < samples.length; i++) {
    final currentSampleIndex = samplesProcessed + i;
    final isSilent = samples[i].abs() < linearThreshold;
    // A silent sample with no run in progress starts a silence run
    if (isSilent && silenceStartSample == -1) {
      silenceStartSample = currentSampleIndex;
      // A loud sample with a run in progress closes a silence segment
    } else if (!isSilent && silenceStartSample != -1) {
      final silentSamples = currentSampleIndex - silenceStartSample;
      // Record the segment if it meets the duration threshold
      if (silentSamples >= minSilenceSamples) {
        final startTime =
            _calculateTime(silenceStartSample, totalSamples, audioDuration);
        final endTime =
            _calculateTime(currentSampleIndex, totalSamples, audioDuration);
        silenceSegments.add((startTime, endTime));
      }
      silenceStartSample = -1;
    }
  }
  samplesProcessed += samples.length;
}
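Note that this loop only closes a silence run when a loud sample arrives, so a silence lasting to the very end of the audio would be dropped. The complete service below flushes such a run after the loop.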
Sample code
Putting all of the above together, we end up with two services.
- An FFmpegProcessingService that invokes FFmpeg with the given parameters.
import 'dart:async';
import 'dart:io';
import 'package:ffmpeg_kit_flutter_new/ffmpeg_kit.dart';
import 'package:ffmpeg_kit_flutter_new/ffmpeg_kit_config.dart';
import 'package:ffmpeg_kit_flutter_new/return_code.dart';
import 'package:logging/logging.dart';

/// PCM conversion parameters handed to FFmpeg.
class FFmpegPcmConversionOptions {
  final int sampleRate;
  final String format; // e.g. 's16le' for 16-bit little-endian PCM
  final int channels;

  FFmpegPcmConversionOptions({
    required this.sampleRate,
    required this.format,
    this.channels = 1,
  });

  // Rendered into the command line, e.g. '-f s16le -ar 16000 -ac 1'
  String toArgs() {
    return '-f $format -ar $sampleRate -ac $channels';
  }
}
class FFmpegProcessingService {
  final _logger = Logger('FFmpegProcessingService');

  /// Decodes [filePath] into raw PCM through a pipe and feeds the stream
  /// to [onData]; returns whatever [onData] produces, or null on failure.
  Future<T?> processAudio<T>({
    required String filePath,
    required Duration duration,
    required FFmpegPcmConversionOptions options,
    required Future<T> Function(
            Stream<List<int>> pcmStream, Duration audioDuration)
        onData,
  }) async {
    final pipePath = await FFmpegKitConfig.registerNewFFmpegPipe();
    if (pipePath == null) {
      _logger.severe('Failed to create an FFmpeg pipe.');
      return null;
    }
    final command = '-y -i "$filePath" ${options.toArgs()} $pipePath';
    try {
      // Start consuming the pipe before FFmpeg starts writing to it
      final processingFuture = onData(File(pipePath).openRead(), duration);
      final session = await FFmpegKit.execute(command);
      final returnCode = await session.getReturnCode();
      if (ReturnCode.isSuccess(returnCode)) {
        return await processingFuture;
      } else {
        final logs = await session.getLogsAsString();
        _logger.warning(
            'FFmpeg execution failed for $filePath. Return code: $returnCode\n$logs');
        return null;
      }
    } catch (e, s) {
      _logger.severe('Error during audio processing for $filePath', e, s);
      return null;
    } finally {
      FFmpegKitConfig.closeFFmpegPipe(pipePath);
    }
  }
}
- A SilenceDetectionService that calls FFmpegProcessingService, registers the asynchronous callback, and produces the list of silence segments.
import 'dart:async';
import 'dart:math';
import 'dart:typed_data';
import 'package:example/app/services/ffmpeg_processing_service.dart';
import 'package:logging/logging.dart';

class SilenceDetectionService {
  final _logger = Logger('SilenceDetectionService');
  final FFmpegProcessingService _ffmpegProcessingService =
      FFmpegProcessingService();

  /// Finds silences in the media at [filePath] and returns them as
  /// (start, end) pairs.
  Future<List<(Duration, Duration)>> findSilenceSegments(
    String filePath,
    Duration duration, {
    required double silenceThreshold,
    required Duration minSilenceDuration,
  }) async {
    try {
      final options =
          FFmpegPcmConversionOptions(sampleRate: 16000, format: 's16le');
      final result = await _ffmpegProcessingService.processAudio(
        filePath: filePath,
        duration: duration,
        options: options,
        onData: (pcmStream, audioDuration) => _performSilenceDetection(
          pcmStream: pcmStream,
          audioDuration: audioDuration,
          options: options,
          silenceThreshold: silenceThreshold,
          minSilenceDuration: minSilenceDuration,
        ),
      );
      return result ?? [];
    } catch (e, s) {
      _logger.severe(
          'Failed to analyze audio file for silence: $filePath', e, s);
      return [];
    }
  }

  Future<List<(Duration, Duration)>> _performSilenceDetection({
    required Stream<List<int>> pcmStream,
    required Duration audioDuration,
    required FFmpegPcmConversionOptions options,
    required double silenceThreshold,
    required Duration minSilenceDuration,
  }) async {
    final silenceSegments = <(Duration, Duration)>[];
    final linearThreshold = pow(10, silenceThreshold / 20) * 32767;
    final minSilenceSamples =
        (minSilenceDuration.inMilliseconds / 1000.0 * options.sampleRate)
            .round();
    final totalSamples =
        (audioDuration.inMilliseconds / 1000.0 * options.sampleRate).round();
    int samplesProcessed = 0;
    int silenceStartSample = -1;
    await for (var chunk in pcmStream) {
      // Assumes each chunk holds whole 16-bit samples
      final samples = Int16List.view(Uint8List.fromList(chunk).buffer);
      for (int i = 0; i < samples.length; i++) {
        final currentSampleIndex = samplesProcessed + i;
        final isSilent = samples[i].abs() < linearThreshold;
        if (isSilent && silenceStartSample == -1) {
          silenceStartSample = currentSampleIndex;
        } else if (!isSilent && silenceStartSample != -1) {
          final silentSamples = currentSampleIndex - silenceStartSample;
          if (silentSamples >= minSilenceSamples) {
            final startTime =
                _calculateTime(silenceStartSample, totalSamples, audioDuration);
            final endTime =
                _calculateTime(currentSampleIndex, totalSamples, audioDuration);
            silenceSegments.add((startTime, endTime));
          }
          silenceStartSample = -1;
        }
      }
      samplesProcessed += samples.length;
    }
    // Flush a silence run that lasts to the end of the audio
    if (silenceStartSample != -1) {
      final silentSamples = totalSamples - silenceStartSample;
      if (silentSamples >= minSilenceSamples) {
        final startTime =
            _calculateTime(silenceStartSample, totalSamples, audioDuration);
        silenceSegments.add((startTime, audioDuration));
      }
    }
    _logger.info(
        'Silence analysis complete, found ${silenceSegments.length} segments.');
    return silenceSegments;
  }

  // Map a sample index back to a timestamp
  Duration _calculateTime(
      int sampleIndex, int totalSamples, Duration audioDuration) {
    if (totalSamples == 0) return Duration.zero;
    final ratio = sampleIndex / totalSamples;
    return Duration(
        milliseconds: (audioDuration.inMilliseconds * ratio).round());
  }
}
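Calling the services then looks like this. A minimal sketch, assuming we already know the media file's path and duration (both values here are placeholders):

final service = SilenceDetectionService();
final segments = await service.findSilenceSegments(
  '/path/to/a.mp4',
  const Duration(minutes: 5), // the media's known duration
  silenceThreshold: -30, // dBFS
  minSilenceDuration: const Duration(seconds: 2),
);
for (final (start, end) in segments) {
  print('silence from $start to $end');
}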