Skip to content

Commit

Permalink
Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
durmisi committed Mar 19, 2024
1 parent d32dda9 commit 84692c8
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 164 deletions.
80 changes: 0 additions & 80 deletions Core/AIBroker.cs

This file was deleted.

8 changes: 5 additions & 3 deletions Core/ServiceCollectionExtensions.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Microsoft.Extensions.DependencyInjection;
using OpenAIExtensions.Chats;
using OpenAIExtensions.Services;
using OpenAIExtensions.Text2Sql;

Expand All @@ -8,10 +9,11 @@ public static class ServiceCollectionExtensions
{
/// <summary>
/// Registers the OpenAI extension services with scoped lifetime.
/// </summary>
/// <param name="services">The service collection to add registrations to.</param>
public static void AddOpenAI(this IServiceCollection services)
{
    // NOTE(review): the old AddScoped<IAIBroker, AIBroker>() registration is removed —
    // AIBroker.cs is deleted in this commit, so the type no longer exists.
    services.AddScoped<IAIAudioService, AIAudioService>();
    services.AddScoped<IAIConversationManager, AIConversationManager>();
    services.AddScoped<IAIImageService, AIImageService>();
    // NOTE(review): registers the concrete type as its own service contract —
    // confirm whether an IAITranslationService interface was intended here.
    services.AddScoped<AITranslationService, AITranslationService>();
    services.AddScoped<IAISqlGenerator, AISqlGenerator>();
    // Removed: AddScoped<IAIAudioService, IAIAudioService>() — mapping an interface
    // to itself as the implementation makes the container throw on first resolve,
    // and IAIAudioService is already registered above.
}
}
}
109 changes: 47 additions & 62 deletions Core/Services/AIAudioService.cs
Original file line number Diff line number Diff line change
@@ -1,77 +1,73 @@
using Azure;
using Azure.AI.OpenAI;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.AudioToText;
using Microsoft.SemanticKernel.Connectors.OpenAI;

namespace OpenAIExtensions.Services
{
/// <summary>
/// Speech-to-text service backed by a Semantic Kernel audio-to-text connector.
/// </summary>
public interface IAIAudioService
{
    /// <summary>
    /// Transcribes the given audio stream to text.
    /// </summary>
    /// <param name="fileName">File name hint passed to the execution settings (used by the service to detect the audio format).</param>
    /// <param name="audioStream">Readable stream containing the audio data.</param>
    /// <param name="executionSettings">Optional transcription settings; a default is created when null.</param>
    /// <param name="ct">Token to cancel the operation.</param>
    /// <returns>The transcribed text, or null when the service returns none.</returns>
    Task<string?> AudioToTextAsync(
        string fileName,
        Stream audioStream,
        OpenAIAudioToTextExecutionSettings? executionSettings = null,
        CancellationToken ct = default);

    /// <summary>
    /// Opens the audio file at <paramref name="path"/> and transcribes it to text.
    /// </summary>
    /// <param name="path">Path of the audio file on disk.</param>
    /// <param name="executionSettings">Optional transcription settings; a default is created when null.</param>
    /// <param name="ct">Token to cancel the operation.</param>
    /// <returns>The transcribed text, or null when the service returns none.</returns>
    Task<string?> AudioToTextAsync(
        string path,
        OpenAIAudioToTextExecutionSettings? executionSettings = null,
        CancellationToken ct = default);
}

/// <summary>
/// https://drlee.io/transforming-audio-to-text-with-openais-speech-to-text-api-a-practical-step-by-step-guide-8139e4e65fdf
/// </summary>
public class AIAudioService : IAIAudioService
{
private readonly OpenAIClient _client;

private readonly Kernel _kernel;
private readonly ILogger<AIAudioService> _logger;

private readonly string _deploymentName = "whisper-001";

public AIAudioService(
IAIBroker aIBroker,
ILogger<AIAudioService> logger,
string? deploymentName = null)
Kernel kernel,
ILogger<AIAudioService> logger)
{
_kernel = kernel;
_logger = logger;
_client = aIBroker.GetClient();

if (!string.IsNullOrEmpty(deploymentName))
{
_deploymentName = deploymentName;
}
}

public async Task<AudioTranscription?> TranscribeAsync(string fileName, Stream audioStream)
public async Task<string?> AudioToTextAsync(
string audioFilename,
Stream audioStream,
OpenAIAudioToTextExecutionSettings? executionSettings = null,
CancellationToken ct = default)
{
var transcriptionOptions = new AudioTranscriptionOptions()
{
DeploymentName = _deploymentName,
AudioData = BinaryData.FromStream(audioStream),
ResponseFormat = AudioTranscriptionFormat.Verbose,
Filename = fileName
};
AudioContent audioContent = new(BinaryData.FromStream(audioStream));

Response<AudioTranscription> transcriptionResponse
= await _client.GetAudioTranscriptionAsync(transcriptionOptions);
var audioToTextService = _kernel.GetRequiredService<IAudioToTextService>();

var transcription = transcriptionResponse.Value;
return transcription;
}

public async Task<AudioTranscription?> TranscribeAsync(string path)
{
if (string.IsNullOrEmpty(path))
executionSettings ??= new(audioFilename)
{
throw new ArgumentException($"'{nameof(path)}' cannot be null or empty.", nameof(path));
}

var fileName = Path.GetFileName(path);
Prompt = null, // An optional text to guide the model's style or continue a previous audio segment.
// The prompt should match the audio language.
Language = "en", // The language of the audio data as two-letter ISO-639-1 language code (e.g. 'en' or 'es').
ResponseFormat = "text", // The format to return the transcribed text in.
// Supported formats are json, text, srt, verbose_json, or vtt. Default is 'json'.
Temperature = 0.3f, // The randomness of the generated text.
// Select a value from 0.0 to 1.0. 0 is the default.
};

using Stream audioStreamFromFile = File.OpenRead(path);
var textContent = await audioToTextService.GetTextContentAsync(
audioContent,
executionSettings: executionSettings,
cancellationToken: ct);

return await TranscribeAsync(fileName, audioStreamFromFile);
return textContent.Text;
}

public async Task<AudioTranslation?> TranslateAsync(string path)
public async Task<string?> AudioToTextAsync(
string path,
OpenAIAudioToTextExecutionSettings? executionSettings = null,
CancellationToken ct = default)
{
if (string.IsNullOrEmpty(path))
{
Expand All @@ -82,22 +78,11 @@ Response<AudioTranscription> transcriptionResponse

using Stream audioStreamFromFile = File.OpenRead(path);

return await TranslateAsync(fileName, audioStreamFromFile);
return await AudioToTextAsync(
fileName,
audioStreamFromFile,
executionSettings, ct);
}

public async Task<AudioTranslation?> TranslateAsync(string fileName, Stream audioStream)
{
var translationOptions = new AudioTranslationOptions()
{
DeploymentName = _deploymentName,
AudioData = BinaryData.FromStream(audioStream),
ResponseFormat = AudioTranslationFormat.Verbose,
Filename = fileName
};

Response<AudioTranslation> translationResponse = await _client.GetAudioTranslationAsync(translationOptions);

return translationResponse?.Value;
}
}
}
33 changes: 14 additions & 19 deletions OpenAIExtensions.Tests/AIAudioServiceTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Microsoft.Extensions.Configuration;
using Microsoft.SemanticKernel;
using OpenAIExtensions.Services;
using Xunit.Abstractions;

Expand All @@ -16,33 +17,27 @@ public AIAudioServiceTests(ITestOutputHelper outputHelper)
var endpoint = Configuration.GetValue<string>("OpenAI:AudioService:Endpoint")!;
var key = Configuration.GetValue<string>("OpenAI:AudioService:Key")!;

_audioService = new AIAudioService(new AIBroker(endpoint, key), logger);
}
var kernel = Kernel.CreateBuilder()
.AddAzureOpenAIAudioToText(
deploymentName: "whisper-001",
endpoint: endpoint,
apiKey: key)
.Build();

[Fact]
public async Task AIAudioService_Transcribe_mp4_files()
{
//Act
var response = await _audioService.TranscribeAsync("Content/18-13-52.m4a");

//Assert
Assert.NotNull(response);
Assert.NotEmpty(response.Language);
Assert.NotEmpty(response.Text);

WriteToConsole(response.Text);
_audioService = new AIAudioService(kernel, logger);
}

[Fact]
public async Task AIAudioService_Translate_mp4_files()
public async Task AIAudioService_AudioToText_works_for_mp4_files()
{
//Act
var audioTranslation = await _audioService.TranslateAsync("Content/18-13-52.m4a");
var response = await _audioService.AudioToTextAsync("Content/18-13-52.m4a");

//Assert
Assert.NotNull(audioTranslation?.Language);
Assert.NotNull(audioTranslation?.Text);
Assert.NotNull(response);
Assert.NotEmpty(response);

WriteToConsole(audioTranslation.Text);
WriteToConsole(response);
}

}
1 change: 1 addition & 0 deletions OpenAIExtensions.Tests/OpenAIExtensions.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

<IsPackable>false</IsPackable>
<IsTestProject>true</IsTestProject>
<NoWarn>SKEXP0001</NoWarn>
</PropertyGroup>

<ItemGroup>
Expand Down

0 comments on commit 84692c8

Please sign in to comment.