Skip to content

Commit

Permalink
Refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
durmisi committed Mar 19, 2024
1 parent d32dda9 commit 84692c8
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 164 deletions.
80 changes: 0 additions & 80 deletions Core/AIBroker.cs

This file was deleted.

8 changes: 5 additions & 3 deletions Core/ServiceCollectionExtensions.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Microsoft.Extensions.DependencyInjection;
using OpenAIExtensions.Chats;
using OpenAIExtensions.Services;
using OpenAIExtensions.Text2Sql;

Expand All @@ -8,10 +9,11 @@ public static class ServiceCollectionExtensions
{
/// <summary>
/// Registers the OpenAI extension services with scoped lifetime.
/// </summary>
/// <param name="services">The service collection to add registrations to.</param>
public static void AddOpenAI(this IServiceCollection services)
{
    // NOTE(review): the old AddScoped<IAIBroker, AIBroker>() registration is removed —
    // AIBroker.cs is deleted in this commit, so the type no longer exists.
    services.AddScoped<IAIAudioService, AIAudioService>();
    services.AddScoped<IAIConversationManager, AIConversationManager>();
    services.AddScoped<IAIImageService, AIImageService>();
    // NOTE(review): registers the concrete type as its own service contract —
    // confirm whether an IAITranslationService interface was intended here.
    services.AddScoped<AITranslationService, AITranslationService>();
    services.AddScoped<IAISqlGenerator, AISqlGenerator>();
    // Removed: AddScoped<IAIAudioService, IAIAudioService>() — mapping an interface
    // to itself as the implementation makes the container throw on first resolve,
    // and IAIAudioService is already registered above.
}
}
}
109 changes: 47 additions & 62 deletions Core/Services/AIAudioService.cs
Original file line number Diff line number Diff line change
@@ -1,77 +1,73 @@
using Azure;
using Azure.AI.OpenAI;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Logging;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.AudioToText;
using Microsoft.SemanticKernel.Connectors.OpenAI;

namespace OpenAIExtensions.Services
{
/// <summary>
/// Speech-to-text service backed by a Semantic Kernel audio-to-text connector.
/// </summary>
public interface IAIAudioService
{
    /// <summary>
    /// Transcribes the given audio stream to text.
    /// </summary>
    /// <param name="fileName">File name hint passed to the execution settings (used by the service to detect the audio format).</param>
    /// <param name="audioStream">Readable stream containing the audio data.</param>
    /// <param name="executionSettings">Optional transcription settings; a default is created when null.</param>
    /// <param name="ct">Token to cancel the operation.</param>
    /// <returns>The transcribed text, or null when the service returns none.</returns>
    Task<string?> AudioToTextAsync(
        string fileName,
        Stream audioStream,
        OpenAIAudioToTextExecutionSettings? executionSettings = null,
        CancellationToken ct = default);

    /// <summary>
    /// Opens the audio file at <paramref name="path"/> and transcribes it to text.
    /// </summary>
    /// <param name="path">Path of the audio file on disk.</param>
    /// <param name="executionSettings">Optional transcription settings; a default is created when null.</param>
    /// <param name="ct">Token to cancel the operation.</param>
    /// <returns>The transcribed text, or null when the service returns none.</returns>
    Task<string?> AudioToTextAsync(
        string path,
        OpenAIAudioToTextExecutionSettings? executionSettings = null,
        CancellationToken ct = default);
}

/// <summary>
/// https://drlee.io/transforming-audio-to-text-with-openais-speech-to-text-api-a-practical-step-by-step-guide-8139e4e65fdf
/// </summary>
public class AIAudioService : IAIAudioService
{
private readonly OpenAIClient _client;

private readonly Kernel _kernel;
private readonly ILogger<AIAudioService> _logger;

private readonly string _deploymentName = "whisper-001";

public AIAudioService(
IAIBroker aIBroker,
ILogger<AIAudioService> logger,
string? deploymentName = null)
Kernel kernel,
ILogger<AIAudioService> logger)
{
_kernel = kernel;
_logger = logger;
_client = aIBroker.GetClient();

if (!string.IsNullOrEmpty(deploymentName))
{
_deploymentName = deploymentName;
}
}

public async Task<AudioTranscription?> TranscribeAsync(string fileName, Stream audioStream)
public async Task<string?> AudioToTextAsync(
string audioFilename,
Stream audioStream,
OpenAIAudioToTextExecutionSettings? executionSettings = null,
CancellationToken ct = default)
{
var transcriptionOptions = new AudioTranscriptionOptions()
{
DeploymentName = _deploymentName,
AudioData = BinaryData.FromStream(audioStream),
ResponseFormat = AudioTranscriptionFormat.Verbose,
Filename = fileName
};
AudioContent audioContent = new(BinaryData.FromStream(audioStream));

Response<AudioTranscription> transcriptionResponse
= await _client.GetAudioTranscriptionAsync(transcriptionOptions);
var audioToTextService = _kernel.GetRequiredService<IAudioToTextService>();

var transcription = transcriptionResponse.Value;
return transcription;
}

public async Task<AudioTranscription?> TranscribeAsync(string path)
{
if (string.IsNullOrEmpty(path))
executionSettings ??= new(audioFilename)
{
throw new ArgumentException($"'{nameof(path)}' cannot be null or empty.", nameof(path));
}

var fileName = Path.GetFileName(path);
Prompt = null, // An optional text to guide the model's style or continue a previous audio segment.
// The prompt should match the audio language.
Language = "en", // The language of the audio data as two-letter ISO-639-1 language code (e.g. 'en' or 'es').
ResponseFormat = "text", // The format to return the transcribed text in.
// Supported formats are json, text, srt, verbose_json, or vtt. Default is 'json'.
Temperature = 0.3f, // The randomness of the generated text.
// Select a value from 0.0 to 1.0. 0 is the default.
};

using Stream audioStreamFromFile = File.OpenRead(path);
var textContent = await audioToTextService.GetTextContentAsync(
audioContent,
executionSettings: executionSettings,
cancellationToken: ct);

return await TranscribeAsync(fileName, audioStreamFromFile);
return textContent.Text;
}

public async Task<AudioTranslation?> TranslateAsync(string path)
public async Task<string?> AudioToTextAsync(
string path,
OpenAIAudioToTextExecutionSettings? executionSettings = null,
CancellationToken ct = default)
{
if (string.IsNullOrEmpty(path))
{
Expand All @@ -82,22 +78,11 @@ Response<AudioTranscription> transcriptionResponse

using Stream audioStreamFromFile = File.OpenRead(path);

return await TranslateAsync(fileName, audioStreamFromFile);
return await AudioToTextAsync(
fileName,
audioStreamFromFile,
executionSettings, ct);
}

public async Task<AudioTranslation?> TranslateAsync(string fileName, Stream audioStream)
{
var translationOptions = new AudioTranslationOptions()
{
DeploymentName = _deploymentName,
AudioData = BinaryData.FromStream(audioStream),
ResponseFormat = AudioTranslationFormat.Verbose,
Filename = fileName
};

Response<AudioTranslation> translationResponse = await _client.GetAudioTranslationAsync(translationOptions);

return translationResponse?.Value;
}
}
}
33 changes: 14 additions & 19 deletions OpenAIExtensions.Tests/AIAudioServiceTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Microsoft.Extensions.Configuration;
using Microsoft.SemanticKernel;
using OpenAIExtensions.Services;
using Xunit.Abstractions;

Expand All @@ -16,33 +17,27 @@ public AIAudioServiceTests(ITestOutputHelper outputHelper)
var endpoint = Configuration.GetValue<string>("OpenAI:AudioService:Endpoint")!;
var key = Configuration.GetValue<string>("OpenAI:AudioService:Key")!;

_audioService = new AIAudioService(new AIBroker(endpoint, key), logger);
}
var kernel = Kernel.CreateBuilder()
.AddAzureOpenAIAudioToText(
deploymentName: "whisper-001",
endpoint: endpoint,
apiKey: key)
.Build();

[Fact]
public async Task AIAudioService_Transcribe_mp4_files()
{
//Act
var response = await _audioService.TranscribeAsync("Content/18-13-52.m4a");

//Assert
Assert.NotNull(response);
Assert.NotEmpty(response.Language);
Assert.NotEmpty(response.Text);

WriteToConsole(response.Text);
_audioService = new AIAudioService(kernel, logger);
}

[Fact]
public async Task AIAudioService_Translate_mp4_files()
public async Task AIAudioService_AudioToText_works_for_mp4_files()
{
//Act
var audioTranslation = await _audioService.TranslateAsync("Content/18-13-52.m4a");
var response = await _audioService.AudioToTextAsync("Content/18-13-52.m4a");

//Assert
Assert.NotNull(audioTranslation?.Language);
Assert.NotNull(audioTranslation?.Text);
Assert.NotNull(response);
Assert.NotEmpty(response);

WriteToConsole(audioTranslation.Text);
WriteToConsole(response);
}

}
1 change: 1 addition & 0 deletions OpenAIExtensions.Tests/OpenAIExtensions.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

<IsPackable>false</IsPackable>
<IsTestProject>true</IsTestProject>
<NoWarn>SKEXP0001</NoWarn>
</PropertyGroup>

<ItemGroup>
Expand Down

0 comments on commit 84692c8

Please sign in to comment.