Skip to content

Commit

Permalink
transcript: create single package with components
Browse files Browse the repository at this point in the history
  • Loading branch information
ahmethakanbesel committed Feb 25, 2025
1 parent 6b990e1 commit f4e0bce
Show file tree
Hide file tree
Showing 4 changed files with 372 additions and 0 deletions.
117 changes: 117 additions & 0 deletions internal/transcript/repository.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package transcript

import (
"context"
"errors"
"log/slog"
"sync"

"github.com/ahmethakanbesel/youtube-video-summary/pkg/youtube"
)

// Sentinel errors returned by MemoryRepository.
var (
// ErrTranscriptNotFound is returned by Get when no entry is cached for the
// requested video ID.
ErrTranscriptNotFound = errors.New("transcript not found")
// ErrInvalidTranscript is returned by Save when given a nil transcript and
// by Get when the cached entry is nil.
ErrInvalidTranscript = errors.New("invalid transcript")
)

// Repository stores YouTube transcript responses keyed by video ID.
type Repository interface {
// Get returns the transcript stored for videoID, or an error if it cannot
// be returned.
Get(ctx context.Context, videoID string) (*youtube.TranscriptResponse, error)
// Save stores transcript under videoID.
Save(ctx context.Context, videoID string, transcript *youtube.TranscriptResponse) error
// Clear removes every stored transcript.
Clear(ctx context.Context) error
// Size reports the number of stored transcripts.
Size() int
}

// MemoryRepository is a Repository backed by an in-process map guarded by a
// read/write mutex. Create it with NewMemoryRepository so the map and logger
// are initialized.
type MemoryRepository struct {
// logger receives cache hit/miss and maintenance messages.
logger *slog.Logger
// cache maps a video ID to its stored transcript response.
cache map[string]*youtube.TranscriptResponse
// cacheLock guards every access to cache.
cacheLock sync.RWMutex
}

// Compile-time check that *MemoryRepository implements Repository.
var _ Repository = (*MemoryRepository)(nil)

// NewMemoryRepository returns an empty in-memory transcript store.
// A nil logger is replaced with slog.Default().
func NewMemoryRepository(logger *slog.Logger) *MemoryRepository {
	repo := &MemoryRepository{
		logger: logger,
		cache:  map[string]*youtube.TranscriptResponse{},
	}
	if repo.logger == nil {
		repo.logger = slog.Default()
	}
	return repo
}

// Get looks up the cached transcript for videoID.
//
// It returns an error for an empty videoID, the context's error if ctx is
// already done once the read lock is held, ErrTranscriptNotFound on a cache
// miss, and ErrInvalidTranscript if the cached entry is nil.
//
// The returned value is a shallow copy of the cached struct: replacing its
// fields does not affect the cache, but data reached through pointer fields
// (such as Raw) is still shared with the cached entry.
func (r *MemoryRepository) Get(ctx context.Context, videoID string) (*youtube.TranscriptResponse, error) {
	if videoID == "" {
		return nil, errors.New("video ID cannot be empty")
	}

	r.cacheLock.RLock()
	defer r.cacheLock.RUnlock()

	if err := ctx.Err(); err != nil {
		return nil, err
	}

	cached, ok := r.cache[videoID]
	switch {
	case !ok:
		r.logger.Debug("Cache miss", "video_id", videoID)
		return nil, ErrTranscriptNotFound
	case cached == nil:
		r.logger.Warn("Found nil transcript in cache", "video_id", videoID)
		return nil, ErrInvalidTranscript
	}

	r.logger.Debug("Cache hit", "video_id", videoID)
	snapshot := *cached
	return &snapshot, nil
}

// Save caches transcript under videoID, replacing any existing entry.
//
// It returns an error for an empty videoID, ErrInvalidTranscript for a nil
// transcript, and the context's error if ctx is already done once the write
// lock is held. The cache keeps its own shallow copy of *transcript, so later
// reassignment of the caller's struct fields does not change the cached
// entry.
func (r *MemoryRepository) Save(ctx context.Context, videoID string, transcript *youtube.TranscriptResponse) error {
	switch {
	case videoID == "":
		return errors.New("video ID cannot be empty")
	case transcript == nil:
		return ErrInvalidTranscript
	}

	r.cacheLock.Lock()
	defer r.cacheLock.Unlock()

	if err := ctx.Err(); err != nil {
		return err
	}

	stored := *transcript
	r.cache[videoID] = &stored
	r.logger.Debug("Cached transcript",
		"video_id", videoID,
		"cache_size", len(r.cache),
	)
	return nil
}

// Clear drops every cached transcript by swapping in a fresh map.
// It returns the context's error, leaving the cache untouched, if ctx is
// already done once the write lock is held.
func (r *MemoryRepository) Clear(ctx context.Context) error {
	r.cacheLock.Lock()
	defer r.cacheLock.Unlock()

	if err := ctx.Err(); err != nil {
		return err
	}

	r.cache = map[string]*youtube.TranscriptResponse{}
	r.logger.Info("Cache cleared")
	return nil
}

// Size reports how many transcripts are currently cached.
func (r *MemoryRepository) Size() int {
	r.cacheLock.RLock()
	n := len(r.cache)
	r.cacheLock.RUnlock()
	return n
}
89 changes: 89 additions & 0 deletions internal/transcript/router.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package transcript

import (
"embed"
"encoding/json"
"io/fs"
"log/slog"
"net/http"
"strconv"
)

// Router holds the HTTP handlers of the transcript API.
type Router struct {
// service performs the transcript lookup for handleGetTranscripts.
service *Service
}

// NewRouter builds the application's ServeMux: the transcript API is mounted
// at /api/v1/transcripts and every other path is served from the "dist"
// subtree of uiAssets. It panics if that subtree cannot be derived.
func NewRouter(svc *Service, uiAssets embed.FS) *http.ServeMux {
	distFS, err := fs.Sub(uiAssets, "dist")
	if err != nil {
		panic(err)
	}

	api := &Router{service: svc}

	mux := http.NewServeMux()
	mux.HandleFunc("/api/v1/transcripts", api.handleGetTranscripts)
	// Static UI files are the catch-all route.
	mux.Handle("/", http.FileServer(http.FS(distFS)))
	return mux
}

// writeJSONError sends statusCode with a JSON ErrorResponse body whose Error
// field is the standard status text and whose Message field is errMsg.
//
// If encoding the body fails the failure is only logged: the status line has
// already been sent at that point, so issuing a second response (as
// http.Error would) cannot change the status and would only append plain
// text to a response declared as JSON.
func (r *Router) writeJSONError(w http.ResponseWriter, errMsg string, statusCode int) {
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(statusCode)

	payload := ErrorResponse{
		Error:   http.StatusText(statusCode),
		Message: errMsg,
	}
	if err := json.NewEncoder(w).Encode(payload); err != nil {
		slog.Error("Failed to encode error response", "error", err)
	}
}

// handleGetTranscripts serves GET /api/v1/transcripts.
//
// Query parameters:
//   - videoUrl (required): the YouTube video URL.
//   - interval (optional): formatting interval in seconds; a missing or
//     unparsable value is passed to the service as 0, which the service
//     replaces with its default.
//
// Responses:
//   - 200 with the JSON-encoded TranscriptResponse.
//   - 400 if videoUrl is missing or the service reports ErrInvalidURL.
//   - 404 if the service reports ErrNoTranscript or returns an empty result.
//   - 405 (with an Allow header) for any method other than GET.
//   - 500 for every other failure.
func (r *Router) handleGetTranscripts(w http.ResponseWriter, req *http.Request) {
	if req.Method != http.MethodGet {
		w.Header().Set("Allow", http.MethodGet)
		r.writeJSONError(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	query := req.URL.Query()

	videoURL := query.Get("videoUrl")
	if videoURL == "" {
		r.writeJSONError(w, "Missing videoUrl parameter", http.StatusBadRequest)
		return
	}

	interval, err := strconv.ParseFloat(query.Get("interval"), 64)
	if err != nil {
		interval = 0 // Will default to 10.0 in service
	}

	resp, err := r.service.GetTranscripts(req.Context(), TranscriptRequest{
		VideoURL:        videoURL,
		IntervalSeconds: interval,
	})
	if err != nil {
		// The service returns these two sentinels unwrapped, so a direct
		// comparison is sufficient here.
		switch err {
		case ErrInvalidURL:
			r.writeJSONError(w, "Invalid YouTube video URL", http.StatusBadRequest)
		case ErrNoTranscript:
			r.writeJSONError(w, "No transcript available", http.StatusNotFound)
		default:
			r.writeJSONError(w, "Internal server error", http.StatusInternalServerError)
		}
		return
	}

	if resp.Raw == nil && resp.Formatted == nil {
		r.writeJSONError(w, "No transcript available", http.StatusNotFound)
		return
	}

	// Encode before touching the response so that an encoding failure can
	// still be reported with a proper 500 instead of after a 200 status line
	// has already been sent.
	body, err := json.Marshal(resp)
	if err != nil {
		r.writeJSONError(w, "Failed to encode response", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)
	// Trailing newline keeps the body identical to json.Encoder output.
	if _, err := w.Write(append(body, '\n')); err != nil {
		slog.Error("Failed to write transcript response", "error", err)
	}
}
146 changes: 146 additions & 0 deletions internal/transcript/service.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
package transcript

import (
"context"
"errors"
"fmt"
"net/url"
"regexp"
"slices"
"strings"

"github.com/ahmethakanbesel/youtube-video-summary/pkg/youtube"
)

// Sentinel errors returned by Service.GetTranscripts.
var (
// ErrNoTranscript is returned when the fetched response is nil, has no raw
// transcript, or has zero segments.
ErrNoTranscript = errors.New("no transcript available")
// ErrFailedToGet wraps a failure to fetch the transcript from the client.
ErrFailedToGet = errors.New("failed to get transcript")
// ErrFailedToFormat wraps a failure to format the raw transcript.
ErrFailedToFormat = errors.New("failed to format transcript")
// ErrInvalidURL is returned when the video URL is empty, is not a
// recognized YouTube URL, or yields no video ID.
ErrInvalidURL = errors.New("invalid YouTube video URL")
)

// Service retrieves and formats YouTube transcripts, consulting a Repository
// before calling the YouTube client.
type Service struct {
// client fetches and formats transcripts and supplies the logger.
client *youtube.Client
// repo caches fetched transcripts by video ID.
repo Repository
}

// NewService returns a Service that fetches transcripts through client and
// caches them in repo.
func NewService(client *youtube.Client, repo Repository) *Service {
	svc := new(Service)
	svc.client = client
	svc.repo = repo
	return svc
}

// GetTranscripts returns the raw and formatted transcript for the video named
// by req.
//
// req.VideoURL must be a YouTube URL accepted by IsValidUrl. req.VideoID is
// derived from the URL when empty. req.IntervalSeconds falls back to 10.0
// when it is not a positive number (zero, negative, or NaN).
//
// The repository is consulted first; on a miss (or repository error, which is
// logged) the transcript is fetched from the client, validated, and cached.
// A failure to cache is logged and otherwise ignored.
//
// Errors:
//   - ErrInvalidURL: the URL is empty, not a YouTube URL, or has no video ID.
//   - ErrFailedToGet: the client fetch failed; the cause is wrapped too, so
//     errors.Is also matches it (e.g. context.Canceled).
//   - ErrNoTranscript: the fetched response carries no transcript segments.
//   - ErrFailedToFormat: formatting failed; the cause is wrapped too.
func (s *Service) GetTranscripts(ctx context.Context, req TranscriptRequest) (TranscriptResponse, error) {
	interval := req.IntervalSeconds
	// Negated comparison on purpose: NaN fails every ordered comparison, so
	// "interval <= 0" would let it through to the formatter.
	if !(interval > 0) {
		interval = 10.0
	}

	// Validate video URL
	if req.VideoURL == "" || !s.IsValidUrl(req.VideoURL) {
		return TranscriptResponse{}, ErrInvalidURL
	}

	// Extract video ID from URL if not provided
	if req.VideoID == "" {
		req.VideoID = s.ExtractVideoId(req.VideoURL)
		if req.VideoID == "" {
			return TranscriptResponse{}, ErrInvalidURL
		}
	}

	// Try to get from cache first
	youtubeResp, err := s.repo.Get(ctx, req.VideoID)
	if err != nil {
		if !errors.Is(err, ErrTranscriptNotFound) {
			s.client.Logger().Error("Failed to get transcript from repository", "video_id", req.VideoID, "error", err)
		}

		// If not in cache or error, fetch from YouTube
		youtubeResp, err = s.client.GetTranscript(ctx, req.VideoID)
		if err != nil {
			s.client.Logger().Error("Failed to fetch raw transcript", "video_id", req.VideoID, "error", err)
			return TranscriptResponse{}, fmt.Errorf("%w: %w", ErrFailedToGet, err)
		}

		// Validate YouTube response before caching so that empty results are
		// never stored.
		if youtubeResp == nil || youtubeResp.Raw == nil || len(youtubeResp.Raw.Segments) == 0 {
			s.client.Logger().Warn("No transcript available", "video_id", req.VideoID)
			return TranscriptResponse{}, ErrNoTranscript
		}

		// Save the successful response
		if err := s.repo.Save(ctx, req.VideoID, youtubeResp); err != nil {
			s.client.Logger().Error("Failed to cache transcript", "video_id", req.VideoID, "error", err)
			// Continue despite cache error
		}
	}

	// Format the transcript
	formatted, err := s.client.FormatTranscript(ctx, youtubeResp.Raw, interval)
	if err != nil {
		s.client.Logger().Error("Failed to format transcript", "video_id", req.VideoID, "error", err)
		return TranscriptResponse{}, fmt.Errorf("%w: %w", ErrFailedToFormat, err)
	}

	return TranscriptResponse{
		Title:     youtubeResp.Title,
		Raw:       youtubeResp.Raw,
		Formatted: formatted,
	}, nil
}

// Patterns used by ExtractVideoId, compiled once at package initialization
// instead of on every call.
var (
	// bareVideoIDRe matches a string that is nothing but an 11-character ID.
	bareVideoIDRe = regexp.MustCompile(`^[a-zA-Z0-9_-]{11}$`)
	// urlVideoIDRe captures an 11-character ID embedded in a YouTube URL.
	urlVideoIDRe = regexp.MustCompile(`(?:\/|%3D|v=|vi=)([a-zA-Z0-9_-]{11})(?:[%#?&\/]|$)`)
)

// ExtractVideoId attempts to extract a YouTube video ID from a string.
// It accepts either a bare 11-character ID (letters, digits, '_' and '-'
// only) or a URL in which the ID follows "/", "v=", "vi=" or "%3D" and is
// followed by a delimiter or the end of the string.
// Returns an empty string if no valid video ID is found.
func (s *Service) ExtractVideoId(str string) string {
	// A bare ID must consist solely of ID characters; an arbitrary
	// 11-byte string is not a video ID.
	if bareVideoIDRe.MatchString(str) {
		return str
	}

	if matches := urlVideoIDRe.FindStringSubmatch(str); len(matches) > 1 {
		return matches[1]
	}

	return ""
}

// IsValidUrl checks if the provided URL has a valid YouTube domain.
// It handles domains: youtu.be, youtube.com, m.youtube.com, with or without
// a leading "www.", in any letter case, and with or without an explicit port.
// Returns true if the domain is a valid YouTube domain, false otherwise
// (including when the string cannot be parsed or carries no host, e.g. a URL
// written without a scheme).
func (s *Service) IsValidUrl(urlStr string) bool {
	parsedURL, err := url.Parse(urlStr)
	if err != nil {
		return false
	}

	// Hostname, unlike Host, excludes any ":port" suffix, so
	// "youtube.com:443" is compared as "youtube.com".
	host := strings.ToLower(parsedURL.Hostname())

	// Remove 'www.' if present
	host = strings.TrimPrefix(host, "www.")

	// List of valid YouTube domains
	validDomains := []string{
		"youtube.com",
		"youtu.be",
		"m.youtube.com",
	}

	return slices.Contains(validDomains, host)
}
20 changes: 20 additions & 0 deletions internal/transcript/transport.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package transcript

import "github.com/ahmethakanbesel/youtube-video-summary/pkg/youtube"

// TranscriptRequest is the input to Service.GetTranscripts.
type TranscriptRequest struct {
// VideoURL is the YouTube video URL; it is required and validated.
VideoURL string
// VideoID is the YouTube video ID; when empty it is extracted from VideoURL.
VideoID string
// IntervalSeconds is the interval passed to the transcript formatter;
// values <= 0 are replaced with 10.0.
IntervalSeconds float64
}

// TranscriptResponse is the result of Service.GetTranscripts and the JSON body
// returned by the transcripts endpoint on success.
type TranscriptResponse struct {
// Title is copied from the YouTube transcript response.
Title string `json:"title"`
// Raw is the unformatted transcript from the YouTube transcript response.
Raw *youtube.Transcript `json:"raw"`
// Formatted is the output of the client's FormatTranscript for Raw.
Formatted []string `json:"formatted"`
}

// ErrorResponse is the JSON body written by the router for failed requests.
type ErrorResponse struct {
// Error is the standard HTTP status text for the response's status code.
Error string `json:"error"`
// Message is a human-readable description of the failure.
Message string `json:"message"`
}

0 comments on commit f4e0bce

Please sign in to comment.