-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
transcript: create single package with components
- Loading branch information
1 parent
6b990e1
commit f4e0bce
Showing
4 changed files
with
372 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
package transcript | ||
|
||
import ( | ||
"context" | ||
"errors" | ||
"log/slog" | ||
"sync" | ||
|
||
"github.com/ahmethakanbesel/youtube-video-summary/pkg/youtube" | ||
) | ||
|
||
// Sentinel errors returned by the repository methods in this file. | ||
var ( | ||
// ErrTranscriptNotFound is returned by MemoryRepository.Get when the cache has no entry for the video ID. | ||
ErrTranscriptNotFound = errors.New("transcript not found") | ||
// ErrInvalidTranscript is returned when Save receives a nil transcript or Get finds a nil entry in the cache. | ||
ErrInvalidTranscript = errors.New("invalid transcript") | ||
) | ||
|
||
// Repository is the storage abstraction for transcripts, keyed by video ID. | ||
// MemoryRepository is the implementation visible in this package. | ||
type Repository interface { | ||
// Get looks up the transcript stored for videoID. | ||
Get(ctx context.Context, videoID string) (*youtube.TranscriptResponse, error) | ||
// Save stores transcript under videoID. | ||
Save(ctx context.Context, videoID string, transcript *youtube.TranscriptResponse) error | ||
// Clear removes all stored transcripts. | ||
Clear(ctx context.Context) error | ||
// Size reports the number of stored transcripts. | ||
Size() int | ||
} | ||
|
||
// MemoryRepository is a Repository backed by an in-process map. | ||
// Because it embeds a sync.RWMutex by value it must not be copied; use it through a pointer. | ||
type MemoryRepository struct { | ||
// logger receives cache hit/miss and maintenance messages. | ||
logger *slog.Logger | ||
// cache maps a video ID to its stored transcript. | ||
cache map[string]*youtube.TranscriptResponse | ||
// cacheLock guards every read and write of cache. | ||
cacheLock sync.RWMutex | ||
} | ||
|
||
// Compile-time check that *MemoryRepository satisfies Repository. | ||
var _ Repository = (*MemoryRepository)(nil) | ||
|
||
func NewMemoryRepository(logger *slog.Logger) *MemoryRepository { | ||
if logger == nil { | ||
logger = slog.Default() | ||
} | ||
|
||
return &MemoryRepository{ | ||
logger: logger, | ||
cache: make(map[string]*youtube.TranscriptResponse), | ||
} | ||
} | ||
|
||
func (r *MemoryRepository) Get(ctx context.Context, videoID string) (*youtube.TranscriptResponse, error) { | ||
if videoID == "" { | ||
return nil, errors.New("video ID cannot be empty") | ||
} | ||
|
||
r.cacheLock.RLock() | ||
defer r.cacheLock.RUnlock() | ||
|
||
select { | ||
case <-ctx.Done(): | ||
return nil, ctx.Err() | ||
default: | ||
transcript, exists := r.cache[videoID] | ||
if !exists { | ||
r.logger.Debug("Cache miss", "video_id", videoID) | ||
return nil, ErrTranscriptNotFound | ||
} | ||
|
||
if transcript == nil { | ||
r.logger.Warn("Found nil transcript in cache", "video_id", videoID) | ||
return nil, ErrInvalidTranscript | ||
} | ||
|
||
r.logger.Debug("Cache hit", "video_id", videoID) | ||
// Return a copy to prevent modifications to cached data | ||
transcriptCopy := *transcript | ||
return &transcriptCopy, nil | ||
} | ||
} | ||
|
||
func (r *MemoryRepository) Save(ctx context.Context, videoID string, transcript *youtube.TranscriptResponse) error { | ||
if videoID == "" { | ||
return errors.New("video ID cannot be empty") | ||
} | ||
if transcript == nil { | ||
return ErrInvalidTranscript | ||
} | ||
|
||
r.cacheLock.Lock() | ||
defer r.cacheLock.Unlock() | ||
|
||
select { | ||
case <-ctx.Done(): | ||
return ctx.Err() | ||
default: | ||
// Make a copy of the transcript to prevent external modifications | ||
transcriptCopy := *transcript | ||
r.cache[videoID] = &transcriptCopy | ||
r.logger.Debug("Cached transcript", | ||
"video_id", videoID, | ||
"cache_size", len(r.cache), | ||
) | ||
return nil | ||
} | ||
} | ||
|
||
func (r *MemoryRepository) Clear(ctx context.Context) error { | ||
r.cacheLock.Lock() | ||
defer r.cacheLock.Unlock() | ||
|
||
select { | ||
case <-ctx.Done(): | ||
return ctx.Err() | ||
default: | ||
r.cache = make(map[string]*youtube.TranscriptResponse) | ||
r.logger.Info("Cache cleared") | ||
return nil | ||
} | ||
} | ||
|
||
func (r *MemoryRepository) Size() int { | ||
r.cacheLock.RLock() | ||
defer r.cacheLock.RUnlock() | ||
return len(r.cache) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
package transcript | ||
|
||
import ( | ||
"embed" | ||
"encoding/json" | ||
"io/fs" | ||
"log/slog" | ||
"net/http" | ||
"strconv" | ||
) | ||
|
||
// Router groups the HTTP handlers of the transcript API. | ||
type Router struct { | ||
// service is the transcript service the handlers delegate to. | ||
service *Service | ||
} | ||
|
||
func NewRouter(svc *Service, uiAssets embed.FS) *http.ServeMux { | ||
r := &Router{service: svc} | ||
mux := http.NewServeMux() | ||
mux.HandleFunc("/api/v1/transcripts", r.handleGetTranscripts) | ||
|
||
// Serve static files from the dist directory | ||
distFS, err := fs.Sub(uiAssets, "dist") | ||
if err != nil { | ||
panic(err) | ||
} | ||
fs := http.FileServer(http.FS(distFS)) | ||
mux.Handle("/", fs) | ||
|
||
return mux | ||
} | ||
|
||
func (r *Router) writeJSONError(w http.ResponseWriter, errMsg string, statusCode int) { | ||
w.Header().Set("Content-Type", "application/json") | ||
w.WriteHeader(statusCode) | ||
err := json.NewEncoder(w).Encode(ErrorResponse{ | ||
Error: http.StatusText(statusCode), | ||
Message: errMsg, | ||
}) | ||
if err != nil { | ||
slog.Error("Failed to encode error response", "error", err) | ||
http.Error(w, "Internal Server Error", http.StatusInternalServerError) | ||
} | ||
} | ||
|
||
func (r *Router) handleGetTranscripts(w http.ResponseWriter, req *http.Request) { | ||
if req.Method != http.MethodGet { | ||
r.writeJSONError(w, "Method not allowed", http.StatusMethodNotAllowed) | ||
return | ||
} | ||
|
||
videoURL := req.URL.Query().Get("videoUrl") | ||
if videoURL == "" { | ||
r.writeJSONError(w, "Missing videoUrl parameter", http.StatusBadRequest) | ||
return | ||
} | ||
|
||
intervalStr := req.URL.Query().Get("interval") | ||
interval, err := strconv.ParseFloat(intervalStr, 64) | ||
if err != nil { | ||
interval = 0 // Will default to 10.0 in service | ||
} | ||
|
||
svcReq := TranscriptRequest{ | ||
VideoURL: videoURL, | ||
IntervalSeconds: interval, | ||
} | ||
|
||
resp, err := r.service.GetTranscripts(req.Context(), svcReq) | ||
if err != nil { | ||
switch { | ||
case err == ErrInvalidURL: | ||
r.writeJSONError(w, "Invalid YouTube video URL", http.StatusBadRequest) | ||
default: | ||
r.writeJSONError(w, "Internal server error", http.StatusInternalServerError) | ||
} | ||
return | ||
} | ||
|
||
if resp.Raw == nil && resp.Formatted == nil { | ||
r.writeJSONError(w, "No transcript available", http.StatusNotFound) | ||
return | ||
} | ||
|
||
w.Header().Set("Content-Type", "application/json") | ||
w.WriteHeader(http.StatusOK) | ||
if err := json.NewEncoder(w).Encode(resp); err != nil { | ||
r.writeJSONError(w, "Failed to encode response", http.StatusInternalServerError) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
package transcript | ||
|
||
import ( | ||
"context" | ||
"errors" | ||
"fmt" | ||
"net/url" | ||
"regexp" | ||
"slices" | ||
"strings" | ||
|
||
"github.com/ahmethakanbesel/youtube-video-summary/pkg/youtube" | ||
) | ||
|
||
// Sentinel errors returned by Service.GetTranscripts. | ||
var ( | ||
// ErrNoTranscript is returned when the fetched transcript is nil or has no segments. | ||
ErrNoTranscript = errors.New("no transcript available") | ||
// ErrFailedToGet wraps a failure of the YouTube client to fetch the transcript. | ||
ErrFailedToGet = errors.New("failed to get transcript") | ||
// ErrFailedToFormat wraps a failure of the YouTube client to format the transcript. | ||
ErrFailedToFormat = errors.New("failed to format transcript") | ||
// ErrInvalidURL is returned when the video URL is empty, is not on a YouTube domain, or yields no video ID. | ||
ErrInvalidURL = errors.New("invalid YouTube video URL") | ||
) | ||
|
||
// Service fetches, caches and formats YouTube transcripts. | ||
type Service struct { | ||
// client fetches and formats transcripts and supplies the logger used by the service. | ||
client *youtube.Client | ||
// repo is consulted before fetching and updated after a successful fetch. | ||
repo Repository | ||
} | ||
|
||
func NewService(client *youtube.Client, repo Repository) *Service { | ||
return &Service{ | ||
client: client, | ||
repo: repo, | ||
} | ||
} | ||
|
||
func (s *Service) GetTranscripts(ctx context.Context, req TranscriptRequest) (TranscriptResponse, error) { | ||
interval := req.IntervalSeconds | ||
if interval <= 0 { | ||
interval = 10.0 | ||
} | ||
|
||
// Validate video URL | ||
if req.VideoURL == "" || !s.IsValidUrl(req.VideoURL) { | ||
return TranscriptResponse{}, ErrInvalidURL | ||
} | ||
|
||
// Extract video ID from URL if not provided | ||
if req.VideoID == "" { | ||
req.VideoID = s.ExtractVideoId(req.VideoURL) | ||
if req.VideoID == "" { | ||
return TranscriptResponse{}, ErrInvalidURL | ||
} | ||
} | ||
|
||
var youtubeResp *youtube.TranscriptResponse | ||
var err error | ||
|
||
// Try to get from cache first | ||
youtubeResp, err = s.repo.Get(ctx, req.VideoID) | ||
if err != nil { | ||
if !errors.Is(err, ErrTranscriptNotFound) { | ||
s.client.Logger().Error("Failed to get transcript from repository", "video_id", req.VideoID, "error", err) | ||
} | ||
|
||
// If not in cache or error, fetch from YouTube | ||
youtubeResp, err = s.client.GetTranscript(ctx, req.VideoID) | ||
if err != nil { | ||
s.client.Logger().Error("Failed to fetch raw transcript", "video_id", req.VideoID, "error", err) | ||
return TranscriptResponse{}, fmt.Errorf("%w: %v", ErrFailedToGet, err) | ||
} | ||
|
||
// Validate YouTube response | ||
if youtubeResp == nil || youtubeResp.Raw == nil || len(youtubeResp.Raw.Segments) == 0 { | ||
s.client.Logger().Warn("No transcript available", "video_id", req.VideoID) | ||
return TranscriptResponse{}, ErrNoTranscript | ||
} | ||
|
||
// Save the successful response | ||
if err := s.repo.Save(ctx, req.VideoID, youtubeResp); err != nil { | ||
s.client.Logger().Error("Failed to cache transcript", "video_id", req.VideoID, "error", err) | ||
// Continue despite cache error | ||
} | ||
} | ||
|
||
// Create response | ||
resp := TranscriptResponse{ | ||
Title: youtubeResp.Title, | ||
Raw: youtubeResp.Raw, | ||
} | ||
|
||
// Format the transcript | ||
formatted, err := s.client.FormatTranscript(ctx, youtubeResp.Raw, interval) | ||
if err != nil { | ||
s.client.Logger().Error("Failed to format transcript", "video_id", req.VideoID, "error", err) | ||
return TranscriptResponse{}, fmt.Errorf("%w: %v", ErrFailedToFormat, err) | ||
} | ||
resp.Formatted = formatted | ||
|
||
return resp, nil | ||
} | ||
|
||
// ExtractVideoId attempts to extract a YouTube video ID from a string. | ||
// It can handle both direct 11-character IDs and various URL formats. | ||
// Returns empty string if no valid video ID is found. | ||
func (s *Service) ExtractVideoId(str string) string { | ||
// Check if the string is exactly 11 characters (direct video ID) | ||
if len(str) == 11 { | ||
return str | ||
} | ||
|
||
// Regular expression to match YouTube video ID in various URL formats | ||
pattern := `(?:\/|%3D|v=|vi=)([a-zA-Z0-9_-]{11})(?:[%#?&\/]|$)` | ||
regex := regexp.MustCompile(pattern) | ||
matches := regex.FindStringSubmatch(str) | ||
|
||
if len(matches) > 1 { | ||
return matches[1] | ||
} | ||
|
||
return "" | ||
} | ||
|
||
// IsValidUrl checks if the provided URL has a valid YouTube domain. | ||
// It handles domains: youtu.be, youtube.com, m.youtube.com, with or without www. | ||
// Returns true if the domain is a valid YouTube domain, false otherwise. | ||
func (s *Service) IsValidUrl(urlStr string) bool { | ||
// Parse the URL | ||
parsedURL, err := url.Parse(urlStr) | ||
if err != nil { | ||
return false | ||
} | ||
|
||
// Extract the host | ||
host := strings.ToLower(parsedURL.Host) | ||
|
||
// Remove 'www.' if present | ||
host = strings.TrimPrefix(host, "www.") | ||
|
||
// List of valid YouTube domains | ||
validDomains := []string{ | ||
"youtube.com", | ||
"youtu.be", | ||
"m.youtube.com", | ||
} | ||
|
||
// Check if the host matches any valid domain | ||
return slices.Contains(validDomains, host) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package transcript | ||
|
||
import "github.com/ahmethakanbesel/youtube-video-summary/pkg/youtube" | ||
|
||
// TranscriptRequest is the input to Service.GetTranscripts. | ||
type TranscriptRequest struct { | ||
// VideoURL is the YouTube video URL; GetTranscripts rejects an empty or non-YouTube value. | ||
VideoURL string | ||
// VideoID is optional; when empty, GetTranscripts extracts it from VideoURL. | ||
VideoID string | ||
// IntervalSeconds is passed to the transcript formatter; values <= 0 are replaced by 10 in GetTranscripts. | ||
IntervalSeconds float64 | ||
} | ||
|
||
// TranscriptResponse is the result of Service.GetTranscripts and the JSON body of a successful API response. | ||
type TranscriptResponse struct { | ||
// Title is copied from the YouTube transcript response. | ||
Title string `json:"title"` | ||
// Raw is the unformatted transcript copied from the YouTube transcript response. | ||
Raw *youtube.Transcript `json:"raw"` | ||
// Formatted is the output of the YouTube client's FormatTranscript for the requested interval. | ||
Formatted []string `json:"formatted"` | ||
} | ||
|
||
// ErrorResponse is the JSON body written by Router.writeJSONError. | ||
type ErrorResponse struct { | ||
// Error is the standard HTTP status text for the response code. | ||
Error string `json:"error"` | ||
// Message is the handler-supplied description of the failure. | ||
Message string `json:"message"` | ||
} | ||