feat(backend): add video scene preview generation and endpoint
All checks were successful
Build and Publish Docker Image / deploy (push) Successful in 1m52s

- Register a new route `GET /d/{boxID}/scene/{fileID}` to serve video scene previews.
- Implement the `VideoScenesPreview` handler to serve existing previews or generate them on-demand.
- Add helper functions to analyze video frames (e.g., luma calculation to filter out dark frames) and render the final scene thumbnail.
- Update the `fileView` struct to include scene URL and status fields.
This commit is contained in:
2026-06-05 10:42:30 +03:00
parent 2eba04b9da
commit f9755fa98f
9 changed files with 552 additions and 32 deletions

View File

@@ -3,6 +3,7 @@ package jobs
import (
"bytes"
"context"
"fmt"
"html"
"image"
"image/color"
@@ -16,6 +17,7 @@ import (
"os/exec"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
@@ -108,25 +110,40 @@ func generateMissingThumbnailsForBox(uploadService *services.UploadService, logg
changed := false
for i := range box.Files {
file := &box.Files[i]
if file.Thumbnail != "" || !needsThumbnail(*file) {
needsPrimary := file.Thumbnail == "" && needsThumbnail(*file)
needsScenes := file.SceneThumbnail == "" && needsVideoScenes(*file)
if !needsPrimary && !needsScenes {
continue
}
result.Scanned++
thumbnail, err := generateThumbnail(uploadService, box, *file)
if err != nil {
logger.Warn("thumbnail generation failed", "source", "thumbnail", "severity", "warn", "code", 4101, "file_id", file.ID, "error", err.Error())
result.Failed++
continue
}
if thumbnail == "" {
result.Failed++
continue
if needsPrimary {
thumbnail, err := generateThumbnail(uploadService, box, *file)
if err != nil {
logger.Warn("thumbnail generation failed", "source", "thumbnail", "severity", "warn", "code", 4101, "file_id", file.ID, "error", err.Error())
result.Failed++
} else if thumbnail == "" {
result.Failed++
} else {
file.Thumbnail = thumbnail
changed = true
result.Generated++
}
}
file.Thumbnail = thumbnail
changed = true
result.Generated++
if needsScenes {
sceneThumbnail, err := generateVideoScenesThumbnail(uploadService, box, *file)
if err != nil {
logger.Warn("video scenes preview generation failed", "source", "thumbnail", "severity", "warn", "code", 4104, "file_id", file.ID, "error", err.Error())
result.Failed++
} else if sceneThumbnail == "" {
result.Failed++
} else {
file.SceneThumbnail = sceneThumbnail
changed = true
result.Generated++
}
}
}
if changed {
@@ -141,14 +158,26 @@ func needsThumbnail(file services.File) bool {
return file.PreviewKind == "image" || file.PreviewKind == "video" || isTextThumbnailCandidate(file)
}
// needsVideoScenes reports whether file is a video that should get a scenes
// preview: either its preview kind is "video" or its content type starts with
// "video/" (compared case-insensitively).
func needsVideoScenes(file services.File) bool {
	if file.PreviewKind == "video" {
		return true
	}
	contentType := strings.ToLower(file.ContentType)
	return strings.HasPrefix(contentType, "video/")
}
// NeedsThumbnail is the exported entry point for needsThumbnail; it returns
// that function's result for file unchanged.
func NeedsThumbnail(file services.File) bool {
return needsThumbnail(file)
}
// NeedsVideoScenes is the exported entry point for needsVideoScenes; it
// reports whether file qualifies for a video scenes preview.
func NeedsVideoScenes(file services.File) bool {
return needsVideoScenes(file)
}
// GenerateThumbnailForFile is the exported entry point for generateThumbnail;
// it forwards its arguments and returns that function's result unchanged.
func GenerateThumbnailForFile(uploadService *services.UploadService, box services.Box, file services.File) (string, error) {
return generateThumbnail(uploadService, box, file)
}
// GenerateVideoScenesForFile is the exported entry point for
// generateVideoScenesThumbnail; it forwards its arguments and returns that
// function's result unchanged.
func GenerateVideoScenesForFile(uploadService *services.UploadService, box services.Box, file services.File) (string, error) {
return generateVideoScenesThumbnail(uploadService, box, file)
}
func generateThumbnail(uploadService *services.UploadService, box services.Box, file services.File) (string, error) {
thumbnailName := "@thumb@" + file.ID + ".jpg"
object, err := uploadService.OpenFileObject(context.Background(), box, file)
@@ -184,6 +213,25 @@ func generateThumbnail(uploadService *services.UploadService, box services.Box,
}
}
// generateVideoScenesThumbnail builds the scenes preview for file and stores
// it through uploadService.PutThumbnailObject under the object name
// "@scene@<file ID>.jpg".
//
// It returns the stored object name on success. Files that do not qualify
// (per needsVideoScenes) yield "" and a nil error. On any failure the returned
// name is empty, so a caller can never record a preview that was not stored.
func generateVideoScenesThumbnail(uploadService *services.UploadService, box services.Box, file services.File) (string, error) {
	if !needsVideoScenes(file) {
		return "", nil
	}

	ctx := context.Background()
	object, err := uploadService.OpenFileObject(ctx, box, file)
	if err != nil {
		return "", fmt.Errorf("opening file %q for scenes preview: %w", file.ID, err)
	}
	defer object.Body.Close()

	data, err := createVideoScenesThumbnail(file, object.Body)
	if err != nil {
		return "", fmt.Errorf("rendering scenes preview for file %q: %w", file.ID, err)
	}
	// The renderer discards its JPEG encode error, so guard against storing
	// an empty object.
	if len(data) == 0 {
		return "", fmt.Errorf("rendering scenes preview for file %q: empty image", file.ID)
	}

	sceneName := "@scene@" + file.ID + ".jpg"
	if _, err := uploadService.PutThumbnailObject(ctx, box, sceneName, bytes.NewReader(data), int64(len(data)), "image/jpeg"); err != nil {
		return "", fmt.Errorf("storing scenes preview %q: %w", sceneName, err)
	}
	return sceneName, nil
}
func isTextThumbnailCandidate(file services.File) bool {
contentType := strings.ToLower(strings.TrimSpace(file.ContentType))
if i := strings.IndexByte(contentType, ';'); i >= 0 {
@@ -233,17 +281,320 @@ func createVideoThumbnail(source io.Reader) ([]byte, error) {
if err := sourceFile.Close(); err != nil {
return nil, err
}
targetFile, err := os.CreateTemp("", "warpbox-thumb-*.jpg")
sourcePath := sourceFile.Name()
candidates := []string{"00:00:01", "00:00:03", "00:00:06"}
var fallback []byte
for _, timestamp := range candidates {
targetFile, err := os.CreateTemp("", "warpbox-thumb-*.jpg")
if err != nil {
return nil, err
}
targetPath := targetFile.Name()
targetFile.Close()
if err := extractVideoFrame(sourcePath, timestamp, targetPath, "scale=360:-1"); err != nil {
os.Remove(targetPath)
continue
}
data, err := os.ReadFile(targetPath)
os.Remove(targetPath)
if err != nil {
continue
}
if len(fallback) == 0 {
fallback = data
}
if usableVideoFrame(data) {
return data, nil
}
}
scenes, err := createVideoScenesThumbnailFromPath(services.File{Name: "video", ContentType: "video"}, sourcePath)
if err == nil {
img, err := jpeg.Decode(bytes.NewReader(scenes))
if err == nil {
thumb := resizeNearest(img, 360, 240)
var target bytes.Buffer
if err := jpeg.Encode(&target, thumb, &jpeg.Options{Quality: 82}); err == nil {
return target.Bytes(), nil
}
}
}
if len(fallback) > 0 {
return fallback, nil
}
return nil, fmt.Errorf("could not extract a usable video thumbnail")
}
// createVideoScenesThumbnail copies source into a temporary file (pattern
// "warpbox-video-*"), which is removed when the function returns, and then
// delegates to createVideoScenesThumbnailFromPath with that file's path.
// Errors from creating, writing or closing the temporary file are returned
// as-is.
//
// NOTE(review): the lines in this span that reference targetFile/targetPath
// and the inline ffmpeg invocation look like removed-side diff lines from the
// previous createVideoThumbnail body interleaved by the diff rendering —
// confirm against the actual file before relying on them.
func createVideoScenesThumbnail(file services.File, source io.Reader) ([]byte, error) {
sourceFile, err := os.CreateTemp("", "warpbox-video-*")
if err != nil {
return nil, err
}
targetPath := targetFile.Name()
targetFile.Close()
defer os.Remove(targetPath)
if err := exec.Command("ffmpeg", "-y", "-loglevel", "error", "-ss", "00:00:01", "-i", sourceFile.Name(), "-frames:v", "1", "-vf", "scale=360:-1", targetPath).Run(); err != nil {
defer os.Remove(sourceFile.Name())
if _, err := io.Copy(sourceFile, source); err != nil {
sourceFile.Close()
return nil, err
}
return os.ReadFile(targetPath)
if err := sourceFile.Close(); err != nil {
return nil, err
}
return createVideoScenesThumbnailFromPath(file, sourceFile.Name())
}
// createVideoScenesThumbnailFromPath renders the scenes preview for the video
// stored at sourcePath. It probes the video, extracts one 640px-wide frame per
// timestamp from videoSceneTimestamps, and hands every frame that could be
// extracted and decoded to renderVideoScenesThumbnail. Frames that fail at any
// step are skipped silently; the returned error is always nil.
func createVideoScenesThumbnailFromPath(file services.File, sourcePath string) ([]byte, error) {
	info := probeVideoInfo(sourcePath, file)
	timestamps := videoSceneTimestamps(info.Duration)

	// grabFrame extracts and decodes the frame at timestamp through a
	// throwaway temp file; ok is false when any step fails.
	grabFrame := func(timestamp string) (frame image.Image, ok bool) {
		tmp, err := os.CreateTemp("", "warpbox-scene-*.jpg")
		if err != nil {
			return nil, false
		}
		framePath := tmp.Name()
		tmp.Close()
		defer os.Remove(framePath)

		if extractVideoFrame(sourcePath, timestamp, framePath, "scale=640:-1") != nil {
			return nil, false
		}
		raw, err := os.ReadFile(framePath)
		if err != nil {
			return nil, false
		}
		decoded, err := jpeg.Decode(bytes.NewReader(raw))
		if err != nil {
			return nil, false
		}
		return decoded, true
	}

	frames := make([]videoSceneFrame, 0, len(timestamps))
	for _, timestamp := range timestamps {
		if frame, ok := grabFrame(timestamp); ok {
			frames = append(frames, videoSceneFrame{Timestamp: timestamp, Image: frame})
		}
	}
	return renderVideoScenesThumbnail(file, info, frames), nil
}
// extractVideoFrame runs ffmpeg to write a single video frame from sourcePath,
// taken at timestamp and passed through the scaleFilter video filter, to
// targetPath (overwriting it). It returns the error from running the command.
func extractVideoFrame(sourcePath, timestamp, targetPath, scaleFilter string) error {
	args := []string{
		"-y",
		"-loglevel", "error",
		"-ss", timestamp,
		"-i", sourcePath,
		"-frames:v", "1",
		"-vf", scaleFilter,
		targetPath,
	}
	cmd := exec.Command("ffmpeg", args...)
	return cmd.Run()
}
// videoSceneFrame is one decoded frame of a video together with the position
// it was taken from.
type videoSceneFrame struct {
// Timestamp is the position passed to ffmpeg for this frame; it is also
// drawn as the cell label in the scenes preview.
Timestamp string
// Image is the decoded frame.
Image image.Image
}
// videoInfo holds the video metadata shown on the scenes preview, as filled
// in by probeVideoInfo.
type videoInfo struct {
// Codec is the codec name, or "unknown" when it was not reported.
Codec string
// Width and Height are the frame size; 0 means unknown.
Width int
Height int
// Duration is the length in seconds; 0 means unknown.
Duration float64
// FrameRate is a display string produced by simplifyFrameRate, or "unknown".
FrameRate string
}
// probeVideoInfo runs ffprobe on sourcePath and returns the codec, frame size,
// duration and frame rate of the first video stream.
//
// Fields that ffprobe does not report (missing, empty or "N/A") keep their
// defaults: "unknown" for Codec and FrameRate, zero for the numeric fields.
// If ffprobe itself fails, Codec falls back to file.ContentType when that is
// non-empty. The function never returns an error; it is best-effort.
func probeVideoInfo(sourcePath string, file services.File) videoInfo {
	info := videoInfo{Codec: "unknown", FrameRate: "unknown"}
	output, err := exec.Command(
		"ffprobe",
		"-v", "error",
		"-select_streams", "v:0",
		// Some containers (e.g. Matroska/WebM) report the stream duration as
		// N/A, so the container-level duration is requested as a fallback.
		"-show_entries", "stream=codec_name,width,height,duration,avg_frame_rate:format=duration",
		"-of", "default=noprint_wrappers=1",
		sourcePath,
	).Output()
	if err != nil {
		if file.ContentType != "" {
			info.Codec = file.ContentType
		}
		return info
	}

	for _, line := range strings.Split(string(output), "\n") {
		key, value, ok := strings.Cut(strings.TrimSpace(line), "=")
		if !ok || value == "" || value == "N/A" {
			continue
		}
		switch key {
		case "codec_name":
			info.Codec = value
		case "width":
			if n, err := strconv.Atoi(value); err == nil {
				info.Width = n
			}
		case "height":
			if n, err := strconv.Atoi(value); err == nil {
				info.Height = n
			}
		case "duration":
			// Both the stream and the format section print a "duration"
			// key; keep the first usable (positive) value.
			if info.Duration <= 0 {
				if d, err := strconv.ParseFloat(value, 64); err == nil && d > 0 {
					info.Duration = d
				}
			}
		case "avg_frame_rate":
			info.FrameRate = simplifyFrameRate(value)
		}
	}
	return info
}
// simplifyFrameRate turns a "numerator/denominator" frame-rate string into a
// decimal display form such as "29.97 fps". An empty value or "0/0" yields
// "unknown"; anything that is not exactly two numbers separated by one slash,
// or that has a zero denominator, is returned unchanged.
func simplifyFrameRate(value string) string {
	switch value {
	case "", "0/0":
		return "unknown"
	}

	numText, denText, found := strings.Cut(value, "/")
	if !found {
		return value
	}
	num, err := strconv.ParseFloat(numText, 64)
	if err != nil {
		return value
	}
	// A second slash ends up in denText and fails to parse, so inputs with
	// more than two parts fall through to "return value" as well.
	den, err := strconv.ParseFloat(denText, 64)
	if err != nil || den == 0 {
		return value
	}
	return fmt.Sprintf("%.2f fps", num/den)
}
// videoSceneTimestamps returns the four positions at which scene frames are
// sampled. For videos longer than four seconds they sit at 12%, 33%, 58% and
// 82% of duration (never earlier than one second); otherwise, including when
// the duration is unknown (0), a fixed set of early offsets is used.
func videoSceneTimestamps(duration float64) []string {
	// Written as a negated "> 4" so that a NaN duration also takes the
	// fixed-offset path.
	if !(duration > 4) {
		return []string{"00:00:01", "00:00:03", "00:00:06", "00:00:10"}
	}

	fractions := [...]float64{0.12, 0.33, 0.58, 0.82}
	timestamps := make([]string, len(fractions))
	for i, fraction := range fractions {
		offset := duration * fraction
		if offset < 1 {
			offset = 1
		}
		timestamps[i] = secondsToTimestamp(offset)
	}
	return timestamps
}
// secondsToTimestamp formats seconds, rounded to the nearest whole second, as
// a zero-padded "HH:MM:SS" string.
func secondsToTimestamp(seconds float64) string {
	remaining := int(seconds + 0.5)
	secs := remaining % 60
	remaining /= 60
	minutes := remaining % 60
	hours := remaining / 60
	return fmt.Sprintf("%02d:%02d:%02d", hours, minutes, secs)
}
// usableVideoFrame reports whether data is a decodable JPEG whose average
// luma (see averageLuma) is at least 18, i.e. the frame is not almost
// entirely dark.
func usableVideoFrame(data []byte) bool {
	const minUsableLuma = 18

	frame, err := jpeg.Decode(bytes.NewReader(data))
	return err == nil && averageLuma(frame) >= minUsableLuma
}
// averageLuma estimates the mean brightness of img on a 0-255 scale. It
// samples the image on a grid of roughly 80x80 points and averages the
// weighted sum 0.2126*R + 0.7152*G + 0.0722*B of each sample's 8-bit
// channels. An image with an empty bounding box yields 0.
func averageLuma(img image.Image) float64 {
	area := img.Bounds()
	if area.Dx() <= 0 || area.Dy() <= 0 {
		return 0
	}

	// Stride so that at most about 80 samples are taken along each axis.
	strideX := max(1, area.Dx()/80)
	strideY := max(1, area.Dy()/80)

	sum, count := 0.0, 0
	for y := area.Min.Y; y < area.Max.Y; y += strideY {
		for x := area.Min.X; x < area.Max.X; x += strideX {
			// RGBA returns 16-bit channels; shift down to 8 bits.
			red, green, blue, _ := img.At(x, y).RGBA()
			sum += 0.2126*float64(red>>8) + 0.7152*float64(green>>8) + 0.0722*float64(blue>>8)
			count++
		}
	}
	if count == 0 {
		return 0
	}
	return sum / float64(count)
}
// renderVideoScenesThumbnail draws the 1200x630 scenes preview card and
// returns it as JPEG bytes (quality 86).
//
// The card has a header with a fixed title and the (trimmed) file name, the
// metadata lines from videoMetaLines, and a 2x2 grid of cells. Cell i shows
// frames[i] with its timestamp as a label; cells without a frame show the
// text "No frame available". Frames beyond the fourth are ignored.
func renderVideoScenesThumbnail(file services.File, info videoInfo, frames []videoSceneFrame) []byte {
	canvas := image.NewRGBA(image.Rect(0, 0, 1200, 630))

	// Painted back to front: page background, card, header band, accent rule.
	// A single full-canvas fill is enough; an earlier fill underneath it
	// would be completely overwritten.
	drawSolid(canvas, canvas.Bounds(), color.RGBA{R: 0x10, G: 0x13, B: 0x1f, A: 0xff})
	drawSolid(canvas, image.Rect(36, 36, 1164, 594), color.RGBA{R: 0x17, G: 0x17, B: 0x22, A: 0xff})
	drawSolid(canvas, image.Rect(36, 36, 1164, 96), color.RGBA{R: 0x20, G: 0x1b, B: 0x34, A: 0xff})
	drawSolid(canvas, image.Rect(36, 96, 1164, 100), color.RGBA{R: 0x7c, G: 0x3a, B: 0xed, A: 0xff})

	face := basicfont.Face7x13
	drawThumbText(canvas, face, "VIDEO SCENES PREVIEW", 62, 63, color.RGBA{R: 0xc4, G: 0xb5, B: 0xfd, A: 0xff})
	drawThumbText(canvas, face, trimThumbnailText(file.Name, 72), 62, 84, color.RGBA{R: 0xff, G: 0xfb, B: 0xeb, A: 0xff})

	// Metadata lines, 20px apart, below the header.
	metaColor := color.RGBA{R: 0xcb, G: 0xd5, B: 0xe1, A: 0xff}
	y := 122
	for _, line := range videoMetaLines(file, info) {
		drawThumbText(canvas, face, line, 62, y, metaColor)
		y += 20
	}

	cells := []image.Rectangle{
		image.Rect(62, 212, 586, 388),
		image.Rect(614, 212, 1138, 388),
		image.Rect(62, 414, 586, 566),
		image.Rect(614, 414, 1138, 566),
	}
	for i, rect := range cells {
		drawSolid(canvas, rect, color.RGBA{R: 0x0f, G: 0x17, B: 0x22, A: 0xff})
		if i >= len(frames) {
			drawThumbText(canvas, face, "No frame available", rect.Min.X+18, rect.Min.Y+34, color.RGBA{R: 0x94, G: 0xa3, B: 0xb8, A: 0xff})
			continue
		}
		drawImageCover(canvas, rect, frames[i].Image)
		// Timestamp label on a dark backing in the cell's top-left corner.
		drawSolid(canvas, image.Rect(rect.Min.X, rect.Min.Y, rect.Min.X+88, rect.Min.Y+24), color.RGBA{R: 0x00, G: 0x00, B: 0x00, A: 0xcc})
		drawThumbText(canvas, face, frames[i].Timestamp, rect.Min.X+10, rect.Min.Y+17, color.RGBA{R: 0xff, G: 0xff, B: 0xff, A: 0xff})
	}

	var target bytes.Buffer
	// The error is ignored deliberately: writes to a bytes.Buffer do not fail
	// and the canvas is far below JPEG's maximum dimensions.
	_ = jpeg.Encode(&target, canvas, &jpeg.Options{Quality: 86})
	return target.Bytes()
}
// videoMetaLines builds the three metadata lines shown on the scenes preview:
// duration and codec, resolution and frame rate, and content type. Unknown
// values (non-positive size or duration, empty content type) are replaced by
// placeholder text.
func videoMetaLines(file services.File, info videoInfo) []string {
	var resolution string
	if info.Width > 0 && info.Height > 0 {
		resolution = fmt.Sprintf("%dx%d", info.Width, info.Height)
	} else {
		resolution = "unknown resolution"
	}

	var duration string
	if info.Duration > 0 {
		duration = secondsToHumanDuration(info.Duration)
	} else {
		duration = "unknown duration"
	}

	contentType := "video"
	if file.ContentType != "" {
		contentType = file.ContentType
	}

	lines := make([]string, 0, 3)
	lines = append(lines, "Duration: "+duration+" Codec: "+info.Codec)
	lines = append(lines, "Resolution: "+resolution+" Frame rate: "+info.FrameRate)
	lines = append(lines, "Type: "+contentType+" Generated by Warpbox")
	return lines
}
// secondsToHumanDuration formats seconds, rounded to the nearest whole second,
// as "M:SS", or as "H:MM:SS" once the value reaches one hour.
func secondsToHumanDuration(seconds float64) string {
	total := int(seconds + 0.5)
	secs := total % 60
	minutes := (total / 60) % 60
	hours := total / 3600
	if hours <= 0 {
		return fmt.Sprintf("%d:%02d", minutes, secs)
	}
	return fmt.Sprintf("%d:%02d:%02d", hours, minutes, secs)
}
// drawImageCover paints src into rect on dst so that it fills the rectangle
// completely while keeping its aspect ratio: the source is center-cropped to
// the rectangle's aspect ratio and then scaled with nearest-neighbour
// sampling. Nothing is drawn when the source or rect has no area.
func drawImageCover(dst *image.RGBA, rect image.Rectangle, src image.Image) {
	bounds := src.Bounds()
	srcW, srcH := bounds.Dx(), bounds.Dy()
	dstW, dstH := rect.Dx(), rect.Dy()
	if srcW <= 0 || srcH <= 0 || dstW <= 0 || dstH <= 0 {
		return
	}

	srcRatio := float64(srcW) / float64(srcH)
	dstRatio := float64(dstW) / float64(dstH)
	crop := bounds
	switch {
	case srcRatio > dstRatio:
		// Source is too wide: trim the left and right edges. The clamp to 1
		// keeps the crop non-empty when the ideal width truncates to 0, which
		// would otherwise produce out-of-bounds sample coordinates.
		newW := max(1, int(float64(srcH)*dstRatio))
		x0 := bounds.Min.X + (srcW-newW)/2
		crop = image.Rect(x0, bounds.Min.Y, x0+newW, bounds.Max.Y)
	case srcRatio < dstRatio:
		// Source is too tall: trim the top and bottom edges (same clamp).
		newH := max(1, int(float64(srcW)/dstRatio))
		y0 := bounds.Min.Y + (srcH-newH)/2
		crop = image.Rect(bounds.Min.X, y0, bounds.Max.X, y0+newH)
	}

	cropW, cropH := crop.Dx(), crop.Dy()
	for y := rect.Min.Y; y < rect.Max.Y; y++ {
		v := float64(y-rect.Min.Y) / float64(dstH)
		srcY := crop.Min.Y + min(cropH-1, int(v*float64(cropH)))
		for x := rect.Min.X; x < rect.Max.X; x++ {
			u := float64(x-rect.Min.X) / float64(dstW)
			srcX := crop.Min.X + min(cropW-1, int(u*float64(cropW)))
			dst.Set(x, y, src.At(srcX, srcY))
		}
	}
}
func createTextThumbnail(file services.File, source io.Reader) ([]byte, error) {