Whisper (#26)

2025-10-18 14:28:40 +00:00 · 2024-01-29 00:12:27 +00:00
parent 078dc8e416
commit 24d3d8c00a
10 changed files with 500 additions and 46 deletions
--- a/PulumiWebServer/Nix/whisper/requirements.txt
+++ b/PulumiWebServer/Nix/whisper/requirements.txt
@@ -0,0 +1,2 @@
+flask
+waitress
--- a/PulumiWebServer/Nix/whisper/transcribe.html
+++ b/PulumiWebServer/Nix/whisper/transcribe.html
@@ -0,0 +1,107 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <title>Whisper Transcription</title>
+    <style>
+        #output {
+            white-space: pre-wrap;
+            border: 1px solid #ccc;
+            padding: 10px;
+            margin: 10px;
+            width: 95%;
+            height: 300px;
+            overflow-y: auto;
+        }
+    </style>
+</head>
+<body>
+    <h1>Whisper Transcription</h1>
+    <p>Submit file for transcription</p>
+    <form action="/upload" method="POST" enctype="multipart/form-data">
+        <input type="file" name="file">
+        <input type="submit" value="Submit">
+    </form>
+
+    <div>
+      <label for="file-to-analyze">File to analyze:</label>
+      <div contenteditable="true" id="file-to-analyze">{no file set}</div>
+    </div>
+    <button id="start">Start analysing</button>
+
+    <button id="displayWav" hidden="hidden">Listen to file being transcribed</button>
+    <div id="wavContainer"></div>
+
+    <div id="status"></div>
+    <div id="output"></div>
+
+    <script>
+        const uploadForm = document.querySelector('form');
+        const uploadResultDiv = document.getElementById('file-to-analyze');
+        uploadForm.addEventListener('submit', e => {
+            e.preventDefault();
+
+            const files = document.querySelector('[type=file]').files;
+            const formData = new FormData();
+            formData.append('file', files[0]);
+
+            fetch('/upload', {
+                method: 'POST',
+                body: formData
+            })
+            .then(response => response.text())
+            .then(response => {
+                uploadResultDiv.innerText = response;
+            })
+            .catch(error => console.error(error))
+        });
+
+        const outputDiv = document.getElementById('output');
+        const statusDiv = document.getElementById('status');
+        const displayButton = document.getElementById('displayWav');
+        const wavContainer = document.getElementById('wavContainer');
+        const startButton = document.getElementById('start');
+
+        startButton.onclick = function() {
+            // Create a new EventSource instance pointing to the SSE route
+            // const eventSource = new EventSource('/transcribe-youtube?url=https://www.youtube.com/watch?v=-xZQ0YZ7ls4');
+            const eventSource = new EventSource('/transcribe-file?file=' + uploadResultDiv.innerText);
+
+            let file = '';
+
+            displayButton.onclick = function () {
+                const audioElt = document.createElement('audio');
+                audioElt.controls = true;
+                audioElt.src = '/download?file=' + file;
+
+                wavContainer.innerHTML = '';
+                wavContainer.appendChild(audioElt);
+            };
+
+            eventSource.addEventListener('started', function (e) {
+                statusDiv.innerText = 'Transcription has begun. Please hold the line; my server is only very small and weedy.';
+                displayButton.hidden = false;
+                file = e.data;
+            });
+
+            eventSource.addEventListener('quit', function (e) {
+                statusDiv.innerText = 'Transcription finished';
+                eventSource.close()
+            });
+
+            eventSource.onmessage = function (e) {
+                outputDiv.innerText += e.data + '\n';
+            };
+
+            // Handle any errors
+            eventSource.onerror = function (e) {
+                if (eventSource.readyState === EventSource.CLOSED) {
+                    console.log('Connection was closed');
+                } else {
+                    outputDiv.innerText += 'Error! Connection was lost. Refresh the page to retry.\n';
+                    eventSource.close()
+                }
+            };
+        };
+    </script>
+</body>
+</html>
--- a/PulumiWebServer/Nix/whisper/whisper.nix
+++ b/PulumiWebServer/Nix/whisper/whisper.nix
@@ -0,0 +1,65 @@
+# NixOS module: runs the Whisper transcription web server (whisper.py) as a
+# systemd service and publishes it through an nginx virtual host.
+{
+  config,
+  pkgs,
+  lib,
+  whisper-packages,
+  ...
+}: {
+  # User-facing settings, all under services.whisper-config.
+  options = {
+    services.whisper-config = {
+      domain = lib.mkOption {
+        type = lib.types.str;
+        example = "example.com";
+        description = lib.mdDoc "Top-level domain to configure";
+      };
+      subdomain = lib.mkOption {
+        type = lib.types.str;
+        example = "whisper";
+        description = lib.mdDoc "Subdomain in which to put the Whisper server";
+      };
+      port = lib.mkOption {
+        type = lib.types.port;
+        description = lib.mdDoc "Whisper localhost port to be forwarded";
+        default = 1739;
+      };
+    };
+  };
+
+  config = {
+    # Dedicated system user and group for the service. The user is also added to
+    # the `keys` group (presumably for access to deployed secrets; confirm).
+    users.users."whisper".extraGroups = [config.users.groups.keys.name];
+    users.users."whisper".group = "whisper";
+    users.groups.whisper = {};
+    users.users."whisper".isSystemUser = true;
+
+    systemd.services.whisper-server = {
+      description = "whisper-server";
+      wantedBy = ["multi-user.target"];
+      serviceConfig = let
+        # Interpreter bundled with the two Python packages the server imports.
+        python = pkgs.python3.withPackages (p: with p; [flask waitress]);
+      in {
+        Restart = "always";
+        Type = "exec";
+        User = "whisper";
+        Group = "whisper";
+        ExecStart = "${python}/bin/python ${./whisper.py}";
+      };
+      # whisper.py reads each of these from os.environ when it starts.
+      environment = {
+        WHISPER_NORMALIZE = "${whisper-packages.normalize}/bin/normalize.sh";
+        WHISPER_CLIENT = "${whisper-packages.default}/bin/whisper-cpp";
+        WHISPER_PORT = toString config.services.whisper-config.port;
+        INDEX_PAGE_PATH = ./transcribe.html;
+        YT_DLP = "${pkgs.yt-dlp}/bin/yt-dlp";
+      };
+    };
+
+    # NOTE(review): these two are set on services.nginx itself rather than on the
+    # virtual host below, so they look like they apply to every site served by
+    # this nginx; confirm that is intended.
+    services.nginx.proxyTimeout = "300s";
+    services.nginx.clientMaxBodySize = "50M";
+    # HTTPS-only virtual host <subdomain>.<domain> that proxies everything to the
+    # local Whisper port.
+    services.nginx.virtualHosts."${config.services.whisper-config.subdomain}.${config.services.whisper-config.domain}" = {
+      forceSSL = true;
+      enableACME = true;
+      locations."/" = {
+        proxyPass = "http://localhost:${toString config.services.whisper-config.port}/";
+      };
+    };
+  };
+}
--- a/PulumiWebServer/Nix/whisper/whisper.py
+++ b/PulumiWebServer/Nix/whisper/whisper.py
@@ -0,0 +1,129 @@
+import subprocess
+import os
+from typing import AnyStr
+import re
+from flask import Flask, Response, request, render_template_string
+import waitress
+import tempfile
+
+app = Flask(__name__)
+
+youtube_regex = re.compile(
+    r"^(?:https?://)?(?:www\.)?(?:youtu\.be/|youtube\.com/(?:embed/|v/|watch\?v=|watch\?.+&v=))((\w|-){11})(?:\S+)?$")
+
+alnum_regex = re.compile(r"^[a-zA-Z0-9]+$")
+
+
+def generate_output(wav_file):
+    process = subprocess.Popen([whisper, "--file", f"/tmp/whisper/{wav_file}.wav", "--output-txt"],
+                               stdout=subprocess.PIPE, bufsize=1,
+                               text=True)
+
+    yield f'event: started\ndata: {wav_file}\n\n'
+
+    for line in iter(process.stdout.readline, ''):
+        yield f"data: {line}\n\n"
+
+    yield 'event: quit\ndata: \n\n'
+
+    os.remove(f"/tmp/whisper/{wav_file}.wav")
+
+
+def obtain_youtube(url: AnyStr) -> str:
+    """Fetch the audio of a YouTube video as a wav file (currently disabled).
+
+    This function always raises ``Exception``. The yt-dlp based
+    implementation is kept below, commented out, for reference only.
+
+    :param url: URL of the video to download.
+    :raises Exception: always.
+    """
+    # handle, temp_file = tempfile.mkstemp(".wav", text=False)
+    # os.close(handle)
+    # os.remove(temp_file)
+
+    # output = subprocess.run(
+    #     [ytdlp, '--extract-audio', '--audio-format', 'wav', '--cookies', '/tmp/cookies.txt', '--audio-quality', '16k', '--force-ipv6', '--output', temp_file,
+    #      url], check=True, capture_output=True, text=True)
+    # if "429 Too Many Requests" in output.stdout:
+    #     raise subprocess.CalledProcessError(1, whisper, "YouTube replied saying Too Many Requests")
+    # return temp_file
+
+    raise Exception("DigitalOcean is rate limited to YouTube")
+
+
+def normalize(path: str, output: str):
+    try:
+        subprocess.run([normalize_binary, path, output], check=True)
+    except subprocess.CalledProcessError:
+        os.remove(path)
+        return Response("failed to normalize", status=500)
+
+
+@app.route('/transcribe-youtube')
+def transcribe_youtube():
+    try:
+        url = request.args.get('url')
+    except KeyError:
+        return Response("must have a URL in the format ?url=https://www.youtube.com/watch?v=...", status=400)
+    if youtube_regex.match(url) is None:
+        return Response(f"url '{url}' did not appear to be a YouTube video", status=400)
+    wav_file = obtain_youtube(url)
+    return Response(generate_output(wav_file), mimetype="text/event-stream")
+
+
+@app.route('/transcribe-file')
+def transcribe_file():
+    try:
+        file = request.args.get('file')
+    except KeyError:
+        return Response("must have a file as obtained from /upload, in the format ?file=...", status=400)
+    if alnum_regex.match(file) is None:
+        return Response(f"filename '{file}' was not alphanumeric", status=400)
+    return Response(generate_output(file), mimetype="text/event-stream")
+
+
+@app.route('/transcribe-ui')
+def index():
+    return render_template_string(open(index_page_path).read())  # Assuming 'index.html' is in the same directory
+
+
+@app.route('/upload', methods=["POST"])
+def upload():
+    if 'file' not in request.files:
+        return 'No "file" part in request', 400
+    file = request.files['file']
+
+    # Create temp file for this upload
+    handle, temp_file = tempfile.mkstemp(text=False)
+    try:
+        os.close(handle)
+        file.save(temp_file)
+        # get filename from absolute path
+        temp_file_frag = os.path.basename(temp_file)
+
+        normalize(temp_file, f"/tmp/whisper/{temp_file_frag}")
+    finally:
+        try:
+            os.remove(temp_file)
+        finally:
+            pass
+
+    return Response(temp_file_frag, mimetype="text/plain")
+
+
+@app.route('/download')
+def download():
+    try:
+        file = request.args.get('file')
+    except KeyError:
+        return Response("must have a file parameter", status=400)
+
+    if alnum_regex.match(file) is None:
+        return Response(f"file '{file}' was not alphanumeric, bad format", status=400)
+
+    return Response(open(f"/tmp/whisper/{file}.wav", 'rb').read(), mimetype="audio/wav")
+
+
+def run(port: int):
+    waitress.serve(app, host="0.0.0.0", port=port)
+
+
+# Script entry point: read the configuration from the environment, then serve.
+# These module-level names are used by the functions above, but they are only
+# assigned when the file is run as a script; a missing variable raises KeyError
+# here, before the server starts.
+if __name__ == "__main__":
+    normalize_binary = os.environ["WHISPER_NORMALIZE"]  # script run by normalize()
+    whisper = os.environ["WHISPER_CLIENT"]  # executable run by generate_output()
+    index_page_path = os.environ["INDEX_PAGE_PATH"]  # page served by index()
+    ytdlp = os.environ["YT_DLP"]  # only referenced by the commented-out code in obtain_youtube()
+    run(int(os.environ["WHISPER_PORT"]))