PulumiConfig/PulumiWebServer/Nix/whisper/whisper.py

import subprocess
import os
from typing import AnyStr
import re
from flask import Flask, Response, request, render_template_string
import waitress
import tempfile

app = Flask(__name__)

youtube_regex = re.compile(
    r"^(?:https?://)?(?:www\.)?(?:youtu\.be/|youtube\.com/(?:embed/|v/|watch\?v=|watch\?.+&v=))((\w|-){11})(?:\S+)?$")

acceptable_regex = re.compile(r"^[a-zA-Z0-9_]+$")


def generate_output(wav_file):
    process = subprocess.Popen([whisper, "--file", f"/tmp/whisper/{wav_file}.wav", "--output-txt"],
                               stdout=subprocess.PIPE, bufsize=1,
                               text=True)

    yield f'event: started\ndata: {wav_file}\n\n'

    for line in iter(process.stdout.readline, ''):
        yield f"data: {line}\n\n"

    yield 'event: quit\ndata: \n\n'

    os.remove(f"/tmp/whisper/{wav_file}.wav")


def obtain_youtube(url: AnyStr) -> str:
    # handle, temp_file = tempfile.mkstemp(".wav", text=False)
    # os.close(handle)
    # os.remove(temp_file)

    # output = subprocess.run(
    #     [ytdlp, '--extract-audio', '--audio-format', 'wav', '--cookies', '/tmp/cookies.txt', '--audio-quality', '16k', '--force-ipv6', '--output', temp_file,
    #      url], check=True, capture_output=True, text=True)
    # if "429 Too Many Requests" in output.stdout:
    #     raise subprocess.CalledProcessError(1, whisper, "YouTube replied saying Too Many Requests")
    # return temp_file

    raise Exception("DigitalOcean is rate limited to YouTube")


def normalize(path: str, output: str):
    try:
        subprocess.run([normalize_binary, path, output], check=True)
    except subprocess.CalledProcessError:
        os.remove(path)
        return Response("failed to normalize", status=500)


@app.route('/transcribe-youtube')
def transcribe_youtube():
    try:
        url = request.args.get('url')
    except KeyError:
        return Response("must have a URL in the format ?url=https://www.youtube.com/watch?v=...", status=400)
    if youtube_regex.match(url) is None:
        return Response(f"url '{url}' did not appear to be a YouTube video", status=400)
    wav_file = obtain_youtube(url)
    return Response(generate_output(wav_file), mimetype="text/event-stream")


@app.route('/transcribe-file')
def transcribe_file():
    try:
        file = request.args.get('file')
    except KeyError:
        return Response("must have a file as obtained from /upload, in the format ?file=...", status=400)
    if acceptable_regex.match(file) is None:
        return Response(f"filename '{file}' did not match acceptable regex", status=400)
    return Response(generate_output(file), mimetype="text/event-stream")


@app.route('/transcribe-ui')
def index():
    return render_template_string(open(index_page_path).read())  # Assuming 'index.html' is in the same directory


@app.route('/upload', methods=["POST"])
def upload():
    if 'file' not in request.files:
        return 'No "file" part in request', 400
    file = request.files['file']

    try:
        os.mkdir("/tmp/whisper")
    except FileExistsError:
        pass

    # Create temp file for this upload
    handle, temp_file = tempfile.mkstemp(text=False)
    try:
        os.close(handle)
        file.save(temp_file)
        # get filename from absolute path
        temp_file_frag = os.path.basename(temp_file)

        normalize(temp_file, f"/tmp/whisper/{temp_file_frag}")
    finally:
        try:
            os.remove(temp_file)
        finally:
            pass

    return Response(temp_file_frag, mimetype="text/plain")


@app.route('/download')
def download():
    try:
        file = request.args.get('file')
    except KeyError:
        return Response("must have a file parameter", status=400)

    if acceptable_regex.match(file) is None:
        return Response(f"file '{file}' did not match acceptable regex, bad format", status=400)

    return Response(open(f"/tmp/whisper/{file}.wav", 'rb').read(), mimetype="audio/wav")


def run(port: int):
    waitress.serve(app, host="0.0.0.0", port=port)


if __name__ == "__main__":
    normalize_binary = os.environ["WHISPER_NORMALIZE"]
    whisper = os.environ["WHISPER_CLIENT"]
    index_page_path = os.environ["INDEX_PAGE_PATH"]
    ytdlp = os.environ["YT_DLP"]
    run(int(os.environ["WHISPER_PORT"]))