This commit is contained in:
Patrick Stevens
2024-01-29 00:12:27 +00:00
committed by GitHub
parent 078dc8e416
commit 24d3d8c00a
10 changed files with 500 additions and 46 deletions

View File

@@ -0,0 +1,2 @@
flask
waitress

View File

@@ -0,0 +1,107 @@
<!DOCTYPE html>
<html lang="en">
<head>
<title>Whisper Transcription</title>
<style>
#output {
white-space: pre-wrap;
border: 1px solid #ccc;
padding: 10px;
margin: 10px;
width: 95%;
height: 300px;
overflow-y: auto;
}
</style>
</head>
<body>
<h1>Whisper Transcription</h1>
<p>Submit file for transcription</p>
<form action="/upload" method="POST" enctype="multipart/form-data">
<input type="file" name="file">
<input type="submit" value="Submit">
</form>
<div>
<label for="file-to-analyze">File to analyze:</label>
<div contenteditable="true" id="file-to-analyze">{no file set}</div>
</div>
<button id="start">Start analysing</button>
<button id="displayWav" hidden="hidden">Listen to file being transcribed</button>
<div id="wavContainer"></div>
<div id="status"></div>
<div id="output"></div>
<script>
const uploadForm = document.querySelector('form');
const uploadResultDiv = document.getElementById('file-to-analyze');
// Intercept the form submission and upload the chosen file with fetch, so the
// page is not navigated away. On success the server's plain-text reply (the
// identifier of the stored file) is shown in the "file to analyze" box.
uploadForm.addEventListener('submit', e => {
    e.preventDefault();
    const files = document.querySelector('[type=file]').files;
    if (files.length === 0) {
        // Without this guard `files[0]` is undefined and gets sent as the
        // literal string "undefined".
        console.error('No file selected for upload');
        return;
    }
    const formData = new FormData();
    formData.append('file', files[0]);
    fetch('/upload', {
        method: 'POST',
        body: formData
    })
        .then(response => response.text().then(text => {
            // An error body must not be mistaken for a file identifier.
            if (!response.ok) {
                throw new Error('Upload failed (' + response.status + '): ' + text);
            }
            return text;
        }))
        .then(fileId => {
            uploadResultDiv.innerText = fileId;
        })
        .catch(error => console.error(error));
});
const outputDiv = document.getElementById('output');
const statusDiv = document.getElementById('status');
const displayButton = document.getElementById('displayWav');
const wavContainer = document.getElementById('wavContainer');
const startButton = document.getElementById('start');
// The stream opened by the most recent click on "Start analysing", if any.
// Kept so that a second click does not leave an older stream appending output.
let activeSource = null;
startButton.onclick = function() {
    if (activeSource !== null) {
        activeSource.close();
    }
    // The box is contenteditable, so strip stray whitespace and escape the
    // value before placing it in the query string.
    const requested = uploadResultDiv.innerText.trim();
    // Create a new EventSource instance pointing to the SSE route
    // const eventSource = new EventSource('/transcribe-youtube?url=https://www.youtube.com/watch?v=-xZQ0YZ7ls4');
    const eventSource = new EventSource('/transcribe-file?file=' + encodeURIComponent(requested));
    activeSource = eventSource;
    // Identifier reported by the server in the 'started' event; used to
    // build the audio download URL.
    let file = '';
    displayButton.onclick = function () {
        const audioElt = document.createElement('audio');
        audioElt.controls = true;
        audioElt.src = '/download?file=' + encodeURIComponent(file);
        // Replace any previously inserted player.
        wavContainer.innerHTML = '';
        wavContainer.appendChild(audioElt);
    };
    eventSource.addEventListener('started', function (e) {
        statusDiv.innerText = 'Transcription has begun. Please hold the line; my server is only very small and weedy.';
        displayButton.hidden = false;
        file = e.data;
    });
    eventSource.addEventListener('quit', function (e) {
        statusDiv.innerText = 'Transcription finished';
        // Close explicitly; otherwise EventSource would reconnect when the
        // server ends the response.
        eventSource.close();
    });
    // Unnamed events carry one line of transcript each.
    eventSource.onmessage = function (e) {
        outputDiv.innerText += e.data + '\n';
    };
    // Handle any errors
    eventSource.onerror = function (e) {
        if (eventSource.readyState === EventSource.CLOSED) {
            console.log('Connection was closed');
        } else {
            outputDiv.innerText += 'Error! Connection was lost. Refresh the page to retry.\n';
            eventSource.close();
        }
    };
};
</script>
</body>
</html>

View File

@@ -0,0 +1,65 @@
# NixOS module: runs the Whisper transcription web server as a systemd
# service and publishes it through an nginx virtual host.
{
  config,
  pkgs,
  lib,
  whisper-packages,
  ...
}: let
  cfg = config.services.whisper-config;
  # Interpreter with the two libraries the server script imports.
  pythonEnv = pkgs.python3.withPackages (ps: [ps.flask ps.waitress]);
in {
  options.services.whisper-config = {
    domain = lib.mkOption {
      type = lib.types.str;
      example = "example.com";
      description = lib.mdDoc "Top-level domain to configure";
    };
    subdomain = lib.mkOption {
      type = lib.types.str;
      example = "whisper";
      description = lib.mdDoc "Subdomain in which to put the Whisper server";
    };
    port = lib.mkOption {
      type = lib.types.port;
      default = 1739;
      description = lib.mdDoc "Whisper localhost port to be forwarded";
    };
  };

  config = {
    # Dedicated system account for the service.
    users.groups.whisper = {};
    users.users.whisper = {
      isSystemUser = true;
      group = "whisper";
      # NOTE(review): membership of the `keys` group is kept as in the
      # original; the reason is not visible in this module.
      extraGroups = [config.users.groups.keys.name];
    };

    systemd.services.whisper-server = {
      description = "whisper-server";
      wantedBy = ["multi-user.target"];
      serviceConfig = {
        Type = "exec";
        User = "whisper";
        Group = "whisper";
        Restart = "always";
        ExecStart = "${pythonEnv}/bin/python ${./whisper.py}";
      };
      # The server script reads all of its configuration from these variables.
      environment = {
        WHISPER_NORMALIZE = "${whisper-packages.normalize}/bin/normalize.sh";
        WHISPER_CLIENT = "${whisper-packages.default}/bin/whisper-cpp";
        WHISPER_PORT = toString cfg.port;
        INDEX_PAGE_PATH = ./transcribe.html;
        YT_DLP = "${pkgs.yt-dlp}/bin/yt-dlp";
      };
    };

    services.nginx = {
      proxyTimeout = "300s";
      clientMaxBodySize = "50M";
      # HTTPS virtual host forwarding every request to the local server port.
      virtualHosts."${cfg.subdomain}.${cfg.domain}" = {
        forceSSL = true;
        enableACME = true;
        locations."/" = {
          proxyPass = "http://localhost:${toString cfg.port}/";
        };
      };
    };
  };
}

View File

@@ -0,0 +1,129 @@
import subprocess
import os
from typing import AnyStr
import re
from flask import Flask, Response, request, render_template_string
import waitress
import tempfile
# The Flask application object; the route functions below register on it.
app = Flask(__name__)
# Matches a YouTube video URL: optional http(s):// scheme, optional "www.",
# then either youtu.be/ or youtube.com/ followed by embed/, v/, watch?v= or
# watch?...&v=, then an 11-character video id made of word characters or
# hyphens (capture group 1), then optional trailing non-whitespace.
youtube_regex = re.compile(
r"^(?:https?://)?(?:www\.)?(?:youtu\.be/|youtube\.com/(?:embed/|v/|watch\?v=|watch\?.+&v=))((\w|-){11})(?:\S+)?$")
# Matches a non-empty string of ASCII letters and digits only. Used to vet
# client-supplied file identifiers before they are placed in a filesystem path.
alnum_regex = re.compile(r"^[a-zA-Z0-9]+$")
def generate_output(wav_file):
    """Stream a transcription of ``/tmp/whisper/<wav_file>.wav`` as server-sent events.

    Yields, in order:

    * an ``event: started`` message whose data is ``wav_file``;
    * one unnamed ``data:`` message per line the whisper process writes to
      its standard output;
    * an ``event: quit`` message once that output is exhausted.

    The wav file is deleted when the generator finishes, whether it ran to
    completion or was closed early (for example because the client went
    away). In the latter case the whisper process is killed rather than left
    running unattended.

    :param wav_file: identifier of the file, i.e. its name under
        ``/tmp/whisper`` without the ``.wav`` suffix.
    """
    wav_path = f"/tmp/whisper/{wav_file}.wav"
    process = subprocess.Popen([whisper, "--file", wav_path, "--output-txt"],
                               stdout=subprocess.PIPE, bufsize=1,
                               text=True)
    ran_to_completion = False
    try:
        yield f'event: started\ndata: {wav_file}\n\n'
        for line in iter(process.stdout.readline, ''):
            # Drop the line terminator: the SSE framing supplies its own, and
            # an extra newline would put a stray blank line in the stream.
            text = line.rstrip("\n")
            yield f"data: {text}\n\n"
        ran_to_completion = True
        yield 'event: quit\ndata: \n\n'
    finally:
        if not ran_to_completion:
            # Interrupted mid-stream: nobody is reading the output any more.
            process.kill()
        process.stdout.close()
        # Reap the child so it does not linger as a zombie.
        process.wait()
        try:
            os.remove(wav_path)
        except FileNotFoundError:
            pass
def obtain_youtube(url: AnyStr) -> str:
    """Fetch the audio of a YouTube video; currently always fails.

    The download path is switched off, so every call raises ``Exception``
    with a fixed message and ``url`` is never used.
    """
    # Disabled implementation, kept for reference:
    #
    #   handle, temp_file = tempfile.mkstemp(".wav", text=False)
    #   os.close(handle)
    #   os.remove(temp_file)
    #   output = subprocess.run(
    #       [ytdlp, '--extract-audio', '--audio-format', 'wav', '--cookies', '/tmp/cookies.txt',
    #        '--audio-quality', '16k', '--force-ipv6', '--output', temp_file, url],
    #       check=True, capture_output=True, text=True)
    #   if "429 Too Many Requests" in output.stdout:
    #       raise subprocess.CalledProcessError(1, whisper, "YouTube replied saying Too Many Requests")
    #   return temp_file
    reason = "DigitalOcean is rate limited to YouTube"
    raise Exception(reason)
def normalize(path: str, output: str):
    """Run the external normalisation program on ``path``, targeting ``output``.

    The program named by the module-level ``normalize_binary`` is invoked
    with ``path`` and ``output`` as its two arguments.

    :returns: ``None`` when the program exits with status 0. On a non-zero
        exit status ``path`` is deleted and a 500 ``Response`` with the body
        "failed to normalize" is returned.
    """
    completed = subprocess.run([normalize_binary, path, output])
    if completed.returncode == 0:
        return None
    # The input is of no further use once normalisation has failed.
    os.remove(path)
    return Response("failed to normalize", status=500)
@app.route('/transcribe-youtube')
def transcribe_youtube():
    """Transcribe the YouTube video named by the ``url`` query parameter.

    Responds with 400 if ``url`` is absent or does not match
    ``youtube_regex``; otherwise obtains the audio via ``obtain_youtube`` and
    streams the transcription as ``text/event-stream``. Any exception raised
    by ``obtain_youtube`` propagates to Flask.
    """
    # ``args.get`` returns None for a missing key; it does not raise KeyError.
    url = request.args.get('url')
    if url is None:
        return Response("must have a URL in the format ?url=https://www.youtube.com/watch?v=...",
                        status=400, mimetype="text/plain")
    if youtube_regex.match(url) is None:
        # Sent as plain text: the message echoes caller-controlled input,
        # which must not be interpreted as HTML by a browser.
        return Response(f"url '{url}' did not appear to be a YouTube video",
                        status=400, mimetype="text/plain")
    wav_file = obtain_youtube(url)
    return Response(generate_output(wav_file), mimetype="text/event-stream")
@app.route('/transcribe-file')
def transcribe_file():
    """Transcribe a previously uploaded file named by the ``file`` query parameter.

    Responds with 400 if ``file`` is absent or is not purely ASCII
    alphanumeric, and with 404 if ``/tmp/whisper/<file>.wav`` does not exist.
    Otherwise streams the transcription as ``text/event-stream``.
    """
    # ``args.get`` returns None for a missing key; it does not raise KeyError.
    file = request.args.get('file')
    if file is None:
        return Response("must have a file as obtained from /upload, in the format ?file=...",
                        status=400, mimetype="text/plain")
    # fullmatch, because ``$`` alone would also accept a trailing newline.
    if alnum_regex.fullmatch(file) is None:
        # Sent as plain text: the message echoes caller-controlled input,
        # which must not be interpreted as HTML by a browser.
        return Response(f"filename '{file}' was not alphanumeric",
                        status=400, mimetype="text/plain")
    if not os.path.isfile(f"/tmp/whisper/{file}.wav"):
        # Fail up front instead of starting a transcription with no input.
        return Response(f"file '{file}' not found", status=404, mimetype="text/plain")
    return Response(generate_output(file), mimetype="text/event-stream")
@app.route('/transcribe-ui')
def index():
    """Serve the transcription web page.

    The page is read from the file named by the module-level
    ``index_page_path`` on every request and rendered as a template string.
    """
    # Context manager so the file handle is closed promptly.
    with open(index_page_path, encoding="utf-8") as page:
        template = page.read()
    return render_template_string(template)
@app.route('/upload', methods=["POST"])
def upload():
    """Accept an uploaded file, normalise it, and return its identifier.

    The multipart field ``file`` is saved to a temporary file and passed to
    ``normalize`` with the output target ``/tmp/whisper/<id>``. On success the
    response body is ``<id>`` as ``text/plain``; the client passes it to
    ``/transcribe-file`` and ``/download``.

    Responds with 400 when no file was supplied, and returns the failure
    response from ``normalize`` when normalisation fails. The temporary
    upload is removed in every case.
    """
    if 'file' not in request.files:
        return 'No "file" part in request', 400
    file = request.files['file']
    if not file.filename:
        # A form submitted with no file chosen still carries an empty part.
        return 'No file selected', 400
    # The identifier must be purely alphanumeric, since the other routes
    # reject anything else. Names generated by mkstemp may contain "_", so
    # the identifier is generated separately from the temporary file name.
    upload_id = os.urandom(16).hex()
    # Create temp file for this upload
    handle, temp_file = tempfile.mkstemp(text=False)
    os.close(handle)
    try:
        file.save(temp_file)
        # NOTE(review): the other routes read "/tmp/whisper/<id>.wav", so the
        # normalise program presumably appends ".wav" itself - confirm.
        failure = normalize(temp_file, f"/tmp/whisper/{upload_id}")
        if failure is not None:
            return failure
    finally:
        try:
            os.remove(temp_file)
        except FileNotFoundError:
            # normalize() already deletes its input when it fails.
            pass
    return Response(upload_id, mimetype="text/plain")
@app.route('/download')
def download():
    """Return the wav audio for the ``file`` query parameter.

    Responds with 400 if ``file`` is absent or is not purely ASCII
    alphanumeric, and with 404 if ``/tmp/whisper/<file>.wav`` does not exist.
    Otherwise responds with the file's bytes as ``audio/wav``.
    """
    # ``args.get`` returns None for a missing key; it does not raise KeyError.
    file = request.args.get('file')
    if file is None:
        return Response("must have a file parameter", status=400, mimetype="text/plain")
    # fullmatch, because ``$`` alone would also accept a trailing newline.
    if alnum_regex.fullmatch(file) is None:
        # Sent as plain text: the message echoes caller-controlled input,
        # which must not be interpreted as HTML by a browser.
        return Response(f"file '{file}' was not alphanumeric, bad format",
                        status=400, mimetype="text/plain")
    try:
        with open(f"/tmp/whisper/{file}.wav", 'rb') as wav:
            payload = wav.read()
    except FileNotFoundError:
        # The file is deleted once its transcription has finished.
        return Response(f"file '{file}' not found", status=404, mimetype="text/plain")
    return Response(payload, mimetype="audio/wav")
def run(port: int, host: str = "0.0.0.0") -> None:
    """Serve the Flask application with waitress.

    :param port: TCP port to listen on.
    :param host: address to bind to. Defaults to all interfaces, as before;
        pass ``"127.0.0.1"`` to accept only local connections.
    """
    waitress.serve(app, host=host, port=port)
if __name__ == "__main__":
    # Script entry point. All configuration comes from environment variables;
    # a missing one raises KeyError before the server starts. The first four
    # become module-level names that the functions above read when called.
    env = os.environ
    normalize_binary = env["WHISPER_NORMALIZE"]
    whisper = env["WHISPER_CLIENT"]
    index_page_path = env["INDEX_PAGE_PATH"]
    ytdlp = env["YT_DLP"]
    listen_port = int(env["WHISPER_PORT"])
    run(listen_port)