mirror of
https://github.com/Smaug123/PulumiConfig
synced 2025-10-07 17:48:40 +00:00
135 lines
4.2 KiB
Python
135 lines
4.2 KiB
Python
import subprocess
|
|
import os
|
|
from typing import AnyStr
|
|
import re
|
|
from flask import Flask, Response, request, render_template_string
|
|
import waitress
|
|
import tempfile
|
|
|
|
# The WSGI application object; every route below is registered on it.
app = Flask(__name__)
|
|
|
|
# Recognises YouTube video URLs (youtu.be short links, /embed/, /v/ and
# /watch?v= forms, with optional scheme and "www.").  Group 1 captures the
# 11-character video id.
youtube_regex = re.compile(
    r"^(?:https?://)?(?:www\.)?(?:youtu\.be/|youtube\.com/(?:embed/|v/|watch\?v=|watch\?.+&v=))((\w|-){11})(?:\S+)?$"
)

# File identifiers accepted from clients: ASCII letters, digits and
# underscores only, so they can be interpolated into a path under /tmp/whisper.
acceptable_regex = re.compile(r"^[a-zA-Z0-9_]+$")
|
|
|
|
|
|
def generate_output(wav_file):
    """Stream a whisper transcription of a clip as server-sent events.

    Runs the ``whisper`` binary on ``/tmp/whisper/{wav_file}.wav`` and yields:

    * one ``started`` event carrying ``wav_file``,
    * one ``data:`` message per line the binary prints on stdout,
    * one final ``quit`` event.

    The child process is always reaped and the wav file is always removed when
    the generator finishes, including when the consumer stops iterating early
    (for example because the HTTP client disconnected).

    :param wav_file: file identifier, without directory or ``.wav`` suffix.
    """
    wav_path = f"/tmp/whisper/{wav_file}.wav"
    process = subprocess.Popen([whisper, "--file", wav_path, "--output-txt"],
                               stdout=subprocess.PIPE, bufsize=1,
                               text=True)
    stream_drained = False
    try:
        yield f'event: started\ndata: {wav_file}\n\n'

        for line in iter(process.stdout.readline, ''):
            yield f"data: {line}\n\n"
        stream_drained = True

        yield 'event: quit\ndata: \n\n'
    finally:
        # Only kill the child when we stopped reading before EOF; after EOF it
        # is allowed to finish on its own and is merely waited for.
        if not stream_drained:
            process.kill()
        process.stdout.close()
        process.wait()
        try:
            os.remove(wav_path)
        except FileNotFoundError:
            # Nothing to clean up; do not mask the original outcome.
            pass
|
|
|
|
|
|
def obtain_youtube(url: AnyStr) -> str:
    """Fetch the audio of a YouTube video (currently disabled).

    The download path is switched off, so this always raises ``Exception``.
    The previous implementation is kept below for reference.

    :param url: the video URL.
    :raises Exception: always.
    """
    # Disabled implementation:
    #
    #   handle, temp_file = tempfile.mkstemp(".wav", text=False)
    #   os.close(handle)
    #   os.remove(temp_file)
    #
    #   output = subprocess.run(
    #       [ytdlp, '--extract-audio', '--audio-format', 'wav',
    #        '--cookies', '/tmp/cookies.txt', '--audio-quality', '16k',
    #        '--force-ipv6', '--output', temp_file, url],
    #       check=True, capture_output=True, text=True)
    #   if "429 Too Many Requests" in output.stdout:
    #       raise subprocess.CalledProcessError(1, whisper, "YouTube replied saying Too Many Requests")
    #   return temp_file

    raise Exception("DigitalOcean is rate limited to YouTube")
|
|
|
|
|
|
def normalize(path: str, output: str):
    """Run the external normalizer binary on ``path``, writing to ``output``.

    :param path: input file handed to the binary as its first argument.
    :param output: output location handed to the binary as its second argument.
    :returns: ``None`` when the binary exits with status 0; otherwise ``path``
        is deleted and a 500 ``Response`` with the body
        "failed to normalize" is returned.
    """
    completed = subprocess.run([normalize_binary, path, output])
    if completed.returncode == 0:
        return None

    # The input is useless once normalisation has failed.
    os.remove(path)
    return Response("failed to normalize", status=500)
|
|
|
|
|
|
@app.route('/transcribe-youtube')
def transcribe_youtube():
    """Transcribe the YouTube video named by the ``url`` query parameter.

    :returns: a ``text/event-stream`` response produced by
        ``generate_output``, or a 400 response when ``url`` is missing or does
        not match ``youtube_regex``.
    """
    # args.get() returns None for a missing key rather than raising KeyError.
    url = request.args.get('url')
    if url is None:
        return Response("must have a URL in the format ?url=https://www.youtube.com/watch?v=...",
                        status=400, mimetype="text/plain")
    if youtube_regex.match(url) is None:
        # text/plain so the echoed, untrusted value is never rendered as HTML.
        return Response(f"url '{url}' did not appear to be a YouTube video",
                        status=400, mimetype="text/plain")
    wav_file = obtain_youtube(url)
    return Response(generate_output(wav_file), mimetype="text/event-stream")
|
|
|
|
|
|
@app.route('/transcribe-file')
def transcribe_file():
    """Transcribe a previously uploaded file named by the ``file`` parameter.

    :returns: a ``text/event-stream`` response produced by
        ``generate_output``, or a 400 response when ``file`` is missing or is
        not made up solely of characters allowed by ``acceptable_regex``.
    """
    # args.get() returns None for a missing key rather than raising KeyError.
    file = request.args.get('file')
    if file is None:
        return Response("must have a file as obtained from /upload, in the format ?file=...",
                        status=400, mimetype="text/plain")
    # fullmatch: with match(), the pattern's "$" would accept a trailing newline.
    if acceptable_regex.fullmatch(file) is None:
        # text/plain so the echoed, untrusted value is never rendered as HTML.
        return Response(f"filename '{file}' did not match acceptable regex",
                        status=400, mimetype="text/plain")
    return Response(generate_output(file), mimetype="text/event-stream")
|
|
|
|
|
|
@app.route('/transcribe-ui')
def index():
    """Serve the UI page read from the file at ``index_page_path``.

    The file's contents are rendered with ``render_template_string``.
    """
    # Read through a context manager so the handle is closed on every request.
    with open(index_page_path) as page:
        template = page.read()
    return render_template_string(template)
|
|
|
|
|
|
@app.route('/upload', methods=["POST"])
def upload():
    """Accept an uploaded file and normalize it into ``/tmp/whisper``.

    The multipart field must be called ``file``.  The upload is saved to a
    temporary file, passed to ``normalize`` with the output location
    ``/tmp/whisper/<temp file name>``, and the temporary file is then removed.

    :returns: the temporary file's base name as ``text/plain`` on success, a
        400 response when the ``file`` part is missing, or the failure
        response produced by ``normalize``.
    """
    if 'file' not in request.files:
        return 'No "file" part in request', 400
    file = request.files['file']

    os.makedirs("/tmp/whisper", exist_ok=True)

    # Create temp file for this upload
    handle, temp_file = tempfile.mkstemp(text=False)
    try:
        os.close(handle)
        file.save(temp_file)
        # get filename from absolute path
        temp_file_frag = os.path.basename(temp_file)

        failure = normalize(temp_file, f"/tmp/whisper/{temp_file_frag}")
    finally:
        try:
            os.remove(temp_file)
        except FileNotFoundError:
            # normalize() deletes its input itself when it fails.
            pass

    if failure is not None:
        return failure
    return Response(temp_file_frag, mimetype="text/plain")
|
|
|
|
|
|
@app.route('/download')
def download():
    """Return the wav file stored for the ``file`` query parameter.

    :returns: the bytes of ``/tmp/whisper/{file}.wav`` as ``audio/wav``; a 400
        response when ``file`` is missing or contains characters not allowed
        by ``acceptable_regex``; a 404 response when no such file exists.
    """
    # args.get() returns None for a missing key rather than raising KeyError.
    file = request.args.get('file')
    if file is None:
        return Response("must have a file parameter", status=400, mimetype="text/plain")

    # fullmatch: with match(), the pattern's "$" would accept a trailing newline.
    if acceptable_regex.fullmatch(file) is None:
        # text/plain so the echoed, untrusted value is never rendered as HTML.
        return Response(f"file '{file}' did not match acceptable regex, bad format",
                        status=400, mimetype="text/plain")

    try:
        with open(f"/tmp/whisper/{file}.wav", 'rb') as wav:
            audio = wav.read()
    except FileNotFoundError:
        return Response(f"file '{file}' not found", status=404, mimetype="text/plain")

    return Response(audio, mimetype="audio/wav")
|
|
|
|
|
|
def run(port: int):
    """Serve ``app`` with waitress on all interfaces.

    :param port: TCP port to listen on.
    """
    waitress.serve(app, host="0.0.0.0", port=port)
|
|
|
|
|
|
if __name__ == "__main__":
    # Configuration comes from the environment; a missing variable raises
    # KeyError before the server starts.  These names are module globals read
    # by the functions above, and they are only defined when the file is run
    # as a script.
    normalize_binary = os.environ["WHISPER_NORMALIZE"]  # used by normalize()
    whisper = os.environ["WHISPER_CLIENT"]  # used by generate_output()
    index_page_path = os.environ["INDEX_PAGE_PATH"]  # used by index()
    ytdlp = os.environ["YT_DLP"]  # only referenced by the disabled downloader

    run(int(os.environ["WHISPER_PORT"]))
|