Compare commits
11 Commits
c377709bee
...
dbe938af1c
| Author | SHA1 | Date | |
|---|---|---|---|
| dbe938af1c | |||
| 3c86456a58 | |||
| f3b44d1d5a | |||
| d5e2331330 | |||
| 6e180bfe8d | |||
| fde07e3830 | |||
| a7effaec8f | |||
| e3408bf389 | |||
| ce0be76b70 | |||
| 5f57de04c8 | |||
| 19d5671894 |
17
analytics.py
17
analytics.py
@ -645,17 +645,16 @@ def file_access():
|
|||||||
'''
|
'''
|
||||||
with log_db:
|
with log_db:
|
||||||
cursor = log_db.execute(query, params_for_filter)
|
cursor = log_db.execute(query, params_for_filter)
|
||||||
rows = cursor.fetchall()
|
db_rows = cursor.fetchall()
|
||||||
|
|
||||||
# Convert rows to a list of dictionaries and add category
|
# Convert rows to a list of dictionaries and add category
|
||||||
rows = [
|
rows = []
|
||||||
{
|
for rel_path, access_count in db_rows:
|
||||||
'rel_path': rel_path,
|
rows.append({
|
||||||
'access_count': access_count,
|
'rel_path': rel_path,
|
||||||
'category': hf.extract_structure_from_string(rel_path)[0]
|
'access_count': access_count,
|
||||||
}
|
'category': hf.extract_structure_from_string(rel_path)[0]
|
||||||
for rel_path, access_count in rows
|
})
|
||||||
]
|
|
||||||
|
|
||||||
# Get possible categories from the rows
|
# Get possible categories from the rows
|
||||||
categories = sorted({r['category'] for r in rows if r['category'] is not None})
|
categories = sorted({r['category'] for r in rows if r['category'] is not None})
|
||||||
|
|||||||
10
auth.py
10
auth.py
@ -150,6 +150,16 @@ def require_secret(f):
|
|||||||
# this is required to track the devices connecting over the same ip address
|
# this is required to track the devices connecting over the same ip address
|
||||||
if 'device_id' not in session:
|
if 'device_id' not in session:
|
||||||
session['device_id'] = os.urandom(32).hex()
|
session['device_id'] = os.urandom(32).hex()
|
||||||
|
|
||||||
|
# AUTO-JUMP FOR TOKENS
|
||||||
|
try:
|
||||||
|
if args_token and is_valid_token(args_token):
|
||||||
|
token_item = decode_token(args_token)
|
||||||
|
target_foldername = token_item['folders'][0]['foldername']
|
||||||
|
return redirect(f"path/{target_foldername}")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error during auto-jump: {e}")
|
||||||
|
|
||||||
return f(*args, **kwargs)
|
return f(*args, **kwargs)
|
||||||
else:
|
else:
|
||||||
title_short = app_config.get('TITLE_SHORT', 'Default Title')
|
title_short = app_config.get('TITLE_SHORT', 'Default Title')
|
||||||
|
|||||||
@ -1,300 +0,0 @@
|
|||||||
// read the CSS variable from :root (or any selector)
|
|
||||||
const cssVar = getComputedStyle(document.documentElement).getPropertyValue('--dark-background').trim();
|
|
||||||
|
|
||||||
// player DOM elements
|
|
||||||
const nowPlayingInfo = document.getElementById('nowPlayingInfo');
|
|
||||||
const audio = document.getElementById('globalAudio');
|
|
||||||
const audioPlayerContainer = document.getElementById('audioPlayerContainer');
|
|
||||||
|
|
||||||
const playerButton = document.querySelector('.player-button'),
|
|
||||||
timeline = document.querySelector('.timeline'),
|
|
||||||
timeInfo = document.getElementById('timeInfo'),
|
|
||||||
playIcon = `
|
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="${cssVar}">
|
|
||||||
<path fill-rule="evenodd" d="M10 18a8 8 0 100-16 8 8 0 000 16zM9.555 7.168A1 1 0 008 8v4a1 1 0 001.555.832l3-2a1 1 0 000-1.664l-3-2z" clip-rule="evenodd" />
|
|
||||||
</svg>
|
|
||||||
`,
|
|
||||||
pauseIcon = `
|
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="${cssVar}">
|
|
||||||
<path fill-rule="evenodd" d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zM7 8a1 1 0 012 0v4a1 1 0 11-2 0V8zm5-1a1 1 0 00-1 1v4a1 1 0 102 0V8a1 1 0 00-1-1z" clip-rule="evenodd" />
|
|
||||||
</svg>
|
|
||||||
`,
|
|
||||||
soundIcon = `
|
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" fill="${cssVar}">
|
|
||||||
<path fill-rule="evenodd" d="M9.383 3.076A1 1 0 0110 4v12a1 1 0 01-1.707.707L4.586 13H2a1 1 0 01-1-1V8a1 1 0 011-1h2.586l3.707-3.707a1 1 0 011.09-.217zM14.657 2.929a1 1 0 011.414 0A9.972 9.972 0 0119 10a9.972 9.972 0 01-2.929 7.071 1 1 0 01-1.414-1.414A7.971 7.971 0 0017 10c0-2.21-.894-4.208-2.343-5.657a1 1 0 010-1.414zm-2.829 2.828a1 1 0 011.415 0A5.983 5.983 0 0115 10a5.984 5.984 0 01-1.757 4.243 1 1 0 01-1.415-1.415A3.984 3.984 0 0013 10a3.983 3.983 0 00-1.172-2.828 1 1 0 010-1.415z" clip-rule="evenodd" />
|
|
||||||
</svg>
|
|
||||||
`
|
|
||||||
|
|
||||||
function toggleAudio () {
|
|
||||||
if (audio.paused) {
|
|
||||||
audio.play();
|
|
||||||
playerButton.innerHTML = pauseIcon;
|
|
||||||
} else {
|
|
||||||
audio.pause();
|
|
||||||
playerButton.innerHTML = playIcon;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
playerButton.addEventListener('click', toggleAudio);
|
|
||||||
|
|
||||||
let isSeeking = false;
|
|
||||||
|
|
||||||
// --- Slider (Timeline) events ---
|
|
||||||
// Mouse events
|
|
||||||
timeline.addEventListener('mousedown', () => { isSeeking = true; });
|
|
||||||
timeline.addEventListener('mouseup', () => { isSeeking = false;
|
|
||||||
changeSeek(); // Update the audio currentTime based on the slider position
|
|
||||||
});
|
|
||||||
|
|
||||||
// Touch events
|
|
||||||
timeline.addEventListener('touchstart', () => { isSeeking = true; });
|
|
||||||
timeline.addEventListener('touchend', () => { isSeeking = false;
|
|
||||||
changeSeek();
|
|
||||||
});
|
|
||||||
timeline.addEventListener('touchcancel', () => { isSeeking = false; });
|
|
||||||
|
|
||||||
|
|
||||||
// --- Seek function: directly set audio.currentTime using slider's value (in seconds) ---
|
|
||||||
function changeSeek() {
|
|
||||||
audio.currentTime = timeline.value;
|
|
||||||
|
|
||||||
if ('mediaSession' in navigator && 'setPositionState' in navigator.mediaSession) {
|
|
||||||
navigator.mediaSession.setPositionState({
|
|
||||||
duration: audio.duration,
|
|
||||||
playbackRate: audio.playbackRate,
|
|
||||||
position: audio.currentTime
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- Utility: Format seconds as mm:ss ---
|
|
||||||
function formatTime(seconds) {
|
|
||||||
const minutes = Math.floor(seconds / 60);
|
|
||||||
const secs = Math.floor(seconds % 60);
|
|
||||||
return `${minutes < 10 ? '0' : ''}${minutes}:${secs < 10 ? '0' : ''}${secs}`;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// --- Update timeline, time info, and media session on each time update ---
|
|
||||||
function updateTimeline() {
|
|
||||||
if (!isSeeking && audio.duration) {
|
|
||||||
timeline.value = audio.currentTime;
|
|
||||||
timeline.style.backgroundSize = `${(audio.currentTime / audio.duration) * 100}% 100%`;
|
|
||||||
timeInfo.textContent = `${formatTime(audio.currentTime)} / ${formatTime(audio.duration)}`;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// --- When metadata is loaded, set slider range in seconds ---
|
|
||||||
audio.addEventListener('loadedmetadata', () => {
|
|
||||||
timeline.min = 0;
|
|
||||||
timeline.max = audio.duration;
|
|
||||||
timeline.value = 0;
|
|
||||||
timeline.style.backgroundSize = '0% 100%';
|
|
||||||
});
|
|
||||||
|
|
||||||
// --- Update timeline and time info on time update ---
|
|
||||||
audio.addEventListener('timeupdate', () => {
|
|
||||||
updateTimeline();
|
|
||||||
|
|
||||||
if ('mediaSession' in navigator && 'setPositionState' in navigator.mediaSession) {
|
|
||||||
navigator.mediaSession.setPositionState({
|
|
||||||
duration: audio.duration,
|
|
||||||
playbackRate: audio.playbackRate,
|
|
||||||
position: audio.currentTime
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// --- Update media session metadata when audio is loaded ---
|
|
||||||
audio.addEventListener('play', () => {
|
|
||||||
if ('mediaSession' in navigator)
|
|
||||||
navigator.mediaSession.playbackState = 'playing';
|
|
||||||
});
|
|
||||||
audio.addEventListener('pause', () => {
|
|
||||||
if ('mediaSession' in navigator)
|
|
||||||
navigator.mediaSession.playbackState = 'paused';
|
|
||||||
});
|
|
||||||
|
|
||||||
audio.ontimeupdate = updateTimeline;
|
|
||||||
|
|
||||||
// Fallback for mobile throttling
|
|
||||||
setInterval(() => {
|
|
||||||
if (!audio.paused && !isSeeking) updateTimeline();
|
|
||||||
}, 500);
|
|
||||||
|
|
||||||
|
|
||||||
// --- When audio ends ---
|
|
||||||
audio.onended = function() {
|
|
||||||
playerButton.innerHTML = playIcon;
|
|
||||||
};
|
|
||||||
|
|
||||||
async function downloadAudio() {
|
|
||||||
const src = audio.currentSrc || audio.src;
|
|
||||||
if (!src) return;
|
|
||||||
|
|
||||||
// Build the URL with your download flag + cache‑buster
|
|
||||||
const downloadUrl = new URL(src, window.location.href);
|
|
||||||
downloadUrl.searchParams.set('download', 'true');
|
|
||||||
downloadUrl.searchParams.set('_', Date.now());
|
|
||||||
|
|
||||||
// Create a “real” link to that URL and click it
|
|
||||||
const a = document.createElement('a');
|
|
||||||
a.href = downloadUrl.toString();
|
|
||||||
a.download = ''; // tell Safari “this is a download”
|
|
||||||
a.target = '_blank'; // force a real navigation on iOS
|
|
||||||
// NOTE: do NOT set a.download here – we want the server's Content-Disposition to drive it
|
|
||||||
document.body.appendChild(a);
|
|
||||||
a.click();
|
|
||||||
document.body.removeChild(a);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Global variable to store the current fetch's AbortController.
|
|
||||||
let currentFetchController = null;
|
|
||||||
|
|
||||||
async function startPlaying(relUrl) {
|
|
||||||
// Pause the audio and clear its source.
|
|
||||||
audio.pause();
|
|
||||||
audio.src = '';
|
|
||||||
|
|
||||||
// Display the audio player container.
|
|
||||||
audioPlayerContainer.style.display = "block";
|
|
||||||
|
|
||||||
// Set a timeout to display a loader message if needed.
|
|
||||||
const loaderTimeout = setTimeout(() => {
|
|
||||||
playerButton.innerHTML = playIcon;
|
|
||||||
nowPlayingInfo.textContent = "Wird geladen...";
|
|
||||||
}, 250);
|
|
||||||
const spinnerTimer = setTimeout(showSpinner, 500);
|
|
||||||
|
|
||||||
footer.style.display = 'flex';
|
|
||||||
|
|
||||||
// Abort any previous fetch if still running.
|
|
||||||
if (currentFetchController) {
|
|
||||||
currentFetchController.abort();
|
|
||||||
}
|
|
||||||
currentFetchController = new AbortController();
|
|
||||||
|
|
||||||
const mediaUrl = `/media/${relUrl}`;
|
|
||||||
|
|
||||||
try {
|
|
||||||
// Perform a HEAD request to verify media availability.
|
|
||||||
const response = await fetch(mediaUrl, { method: 'HEAD', signal: currentFetchController.signal });
|
|
||||||
clearTimeout(loaderTimeout);
|
|
||||||
|
|
||||||
if (response.status === 403) {
|
|
||||||
nowPlayingInfo.textContent = "Fehler: Zugriff verweigert.";
|
|
||||||
window.location.href = '/';
|
|
||||||
return;
|
|
||||||
} else if (!response.ok) {
|
|
||||||
nowPlayingInfo.textContent = `Fehler: Unerwarteter Status (${response.status}).`;
|
|
||||||
console.error('Unexpected response status:', response.status);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set the media URL, load, and play the audio.
|
|
||||||
audio.src = mediaUrl;
|
|
||||||
audio.load();
|
|
||||||
await audio.play();
|
|
||||||
clearTimeout(spinnerTimer);
|
|
||||||
hideSpinner();
|
|
||||||
currentTrackPath = relUrl;
|
|
||||||
playerButton.innerHTML = pauseIcon;
|
|
||||||
|
|
||||||
// Process file path for display.
|
|
||||||
const pathParts = relUrl.split('/');
|
|
||||||
const folderName = pathParts[pathParts.length - 2];
|
|
||||||
const fileName = pathParts.pop();
|
|
||||||
const pathStr = pathParts.join('/');
|
|
||||||
|
|
||||||
// Update Media Session metadata if available.
|
|
||||||
if ('mediaSession' in navigator) {
|
|
||||||
navigator.mediaSession.metadata = new MediaMetadata({
|
|
||||||
title: fileName.replace(/\.[^/.]+$/, ''), // remove extension
|
|
||||||
artist: folderName,
|
|
||||||
artwork: [
|
|
||||||
{ src: '/icons/logo-192x192.png', sizes: '192x192', type: 'image/png' }
|
|
||||||
]
|
|
||||||
});
|
|
||||||
navigator.mediaSession.playbackState = 'playing';
|
|
||||||
navigator.mediaSession.setPositionState({
|
|
||||||
duration: audio.duration,
|
|
||||||
playbackRate: audio.playbackRate,
|
|
||||||
position: 0
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
nowPlayingInfo.innerHTML = pathStr.replace(/\//g, ' > ') +
|
|
||||||
'<br><span style="font-size: larger; font-weight: bold;">' +
|
|
||||||
fileName.replace('.mp3', '') + '</span>';
|
|
||||||
} catch (error) {
|
|
||||||
if (error.name === 'AbortError') {
|
|
||||||
console.log('Previous fetch aborted.');
|
|
||||||
} else {
|
|
||||||
console.error('Error fetching media:', error);
|
|
||||||
nowPlayingInfo.textContent = "Fehler: Netzwerkproblem oder ungültige URL.";
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if ('mediaSession' in navigator) {
|
|
||||||
|
|
||||||
// Handler for the play action
|
|
||||||
navigator.mediaSession.setActionHandler('play', () => {
|
|
||||||
navigator.mediaSession.playbackState = 'playing'
|
|
||||||
document.getElementById('globalAudio').play();
|
|
||||||
});
|
|
||||||
|
|
||||||
// Handler for the pause action
|
|
||||||
navigator.mediaSession.setActionHandler('pause', () => {
|
|
||||||
navigator.mediaSession.playbackState = 'paused'
|
|
||||||
document.getElementById('globalAudio').pause();
|
|
||||||
});
|
|
||||||
|
|
||||||
// Handler for the previous track action
|
|
||||||
navigator.mediaSession.setActionHandler('previoustrack', () => {
|
|
||||||
if (currentMusicIndex > 0) {
|
|
||||||
const prevFile = currentMusicFiles[currentMusicIndex - 1];
|
|
||||||
const prevLink = document.querySelector(`.play-file[data-url="${prevFile.path}"]`);
|
|
||||||
if (prevLink) {
|
|
||||||
prevLink.click();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Handler for the next track action
|
|
||||||
navigator.mediaSession.setActionHandler('nexttrack', () => {
|
|
||||||
if (currentMusicIndex >= 0 && currentMusicIndex < currentMusicFiles.length - 1) {
|
|
||||||
const nextFile = currentMusicFiles[currentMusicIndex + 1];
|
|
||||||
const nextLink = document.querySelector(`.play-file[data-url="${nextFile.path}"]`);
|
|
||||||
if (nextLink) {
|
|
||||||
nextLink.click();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Handler for the seek backward and seek forward actions
|
|
||||||
navigator.mediaSession.setActionHandler('seekbackward', details => {
|
|
||||||
const offset = details.seekOffset || 10;
|
|
||||||
audio.currentTime = Math.max(0, audio.currentTime - offset);
|
|
||||||
});
|
|
||||||
navigator.mediaSession.setActionHandler('seekforward', details => {
|
|
||||||
const offset = details.seekOffset || 10;
|
|
||||||
audio.currentTime = Math.min(audio.duration, audio.currentTime + offset);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Handler for the seek backward action
|
|
||||||
navigator.mediaSession.setActionHandler('seekto', ({seekTime, fastSeek}) => {
|
|
||||||
if (fastSeek && 'fastSeek' in audio) {
|
|
||||||
audio.fastSeek(seekTime);
|
|
||||||
} else {
|
|
||||||
audio.currentTime = seekTime;
|
|
||||||
}
|
|
||||||
// immediately update the remote clock
|
|
||||||
navigator.mediaSession.setPositionState({
|
|
||||||
duration: audio.duration,
|
|
||||||
playbackRate: audio.playbackRate,
|
|
||||||
position: audio.currentTime
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
}
|
|
||||||
@ -107,44 +107,7 @@ def print_speed(current_length):
|
|||||||
trans_speed = 0
|
trans_speed = 0
|
||||||
|
|
||||||
print(f" | Speed: {int(trans_speed)} minutes per hour | ", end='', flush=True)
|
print(f" | Speed: {int(trans_speed)} minutes per hour | ", end='', flush=True)
|
||||||
|
|
||||||
def write_markdown(file_path, result, postfix=None):
|
|
||||||
file_dir = os.path.dirname(file_path)
|
|
||||||
txt_folder = os.path.join(file_dir, "Transkription")
|
|
||||||
os.makedirs(txt_folder, exist_ok=True)
|
|
||||||
base_name = os.path.splitext(os.path.basename(file_path))[0]
|
|
||||||
if postfix != None:
|
|
||||||
base_name = f"{base_name}_{postfix}"
|
|
||||||
output_md = os.path.join(txt_folder, base_name + ".md")
|
|
||||||
|
|
||||||
# Prepare the markdown content.
|
|
||||||
folder_name = os.path.basename(file_dir)
|
|
||||||
md_lines = [
|
|
||||||
f"### {folder_name}",
|
|
||||||
f"#### {os.path.basename(file_path)}",
|
|
||||||
"---",
|
|
||||||
""
|
|
||||||
]
|
|
||||||
|
|
||||||
previous_text = ""
|
|
||||||
for segment in result["segments"]:
|
|
||||||
start = format_timestamp(segment["start"])
|
|
||||||
text = segment["text"].strip()
|
|
||||||
if previous_text != text: # suppress repeating lines
|
|
||||||
md_lines.append(f"`{start}` {text}")
|
|
||||||
previous_text = text
|
|
||||||
|
|
||||||
transcript_md = "\n".join(md_lines)
|
|
||||||
|
|
||||||
transcript_md = apply_error_correction(transcript_md)
|
|
||||||
|
|
||||||
transcript_md = remove_lines_with_words(transcript_md)
|
|
||||||
|
|
||||||
with open(output_md, "w", encoding="utf-8") as f:
|
|
||||||
f.write(transcript_md)
|
|
||||||
|
|
||||||
print_speed(result["segments"][-1]["end"])
|
|
||||||
print(f"... done !")
|
|
||||||
|
|
||||||
def transcribe_file(model, audio_input, language):
|
def transcribe_file(model, audio_input, language):
|
||||||
initial_prompt = (
|
initial_prompt = (
|
||||||
@ -170,51 +133,113 @@ def detect_language(model, audio):
|
|||||||
print(f"{lang_code}. ", end='', flush=True)
|
print(f"{lang_code}. ", end='', flush=True)
|
||||||
return lang_code
|
return lang_code
|
||||||
|
|
||||||
|
|
||||||
def process_file(file_path, model, audio_input):
|
def process_file(file_path, model, audio_input):
|
||||||
|
"""
|
||||||
|
Transcribe the audio file into one markdown file.
|
||||||
|
If special case (German sermon in Russian or Russian-marked file), transcribe both in Russian and German into the same file.
|
||||||
|
"""
|
||||||
file_name = os.path.basename(file_path)
|
file_name = os.path.basename(file_path)
|
||||||
|
|
||||||
# default values
|
# Detect spoken language
|
||||||
postfix = None
|
detected = detect_language(model, audio_input)
|
||||||
language = detect_language(model, audio_input)
|
|
||||||
|
# Determine which languages to transcribe
|
||||||
if language == 'ru' and 'predigt' in file_name.lower() or language == 'de' and 'russisch' in file_name.lower(): # make two files
|
if (detected == 'ru' and 'predigt' in file_name.lower()) or \
|
||||||
# first file
|
(detected == 'de' and 'russisch' in file_name.lower()):
|
||||||
language="ru"
|
langs = ['de', 'ru']
|
||||||
postfix = "ru"
|
elif detected == 'en': # songs often mis-detected as English
|
||||||
print(f"Transcribing {format_status_path(file_path)} ", end='', flush=True)
|
langs = ['de']
|
||||||
markdown = transcribe_file(model, audio_input, language)
|
elif detected in ('de', 'ru'):
|
||||||
write_markdown(file_path, markdown, postfix)
|
langs = [detected]
|
||||||
# second file
|
else:
|
||||||
language="de"
|
langs = ['ru']
|
||||||
postfix = "de"
|
|
||||||
elif language == 'en': # songs mostly detect as english
|
# Collect segments for combined result
|
||||||
language="de"
|
lang_collection = {}
|
||||||
elif language == 'de' or language == 'ru': # keep as detected
|
for lang in langs:
|
||||||
pass
|
combined_segments = []
|
||||||
else: # not german not english and not russian. --> russina
|
print(f"Transcribing {format_status_path(file_path)} as {lang}", end='', flush=True)
|
||||||
language="ru"
|
result = transcribe_file(model, audio_input, lang)
|
||||||
|
|
||||||
print(f"Transcribing {format_status_path(file_path)} ", end='', flush=True)
|
# Extend with actual segments
|
||||||
markdown = transcribe_file(model, audio_input, language)
|
if isinstance(result, dict) and 'segments' in result:
|
||||||
write_markdown(file_path, markdown, postfix)
|
combined_segments.extend(result['segments'])
|
||||||
|
else:
|
||||||
|
# If result isn't dict-of-segments, wrap entire text
|
||||||
|
text = getattr(result, 'text', None) or (result.get('text') if isinstance(result, dict) else str(result))
|
||||||
|
combined_segments.append({'start': 0, 'text': text})
|
||||||
|
lang_collection[lang] = combined_segments
|
||||||
|
|
||||||
|
# Now write out markdown using the combined segments
|
||||||
|
file_dir = os.path.dirname(file_path)
|
||||||
|
txt_folder = os.path.join(file_dir, "Transkription")
|
||||||
|
os.makedirs(txt_folder, exist_ok=True)
|
||||||
|
base_name = os.path.splitext(os.path.basename(file_path))[0]
|
||||||
|
output_md = os.path.join(txt_folder, base_name + ".md")
|
||||||
|
|
||||||
|
# Build markdown lines
|
||||||
|
folder_name = os.path.basename(file_dir)
|
||||||
|
md_lines = [
|
||||||
|
f"### {folder_name}",
|
||||||
|
f"#### {os.path.basename(file_path)}",
|
||||||
|
"---",
|
||||||
|
""
|
||||||
|
]
|
||||||
|
previous_text = ""
|
||||||
|
for lang, combined_segments in lang_collection.items():
|
||||||
|
md_lines.append(f"##### Transcription ({lang.upper()})")
|
||||||
|
md_lines.append("---")
|
||||||
|
for segment in combined_segments:
|
||||||
|
start = format_timestamp(segment.get('start', 0))
|
||||||
|
text = segment.get('text', '').strip()
|
||||||
|
if text and text != previous_text:
|
||||||
|
md_lines.append(f"`{start}` {text}")
|
||||||
|
previous_text = text
|
||||||
|
|
||||||
|
# Join and post-process
|
||||||
|
transcript_md = "\n".join(md_lines)
|
||||||
|
transcript_md = apply_error_correction(transcript_md)
|
||||||
|
transcript_md = remove_lines_with_words(transcript_md)
|
||||||
|
|
||||||
|
# Write file and report
|
||||||
|
with open(output_md, "w", encoding="utf-8") as f:
|
||||||
|
f.write(transcript_md)
|
||||||
|
|
||||||
|
if combined_segments:
|
||||||
|
end_ts = combined_segments[-1].get('end', combined_segments[-1].get('start', 0))
|
||||||
|
print_speed(end_ts)
|
||||||
|
print("... done !")
|
||||||
|
|
||||||
|
|
||||||
def process_folder(root_folder):
|
def process_folder(root_folder):
|
||||||
"""
|
"""
|
||||||
Walk through root_folder and process .mp3 files, applying skip rules.
|
Walk through root_folder and process .mp3 files.
|
||||||
Only files that need to be transcribed (i.e. transcription does not already exist)
|
Differentiates between “folder not found” and “folder empty.”
|
||||||
will have their audio pre-loaded concurrently.
|
Select files that need to be transcribed (i.e. transcription does not already exist, applying certain rules)
|
||||||
"""
|
"""
|
||||||
global start_time
|
global start_time
|
||||||
keywords = ["musik", "chor", "lied", "gesang", "orchester", "orhester", "melodi", "sot"]
|
keywords = ["musik", "chor", "lied", "gesang", "orchester", "orhester", "melodi", "sot"]
|
||||||
print("Create file list...")
|
print("Create file list...")
|
||||||
|
|
||||||
|
# path actually exist / can we even try to list it?
|
||||||
|
if not os.path.exists(root_folder):
|
||||||
|
print(f"Error: Path '{root_folder}' does not exist or is not reachable.")
|
||||||
|
return
|
||||||
|
|
||||||
|
if not os.path.isdir(root_folder):
|
||||||
|
print(f"Error: Path '{root_folder}' exists but is not a folder.")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Now we know the folder exists; let's scan it.
|
||||||
|
print(f"Scanning '{root_folder}' for .mp3 files…")
|
||||||
valid_files = []
|
valid_files = []
|
||||||
checked_files = 0
|
checked_files = 0
|
||||||
# Walk the folder and build a list of files to transcribe.
|
|
||||||
for dirpath, _, filenames in os.walk(root_folder):
|
for dirpath, _, filenames in os.walk(root_folder):
|
||||||
for filename in filenames:
|
for filename in filenames:
|
||||||
if filename.lower().endswith(".mp3"):
|
if filename.lower().endswith(".mp3"):
|
||||||
checked_files = checked_files + 1
|
checked_files += 1
|
||||||
filename_lower = filename.lower()
|
filename_lower = filename.lower()
|
||||||
file_path = os.path.join(dirpath, filename)
|
file_path = os.path.join(dirpath, filename)
|
||||||
# Skip files with skip keywords.
|
# Skip files with skip keywords.
|
||||||
@ -225,19 +250,25 @@ def process_folder(root_folder):
|
|||||||
txt_folder = os.path.join(dirpath, "Transkription")
|
txt_folder = os.path.join(dirpath, "Transkription")
|
||||||
base_name = os.path.splitext(os.path.basename(file_path))[0]
|
base_name = os.path.splitext(os.path.basename(file_path))[0]
|
||||||
output_md = os.path.join(txt_folder, base_name + ".md")
|
output_md = os.path.join(txt_folder, base_name + ".md")
|
||||||
output_md_de = os.path.join(txt_folder, base_name + "_de.md")
|
|
||||||
output_md_ru = os.path.join(txt_folder, base_name + "_ru.md")
|
|
||||||
# skip files with existing md files
|
# skip files with existing md files
|
||||||
if os.path.exists(output_md) or os.path.exists(output_md_de) or os.path.exists(output_md_ru):
|
if os.path.exists(output_md):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
valid_files.append(file_path)
|
valid_files.append(file_path)
|
||||||
|
|
||||||
if len(valid_files) == 0:
|
# If the folder was empty of .mp3s, checked_files will be 0,
|
||||||
print(f"Checked {checked_files} files. All files are transcribed.")
|
# but we know it existed because we passed the exists()/isdir() tests.
|
||||||
|
if checked_files == 0:
|
||||||
|
print(f"Checked 0 files in '{root_folder}'. Folder is empty of .mp3s.")
|
||||||
return
|
return
|
||||||
else:
|
|
||||||
print(f"Checked {checked_files} files. Start to transcribe {len(valid_files)} files.")
|
# If you made it here, checked_files > 0 but maybe all were already transcribed:
|
||||||
|
if len(valid_files) == 0:
|
||||||
|
print(f"Checked {checked_files} files. All files are already transcribed.")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Otherwise you have files to process…
|
||||||
|
print(f"Checked {checked_files} files. {len(valid_files)} need transcription.")
|
||||||
|
|
||||||
# Choose “cuda” if available, otherwise “cpu”
|
# Choose “cuda” if available, otherwise “cpu”
|
||||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user