# 使用 youtube-dl 將自動產生的字幕轉換為 srt 格式
我想從 YouTube 下載自動產生的字幕,並使用以下命令將其轉換為 srt 格式:`youtube-dl --write-auto-sub --sub-lang en --convert-subs=srt --skip-download <URL>`
它只輸出 .vtt 文件,而不將其轉換為 srt。
但當刪除 `--skip-download` 選項時,它會下載影片和 vtt 文件,最後將 vtt 轉換為 srt。
有沒有辦法只下載 vtt 檔案然後將其轉換為 srt 而不下載影片?
答案1
也許原因是在此過程中在幕後使用的 ffmpeg 不直接處理像“ffmpeg.exe -i path.vtt path.vtt.srt”這樣的重新編碼請求。它需要文件流,為此它需要視頻文件。
至少,這樣的請求對我不起作用,並且報告缺少流,儘管互聯網上存在類似的示例。
答案2
我製作了一個非常簡單的命令列工具來將 vtt 轉換為 srt,它僅通過非常簡單的文本處理(不使用詞法分析或標記)來工作,並且僅適用於 Youtube 的自動轉錄字幕,對於以下文本:
WEBVTT
Kind: captions
Language: en
00:00:01.740 --> 00:00:05.030 align:start position:0%
沒有<00:00:02.700> 是<00:00:03.179> 不可能...
您可以在此處下載 ZIP 檔案:http://pececko.szm.com/@ VTT to SRT
。我不打算更新它,所以請將此程式碼視為公共領域。我希望這對某人有用。
// C program to convert subtitles from VTT file to SRT file
// public domain
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXLINELEN 32767
#define EXTLEN 4       // strlen(".vtt") == strlen(".srt")
#define NAMERESERVE 8  // space kept free in a name buffer for ".vtt" plus ".srt"
#define TIMINGTAIL 17  // strlen(" --> ") + strlen("hh:mm:ss.mmm")

// Returns nonzero when the line contains the cue-timing arrow " --> ".
static int is_timing_line(const char *s)
{
    return strstr(s, " --> ") != NULL;
}

// Returns nonzero when name ends with the 4-character extension ext.
static int has_extension(const char *name, const char *ext)
{
    size_t len = strlen(name);

    return len >= EXTLEN && strcmp(name + len - EXTLEN, ext) == 0;
}

// Reads one line from stdin into dst (at most cap-1 characters) and removes
// the line ending. dst becomes the empty string on EOF or read error.
static void read_name(char *dst, int cap)
{
    if (fgets(dst, cap, stdin) == NULL) {
        dst[0] = '\0';
        return;
    }
    dst[strcspn(dst, "\r\n")] = '\0';
}

// Copies src into dst (capacity cap) while keeping NAMERESERVE bytes free for
// extensions appended later. Returns 0 on success, -1 when src is too long.
static int copy_name(char *dst, size_t cap, const char *src)
{
    size_t len = strlen(src);

    if (len + NAMERESERVE + 1 > cap) {
        return -1;
    }
    memcpy(dst, src, len + 1);
    return 0;
}

// Rewrites one line in place. A timing line is cut right after the end
// timestamp (dropping cue settings such as "align:start position:0%") and its
// '.' characters become ','. Any other line has every <...> span removed.
static void clean_line(char *line)
{
    char *arrow = strstr(line, " --> ");
    char *p;
    size_t in, out = 0;
    int erase = 0; // boolean: currently inside <...>

    if (arrow != NULL) {
        if (strlen(arrow) > TIMINGTAIL) { // only cut when something follows the timestamp
            arrow[TIMINGTAIL] = '\0';
        }
        for (p = line; *p != '\0'; p++) {
            if (*p == '.') {
                *p = ',';
            }
        }
        return;
    }
    for (in = 0; line[in] != '\0'; in++) {
        if (line[in] == '<') {
            erase = 1;
        } else if (line[in] == '>') {
            erase = 0;
        } else if (!erase) {
            line[out++] = line[in];
        }
    }
    line[out] = '\0'; // always terminate, even when a '<' was never closed
}

// Converts a Youtube auto-transcribed .vtt file into an .srt file.
//
// argv[1] (optional) is the input file name; it is asked for on stdin when
// missing, and ".vtt" is appended when the name does not end with it.
// argv[2] (used only when argc == 3) is the output file name; otherwise it is
// asked for on stdin, defaulting to the input name with the extension "srt".
// ".srt" is appended when the output name does not end with it.
//
// Processing steps on the lines of the input file:
//   1. drop lines of length 0 or 1 and clean the remaining ones (clean_line),
//   2. drop everything before the first timing line,
//   3. drop a text line that equals the most recently kept text line,
//   4. keep a timing line after the first one only when the line directly
//      before it (after step 3) is also a timing line,
//   5. write each timing line preceded by a running number and each text line
//      followed by an empty line.
//
// Returns 0 on success or when an empty input name was entered, 1 on error.
int main(int argc,char** argv)
{
    FILE *fh;
    char filein[MAXLINELEN];  // = "test.vtt"
    char fileout[MAXLINELEN]; // = "test.srt"
    char **arr;               // array of pointers to lines loaded to memory from file
    char *buffer;             // whole file content, NUL-terminated
    char *lasttext;           // most recently kept text line (step 3)
    long numbytes;
    size_t length, maxlines, pos, i, j, k;
    int seq;                  // running cue number written to the .srt file
    int previousline;         // boolean: previous line was a timing line

    fputs("Convert Youtube's Autotranscribed VTT to SRT [build 2023-06-22]\nConverts .vtt files from youtube to .srt files.\nUsage: vtttosrt.exe [sourcesubtitles.vtt] [targetsubtitles.srt]\nPress Ctrl+C to abort.\n", stdout);

    if (argc>1) {
        if (copy_name(filein, sizeof filein, argv[1]) != 0) {
            printf("Filename too long.\n");
            return 1;
        }
    } else {
        printf("Enter input filename (*.vtt) :");
        read_name(filein, (int)sizeof filein - NAMERESERVE);
    }
    if (filein[0] == '\0') {
        return 0;
    }
    if (!has_extension(filein, ".vtt")) {
        strcat(filein, ".vtt"); // append missing extension (space reserved above)
    }

    fh = fopen(filein, "r"); // Opening file in reading mode
    if (fh == NULL) {
        printf("File %s not found.\n", filein);
        return 1;
    }
    if (fseek(fh, 0L, SEEK_END) != 0 || (numbytes = ftell(fh)) < 0
            || fseek(fh, 0L, SEEK_SET) != 0) {
        fclose(fh);
        printf("Cannot read file %s.\n", filein);
        return 1;
    }
    // One extra zeroed byte guarantees the last line is NUL-terminated.
    buffer = (char*)calloc((size_t)numbytes + 1, sizeof(char));
    if (buffer == NULL) {
        fclose(fh);
        return 1;
    }
    // In text mode fewer bytes than numbytes may be delivered; use the real count.
    length = fread(buffer, sizeof(char), (size_t)numbytes, fh);
    fclose(fh);

    // A file with n newlines has at most n+1 lines.
    maxlines = 1;
    for (i = 0; i < length; i++) {
        if (buffer[i] == '\n') {
            maxlines++;
        }
    }
    arr = (char**)calloc(maxlines, sizeof *arr);
    if (arr == NULL) {
        free(buffer);
        return 1;
    }
    pos = 1;
    arr[0] = buffer;
    for (i = 0; i < length; i++) {
        if (buffer[i] == '\n') {
            buffer[i] = '\0';
            arr[pos++] = &buffer[i + 1]; // at worst points at the final NUL byte
        }
    }

    // step 1: skip empty lines, remove <timestamps> and align+position
    j = 0;
    for (i = 0; i < pos; i++) {
        if (strlen(arr[i]) > 1) {
            clean_line(arr[i]);
            arr[j++] = arr[i];
        }
    }
    pos = j;

    // step 2: seek to the first timing line (bounds are checked before the line is read)
    i = 0;
    while (i < pos && !is_timing_line(arr[i])) {
        i++;
    }
    if (i == pos) {
        printf("No subtitle timing lines found in %s.\n", filein);
        free(arr);
        free(buffer);
        return 1;
    }

    // step 3: remove duplicate text lines
    lasttext = arr[0] = arr[i]; // no text kept yet; a timing line never equals a text line
    j = 1;
    for (i++; i < pos; i++) {
        if (!is_timing_line(arr[i])) {
            if (strcmp(arr[i], lasttext) == 0) {
                continue; // duplicate of the previously kept text line
            }
            lasttext = arr[i];
        }
        arr[j++] = arr[i];
    }
    pos = j;

    // step 4: remove duplicate timing lines. arr[0] is always kept; a later
    // timing line is kept only when the line before it is a timing line too.
    previousline = 0;
    j = 1;
    for (i = 1; i < pos; i++) {
        int timing = is_timing_line(arr[i]);

        if (previousline || !timing) {
            arr[j++] = arr[i];
        }
        previousline = timing;
    }
    pos = j;
    printf("Subtitles from file %s loaded.\n", filein);

    // choose the .srt file name
    if (argc==3) {
        if (copy_name(fileout, sizeof fileout, argv[2]) != 0) {
            printf("Filename too long.\n");
            free(arr);
            free(buffer);
            return 1;
        }
    } else {
        k = strlen(filein); // filein ends with ".vtt" here, so k >= 4
        memcpy(&filein[k - 3], "srt", 3);
        printf("Enter output filename (default: %s) : ", filein);
        read_name(fileout, (int)sizeof fileout - NAMERESERVE);
    }
    if (fileout[0] == '\0') {
        strcpy(fileout, filein); // use default name; same buffer size, always fits
    }
    if (!has_extension(fileout, ".srt")) { // if .srt extension omitted
        strcat(fileout, ".srt"); // append missing extension (space reserved above)
    }

    fh = fopen(fileout, "w"); // create new file
    if (fh == NULL) {
        printf("File creating error. Sending the result to stdout.\n");
        fh = stdout;
    }

    // step 5: arr[0] is a timing line, so output starts at index 0
    seq = 1;
    for (i = 0; i < pos; i++) {
        if (is_timing_line(arr[i])) {
            fprintf(fh, "%d\n%s\n", seq, arr[i]);
            seq++;
        } else {
            fprintf(fh, "%s\n\n", arr[i]);
        }
    }
    if (fh != stdout && fclose(fh) != 0) { // fclose flushes; a failure means lost output
        printf("Error writing file %s.\n", fileout);
        free(arr);
        free(buffer);
        return 1;
    }
    printf("Subtitles exported to %s.\n", fileout);
    free(arr);
    free(buffer);
    return 0;
}