使用 youtube-dl 將自動產生的字幕轉換為 srt 格式

使用 youtube-dl 將自動產生的字幕轉換為 srt 格式

我想從 YouTube 下載自動產生的字幕,並將其轉換為 srt 格式。我使用的命令是 `youtube-dl --write-auto-sub --sub-lang en --convert-subs=srt --skip-download <URL>`,但它只輸出 .vtt 檔案,而沒有將其轉換為 srt。

但是當移除 `--skip-download` 選項時,它會下載影片和 vtt 檔案,最後再將 vtt 轉換為 srt。

有沒有辦法只下載 vtt 檔案然後將其轉換為 srt 而不下載影片?

答案1

也許原因是在此過程中在幕後使用的 ffmpeg 不直接處理像“ffmpeg.exe -i path.vtt path.vtt.srt”這樣的重新編碼請求。它需要文件流,為此它需要視頻文件。

至少,這樣的請求對我不起作用,並且報告缺少流,儘管互聯網上存在類似的示例。

答案2

我製作了一個非常簡單的命令列工具來將 vtt 轉換為 srt。它僅透過非常簡單的文字處理(不使用詞法分析或記號化)來運作,並且僅適用於 YouTube 的自動轉錄字幕,例如以下格式的文字:

WEBVTT
Kind: captions
Language: en

00:00:01.740 --> 00:00:05.030 align:start position:0%
沒有<00:00:02.700> 是<00:00:03.179> 不可能...

您可以在此處下載 ZIP 檔案:http://pececko.szm.com/ (項目「VTT to SRT」)。我不打算更新它,所以請將此程式碼視為公共領域。我希望這對某人有用。

// C program to convert subtitles from VTT file to SRT file
// public domain
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAXLINELEN 32767

// Length of " --> hh:mm:ss.mmm": the cue arrow plus the end timestamp.
// Everything after that on a timing line (cue settings) is discarded.
enum { TIMING_TAIL_LEN = 17 };

// Returns nonzero when the line contains the cue arrow " --> ".
static int is_timing_line(const char *line)
{
    return strstr(line, " --> ") != NULL;
}

// Returns nonzero when the line is empty or holds only spaces, tabs or CRs.
static int is_blank_line(const char *line)
{
    return line[strspn(line, " \t\r")] == '\0';
}

// Returns nonzero when name ends with ext.
static int has_extension(const char *name, const char *ext)
{
    size_t name_len = strlen(name);
    size_t ext_len = strlen(ext);

    return name_len >= ext_len && strcmp(name + name_len - ext_len, ext) == 0;
}

// Appends ext to name unless name already ends with it.
// Returns 0 on success, -1 when the result would not fit into cap bytes.
static int ensure_extension(char *name, size_t cap, const char *ext)
{
    if (has_extension(name, ext)) {
        return 0;
    }
    if (strlen(name) + strlen(ext) >= cap) {
        return -1;
    }
    strcat(name, ext);
    return 0;
}

// Bounded string copy. Returns 0 on success, -1 when src does not fit into cap bytes.
static int copy_name(char *dst, size_t cap, const char *src)
{
    size_t len = strlen(src);

    if (len >= cap) {
        return -1;
    }
    memcpy(dst, src, len + 1);
    return 0;
}

// Reads one line from stdin into dst and removes the line terminator.
// On EOF or read error dst becomes the empty string.
static void read_name(char *dst, size_t cap)
{
    fflush(stdout); // make sure the prompt is visible before blocking on input
    if (fgets(dst, (int)cap, stdin) == NULL) {
        dst[0] = '\0';
    }
    dst[strcspn(dst, "\r\n")] = '\0';
}

// Loads the whole stream into a freshly allocated, NUL-terminated buffer.
// *length receives the number of bytes actually read (in text mode this can be
// smaller than the size reported by ftell). Returns NULL on failure; the
// caller owns the returned buffer and must free() it.
static char *load_file(FILE *fh, size_t *length)
{
    long size;
    char *data;

    if (fseek(fh, 0L, SEEK_END) != 0) {
        return NULL;
    }
    size = ftell(fh);
    if (size < 0 || fseek(fh, 0L, SEEK_SET) != 0) {
        return NULL;
    }
    data = calloc((size_t)size + 1, sizeof *data); // +1 for the terminator
    if (data == NULL) {
        return NULL;
    }
    *length = fread(data, sizeof *data, (size_t)size, fh);
    if (ferror(fh)) {
        free(data);
        return NULL;
    }
    data[*length] = '\0';
    return data;
}

// Splits text in place at '\n' (a preceding '\r' is removed as well) and
// returns a newly allocated array with one pointer per line; *count receives
// the number of lines. text must be NUL-terminated at text[length].
// Returns NULL when the array cannot be allocated.
static char **split_lines(char *text, size_t length, size_t *count)
{
    size_t total = 1; // a text without any '\n' is still one line
    size_t used = 0;
    size_t i;
    char **index;

    for (i = 0; i < length; i++) {
        if (text[i] == '\n') {
            total++;
        }
    }
    index = calloc(total, sizeof *index);
    if (index == NULL) {
        return NULL;
    }
    index[used++] = text;
    for (i = 0; i < length; i++) {
        if (text[i] == '\n') {
            text[i] = '\0';
            if (i > 0 && text[i - 1] == '\r') {
                text[i - 1] = '\0';
            }
            index[used++] = &text[i + 1];
        }
    }
    *count = used;
    return index;
}

// Turns a VTT timing line into an SRT one: cuts everything after the end
// timestamp and replaces '.' with ',' in what remains.
static void clean_timing_line(char *line)
{
    char *arrow = strstr(line, " --> ");
    char *p;

    if (arrow == NULL) {
        return;
    }
    if (strlen(arrow) > TIMING_TAIL_LEN) { // never write past a short line
        arrow[TIMING_TAIL_LEN] = '\0';
    }
    for (p = line; *p != '\0'; p++) {
        if (*p == '.') {
            *p = ',';
        }
    }
}

// Removes every <...> span from line in place. An unclosed '<' drops the rest
// of the line; the result is always NUL-terminated.
static void strip_tags(char *line)
{
    const char *src;
    char *dst = line;
    int inside = 0; // boolean: currently between '<' and '>'

    for (src = line; *src != '\0'; src++) {
        if (*src == '<') {
            inside = 1;
        } else if (*src == '>') {
            inside = 0;
        } else if (!inside) {
            *dst++ = *src;
        }
    }
    *dst = '\0';
}

// Converts a YouTube auto-generated .vtt subtitle file into an .srt file.
// argv[1] (optional) is the input name, argv[2] (optional, only honoured when
// exactly two arguments are given) is the output name; missing names are asked
// for on stdin and missing ".vtt"/".srt" extensions are appended.
// Returns 0 on success or when no input name was given, 1 on any error.
int main(int argc,char** argv)
{
    char filein[MAXLINELEN];
    char fileout[MAXLINELEN];
    FILE *fh;
    char *buffer = NULL;  // whole input file, split in place into lines
    char **arr = NULL;    // pointers to the lines inside buffer
    char *last_text;
    size_t numbytes = 0;
    size_t pos = 0;       // number of valid entries in arr
    size_t first, stem, i, j;
    int prev_timing, cue;
    int rc = 1;

    fputs("Convert Youtube's Autotranscribed VTT to SRT [build 2023-06-22]\nConverts .vtt files from youtube to .srt files.\nUsage: vtttosrt.exe [sourcesubtitles.vtt] [targetsubtitles.srt]\nPress Ctrl+C to abort.\n", stdout);

    // --- input filename ---
    if (argc > 1) {
        if (copy_name(filein, sizeof filein, argv[1]) != 0) {
            printf("Filename too long.\n");
            return 1;
        }
    } else {
        printf("Enter input filename (*.vtt) :");
        read_name(filein, sizeof filein);
    }
    if (filein[0] == '\0') {
        return 0;
    }
    if (ensure_extension(filein, sizeof filein, ".vtt") != 0) {
        printf("Filename too long.\n");
        return 1;
    }

    // --- load the file and index its lines ---
    fh = fopen(filein, "r");
    if (fh == NULL) {
        printf("File %s not found.\n", filein);
        return 1;
    }
    buffer = load_file(fh, &numbytes);
    fclose(fh);
    if (buffer == NULL) {
        printf("Cannot read file %s.\n", filein);
        return 1;
    }
    arr = split_lines(buffer, numbytes, &pos);
    if (arr == NULL) {
        goto done;
    }

    // --- pass 1: normalise timing lines, strip <tags>, drop blank lines ---
    j = 0;
    for (i = 0; i < pos; i++) {
        char *line = arr[i];

        if (is_timing_line(line)) {
            clean_timing_line(line);
        } else {
            strip_tags(line);
            if (is_blank_line(line)) {
                continue;
            }
        }
        arr[j++] = line;
    }
    pos = j;

    // --- pass 2: skip the header, drop text lines repeating the previous text ---
    first = 0;
    while (first < pos && !is_timing_line(arr[first])) {
        first++;
    }
    if (first == pos) {
        printf("No subtitle cues found in %s.\n", filein);
        goto done;
    }
    // The first timing line doubles as the initial comparand; no text line
    // can equal it because text lines do not contain " --> ".
    last_text = arr[0] = arr[first];
    j = 1;
    for (i = first + 1; i < pos; i++) {
        if (!is_timing_line(arr[i])) {
            if (strcmp(arr[i], last_text) == 0) {
                continue; // same text as the previously kept text line
            }
            last_text = arr[i];
        }
        arr[j++] = arr[i];
    }
    pos = j;

    // --- pass 3: thin out timing lines ---
    // A timing line that directly follows a text line is dropped; one that
    // follows another timing line is kept (arr[0] is always kept).
    // NOTE(review): presumably tuned to the paired cues YouTube emits after
    // pass 2 - confirm before using on other VTT sources.
    prev_timing = 0;
    j = 1;
    for (i = 1; i < pos; i++) {
        int timing = is_timing_line(arr[i]);

        if (prev_timing || !timing) {
            arr[j++] = arr[i];
        }
        prev_timing = timing;
    }
    pos = j;
    printf("Subtitles from file %s loaded.\n", filein);

    // --- output filename ---
    stem = strlen(filein) - 3; // filein is guaranteed to end in ".vtt" here
    if (argc == 3) {
        if (copy_name(fileout, sizeof fileout, argv[2]) != 0) {
            printf("Filename too long.\n");
            goto done;
        }
    } else {
        printf("Enter output filename (default: %.*ssrt) : ", (int)stem, filein);
        read_name(fileout, sizeof fileout);
    }
    if (fileout[0] == '\0') {
        // default: input name with the "vtt" extension replaced by "srt"
        memcpy(fileout, filein, stem);
        memcpy(fileout + stem, "srt", 4);
    }
    if (ensure_extension(fileout, sizeof fileout, ".srt") != 0) {
        printf("Filename too long.\n");
        goto done;
    }
    fh = fopen(fileout, "w");
    if (fh == NULL) {
        printf("File creating error. Sending the result to stdout.\n");
        fh = stdout;
    }

    // --- write the cues: a counter before each timing line, a blank line after each text line ---
    cue = 1;
    for (i = 0; i < pos; i++) {
        if (is_timing_line(arr[i])) {
            fprintf(fh, "%d\n%s\n", cue, arr[i]);
            cue++;
        } else {
            fprintf(fh, "%s\n\n", arr[i]);
        }
    }
    if (fh != stdout && fclose(fh) != 0) {
        printf("Error while writing %s.\n", fileout);
        goto done;
    }
    printf("Subtitles exported to %s.\n", fileout);
    rc = 0;

done:
    free(arr);
    free(buffer);
    return rc;
}

相關內容