PythonでMP4またはMP3をWhisperを使って文字起こしし、適宜その内容を要約させたりするためのコード

未分類

2024.02.23

タイトル通り

とりあえず私の環境では問題なく動きましたが、適宜調整してください

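pip install "moviepy<2.0" "openai<1.0" pydub

※ このコードは openai 1.0 未満の旧API（openai.Audio / openai.ChatCompletion）と、moviepy 2.0 未満（moviepy.editor / subclip）を前提としています。新しいバージョンではそのままでは動かないため、バージョンを固定してインストールしてください。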

import tkinter as tk
from tkinter import filedialog
from moviepy.editor import VideoFileClip, AudioFileClip
import openai
import os
import math

class MP4ToMP3Converter:
    """Turn an MP3/MP4 file into a Whisper transcript and a ChatGPT analysis.

    The class bundles the individual steps of the pipeline: picking a file
    through a Tk dialog, extracting the audio track of an MP4 as MP3,
    splitting long audio into fixed-length parts, transcribing each part
    with the OpenAI audio API and sending the transcript to the chat API.
    """

    # File extensions (lower case) accepted by select_file().
    SUPPORTED_EXTENSIONS = ('.mp3', '.mp4')

    def __init__(self):
        """Read the API key from ``OPENAI_API_KEY`` and create a hidden Tk root."""
        self.api_key = os.getenv('OPENAI_API_KEY')
        openai.api_key = self.api_key
        # A Tk root is required for the file dialog; withdraw() keeps the
        # otherwise empty main window from being shown.
        self.root = tk.Tk()
        self.root.withdraw()

    def select_file(self):
        """Ask the user for an MP3/MP4 file and return its path.

        The dialog is shown again while the chosen file has an unsupported
        extension. The extension check is case-insensitive.

        Returns:
            The selected file path.

        Raises:
            SystemExit: If the dialog is cancelled (no file chosen). Without
                this the dialog would be reopened forever.
        """
        while True:
            file_path = filedialog.askopenfilename(
                filetypes=[("Audio/Video files", "*.mp3 *.mp4")]
            )
            if not file_path:
                # Cancelling yields an empty value; stop instead of looping.
                raise SystemExit("ファイルが選択されませんでした。")
            if file_path.lower().endswith(self.SUPPORTED_EXTENSIONS):
                return file_path
            print("選択されたファイルはサポートされていない形式です。")

    def convert_to_mp3_if_needed(self, file_path):
        """Extract the audio of an MP4 file into an MP3 file next to it.

        Args:
            file_path: Path of the input file.

        Returns:
            The path of the written MP3 file when the input has an ``.mp4``
            extension (any letter case); otherwise ``file_path`` unchanged.

        Raises:
            ValueError: If the video has no audio track.
        """
        base_path, ext = os.path.splitext(file_path)
        if ext.lower() != '.mp4':
            return file_path

        # Only the extension is swapped; a ".mp4" elsewhere in the path
        # (e.g. in a directory name) must stay untouched.
        mp3_file_path = base_path + '.mp3'
        video_clip = VideoFileClip(file_path)
        try:
            if video_clip.audio is None:
                raise ValueError(f"No audio track found in: {file_path}")
            video_clip.audio.write_audiofile(mp3_file_path)
        finally:
            video_clip.close()
        return mp3_file_path

    @staticmethod
    def _split_ranges(duration, split_length):
        """Return the ``(start, end)`` second ranges that cover ``duration``."""
        if split_length <= 0:
            raise ValueError("split_length must be a positive number of seconds")
        part_count = math.ceil(duration / split_length)
        return [
            (index * split_length, min((index + 1) * split_length, duration))
            for index in range(part_count)
        ]

    def get_audio_length(self, file_path):
        """Return the duration of the audio file in seconds."""
        audio_clip = AudioFileClip(file_path)
        try:
            return audio_clip.duration
        finally:
            audio_clip.close()

    def split_mp3(self, file_path, split_length=900):  # 900 seconds = 15 minutes
        """Write the audio as consecutive parts of at most ``split_length`` seconds.

        Parts are written next to the input as ``<name>_part_<n><ext>``,
        numbered from 1. Audio shorter than ``split_length`` still produces
        a single part file.

        Args:
            file_path: Path of the audio file to split.
            split_length: Maximum length of one part in seconds.

        Returns:
            The list of written part file paths, in playback order.

        Raises:
            ValueError: If ``split_length`` is not positive.
        """
        base_path, ext = os.path.splitext(file_path)
        split_files = []

        audio_clip = AudioFileClip(file_path)
        try:
            ranges = self._split_ranges(audio_clip.duration, split_length)
            for part_number, (start_time, end_time) in enumerate(ranges, start=1):
                split_file_path = f"{base_path}_part_{part_number}{ext}"
                split_audio_clip = audio_clip.subclip(start_time, end_time)
                split_audio_clip.write_audiofile(split_file_path, codec='libmp3lame')
                split_files.append(split_file_path)
        finally:
            # Release the underlying reader even if writing a part fails.
            audio_clip.close()
        return split_files

    def transcribe_audio_to_text(self, mp3_file_paths, output_file_path, language="ja"):
        """Transcribe every audio file and write the combined text to a file.

        Args:
            mp3_file_paths: Audio file paths, transcribed in the given order.
            output_file_path: Destination of the combined transcript (UTF-8).
            language: Language code passed to the transcription request.
        """
        transcripts = []
        for mp3_file_path in mp3_file_paths:
            with open(mp3_file_path, "rb") as audio_file:
                response = openai.Audio.transcribe(
                    model="whisper-1",
                    file=audio_file,
                    response_format="text",
                    language=language,
                )
            # Assumes the response is the transcript string itself because
            # response_format="text" is requested.
            transcripts.append(response + "\n\n")

        with open(output_file_path, "w", encoding="utf-8") as file:
            file.write("".join(transcripts))

    def process_mp3_file(self, file_path, split_length=900):
        """Split the audio only when it is longer than ``split_length`` seconds.

        Args:
            file_path: Path of the audio file.
            split_length: Threshold and part length in seconds.

        Returns:
            The list of part files when the audio was split, otherwise a
            one-element list holding ``file_path`` itself.
        """
        if self.get_audio_length(file_path) > split_length:
            print("ファイルを分割しています...")
            return self.split_mp3(file_path, split_length)
        return [file_path]

    def analyze_text_with_chatgpt(self, text, analyze, model="gpt-3.5-turbo", temperature=0.7):
        """Send ``text`` to the chat completion API and return the reply text.

        Args:
            text: Content sent as the user message (e.g. the transcript).
            analyze: Instructions sent as the system message.
            model: Chat model name.
            temperature: Sampling temperature passed to the API.

        Returns:
            The message content of the first returned choice.
        """
        messages = [
            {"role": "system", "content": analyze},
            {"role": "user", "content": text},
        ]
        response = openai.ChatCompletion.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )
        return response['choices'][0]['message']['content']

    def save_text_to_file(self, text, file_path):
        """Write ``text`` to ``file_path`` as UTF-8, replacing existing content."""
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(text)


def main():
    """Run the pipeline: pick a file, transcribe it, then save a summary.

    Writes ``<input name>.txt`` (the transcript) and
    ``<input name>_summary.txt`` (the ChatGPT analysis) next to the
    selected file.
    """
    converter = MP4ToMP3Converter()
    file_path = converter.select_file()
    mp3_file_path = converter.convert_to_mp3_if_needed(file_path)
    # Split into parts of at most 900 seconds (15 minutes) each.
    mp3_file_paths = converter.split_mp3(mp3_file_path, 900)

    base_file_path = os.path.splitext(file_path)[0]
    # The transcription is requested as plain text, not WebVTT, so the
    # transcript is stored with a .txt extension.
    transcript_file_path = base_file_path + '.txt'
    converter.transcribe_audio_to_text(mp3_file_paths, transcript_file_path)

    # Load the transcript that was just written.
    with open(transcript_file_path, 'r', encoding='utf-8') as file:
        transcript = file.read()

    if not transcript.strip():
        # Nothing was transcribed, so there is nothing to analyze.
        print("文字起こし結果が空のため、要約をスキップします。")
        return

    # Run the analysis. The string below is the system prompt; replace it
    # with the actual instructions. No token-based splitting is performed.
    analyze = """ここにどのような内容で文字起こしされた内容をまとめてほしいか記載。トークン数による適切な分割は組み込んでいないので、注意"""
    analyzed_text = converter.analyze_text_with_chatgpt(transcript, analyze)

    # Save the analysis result.
    summary_file_path = base_file_path + '_summary.txt'
    converter.save_text_to_file(analyzed_text, summary_file_path)


if __name__ == "__main__":
    main()