From a847e8cfea917293617f32cd98f4621999f06f29 Mon Sep 17 00:00:00 2001
From: yjg0815git
Date: Fri, 16 May 2025 08:00:14 -0500
Subject: [PATCH 1/6] interaction

---
 app/service/elevenLabs.py       | 20 +++----
 app/service/gpt.py              | 43 ++++++++++++++
 app/service/interaction.py      | 99 +++++++++++++++++++++++++++++++++
 app/service/record_respberry.py | 33 +++++++++--
 app/utils/play_file.py          |  8 +--
 requirements.txt                | 15 +++++
 6 files changed, 199 insertions(+), 19 deletions(-)
 create mode 100644 app/service/interaction.py

diff --git a/app/service/elevenLabs.py b/app/service/elevenLabs.py
index 8eeddb0..11c2942 100644
--- a/app/service/elevenLabs.py
+++ b/app/service/elevenLabs.py
@@ -4,7 +4,7 @@
 from dotenv import load_dotenv
 from elevenlabs import ElevenLabs, VoiceSettings
 
-from app.service.s3Service import upload_to_s3
+from s3Service import upload_to_s3
 
 load_dotenv()
 client = ElevenLabs(
@@ -72,18 +72,18 @@ def text_to_speech_file_save_AWS(text: str, voice_id: str) -> str:
 def text_to_speech_file(text: str, voice_id: str) -> str:
     response = client.text_to_speech.convert(
         voice_id=voice_id,
-        output_format="mp3_22050_32",
+        # output_format="mp3_22050_32",
         text=text,
-        model_id="eleven_turbo_v2_5",
-        voice_settings=VoiceSettings(
-            stability=0.3,
-            similarity_boost=1.0,
-            style=0.0,
-            use_speaker_boost=True,
-        ),
+        model_id="eleven_multilingual_v2",
+        # voice_settings=VoiceSettings(
+        #     stability=0.3,
+        #     similarity_boost=1.0,
+        #     style=0.0,
+        #     use_speaker_boost=True,
+        # ),
     )
 
-    save_file_path = f"{uuid.uuid4()}.mp3"
+    save_file_path = f"{uuid.uuid4()}.wav"
     with open(save_file_path, "wb") as f:
         for chunk in response:
             if chunk:
diff --git a/app/service/gpt.py b/app/service/gpt.py
index 33118ea..6d027c1 100644
--- a/app/service/gpt.py
+++ b/app/service/gpt.py
@@ -50,3 +50,46 @@ def get_schedule_json(self):
         schedule_dict = parsing_json.extract_json_from_content(content)
 
         return schedule_dict
+
+class GenerateQuestionGPT:
+    def __init__(self, text, alias):
+        self.text = text
+        self.alias = alias
+
+    def create_schedule_prompt(self):
+        system_message = f"""
+        너는 지금부터 혼자 사시는 부모님을 걱정하는 보호자야.
+
+        네 역할은 키워드를 보고, 키워드와 관련한 문제에 대해서 부모님을 걱정하고, 생활을 챙겨주는 거야.
+        키워드는 다음과 같아: {str(self.text)}
+
+        너의 목표는 다음과 같아:
+        1. 키워드에 대한 질문 혹은 문장을 한 줄의 텍스트로 만들어.
+        ex) 키워드가 '저녁' 이라면, "{self.alias}~~ 하루 잘 보냈어?? 저녁도 맛있는거 챙겨먹어!! 사랑해~~ "
+        2. 만든 텍스트는 ?? !! ~~ ,, .. 등의 다양한 특수문자가 많이 들어갈 수 있어. 감정이 강하게 느껴지게 작성해줘.
+        2-a. 특수문자를 붙일 때는 꼭 2개씩 붙여줘
+        3. 부모님을 지칭하는 별명은 {self.alias} 로 해줘.
+        4. 문장과 문장 사이의 띄어쓰기를 2개씩 넣어줘
+
+        결과는 {{"키워드": "문장"}} 형태의 JSON 문자열로 반환해줘. 꼭 큰따옴표(")만 사용해.
+ + """ + + messages = [ + {"role": "system", "content": system_message} + ] + return messages + + def get_schedule_json(self): + prompt = self.create_schedule_prompt() + response = client.chat.completions.create( + model="gpt-4-turbo", + messages=prompt, + temperature=0.5, + max_tokens=2048 + ) + + content = response.choices[0].message.content + schedule_dict = parsing_json.extract_json_from_content(content) + + return schedule_dict \ No newline at end of file diff --git a/app/service/interaction.py b/app/service/interaction.py new file mode 100644 index 0000000..eea5754 --- /dev/null +++ b/app/service/interaction.py @@ -0,0 +1,99 @@ +import time +import pyaudio +import numpy as np +import soundfile as sf +from faster_whisper import WhisperModel +from openai import OpenAI +import openai +from elevenlabs import play, ElevenLabs +from elevenLabs import text_to_speech_file +from dotenv import load_dotenv +import os +from datetime import datetime + +from record_respberry import emotion_record, is_silent +model = WhisperModel("tiny", device="cpu", compute_type="int8") +import subprocess + +MIC_INDEX = 1 # USB 마이크 인덱스 +FORMAT = pyaudio.paInt16 +CHANNELS = 1 +RATE = 44100 +CHUNK = 4096 +SILENCE_LIMIT = 10 # 침묵 3초 이상이면 종료 + +# 오늘 날짜 문자열 +today_str = datetime.now().strftime("%Y%m%d") +WAVE_OUTPUT_FILENAME = "/home/team4/Desktop/capstone/AI/app/emotion_diary/" + today_str + "_" + +# === 환경 변수/API 키 세팅 === +load_dotenv() + +gpt = OpenAI( + api_key=os.getenv("OPENAI_API_KEY") +) +cloning = ElevenLabs( + api_key=os.getenv("ELEVENLABS_KEY"), +) + +# === Whisper 모델 로드 (tiny가 가장 가볍고 빠름) === +model = WhisperModel("tiny", device="cpu", compute_type="int8") + +# === 오디오 입력 설정 === +RATE = 44100 +CHANNELS = 1 +CHUNK = RATE * 3 # 3초 단위로 STT +FORMAT = pyaudio.paInt16 + +p = pyaudio.PyAudio() +stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK) +INDEX = 1 + +print("실시간 STT + GPT 질문 생성 + ElevenLabs Voice Cloning 시작 (Ctrl+C로 종료)") + + +messages = [ + {"role": "system", "content": "너는 대화를 자연스럽게 이어가는 AI야. 사용자와 계속 이어지는 대화를 만들어야 해."} +] +INDEX = 0 + +try: + while True: + wav_filename = emotion_record(INDEX) + INDEX += 1 + + # === 2. STT === + segments, _ = model.transcribe(wav_filename, beam_size=1, language="ko") + user_text = " ".join([seg.text for seg in segments]).strip() + print("STT 결과:", user_text) + if not user_text: + print("음성이 인식되지 않았습니다.") + continue + + # 3. messages에 사용자 발화 추가 + messages.append({"role": "user", "content": user_text}) + + # 4. GPT에 전체 메시지 전달 + response = openai.ChatCompletion.create( + model="gpt-4o", + messages=messages + ) + question = response['choices'][0]['message']['content'].strip() + print("생성된 질문:", question) + + # 5. GPT 응답도 messages에 추가 + messages.append({"role": "assistant", "content": question}) + + # 6. ElevenLabs TTS로 변환 + audio_path = text_to_speech_file(question) + + # 7. 
음성 재생 + subprocess.run(["mpg321", audio_path]) + +except KeyboardInterrupt: + print("종료합니다.") +finally: + stream.stop_stream() + stream.close() + p.terminate() + diff --git a/app/service/record_respberry.py b/app/service/record_respberry.py index 2ea44ff..3f76b84 100644 --- a/app/service/record_respberry.py +++ b/app/service/record_respberry.py @@ -3,25 +3,30 @@ import numpy as np import os from datetime import datetime +from s3Service import upload_to_s3 +from elevenLabs import text_to_speech_file +from faster_whisper import WhisperModel +model = WhisperModel("tiny", device="cpu", compute_type="int8") +import subprocess MIC_INDEX = 1 # USB 마이크 인덱스 FORMAT = pyaudio.paInt16 CHANNELS = 1 RATE = 44100 CHUNK = 4096 -SILENCE_LIMIT = 5 # 침묵 3초 이상이면 종료 +SILENCE_LIMIT = 10 # 침묵 3초 이상이면 종료 # 오늘 날짜 문자열 today_str = datetime.now().strftime("%Y%m%d") -WAVE_OUTPUT_FILENAME = "/home/team4/Desktop/capstone/AI/app/emotion_diary/" + today_str + "_output.mp3" +WAVE_OUTPUT_FILENAME = "/home/team4/Desktop/capstone/AI/app/emotion_diary/" + today_str + "_" def is_silent(data, threshold=100): audio_data = np.frombuffer(data, dtype=np.int16) rms = np.sqrt(np.mean(audio_data**2)) print(f"RMS: {rms}") return rms < threshold - -def emotion_record(): +model = WhisperModel("tiny", device="cpu", compute_type="int8") +def emotion_record(index): p = pyaudio.PyAudio() stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, input_device_index=MIC_INDEX, frames_per_buffer=CHUNK) @@ -54,7 +59,7 @@ def emotion_record(): stream.close() p.terminate() - wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb') + wf = wave.open(WAVE_OUTPUT_FILENAME+index+".wav", 'wb') wf.setnchannels(CHANNELS) wf.setsampwidth(p.get_sample_size(FORMAT)) wf.setframerate(RATE) @@ -87,9 +92,27 @@ def emotion_record(): wf.close() print(f"파일 저장 완료: {WAVE_OUTPUT_FILENAME}") + return WAVE_OUTPUT_FILENAME + + # s3_path = upload_to_s3(WAVE_OUTPUT_FILENAME) + +def start(alias): + text = alias + "~~ 오늘 좋은 하루 보냈나~~?? 어떻게 지냈어!!" 
+ save_file_path = text_to_speech_file(text) + subprocess.run(["mpg321", save_file_path]) + + while(True): + emotion_record() + segments, _ = model.transcribe(save_file_path, beam_size=1, language="ko") + user_text = " ".join([seg.text for seg in segments]).strip() + + subprocess.run(["mpg321", local_file_path]) + + + # import pyaudio # p = pyaudio.PyAudio() diff --git a/app/utils/play_file.py b/app/utils/play_file.py index 2a549e3..ce1032a 100644 --- a/app/utils/play_file.py +++ b/app/utils/play_file.py @@ -25,7 +25,7 @@ def play_at_target_time(target_time: str, local_file_path: str): subprocess.run(["mpg321", local_file_path]) # window 테스트 용 - from playsound import playsound - from pathlib import Path - safe_path = Path(local_file_path).resolve().as_posix() - playsound(safe_path) + # from playsound import playsound + # from pathlib import Path + # safe_path = Path(local_file_path).resolve().as_posix() + # playsound(safe_path) diff --git a/requirements.txt b/requirements.txt index 52a2fd7..d7099e7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ annotated-types==0.7.0 anyio==4.9.0 +av==14.3.0 boto3==1.37.16 botocore==1.37.16 certifi==2025.1.31 @@ -7,16 +8,22 @@ cffi==1.17.1 charset-normalizer==3.4.1 click==8.1.8 colorama==0.4.6 +coloredlogs==15.0.1 +ctranslate2==4.6.0 distro==1.9.0 dotenv==0.9.9 elevenlabs==1.54.0 exceptiongroup==1.2.2 fastapi==0.115.11 +faster-whisper==1.1.1 filelock==3.18.0 +flatbuffers==20181003210633 fsspec==2025.3.2 h11==0.14.0 httpcore==1.0.7 httpx==0.28.1 +huggingface-hub==0.31.2 +humanfriendly==10.0 idna==3.10 Jinja2==3.1.6 jiter==0.9.0 @@ -25,8 +32,12 @@ MarkupSafe==3.0.2 mpmath==1.3.0 networkx==3.2.1 numpy==2.0.2 +onnxruntime==1.19.2 openai==1.68.2 +packaging==25.0 playsound==1.3.0 +protobuf==6.31.0 +PyAudio==0.2.14 pycparser==2.22 pydantic==2.10.6 pydantic_core==2.27.2 @@ -34,14 +45,18 @@ pydub==0.25.1 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 python-multipart==0.0.20 +PyYAML==6.0.2 requests==2.32.3 +RPi.GPIO==0.7.1 s3transfer==0.11.4 scipy==1.13.1 six==1.17.0 sniffio==1.3.1 sounddevice==0.5.1 +soundfile==0.13.1 starlette==0.46.1 sympy==1.14.0 +tokenizers==0.21.1 torch==2.7.0 torchaudio==2.7.0 tqdm==4.67.1 From bbf307507d28da255786b8ce9bfe9dfa10342bc9 Mon Sep 17 00:00:00 2001 From: yjg0815git Date: Thu, 22 May 2025 08:59:14 -0500 Subject: [PATCH 2/6] fix: check mic. 
setting & gpt interaction --- .gitignore | 3 +- app/service/elevenLabs.py | 7 +- app/service/interaction.py | 180 ++++++++++++++++--------------- app/service/record_respberry.py | 182 ++++++++++++-------------------- app/service/s3Service.py | 2 +- 5 files changed, 172 insertions(+), 202 deletions(-) diff --git a/.gitignore b/.gitignore index 25e5a12..f792265 100644 --- a/.gitignore +++ b/.gitignore @@ -38,4 +38,5 @@ venv.bak/ __pycache__/ *.pyc audio/ -emotion_diary/ \ No newline at end of file +emotion_diary/ +pyvenv.cfg \ No newline at end of file diff --git a/app/service/elevenLabs.py b/app/service/elevenLabs.py index 11c2942..b715910 100644 --- a/app/service/elevenLabs.py +++ b/app/service/elevenLabs.py @@ -10,6 +10,7 @@ client = ElevenLabs( api_key=os.getenv("ELEVENLABS_KEY"), ) +yjg_voice_id = os.getenv("YJG_VOICE_ID") def get_voice(): @@ -42,7 +43,7 @@ def add_voice(name: str, local_file_paths: list): return response.voice_id -def text_to_speech_file_save_AWS(text: str, voice_id: str) -> str: +def text_to_speech_file_save_AWS(text: str, voice_id=yjg_voice_id) -> str: response = client.text_to_speech.convert( voice_id=voice_id, output_format="mp3_22050_32", @@ -69,7 +70,7 @@ def text_to_speech_file_save_AWS(text: str, voice_id: str) -> str: return aws_file_url -def text_to_speech_file(text: str, voice_id: str) -> str: +def text_to_speech_file(text: str, voice_id=yjg_voice_id) -> str: response = client.text_to_speech.convert( voice_id=voice_id, # output_format="mp3_22050_32", @@ -89,7 +90,7 @@ def text_to_speech_file(text: str, voice_id: str) -> str: if chunk: f.write(chunk) # aws_file_url = upload_to_s3(local_file_path=save_file_path) - os.remove(save_file_path) + # os.remove(save_file_path) # delete_voice(voice_id) diff --git a/app/service/interaction.py b/app/service/interaction.py index eea5754..4518264 100644 --- a/app/service/interaction.py +++ b/app/service/interaction.py @@ -1,99 +1,111 @@ -import time +import os +import subprocess +from datetime import datetime + import pyaudio import numpy as np -import soundfile as sf from faster_whisper import WhisperModel from openai import OpenAI -import openai -from elevenlabs import play, ElevenLabs from elevenLabs import text_to_speech_file +from elevenlabs import ElevenLabs from dotenv import load_dotenv -import os -from datetime import datetime +# 아래 두 함수는 record_respberry.py 에 구현된 그대로 사용합니다. 
+# emotion_record(index) → "{prefix}{index}.wav" 파일을 만들어 리턴 +# is_silent(data) → 음성 청크가 침묵인지 여부 판단 from record_respberry import emotion_record, is_silent -model = WhisperModel("tiny", device="cpu", compute_type="int8") -import subprocess - -MIC_INDEX = 1 # USB 마이크 인덱스 -FORMAT = pyaudio.paInt16 -CHANNELS = 1 -RATE = 44100 -CHUNK = 4096 -SILENCE_LIMIT = 10 # 침묵 3초 이상이면 종료 -# 오늘 날짜 문자열 -today_str = datetime.now().strftime("%Y%m%d") -WAVE_OUTPUT_FILENAME = "/home/team4/Desktop/capstone/AI/app/emotion_diary/" + today_str + "_" - -# === 환경 변수/API 키 세팅 === +# ==== 공통 설정 ==== load_dotenv() +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") +ELEVENLABS_KEY = os.getenv("ELEVENLABS_KEY") + +if not OPENAI_API_KEY or not ELEVENLABS_KEY: + raise RuntimeError(".env 에 OPENAI_API_KEY/ELEVENLABS_KEY 를 설정하세요") -gpt = OpenAI( - api_key=os.getenv("OPENAI_API_KEY") -) -cloning = ElevenLabs( - api_key=os.getenv("ELEVENLABS_KEY"), -) +# OpenAI / ElevenLabs 클라이언트 +gpt_client = OpenAI(api_key=OPENAI_API_KEY) +tts_client = ElevenLabs(api_key=ELEVENLABS_KEY) -# === Whisper 모델 로드 (tiny가 가장 가볍고 빠름) === -model = WhisperModel("tiny", device="cpu", compute_type="int8") +# Whisper 모델 (tiny, CPU, int8) +whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8") -# === 오디오 입력 설정 === -RATE = 44100 +# 녹음 파라미터 (ALSA default=USBMIC 으로 잡힌 상태) +FORMAT = pyaudio.paInt16 CHANNELS = 1 -CHUNK = RATE * 3 # 3초 단위로 STT -FORMAT = pyaudio.paInt16 - -p = pyaudio.PyAudio() -stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK) -INDEX = 1 - -print("실시간 STT + GPT 질문 생성 + ElevenLabs Voice Cloning 시작 (Ctrl+C로 종료)") - - -messages = [ - {"role": "system", "content": "너는 대화를 자연스럽게 이어가는 AI야. 사용자와 계속 이어지는 대화를 만들어야 해."} -] -INDEX = 0 - -try: - while True: - wav_filename = emotion_record(INDEX) - INDEX += 1 - - # === 2. STT === - segments, _ = model.transcribe(wav_filename, beam_size=1, language="ko") - user_text = " ".join([seg.text for seg in segments]).strip() - print("STT 결과:", user_text) - if not user_text: - print("음성이 인식되지 않았습니다.") - continue - - # 3. messages에 사용자 발화 추가 - messages.append({"role": "user", "content": user_text}) - - # 4. GPT에 전체 메시지 전달 - response = openai.ChatCompletion.create( - model="gpt-4o", - messages=messages - ) - question = response['choices'][0]['message']['content'].strip() - print("생성된 질문:", question) - - # 5. GPT 응답도 messages에 추가 - messages.append({"role": "assistant", "content": question}) - - # 6. ElevenLabs TTS로 변환 - audio_path = text_to_speech_file(question) - - # 7. 음성 재생 - subprocess.run(["mpg321", audio_path]) - -except KeyboardInterrupt: - print("종료합니다.") -finally: - stream.stop_stream() - stream.close() - p.terminate() +RATE = 44100 +CHUNK = RATE * 3 # 3초 단위 버퍼 + +# 오늘 날짜 기반 녹음 파일 저장 경로 prefix +today_str = datetime.now().strftime("%Y%m%d") +WAVE_OUTPUT_PREFIX = f"/home/team4/Desktop/capstone/AI/app/emotion_diary/{today_str}_" + +def interaction(alias: str): + """ + alias: 사용자 이름 또는 AI가 부르는 별칭 (ex: "홍길동") + 1) alias 인사 → TTS → 재생 + 2) 이후 반복: emotion_record → Whisper STT → GPT 질문 생성 → TTS → 재생 + """ + # 1) alias 인사 + greet_text = f"{alias}~~ 오늘 좋은 하루 보냈나~~?? 어떻게 지냈어!!" + print("👋 인사:", greet_text) + greet_audio = text_to_speech_file(greet_text) + subprocess.run(["mpg321", greet_audio], check=True) + + # 대화 이력 초기화 + messages = [ + {"role": "system", + "content": "너는 대화를 자연스럽게 이어가는 AI야. 
사용자와 계속 이어지는 대화를 만들어야 해."}, + {"role": "assistant", "content": greet_text} + ] + + record_idx = 0 + try: + while True: + # 2-1) 감정 녹음 (침묵 기준으로 자동 종료) + wav_path = emotion_record(record_idx) + print(f"[녹음 완료] {wav_path}") + record_idx += 1 + + # 2-2) Whisper STT (한국어) + segments, _ = whisper_model.transcribe(wav_path, + beam_size=1, + language="ko") + user_text = " ".join(seg.text for seg in segments).strip() + print("▶ 사용자 음성(텍스트):", user_text or "(인식 안됨)") + + if not user_text: + print("(음성 인식 실패 → 다시 녹음)") + continue + + # 2-3) GPT-4o 에 질문 생성 요청 + messages.append({"role": "user", "content": user_text}) + resp = gpt_client.chat.completions.create( + model="gpt-4o", + messages=messages + ) + question = resp.choices[0].message.content.strip() + print("생성된 질문:", question) + + # 2-4) 대화 이력에 어시스턴트 질문 추가 + messages.append({"role": "assistant", "content": question}) + + # 2-5) 질문 → ElevenLabs TTS → 파일 + tts_path = text_to_speech_file(question) + print(" (TTS 파일 생성:", tts_path, ")") + + # 2-6) 재생 + subprocess.run(["mpg321", tts_path], check=True) + + except KeyboardInterrupt: + print("\n[사용자 종료 요청] interaction을 종료합니다.") + except Exception as e: + print("예외 발생:", e) + + print("=== interaction 종료 ===") + +if __name__ == "__main__": + # 스크립트를 직접 실행할 때만 동작 + # alias를 원하는 이름으로 바꿔주세요 + interaction("홍길동") diff --git a/app/service/record_respberry.py b/app/service/record_respberry.py index 3f76b84..3bb9206 100644 --- a/app/service/record_respberry.py +++ b/app/service/record_respberry.py @@ -1,129 +1,85 @@ -import pyaudio -import wave -import numpy as np import os +import wave from datetime import datetime -from s3Service import upload_to_s3 -from elevenLabs import text_to_speech_file -from faster_whisper import WhisperModel -model = WhisperModel("tiny", device="cpu", compute_type="int8") -import subprocess -MIC_INDEX = 1 # USB 마이크 인덱스 +import numpy as np +import pyaudio + +# === 녹음 설정 === FORMAT = pyaudio.paInt16 CHANNELS = 1 RATE = 44100 -CHUNK = 4096 -SILENCE_LIMIT = 10 # 침묵 3초 이상이면 종료 - -# 오늘 날짜 문자열 -today_str = datetime.now().strftime("%Y%m%d") -WAVE_OUTPUT_FILENAME = "/home/team4/Desktop/capstone/AI/app/emotion_diary/" + today_str + "_" - -def is_silent(data, threshold=100): +CHUNK = 4096 # 약 0.093초 분량 (4096/44100) +SILENCE_LIMIT = 5 # 5초 연속 침묵이면 녹음 종료 +BASE_DIR = "/home/team4/Desktop/capstone/AI/app/emotion_diary" + +# 날짜 기반 하위 디렉터리(매일 한 번만 생성) +def _ensure_dir(): + os.makedirs(BASE_DIR, exist_ok=True) + +def is_silent(data: bytes, threshold: float = 1000.0) -> bool: + """ + 한 프레임(CHUNK) 크기의 raw PCM data를 받아 + RMS 기준으로 침묵 여부를 판단. 
+ """ audio_data = np.frombuffer(data, dtype=np.int16) - rms = np.sqrt(np.mean(audio_data**2)) - print(f"RMS: {rms}") + rms = np.sqrt(np.mean(audio_data.astype(np.float32) ** 2)) + # print(f"RMS={rms:.1f}") # 필요 시 디버그용 return rms < threshold -model = WhisperModel("tiny", device="cpu", compute_type="int8") -def emotion_record(index): - p = pyaudio.PyAudio() - stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, - input_device_index=MIC_INDEX, frames_per_buffer=CHUNK) - - print("녹음 시작...") - frames = [] - silence_counter = 0 - p = pyaudio.PyAudio() - stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, - input_device_index=MIC_INDEX, frames_per_buffer=CHUNK) - print("녹음 시작...") +def emotion_record(index: int) -> str: + """ + index: 녹음 파일 구분을 위한 정수 인덱스 + return: 저장된 .wav 파일의 전체 경로 + """ + _ensure_dir() + date_str = datetime.now().strftime("%Y%m%d") + filename = f"{date_str}_{index}.wav" + filepath = os.path.join(BASE_DIR, filename) + + pa = pyaudio.PyAudio() + # input_device_index 를 지정하지 않으면 ALSA default (=USBMIC) 사용 + stream = pa.open( + format=FORMAT, + channels=CHANNELS, + rate=RATE, + input=True, + frames_per_buffer=CHUNK + ) + + print(f"[녹음 시작] {filename}") frames = [] - silence_counter = 0 - - while True: - data = stream.read(CHUNK) - frames.append(data) - if is_silent(data): - silence_counter += CHUNK / RATE - print(f"침묵 감지: {silence_counter:.2f}초") - else: - silence_counter = 0 - - if silence_counter >= SILENCE_LIMIT: - print(f"{SILENCE_LIMIT}초 이상 침묵 감지! 녹음 종료.") - break - - stream.stop_stream() - stream.close() - p.terminate() - - wf = wave.open(WAVE_OUTPUT_FILENAME+index+".wav", 'wb') - wf.setnchannels(CHANNELS) - wf.setsampwidth(p.get_sample_size(FORMAT)) - wf.setframerate(RATE) - wf.writeframes(b''.join(frames)) - wf.close() - - print(f"파일 저장 완료: {WAVE_OUTPUT_FILENAME}") - while True: - data = stream.read(CHUNK) - frames.append(data) - if is_silent(data): - silence_counter += CHUNK / RATE - print(f"침묵 감지: {silence_counter:.2f}초") - else: - silence_counter = 0 - - if silence_counter >= SILENCE_LIMIT: - print(f"{SILENCE_LIMIT}초 이상 침묵 감지! 녹음 종료.") - break - - stream.stop_stream() - stream.close() - p.terminate() - - wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb') + silent_secs = 0.0 + + try: + while True: + data = stream.read(CHUNK, exception_on_overflow=False) + frames.append(data) + + if is_silent(data): + silent_secs += CHUNK / RATE + else: + silent_secs = 0.0 + + if silent_secs >= SILENCE_LIMIT: + print(f"[침묵 {SILENCE_LIMIT}초 감지 → 녹음 종료]") + break + + except Exception as e: + print("녹음 중 예외:", e) + finally: + stream.stop_stream() + stream.close() + pa.terminate() + + # WAV 파일로 저장 + wf = wave.open(filepath, 'wb') wf.setnchannels(CHANNELS) - wf.setsampwidth(p.get_sample_size(FORMAT)) + wf.setsampwidth(pa.get_sample_size(FORMAT)) wf.setframerate(RATE) wf.writeframes(b''.join(frames)) wf.close() - print(f"파일 저장 완료: {WAVE_OUTPUT_FILENAME}") - return WAVE_OUTPUT_FILENAME - - # s3_path = upload_to_s3(WAVE_OUTPUT_FILENAME) - -def start(alias): - text = alias + "~~ 오늘 좋은 하루 보냈나~~?? 어떻게 지냈어!!" 
-    save_file_path = text_to_speech_file(text)
-    subprocess.run(["mpg321", save_file_path])
-
-    while(True):
-        emotion_record()
-        segments, _ = model.transcribe(save_file_path, beam_size=1, language="ko")
-        user_text = " ".join([seg.text for seg in segments]).strip()
-
-        subprocess.run(["mpg321", local_file_path])
-
-
-
-
-
-# import pyaudio
-
-# p = pyaudio.PyAudio()
-
-# print("==== 오디오 입력 장치 목록 ====")
-# for i in range(p.get_device_count()):
-#     info = p.get_device_info_by_index(i)
-#     if info['maxInputChannels'] > 0:
-#         print(f"[Index {i}] {info['name']}")
-#         print(f"  - 입력 채널 수 (maxInputChannels): {info['maxInputChannels']}")
-#         print(f"  - 기본 샘플레이트 (defaultSampleRate): {int(info['defaultSampleRate'])} Hz")
-#         print("-" * 40)
+    print(f"[저장 완료] {filepath}\n")
+    return filepath
-# p.terminate()
\ No newline at end of file
diff --git a/app/service/s3Service.py b/app/service/s3Service.py
index b992d24..74d0879 100644
--- a/app/service/s3Service.py
+++ b/app/service/s3Service.py
@@ -9,7 +9,7 @@
 from dotenv import load_dotenv
 from fastapi import UploadFile
 
-from app.utils.convertFileExtension import convert_to_mp3
+from AI.app.utils.convertFileExtension import convert_to_mp3
 
 load_dotenv()
 

From cfdb19b8672c8a1de78640d03114c837e7fa0ed3 Mon Sep 17 00:00:00 2001
From: yjg0815git
Date: Sat, 24 May 2025 02:15:18 -0500
Subject: [PATCH 3/6] feat: redis subscribe & fastapi lifespan

---
 app/service/elevenLabs.py |  2 +-
 app/service/main.py       | 17 ++++++++++++++++-
 app/service/subscribe.py  | 29 +++++++++++++++++++++++++++++
 main.py                   |  2 +-
 requirements.txt          |  2 +-
 5 files changed, 48 insertions(+), 4 deletions(-)
 create mode 100644 app/service/subscribe.py

diff --git a/app/service/elevenLabs.py b/app/service/elevenLabs.py
index b715910..63d62d1 100644
--- a/app/service/elevenLabs.py
+++ b/app/service/elevenLabs.py
@@ -4,7 +4,7 @@
 from dotenv import load_dotenv
 from elevenlabs import ElevenLabs, VoiceSettings
 
-from s3Service import upload_to_s3
+from AI.app.service.s3Service import upload_to_s3
 
 load_dotenv()
 client = ElevenLabs(
diff --git a/app/service/main.py b/app/service/main.py
index cab8d43..f7cabf5 100644
--- a/app/service/main.py
+++ b/app/service/main.py
@@ -1,11 +1,26 @@
+import asyncio
+
 from fastapi import FastAPI, Depends, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.openapi.utils import get_openapi
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+from contextlib import asynccontextmanager
 
 from app.controller.RecordController import router
+from app.service.subscribe import subscribe_schedule
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    task = asyncio.create_task(subscribe_schedule())
+    yield
+    task.cancel()
+    try:
+        await task
+    except asyncio.CancelledError:
+        print("Redis task cancelled")
+
 
-app = FastAPI()
+app = FastAPI(lifespan=lifespan)
 
 auth_scheme = HTTPBearer()
 
diff --git a/app/service/subscribe.py b/app/service/subscribe.py
new file mode 100644
index 0000000..65cb42c
--- /dev/null
+++ b/app/service/subscribe.py
@@ -0,0 +1,29 @@
+import os
+import redis.asyncio as redis
+import json
+import subprocess
+from AI.app.service.s3Service import download_from_s3
+REDIS_HOST = os.getenv("REDIS_HOST", "15.165.21.152")
+REDIS_PORT = int(os.getenv("REDIS_PORT", "6380"))
+REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "babyy1023@")
+CHANNEL_NAME = "spring-scheduler-channel"
+
+async def subscribe_schedule():
+    r = redis.Redis(
+        host=REDIS_HOST,
+        port=REDIS_PORT,
+        password=REDIS_PASSWORD,
+        decode_responses=True
+    )
+
+    pubsub = r.pubsub()
+    await pubsub.subscribe(CHANNEL_NAME)
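+    # NOTE: the message payload is ignored for now; any publish on this
+    # channel triggers playback of one fixed sample file from S3 (below).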
+
+    print(f"Subscribed to Redis '{CHANNEL_NAME}'")
+
+    async for message in pubsub.listen():
+        if message["type"] == "message":
+            local_path = download_from_s3("https://humanicare-bucket.s3.ap-northeast-2.amazonaws.com/record/audio_1743069498_081a9673-aebe-4b86-a4ba-c32f4424e8b9.wav")
+            subprocess.run(["mpg321", local_path])
+            print("speaker out")
+
\ No newline at end of file
diff --git a/main.py b/main.py
index c8e4812..c4b798f 100644
--- a/main.py
+++ b/main.py
@@ -4,7 +4,7 @@
 
 if __name__ == "__main__":
     uvicorn.run(
-        app="app.main:app",
+        app="app.service.main:app",
         host="localhost",
         # host="0.0.0.0",
         port=8000,
diff --git a/requirements.txt b/requirements.txt
index d7099e7..9adb1ff 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -63,4 +63,4 @@ tqdm==4.67.1
 typing_extensions==4.12.2
 urllib3==1.26.20
 uvicorn==0.34.0
-websockets==15.0.1
+websockets==15.0.1
\ No newline at end of file

From 92b4995d57b7f53ec52f04165f3f85e1fb91cec2 Mon Sep 17 00:00:00 2001
From: yjg0815git
Date: Sat, 24 May 2025 23:58:24 -0500
Subject: [PATCH 4/6] fix: localhost -> springboot change

---
 app/controller/RecordController.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/app/controller/RecordController.py b/app/controller/RecordController.py
index 1de9ea7..7f1dcf6 100644
--- a/app/controller/RecordController.py
+++ b/app/controller/RecordController.py
@@ -114,7 +114,7 @@ def send_user_voice_file_to_spring(token: str, voice_url: str):
     data = {
         "voiceUrl": voice_url
     }
-    requests.post("http://localhost:8080/api/spring/records/voices", headers=headers, json=data)
+    requests.post("http://springboot:8080/api/spring/records/voices", headers=headers, json=data)
     # requests.post("https://peachmentor.com/api/spring/records/voices", headers=headers, json=data)
 
 
@@ -125,7 +125,7 @@ def send_user_voice_id_to_spring(token: str, voice_id: str):
     data = {
         "voiceId": voice_id
     }
-    requests.post("http://localhost:8080/api/spring/records/voices", headers=headers, json=data)
+    requests.post("http://springboot:8080/api/spring/records/voices", headers=headers, json=data)
     # requests.post("https://peachmentor.com/api/spring/records/voices", headers=headers, json=data)
 
 
@@ -137,7 +137,7 @@ def send_user_speech_file_to_spring(token: str, before_audio_link: str, answerId
         "beforeAudioLink": before_audio_link,
         "answerId": answerId
     }
-    requests.post("http://localhost:8080/api/spring/records/speeches", headers=headers, json=data)
+    requests.post("http://springboot:8080/api/spring/records/speeches", headers=headers, json=data)
     # requests.post("https://peachmentor.com/api/spring/records/speeches", headers=headers, json=data)
 
 
@@ -145,7 +145,7 @@ def receive_self_feedback(token: str) -> str:
     headers = {
         "Authorization": f"Bearer {token}"
     }
-    response = requests.get("http://localhost:8080/api/spring/self-feedbacks/latest-feedbacks", headers=headers)
+    response = requests.get("http://springboot:8080/api/spring/self-feedbacks/latest-feedbacks", headers=headers)
     # response = requests.get("https://peachmentor.com/api/spring/self-feedbacks/latest-feedbacks", headers=headers)
 
     feedback_data = response.json().get('result', {})
@@ -165,7 +165,7 @@ def send_statistics_to_spring(token: str, gantourCount: int, silentTime: float,
         "silentTime": silentTime,
         "answerId": answerId
     }
-    requests.post("http://localhost:8080/api/spring/statistics", headers=headers, json=data)
+    requests.post("http://springboot:8080/api/spring/statistics", headers=headers, json=data)
    # 
requests.post("https://peachmentor.com/api/spring/statistics", headers=headers, json=data) # # 질문 답변에 대한 insight 제공 api From 1114701e093101cc41d72358bd7a2b481dbc1d87 Mon Sep 17 00:00:00 2001 From: yjg0815git Date: Sun, 1 Jun 2025 03:08:01 -0500 Subject: [PATCH 5/6] fix: Docker error --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9adb1ff..169daaf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,7 +17,6 @@ exceptiongroup==1.2.2 fastapi==0.115.11 faster-whisper==1.1.1 filelock==3.18.0 -flatbuffers==20181003210633 fsspec==2025.3.2 h11==0.14.0 httpcore==1.0.7 From c1e83289c7172d8a2d6be86f3eba6c72a19cfeb9 Mon Sep 17 00:00:00 2001 From: yjg0815git Date: Sun, 1 Jun 2025 03:25:04 -0500 Subject: [PATCH 6/6] fix:Docker error --- main.py | 102 +++++++++++++++++++++++------------------------ requirements.txt | 4 +- 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/main.py b/main.py index 0ded50f..8f05ef4 100644 --- a/main.py +++ b/main.py @@ -1,66 +1,66 @@ -import RPi.GPIO as GPIO -import time -from datetime import datetime +# import RPi.GPIO as GPIO +# import time +# from datetime import datetime -# ─────────────────────────────── -# PIR 센서 관련 -# ─────────────────────────────── -PIR_PIN = 17 # GPIO17 +# # ─────────────────────────────── +# # PIR 센서 관련 +# # ─────────────────────────────── +# PIR_PIN = 17 # GPIO17 -def detect_motion(): - GPIO.setmode(GPIO.BCM) - GPIO.setup(PIR_PIN, GPIO.IN) +# def detect_motion(): +# GPIO.setmode(GPIO.BCM) +# GPIO.setup(PIR_PIN, GPIO.IN) - print("PIR 센서 디버깅 시작 (Ctrl+C 종료)") - prev_state = None +# print("PIR 센서 디버깅 시작 (Ctrl+C 종료)") +# prev_state = None - try: - while True: - signal = GPIO.input(PIR_PIN) +# try: +# while True: +# signal = GPIO.input(PIR_PIN) - if signal != prev_state: - timestamp = datetime.now().strftime("%H:%M:%S") - state_str = "감지됨 (HIGH)" if signal else " 없음 (LOW)" - print(f"[{timestamp}] 상태 변경 ▶ {state_str}") - prev_state = signal +# if signal != prev_state: +# timestamp = datetime.now().strftime("%H:%M:%S") +# state_str = "감지됨 (HIGH)" if signal else " 없음 (LOW)" +# print(f"[{timestamp}] 상태 변경 ▶ {state_str}") +# prev_state = signal - time.sleep(0.1) - except KeyboardInterrupt: - print("⛔ 종료 중...") - GPIO.cleanup() +# time.sleep(0.1) +# except KeyboardInterrupt: +# print("⛔ 종료 중...") +# GPIO.cleanup() -# ─────────────────────────────── -# DHT11 센서 관련 (5회 재시도 버전) -# ─────────────────────────────── -import adafruit_dht -import board +# # ─────────────────────────────── +# # DHT11 센서 관련 (5회 재시도 버전) +# # ─────────────────────────────── +# import adafruit_dht +# import board -def read_dht11(): - print("🌡️ DHT11 센서 측정 시작...") - dhtDevice = adafruit_dht.DHT11(board.D4) # GPIO4 (멀티보드 IO4) +# def read_dht11(): +# print("🌡️ DHT11 센서 측정 시작...") +# dhtDevice = adafruit_dht.DHT11(board.D4) # GPIO4 (멀티보드 IO4) - for i in range(5): # 최대 5번 재시도 - try: - print(f"📡 시도 {i + 1} ...") - temperature = dhtDevice.temperature - humidity = dhtDevice.humidity +# for i in range(5): # 최대 5번 재시도 +# try: +# print(f"📡 시도 {i + 1} ...") +# temperature = dhtDevice.temperature +# humidity = dhtDevice.humidity - if temperature is not None and humidity is not None: - print(f"✅ 온도: {temperature}°C") - print(f"✅ 습도: {humidity}%") - break - else: - print("⚠️ 센서로부터 데이터를 읽을 수 없습니다.") - except RuntimeError as error: - print(f"⚠️ 에러 발생: {error.args[0]}") - except Exception as error: - print(f"❌ 심각한 오류: {error}") - break - time.sleep(2) # 재시도 간 간격 +# if temperature is not None and humidity is not None: +# 
print(f"✅ 온도: {temperature}°C") +# print(f"✅ 습도: {humidity}%") +# break +# else: +# print("⚠️ 센서로부터 데이터를 읽을 수 없습니다.") +# except RuntimeError as error: +# print(f"⚠️ 에러 발생: {error.args[0]}") +# except Exception as error: +# print(f"❌ 심각한 오류: {error}") +# break +# time.sleep(2) # 재시도 간 간격 - # 종료 함수는 비활성화 (라이브러리 오류 방지) - # dhtDevice.exit() +# # 종료 함수는 비활성화 (라이브러리 오류 방지) +# # dhtDevice.exit() # ─────────────────────────────── diff --git a/requirements.txt b/requirements.txt index 169daaf..c61a0e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,7 +36,7 @@ openai==1.68.2 packaging==25.0 playsound==1.3.0 protobuf==6.31.0 -PyAudio==0.2.14 +# PyAudio==0.2.14 pycparser==2.22 pydantic==2.10.6 pydantic_core==2.27.2 @@ -46,7 +46,7 @@ python-dotenv==1.0.1 python-multipart==0.0.20 PyYAML==6.0.2 requests==2.32.3 -RPi.GPIO==0.7.1 +# RPi.GPIO==0.7.1 s3transfer==0.11.4 scipy==1.13.1 six==1.17.0