From a847e8cfea917293617f32cd98f4621999f06f29 Mon Sep 17 00:00:00 2001
From: yjg0815git
Date: Fri, 16 May 2025 08:00:14 -0500
Subject: [PATCH 1/6] interaction

---
 app/service/elevenLabs.py       | 20 +++----
 app/service/gpt.py              | 43 ++++++++++++++
 app/service/interaction.py      | 99 +++++++++++++++++++++++++++++++++
 app/service/record_respberry.py | 33 +++++++++--
 app/utils/play_file.py          |  8 +--
 requirements.txt                | 15 +++++
 6 files changed, 199 insertions(+), 19 deletions(-)
 create mode 100644 app/service/interaction.py

diff --git a/app/service/elevenLabs.py b/app/service/elevenLabs.py
index 8eeddb0..11c2942 100644
--- a/app/service/elevenLabs.py
+++ b/app/service/elevenLabs.py
@@ -4,7 +4,7 @@
 from dotenv import load_dotenv
 from elevenlabs import ElevenLabs, VoiceSettings
 
-from app.service.s3Service import upload_to_s3
+from s3Service import upload_to_s3
 
 load_dotenv()
 client = ElevenLabs(
@@ -72,18 +72,18 @@ def text_to_speech_file_save_AWS(text: str, voice_id: str) -> str:
 def text_to_speech_file(text: str, voice_id: str) -> str:
     response = client.text_to_speech.convert(
         voice_id=voice_id,
-        output_format="mp3_22050_32",
+        # output_format="mp3_22050_32",
         text=text,
-        model_id="eleven_turbo_v2_5",
-        voice_settings=VoiceSettings(
-            stability=0.3,
-            similarity_boost=1.0,
-            style=0.0,
-            use_speaker_boost=True,
-        ),
+        model_id="eleven_multilingual_v2",
+        # voice_settings=VoiceSettings(
+        #     stability=0.3,
+        #     similarity_boost=1.0,
+        #     style=0.0,
+        #     use_speaker_boost=True,
+        # ),
     )
 
-    save_file_path = f"{uuid.uuid4()}.mp3"
+    save_file_path = f"{uuid.uuid4()}.wav"
     with open(save_file_path, "wb") as f:
         for chunk in response:
             if chunk:
diff --git a/app/service/gpt.py b/app/service/gpt.py
index 33118ea..6d027c1 100644
--- a/app/service/gpt.py
+++ b/app/service/gpt.py
@@ -50,3 +50,46 @@ def get_schedule_json(self):
         schedule_dict = parsing_json.extract_json_from_content(content)
 
         return schedule_dict
+
+class GenerateQuestionGPT:
+    def __init__(self, text, alias):
+        self.text = text
+        self.alias = alias
+
+    def create_schedule_prompt(self):
+        system_message = f"""
+        너는 지금부터 혼자 사시는 부모님을 걱정하는 보호자야.
+
+        네 역할은 키워드를 보고, 키워드와 관련한 문제에 대해서 부모님을 걱정하고, 생활을 챙겨주는 거야.
+        키워드는 다음과 같아: {str(self.text)}
+
+        너의 목표는 다음과 같아:
+        1. 키워드에 대한 질문 혹은 문장을 한 줄의 텍스트로 만들어.
+        ex) 키워드가 '저녁' 이라면, "{self.alias}~~ 하루 잘 보냈어?? 저녁도 맛있는거 챙겨먹어!! 사랑해~~ "
+        2. 만든 텍스트는 ?? !! ~~ ,, .. 등의 다양한 특수문자가 많이 들어갈 수 있어. 감정이 강하게 느껴지게 작성해줘.
+        2-a. 특수문자를 붙일 때는 꼭 2개씩 붙여줘
+        3. 부모님을 지칭하는 별명은 {self.alias} 로 해줘.
+        4. 문장과 문장 사이의 띄어쓰기를 2개씩 넣어줘
+
+        결과는 {{"키워드": "문장"}} 형태의 JSON 문자열로 반환해줘. 꼭 큰따옴표(")만 사용해.
+ + """ + + messages = [ + {"role": "system", "content": system_message} + ] + return messages + + def get_schedule_json(self): + prompt = self.create_schedule_prompt() + response = client.chat.completions.create( + model="gpt-4-turbo", + messages=prompt, + temperature=0.5, + max_tokens=2048 + ) + + content = response.choices[0].message.content + schedule_dict = parsing_json.extract_json_from_content(content) + + return schedule_dict \ No newline at end of file diff --git a/app/service/interaction.py b/app/service/interaction.py new file mode 100644 index 0000000..eea5754 --- /dev/null +++ b/app/service/interaction.py @@ -0,0 +1,99 @@ +import time +import pyaudio +import numpy as np +import soundfile as sf +from faster_whisper import WhisperModel +from openai import OpenAI +import openai +from elevenlabs import play, ElevenLabs +from elevenLabs import text_to_speech_file +from dotenv import load_dotenv +import os +from datetime import datetime + +from record_respberry import emotion_record, is_silent +model = WhisperModel("tiny", device="cpu", compute_type="int8") +import subprocess + +MIC_INDEX = 1 # USB 마이크 인덱스 +FORMAT = pyaudio.paInt16 +CHANNELS = 1 +RATE = 44100 +CHUNK = 4096 +SILENCE_LIMIT = 10 # 침묵 3초 이상이면 종료 + +# 오늘 날짜 문자열 +today_str = datetime.now().strftime("%Y%m%d") +WAVE_OUTPUT_FILENAME = "/home/team4/Desktop/capstone/AI/app/emotion_diary/" + today_str + "_" + +# === 환경 변수/API 키 세팅 === +load_dotenv() + +gpt = OpenAI( + api_key=os.getenv("OPENAI_API_KEY") +) +cloning = ElevenLabs( + api_key=os.getenv("ELEVENLABS_KEY"), +) + +# === Whisper 모델 로드 (tiny가 가장 가볍고 빠름) === +model = WhisperModel("tiny", device="cpu", compute_type="int8") + +# === 오디오 입력 설정 === +RATE = 44100 +CHANNELS = 1 +CHUNK = RATE * 3 # 3초 단위로 STT +FORMAT = pyaudio.paInt16 + +p = pyaudio.PyAudio() +stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK) +INDEX = 1 + +print("실시간 STT + GPT 질문 생성 + ElevenLabs Voice Cloning 시작 (Ctrl+C로 종료)") + + +messages = [ + {"role": "system", "content": "너는 대화를 자연스럽게 이어가는 AI야. 사용자와 계속 이어지는 대화를 만들어야 해."} +] +INDEX = 0 + +try: + while True: + wav_filename = emotion_record(INDEX) + INDEX += 1 + + # === 2. STT === + segments, _ = model.transcribe(wav_filename, beam_size=1, language="ko") + user_text = " ".join([seg.text for seg in segments]).strip() + print("STT 결과:", user_text) + if not user_text: + print("음성이 인식되지 않았습니다.") + continue + + # 3. messages에 사용자 발화 추가 + messages.append({"role": "user", "content": user_text}) + + # 4. GPT에 전체 메시지 전달 + response = openai.ChatCompletion.create( + model="gpt-4o", + messages=messages + ) + question = response['choices'][0]['message']['content'].strip() + print("생성된 질문:", question) + + # 5. GPT 응답도 messages에 추가 + messages.append({"role": "assistant", "content": question}) + + # 6. ElevenLabs TTS로 변환 + audio_path = text_to_speech_file(question) + + # 7. 
음성 재생 + subprocess.run(["mpg321", audio_path]) + +except KeyboardInterrupt: + print("종료합니다.") +finally: + stream.stop_stream() + stream.close() + p.terminate() + diff --git a/app/service/record_respberry.py b/app/service/record_respberry.py index 2ea44ff..3f76b84 100644 --- a/app/service/record_respberry.py +++ b/app/service/record_respberry.py @@ -3,25 +3,30 @@ import numpy as np import os from datetime import datetime +from s3Service import upload_to_s3 +from elevenLabs import text_to_speech_file +from faster_whisper import WhisperModel +model = WhisperModel("tiny", device="cpu", compute_type="int8") +import subprocess MIC_INDEX = 1 # USB 마이크 인덱스 FORMAT = pyaudio.paInt16 CHANNELS = 1 RATE = 44100 CHUNK = 4096 -SILENCE_LIMIT = 5 # 침묵 3초 이상이면 종료 +SILENCE_LIMIT = 10 # 침묵 3초 이상이면 종료 # 오늘 날짜 문자열 today_str = datetime.now().strftime("%Y%m%d") -WAVE_OUTPUT_FILENAME = "/home/team4/Desktop/capstone/AI/app/emotion_diary/" + today_str + "_output.mp3" +WAVE_OUTPUT_FILENAME = "/home/team4/Desktop/capstone/AI/app/emotion_diary/" + today_str + "_" def is_silent(data, threshold=100): audio_data = np.frombuffer(data, dtype=np.int16) rms = np.sqrt(np.mean(audio_data**2)) print(f"RMS: {rms}") return rms < threshold - -def emotion_record(): +model = WhisperModel("tiny", device="cpu", compute_type="int8") +def emotion_record(index): p = pyaudio.PyAudio() stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, input_device_index=MIC_INDEX, frames_per_buffer=CHUNK) @@ -54,7 +59,7 @@ def emotion_record(): stream.close() p.terminate() - wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb') + wf = wave.open(WAVE_OUTPUT_FILENAME+index+".wav", 'wb') wf.setnchannels(CHANNELS) wf.setsampwidth(p.get_sample_size(FORMAT)) wf.setframerate(RATE) @@ -87,9 +92,27 @@ def emotion_record(): wf.close() print(f"파일 저장 완료: {WAVE_OUTPUT_FILENAME}") + return WAVE_OUTPUT_FILENAME + + # s3_path = upload_to_s3(WAVE_OUTPUT_FILENAME) + +def start(alias): + text = alias + "~~ 오늘 좋은 하루 보냈나~~?? 어떻게 지냈어!!" 
+ save_file_path = text_to_speech_file(text) + subprocess.run(["mpg321", save_file_path]) + + while(True): + emotion_record() + segments, _ = model.transcribe(save_file_path, beam_size=1, language="ko") + user_text = " ".join([seg.text for seg in segments]).strip() + + subprocess.run(["mpg321", local_file_path]) + + + # import pyaudio # p = pyaudio.PyAudio() diff --git a/app/utils/play_file.py b/app/utils/play_file.py index 2a549e3..ce1032a 100644 --- a/app/utils/play_file.py +++ b/app/utils/play_file.py @@ -25,7 +25,7 @@ def play_at_target_time(target_time: str, local_file_path: str): subprocess.run(["mpg321", local_file_path]) # window 테스트 용 - from playsound import playsound - from pathlib import Path - safe_path = Path(local_file_path).resolve().as_posix() - playsound(safe_path) + # from playsound import playsound + # from pathlib import Path + # safe_path = Path(local_file_path).resolve().as_posix() + # playsound(safe_path) diff --git a/requirements.txt b/requirements.txt index 52a2fd7..d7099e7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ annotated-types==0.7.0 anyio==4.9.0 +av==14.3.0 boto3==1.37.16 botocore==1.37.16 certifi==2025.1.31 @@ -7,16 +8,22 @@ cffi==1.17.1 charset-normalizer==3.4.1 click==8.1.8 colorama==0.4.6 +coloredlogs==15.0.1 +ctranslate2==4.6.0 distro==1.9.0 dotenv==0.9.9 elevenlabs==1.54.0 exceptiongroup==1.2.2 fastapi==0.115.11 +faster-whisper==1.1.1 filelock==3.18.0 +flatbuffers==20181003210633 fsspec==2025.3.2 h11==0.14.0 httpcore==1.0.7 httpx==0.28.1 +huggingface-hub==0.31.2 +humanfriendly==10.0 idna==3.10 Jinja2==3.1.6 jiter==0.9.0 @@ -25,8 +32,12 @@ MarkupSafe==3.0.2 mpmath==1.3.0 networkx==3.2.1 numpy==2.0.2 +onnxruntime==1.19.2 openai==1.68.2 +packaging==25.0 playsound==1.3.0 +protobuf==6.31.0 +PyAudio==0.2.14 pycparser==2.22 pydantic==2.10.6 pydantic_core==2.27.2 @@ -34,14 +45,18 @@ pydub==0.25.1 python-dateutil==2.9.0.post0 python-dotenv==1.0.1 python-multipart==0.0.20 +PyYAML==6.0.2 requests==2.32.3 +RPi.GPIO==0.7.1 s3transfer==0.11.4 scipy==1.13.1 six==1.17.0 sniffio==1.3.1 sounddevice==0.5.1 +soundfile==0.13.1 starlette==0.46.1 sympy==1.14.0 +tokenizers==0.21.1 torch==2.7.0 torchaudio==2.7.0 tqdm==4.67.1 From bbf307507d28da255786b8ce9bfe9dfa10342bc9 Mon Sep 17 00:00:00 2001 From: yjg0815git Date: Thu, 22 May 2025 08:59:14 -0500 Subject: [PATCH 2/6] fix: check mic. 
setting & gpt interaction --- .gitignore | 3 +- app/service/elevenLabs.py | 7 +- app/service/interaction.py | 180 ++++++++++++++++--------------- app/service/record_respberry.py | 182 ++++++++++++-------------------- app/service/s3Service.py | 2 +- 5 files changed, 172 insertions(+), 202 deletions(-) diff --git a/.gitignore b/.gitignore index 25e5a12..f792265 100644 --- a/.gitignore +++ b/.gitignore @@ -38,4 +38,5 @@ venv.bak/ __pycache__/ *.pyc audio/ -emotion_diary/ \ No newline at end of file +emotion_diary/ +pyvenv.cfg \ No newline at end of file diff --git a/app/service/elevenLabs.py b/app/service/elevenLabs.py index 11c2942..b715910 100644 --- a/app/service/elevenLabs.py +++ b/app/service/elevenLabs.py @@ -10,6 +10,7 @@ client = ElevenLabs( api_key=os.getenv("ELEVENLABS_KEY"), ) +yjg_voice_id = os.getenv("YJG_VOICE_ID") def get_voice(): @@ -42,7 +43,7 @@ def add_voice(name: str, local_file_paths: list): return response.voice_id -def text_to_speech_file_save_AWS(text: str, voice_id: str) -> str: +def text_to_speech_file_save_AWS(text: str, voice_id=yjg_voice_id) -> str: response = client.text_to_speech.convert( voice_id=voice_id, output_format="mp3_22050_32", @@ -69,7 +70,7 @@ def text_to_speech_file_save_AWS(text: str, voice_id: str) -> str: return aws_file_url -def text_to_speech_file(text: str, voice_id: str) -> str: +def text_to_speech_file(text: str, voice_id=yjg_voice_id) -> str: response = client.text_to_speech.convert( voice_id=voice_id, # output_format="mp3_22050_32", @@ -89,7 +90,7 @@ def text_to_speech_file(text: str, voice_id: str) -> str: if chunk: f.write(chunk) # aws_file_url = upload_to_s3(local_file_path=save_file_path) - os.remove(save_file_path) + # os.remove(save_file_path) # delete_voice(voice_id) diff --git a/app/service/interaction.py b/app/service/interaction.py index eea5754..4518264 100644 --- a/app/service/interaction.py +++ b/app/service/interaction.py @@ -1,99 +1,111 @@ -import time +import os +import subprocess +from datetime import datetime + import pyaudio import numpy as np -import soundfile as sf from faster_whisper import WhisperModel from openai import OpenAI -import openai -from elevenlabs import play, ElevenLabs from elevenLabs import text_to_speech_file +from elevenlabs import ElevenLabs from dotenv import load_dotenv -import os -from datetime import datetime +# 아래 두 함수는 record_respberry.py 에 구현된 그대로 사용합니다. 
+# emotion_record(index) → "{prefix}{index}.wav" 파일을 만들어 리턴 +# is_silent(data) → 음성 청크가 침묵인지 여부 판단 from record_respberry import emotion_record, is_silent -model = WhisperModel("tiny", device="cpu", compute_type="int8") -import subprocess - -MIC_INDEX = 1 # USB 마이크 인덱스 -FORMAT = pyaudio.paInt16 -CHANNELS = 1 -RATE = 44100 -CHUNK = 4096 -SILENCE_LIMIT = 10 # 침묵 3초 이상이면 종료 -# 오늘 날짜 문자열 -today_str = datetime.now().strftime("%Y%m%d") -WAVE_OUTPUT_FILENAME = "/home/team4/Desktop/capstone/AI/app/emotion_diary/" + today_str + "_" - -# === 환경 변수/API 키 세팅 === +# ==== 공통 설정 ==== load_dotenv() +OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") +ELEVENLABS_KEY = os.getenv("ELEVENLABS_KEY") + +if not OPENAI_API_KEY or not ELEVENLABS_KEY: + raise RuntimeError(".env 에 OPENAI_API_KEY/ELEVENLABS_KEY 를 설정하세요") -gpt = OpenAI( - api_key=os.getenv("OPENAI_API_KEY") -) -cloning = ElevenLabs( - api_key=os.getenv("ELEVENLABS_KEY"), -) +# OpenAI / ElevenLabs 클라이언트 +gpt_client = OpenAI(api_key=OPENAI_API_KEY) +tts_client = ElevenLabs(api_key=ELEVENLABS_KEY) -# === Whisper 모델 로드 (tiny가 가장 가볍고 빠름) === -model = WhisperModel("tiny", device="cpu", compute_type="int8") +# Whisper 모델 (tiny, CPU, int8) +whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8") -# === 오디오 입력 설정 === -RATE = 44100 +# 녹음 파라미터 (ALSA default=USBMIC 으로 잡힌 상태) +FORMAT = pyaudio.paInt16 CHANNELS = 1 -CHUNK = RATE * 3 # 3초 단위로 STT -FORMAT = pyaudio.paInt16 - -p = pyaudio.PyAudio() -stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK) -INDEX = 1 - -print("실시간 STT + GPT 질문 생성 + ElevenLabs Voice Cloning 시작 (Ctrl+C로 종료)") - - -messages = [ - {"role": "system", "content": "너는 대화를 자연스럽게 이어가는 AI야. 사용자와 계속 이어지는 대화를 만들어야 해."} -] -INDEX = 0 - -try: - while True: - wav_filename = emotion_record(INDEX) - INDEX += 1 - - # === 2. STT === - segments, _ = model.transcribe(wav_filename, beam_size=1, language="ko") - user_text = " ".join([seg.text for seg in segments]).strip() - print("STT 결과:", user_text) - if not user_text: - print("음성이 인식되지 않았습니다.") - continue - - # 3. messages에 사용자 발화 추가 - messages.append({"role": "user", "content": user_text}) - - # 4. GPT에 전체 메시지 전달 - response = openai.ChatCompletion.create( - model="gpt-4o", - messages=messages - ) - question = response['choices'][0]['message']['content'].strip() - print("생성된 질문:", question) - - # 5. GPT 응답도 messages에 추가 - messages.append({"role": "assistant", "content": question}) - - # 6. ElevenLabs TTS로 변환 - audio_path = text_to_speech_file(question) - - # 7. 음성 재생 - subprocess.run(["mpg321", audio_path]) - -except KeyboardInterrupt: - print("종료합니다.") -finally: - stream.stop_stream() - stream.close() - p.terminate() +RATE = 44100 +CHUNK = RATE * 3 # 3초 단위 버퍼 + +# 오늘 날짜 기반 녹음 파일 저장 경로 prefix +today_str = datetime.now().strftime("%Y%m%d") +WAVE_OUTPUT_PREFIX = f"/home/team4/Desktop/capstone/AI/app/emotion_diary/{today_str}_" + +def interaction(alias: str): + """ + alias: 사용자 이름 또는 AI가 부르는 별칭 (ex: "홍길동") + 1) alias 인사 → TTS → 재생 + 2) 이후 반복: emotion_record → Whisper STT → GPT 질문 생성 → TTS → 재생 + """ + # 1) alias 인사 + greet_text = f"{alias}~~ 오늘 좋은 하루 보냈나~~?? 어떻게 지냈어!!" + print("👋 인사:", greet_text) + greet_audio = text_to_speech_file(greet_text) + subprocess.run(["mpg321", greet_audio], check=True) + + # 대화 이력 초기화 + messages = [ + {"role": "system", + "content": "너는 대화를 자연스럽게 이어가는 AI야. 
사용자와 계속 이어지는 대화를 만들어야 해."}, + {"role": "assistant", "content": greet_text} + ] + + record_idx = 0 + try: + while True: + # 2-1) 감정 녹음 (침묵 기준으로 자동 종료) + wav_path = emotion_record(record_idx) + print(f"[녹음 완료] {wav_path}") + record_idx += 1 + + # 2-2) Whisper STT (한국어) + segments, _ = whisper_model.transcribe(wav_path, + beam_size=1, + language="ko") + user_text = " ".join(seg.text for seg in segments).strip() + print("▶ 사용자 음성(텍스트):", user_text or "(인식 안됨)") + + if not user_text: + print("(음성 인식 실패 → 다시 녹음)") + continue + + # 2-3) GPT-4o 에 질문 생성 요청 + messages.append({"role": "user", "content": user_text}) + resp = gpt_client.chat.completions.create( + model="gpt-4o", + messages=messages + ) + question = resp.choices[0].message.content.strip() + print("생성된 질문:", question) + + # 2-4) 대화 이력에 어시스턴트 질문 추가 + messages.append({"role": "assistant", "content": question}) + + # 2-5) 질문 → ElevenLabs TTS → 파일 + tts_path = text_to_speech_file(question) + print(" (TTS 파일 생성:", tts_path, ")") + + # 2-6) 재생 + subprocess.run(["mpg321", tts_path], check=True) + + except KeyboardInterrupt: + print("\n[사용자 종료 요청] interaction을 종료합니다.") + except Exception as e: + print("예외 발생:", e) + + print("=== interaction 종료 ===") + +if __name__ == "__main__": + # 스크립트를 직접 실행할 때만 동작 + # alias를 원하는 이름으로 바꿔주세요 + interaction("홍길동") diff --git a/app/service/record_respberry.py b/app/service/record_respberry.py index 3f76b84..3bb9206 100644 --- a/app/service/record_respberry.py +++ b/app/service/record_respberry.py @@ -1,129 +1,85 @@ -import pyaudio -import wave -import numpy as np import os +import wave from datetime import datetime -from s3Service import upload_to_s3 -from elevenLabs import text_to_speech_file -from faster_whisper import WhisperModel -model = WhisperModel("tiny", device="cpu", compute_type="int8") -import subprocess -MIC_INDEX = 1 # USB 마이크 인덱스 +import numpy as np +import pyaudio + +# === 녹음 설정 === FORMAT = pyaudio.paInt16 CHANNELS = 1 RATE = 44100 -CHUNK = 4096 -SILENCE_LIMIT = 10 # 침묵 3초 이상이면 종료 - -# 오늘 날짜 문자열 -today_str = datetime.now().strftime("%Y%m%d") -WAVE_OUTPUT_FILENAME = "/home/team4/Desktop/capstone/AI/app/emotion_diary/" + today_str + "_" - -def is_silent(data, threshold=100): +CHUNK = 4096 # 약 0.093초 분량 (4096/44100) +SILENCE_LIMIT = 5 # 5초 연속 침묵이면 녹음 종료 +BASE_DIR = "/home/team4/Desktop/capstone/AI/app/emotion_diary" + +# 날짜 기반 하위 디렉터리(매일 한 번만 생성) +def _ensure_dir(): + os.makedirs(BASE_DIR, exist_ok=True) + +def is_silent(data: bytes, threshold: float = 1000.0) -> bool: + """ + 한 프레임(CHUNK) 크기의 raw PCM data를 받아 + RMS 기준으로 침묵 여부를 판단. 
+ """ audio_data = np.frombuffer(data, dtype=np.int16) - rms = np.sqrt(np.mean(audio_data**2)) - print(f"RMS: {rms}") + rms = np.sqrt(np.mean(audio_data.astype(np.float32) ** 2)) + # print(f"RMS={rms:.1f}") # 필요 시 디버그용 return rms < threshold -model = WhisperModel("tiny", device="cpu", compute_type="int8") -def emotion_record(index): - p = pyaudio.PyAudio() - stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, - input_device_index=MIC_INDEX, frames_per_buffer=CHUNK) - - print("녹음 시작...") - frames = [] - silence_counter = 0 - p = pyaudio.PyAudio() - stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, - input_device_index=MIC_INDEX, frames_per_buffer=CHUNK) - print("녹음 시작...") +def emotion_record(index: int) -> str: + """ + index: 녹음 파일 구분을 위한 정수 인덱스 + return: 저장된 .wav 파일의 전체 경로 + """ + _ensure_dir() + date_str = datetime.now().strftime("%Y%m%d") + filename = f"{date_str}_{index}.wav" + filepath = os.path.join(BASE_DIR, filename) + + pa = pyaudio.PyAudio() + # input_device_index 를 지정하지 않으면 ALSA default (=USBMIC) 사용 + stream = pa.open( + format=FORMAT, + channels=CHANNELS, + rate=RATE, + input=True, + frames_per_buffer=CHUNK + ) + + print(f"[녹음 시작] {filename}") frames = [] - silence_counter = 0 - - while True: - data = stream.read(CHUNK) - frames.append(data) - if is_silent(data): - silence_counter += CHUNK / RATE - print(f"침묵 감지: {silence_counter:.2f}초") - else: - silence_counter = 0 - - if silence_counter >= SILENCE_LIMIT: - print(f"{SILENCE_LIMIT}초 이상 침묵 감지! 녹음 종료.") - break - - stream.stop_stream() - stream.close() - p.terminate() - - wf = wave.open(WAVE_OUTPUT_FILENAME+index+".wav", 'wb') - wf.setnchannels(CHANNELS) - wf.setsampwidth(p.get_sample_size(FORMAT)) - wf.setframerate(RATE) - wf.writeframes(b''.join(frames)) - wf.close() - - print(f"파일 저장 완료: {WAVE_OUTPUT_FILENAME}") - while True: - data = stream.read(CHUNK) - frames.append(data) - if is_silent(data): - silence_counter += CHUNK / RATE - print(f"침묵 감지: {silence_counter:.2f}초") - else: - silence_counter = 0 - - if silence_counter >= SILENCE_LIMIT: - print(f"{SILENCE_LIMIT}초 이상 침묵 감지! 녹음 종료.") - break - - stream.stop_stream() - stream.close() - p.terminate() - - wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb') + silent_secs = 0.0 + + try: + while True: + data = stream.read(CHUNK, exception_on_overflow=False) + frames.append(data) + + if is_silent(data): + silent_secs += CHUNK / RATE + else: + silent_secs = 0.0 + + if silent_secs >= SILENCE_LIMIT: + print(f"[침묵 {SILENCE_LIMIT}초 감지 → 녹음 종료]") + break + + except Exception as e: + print("녹음 중 예외:", e) + finally: + stream.stop_stream() + stream.close() + pa.terminate() + + # WAV 파일로 저장 + wf = wave.open(filepath, 'wb') wf.setnchannels(CHANNELS) - wf.setsampwidth(p.get_sample_size(FORMAT)) + wf.setsampwidth(pa.get_sample_size(FORMAT)) wf.setframerate(RATE) wf.writeframes(b''.join(frames)) wf.close() - print(f"파일 저장 완료: {WAVE_OUTPUT_FILENAME}") - return WAVE_OUTPUT_FILENAME - - # s3_path = upload_to_s3(WAVE_OUTPUT_FILENAME) - -def start(alias): - text = alias + "~~ 오늘 좋은 하루 보냈나~~?? 어떻게 지냈어!!" 
-    save_file_path = text_to_speech_file(text)
-    subprocess.run(["mpg321", save_file_path])
-
-    while(True):
-        emotion_record()
-        segments, _ = model.transcribe(save_file_path, beam_size=1, language="ko")
-        user_text = " ".join([seg.text for seg in segments]).strip()
-
-        subprocess.run(["mpg321", local_file_path])
-
-
-
-
-
-# import pyaudio
-
-# p = pyaudio.PyAudio()
-
-# print("==== 오디오 입력 장치 목록 ====")
-# for i in range(p.get_device_count()):
-#     info = p.get_device_info_by_index(i)
-#     if info['maxInputChannels'] > 0:
-#         print(f"[Index {i}] {info['name']}")
-#         print(f"  - 입력 채널 수 (maxInputChannels): {info['maxInputChannels']}")
-#         print(f"  - 기본 샘플레이트 (defaultSampleRate): {int(info['defaultSampleRate'])} Hz")
-#         print("-" * 40)
+    print(f"[저장 완료] {filepath}\n")
+    return filepath
-# p.terminate()
\ No newline at end of file
diff --git a/app/service/s3Service.py b/app/service/s3Service.py
index b992d24..74d0879 100644
--- a/app/service/s3Service.py
+++ b/app/service/s3Service.py
@@ -9,7 +9,7 @@
 from dotenv import load_dotenv
 from fastapi import UploadFile
 
-from app.utils.convertFileExtension import convert_to_mp3
+from AI.app.utils.convertFileExtension import convert_to_mp3
 
 load_dotenv()
 

From cfdb19b8672c8a1de78640d03114c837e7fa0ed3 Mon Sep 17 00:00:00 2001
From: yjg0815git
Date: Sat, 24 May 2025 02:15:18 -0500
Subject: [PATCH 3/6] feat: redis subscribe & fastapi lifespan

---
 app/service/elevenLabs.py |  2 +-
 app/service/main.py       | 17 ++++++++++++++++-
 app/service/subscribe.py  | 29 +++++++++++++++++++++++++++++
 main.py                   |  2 +-
 requirements.txt          |  2 +-
 5 files changed, 48 insertions(+), 4 deletions(-)
 create mode 100644 app/service/subscribe.py

diff --git a/app/service/elevenLabs.py b/app/service/elevenLabs.py
index b715910..63d62d1 100644
--- a/app/service/elevenLabs.py
+++ b/app/service/elevenLabs.py
@@ -4,7 +4,7 @@
 from dotenv import load_dotenv
 from elevenlabs import ElevenLabs, VoiceSettings
 
-from s3Service import upload_to_s3
+from AI.app.service.s3Service import upload_to_s3
 
 load_dotenv()
 client = ElevenLabs(
diff --git a/app/service/main.py b/app/service/main.py
index cab8d43..f7cabf5 100644
--- a/app/service/main.py
+++ b/app/service/main.py
@@ -1,11 +1,26 @@
+import asyncio
+
 from fastapi import FastAPI, Depends, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.openapi.utils import get_openapi
 from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+from contextlib import asynccontextmanager
 
 from app.controller.RecordController import router
+from app.service.subscribe import subscribe_schedule
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    task = asyncio.create_task(subscribe_schedule())
+    yield
+    task.cancel()
+    try:
+        await task
+    except asyncio.CancelledError:
+        print("Redis task cancelled")
+
 
-app = FastAPI()
+app = FastAPI(lifespan=lifespan)
 
 auth_scheme = HTTPBearer()
 
diff --git a/app/service/subscribe.py b/app/service/subscribe.py
new file mode 100644
index 0000000..65cb42c
--- /dev/null
+++ b/app/service/subscribe.py
@@ -0,0 +1,29 @@
+import os
+import redis.asyncio as redis
+import json
+import subprocess
+from AI.app.service.s3Service import download_from_s3
+REDIS_HOST = os.getenv("REDIS_HOST", "15.165.21.152")
+REDIS_PORT = int(os.getenv("REDIS_PORT", "6380"))
+REDIS_PASSWORD = os.getenv("REDIS_PASSWORD", "babyy1023@")
+CHANNEL_NAME = "spring-scheduler-channel"
+
+async def subscribe_schedule():
+    r = redis.Redis(
+        host=REDIS_HOST,
+        port=REDIS_PORT,
+        password=REDIS_PASSWORD,
+        decode_responses=True
+    )
+
+    pubsub = r.pubsub()
+    await pubsub.subscribe(CHANNEL_NAME)
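+    # NOTE: the message payload is ignored for now; any publish on this
+    # channel triggers playback of one fixed sample file from S3 (below).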
+
+    print(f"Subscribed to Redis '{CHANNEL_NAME}'")
+
+    async for message in pubsub.listen():
+        if message["type"] == "message":
+            local_path = download_from_s3("https://humanicare-bucket.s3.ap-northeast-2.amazonaws.com/record/audio_1743069498_081a9673-aebe-4b86-a4ba-c32f4424e8b9.wav")
+            subprocess.run(["mpg321", local_path])
+            print("speaker out")
+
\ No newline at end of file
diff --git a/main.py b/main.py
index c8e4812..c4b798f 100644
--- a/main.py
+++ b/main.py
@@ -4,7 +4,7 @@
 
 if __name__ == "__main__":
     uvicorn.run(
-        app="app.main:app",
+        app="app.service.main:app",
         host="localhost",
         # host="0.0.0.0",
         port=8000,
diff --git a/requirements.txt b/requirements.txt
index d7099e7..9adb1ff 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -63,4 +63,4 @@ tqdm==4.67.1
 typing_extensions==4.12.2
 urllib3==1.26.20
 uvicorn==0.34.0
-websockets==15.0.1
+websockets==15.0.1
\ No newline at end of file

From 92b4995d57b7f53ec52f04165f3f85e1fb91cec2 Mon Sep 17 00:00:00 2001
From: yjg0815git
Date: Sat, 24 May 2025 23:58:24 -0500
Subject: [PATCH 4/6] fix: localhost -> springboot change

---
 app/controller/RecordController.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/app/controller/RecordController.py b/app/controller/RecordController.py
index 1de9ea7..7f1dcf6 100644
--- a/app/controller/RecordController.py
+++ b/app/controller/RecordController.py
@@ -114,7 +114,7 @@ def send_user_voice_file_to_spring(token: str, voice_url: str):
     data = {
         "voiceUrl": voice_url
     }
-    requests.post("http://localhost:8080/api/spring/records/voices", headers=headers, json=data)
+    requests.post("http://springboot:8080/api/spring/records/voices", headers=headers, json=data)
     # requests.post("https://peachmentor.com/api/spring/records/voices", headers=headers, json=data)
 
 
@@ -125,7 +125,7 @@ def send_user_voice_id_to_spring(token: str, voice_id: str):
     data = {
         "voiceId": voice_id
     }
-    requests.post("http://localhost:8080/api/spring/records/voices", headers=headers, json=data)
+    requests.post("http://springboot:8080/api/spring/records/voices", headers=headers, json=data)
     # requests.post("https://peachmentor.com/api/spring/records/voices", headers=headers, json=data)
 
 
@@ -137,7 +137,7 @@ def send_user_speech_file_to_spring(token: str, before_audio_link: str, answerId
         "beforeAudioLink": before_audio_link,
         "answerId": answerId
     }
-    requests.post("http://localhost:8080/api/spring/records/speeches", headers=headers, json=data)
+    requests.post("http://springboot:8080/api/spring/records/speeches", headers=headers, json=data)
     # requests.post("https://peachmentor.com/api/spring/records/speeches", headers=headers, json=data)
 
 
@@ -145,7 +145,7 @@ def receive_self_feedback(token: str) -> str:
     headers = {
         "Authorization": f"Bearer {token}"
     }
-    response = requests.get("http://localhost:8080/api/spring/self-feedbacks/latest-feedbacks", headers=headers)
+    response = requests.get("http://springboot:8080/api/spring/self-feedbacks/latest-feedbacks", headers=headers)
     # response = requests.get("https://peachmentor.com/api/spring/self-feedbacks/latest-feedbacks", headers=headers)
 
     feedback_data = response.json().get('result', {})
@@ -165,7 +165,7 @@ def send_statistics_to_spring(token: str, gantourCount: int, silentTime: float,
         "silentTime": silentTime,
         "answerId": answerId
     }
-    requests.post("http://localhost:8080/api/spring/statistics", headers=headers, json=data)
+    requests.post("http://springboot:8080/api/spring/statistics", headers=headers, json=data)
    # 
requests.post("https://peachmentor.com/api/spring/statistics", headers=headers, json=data) # # 질문 답변에 대한 insight 제공 api From 1114701e093101cc41d72358bd7a2b481dbc1d87 Mon Sep 17 00:00:00 2001 From: yjg0815git Date: Sun, 1 Jun 2025 03:08:01 -0500 Subject: [PATCH 5/6] fix: Docker error --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9adb1ff..169daaf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,7 +17,6 @@ exceptiongroup==1.2.2 fastapi==0.115.11 faster-whisper==1.1.1 filelock==3.18.0 -flatbuffers==20181003210633 fsspec==2025.3.2 h11==0.14.0 httpcore==1.0.7 From c1e83289c7172d8a2d6be86f3eba6c72a19cfeb9 Mon Sep 17 00:00:00 2001 From: yjg0815git Date: Sun, 1 Jun 2025 03:25:04 -0500 Subject: [PATCH 6/6] fix:Docker error --- main.py | 102 +++++++++++++++++++++++------------------------ requirements.txt | 4 +- 2 files changed, 53 insertions(+), 53 deletions(-) diff --git a/main.py b/main.py index 0ded50f..8f05ef4 100644 --- a/main.py +++ b/main.py @@ -1,66 +1,66 @@ -import RPi.GPIO as GPIO -import time -from datetime import datetime +# import RPi.GPIO as GPIO +# import time +# from datetime import datetime -# ─────────────────────────────── -# PIR 센서 관련 -# ─────────────────────────────── -PIR_PIN = 17 # GPIO17 +# # ─────────────────────────────── +# # PIR 센서 관련 +# # ─────────────────────────────── +# PIR_PIN = 17 # GPIO17 -def detect_motion(): - GPIO.setmode(GPIO.BCM) - GPIO.setup(PIR_PIN, GPIO.IN) +# def detect_motion(): +# GPIO.setmode(GPIO.BCM) +# GPIO.setup(PIR_PIN, GPIO.IN) - print("PIR 센서 디버깅 시작 (Ctrl+C 종료)") - prev_state = None +# print("PIR 센서 디버깅 시작 (Ctrl+C 종료)") +# prev_state = None - try: - while True: - signal = GPIO.input(PIR_PIN) +# try: +# while True: +# signal = GPIO.input(PIR_PIN) - if signal != prev_state: - timestamp = datetime.now().strftime("%H:%M:%S") - state_str = "감지됨 (HIGH)" if signal else " 없음 (LOW)" - print(f"[{timestamp}] 상태 변경 ▶ {state_str}") - prev_state = signal +# if signal != prev_state: +# timestamp = datetime.now().strftime("%H:%M:%S") +# state_str = "감지됨 (HIGH)" if signal else " 없음 (LOW)" +# print(f"[{timestamp}] 상태 변경 ▶ {state_str}") +# prev_state = signal - time.sleep(0.1) - except KeyboardInterrupt: - print("⛔ 종료 중...") - GPIO.cleanup() +# time.sleep(0.1) +# except KeyboardInterrupt: +# print("⛔ 종료 중...") +# GPIO.cleanup() -# ─────────────────────────────── -# DHT11 센서 관련 (5회 재시도 버전) -# ─────────────────────────────── -import adafruit_dht -import board +# # ─────────────────────────────── +# # DHT11 센서 관련 (5회 재시도 버전) +# # ─────────────────────────────── +# import adafruit_dht +# import board -def read_dht11(): - print("🌡️ DHT11 센서 측정 시작...") - dhtDevice = adafruit_dht.DHT11(board.D4) # GPIO4 (멀티보드 IO4) +# def read_dht11(): +# print("🌡️ DHT11 센서 측정 시작...") +# dhtDevice = adafruit_dht.DHT11(board.D4) # GPIO4 (멀티보드 IO4) - for i in range(5): # 최대 5번 재시도 - try: - print(f"📡 시도 {i + 1} ...") - temperature = dhtDevice.temperature - humidity = dhtDevice.humidity +# for i in range(5): # 최대 5번 재시도 +# try: +# print(f"📡 시도 {i + 1} ...") +# temperature = dhtDevice.temperature +# humidity = dhtDevice.humidity - if temperature is not None and humidity is not None: - print(f"✅ 온도: {temperature}°C") - print(f"✅ 습도: {humidity}%") - break - else: - print("⚠️ 센서로부터 데이터를 읽을 수 없습니다.") - except RuntimeError as error: - print(f"⚠️ 에러 발생: {error.args[0]}") - except Exception as error: - print(f"❌ 심각한 오류: {error}") - break - time.sleep(2) # 재시도 간 간격 +# if temperature is not None and humidity is not None: +# 
print(f"✅ 온도: {temperature}°C") +# print(f"✅ 습도: {humidity}%") +# break +# else: +# print("⚠️ 센서로부터 데이터를 읽을 수 없습니다.") +# except RuntimeError as error: +# print(f"⚠️ 에러 발생: {error.args[0]}") +# except Exception as error: +# print(f"❌ 심각한 오류: {error}") +# break +# time.sleep(2) # 재시도 간 간격 - # 종료 함수는 비활성화 (라이브러리 오류 방지) - # dhtDevice.exit() +# # 종료 함수는 비활성화 (라이브러리 오류 방지) +# # dhtDevice.exit() # ─────────────────────────────── diff --git a/requirements.txt b/requirements.txt index 169daaf..c61a0e6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,7 +36,7 @@ openai==1.68.2 packaging==25.0 playsound==1.3.0 protobuf==6.31.0 -PyAudio==0.2.14 +# PyAudio==0.2.14 pycparser==2.22 pydantic==2.10.6 pydantic_core==2.27.2 @@ -46,7 +46,7 @@ python-dotenv==1.0.1 python-multipart==0.0.20 PyYAML==6.0.2 requests==2.32.3 -RPi.GPIO==0.7.1 +# RPi.GPIO==0.7.1 s3transfer==0.11.4 scipy==1.13.1 six==1.17.0