Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,5 @@ venv.bak/
__pycache__/
*.pyc
audio/
emotion_diary/
emotion_diary/
pyvenv.cfg
12 changes: 12 additions & 0 deletions app/controller/RecordController.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,24 @@ def send_user_voice_file_to_spring(token: str, voice_url: str):
"Authorization": f"Bearer {token}",
"Content-Type": "text/plain"
}
# requests.post("http://localhost:8080/api/spring/records/voices", headers=headers, json=data)
# requests.post("https://peachmentor.com/api/spring/records/voices", headers=headers, json=data)

requests.post(
"http://springboot:8080/api/spring/records/voices",
headers=headers,
data=voice_url # 주의: 'data='를 써야 함
)


def send_user_voice_id_to_spring(token: str, voice_id: str):
    """Notify the Spring backend of a newly created ElevenLabs voice id.

    Posts ``{"voiceId": voice_id}`` as JSON to the Spring records endpoint,
    authenticated with the caller's bearer token. Fire-and-forget: the
    response is not inspected.
    """
    headers = {
        "Authorization": f"Bearer {token}"
    }
    data = {
        "voiceId": voice_id
    }
    # Consistency fix: use the docker-compose service host, matching
    # send_user_voice_file_to_spring above. "localhost" only resolves when
    # Spring runs outside a container on the same machine and looks like an
    # oversight from the containerization change.
    requests.post("http://springboot:8080/api/spring/records/voices", headers=headers, json=data)
    # requests.post("https://peachmentor.com/api/spring/records/voices", headers=headers, json=data)


Expand Down
27 changes: 14 additions & 13 deletions app/service/elevenLabs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
from dotenv import load_dotenv
from elevenlabs import ElevenLabs, VoiceSettings

from app.service.s3Service import upload_to_s3
from AI.app.service.s3Service import upload_to_s3

load_dotenv()
client = ElevenLabs(
api_key=os.getenv("ELEVENLABS_KEY"),
)
yjg_voice_id = os.getenv("YJG_VOICE_ID")


def get_voice():
Expand Down Expand Up @@ -42,7 +43,7 @@ def add_voice(name: str, local_file_paths: list):
return response.voice_id


def text_to_speech_file_save_AWS(text: str, voice_id: str) -> str:
def text_to_speech_file_save_AWS(text: str, voice_id=yjg_voice_id) -> str:
response = client.text_to_speech.convert(
voice_id=voice_id,
output_format="mp3_22050_32",
Expand All @@ -69,27 +70,27 @@ def text_to_speech_file_save_AWS(text: str, voice_id: str) -> str:
return aws_file_url


def text_to_speech_file(text: str, voice_id: str) -> str:
def text_to_speech_file(text: str, voice_id=yjg_voice_id) -> str:
response = client.text_to_speech.convert(
voice_id=voice_id,
output_format="mp3_22050_32",
# output_format="mp3_22050_32",
text=text,
model_id="eleven_turbo_v2_5",
voice_settings=VoiceSettings(
stability=0.3,
similarity_boost=1.0,
style=0.0,
use_speaker_boost=True,
),
model_id="eleven_multilingual_v2",
# voice_settings=VoiceSettings(
# stability=0.3,
# similarity_boost=1.0,
# style=0.0,
# use_speaker_boost=True,
# ),
)

save_file_path = f"{uuid.uuid4()}.mp3"
save_file_path = f"{uuid.uuid4()}.wav"
with open(save_file_path, "wb") as f:
for chunk in response:
if chunk:
f.write(chunk)
# aws_file_url = upload_to_s3(local_file_path=save_file_path)
os.remove(save_file_path)
# os.remove(save_file_path)

# delete_voice(voice_id)

Expand Down
43 changes: 43 additions & 0 deletions app/service/gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,46 @@ def get_schedule_json(self):
schedule_dict = parsing_json.extract_json_from_content(content)

return schedule_dict

class GenerateQuestionGPT:
    """Generate a caring, caregiver-style message for a parent from a keyword.

    Given a keyword (``text``) and a nickname for the parent (``alias``),
    builds a GPT prompt that asks for one warm, emotional sentence and
    returns it parsed from a ``{"keyword": "sentence"}`` JSON reply.
    """

    def __init__(self, text, alias):
        self.text = text    # keyword the generated message is about
        self.alias = alias  # nickname used to address the parent

    def create_schedule_prompt(self):
        """Build the chat messages (single system message) for the GPT call."""
        # BUG FIX: the original interpolated self.schedules, an attribute this
        # class never sets (copied from the schedule-GPT class above), which
        # raised AttributeError on every call. The keyword lives in self.text.
        system_message = f"""
너는 지금부터 혼자 사시는 부모님을 걱정하는 보호자야.

네 역할은 키워드를 보고, 키워드와 관련한 문제에 대해서 부모님을 걱정하고, 생활은 챙겨주는거야.
키워드는 다음과 같아: {str(self.text)}

너의 목표는 두 가지야:
1. 키워드에 대한 질문 혹은 문장을 한 줄의 텍스트로 만들어.
ex) 키워드가 '저녁' 이라면, "{self.alias}~~ 하루 잘 보냈어?? 저녁도 맛있는거 챙겨먹어!! 사랑해~~ "
2. 만든 텍스트는 ?? !! ~~ ,, .. 등의 다양한 특수문자가 많이 들어갈 수 있어. 감정이 강하게 느껴지게 작성해줘.
2-a. 특수문자를 붙일 때는 꼭 2개씩 붙여줘
3. 부모님을 지칭하는 별명은 {self.alias} 로 해줘.
4. 문장과 문장 사이의 띄어쓰기를 2개씩 넣어줘

결과는 {{"키워드": "문장"}} 형태의 JSON 문자열로 반환해줘. 꼭 큰따옴표(")만 사용해.

"""

        messages = [
            {"role": "system", "content": system_message}
        ]
        return messages

    def get_schedule_json(self):
        """Call GPT and return the parsed ``{"keyword": "sentence"}`` dict."""
        prompt = self.create_schedule_prompt()
        response = client.chat.completions.create(
            model="gpt-4-turbo",
            messages=prompt,
            temperature=0.5,
            max_tokens=2048
        )

        content = response.choices[0].message.content
        schedule_dict = parsing_json.extract_json_from_content(content)

        return schedule_dict
111 changes: 111 additions & 0 deletions app/service/interaction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import os
import subprocess
from datetime import datetime

import pyaudio
import numpy as np
from faster_whisper import WhisperModel
from openai import OpenAI
from elevenLabs import text_to_speech_file
from elevenlabs import ElevenLabs
from dotenv import load_dotenv

# 아래 두 함수는 record_respberry.py 에 구현된 그대로 사용합니다.
# emotion_record(index) → "{prefix}{index}.wav" 파일을 만들어 리턴
# is_silent(data) → 음성 청크가 침묵인지 여부 판단
from record_respberry import emotion_record, is_silent

# ==== Common configuration ====
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ELEVENLABS_KEY = os.getenv("ELEVENLABS_KEY")

# Fail fast at import time if either API key is missing from .env.
if not OPENAI_API_KEY or not ELEVENLABS_KEY:
    raise RuntimeError(".env 에 OPENAI_API_KEY/ELEVENLABS_KEY 를 설정하세요")

# OpenAI / ElevenLabs clients
gpt_client = OpenAI(api_key=OPENAI_API_KEY)
tts_client = ElevenLabs(api_key=ELEVENLABS_KEY)

# Whisper STT model (tiny, CPU, int8 quantization)
whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8")

# Recording parameters (assumes ALSA default device is the USB mic)
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = RATE * 3  # 3-second buffer (sample count at 44.1 kHz)

# Prefix for today's recording files, keyed by date.
# NOTE(review): absolute path is machine-specific (Raspberry Pi deploy?) — confirm.
today_str = datetime.now().strftime("%Y%m%d")
WAVE_OUTPUT_PREFIX = f"/home/team4/Desktop/capstone/AI/app/emotion_diary/{today_str}_"

def interaction(alias: str):
    """Run an open-ended spoken conversation loop with the user.

    alias: the user's name or the nickname the AI calls them (e.g. "홍길동")

    Flow:
      1) greet the alias -> TTS -> play
      2) repeat: emotion_record -> Whisper STT -> GPT question -> TTS -> play

    Exits on Ctrl-C (KeyboardInterrupt) or any unhandled exception.
    """
    # 1) Greeting addressed to the alias
    greet_text = f"{alias}~~ 오늘 좋은 하루 보냈나~~?? 어떻게 지냈어!!"
    print("👋 인사:", greet_text)
    greet_audio = text_to_speech_file(greet_text)
    # NOTE(review): mpg321 is an MP3 player; if text_to_speech_file now saves
    # .wav files (per the elevenLabs.py change), playback may fail — confirm.
    subprocess.run(["mpg321", greet_audio], check=True)

    # Seed the conversation history with the greeting
    messages = [
        {"role": "system",
         "content": "너는 대화를 자연스럽게 이어가는 AI야. 사용자와 계속 이어지는 대화를 만들어야 해."},
        {"role": "assistant", "content": greet_text}
    ]

    record_idx = 0
    try:
        while True:
            # 2-1) Record the user (emotion_record stops on detected silence)
            wav_path = emotion_record(record_idx)
            print(f"[녹음 완료] {wav_path}")
            record_idx += 1

            # 2-2) Whisper STT (Korean)
            segments, _ = whisper_model.transcribe(wav_path,
                                                   beam_size=1,
                                                   language="ko")
            user_text = " ".join(seg.text for seg in segments).strip()
            print("▶ 사용자 음성(텍스트):", user_text or "(인식 안됨)")

            # Empty transcript -> re-record instead of sending nothing to GPT
            if not user_text:
                print("(음성 인식 실패 → 다시 녹음)")
                continue

            # 2-3) Ask GPT-4o for the next question, with full history
            messages.append({"role": "user", "content": user_text})
            resp = gpt_client.chat.completions.create(
                model="gpt-4o",
                messages=messages
            )
            question = resp.choices[0].message.content.strip()
            print("생성된 질문:", question)

            # 2-4) Append the assistant's question to the history
            messages.append({"role": "assistant", "content": question})

            # 2-5) question -> ElevenLabs TTS -> local audio file
            tts_path = text_to_speech_file(question)
            print(" (TTS 파일 생성:", tts_path, ")")

            # 2-6) Play the generated audio
            subprocess.run(["mpg321", tts_path], check=True)

    except KeyboardInterrupt:
        print("\n[사용자 종료 요청] interaction을 종료합니다.")
    except Exception as e:  # top-level boundary: log and fall through to exit
        print("예외 발생:", e)

    print("=== interaction 종료 ===")

if __name__ == "__main__":
    # Runs only when the script is executed directly.
    # Replace the alias with the desired name.
    interaction("홍길동")

17 changes: 16 additions & 1 deletion app/service/main.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,26 @@
import asyncio

from fastapi import FastAPI, Depends, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.openapi.utils import get_openapi
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from contextlib import asynccontextmanager

from app.controller.RecordController import router
from app.service.subscribe import subscribe_schedule

@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan: run the Redis schedule subscriber for the app's lifetime.

    On startup, launches subscribe_schedule() as a background asyncio task;
    on shutdown, cancels it and awaits the cancellation so it exits cleanly.
    """
    task = asyncio.create_task(subscribe_schedule())
    yield
    # Shutdown: cancel the subscriber and swallow the expected CancelledError.
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        print("Redis task cancelled")


app = FastAPI()
app = FastAPI(lifespan = lifespan)

auth_scheme = HTTPBearer()

Expand Down
Loading