Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions agent/app/dto/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,18 @@ type MonitorSettingUpdate struct {
}

type MonitorGPUOptions struct {
GPUType string `json:"gpuType"`
Options []string `json:"options"`
GPUType string `json:"gpuType"`
ChartHide []GPUChartHide `json:"chartHide"`
Options []string `json:"options"`
}
// GPUChartHide carries, for one GPU/XPU device, a set of flags telling the
// monitor UI which charts to hide. A flag set to true means the matching
// chart should be hidden for that device.
type GPUChartHide struct {
// ProductName labels the device; LoadGPUOptions fills it as "<index> - <name>".
ProductName string `json:"productName"`
// Process is the hide flag for the process chart.
// NOTE(review): not assigned by the visible LoadGPUOptions code — confirm where it is populated.
Process bool `json:"process"`
// GPU is set when the device reports no utilization value (empty or "N/A");
// LoadGPUOptions always sets it for XPU devices.
GPU bool `json:"gpu"`
// Memory is set when the device's memory readings are empty or "N/A".
Memory bool `json:"memory"`
// Power is set when the device's power readings are empty or "N/A".
Power bool `json:"power"`
// Temperature is set when the temperature reading is empty or "N/A".
Temperature bool `json:"temperature"`
// Speed is set when the fan speed reading is empty or "N/A";
// LoadGPUOptions always sets it for XPU devices.
Speed bool `json:"speed"`
}
type MonitorGPUSearch struct {
ProductName string `json:"productName"`
Expand All @@ -59,6 +69,7 @@ type MonitorGPUData struct {
MemoryPercent []float64 `json:"memoryPercent"`
SpeedValue []int `json:"speedValue"`

ProcessCount []int `json:"processCount"`
GPUProcesses [][]GPUProcess `json:"gpuProcesses"`
}

Expand Down
37 changes: 34 additions & 3 deletions agent/app/service/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@
return data, nil
}

func (m *MonitorService) LoadGPUOptions() dto.MonitorGPUOptions {

Check failure on line 122 in agent/app/service/monitor.go

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Refactor this method to reduce its Cognitive Complexity from 40 to the 15 allowed.

See more on https://sonarcloud.io/project/issues?id=1Panel-dev_1Panel&issues=AZsM-JqgZHSGyzEwT6BF&open=AZsM-JqgZHSGyzEwT6BF&pullRequest=11303
var data dto.MonitorGPUOptions
gpuExist, gpuClient := gpu.New()
xpuExist, xpuClient := xpu.New()
Expand All @@ -137,16 +137,45 @@
return gpuInfo.GPUs[i].Index < gpuInfo.GPUs[j].Index
})
for _, item := range gpuInfo.GPUs {
var chartHide dto.GPUChartHide
chartHide.ProductName = fmt.Sprintf("%d - %s", item.Index, item.ProductName)

Check failure on line 141 in agent/app/service/monitor.go

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Define a constant instead of duplicating this literal "%d - %s" 6 times.

See more on https://sonarcloud.io/project/issues?id=1Panel-dev_1Panel&issues=AZsM-JqgZHSGyzEwT6BE&open=AZsM-JqgZHSGyzEwT6BE&pullRequest=11303
chartHide.GPU = item.GPUUtil == "" || item.GPUUtil == "N/A"
if (item.MemTotal == "" || item.MemTotal == "N/A") && (item.MemUsed == "" || item.MemUsed == "N/A") {
chartHide.Memory = true
}
if (item.MaxPowerLimit == "" || item.MaxPowerLimit == "N/A") && (item.PowerDraw == "" || item.PowerDraw == "N/A") {
chartHide.Power = true
}
chartHide.Temperature = item.Temperature == "" || item.Temperature == "N/A"
chartHide.Speed = item.FanSpeed == "" || item.FanSpeed == "N/A"
data.ChartHide = append(data.ChartHide, chartHide)
data.Options = append(data.Options, fmt.Sprintf("%d - %s", item.Index, item.ProductName))
}
return data
} else {
data.GPUType = "xpu"
var err error
data.Options, err = xpuClient.LoadDeviceList()
if err != nil || len(data.Options) == 0 {
xpu, err := xpuClient.LoadGpuInfo()
if err != nil || len(xpu.Xpu) == 0 {
global.LOG.Error("Load XPU info failed or no XPU found, err: ", err)
}
sort.Slice(xpu.Xpu, func(i, j int) bool {
return xpu.Xpu[i].Basic.DeviceID < xpu.Xpu[j].Basic.DeviceID
})
for _, item := range xpu.Xpu {
var chartHide dto.GPUChartHide
chartHide.GPU = true
chartHide.Speed = true
chartHide.ProductName = fmt.Sprintf("%d - %s", item.Basic.DeviceID, item.Basic.DeviceName)
if (item.Stats.MemoryUsed == "" || item.Stats.MemoryUsed == "N/A") && (item.Basic.Memory == "" || item.Basic.FreeMemory == "N/A") {
chartHide.Memory = true
}
if item.Stats.Power == "" || item.Stats.Power == "N/A" {
chartHide.Power = true
}
chartHide.Temperature = item.Stats.Temperature == "" || item.Stats.Temperature == "N/A"
data.ChartHide = append(data.ChartHide, chartHide)
data.Options = append(data.Options, fmt.Sprintf("%d - %s", item.Basic.DeviceID, item.Basic.DeviceName))
}
return data
}
}
Expand Down Expand Up @@ -182,8 +211,10 @@
}
var process []dto.GPUProcess
if err := json.Unmarshal([]byte(gpu.Processes), &process); err == nil {
data.ProcessCount = append(data.ProcessCount, len(process))
data.GPUProcesses = append(data.GPUProcesses, process)
} else {
data.ProcessCount = append(data.ProcessCount, 0)
data.GPUProcesses = append(data.GPUProcesses, []dto.GPUProcess{})
}
data.SpeedValue = append(data.SpeedValue, gpu.FanSpeed)
Expand Down
17 changes: 0 additions & 17 deletions agent/utils/ai_tools/xpu/xpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,23 +120,6 @@ func (x XpuSMI) LoadDashData() ([]XPUSimpleInfo, error) {
return res, nil
}

// LoadDeviceList runs `xpu-smi discovery -j` and returns one display label per
// discovered device, formatted as "<device id> - <device name>".
//
// The command manager is created with a 5-second timeout option. An error is
// returned when the command fails or when its output cannot be decoded as
// JSON; both errors wrap the underlying cause with %w so callers can inspect
// it via errors.Is / errors.As. When no devices are listed the returned slice
// is nil.
func (x XpuSMI) LoadDeviceList() ([]string, error) {
	cmdMgr := cmd.NewCommandMgr(cmd.WithTimeout(5 * time.Second))
	data, err := cmdMgr.RunWithStdoutBashC("xpu-smi discovery -j")
	if err != nil {
		// Wrap (rather than flatten with %v) to stay consistent with the
		// unmarshal error below and keep the error chain intact.
		return nil, fmt.Errorf("calling xpu-smi failed, %w", err)
	}

	var deviceInfo DeviceInfo
	if err := json.Unmarshal([]byte(data), &deviceInfo); err != nil {
		return nil, fmt.Errorf("deviceInfo json unmarshal failed, err: %w", err)
	}

	// Deliberately left nil when there are no devices, matching prior behavior.
	var deviceNames []string
	for _, device := range deviceInfo.DeviceList {
		deviceNames = append(deviceNames, fmt.Sprintf("%d - %s", device.DeviceID, device.DeviceName))
	}
	return deviceNames, nil
}

func (x XpuSMI) LoadGpuInfo() (*XpuInfo, error) {
cmdMgr := cmd.NewCommandMgr(cmd.WithTimeout(5 * time.Second))
data, err := cmdMgr.RunWithStdoutBashC("xpu-smi discovery -j")
Expand Down
10 changes: 10 additions & 0 deletions frontend/src/api/interface/host.ts
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,16 @@ export namespace Host {
/** GPU monitoring options: the detected device type, device labels, and per-device chart visibility. */
export interface MonitorGPUOptions {
// Device family reported by the backend (the backend sets 'xpu' for XPU hosts).
gpuType: string;
// Device labels; the backend formats each as "<index> - <name>".
options: Array<string>;
// One entry per device describing which charts to hide.
chartHide: Array<ChartHide>;
}
/**
 * Per-device chart visibility flags; field names match the JSON tags of the
 * backend GPUChartHide struct. A true flag means the chart should be hidden.
 */
export interface ChartHide {
// Device label, formatted by the backend as "<index> - <name>".
productName: string;
// Hide the process chart.
process: boolean;
// Hide the GPU utilization chart.
gpu: boolean;
// Hide the memory chart.
memory: boolean;
// Hide the power chart.
power: boolean;
// Hide the temperature chart.
temperature: boolean;
// Hide the fan speed chart.
speed: boolean;
}
export interface MonitorGPUData {
date: Array<Date>;
Expand Down
31 changes: 31 additions & 0 deletions frontend/src/lang/modules/en.ts
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,37 @@ const message = {
memoryUsed: 'Memory Used',
memoryTotal: 'Total Memory',
percent: 'Utilization',

base: 'Basic Information',
driverVersion: 'Driver Version',
cudaVersion: 'CUDA Version',
processMemoryUsage: 'Memory Usage',
performanceStateHelper: 'From P0 (maximum performance) to P12 (minimum performance)',
busID: 'Bus Address',
persistenceMode: 'Persistence Mode',
enabled: 'Enabled',
disabled: 'Disabled',
persistenceModeHelper:
'Persistence mode responds to tasks more quickly, but standby power consumption will increase accordingly',
displayActive: 'GPU Initialization',
displayActiveT: 'Yes',
displayActiveF: 'No',
ecc: 'Error Checking and Correcting Technology',
computeMode: 'Compute Mode',
default: 'Default',
exclusiveProcess: 'Exclusive Process',
exclusiveThread: 'Exclusive Thread',
prohibited: 'Prohibited',
defaultHelper: 'Default: Processes can execute concurrently',
exclusiveProcessHelper:
'Exclusive Process: Only one CUDA context can use the GPU, but it can be shared by multiple threads',
exclusiveThreadHelper: 'Exclusive Thread: Only one thread in a CUDA context can use the GPU',
prohibitedHelper: 'Prohibited: Concurrent process execution is not allowed',
migModeHelper: 'Used to create MIG instances, implementing physical GPU isolation at the user layer.',
migModeNA: 'Not Supported',
current: 'Real-time Monitoring',
history: 'Historical Records',
notSupport: 'The current version or driver does not support displaying this parameter.',
},
mcp: {
server: 'MCP Server',
Expand Down
32 changes: 32 additions & 0 deletions frontend/src/lang/modules/es-es.ts
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,38 @@ const message = {
memoryUsed: 'Memoria Utilizada',
memoryTotal: 'Memoria Total',
percent: 'Utilización',

base: 'Información Básica',
driverVersion: 'Versión del Controlador',
cudaVersion: 'Versión de CUDA',
processMemoryUsage: 'Uso de Memoria',
performanceStateHelper: 'Desde P0 (rendimiento máximo) hasta P12 (rendimiento mínimo)',
busID: 'Dirección del Bus',
persistenceMode: 'Modo de Persistencia',
enabled: 'Habilitado',
disabled: 'Deshabilitado',
persistenceModeHelper:
'El modo de persistencia responde a las tareas más rápidamente, pero el consumo de energía en espera aumentará en consecuencia',
displayActive: 'Inicialización de GPU',
displayActiveT: 'Sí',
displayActiveF: 'No',
ecc: 'Tecnología de Corrección de Errores',
computeMode: 'Modo de Computación',
default: 'Predeterminado',
exclusiveProcess: 'Proceso Exclusivo',
exclusiveThread: 'Hilo Exclusivo',
prohibited: 'Prohibido',
defaultHelper: 'Predeterminado: Los procesos pueden ejecutarse concurrentemente',
exclusiveProcessHelper:
'Proceso Exclusivo: Solo un contexto CUDA puede usar la GPU, pero puede ser compartido por múltiples hilos',
exclusiveThreadHelper: 'Hilo Exclusivo: Solo un hilo en un contexto CUDA puede usar la GPU',
prohibitedHelper: 'Prohibido: No se permite la ejecución concurrente de procesos',
migModeHelper:
'Se utiliza para crear instancias MIG, implementando aislamiento físico de GPU en la capa de usuario.',
migModeNA: 'No Compatible',
current: 'Monitoreo en Tiempo Real',
history: 'Registros Históricos',
notSupport: 'La versión actual o el controlador no admiten mostrar este parámetro.',
},
mcp: {
server: 'Servidor MCP',
Expand Down
30 changes: 30 additions & 0 deletions frontend/src/lang/modules/ja.ts
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,36 @@ const message = {
memoryUsed: '使用メモリ',
memoryTotal: '総メモリ',
percent: '使用率',

base: '基本情報',
driverVersion: 'ドライバーバージョン',
cudaVersion: 'CUDA バージョン',
processMemoryUsage: 'メモリ使用量',
performanceStateHelper: 'P0(最大パフォーマンス)から P12(最小パフォーマンス)まで',
busID: 'バスアドレス',
persistenceMode: '永続モード',
enabled: '有効',
disabled: '無効',
persistenceModeHelper: '永続モードはタスクへの応答がより迅速ですが、それに応じて待機電力消費も増加します',
displayActive: 'GPU 初期化',
displayActiveT: 'はい',
displayActiveF: 'いいえ',
ecc: 'エラー修正技術',
computeMode: '計算モード',
default: 'デフォルト',
exclusiveProcess: '排他プロセス',
exclusiveThread: '排他スレッド',
prohibited: '禁止',
defaultHelper: 'デフォルト: プロセスは同時実行可能',
exclusiveProcessHelper:
'排他プロセス: 1つのCUDAコンテキストのみがGPUを使用可能、ただし複数スレッドで共有可能',
exclusiveThreadHelper: '排他スレッド: CUDAコンテキスト内の1つのスレッドのみがGPUを使用可能',
prohibitedHelper: '禁止: プロセスの同時実行は許可されません',
migModeHelper: 'MIGインスタンスを作成するために使用され、ユーザーレイヤーでGPUの物理的隔離を実装します。',
migModeNA: 'サポートされていません',
current: 'リアルタイム監視',
history: '履歴記録',
notSupport: '現在のバージョンまたはドライバーはこのパラメータの表示をサポートしていません。',
},
mcp: {
server: 'MCP サーバー',
Expand Down
30 changes: 30 additions & 0 deletions frontend/src/lang/modules/ko.ts
Original file line number Diff line number Diff line change
Expand Up @@ -695,6 +695,36 @@ const message = {
memoryUsed: '사용된 메모리',
memoryTotal: '전체 메모리',
percent: '사용률',

base: '기본 정보',
driverVersion: '드라이버 버전',
cudaVersion: 'CUDA 버전',
processMemoryUsage: '메모리 사용량',
performanceStateHelper: 'P0(최대 성능)부터 P12(최소 성능)까지',
busID: '버스 주소',
persistenceMode: '지속성 모드',
enabled: '활성화',
disabled: '비활성화',
persistenceModeHelper: '지속성 모드는 작업에 더 빠르게 응답하지만 대기 전력 소비도 그에 따라 증가합니다',
displayActive: 'GPU 초기화',
displayActiveT: '예',
displayActiveF: '아니오',
ecc: '오류 검사 및 수정 기술',
computeMode: '계산 모드',
default: '기본값',
exclusiveProcess: '배타적 프로세스',
exclusiveThread: '배타적 스레드',
prohibited: '금지됨',
defaultHelper: '기본값: 프로세스가 동시에 실행될 수 있음',
exclusiveProcessHelper:
'배타적 프로세스: 하나의 CUDA 컨텍스트만 GPU를 사용할 수 있지만 여러 스레드에서 공유 가능',
exclusiveThreadHelper: '배타적 스레드: CUDA 컨텍스트의 하나의 스레드만 GPU를 사용할 수 있음',
prohibitedHelper: '금지됨: 프로세스 동시 실행이 허용되지 않음',
migModeHelper: 'MIG 인스턴스를 생성하는 데 사용되며 사용자 레이어에서 GPU의 물리적 격리를 구현합니다.',
migModeNA: '지원되지 않음',
current: '실시간 모니터링',
history: '기록',
notSupport: '현재 버전 또는 드라이버는 이 매개변수 표시를 지원하지 않습니다.',
},
mcp: {
server: 'MCP サーバー',
Expand Down
32 changes: 32 additions & 0 deletions frontend/src/lang/modules/ms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,38 @@ const message = {
memoryUsed: 'Memori Digunakan',
memoryTotal: 'Jumlah Memori',
percent: 'Penggunaan',
base: 'Maklumat Asas',

driverVersion: 'Versi Pemacu',
cudaVersion: 'Versi CUDA',
processMemoryUsage: 'Penggunaan Memori',
performanceStateHelper: 'Dari P0 (prestasi maksimum) hingga P12 (prestasi minimum)',
busID: 'Alamat Bas',
persistenceMode: 'Mod Kegigihan',
enabled: 'Diaktifkan',
disabled: 'Dilumpuhkan',
persistenceModeHelper:
'Mod kegigihan bertindak balas kepada tugas dengan lebih cepat, tetapi penggunaan kuasa siap sedia akan meningkat dengan sewajarnya',
displayActive: 'Permulaan GPU',
displayActiveT: 'Ya',
displayActiveF: 'Tidak',
ecc: 'Teknologi Pemeriksaan dan Pembetulan Ralat',
computeMode: 'Mod Pengiraan',
default: 'Lalai',
exclusiveProcess: 'Proses Eksklusif',
exclusiveThread: 'Benang Eksklusif',
prohibited: 'Dilarang',
defaultHelper: 'Lalai: Proses boleh dilaksanakan serentak',
exclusiveProcessHelper:
'Proses Eksklusif: Hanya satu konteks CUDA boleh menggunakan GPU, tetapi boleh dikongsi oleh berbilang benang',
exclusiveThreadHelper: 'Benang Eksklusif: Hanya satu benang dalam konteks CUDA boleh menggunakan GPU',
prohibitedHelper: 'Dilarang: Pelaksanaan proses serentak tidak dibenarkan',
migModeHelper:
'Digunakan untuk mencipta contoh MIG, melaksanakan pengasingan fizikal GPU pada lapisan pengguna.',
migModeNA: 'Tidak Disokong',
current: 'Pemantauan Masa Nyata',
history: 'Rekod Sejarah',
notSupport: 'Versi atau pemacu semasa tidak menyokong paparan parameter ini.',
},
mcp: {
server: 'Pelayan MCP',
Expand Down
32 changes: 32 additions & 0 deletions frontend/src/lang/modules/pt-br.ts
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,38 @@ const message = {
memoryUsed: 'Memória Usada',
memoryTotal: 'Memória Total',
percent: 'Utilização',

base: 'Informações Básicas',
driverVersion: 'Versão do Driver',
cudaVersion: 'Versão do CUDA',
processMemoryUsage: 'Uso de Memória',
performanceStateHelper: 'De P0 (desempenho máximo) a P12 (desempenho mínimo)',
busID: 'Endereço do Barramento',
persistenceMode: 'Modo de Persistência',
enabled: 'Habilitado',
disabled: 'Desabilitado',
persistenceModeHelper:
'O modo de persistência responde às tarefas mais rapidamente, mas o consumo de energia em espera aumentará correspondentemente',
displayActive: 'Inicialização da GPU',
displayActiveT: 'Sim',
displayActiveF: 'Não',
ecc: 'Tecnologia de Verificação e Correção de Erros',
computeMode: 'Modo de Computação',
default: 'Padrão',
exclusiveProcess: 'Processo Exclusivo',
exclusiveThread: 'Thread Exclusiva',
prohibited: 'Proibido',
defaultHelper: 'Padrão: Os processos podem executar simultaneamente',
exclusiveProcessHelper:
'Processo Exclusivo: Apenas um contexto CUDA pode usar a GPU, mas pode ser compartilhado por múltiplas threads',
exclusiveThreadHelper: 'Thread Exclusiva: Apenas uma thread em um contexto CUDA pode usar a GPU',
prohibitedHelper: 'Proibido: A execução simultânea de processos não é permitida',
migModeHelper:
'Usado para criar instâncias MIG, implementando isolamento físico da GPU na camada do usuário.',
migModeNA: 'Não Suportado',
current: 'Monitoramento em Tempo Real',
history: 'Registros Históricos',
notSupport: 'A versão atual ou o driver não suportam exibir este parâmetro.',
},
mcp: {
server: 'Servidor MCP',
Expand Down
Loading
Loading