From 876edcb1ce76cdb3a113e74337ed1b232c932120 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E4=B8=80=E4=B9=8B?= Date: Thu, 8 Jan 2026 17:05:15 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=90=9B=20=E5=A4=84=E7=90=86=E8=84=9A?= =?UTF-8?q?=E6=9C=AC=E7=BC=96=E7=A0=81=E9=97=AE=E9=A2=98=20#1115?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- package.json | 1 + pnpm-lock.yaml | 23 +++++++++++++++++++++++ src/pages/install/App.tsx | 13 ++++++------- 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/package.json b/package.json index c6b851af7..46cdfa0c7 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,7 @@ "@dnd-kit/modifiers": "^9.0.0", "@dnd-kit/sortable": "^10.0.0", "@dnd-kit/utilities": "^3.2.2", + "chardet": "^2.1.1", "cron": "^3.2.1", "crypto-js": "^4.2.0", "dayjs": "^1.11.13", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 356299cba..842127acc 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -23,6 +23,9 @@ importers: '@dnd-kit/utilities': specifier: ^3.2.2 version: 3.2.2(react@18.3.1) + chardet: + specifier: ^2.1.1 + version: 2.1.1 cron: specifier: ^3.2.1 version: 3.2.1 @@ -899,56 +902,67 @@ packages: resolution: {integrity: sha512-+xmiDGGaSfIIOXMzkhJ++Oa0Gwvl9oXUeIiwarsdRXSe27HUIvjbSIpPxvnNsRebsNdUo7uAiQVgBD1hVriwSQ==} cpu: [arm] os: [linux] + libc: [glibc] '@rollup/rollup-linux-arm-musleabihf@4.44.2': resolution: {integrity: sha512-bDHvhzOfORk3wt8yxIra8N4k/N0MnKInCW5OGZaeDYa/hMrdPaJzo7CSkjKZqX4JFUWjUGm88lI6QJLCM7lDrA==} cpu: [arm] os: [linux] + libc: [musl] '@rollup/rollup-linux-arm64-gnu@4.44.2': resolution: {integrity: sha512-NMsDEsDiYghTbeZWEGnNi4F0hSbGnsuOG+VnNvxkKg0IGDvFh7UVpM/14mnMwxRxUf9AdAVJgHPvKXf6FpMB7A==} cpu: [arm64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-arm64-musl@4.44.2': resolution: {integrity: sha512-lb5bxXnxXglVq+7imxykIp5xMq+idehfl+wOgiiix0191av84OqbjUED+PRC5OA8eFJYj5xAGcpAZ0pF2MnW+A==} cpu: [arm64] os: [linux] + libc: [musl] '@rollup/rollup-linux-loongarch64-gnu@4.44.2': resolution: {integrity: sha512-Yl5Rdpf9pIc4GW1PmkUGHdMtbx0fBLE1//SxDmuf3X0dUC57+zMepow2LK0V21661cjXdTn8hO2tXDdAWAqE5g==} cpu: [loong64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-powerpc64le-gnu@4.44.2': resolution: {integrity: sha512-03vUDH+w55s680YYryyr78jsO1RWU9ocRMaeV2vMniJJW/6HhoTBwyyiiTPVHNWLnhsnwcQ0oH3S9JSBEKuyqw==} cpu: [ppc64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-riscv64-gnu@4.44.2': resolution: {integrity: sha512-iYtAqBg5eEMG4dEfVlkqo05xMOk6y/JXIToRca2bAWuqjrJYJlx/I7+Z+4hSrsWU8GdJDFPL4ktV3dy4yBSrzg==} cpu: [riscv64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-riscv64-musl@4.44.2': resolution: {integrity: sha512-e6vEbgaaqz2yEHqtkPXa28fFuBGmUJ0N2dOJK8YUfijejInt9gfCSA7YDdJ4nYlv67JfP3+PSWFX4IVw/xRIPg==} cpu: [riscv64] os: [linux] + libc: [musl] '@rollup/rollup-linux-s390x-gnu@4.44.2': resolution: {integrity: sha512-evFOtkmVdY3udE+0QKrV5wBx7bKI0iHz5yEVx5WqDJkxp9YQefy4Mpx3RajIVcM6o7jxTvVd/qpC1IXUhGc1Mw==} cpu: [s390x] os: [linux] + libc: [glibc] '@rollup/rollup-linux-x64-gnu@4.44.2': resolution: {integrity: sha512-/bXb0bEsWMyEkIsUL2Yt5nFB5naLAwyOWMEviQfQY1x3l5WsLKgvZf66TM7UTfED6erckUVUJQ/jJ1FSpm3pRQ==} cpu: [x64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-x64-musl@4.44.2': resolution: {integrity: sha512-3D3OB1vSSBXmkGEZR27uiMRNiwN08/RVAcBKwhUYPaiZ8bcvdeEwWPvbnXvvXHY+A/7xluzcN+kaiOFNiOZwWg==} cpu: [x64] os: [linux] + libc: [musl] '@rollup/rollup-win32-arm64-msvc@4.44.2': resolution: {integrity: sha512-VfU0fsMK+rwdK8mwODqYeM2hDrF2WiHaSmCBrS7gColkQft95/8tphyzv2EupVxn3iE0FI78wzffoULH1G+dkw==} @@ -979,21 +993,25 @@ packages: resolution: {integrity: sha512-n7UGSBzv7PiX+V1Q2bY3S1XWyN3RCykCQUgfhZ+xWietCM/1349jgN7DoXKPllqlof1GPGBjziHU0sQZTC4tag==} cpu: [arm64] os: [linux] + libc: [glibc] '@rspack/binding-linux-arm64-musl@1.6.1': resolution: {integrity: sha512-P7nx0jsKxx7g3QAnH9UnJDGVgs1M2H7ZQl68SRyrs42TKOd9Md22ynoMIgCK1zoy+skssU6MhWptluSggXqSrA==} cpu: [arm64] os: [linux] + libc: [musl] '@rspack/binding-linux-x64-gnu@1.6.1': resolution: {integrity: sha512-SdiurC1bV/QHnj7rmrBYJLdsat3uUDWl9KjkVjEbtc8kQV0Ri4/vZRH0nswgzx7hZNY2j0jYuCm5O8+3qeJEMg==} cpu: [x64] os: [linux] + libc: [glibc] '@rspack/binding-linux-x64-musl@1.6.1': resolution: {integrity: sha512-JoSJu29nV+auOePhe8x2Fzqxiga1YGNcOMWKJ5Uj8rHBZ8FPAiiE+CpLG8TwfpHsivojrY/sy6fE8JldYLV5TQ==} cpu: [x64] os: [linux] + libc: [musl] '@rspack/binding-wasm32-wasi@1.6.1': resolution: {integrity: sha512-u5NiSHxM7LtIo4cebq/hQPJ9o39u127am3eVJHDzdmBVhTYYO5l7XVUnFmcU8hNHuj/4lJzkFviWFbf3SaRSYA==} @@ -1714,6 +1732,9 @@ packages: resolution: {integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==} engines: {node: '>=10'} + chardet@2.1.1: + resolution: {integrity: sha512-PsezH1rqdV9VvyNhxxOW32/d75r01NY7TQCmOqomRo15ZSOKbpTFVsfjghxo6JloQUCGnH4k1LGu0R4yCLlWQQ==} + charenc@0.0.2: resolution: {integrity: sha512-yrLQ/yVUFXkzg7EDQsPieE/53+0RlaWTs+wBrvW36cyilJ2SaDWfl4Yj7MtLTXleV9uEKefbAGUPv2/iWSooRA==} @@ -5958,6 +5979,8 @@ snapshots: ansi-styles: 4.3.0 supports-color: 7.2.0 + chardet@2.1.1: {} + charenc@0.0.2: {} check-error@2.1.1: {} diff --git a/src/pages/install/App.tsx b/src/pages/install/App.tsx index 5770bf2b8..f53a86f2f 100644 --- a/src/pages/install/App.tsx +++ b/src/pages/install/App.tsx @@ -33,6 +33,7 @@ import { CACHE_KEY_SCRIPT_INFO } from "@App/app/cache_key"; import { cacheInstance } from "@App/app/cache"; import { formatBytes, prettyUrl } from "@App/pkg/utils/utils"; import { ScriptIcons } from "../options/routes/utils"; +import chardet from "chardet"; const backgroundPromptShownKey = "background_prompt_shown"; @@ -102,11 +103,6 @@ const fetchScriptBody = async (url: string, { onProgress }: { [key: string]: any onProgress?.({ receivedLength }); } - // 检查 Content-Type 中的 charset - const contentType = response.headers.get("content-type") || ""; - const charsetMatch = contentType.match(/charset=([^;]+)/i); - const charset = charsetMatch ? charsetMatch[1].toLowerCase() : "utf-8"; - // 合并分片(chunks) const chunksAll = new Uint8Array(receivedLength); let position = 0; @@ -115,12 +111,15 @@ const fetchScriptBody = async (url: string, { onProgress }: { [key: string]: any position += chunk.length; } + const encode = (chardet.detect(chunksAll) || "utf-8").toLowerCase(); + // 使用检测到的 charset 解码 let code; try { - code = new TextDecoder(charset).decode(chunksAll); + code = new TextDecoder(encode).decode(chunksAll); } catch (e: any) { - throw new Error(`Failed to decode response with charset ${charset}: ${e.message}`); + console.warn(`Failed to decode response with charset ${encode}: ${e.message}`); + code = new TextDecoder("utf-8").decode(chunksAll); } const metadata = parseMetadata(code); From 67fe64dda2cf7632addb5570d952643c5ed8a3a2 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Thu, 8 Jan 2026 17:53:39 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E4=BC=98=E5=8C=96=E8=84=9A=E6=9C=AC?= =?UTF-8?q?=E5=AE=89=E8=A3=85=E7=BC=96=E7=A0=81=E6=A3=80=E6=B5=8B=E6=80=A7?= =?UTF-8?q?=E8=83=BD=E5=B9=B6=E6=B7=BB=E5=8A=A0=E6=B5=8B=E8=AF=95=E8=A6=86?= =?UTF-8?q?=E7=9B=96=20(#1139)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Initial plan * 优化编码检测性能并添加完整测试覆盖 Co-authored-by: CodFrm <22783163+CodFrm@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: CodFrm <22783163+CodFrm@users.noreply.github.com> --- src/pages/install/App.tsx | 7 +- src/pages/install/encoding.test.ts | 142 +++++++++++++++++++++++++++++ src/pages/install/encoding.ts | 51 +++++++++++ 3 files changed, 198 insertions(+), 2 deletions(-) create mode 100644 src/pages/install/encoding.test.ts create mode 100644 src/pages/install/encoding.ts diff --git a/src/pages/install/App.tsx b/src/pages/install/App.tsx index f53a86f2f..e8680b659 100644 --- a/src/pages/install/App.tsx +++ b/src/pages/install/App.tsx @@ -33,7 +33,7 @@ import { CACHE_KEY_SCRIPT_INFO } from "@App/app/cache_key"; import { cacheInstance } from "@App/app/cache"; import { formatBytes, prettyUrl } from "@App/pkg/utils/utils"; import { ScriptIcons } from "../options/routes/utils"; -import chardet from "chardet"; +import { detectEncoding } from "./encoding"; const backgroundPromptShownKey = "background_prompt_shown"; @@ -111,7 +111,9 @@ const fetchScriptBody = async (url: string, { onProgress }: { [key: string]: any position += chunk.length; } - const encode = (chardet.detect(chunksAll) || "utf-8").toLowerCase(); + // 检测编码:优先使用 Content-Type,回退到 chardet(仅检测前16KB) + const contentType = response.headers.get("content-type"); + const encode = detectEncoding(chunksAll, contentType); // 使用检测到的 charset 解码 let code; @@ -119,6 +121,7 @@ const fetchScriptBody = async (url: string, { onProgress }: { [key: string]: any code = new TextDecoder(encode).decode(chunksAll); } catch (e: any) { console.warn(`Failed to decode response with charset ${encode}: ${e.message}`); + // 回退到 UTF-8 code = new TextDecoder("utf-8").decode(chunksAll); } diff --git a/src/pages/install/encoding.test.ts b/src/pages/install/encoding.test.ts new file mode 100644 index 000000000..a3193dea5 --- /dev/null +++ b/src/pages/install/encoding.test.ts @@ -0,0 +1,142 @@ +import { describe, it, expect, vi } from "vitest"; +import { parseCharsetFromContentType, detectEncoding } from "./encoding"; + +describe("encoding detection", () => { + describe("parseCharsetFromContentType", () => { + it("should extract charset from valid Content-Type header", () => { + expect(parseCharsetFromContentType("text/javascript; charset=utf-8")).toBe("utf-8"); + expect(parseCharsetFromContentType("text/plain; charset=GBK")).toBe("gbk"); + expect(parseCharsetFromContentType("application/javascript; charset=ISO-8859-1")).toBe("iso-8859-1"); + }); + + it("should handle charset with quotes", () => { + expect(parseCharsetFromContentType('text/javascript; charset="utf-8"')).toBe("utf-8"); + expect(parseCharsetFromContentType("text/javascript; charset='gbk'")).toBe("gbk"); + }); + + it("should handle case-insensitive charset parameter", () => { + expect(parseCharsetFromContentType("text/javascript; CHARSET=UTF-8")).toBe("utf-8"); + expect(parseCharsetFromContentType("text/javascript; Charset=GBK")).toBe("gbk"); + }); + + it("should return null for missing charset", () => { + expect(parseCharsetFromContentType("text/javascript")).toBe(null); + expect(parseCharsetFromContentType("text/plain; boundary=something")).toBe(null); + }); + + it("should return null for null or empty input", () => { + expect(parseCharsetFromContentType(null)).toBe(null); + expect(parseCharsetFromContentType("")).toBe(null); + }); + + it("should handle charset with additional parameters", () => { + expect(parseCharsetFromContentType("text/javascript; charset=utf-8; boundary=xxx")).toBe("utf-8"); + }); + }); + + describe("detectEncoding", () => { + it("should prioritize valid charset from Content-Type header", () => { + const utf8Data = new TextEncoder().encode("hello world"); + expect(detectEncoding(utf8Data, "text/javascript; charset=utf-8")).toBe("utf-8"); + }); + + it("should fallback to chardet when Content-Type header is missing", () => { + // UTF-8 编码的中文 + const utf8Data = new TextEncoder().encode("你好世界"); + const encoding = detectEncoding(utf8Data, null); + expect(encoding).toBe("utf-8"); + }); + + it("should fallback to chardet when Content-Type charset is invalid", () => { + const utf8Data = new TextEncoder().encode("hello world"); + const encoding = detectEncoding(utf8Data, "text/javascript; charset=invalid-encoding"); + // chardet 可能检测为 utf-8 或 ascii,都是合理的 + expect(["utf-8", "ascii", "windows-1252"]).toContain(encoding); + }); + + it("should fallback to utf-8 when chardet returns null", () => { + // 模拟 chardet 返回 null 的情况(空数据) + const emptyData = new Uint8Array(0); + const encoding = detectEncoding(emptyData, null); + // 空数据时,chardet 可能返回 ascii 或其他编码,但都应该是有效的 + expect(encoding).toBeTruthy(); + expect(() => new TextDecoder(encoding)).not.toThrow(); + }); + + it("should only use first 16KB for chardet detection", () => { + // 创建一个大于 16KB 的数据 + const largeData = new Uint8Array(20 * 1024); + // 填充 UTF-8 编码的数据 + const text = "a".repeat(20 * 1024); + const textBytes = new TextEncoder().encode(text); + largeData.set(textBytes.slice(0, largeData.length)); + + const encoding = detectEncoding(largeData, null); + // 应该成功检测,说明使用了采样 + expect(["utf-8", "ascii", "windows-1252"]).toContain(encoding); + }); + + it("should handle GBK encoded data", () => { + // GBK 编码的 "你好" (这是一个简化的测试,实际 GBK 编码更复杂) + // 注意:在浏览器环境中,GBK 编码可能被识别为其他兼容编码 + const gbkLikeData = new Uint8Array([0xC4, 0xE3, 0xBA, 0xC3]); // "你好" in GBK + const encoding = detectEncoding(gbkLikeData, null); + // chardet 可能识别为 GBK、Shift_JIS 或相关的东亚编码 + expect(encoding).toBeTruthy(); + expect(() => new TextDecoder(encoding)).not.toThrow(); + }); + + it("should handle ISO-8859-1 encoded data", () => { + // ISO-8859-1 特有字符(扩展 ASCII) + const iso88591Data = new Uint8Array([0xE9, 0xE8, 0xE0, 0xE7]); // é è à ç + const encoding = detectEncoding(iso88591Data, null); + expect(encoding).toBeTruthy(); + }); + + it("should validate detected encoding is supported by TextDecoder", () => { + const utf8Data = new TextEncoder().encode("test"); + const encoding = detectEncoding(utf8Data, null); + + // 确保返回的编码可以被 TextDecoder 使用 + expect(() => new TextDecoder(encoding)).not.toThrow(); + }); + + it("should prefer Content-Type charset over chardet detection", () => { + // 即使数据看起来像 GBK,如果 Content-Type 指定了 UTF-8,应该使用 UTF-8 + const data = new Uint8Array([0xC4, 0xE3, 0xBA, 0xC3]); + const encoding = detectEncoding(data, "text/javascript; charset=utf-8"); + expect(encoding).toBe("utf-8"); + }); + + it("should handle charset with different cases from Content-Type", () => { + const data = new TextEncoder().encode("test"); + expect(detectEncoding(data, "text/javascript; charset=UTF-8")).toBe("utf-8"); + expect(detectEncoding(data, "text/javascript; charset=Utf-8")).toBe("utf-8"); + expect(detectEncoding(data, "text/javascript; charset=GBK")).toBe("gbk"); + }); + + it("should handle Windows-1252 encoded data", () => { + // Windows-1252 特有字符 + const win1252Data = new Uint8Array([0x80, 0x82, 0x83, 0x84]); // € ‚ ƒ „ + const encoding = detectEncoding(win1252Data, null); + expect(encoding).toBeTruthy(); + // chardet 应该能检测出编码或回退到有效的编码 + // Shift_JIS 也是一个有效的编码,chardet 可能会识别为它 + expect(["utf-8", "windows-1252", "iso-8859-1", "shift_jis", "ascii"]).toContain(encoding); + }); + + it("should fallback to utf-8 when chardet detects invalid encoding", () => { + // 使用 vi.spyOn 来模拟 console.warn + const consoleWarnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + + const data = new TextEncoder().encode("test"); + const encoding = detectEncoding(data, null); + + // 应该成功返回一个有效的编码 + expect(encoding).toBeTruthy(); + expect(() => new TextDecoder(encoding)).not.toThrow(); + + consoleWarnSpy.mockRestore(); + }); + }); +}); diff --git a/src/pages/install/encoding.ts b/src/pages/install/encoding.ts new file mode 100644 index 000000000..e8cbd9d02 --- /dev/null +++ b/src/pages/install/encoding.ts @@ -0,0 +1,51 @@ +import chardet from "chardet"; + +/** + * 从 Content-Type header 中解析 charset + */ +export const parseCharsetFromContentType = (contentType: string | null): string | null => { + if (!contentType) return null; + + const match = contentType.match(/charset=([^;]+)/i); + if (match && match[1]) { + return match[1].trim().toLowerCase().replace(/['"]/g, ''); + } + return null; +}; + +/** + * 检测字节数组的编码 + * 优先使用 Content-Type header,失败时使用 chardet(仅对前16KB检测以提升性能) + */ +export const detectEncoding = (data: Uint8Array, contentType: string | null): string => { + // 优先尝试使用 Content-Type header 中的 charset + const headerCharset = parseCharsetFromContentType(contentType); + if (headerCharset) { + try { + // 验证 charset 是否有效 + new TextDecoder(headerCharset); + return headerCharset; + } catch (e) { + console.warn(`Invalid charset from Content-Type header: ${headerCharset}`); + } + } + + // 使用 chardet 检测编码,仅检测前16KB以提升性能 + const sampleSize = Math.min(data.length, 16 * 1024); + const sample = data.slice(0, sampleSize); + const detected = chardet.detect(sample); + + if (detected) { + const encoding = detected.toLowerCase(); + try { + // 验证检测到的编码是否有效 + new TextDecoder(encoding); + return encoding; + } catch (e) { + console.warn(`Invalid charset detected by chardet: ${encoding}`); + } + } + + // 回退到 UTF-8 + return "utf-8"; +}; From 144a5f0971bbdac6f50f4e7e044318c01210baf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E4=B8=80=E4=B9=8B?= Date: Thu, 8 Jan 2026 17:56:47 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dlint=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/pages/install/App.tsx | 2 +- .../install => pkg/utils}/encoding.test.ts | 16 ++++++++-------- src/{pages/install => pkg/utils}/encoding.ts | 14 +++++++------- 3 files changed, 16 insertions(+), 16 deletions(-) rename src/{pages/install => pkg/utils}/encoding.test.ts (96%) rename src/{pages/install => pkg/utils}/encoding.ts (88%) diff --git a/src/pages/install/App.tsx b/src/pages/install/App.tsx index e8680b659..8801db4b1 100644 --- a/src/pages/install/App.tsx +++ b/src/pages/install/App.tsx @@ -33,7 +33,7 @@ import { CACHE_KEY_SCRIPT_INFO } from "@App/app/cache_key"; import { cacheInstance } from "@App/app/cache"; import { formatBytes, prettyUrl } from "@App/pkg/utils/utils"; import { ScriptIcons } from "../options/routes/utils"; -import { detectEncoding } from "./encoding"; +import { detectEncoding } from "@App/pkg/utils/encoding"; const backgroundPromptShownKey = "background_prompt_shown"; diff --git a/src/pages/install/encoding.test.ts b/src/pkg/utils/encoding.test.ts similarity index 96% rename from src/pages/install/encoding.test.ts rename to src/pkg/utils/encoding.test.ts index a3193dea5..a62e4a8e8 100644 --- a/src/pages/install/encoding.test.ts +++ b/src/pkg/utils/encoding.test.ts @@ -70,7 +70,7 @@ describe("encoding detection", () => { const text = "a".repeat(20 * 1024); const textBytes = new TextEncoder().encode(text); largeData.set(textBytes.slice(0, largeData.length)); - + const encoding = detectEncoding(largeData, null); // 应该成功检测,说明使用了采样 expect(["utf-8", "ascii", "windows-1252"]).toContain(encoding); @@ -79,7 +79,7 @@ describe("encoding detection", () => { it("should handle GBK encoded data", () => { // GBK 编码的 "你好" (这是一个简化的测试,实际 GBK 编码更复杂) // 注意:在浏览器环境中,GBK 编码可能被识别为其他兼容编码 - const gbkLikeData = new Uint8Array([0xC4, 0xE3, 0xBA, 0xC3]); // "你好" in GBK + const gbkLikeData = new Uint8Array([0xc4, 0xe3, 0xba, 0xc3]); // "你好" in GBK const encoding = detectEncoding(gbkLikeData, null); // chardet 可能识别为 GBK、Shift_JIS 或相关的东亚编码 expect(encoding).toBeTruthy(); @@ -88,7 +88,7 @@ describe("encoding detection", () => { it("should handle ISO-8859-1 encoded data", () => { // ISO-8859-1 特有字符(扩展 ASCII) - const iso88591Data = new Uint8Array([0xE9, 0xE8, 0xE0, 0xE7]); // é è à ç + const iso88591Data = new Uint8Array([0xe9, 0xe8, 0xe0, 0xe7]); // é è à ç const encoding = detectEncoding(iso88591Data, null); expect(encoding).toBeTruthy(); }); @@ -96,14 +96,14 @@ describe("encoding detection", () => { it("should validate detected encoding is supported by TextDecoder", () => { const utf8Data = new TextEncoder().encode("test"); const encoding = detectEncoding(utf8Data, null); - + // 确保返回的编码可以被 TextDecoder 使用 expect(() => new TextDecoder(encoding)).not.toThrow(); }); it("should prefer Content-Type charset over chardet detection", () => { // 即使数据看起来像 GBK,如果 Content-Type 指定了 UTF-8,应该使用 UTF-8 - const data = new Uint8Array([0xC4, 0xE3, 0xBA, 0xC3]); + const data = new Uint8Array([0xc4, 0xe3, 0xba, 0xc3]); const encoding = detectEncoding(data, "text/javascript; charset=utf-8"); expect(encoding).toBe("utf-8"); }); @@ -128,14 +128,14 @@ describe("encoding detection", () => { it("should fallback to utf-8 when chardet detects invalid encoding", () => { // 使用 vi.spyOn 来模拟 console.warn const consoleWarnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); - + const data = new TextEncoder().encode("test"); const encoding = detectEncoding(data, null); - + // 应该成功返回一个有效的编码 expect(encoding).toBeTruthy(); expect(() => new TextDecoder(encoding)).not.toThrow(); - + consoleWarnSpy.mockRestore(); }); }); diff --git a/src/pages/install/encoding.ts b/src/pkg/utils/encoding.ts similarity index 88% rename from src/pages/install/encoding.ts rename to src/pkg/utils/encoding.ts index e8cbd9d02..83a5ce941 100644 --- a/src/pages/install/encoding.ts +++ b/src/pkg/utils/encoding.ts @@ -5,10 +5,10 @@ import chardet from "chardet"; */ export const parseCharsetFromContentType = (contentType: string | null): string | null => { if (!contentType) return null; - + const match = contentType.match(/charset=([^;]+)/i); if (match && match[1]) { - return match[1].trim().toLowerCase().replace(/['"]/g, ''); + return match[1].trim().toLowerCase().replace(/['"]/g, ""); } return null; }; @@ -25,8 +25,8 @@ export const detectEncoding = (data: Uint8Array, contentType: string | null): st // 验证 charset 是否有效 new TextDecoder(headerCharset); return headerCharset; - } catch (e) { - console.warn(`Invalid charset from Content-Type header: ${headerCharset}`); + } catch (e: any) { + console.warn(`Invalid charset from Content-Type header: ${headerCharset}, error: ${e.message}`); } } @@ -34,15 +34,15 @@ export const detectEncoding = (data: Uint8Array, contentType: string | null): st const sampleSize = Math.min(data.length, 16 * 1024); const sample = data.slice(0, sampleSize); const detected = chardet.detect(sample); - + if (detected) { const encoding = detected.toLowerCase(); try { // 验证检测到的编码是否有效 new TextDecoder(encoding); return encoding; - } catch (e) { - console.warn(`Invalid charset detected by chardet: ${encoding}`); + } catch (e: any) { + console.warn(`Invalid charset detected by chardet: ${encoding}, error: ${e.message}`); } } From 1419699cc714575541f8dca017cad8b62982aad0 Mon Sep 17 00:00:00 2001 From: cyfung1031 <44498510+cyfung1031@users.noreply.github.com> Date: Fri, 9 Jan 2026 00:58:23 +0900 Subject: [PATCH 4/4] data.subarray --- src/pkg/utils/encoding.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pkg/utils/encoding.ts b/src/pkg/utils/encoding.ts index 83a5ce941..7abda9d16 100644 --- a/src/pkg/utils/encoding.ts +++ b/src/pkg/utils/encoding.ts @@ -32,7 +32,7 @@ export const detectEncoding = (data: Uint8Array, contentType: string | null): st // 使用 chardet 检测编码,仅检测前16KB以提升性能 const sampleSize = Math.min(data.length, 16 * 1024); - const sample = data.slice(0, sampleSize); + const sample = data.subarray(0, sampleSize); const detected = chardet.detect(sample); if (detected) {