From 7476f191732338dd87e5ee6b74cf0ccc02f89aea Mon Sep 17 00:00:00 2001 From: zqqcee Date: Fri, 8 Sep 2023 14:36:53 +0800 Subject: [PATCH 1/7] feat: cosine similarity algorithm --- packages/graph/src/cosine-similarity.ts | 29 +++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 packages/graph/src/cosine-similarity.ts diff --git a/packages/graph/src/cosine-similarity.ts b/packages/graph/src/cosine-similarity.ts new file mode 100644 index 0000000..a9730f9 --- /dev/null +++ b/packages/graph/src/cosine-similarity.ts @@ -0,0 +1,29 @@ +import { Vector } from "./vector"; + +/** +Calculates the cosine similarity +@param item - The element. +@param targetItem - The target element. +@returns The cosine similarity between the item and the targetItem. +*/ +const cosineSimilarity = ( + item: number[], + targetItem: number[], +): number => { + // Vector of the target element + const targetItemVector = new Vector(targetItem); + // Norm of the target element vector + const targetNodeNorm2 = targetItemVector.norm2(); + // Vector of the item + const itemVector = new Vector(item); + // Norm of the item vector + const itemNorm2 = itemVector.norm2(); + // Calculate the dot product of the item vector and the target element vector + const dot = targetItemVector.dot(itemVector); + const norm2Product = targetNodeNorm2 * itemNorm2; + // Calculate the cosine similarity between the item vector and the target element vector + const cosineSimilarity = norm2Product ? dot / norm2Product : 0; + return cosineSimilarity; +} + +export default cosineSimilarity; From 64e3e6c2aefe76cb0773cc75d2f88cd807c0fa7c Mon Sep 17 00:00:00 2001 From: zqqcee Date: Fri, 8 Sep 2023 14:37:11 +0800 Subject: [PATCH 2/7] test: unit test --- __tests__/unit/cosine-similarity.spec.ts | 41 ++++++++++++++++++++++++ package.json | 2 +- 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 __tests__/unit/cosine-similarity.spec.ts diff --git a/__tests__/unit/cosine-similarity.spec.ts b/__tests__/unit/cosine-similarity.spec.ts new file mode 100644 index 0000000..eb50657 --- /dev/null +++ b/__tests__/unit/cosine-similarity.spec.ts @@ -0,0 +1,41 @@ +import cosineSimilarity from "../../packages/graph/src/cosine-similarity"; + +describe('cosineSimilarity abnormal demo: ', () => { + it('item contains only zeros: ', () => { + const item = [0, 0, 0]; + const targetTtem = [3, 1, 1]; + const cosineSimilarityValue = cosineSimilarity(item, targetTtem); + expect(cosineSimilarityValue).toBe(0); + }); + it('targetTtem contains only zeros: ', () => { + const item = [3, 5, 2]; + const targetTtem = [0, 0, 0]; + const cosineSimilarityValue = cosineSimilarity(item, targetTtem); + expect(cosineSimilarityValue).toBe(0); + }); + it('item and targetTtem both contains only zeros: ', () => { + const item = [0, 0, 0]; + const targetTtem = [0, 0, 0]; + const cosineSimilarityValue = cosineSimilarity(item, targetTtem); + expect(cosineSimilarityValue).toBe(0); + }); +}); + +describe('cosineSimilarity normal demo: ', () => { + it('demo similar: ', () => { + const item = [30, 0, 100]; + const targetTtem = [32, 1, 120]; + const cosineSimilarityValue = cosineSimilarity(item, targetTtem); + expect(cosineSimilarityValue).toBeGreaterThanOrEqual(0); + expect(cosineSimilarityValue).toBeLessThan(1); + expect(Number(cosineSimilarityValue.toFixed(3))).toBe(0.999); + }); + it('demo dissimilar: ', () => { + const item = [10, 300, 2]; + const targetTtem = [1, 2, 30]; + const cosineSimilarityValue = cosineSimilarity(item, targetTtem); + expect(cosineSimilarityValue).toBeGreaterThanOrEqual(0); + expect(cosineSimilarityValue).toBeLessThan(1); + expect(Number(cosineSimilarityValue.toFixed(3))).toBe(0.074); + }); +}); diff --git a/package.json b/package.json index c6cac2c..627b5bf 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,7 @@ "build:ci": "pnpm -r run build:ci", "prepare": "husky install", "test": "jest", - "test_one": "jest ./__tests__/unit/kCore.spec.ts", + "test_one": "jest ./__tests__/unit/cosine-similarity.spec.ts", "coverage": "jest --coverage", "build:site": "vite build", "deploy": "gh-pages -d site/dist", From 39e719178e231c9349319fbe6b67c1562b935148 Mon Sep 17 00:00:00 2001 From: zqqcee Date: Mon, 11 Sep 2023 12:40:44 +0800 Subject: [PATCH 3/7] fix: change default export to export --- __tests__/unit/bfs.spec.ts | 2 +- __tests__/unit/dfs.spec.ts | 2 +- packages/graph/src/bfs.ts | 4 +- packages/graph/src/dfs.ts | 2 +- packages/graph/src/index.ts | 4 +- packages/graph/src/nodes-cosine-similarity.ts | 46 +++++++++++++++++++ 6 files changed, 53 insertions(+), 7 deletions(-) create mode 100644 packages/graph/src/nodes-cosine-similarity.ts diff --git a/__tests__/unit/bfs.spec.ts b/__tests__/unit/bfs.spec.ts index baf0a8f..35ad081 100644 --- a/__tests__/unit/bfs.spec.ts +++ b/__tests__/unit/bfs.spec.ts @@ -1,4 +1,4 @@ -import breadthFirstSearch from "../../packages/graph/src/bfs"; +import { breadthFirstSearch } from "../../packages/graph/src"; import { Graph } from "@antv/graphlib"; const data = { diff --git a/__tests__/unit/dfs.spec.ts b/__tests__/unit/dfs.spec.ts index aca783c..fb1e5d7 100644 --- a/__tests__/unit/dfs.spec.ts +++ b/__tests__/unit/dfs.spec.ts @@ -1,4 +1,4 @@ -import depthFirstSearch from "../../packages/graph/src/dfs"; +import { depthFirstSearch } from "../../packages/graph/src"; import { Graph } from "@antv/graphlib"; diff --git a/packages/graph/src/bfs.ts b/packages/graph/src/bfs.ts index c8f73f6..209b8e1 100644 --- a/packages/graph/src/bfs.ts +++ b/packages/graph/src/bfs.ts @@ -24,7 +24,7 @@ Performs breadth-first search (BFS) traversal on a graph. @param startNodeId - The ID of the starting node for BFS. @param originalCallbacks - Optional object containing callback functions for BFS. */ -const breadthFirstSearch = ( +export const breadthFirstSearch = ( graph: Graph, startNodeId: NodeID, originalCallbacks?: IAlgorithmCallbacks, @@ -65,5 +65,3 @@ const breadthFirstSearch = ( previousNodeId = currentNodeId; } }; - -export default breadthFirstSearch; diff --git a/packages/graph/src/dfs.ts b/packages/graph/src/dfs.ts index 83a131f..f66be8a 100644 --- a/packages/graph/src/dfs.ts +++ b/packages/graph/src/dfs.ts @@ -41,7 +41,7 @@ function depthFirstSearchRecursive( }); } -export default function depthFirstSearch( +export function depthFirstSearch( graph: Graph, startNodeId: NodeID, originalCallbacks?: IAlgorithmCallbacks, diff --git a/packages/graph/src/index.ts b/packages/graph/src/index.ts index 772f5dc..6af8072 100644 --- a/packages/graph/src/index.ts +++ b/packages/graph/src/index.ts @@ -5,4 +5,6 @@ export * from "./iLouvain"; export * from "./k-core"; export * from "./floydWarshall"; export * from "./bfs"; -export * from "./dfs"; \ No newline at end of file +export * from "./dfs"; +export * from "./cosine-similarity" +export * from "./nodes-cosine-similarity"; \ No newline at end of file diff --git a/packages/graph/src/nodes-cosine-similarity.ts b/packages/graph/src/nodes-cosine-similarity.ts new file mode 100644 index 0000000..edbeebf --- /dev/null +++ b/packages/graph/src/nodes-cosine-similarity.ts @@ -0,0 +1,46 @@ +import { clone } from '@antv/util'; +import { getAllProperties, oneHot } from './utils'; +import { NodeSimilarity } from './types'; +import cosineSimilarity from './cosine-similarity'; + +/** +Calculates the cosine similarity based on node attributes using the nodes-cosine-similarity algorithm. +This algorithm is used to find similar nodes based on a seed node in a graph. +@param nodes - The data of graph nodes. +@param seedNode - The seed node for similarity calculation. +@param involvedKeys - The collection of keys that are involved in the calculation. +@param uninvolvedKeys - The collection of keys that are not involved in the calculation. +@returns An array of nodes that are similar to the seed node based on cosine similarity. +*/ +const nodesCosineSimilarity = ( + nodes: NodeSimilarity[] = [], + seedNode: NodeSimilarity, + involvedKeys: string[] = [], + uninvolvedKeys: string[] = [], +): { + allCosineSimilarity: number[], + similarNodes: NodeSimilarity[], +} => { + const similarNodes = clone(nodes.filter(node => node.id !== seedNode.id)); + const seedNodeIndex = nodes.findIndex(node => node.id === seedNode.id); + // Collection of all node properties + const properties = getAllProperties(nodes); + // One-hot feature vectors for all node properties + const allPropertiesWeight = oneHot(properties, involvedKeys, uninvolvedKeys) as number[][]; + // Seed node properties + const seedNodeProperties = allPropertiesWeight[seedNodeIndex]; + const allCosineSimilarity: number[] = []; + similarNodes.forEach((node: NodeSimilarity, index: number) => { + if (node.id !== seedNode.id) { + const nodeProperties = allPropertiesWeight[index]; + // Calculate the cosine similarity between node vector and seed node vector + const cosineSimilarityValue = cosineSimilarity(nodeProperties, seedNodeProperties); + allCosineSimilarity.push(cosineSimilarityValue); + node.cosineSimilarity = cosineSimilarityValue; + } + }); + // Sort the returned nodes according to cosine similarity + similarNodes.sort((a: NodeSimilarity, b: NodeSimilarity) => b.cosineSimilarity - a.cosineSimilarity); + return { allCosineSimilarity, similarNodes }; +} +export default nodesCosineSimilarity; From 47124122a35096da8b3bf6a7d7808798ff548738 Mon Sep 17 00:00:00 2001 From: zqqcee Date: Mon, 11 Sep 2023 12:44:22 +0800 Subject: [PATCH 4/7] feat: v5 algorithm nodes-cosine-similarity --- packages/graph/src/cosine-similarity.ts | 4 +--- packages/graph/src/nodes-cosine-similarity.ts | 17 +++++++---------- packages/graph/src/types.ts | 5 ++++- packages/graph/src/utils.ts | 4 ++-- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/packages/graph/src/cosine-similarity.ts b/packages/graph/src/cosine-similarity.ts index a9730f9..554623b 100644 --- a/packages/graph/src/cosine-similarity.ts +++ b/packages/graph/src/cosine-similarity.ts @@ -6,7 +6,7 @@ Calculates the cosine similarity @param targetItem - The target element. @returns The cosine similarity between the item and the targetItem. */ -const cosineSimilarity = ( +export const cosineSimilarity = ( item: number[], targetItem: number[], ): number => { @@ -25,5 +25,3 @@ const cosineSimilarity = ( const cosineSimilarity = norm2Product ? dot / norm2Product : 0; return cosineSimilarity; } - -export default cosineSimilarity; diff --git a/packages/graph/src/nodes-cosine-similarity.ts b/packages/graph/src/nodes-cosine-similarity.ts index edbeebf..14259d0 100644 --- a/packages/graph/src/nodes-cosine-similarity.ts +++ b/packages/graph/src/nodes-cosine-similarity.ts @@ -1,7 +1,7 @@ import { clone } from '@antv/util'; import { getAllProperties, oneHot } from './utils'; import { NodeSimilarity } from './types'; -import cosineSimilarity from './cosine-similarity'; +import { cosineSimilarity } from '.'; /** Calculates the cosine similarity based on node attributes using the nodes-cosine-similarity algorithm. @@ -12,7 +12,7 @@ This algorithm is used to find similar nodes based on a seed node in a graph. @param uninvolvedKeys - The collection of keys that are not involved in the calculation. @returns An array of nodes that are similar to the seed node based on cosine similarity. */ -const nodesCosineSimilarity = ( +export const nodesCosineSimilarity = ( nodes: NodeSimilarity[] = [], seedNode: NodeSimilarity, involvedKeys: string[] = [], @@ -31,16 +31,13 @@ const nodesCosineSimilarity = ( const seedNodeProperties = allPropertiesWeight[seedNodeIndex]; const allCosineSimilarity: number[] = []; similarNodes.forEach((node: NodeSimilarity, index: number) => { - if (node.id !== seedNode.id) { - const nodeProperties = allPropertiesWeight[index]; - // Calculate the cosine similarity between node vector and seed node vector - const cosineSimilarityValue = cosineSimilarity(nodeProperties, seedNodeProperties); - allCosineSimilarity.push(cosineSimilarityValue); - node.cosineSimilarity = cosineSimilarityValue; - } + const nodeProperties = allPropertiesWeight[index]; + // Calculate the cosine similarity between node vector and seed node vector + const cosineSimilarityValue = cosineSimilarity(nodeProperties, seedNodeProperties); + allCosineSimilarity.push(cosineSimilarityValue); + node.cosineSimilarity = cosineSimilarityValue; }); // Sort the returned nodes according to cosine similarity similarNodes.sort((a: NodeSimilarity, b: NodeSimilarity) => b.cosineSimilarity - a.cosineSimilarity); return { allCosineSimilarity, similarNodes }; } -export default nodesCosineSimilarity; diff --git a/packages/graph/src/types.ts b/packages/graph/src/types.ts index fb9038f..657639f 100644 --- a/packages/graph/src/types.ts +++ b/packages/graph/src/types.ts @@ -37,4 +37,7 @@ export interface IAlgorithmCallbacks { allowTraversal?: (param: { previous?: NodeID; current?: NodeID; next: NodeID }) => boolean; } -export type NodeID = string | number; \ No newline at end of file +export type NodeID = string | number; +export interface NodeSimilarity extends Node<{ [key: string]: any }> { + cosineSimilarity?: number; +} \ No newline at end of file diff --git a/packages/graph/src/utils.ts b/packages/graph/src/utils.ts index 80470f3..78d5103 100644 --- a/packages/graph/src/utils.ts +++ b/packages/graph/src/utils.ts @@ -50,7 +50,7 @@ export const oneHot = (dataList: PlainObject[], involvedKeys?: string[], uninvol // 获取所有的属性/特征值 const allValue = Object.values(allKeyValueMap); // 是否所有属性/特征的值都是数值型 - const isAllNumber = allValue.every((value) => value.every((item) => (typeof(item) === 'number'))); + const isAllNumber = allValue.every((value) => value.every((item) => (typeof (item) === 'number'))); // 对数据进行one-hot编码 dataList.forEach((data, index) => { @@ -65,7 +65,7 @@ export const oneHot = (dataList: PlainObject[], involvedKeys?: string[], uninvol subCode.push(keyValue); } else { // 进行one-hot编码 - for(let i = 0; i < allKeyValue.length; i++) { + for (let i = 0; i < allKeyValue.length; i++) { if (i === valueIndex) { subCode.push(1); } else { From c3dd03532306836a769fad3a7c655123df5498e7 Mon Sep 17 00:00:00 2001 From: zqqcee Date: Mon, 11 Sep 2023 12:44:39 +0800 Subject: [PATCH 5/7] test: nodes cossim unit test --- __tests__/unit/cosine-similarity.spec.ts | 2 +- .../unit/nodes-cosine-similarity.spec.ts | 109 ++++++++++++++++++ package.json | 2 +- tsconfig.json | 3 +- 4 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 __tests__/unit/nodes-cosine-similarity.spec.ts diff --git a/__tests__/unit/cosine-similarity.spec.ts b/__tests__/unit/cosine-similarity.spec.ts index eb50657..fe595e1 100644 --- a/__tests__/unit/cosine-similarity.spec.ts +++ b/__tests__/unit/cosine-similarity.spec.ts @@ -1,4 +1,4 @@ -import cosineSimilarity from "../../packages/graph/src/cosine-similarity"; +import { cosineSimilarity } from "../../packages/graph/src"; describe('cosineSimilarity abnormal demo: ', () => { it('item contains only zeros: ', () => { diff --git a/__tests__/unit/nodes-cosine-similarity.spec.ts b/__tests__/unit/nodes-cosine-similarity.spec.ts new file mode 100644 index 0000000..e0e0246 --- /dev/null +++ b/__tests__/unit/nodes-cosine-similarity.spec.ts @@ -0,0 +1,109 @@ +import { nodesCosineSimilarity } from "../../packages/graph/src"; +import propertiesGraphData from '../data/cluster-origin-properties-data.json'; +import { NodeSimilarity } from "../../packages/graph/src/types"; + +describe('nodesCosineSimilarity abnormal demo', () => { + it('no properties demo: ', () => { + const nodes = [ + { + id: 'node-0', + data: {}, + }, + { + id: 'node-1', + data: {}, + }, + { + id: 'node-2', + data: {}, + }, + { + id: 'node-3', + data: {}, + } + ]; + const { allCosineSimilarity, similarNodes } = nodesCosineSimilarity(nodes as NodeSimilarity[], nodes[0]); + expect(allCosineSimilarity.length).toBe(3); + expect(similarNodes.length).toBe(3); + expect(allCosineSimilarity[0]).toBe(0); + expect(allCosineSimilarity[1]).toBe(0); + expect(allCosineSimilarity[2]).toBe(0); + }); +}); + + +describe('nodesCosineSimilarity normal demo', () => { + it('simple demo: ', () => { + const nodes = [ + { + id: 'node-0', + data: { + amount: 10, + } + }, + { + id: 'node-2', + data: { + amount: 100, + } + }, + { + id: 'node-3', + data: { + amount: 1000, + } + }, + { + id: 'node-4', + data: { + amount: 50, + } + } + ]; + const { allCosineSimilarity, similarNodes } = nodesCosineSimilarity(nodes as NodeSimilarity[], nodes[0], ['amount']); + expect(allCosineSimilarity.length).toBe(3); + expect(similarNodes.length).toBe(3); + allCosineSimilarity.forEach(data => { + expect(data).toBeGreaterThanOrEqual(0); + expect(data).toBeLessThanOrEqual(1); + }) + }); + + it('complex demo: ', () => { + const { nodes } = propertiesGraphData; + const { allCosineSimilarity, similarNodes } = nodesCosineSimilarity(nodes as NodeSimilarity[], nodes[16]); + expect(allCosineSimilarity.length).toBe(16); + expect(similarNodes.length).toBe(16); + allCosineSimilarity.forEach(data => { + expect(data).toBeGreaterThanOrEqual(0); + expect(data).toBeLessThanOrEqual(1); + }) + }); + + + it('demo use involvedKeys: ', () => { + const involvedKeys = ['amount', 'wifi']; + const { nodes } = propertiesGraphData; + const { allCosineSimilarity, similarNodes } = nodesCosineSimilarity(nodes as NodeSimilarity[], nodes[16], involvedKeys); + expect(allCosineSimilarity.length).toBe(16); + expect(similarNodes.length).toBe(16); + allCosineSimilarity.forEach(data => { + expect(data).toBeGreaterThanOrEqual(0); + expect(data).toBeLessThanOrEqual(1); + }) + expect(similarNodes[0].id).toBe('node-11'); + }); + + it('demo use uninvolvedKeys: ', () => { + const uninvolvedKeys = ['amount']; + const { nodes } = propertiesGraphData; + const { allCosineSimilarity, similarNodes } = nodesCosineSimilarity(nodes as NodeSimilarity[], nodes[16], [], uninvolvedKeys); + expect(allCosineSimilarity.length).toBe(16); + expect(similarNodes.length).toBe(16); + allCosineSimilarity.forEach(data => { + expect(data).toBeGreaterThanOrEqual(0); + expect(data).toBeLessThanOrEqual(1); + }) + expect(similarNodes[0].id).toBe('node-11'); + }); +}); diff --git a/package.json b/package.json index 627b5bf..8a6e34a 100644 --- a/package.json +++ b/package.json @@ -21,7 +21,7 @@ "build:ci": "pnpm -r run build:ci", "prepare": "husky install", "test": "jest", - "test_one": "jest ./__tests__/unit/cosine-similarity.spec.ts", + "test_one": "jest ./__tests__/unit/nodes-cosine-similarity.spec.ts", "coverage": "jest --coverage", "build:site": "vite build", "deploy": "gh-pages -d site/dist", diff --git a/tsconfig.json b/tsconfig.json index cf11009..a75eef5 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -7,6 +7,7 @@ "allowJs": true, "moduleResolution": "node", "resolveJsonModule": true, - "allowSyntheticDefaultImports": true + "allowSyntheticDefaultImports": true, + "esModuleInterop": true, } } From 411ea1a68f79363ef0e4ae87cf335534f3f70886 Mon Sep 17 00:00:00 2001 From: zqqcee Date: Mon, 11 Sep 2023 14:47:10 +0800 Subject: [PATCH 6/7] fix: fix import bug, fix: change NodeSimilarity type --- packages/graph/src/nodes-cosine-similarity.ts | 6 +++--- packages/graph/src/types.ts | 10 ++++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/packages/graph/src/nodes-cosine-similarity.ts b/packages/graph/src/nodes-cosine-similarity.ts index 14259d0..9ef5201 100644 --- a/packages/graph/src/nodes-cosine-similarity.ts +++ b/packages/graph/src/nodes-cosine-similarity.ts @@ -1,7 +1,7 @@ import { clone } from '@antv/util'; import { getAllProperties, oneHot } from './utils'; import { NodeSimilarity } from './types'; -import { cosineSimilarity } from '.'; +import { cosineSimilarity } from './cosine-similarity'; /** Calculates the cosine similarity based on node attributes using the nodes-cosine-similarity algorithm. @@ -35,9 +35,9 @@ export const nodesCosineSimilarity = ( // Calculate the cosine similarity between node vector and seed node vector const cosineSimilarityValue = cosineSimilarity(nodeProperties, seedNodeProperties); allCosineSimilarity.push(cosineSimilarityValue); - node.cosineSimilarity = cosineSimilarityValue; + node.data.cosineSimilarity = cosineSimilarityValue; }); // Sort the returned nodes according to cosine similarity - similarNodes.sort((a: NodeSimilarity, b: NodeSimilarity) => b.cosineSimilarity - a.cosineSimilarity); + similarNodes.sort((a: NodeSimilarity, b: NodeSimilarity) => b.data.cosineSimilarity - a.data.cosineSimilarity); return { allCosineSimilarity, similarNodes }; } diff --git a/packages/graph/src/types.ts b/packages/graph/src/types.ts index 657639f..d61f051 100644 --- a/packages/graph/src/types.ts +++ b/packages/graph/src/types.ts @@ -38,6 +38,12 @@ export interface IAlgorithmCallbacks { } export type NodeID = string | number; -export interface NodeSimilarity extends Node<{ [key: string]: any }> { - cosineSimilarity?: number; +// export interface NodeSimilarity extends Node<{ [key: string]: any }> { +// cosineSimilarity?: number; +// } + +export type NodeSimilarity = Node & { + data: { + cosineSimilarity?: number; + } } \ No newline at end of file From cb2a2e2525fc4111ae4e527c21b0d1597adabf60 Mon Sep 17 00:00:00 2001 From: zqqcee Date: Mon, 11 Sep 2023 22:50:05 +0800 Subject: [PATCH 7/7] fix: remove annotation --- packages/graph/src/types.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/graph/src/types.ts b/packages/graph/src/types.ts index d61f051..d05b75f 100644 --- a/packages/graph/src/types.ts +++ b/packages/graph/src/types.ts @@ -38,9 +38,6 @@ export interface IAlgorithmCallbacks { } export type NodeID = string | number; -// export interface NodeSimilarity extends Node<{ [key: string]: any }> { -// cosineSimilarity?: number; -// } export type NodeSimilarity = Node & { data: {