From c74bd8e4ad5dbb4c42a089beed7ae1a5c88edf34 Mon Sep 17 00:00:00 2001 From: Jeremy Cloarec Date: Fri, 3 Oct 2025 14:17:25 +0200 Subject: [PATCH 01/14] [client] handle too large items --- pycti/utils/opencti_stix2.py | 24 ++++++++------ pycti/utils/opencti_stix2_splitter.py | 33 +++++++++++++++---- .../utils/test_opencti_stix2_splitter.py | 4 +-- tests/02-integration/entities/test_malware.py | 2 +- tests/02-integration/utils/test_stix_crud.py | 2 +- 5 files changed, 45 insertions(+), 20 deletions(-) diff --git a/pycti/utils/opencti_stix2.py b/pycti/utils/opencti_stix2.py index d3bd5b50b..d359232eb 100644 --- a/pycti/utils/opencti_stix2.py +++ b/pycti/utils/opencti_stix2.py @@ -8,7 +8,7 @@ import time import traceback import uuid -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Union, Tuple import datefinder import dateutil.parser @@ -196,7 +196,7 @@ def import_bundle_from_file( file_path: str, update: bool = False, types: List = None, - ) -> Optional[List]: + ) -> Optional[Tuple[list, list]]: """import a stix2 bundle from a file :param file_path: valid path to the file @@ -221,7 +221,8 @@ def import_bundle_from_json( update: bool = False, types: List = None, work_id: str = None, - ) -> List: + objects_max_deps: int = 0, + ) -> Tuple[list, list]: """import a stix2 bundle from JSON data :param json_data: JSON data @@ -231,11 +232,13 @@ def import_bundle_from_json( :param types: list of stix2 types, defaults to None :type types: list, optional :param work_id work_id: str, optional - :return: list of imported stix2 objects - :rtype: List + :param objects_max_deps: max deps amount of objects, reject object import if larger than configured amount + :type objects_max_deps: int, optional + :return: list of imported stix2 objects and a list of stix2 objects with too many deps + :rtype: Tuple[List,List] """ data = json.loads(json_data) - return self.import_bundle(data, update, types, work_id) + return self.import_bundle(data, update, types, work_id, objects_max_deps) def resolve_author(self, title: str) -> Optional[Identity]: if "fireeye" in title.lower() or "mandiant" in title.lower(): @@ -3060,7 +3063,8 @@ def import_bundle( update: bool = False, types: List = None, work_id: str = None, - ) -> List: + objects_max_deps: int = 0, + ) -> Tuple[list, list]: # Check if the bundle is correctly formatted if "type" not in stix_bundle or stix_bundle["type"] != "bundle": raise ValueError("JSON data type is not a STIX2 bundle") @@ -3072,8 +3076,8 @@ def import_bundle( else None ) - stix2_splitter = OpenCTIStix2Splitter() - _, incompatible_elements, bundles = ( + stix2_splitter = OpenCTIStix2Splitter(objects_max_deps) + _, incompatible_elements, bundles, too_large_elements_bundles = ( stix2_splitter.split_bundle_with_expectations( stix_bundle, False, event_version ) @@ -3099,7 +3103,7 @@ def import_bundle( self.import_item(item, update, types, 0, work_id) imported_elements.append({"id": item["id"], "type": item["type"]}) - return imported_elements + return imported_elements, too_large_elements_bundles @staticmethod def put_attribute_in_extension( diff --git a/pycti/utils/opencti_stix2_splitter.py b/pycti/utils/opencti_stix2_splitter.py index b65ef25cd..7ff4d64eb 100644 --- a/pycti/utils/opencti_stix2_splitter.py +++ b/pycti/utils/opencti_stix2_splitter.py @@ -33,17 +33,19 @@ def is_id_supported(key): return True -class OpenCTIStix2Splitter: +class OpenCTIStix2Splitter: # pylint: disable=too-many-instance-attributes """STIX2 bundle splitter for OpenCTI Splits large STIX2 bundles into smaller chunks for processing. """ - def __init__(self): + def __init__(self, objects_max_deps: int = 0): + self.objects_max_deps = objects_max_deps self.cache_index = {} self.cache_refs = {} self.elements = [] self.incompatible_items = [] + self.too_large_elements = [] def get_internal_ids_in_extension(self, item): ids = [] @@ -196,7 +198,9 @@ def enlist_element( ) else: is_compatible = is_id_supported(item_id) - if is_compatible: + if self.objects_max_deps is not 0 and nb_deps >= self.objects_max_deps: + self.too_large_elements.append(item) + elif is_compatible: self.elements.append(item) else: self.incompatible_items.append(item) @@ -212,7 +216,7 @@ def split_bundle_with_expectations( use_json=True, event_version=None, cleanup_inconsistent_bundle=False, - ) -> Tuple[int, list, list]: + ) -> Tuple[int, list, list, list]: """splits a valid stix2 bundle into a list of bundles""" if use_json: try: @@ -262,11 +266,28 @@ def by_dep_size(elem): ) ) - return number_expectations, self.incompatible_items, bundles + too_large_elements_bundles = [] + for too_large_element in self.too_large_elements: + too_large_elements_bundles.append( + self.stix2_create_bundle( + bundle_data["id"], + too_large_element["nb_deps"], + [too_large_element], + use_json, + event_version, + ) + ) + + return ( + number_expectations, + self.incompatible_items, + bundles, + too_large_elements_bundles, + ) @deprecated("Use split_bundle_with_expectations instead") def split_bundle(self, bundle, use_json=True, event_version=None) -> list: - _, _, bundles = self.split_bundle_with_expectations( + _, _, bundles, _ = self.split_bundle_with_expectations( bundle, use_json, event_version ) return bundles diff --git a/tests/01-unit/utils/test_opencti_stix2_splitter.py b/tests/01-unit/utils/test_opencti_stix2_splitter.py index 146182873..66a2f93f9 100644 --- a/tests/01-unit/utils/test_opencti_stix2_splitter.py +++ b/tests/01-unit/utils/test_opencti_stix2_splitter.py @@ -80,11 +80,11 @@ def test_split_internal_ids_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/bundle_with_internal_ids.json") as file: content = file.read() - expectations, _, bundles = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations(content) assert expectations == 4 # Split with cleanup_inconsistent_bundle stix_splitter = OpenCTIStix2Splitter() - expectations, _, bundles = stix_splitter.split_bundle_with_expectations( + expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations( bundle=content, cleanup_inconsistent_bundle=True ) assert expectations == 4 diff --git a/tests/02-integration/entities/test_malware.py b/tests/02-integration/entities/test_malware.py index d63855f33..cb833424f 100644 --- a/tests/02-integration/entities/test_malware.py +++ b/tests/02-integration/entities/test_malware.py @@ -5,7 +5,7 @@ def test_malware_import_with_sample_refs(api_client): with open("tests/data/basicMalwareWithSample.json", "r") as content_file: content = content_file.read() - imported_malware_bundle = api_client.stix2.import_bundle_from_json( + imported_malware_bundle, _ = api_client.stix2.import_bundle_from_json( json_data=content ) assert imported_malware_bundle is not None diff --git a/tests/02-integration/utils/test_stix_crud.py b/tests/02-integration/utils/test_stix_crud.py index edadba800..495062d41 100644 --- a/tests/02-integration/utils/test_stix_crud.py +++ b/tests/02-integration/utils/test_stix_crud.py @@ -21,7 +21,7 @@ def test_entity_create(entity_class, api_stix, opencti_splitter): stix_object = stix_class(**class_data) bundle = Bundle(objects=[stix_object]).serialize() split_bundle = opencti_splitter.split_bundle(bundle, True, None)[0] - bundles_sent = api_stix.import_bundle_from_json(split_bundle, False, None, None) + bundles_sent, _ = api_stix.import_bundle_from_json(split_bundle, False, None, None) assert len(bundles_sent) == 1 assert bundles_sent[0]["id"] == stix_object["id"] From 890641eed0738ed2dca56f64de1d448e9e08b149 Mon Sep 17 00:00:00 2001 From: Jeremy Cloarec Date: Fri, 3 Oct 2025 14:19:21 +0200 Subject: [PATCH 02/14] [client] handle too large items --- pycti/utils/opencti_stix2_splitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycti/utils/opencti_stix2_splitter.py b/pycti/utils/opencti_stix2_splitter.py index 7ff4d64eb..304a427fe 100644 --- a/pycti/utils/opencti_stix2_splitter.py +++ b/pycti/utils/opencti_stix2_splitter.py @@ -198,7 +198,7 @@ def enlist_element( ) else: is_compatible = is_id_supported(item_id) - if self.objects_max_deps is not 0 and nb_deps >= self.objects_max_deps: + if self.objects_max_deps != 0 and nb_deps >= self.objects_max_deps: self.too_large_elements.append(item) elif is_compatible: self.elements.append(item) From 861031cde114b5a007f655a94cf8c948261c331b Mon Sep 17 00:00:00 2001 From: Jeremy Cloarec Date: Fri, 3 Oct 2025 14:19:46 +0200 Subject: [PATCH 03/14] [client] handle too large items --- pycti/utils/opencti_stix2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycti/utils/opencti_stix2.py b/pycti/utils/opencti_stix2.py index d359232eb..0d45b5b28 100644 --- a/pycti/utils/opencti_stix2.py +++ b/pycti/utils/opencti_stix2.py @@ -8,7 +8,7 @@ import time import traceback import uuid -from typing import Any, Dict, List, Optional, Union, Tuple +from typing import Any, Dict, List, Optional, Tuple, Union import datefinder import dateutil.parser From a5f22a235e51b17db860abf363b1eebd0666df44 Mon Sep 17 00:00:00 2001 From: Jeremy Cloarec Date: Fri, 3 Oct 2025 14:40:17 +0200 Subject: [PATCH 04/14] [client] handle too large items --- pycti/connector/opencti_connector_helper.py | 2 +- .../utils/test_opencti_stix2_splitter.py | 20 +++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/pycti/connector/opencti_connector_helper.py b/pycti/connector/opencti_connector_helper.py index 6d843729f..942219dd8 100644 --- a/pycti/connector/opencti_connector_helper.py +++ b/pycti/connector/opencti_connector_helper.py @@ -2096,7 +2096,7 @@ def send_stix2_bundle(self, bundle: str, **kwargs) -> list: os.rename(write_file, final_write_file) stix2_splitter = OpenCTIStix2Splitter() - (expectations_number, _, bundles) = ( + (expectations_number, _, bundles, _) = ( stix2_splitter.split_bundle_with_expectations( bundle=bundle, use_json=True, diff --git a/tests/01-unit/utils/test_opencti_stix2_splitter.py b/tests/01-unit/utils/test_opencti_stix2_splitter.py index 66a2f93f9..65697887b 100644 --- a/tests/01-unit/utils/test_opencti_stix2_splitter.py +++ b/tests/01-unit/utils/test_opencti_stix2_splitter.py @@ -10,7 +10,7 @@ def test_split_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/enterprise-attack.json") as file: content = file.read() - expectations, _, bundles = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles; _ = stix_splitter.split_bundle_with_expectations(content) assert expectations == 7016 @@ -18,7 +18,7 @@ def test_split_test_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/DATA-TEST-STIX2_v2.json") as file: content = file.read() - expectations, _, bundles = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations(content) assert expectations == 59 base_bundles = json.loads(content)["objects"] for base in base_bundles: @@ -40,13 +40,13 @@ def test_split_mono_entity_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/mono-bundle-entity.json") as file: content = file.read() - expectations, _, bundles = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations(content) assert expectations == 1 json_bundle = json.loads(bundles[0])["objects"][0] assert json_bundle["created_by_ref"] == "fa42a846-8d90-4e51-bc29-71d5b4802168" # Split with cleanup_inconsistent_bundle stix_splitter = OpenCTIStix2Splitter() - expectations, _, bundles = stix_splitter.split_bundle_with_expectations( + expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations( bundle=content, cleanup_inconsistent_bundle=True ) assert expectations == 1 @@ -58,11 +58,11 @@ def test_split_mono_relationship_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/mono-bundle-relationship.json") as file: content = file.read() - expectations, _, bundles = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations(content) assert expectations == 1 # Split with cleanup_inconsistent_bundle stix_splitter = OpenCTIStix2Splitter() - expectations, _, bundles = stix_splitter.split_bundle_with_expectations( + expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations( bundle=content, cleanup_inconsistent_bundle=True ) assert expectations == 0 @@ -72,7 +72,7 @@ def test_split_capec_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/mitre_att_capec.json") as file: content = file.read() - expectations, _, bundles = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations(content) assert expectations == 2610 @@ -101,11 +101,11 @@ def test_split_missing_refs_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/missing_refs.json") as file: content = file.read() - expectations, _, bundles = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations(content) assert expectations == 4 # Split with cleanup_inconsistent_bundle stix_splitter = OpenCTIStix2Splitter() - expectations, _, bundles = stix_splitter.split_bundle_with_expectations( + expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations( bundle=content, cleanup_inconsistent_bundle=True ) assert expectations == 3 @@ -115,7 +115,7 @@ def test_split_cyclic_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/cyclic-bundle.json") as file: content = file.read() - expectations, _, bundles = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations(content) assert expectations == 6 for bundle in bundles: json_bundle = json.loads(bundle) From 1e3697ee41faff146903cc791b79a7c7dbf81752 Mon Sep 17 00:00:00 2001 From: Jeremy Cloarec Date: Fri, 3 Oct 2025 14:41:22 +0200 Subject: [PATCH 05/14] [client] handle too large items --- tests/01-unit/utils/test_opencti_stix2_splitter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/01-unit/utils/test_opencti_stix2_splitter.py b/tests/01-unit/utils/test_opencti_stix2_splitter.py index 65697887b..65d162c4e 100644 --- a/tests/01-unit/utils/test_opencti_stix2_splitter.py +++ b/tests/01-unit/utils/test_opencti_stix2_splitter.py @@ -10,7 +10,8 @@ def test_split_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/enterprise-attack.json") as file: content = file.read() - expectations, _, bundles; _ = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles + _ = stix_splitter.split_bundle_with_expectations(content) assert expectations == 7016 From ab6432d099ba22a399fc54929652652edcf43ed2 Mon Sep 17 00:00:00 2001 From: Jeremy Cloarec Date: Fri, 3 Oct 2025 14:43:46 +0200 Subject: [PATCH 06/14] [client] handle too large items --- tests/01-unit/utils/test_opencti_stix2_splitter.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/01-unit/utils/test_opencti_stix2_splitter.py b/tests/01-unit/utils/test_opencti_stix2_splitter.py index 65d162c4e..76cbdcf30 100644 --- a/tests/01-unit/utils/test_opencti_stix2_splitter.py +++ b/tests/01-unit/utils/test_opencti_stix2_splitter.py @@ -10,8 +10,7 @@ def test_split_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/enterprise-attack.json") as file: content = file.read() - expectations, _, bundles - _ = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations(content) assert expectations == 7016 From 419249e5c5a368b3e5761b31fc19b129f5e62a31 Mon Sep 17 00:00:00 2001 From: Jeremy Cloarec Date: Wed, 8 Oct 2025 11:39:15 +0200 Subject: [PATCH 07/14] [client] also reject objects_max_deps < 0 --- pycti/utils/opencti_stix2_splitter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pycti/utils/opencti_stix2_splitter.py b/pycti/utils/opencti_stix2_splitter.py index 304a427fe..e9cc3d410 100644 --- a/pycti/utils/opencti_stix2_splitter.py +++ b/pycti/utils/opencti_stix2_splitter.py @@ -198,7 +198,7 @@ def enlist_element( ) else: is_compatible = is_id_supported(item_id) - if self.objects_max_deps != 0 and nb_deps >= self.objects_max_deps: + if 0 < self.objects_max_deps <= nb_deps: self.too_large_elements.append(item) elif is_compatible: self.elements.append(item) From 89ca3d613bbbf7b23fe5c6e0ddddb4fa1a5284c0 Mon Sep 17 00:00:00 2001 From: Jeremy Cloarec Date: Wed, 8 Oct 2025 12:40:15 +0200 Subject: [PATCH 08/14] [client] add raw_nb_refs computation --- pycti/utils/opencti_stix2_splitter.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pycti/utils/opencti_stix2_splitter.py b/pycti/utils/opencti_stix2_splitter.py index e9cc3d410..139db1be3 100644 --- a/pycti/utils/opencti_stix2_splitter.py +++ b/pycti/utils/opencti_stix2_splitter.py @@ -63,6 +63,7 @@ def enlist_element( self, item_id, raw_data, cleanup_inconsistent_bundle, parent_acc ): nb_deps = 1 + raw_nb_refs = 0 if item_id not in raw_data: return 0 @@ -79,6 +80,7 @@ def enlist_element( if key.endswith("_refs") and item[key] is not None: to_keep = [] for element_ref in item[key]: + raw_nb_refs += 1 # We need to check if this ref is not already a reference is_missing_ref = raw_data.get(element_ref) is None must_be_cleaned = is_missing_ref and cleanup_inconsistent_bundle @@ -105,6 +107,7 @@ def enlist_element( to_keep.append(element_ref) item[key] = to_keep elif key.endswith("_ref"): + raw_nb_refs += 1 is_missing_ref = raw_data.get(value) is None must_be_cleaned = is_missing_ref and cleanup_inconsistent_bundle not_dependency_ref = ( @@ -131,6 +134,7 @@ def enlist_element( item[key] = None # Case for embedded elements (deduplicating and cleanup) elif key == "external_references" and item[key] is not None: + raw_nb_refs += 1 # specific case of splitting external references # reference_ids = [] deduplicated_references = [] @@ -157,6 +161,7 @@ def enlist_element( # nb_deps += self.enlist_element(reference_id, raw_data) item[key] = deduplicated_references elif key == "kill_chain_phases" and item[key] is not None: + raw_nb_refs += 1 # specific case of splitting kill_chain phases # kill_chain_ids = [] deduplicated_kill_chain = [] @@ -198,7 +203,7 @@ def enlist_element( ) else: is_compatible = is_id_supported(item_id) - if 0 < self.objects_max_deps <= nb_deps: + if 0 < self.objects_max_deps <= raw_nb_refs: self.too_large_elements.append(item) elif is_compatible: self.elements.append(item) From 13013f1b57ed5bf1eaac1cd2627237b515466539 Mon Sep 17 00:00:00 2001 From: Jeremy Cloarec Date: Wed, 8 Oct 2025 16:36:44 +0200 Subject: [PATCH 09/14] [client] rename objects_max_deps to objects_max_refs --- pycti/utils/opencti_stix2.py | 12 ++++++------ pycti/utils/opencti_stix2_splitter.py | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pycti/utils/opencti_stix2.py b/pycti/utils/opencti_stix2.py index 0d45b5b28..d9c56b610 100644 --- a/pycti/utils/opencti_stix2.py +++ b/pycti/utils/opencti_stix2.py @@ -221,7 +221,7 @@ def import_bundle_from_json( update: bool = False, types: List = None, work_id: str = None, - objects_max_deps: int = 0, + objects_max_refs: int = 0, ) -> Tuple[list, list]: """import a stix2 bundle from JSON data @@ -232,13 +232,13 @@ def import_bundle_from_json( :param types: list of stix2 types, defaults to None :type types: list, optional :param work_id work_id: str, optional - :param objects_max_deps: max deps amount of objects, reject object import if larger than configured amount - :type objects_max_deps: int, optional + :param objects_max_refs: max deps amount of objects, reject object import if larger than configured amount + :type objects_max_refs: int, optional :return: list of imported stix2 objects and a list of stix2 objects with too many deps :rtype: Tuple[List,List] """ data = json.loads(json_data) - return self.import_bundle(data, update, types, work_id, objects_max_deps) + return self.import_bundle(data, update, types, work_id, objects_max_refs) def resolve_author(self, title: str) -> Optional[Identity]: if "fireeye" in title.lower() or "mandiant" in title.lower(): @@ -3063,7 +3063,7 @@ def import_bundle( update: bool = False, types: List = None, work_id: str = None, - objects_max_deps: int = 0, + objects_max_refs: int = 0, ) -> Tuple[list, list]: # Check if the bundle is correctly formatted if "type" not in stix_bundle or stix_bundle["type"] != "bundle": @@ -3076,7 +3076,7 @@ def import_bundle( else None ) - stix2_splitter = OpenCTIStix2Splitter(objects_max_deps) + stix2_splitter = OpenCTIStix2Splitter(objects_max_refs) _, incompatible_elements, bundles, too_large_elements_bundles = ( stix2_splitter.split_bundle_with_expectations( stix_bundle, False, event_version diff --git a/pycti/utils/opencti_stix2_splitter.py b/pycti/utils/opencti_stix2_splitter.py index 139db1be3..d5833c7f9 100644 --- a/pycti/utils/opencti_stix2_splitter.py +++ b/pycti/utils/opencti_stix2_splitter.py @@ -39,8 +39,8 @@ class OpenCTIStix2Splitter: # pylint: disable=too-many-instance-attributes Splits large STIX2 bundles into smaller chunks for processing. """ - def __init__(self, objects_max_deps: int = 0): - self.objects_max_deps = objects_max_deps + def __init__(self, objects_max_refs: int = 0): + self.objects_max_refs = objects_max_refs self.cache_index = {} self.cache_refs = {} self.elements = [] @@ -203,7 +203,7 @@ def enlist_element( ) else: is_compatible = is_id_supported(item_id) - if 0 < self.objects_max_deps <= raw_nb_refs: + if 0 < self.objects_max_refs <= raw_nb_refs: self.too_large_elements.append(item) elif is_compatible: self.elements.append(item) From 7688e8110a4c4d9bf20dd5f7046eccf4cbcf2466 Mon Sep 17 00:00:00 2001 From: Jeremy Cloarec Date: Fri, 10 Oct 2025 16:02:06 +0200 Subject: [PATCH 10/14] [worker] change stix_object_max_refs default value & report expectation dropped bundles --- pycti/utils/opencti_stix2.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pycti/utils/opencti_stix2.py b/pycti/utils/opencti_stix2.py index d9c56b610..50f743e6b 100644 --- a/pycti/utils/opencti_stix2.py +++ b/pycti/utils/opencti_stix2.py @@ -3095,6 +3095,16 @@ def import_bundle( + " is incompatible and couldn't be processed", }, ) + for too_large_elements_bundle in too_large_elements_bundles: + self.opencti.work.report_expectation( + work_id, + { + "error": "Incompatible element in bundle", + "source": "Element " + + too_large_elements_bundle["id"] + + " is incompatible and couldn't be processed", + }, + ) # Import every element in a specific order imported_elements = [] From e3f825da6d7e8fc177be321259989a9143d6dbf1 Mon Sep 17 00:00:00 2001 From: Jeremy Cloarec Date: Fri, 10 Oct 2025 16:06:18 +0200 Subject: [PATCH 11/14] [client] report expectation dropped bundles --- pycti/utils/opencti_stix2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pycti/utils/opencti_stix2.py b/pycti/utils/opencti_stix2.py index 50f743e6b..8d4a57c55 100644 --- a/pycti/utils/opencti_stix2.py +++ b/pycti/utils/opencti_stix2.py @@ -3099,10 +3099,10 @@ def import_bundle( self.opencti.work.report_expectation( work_id, { - "error": "Incompatible element in bundle", + "error": "Too large element in bundle", "source": "Element " + too_large_elements_bundle["id"] - + " is incompatible and couldn't be processed", + + " is too large and couldn't be processed", }, ) From b36a15f69215dd5dd50378f2c91b0e2d58b82669 Mon Sep 17 00:00:00 2001 From: Jeremy Cloarec Date: Wed, 15 Oct 2025 12:45:19 +0200 Subject: [PATCH 12/14] [client] add dead_letter_routing to config fragment --- pycti/api/opencti_api_connector.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pycti/api/opencti_api_connector.py b/pycti/api/opencti_api_connector.py index feb70d81e..fad02afec 100644 --- a/pycti/api/opencti_api_connector.py +++ b/pycti/api/opencti_api_connector.py @@ -79,6 +79,7 @@ def list(self) -> Dict: push push_exchange push_routing + dead_letter_routing } } } From 44315c6f99efbacaf6e8204c0c447ab2c8b6846d Mon Sep 17 00:00:00 2001 From: Jeremy Cloarec Date: Wed, 15 Oct 2025 17:37:34 +0200 Subject: [PATCH 13/14] [client] move nb refs computation from splitter to static utils method --- pycti/connector/opencti_connector_helper.py | 2 +- pycti/utils/opencti_stix2.py | 33 +++++++++++-------- pycti/utils/opencti_stix2_splitter.py | 33 ++++--------------- pycti/utils/opencti_stix2_utils.py | 14 ++++++++ .../utils/test_opencti_stix2_splitter.py | 24 +++++++------- 5 files changed, 52 insertions(+), 54 deletions(-) diff --git a/pycti/connector/opencti_connector_helper.py b/pycti/connector/opencti_connector_helper.py index 942219dd8..6d843729f 100644 --- a/pycti/connector/opencti_connector_helper.py +++ b/pycti/connector/opencti_connector_helper.py @@ -2096,7 +2096,7 @@ def send_stix2_bundle(self, bundle: str, **kwargs) -> list: os.rename(write_file, final_write_file) stix2_splitter = OpenCTIStix2Splitter() - (expectations_number, _, bundles, _) = ( + (expectations_number, _, bundles) = ( stix2_splitter.split_bundle_with_expectations( bundle=bundle, use_json=True, diff --git a/pycti/utils/opencti_stix2.py b/pycti/utils/opencti_stix2.py index 8d4a57c55..07ef9fc05 100644 --- a/pycti/utils/opencti_stix2.py +++ b/pycti/utils/opencti_stix2.py @@ -32,6 +32,7 @@ STIX_CORE_OBJECTS, STIX_CYBER_OBSERVABLE_MAPPING, STIX_META_OBJECTS, + OpenCTIStix2Utils, ) datefinder.ValueError = ValueError, OverflowError @@ -3076,8 +3077,8 @@ def import_bundle( else None ) - stix2_splitter = OpenCTIStix2Splitter(objects_max_refs) - _, incompatible_elements, bundles, too_large_elements_bundles = ( + stix2_splitter = OpenCTIStix2Splitter() + _, incompatible_elements, bundles = ( stix2_splitter.split_bundle_with_expectations( stix_bundle, False, event_version ) @@ -3095,23 +3096,27 @@ def import_bundle( + " is incompatible and couldn't be processed", }, ) - for too_large_elements_bundle in too_large_elements_bundles: - self.opencti.work.report_expectation( - work_id, - { - "error": "Too large element in bundle", - "source": "Element " - + too_large_elements_bundle["id"] - + " is too large and couldn't be processed", - }, - ) # Import every element in a specific order imported_elements = [] + too_large_elements_bundles = [] for bundle in bundles: for item in bundle["objects"]: - self.import_item(item, update, types, 0, work_id) - imported_elements.append({"id": item["id"], "type": item["type"]}) + nb_refs = OpenCTIStix2Utils.compute_object_refs_number(item) + if 0 < objects_max_refs <= nb_refs: + self.opencti.work.report_expectation( + work_id, + { + "error": "Too large element in bundle", + "source": "Element " + + item["id"] + + " is too large and couldn't be processed", + }, + ) + too_large_elements_bundles.append(item) + else: + self.import_item(item, update, types, 0, work_id) + imported_elements.append({"id": item["id"], "type": item["type"]}) return imported_elements, too_large_elements_bundles diff --git a/pycti/utils/opencti_stix2_splitter.py b/pycti/utils/opencti_stix2_splitter.py index d5833c7f9..75f6c9e3b 100644 --- a/pycti/utils/opencti_stix2_splitter.py +++ b/pycti/utils/opencti_stix2_splitter.py @@ -33,19 +33,17 @@ def is_id_supported(key): return True -class OpenCTIStix2Splitter: # pylint: disable=too-many-instance-attributes +class OpenCTIStix2Splitter: """STIX2 bundle splitter for OpenCTI Splits large STIX2 bundles into smaller chunks for processing. """ - def __init__(self, objects_max_refs: int = 0): - self.objects_max_refs = objects_max_refs + def __init__(self): self.cache_index = {} self.cache_refs = {} self.elements = [] self.incompatible_items = [] - self.too_large_elements = [] def get_internal_ids_in_extension(self, item): ids = [] @@ -63,7 +61,6 @@ def enlist_element( self, item_id, raw_data, cleanup_inconsistent_bundle, parent_acc ): nb_deps = 1 - raw_nb_refs = 0 if item_id not in raw_data: return 0 @@ -80,7 +77,6 @@ def enlist_element( if key.endswith("_refs") and item[key] is not None: to_keep = [] for element_ref in item[key]: - raw_nb_refs += 1 # We need to check if this ref is not already a reference is_missing_ref = raw_data.get(element_ref) is None must_be_cleaned = is_missing_ref and cleanup_inconsistent_bundle @@ -107,7 +103,6 @@ def enlist_element( to_keep.append(element_ref) item[key] = to_keep elif key.endswith("_ref"): - raw_nb_refs += 1 is_missing_ref = raw_data.get(value) is None must_be_cleaned = is_missing_ref and cleanup_inconsistent_bundle not_dependency_ref = ( @@ -134,7 +129,6 @@ def enlist_element( item[key] = None # Case for embedded elements (deduplicating and cleanup) elif key == "external_references" and item[key] is not None: - raw_nb_refs += 1 # specific case of splitting external references # reference_ids = [] deduplicated_references = [] @@ -161,7 +155,6 @@ def enlist_element( # nb_deps += self.enlist_element(reference_id, raw_data) item[key] = deduplicated_references elif key == "kill_chain_phases" and item[key] is not None: - raw_nb_refs += 1 # specific case of splitting kill_chain phases # kill_chain_ids = [] deduplicated_kill_chain = [] @@ -203,9 +196,8 @@ def enlist_element( ) else: is_compatible = is_id_supported(item_id) - if 0 < self.objects_max_refs <= raw_nb_refs: - self.too_large_elements.append(item) - elif is_compatible: + + if is_compatible: self.elements.append(item) else: self.incompatible_items.append(item) @@ -221,7 +213,7 @@ def split_bundle_with_expectations( use_json=True, event_version=None, cleanup_inconsistent_bundle=False, - ) -> Tuple[int, list, list, list]: + ) -> Tuple[int, list, list]: """splits a valid stix2 bundle into a list of bundles""" if use_json: try: @@ -271,28 +263,15 @@ def by_dep_size(elem): ) ) - too_large_elements_bundles = [] - for too_large_element in self.too_large_elements: - too_large_elements_bundles.append( - self.stix2_create_bundle( - bundle_data["id"], - too_large_element["nb_deps"], - [too_large_element], - use_json, - event_version, - ) - ) - return ( number_expectations, self.incompatible_items, bundles, - too_large_elements_bundles, ) @deprecated("Use split_bundle_with_expectations instead") def split_bundle(self, bundle, use_json=True, event_version=None) -> list: - _, _, bundles, _ = self.split_bundle_with_expectations( + _, _, bundles = self.split_bundle_with_expectations( bundle, use_json, event_version ) return bundles diff --git a/pycti/utils/opencti_stix2_utils.py b/pycti/utils/opencti_stix2_utils.py index 0d9a3eec5..8b8d8100e 100644 --- a/pycti/utils/opencti_stix2_utils.py +++ b/pycti/utils/opencti_stix2_utils.py @@ -233,3 +233,17 @@ def retrieveClassForMethod( if hasattr(attribute, method): return attribute return None + + @staticmethod + def compute_object_refs_number(entity: Dict): + refs_number = 0 + for key in list(entity.keys()): + if key.endswith("_refs") and entity[key] is not None: + refs_number += len(entity[key]) + elif key.endswith("_ref"): + refs_number += 1 + elif key == "external_references" and entity[key] is not None: + refs_number += len(entity[key]) + elif key == "kill_chain_phases" and entity[key] is not None: + refs_number += len(entity[key]) + return refs_number diff --git a/tests/01-unit/utils/test_opencti_stix2_splitter.py b/tests/01-unit/utils/test_opencti_stix2_splitter.py index 76cbdcf30..146182873 100644 --- a/tests/01-unit/utils/test_opencti_stix2_splitter.py +++ b/tests/01-unit/utils/test_opencti_stix2_splitter.py @@ -10,7 +10,7 @@ def test_split_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/enterprise-attack.json") as file: content = file.read() - expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles = stix_splitter.split_bundle_with_expectations(content) assert expectations == 7016 @@ -18,7 +18,7 @@ def test_split_test_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/DATA-TEST-STIX2_v2.json") as file: content = file.read() - expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles = stix_splitter.split_bundle_with_expectations(content) assert expectations == 59 base_bundles = json.loads(content)["objects"] for base in base_bundles: @@ -40,13 +40,13 @@ def test_split_mono_entity_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/mono-bundle-entity.json") as file: content = file.read() - expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles = stix_splitter.split_bundle_with_expectations(content) assert expectations == 1 json_bundle = json.loads(bundles[0])["objects"][0] assert json_bundle["created_by_ref"] == "fa42a846-8d90-4e51-bc29-71d5b4802168" # Split with cleanup_inconsistent_bundle stix_splitter = OpenCTIStix2Splitter() - expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations( + expectations, _, bundles = stix_splitter.split_bundle_with_expectations( bundle=content, cleanup_inconsistent_bundle=True ) assert expectations == 1 @@ -58,11 +58,11 @@ def test_split_mono_relationship_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/mono-bundle-relationship.json") as file: content = file.read() - expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles = stix_splitter.split_bundle_with_expectations(content) assert expectations == 1 # Split with cleanup_inconsistent_bundle stix_splitter = OpenCTIStix2Splitter() - expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations( + expectations, _, bundles = stix_splitter.split_bundle_with_expectations( bundle=content, cleanup_inconsistent_bundle=True ) assert expectations == 0 @@ -72,7 +72,7 @@ def test_split_capec_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/mitre_att_capec.json") as file: content = file.read() - expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles = stix_splitter.split_bundle_with_expectations(content) assert expectations == 2610 @@ -80,11 +80,11 @@ def test_split_internal_ids_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/bundle_with_internal_ids.json") as file: content = file.read() - expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles = stix_splitter.split_bundle_with_expectations(content) assert expectations == 4 # Split with cleanup_inconsistent_bundle stix_splitter = OpenCTIStix2Splitter() - expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations( + expectations, _, bundles = stix_splitter.split_bundle_with_expectations( bundle=content, cleanup_inconsistent_bundle=True ) assert expectations == 4 @@ -101,11 +101,11 @@ def test_split_missing_refs_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/missing_refs.json") as file: content = file.read() - expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles = stix_splitter.split_bundle_with_expectations(content) assert expectations == 4 # Split with cleanup_inconsistent_bundle stix_splitter = OpenCTIStix2Splitter() - expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations( + expectations, _, bundles = stix_splitter.split_bundle_with_expectations( bundle=content, cleanup_inconsistent_bundle=True ) assert expectations == 3 @@ -115,7 +115,7 @@ def test_split_cyclic_bundle(): stix_splitter = OpenCTIStix2Splitter() with open("./tests/data/cyclic-bundle.json") as file: content = file.read() - expectations, _, bundles, _ = stix_splitter.split_bundle_with_expectations(content) + expectations, _, bundles = stix_splitter.split_bundle_with_expectations(content) assert expectations == 6 for bundle in bundles: json_bundle = json.loads(bundle) From aac635ca57928f30f9221bc07c646ca65db6b640 Mon Sep 17 00:00:00 2001 From: Jeremy Cloarec Date: Wed, 15 Oct 2025 17:41:14 +0200 Subject: [PATCH 14/14] [client] add comment --- pycti/utils/opencti_stix2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pycti/utils/opencti_stix2.py b/pycti/utils/opencti_stix2.py index 07ef9fc05..e25ab57aa 100644 --- a/pycti/utils/opencti_stix2.py +++ b/pycti/utils/opencti_stix2.py @@ -3102,6 +3102,7 @@ def import_bundle( too_large_elements_bundles = [] for bundle in bundles: for item in bundle["objects"]: + # If item is considered too large, meaning that it has a number of refs higher than inputted objects_max_refs, do not import it nb_refs = OpenCTIStix2Utils.compute_object_refs_number(item) if 0 < objects_max_refs <= nb_refs: self.opencti.work.report_expectation(