@@ -38,10 +38,10 @@ def __init__(self, pod_node: pp.PodNode, input_stream: cp.Stream, **kwargs):
3838 super ().__init__ (source = pod_node , upstreams = (input_stream ,), ** kwargs )
3939 self .pod_node = pod_node
4040 self .input_stream = input_stream
41- self ._set_modified_time () # set modified time to when we obtain the iterator
42- # capture the immutable iterator from the input stream
4341
42+ # capture the immutable iterator from the input stream
4443 self ._prepared_stream_iterator = input_stream .iter_packets ()
44+ self ._set_modified_time () # set modified time to when we obtain the iterator
4545
4646 # Packet-level caching (from your PodStream)
4747 self ._cached_output_packets : list [tuple [cp .Tag , cp .Packet | None ]] | None = None
@@ -134,7 +134,7 @@ def run(
134134 cached_results = []
135135
136136 # identify all entries in the input stream for which we still have not computed packets
137- if filter is not None :
137+ if len ( args ) > 0 or len ( kwargs ) > 0 :
138138 input_stream_used = self .input_stream .polars_filter (* args , ** kwargs )
139139 else :
140140 input_stream_used = self .input_stream
@@ -194,6 +194,7 @@ def run(
194194
195195 if existing is not None and existing .num_rows > 0 :
196196 # If there are existing entries, we can cache them
197+ # TODO: cache them based on the record ID
197198 existing_stream = TableStream (existing , tag_columns = tag_keys )
198199 for tag , packet in existing_stream .iter_packets ():
199200 cached_results .append ((tag , packet ))
@@ -232,6 +233,14 @@ def run(
232233
233234 self ._cached_output_packets = cached_results
234235 self ._set_modified_time ()
236+ self .pod_node .flush ()
237+ # TODO: evaluate proper handling of cache here
238+ self .clear_cache ()
239+
def clear_cache(self) -> None:
    """Reset every cached result held by this instance to ``None``.

    The cached output packets, the cached output table and the cached
    content-hash column are cleared, in that order.
    """
    for cache_attr in (
        "_cached_output_packets",
        "_cached_output_table",
        "_cached_content_hash_column",
    ):
        setattr(self, cache_attr, None)
235244
236245 def iter_packets (
237246 self , execution_engine : cp .ExecutionEngine | None = None
@@ -423,21 +432,41 @@ def as_table(
423432
424433 converter = self .data_context .type_converter
425434
426- struct_packets = converter .python_dicts_to_struct_dicts (all_packets )
427- all_tags_as_tables : pa .Table = pa .Table .from_pylist (
428- all_tags , schema = tag_schema
429- )
430- all_packets_as_tables : pa .Table = pa .Table .from_pylist (
431- struct_packets , schema = packet_schema
432- )
435+ if len (all_tags ) == 0 :
436+ tag_types , packet_types = self .pod_node .output_types (
437+ include_system_tags = True
438+ )
439+ tag_schema = converter .python_schema_to_arrow_schema (tag_types )
440+ source_entries = {
441+ f"{ constants .SOURCE_PREFIX } { c } " : str for c in packet_types .keys ()
442+ }
443+ packet_types .update (source_entries )
444+ packet_types [constants .CONTEXT_KEY ] = str
445+ packet_schema = converter .python_schema_to_arrow_schema (packet_types )
446+ total_schema = arrow_utils .join_arrow_schemas (tag_schema , packet_schema )
447+ # return an empty table with the right schema
448+ self ._cached_output_table = pa .Table .from_pylist (
449+ [], schema = total_schema
450+ )
451+ else :
452+ struct_packets = converter .python_dicts_to_struct_dicts (all_packets )
433453
434- self ._cached_output_table = arrow_utils .hstack_tables (
435- all_tags_as_tables , all_packets_as_tables
436- )
454+ all_tags_as_tables : pa .Table = pa .Table .from_pylist (
455+ all_tags , schema = tag_schema
456+ )
457+ all_packets_as_tables : pa .Table = pa .Table .from_pylist (
458+ struct_packets , schema = packet_schema
459+ )
460+
461+ self ._cached_output_table = arrow_utils .hstack_tables (
462+ all_tags_as_tables , all_packets_as_tables
463+ )
437464 assert self ._cached_output_table is not None , (
438465 "_cached_output_table should not be None here."
439466 )
440467
468+ if self ._cached_output_table .num_rows == 0 :
469+ return self ._cached_output_table
441470 drop_columns = []
442471 if not include_source :
443472 drop_columns .extend (f"{ constants .SOURCE_PREFIX } { c } " for c in self .keys ()[1 ])
0 commit comments