@@ -180,9 +180,9 @@ def __init__(
180180
181181 self .default_encoding = "latin-1"
182182 self .compression = b""
183- self .column_names_strings : list [str ] = []
184- self .column_names : list [str ] = []
185- self .column_formats : list [str ] = []
183+ self .column_names_raw : list [bytes ] = []
184+ self .column_names : list [str | bytes ] = []
185+ self .column_formats : list [str | bytes ] = []
186186 self .columns : list [_Column ] = []
187187
188188 self ._current_page_data_subheader_pointers : list [_SubheaderPointer ] = []
@@ -570,12 +570,9 @@ def _process_columntext_subheader(self, offset: int, length: int) -> None:
570570
571571 buf = self ._read_bytes (offset , text_block_size )
572572 cname_raw = buf [0 :text_block_size ].rstrip (b"\x00 " )
573- cname = cname_raw
574- if self .convert_header_text :
575- cname = cname .decode (self .encoding or self .default_encoding )
576- self .column_names_strings .append (cname )
573+ self .column_names_raw .append (cname_raw )
577574
578- if len (self .column_names_strings ) == 1 :
575+ if len (self .column_names_raw ) == 1 :
579576 compression_literal = b""
580577 for cl in const .compression_literals :
581578 if cl in cname_raw :
@@ -644,8 +641,14 @@ def _process_columnname_subheader(self, offset: int, length: int) -> None:
644641 )
645642 col_len = self ._read_int (col_name_length , const .column_name_length_length )
646643
647- name_str = self .column_names_strings [idx ]
648- self .column_names .append (name_str [col_offset : col_offset + col_len ])
644+ name_raw = self .column_names_raw [idx ]
645+ cname = name_raw [col_offset : col_offset + col_len ]
646+ if self .convert_header_text :
647+ self .column_names .append (
648+ cname .decode (self .encoding or self .default_encoding )
649+ )
650+ else :
651+ self .column_names .append (cname )
649652
650653 def _process_columnattributes_subheader (self , offset : int , length : int ) -> None :
651654 int_len = self ._int_length
@@ -693,7 +696,7 @@ def _process_format_subheader(self, offset: int, length: int) -> None:
693696 x = self ._read_int (
694697 text_subheader_format , const .column_format_text_subheader_index_length
695698 )
696- format_idx = min (x , len (self .column_names_strings ) - 1 )
699+ format_idx = min (x , len (self .column_names_raw ) - 1 )
697700
698701 format_start = self ._read_int (
699702 col_format_offset , const .column_format_offset_length
@@ -703,15 +706,29 @@ def _process_format_subheader(self, offset: int, length: int) -> None:
703706 label_idx = self ._read_int (
704707 text_subheader_label , const .column_label_text_subheader_index_length
705708 )
706- label_idx = min (label_idx , len (self .column_names_strings ) - 1 )
709+ label_idx = min (label_idx , len (self .column_names_raw ) - 1 )
707710
708711 label_start = self ._read_int (col_label_offset , const .column_label_offset_length )
709712 label_len = self ._read_int (col_label_len , const .column_label_length_length )
710713
711- label_names = self .column_names_strings [label_idx ]
712- column_label = label_names [label_start : label_start + label_len ]
713- format_names = self .column_names_strings [format_idx ]
714- column_format = format_names [format_start : format_start + format_len ]
714+ label_names = self .column_names_raw [label_idx ]
715+ column_label_bytes = label_names [label_start : label_start + label_len ]
716+ column_label : str | bytes
717+ if self .convert_header_text :
718+ column_label = column_label_bytes .decode (
719+ self .encoding or self .default_encoding
720+ )
721+ else :
722+ column_label = column_label_bytes
723+ format_names = self .column_names_raw [format_idx ]
724+ column_format_bytes = format_names [format_start : format_start + format_len ]
725+ column_format : str | bytes
726+ if self .convert_header_text :
727+ column_format = column_format_bytes .decode (
728+ self .encoding or self .default_encoding
729+ )
730+ else :
731+ column_format = column_format_bytes
715732 current_column_number = len (self .columns )
716733
717734 col = _Column (
0 commit comments