@@ -701,6 +701,32 @@ def test_list_columns_and_indexes_with_multiindex(module_under_test):
701701 assert columns_and_indexes == expected
702702
703703
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_dataframe_to_bq_schema_dict_sequence(module_under_test):
    """A schema passed as a sequence of dicts is accepted and merged.

    Only two of the three dataframe columns are described by the dict
    schema; the type of the remaining column must be inferred from the
    dataframe itself.
    """
    dict_schema = [
        {"name": "str_column", "type": "STRING", "mode": "NULLABLE"},
        {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"},
    ]

    # OrderedDict keeps the column order deterministic across pandas versions.
    column_data = [
        ("str_column", [u"hello", u"world"]),
        ("int_column", [42, 8]),
        ("bool_column", [True, False]),
    ]
    dataframe = pandas.DataFrame(collections.OrderedDict(column_data))

    returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema)

    expected_schema = (
        schema.SchemaField("str_column", "STRING", "NULLABLE"),
        schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
        schema.SchemaField("bool_column", "BOOL", "REQUIRED"),
    )
    assert returned_schema == expected_schema
729+
704730@pytest .mark .skipif (pandas is None , reason = "Requires `pandas`" )
705731@pytest .mark .skipif (pyarrow is None , reason = "Requires `pyarrow`" )
706732def test_dataframe_to_arrow_with_multiindex (module_under_test ):
@@ -856,6 +882,28 @@ def test_dataframe_to_arrow_with_unknown_type(module_under_test):
856882 assert arrow_schema [3 ].name == "field03"
857883
858884
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
def test_dataframe_to_arrow_dict_sequence_schema(module_under_test):
    """A dict-sequence schema is translated into the matching Arrow schema.

    REQUIRED mode maps to a non-nullable Arrow field, NULLABLE to a
    nullable one.
    """
    dict_schema = [
        {"name": "field01", "type": "STRING", "mode": "REQUIRED"},
        {"name": "field02", "type": "BOOL", "mode": "NULLABLE"},
    ]
    dataframe = pandas.DataFrame(
        {"field01": [u"hello", u"world"], "field02": [True, False]}
    )

    arrow_schema = module_under_test.dataframe_to_arrow(dataframe, dict_schema).schema

    expected_fields = [
        pyarrow.field("field01", "string", nullable=False),
        pyarrow.field("field02", "bool", nullable=True),
    ]
    assert list(arrow_schema) == expected_fields
906+
859907@pytest .mark .skipif (pandas is None , reason = "Requires `pandas`" )
860908def test_dataframe_to_parquet_without_pyarrow (module_under_test , monkeypatch ):
861909 monkeypatch .setattr (module_under_test , "pyarrow" , None )
@@ -908,6 +956,36 @@ def test_dataframe_to_parquet_compression_method(module_under_test):
908956 assert call_args .kwargs .get ("compression" ) == "ZSTD"
909957
910958
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
def test_dataframe_to_parquet_dict_sequence_schema(module_under_test):
    """A dict-sequence schema is converted to SchemaField objects before use.

    The parquet writer and the Arrow conversion are stubbed out; the test
    only inspects the schema argument forwarded to ``dataframe_to_arrow``.
    """
    dict_schema = [
        {"name": "field01", "type": "STRING", "mode": "REQUIRED"},
        {"name": "field02", "type": "BOOL", "mode": "NULLABLE"},
    ]
    dataframe = pandas.DataFrame(
        {"field01": [u"hello", u"world"], "field02": [True, False]}
    )

    with mock.patch.object(
        module_under_test.pyarrow.parquet, "write_table", autospec=True
    ), mock.patch.object(
        module_under_test, "dataframe_to_arrow", autospec=True
    ) as fake_to_arrow:
        module_under_test.dataframe_to_parquet(dataframe, dict_schema, None)

    expected_schema_arg = [
        schema.SchemaField("field01", "STRING", mode="REQUIRED"),
        schema.SchemaField("field02", "BOOL", mode="NULLABLE"),
    ]
    assert fake_to_arrow.call_args.args[1] == expected_schema_arg
988+
911989@pytest .mark .skipif (pyarrow is None , reason = "Requires `pyarrow`" )
912990def test_download_arrow_tabledata_list_unknown_field_type (module_under_test ):
913991 fake_page = api_core .page_iterator .Page (
@@ -977,3 +1055,62 @@ def test_download_arrow_tabledata_list_known_field_type(module_under_test):
9771055 col = result .columns [1 ]
9781056 assert type (col ) is pyarrow .lib .StringArray
9791057 assert list (col ) == ["2.2" , "22.22" , "222.222" ]
1058+
1059+
@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
def test_download_arrow_tabledata_list_dict_sequence_schema(module_under_test):
    """``download_arrow_tabledata_list`` accepts a schema given as dicts."""
    fake_page = api_core.page_iterator.Page(
        parent=mock.Mock(),
        items=[{"page_data": "foo"}],
        item_to_value=api_core.page_iterator._item_to_value_identity,
    )
    # Column-major data backing the fake page.
    fake_page._columns = [[1, 10, 100], ["2.2", "22.22", "222.222"]]

    dict_schema = [
        {"name": "population_size", "type": "INTEGER", "mode": "NULLABLE"},
        {"name": "non_alien_field", "type": "STRING", "mode": "NULLABLE"},
    ]

    result = next(
        module_under_test.download_arrow_tabledata_list([fake_page], dict_schema)
    )

    assert len(result.columns) == 2
    int_col, str_col = result.columns
    assert type(int_col) is pyarrow.lib.Int64Array
    assert list(int_col) == [1, 10, 100]
    assert type(str_col) is pyarrow.lib.StringArray
    assert list(str_col) == ["2.2", "22.22", "222.222"]
1085+
1086+
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
def test_download_dataframe_tabledata_list_dict_sequence_schema(module_under_test):
    """``download_dataframe_tabledata_list`` accepts a schema given as dicts."""
    fake_page = api_core.page_iterator.Page(
        parent=mock.Mock(),
        items=[{"page_data": "foo"}],
        item_to_value=api_core.page_iterator._item_to_value_identity,
    )
    # Column-major data backing the fake page.
    fake_page._columns = [[1, 10, 100], ["2.2", "22.22", "222.222"]]

    dict_schema = [
        {"name": "population_size", "type": "INTEGER", "mode": "NULLABLE"},
        {"name": "non_alien_field", "type": "STRING", "mode": "NULLABLE"},
    ]

    results_gen = module_under_test.download_dataframe_tabledata_list(
        [fake_page], dict_schema, dtypes={}
    )

    expected_result = pandas.DataFrame(
        collections.OrderedDict(
            [
                ("population_size", [1, 10, 100]),
                ("non_alien_field", ["2.2", "22.22", "222.222"]),
            ]
        )
    )
    assert next(results_gen).equals(expected_result)
0 commit comments