Print SCHEMA as code
A function that takes a pyspark.sql.types.StructType
and returns valid Python
code that recreates it.
In [1]:
Copied!
from quinn import print_schema_as_code
from quinn import print_schema_as_code
In [2]:
Copied!
from pyspark.sql import types as T
from pyspark.sql import types as T
In [3]:
Copied!
# Example schema for the demo: one plain string column, two decimals with
# different precision/scale, and one column for each nested container type
# (array, map, struct).
schema = T.StructType(
    [
        T.StructField("string_field", T.StringType()),
        T.StructField("decimal_38_10_field", T.DecimalType(38, 10)),
        T.StructField("decimal_10_2_field", T.DecimalType(10, 2)),
        T.StructField("array_of_double", T.ArrayType(elementType=T.DoubleType())),
        T.StructField("map_type", T.MapType(keyType=T.StringType(), valueType=T.ShortType())),
        T.StructField(
            "struct_type",
            T.StructType(
                [
                    T.StructField("t1", T.StringType()),
                    T.StructField("t2", T.BooleanType()),
                ]
            ),
        ),
    ]
)
In [4]:
Copied!
# Render the schema as Python source once, so the same string is both
# printed and evaluated below.
schema_code = print_schema_as_code(schema)
print(schema_code)

# Create a dictionary of PySpark SQL types to provide context to 'eval()'
spark_type_dict = {k: getattr(T, k) for k in dir(T) if isinstance(getattr(T, k), type)}

# Round-trip check: evaluating the generated source rebuilds a StructType.
# Builtins are disabled and only the PySpark type classes are exposed as names.
# NOTE(review): this is acceptable for a string generated from our own schema;
# eval() should not be used on text that comes from an untrusted source.
eval(schema_code, {"__builtins__": None}, spark_type_dict)
StructType( fields=[ StructField("string_field", StringType(), True), StructField("decimal_38_10_field", DecimalType(38, 10), True), StructField("decimal_10_2_field", DecimalType(10, 2), True), StructField( "array_of_double", ArrayType(DoubleType()), True, ), StructField( "map_type", MapType( StringType(), ShortType(), True, ), True, ), StructField( "struct_type", StructType( fields=[ StructField("t1", StringType(), True), StructField("t2", BooleanType(), True), ] ), True, ), ] )
Out[4]:
StructType([StructField('string_field', StringType(), True), StructField('decimal_38_10_field', DecimalType(38,10), True), StructField('decimal_10_2_field', DecimalType(10,2), True), StructField('array_of_double', ArrayType(DoubleType(), True), True), StructField('map_type', MapType(StringType(), ShortType(), True), True), StructField('struct_type', StructType([StructField('t1', StringType(), True), StructField('t2', BooleanType(), True)]), True)])
In [ ]:
Copied!