Asked in TCS, Associate

from pyspark.sql import DataFrame, SparkSession
from pyspark.sql.functions import col, explode_outer
from pyspark.sql.types import ArrayType, StructType


def _quote(name: str) -> str:
    """Backtick-quote *name* so Spark parses it as one identifier.

    Without quoting, a dot inside a column or field name is read as
    nested-field access. A literal backtick is escaped by doubling it.
    """
    return "`" + name.replace("`", "``") + "`"


def _complex_fields(df: DataFrame) -> dict:
    """Map each top-level array/struct column name of *df* to its data type."""
    return {
        field.name: field.dataType
        for field in df.schema.fields
        if isinstance(field.dataType, (ArrayType, StructType))
    }


def flatten(df: DataFrame) -> DataFrame:
    """Return *df* with every struct column expanded and every array exploded.

    The schema is re-scanned after each step, so nested combinations
    (arrays of structs, structs of arrays, ...) are processed until no
    top-level ArrayType or StructType column remains. Other complex
    types (e.g. MapType) are not touched.

    * StructType column ``c`` with field ``f`` becomes a new column
      ``c_f`` appended after the existing columns; ``c`` itself is dropped.
    * ArrayType column ``c`` is replaced in place by ``explode_outer(c)``,
      producing one row per element (a null row for a null/empty array).

    A progress line is printed to stdout for every column processed.

    Args:
        df: The DataFrame to flatten.

    Returns:
        A DataFrame whose top-level columns contain no arrays or structs.
    """
    complex_fields = _complex_fields(df)
    while complex_fields:
        # Always take the first remaining complex column; the mapping is
        # rebuilt from the new schema at the end of every iteration.
        col_name, col_type = next(iter(complex_fields.items()))
        print(f"Processing: {col_name}, Type: {type(col_type)}")

        if isinstance(col_type, StructType):
            # Promote each struct field to a top-level "<column>_<field>"
            # column. Both parts of the reference are quoted so names that
            # contain dots or other special characters still resolve.
            expanded = [
                col(f"{_quote(col_name)}.{_quote(k.name)}").alias(
                    f"{col_name}_{k.name}"
                )
                for k in col_type.fields
            ]
            df = df.select("*", *expanded).drop(col_name)
        elif isinstance(col_type, ArrayType):
            # Keep the column name; only the reference needs quoting.
            df = df.withColumn(col_name, explode_outer(col(_quote(col_name))))

        complex_fields = _complex_fields(df)
    return df


# Example usage
spark = SparkSession.builder.appName("FlattenExample").getOrCreate()

# Sample data with a mix of simple and complex fields.
# NOTE(review): with only column names given as the schema, PySpark
# presumably infers the Python dict in the last position as MapType rather
# than StructType, in which case flatten() leaves `location` untouched.
# Use pyspark.sql.Row (or an explicit StructType schema) to exercise the
# struct branch -- confirm against the PySpark version in use.
data = [
    ("John", 30, ["engineer", "data scientist"], {"city": "New York", "state": "NY"}),
    ("Jane", 25, ["analyst"], {"city": "San Francisco", "state": "CA"}),
]

# Column names only; the types are inferred from the data.
schema = ["name", "age", "roles", "location"]
df = spark.createDataFrame(data, schema)

# Call the flatten function
flattened_df = flatten(df)
flattened_df.show(truncate=False)

Be the first one to answer
Add answer anonymously...
TCS Associate Interview Questions
Stay ahead in your career. Get AmbitionBox app
qr-code
Helping over 1 Crore job seekers every month in choosing their right fit company
65 L+

Reviews

4 L+

Interviews

4 Cr+

Salaries

1 Cr+

Users/Month

Contribute to help millions
Get AmbitionBox app

Made with ❤️ in India. Trademarks belong to their respective owners. All rights reserved © 2024 Info Edge (India) Ltd.

Follow us
  • Youtube
  • Instagram
  • LinkedIn
  • Facebook
  • Twitter