# Convert a local csv file into a single parquet file.
csv_to_parquet(
  path_to_file = parquetize_example("region_2022.csv"),
  path_to_parquet = tempfile(fileext = ".parquet")
)
# Convert a local txt file into a single parquet file.
csv_to_parquet(
  path_to_file = parquetize_example("region_2022.txt"),
  path_to_parquet = tempfile(fileext = ".parquet")
)
# Convert a local csv file into a single parquet file, keeping only the
# columns named in `columns`.
csv_to_parquet(
  path_to_file = parquetize_example("region_2022.csv"),
  path_to_parquet = tempfile(fileext = ".parquet"),
  columns = c("REG", "LIBELLE")
)
# Convert a local csv file into a partitioned parquet file, using the "REG"
# column as the partitioning key.
csv_to_parquet(
  path_to_file = parquetize_example("region_2022.csv"),
  path_to_parquet = tempfile(fileext = ".parquet"),
  partition = "yes",
  partitioning = c("REG")
)
# Convert a csv file stored inside a zip archive that is fetched from a URL;
# `filename_in_zip` names the file to read from the archive.
csv_to_parquet(
  path_to_file = "https://www.nomisweb.co.uk/output/census/2021/census2021-ts007.zip",
  filename_in_zip = "census2021-ts007-ctry.csv",
  path_to_parquet = tempfile(fileext = ".parquet")
)
# Convert a txt file stored inside a zip archive that is fetched from a URL;
# `filename_in_zip` names the file to read from the archive.
csv_to_parquet(
  path_to_file = "https://sourceforge.net/projects/irisdss/files/latest/download",
  filename_in_zip = "IRIS TEST data.txt",
  path_to_parquet = tempfile(fileext = ".parquet")
)
if (FALSE) {
  # Not run: convert a csv file fetched from a URL, passing
  # `compression = "gzip"` and `compression_level = 5` to the conversion.
  csv_to_parquet(
    path_to_file =
      "https://github.com/sidsriv/Introduction-to-Data-Science-in-python/raw/master/census.csv",
    path_to_parquet = tempfile(fileext = ".parquet"),
    compression = "gzip",
    compression_level = 5
  )
}
# Run the code above in your browser using DataLab