File Handling Answers¶
CSV Exercises¶
Re-order¶
Read a CSV file, swap the first and second columns, and write the CSV back out to a new file.
Answers
With DictReader/DictWriter:
import csv
import sys
def re_order(in_filename, out_filename):
    """Read CSV file, swap first two columns, and output new file.

    The first row of ``in_filename`` is treated as the header row.  The
    header (and therefore every data row) is written to ``out_filename``
    with its first two columns exchanged; remaining columns keep their
    order.

    Raises ValueError if the header has fewer than two columns, because
    there is nothing to swap.
    """
    # newline='' hands line-ending handling to the csv module, so quoted
    # fields containing line breaks survive the round trip unchanged and
    # no extra "\r" is written on platforms that translate "\n".
    with open(in_filename, newline='') as csv_file:
        reader = csv.DictReader(csv_file)
        rows = list(reader)
        fieldnames = reader.fieldnames
    first, second, *rest = fieldnames
    headers = [second, first] + rest
    with open(out_filename, mode='wt', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=headers)
        writer.writeheader()
        writer.writerows(rows)
if __name__ == "__main__":
    # Forward every command-line argument (input path, output path) as-is.
    cli_args = sys.argv[1:]
    re_order(*cli_args)
Without DictReader/DictWriter (possibly headerless files):
import csv
import sys
def re_order(in_filename, out_filename):
    """Read CSV file, swap first two columns, and output new file.

    No header row is assumed: every row is treated the same way.  Rows
    with fewer than two fields (including blank lines) have nothing to
    swap and are copied through unchanged.
    """
    # newline='' hands line-ending handling to the csv module, so quoted
    # fields containing line breaks survive the round trip unchanged and
    # no extra "\r" is written on platforms that translate "\n".
    with open(in_filename, mode='rt', newline='') as in_file:
        rows = []
        for row in csv.reader(in_file):
            if len(row) >= 2:
                row[0], row[1] = row[1], row[0]
            rows.append(row)
    with open(out_filename, mode='wt', newline='') as out_file:
        csv.writer(out_file).writerows(rows)
if __name__ == "__main__":
    # Forward every command-line argument (input path, output path) as-is.
    cli_args = sys.argv[1:]
    re_order(*cli_args)
Re-sort¶
Read a CSV file, sort the file by the second column, and write the file back.
Answers
import csv
import sys
def re_sort(file_name):
    """Re-sort the given CSV file by the second column.

    The first row is treated as the header and stays first.  Data rows
    are ordered by the string value of the second column and the file is
    overwritten with the result.

    Raises IndexError if the header has fewer than two columns.
    """
    # newline='' hands line-ending handling to the csv module, so quoted
    # fields containing line breaks survive the round trip unchanged and
    # no extra "\r" is written on platforms that translate "\n".
    with open(file_name, newline='') as csv_file:
        reader = csv.DictReader(csv_file)
        rows = list(reader)
        fieldnames = reader.fieldnames
    second_column = fieldnames[1]
    # DictReader fills missing trailing fields with None, which cannot be
    # compared with str; sort such rows as if the field were empty (which
    # is also how DictWriter writes them back out).
    sorted_rows = sorted(rows, key=lambda r: r[second_column] or "")
    with open(file_name, mode='wt', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(sorted_rows)
if __name__ == "__main__":
    # The first command-line argument names the CSV file to re-sort.
    target = sys.argv[1]
    re_sort(target)
Tab to Comma¶
Write a program that reads a CSV file using tabs as delimiters and converts it to one using commas.
Answers
import csv
import sys
def tab_to_comma(in_filename, out_filename):
    """Read tab-delimited file and write CSV file back out.

    Every row of ``in_filename`` is parsed with a tab delimiter and
    written to ``out_filename`` with a comma delimiter; the csv writer
    quotes any field that needs it (for example one containing a comma).
    """
    # newline='' hands line-ending handling to the csv module, so quoted
    # fields containing line breaks survive the round trip unchanged and
    # no extra "\r" is written on platforms that translate "\n".
    with open(in_filename, mode='rt', newline='') as in_file:
        rows = list(csv.reader(in_file, delimiter="\t"))
    with open(out_filename, mode='wt', newline='') as out_file:
        csv.writer(out_file, delimiter=",").writerows(rows)
if __name__ == "__main__":
    # Forward every command-line argument (input path, output path) as-is.
    cli_args = sys.argv[1:]
    tab_to_comma(*cli_args)
Capital Guessing¶
Write a guessing game program that takes a list of locations and their capitals and quizzes us on capitals.
You might want to test this program using us-state-capitals.csv.
Answers
import csv
import random
import sys
def get_capitals(file_name):
    """Open capitals CSV file and return list of capital tuples.

    The first row is treated as a header and skipped.  Each remaining
    non-blank row must have exactly two fields and is returned as a
    ``(place, capital)`` tuple, in file order.  A file with no data rows
    yields an empty list.

    Raises ValueError if a non-blank row does not have exactly two fields.
    """
    # Open the path that was passed in (not the function object itself).
    with open(file_name, newline='') as csv_file:
        csv_reader = csv.reader(csv_file)
        next(csv_reader, None)  # Skip headers row (tolerate an empty file)
        # csv.reader yields [] for blank lines; drop them before unpacking.
        rows = [row for row in csv_reader if row]
    return [(place, capital) for place, capital in rows]
def main(file_name):
    """Quiz the user on one randomly chosen capital until they get it right.

    Loads the (place, capital) pairs from ``file_name``, picks one at
    random, and keeps prompting on standard input until the answer
    matches the capital exactly.
    """
    place, capital = random.choice(get_capitals(file_name))
    prompt = "What is the capital of {}? ".format(place)
    # Each wrong answer prints a hint and asks again; the loop ends on the
    # first exact match.
    while input(prompt) != capital:
        print("That's not correct. Try again.")
    print("Correct! {} is the capital of {}".format(capital, place))
if __name__ == "__main__":
    # The first command-line argument names the capitals CSV file.
    capitals_path = sys.argv[1]
    main(capitals_path)
File Exercises¶
Country Capitals CSV¶
Download this country capitals file.
Write a program that opens the file, extracts the country name, capital city, and population from each row, and writes a new file to disk in the following format:
country,capital,population
China,Beijing,1330044000
India,New Delhi,1173108018
United States,Washington,310232863
The country rows should be sorted by largest population first.
Answers
import csv
import sys
from urllib.request import urlopen
def get_capital_data():
    """Download the country-info listing and return its rows as dicts.

    The response body is decoded as UTF-8, split into lines, and parsed
    as tab-delimited text whose first line supplies the column names.
    """
    url = "http://api.geonames.org/countryInfoCSV?username=truthfultechnology"
    with urlopen(url) as response:
        payload = response.read()
    lines = payload.decode('utf-8').splitlines()
    return list(csv.DictReader(lines, delimiter="\t"))
def rearrange_capital_data(old_rows):
    """Keep country/capital/population per row, largest population first.

    Each input row's 'name' value is stored under 'country'; 'capital'
    and 'population' are copied as-is.  The resulting rows are ordered by
    population (compared as integers) in descending order.
    """
    def population_of(entry):
        return int(entry['population'])

    trimmed = []
    for source in old_rows:
        trimmed.append({
            'country': source['name'],
            'capital': source['capital'],
            'population': source['population'],
        })
    trimmed.sort(key=population_of, reverse=True)
    return trimmed
def write_capital_file(file_name, csv_rows):
    """Write capital rows CSV file.

    Writes a ``country,capital,population`` header followed by one
    comma-delimited line per dict in ``csv_rows`` to ``file_name``.

    Raises ValueError (from csv.DictWriter) if a row contains keys other
    than the three header names.
    """
    headers = ["country", "capital", "population"]
    # newline='' lets the csv module control line endings (no doubled
    # "\r" on platforms that translate "\n").  An explicit UTF-8 encoding
    # keeps non-ASCII place names writable regardless of the locale.
    with open(file_name, mode='wt', newline='', encoding='utf-8') as out_file:
        writer = csv.DictWriter(out_file, delimiter=",",
                                fieldnames=headers)
        writer.writeheader()
        writer.writerows(csv_rows)
def main(out_file_name):
    """Fetch the capital data, reshape and sort it, and save it as CSV.

    ``out_file_name`` is the path of the file to write.
    """
    sorted_rows = rearrange_capital_data(get_capital_data())
    write_capital_file(out_file_name, sorted_rows)
if __name__ == "__main__":
    # The first command-line argument names the output CSV file.
    destination = sys.argv[1]
    main(destination)
Echo¶
Write a program that downloads gzipped data from the Internet, extracts it, and saves it on disk, all without using a temporary file.
You can use this gzipped response: https://httpbin.org/gzip
Answers
import gzip
from urllib.request import urlopen
data_url = "https://httpbin.org/gzip"

# Stream the HTTP response straight through the gzip decoder and write the
# decompressed bytes to data.txt; nothing is staged in a temporary file.
with urlopen(data_url) as response, \
        gzip.GzipFile(fileobj=response, mode='rb') as extracted, \
        open('data.txt', mode='wb') as data_file:
    data_file.write(extracted.read())