File Handling Answers

CSV Exercises

Re-order

Read a CSV file, swap the first and second columns, and write the CSV back out to a new file.

Answers

With DictReader/DictWriter:

import csv
import sys

def re_order(in_filename, out_filename):
    """Read CSV file, swap first two columns, and output new file.

    The first row of ``in_filename`` is treated as the header row.  The
    header order is rearranged so the second column comes first, and
    DictWriter then emits every row's values in that new order.

    Raises ValueError if the header row has fewer than two columns.
    """
    # newline='' hands newline handling to the csv module: line breaks inside
    # quoted fields are preserved on read, and no extra "\r" is added on
    # platforms that translate "\n" when writing.
    with open(in_filename, newline='') as csv_file:
        reader = csv.DictReader(csv_file)
        rows = list(reader)
    first, second, *rest = reader.fieldnames
    headers = [second, first] + rest
    with open(out_filename, mode='wt', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=headers)
        writer.writeheader()
        writer.writerows(rows)


if __name__ == "__main__":
    # Forward every command-line argument after the script name to re_order.
    cli_args = sys.argv[1:]
    re_order(*cli_args)

Without DictReader/DictWriter (possibly headerless files):

import csv
import sys

def re_order(in_filename, out_filename):
    """Read CSV file, swap first two columns, and output new file.

    No header row is assumed: every row, including the first, has its first
    two values swapped.  Rows with fewer than two values (for example blank
    lines, which csv.reader yields as empty lists) are written unchanged
    instead of aborting the whole conversion.
    """
    # newline='' hands newline handling to the csv module so line breaks
    # inside quoted fields survive the round trip.
    with open(in_filename, mode='rt', newline='') as in_file:
        rows = [
            ([row[1], row[0]] + row[2:]) if len(row) >= 2 else row
            for row in csv.reader(in_file)
        ]
    with open(out_filename, mode='wt', newline='') as out_file:
        csv.writer(out_file).writerows(rows)


if __name__ == "__main__":
    # Forward every command-line argument after the script name to re_order.
    cli_args = sys.argv[1:]
    re_order(*cli_args)

Re-sort

Read a CSV file, sort its rows by the second column, and write the result back to the same file.

Answers

import csv
import sys

def re_sort(file_name):
    """Re-sort the given CSV file by the second column.

    The first row is treated as the header and stays first.  The remaining
    rows are ordered by the second column's value, compared as strings, and
    the result overwrites ``file_name`` in place.

    Raises IndexError if the header row has fewer than two columns.
    """
    # newline='' hands newline handling to the csv module so line breaks
    # inside quoted fields survive the round trip.
    with open(file_name, newline='') as csv_file:
        reader = csv.DictReader(csv_file)
        rows = list(reader)
    fieldnames = reader.fieldnames
    second_column = fieldnames[1]
    sorted_rows = sorted(rows, key=lambda r: r[second_column])
    # All rows are already in memory, so it is safe to reopen the same path
    # for writing.
    with open(file_name, mode='wt', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(sorted_rows)

if __name__ == "__main__":
    # The first command-line argument is the CSV file to sort in place.
    target = sys.argv[1]
    re_sort(target)

Tab to Comma

Write a program that reads a CSV file using tabs as delimiters and converts it to one using commas.

Answers

import csv
import sys


def tab_to_comma(in_filename, out_filename):
    """Read tab-delimited file and write CSV file back out.

    Every row of ``in_filename`` is parsed with a tab delimiter and written
    to ``out_filename`` with a comma delimiter.  The csv writer adds quoting
    where a value needs it (for example when it contains a comma).
    """
    # newline='' hands newline handling to the csv module so line breaks
    # inside quoted fields survive the round trip.
    with open(in_filename, mode='rt', newline='') as in_file:
        rows = list(csv.reader(in_file, delimiter="\t"))
    with open(out_filename, mode='wt', newline='') as out_file:
        csv.writer(out_file, delimiter=",").writerows(rows)


if __name__ == "__main__":
    # Forward every command-line argument after the script name.
    cli_args = sys.argv[1:]
    tab_to_comma(*cli_args)

Capital Guessing

Write a guessing game program that takes a list of locations and their capitals and quizzes us on capitals.

You might want to test this program using us-state-capitals.csv.

Answers

import csv
import random
import sys


def get_capitals(file_name):
    """Open capitals CSV file and return list of capital tuples.

    The first row is treated as a header and skipped.  Every remaining row
    must hold exactly two values, returned as a ``(place, capital)`` tuple;
    a row of any other length raises ValueError.  An empty file yields an
    empty list.
    """
    # Open the path that was passed in (not the function object itself), and
    # use newline='' so the csv module handles line endings.
    with open(file_name, newline='') as csv_file:
        csv_reader = csv.reader(csv_file)
        # Skip headers row; the default keeps an empty file from raising
        # StopIteration.
        next(csv_reader, None)
        return [(place, capital) for place, capital in csv_reader]


def main(file_name):
    """Quiz the user on one randomly chosen capital until they get it right.

    Loads the (place, capital) pairs from ``file_name``, picks one at random,
    and keeps prompting until the typed answer equals the capital exactly.
    """
    place, capital = random.choice(get_capitals(file_name))
    prompt = "What is the capital of {}? ".format(place)
    # Each wrong answer prints a hint and asks again; the loop ends on the
    # first exact match.
    while input(prompt) != capital:
        print("That's not correct.  Try again.")
    print("Correct! {} is the capital of {}".format(capital, place))


if __name__ == "__main__":
    # The first command-line argument is the capitals CSV file.
    capitals_path = sys.argv[1]
    main(capitals_path)

File Exercises

Country Capitals CSV

Download this country capitals file.

Write a program that opens the file, extracts the country name, capital city, and population from each row, and writes a new file to disk in the following format:

country,capital,population
China,Beijing,1330044000
India,New Delhi,1173108018
United States,Washington,310232863

The country rows should be sorted by largest population first.

Answers

import csv
import sys
from urllib.request import urlopen


def get_capital_data():
    """Fetch the country info from the capitals API as a list of row dicts.

    The response body is decoded as UTF-8 and parsed as tab-delimited text
    whose first line supplies the column names.
    """
    url = "http://api.geonames.org/countryInfoCSV?username=truthfultechnology"
    with urlopen(url) as response:
        body = response.read()
    # csv.DictReader accepts any iterable of lines, so the decoded text is
    # split up front instead of wrapping the response in a text stream.
    lines = body.decode('utf-8').splitlines()
    return list(csv.DictReader(lines, delimiter="\t"))


def rearrange_capital_data(old_rows):
    """Keep country/capital/population per row, largest population first.

    Each input row's ``name`` value is stored under ``country``; ``capital``
    and ``population`` keep their names.  Any other keys are dropped.
    Population values are compared as integers.
    """
    # (output key, input key) pairs, in the order the output dict is built.
    column_map = (
        ('country', 'name'),
        ('capital', 'capital'),
        ('population', 'population'),
    )
    trimmed = [
        {new_key: row[old_key] for new_key, old_key in column_map}
        for row in old_rows
    ]
    trimmed.sort(key=lambda entry: int(entry['population']), reverse=True)
    return trimmed


def write_capital_file(file_name, csv_rows):
    """Write capital rows CSV file.

    ``csv_rows`` is an iterable of dicts with exactly the keys ``country``,
    ``capital`` and ``population``.  A header row is written first, followed
    by one comma-delimited line per dict.
    """
    headers = ["country", "capital", "population"]
    # newline='' stops the platform from turning the csv module's "\r\n" line
    # endings into "\r\r\n"; an explicit UTF-8 encoding keeps non-ASCII names
    # from failing under a narrower platform default encoding.
    with open(file_name, mode='wt', newline='', encoding='utf-8') as out_file:
        writer = csv.DictWriter(out_file, delimiter=",",
                                fieldnames=headers)
        writer.writeheader()
        writer.writerows(csv_rows)


def main(out_file_name):
    """Fetch the capital data, reshape and sort it, and save it to a file."""
    # Same three steps as a pipeline: fetch -> rearrange -> write.
    write_capital_file(
        out_file_name,
        rearrange_capital_data(get_capital_data()),
    )


if __name__ == "__main__":
    # The first command-line argument is the output CSV path.
    output_path = sys.argv[1]
    main(output_path)

Echo

Write a program that downloads gzipped data from the Internet, extracts it, and saves it to disk, all without using a temporary file.

You can use this gzipped response: https://httpbin.org/gzip

Answers

import gzip
from urllib.request import urlopen
import shutil

data_url = "https://httpbin.org/gzip"
# Chain the three streams: HTTP response -> gzip decoder -> output file.
# copyfileobj moves the data in fixed-size chunks, so the decompressed payload
# is never held in memory all at once and no temporary file is needed.
with urlopen(data_url) as response, \
        gzip.GzipFile(fileobj=response, mode='rb') as extracted, \
        open('data.txt', mode='wb') as data_file:
    shutil.copyfileobj(extracted, data_file)