| #!/usr/bin/env python |
| # |
| # Copyright 2014 the Melange authors. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """Converts the GSoC Schools spreadsheet to a python dictionary. |
| |
| Converts the GSoC Schools spreadsheet (in tsv format) to a python |
| dictionary suitable for inclusion in app/soc/models/universities.py. |
| |
| The input file is a tab delimited file containing three columns: |
| country, school, and (optional and unused) url. The first two lines |
| are skipped as headers. |
| |
| Output is written to STDOUT. |
| """ |
| |
| import collections |
| import pprint |
| import sys |
| |
| |
def _parse_universities(lines):
    """Group school names by country from tab-separated rows.

    Args:
      lines: iterable of text lines, each holding a country, a school and
        an optional url separated by tabs. Header lines must already have
        been consumed by the caller.

    Returns:
      A (universities, count) tuple: a plain dict mapping each country to
      the list of its schools in input order, and the number of schools
      that were kept.
    """
    universities = collections.defaultdict(list)
    count = 0

    for line in lines:
        # Drop the line terminator first so that it cannot end up inside
        # the school name when the url column is missing.
        fields = line.rstrip("\r\n").split("\t")

        # The url column is optional and unused, and a row may carry extra
        # trailing columns; only the first two fields matter. Rows without
        # a school column (e.g. blank lines) are skipped instead of
        # aborting the whole conversion.
        if len(fields) < 2:
            continue

        country, school = fields[0], fields[1]
        if country and school:
            count += 1
            universities[country].append(school)

    # Hand back a plain dict so that pprint can make it prettier than it
    # would a collections.defaultdict.
    return dict(universities), count


def main(argv):
    """Print the schools of a tsv file as a country -> schools dictionary.

    The dictionary is pretty-printed to STDOUT; the number of extracted
    schools is reported on STDERR.

    Args:
      argv: command line arguments; argv[0] is the program name and
        argv[1] the path of the tab-delimited schools file.

    Raises:
      SystemExit: with status 1 when the number of arguments is wrong.
    """
    if len(argv) != 2:
        # Usage goes to STDERR so it never mixes with the dictionary that
        # callers redirect from STDOUT.
        sys.stderr.write("Usage: %s <schools.tsv>\n" % argv[0])
        sys.exit(1)

    # The context manager closes the input file even if parsing fails.
    with open(argv[1]) as universities_file:
        # first two lines are headers
        universities_file.readline()
        universities_file.readline()

        universities, count = _parse_universities(universities_file)

    pp = pprint.PrettyPrinter()
    pp.pprint(universities)

    sys.stderr.write("%d schools extracted.\n" % count)
| |
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main(sys.argv)