blob: 2db30f52a521ea8f0c7fb0deac83a7d5c363fdf3 [file] [log] [blame]
# Copyright 2011 the Melange authors.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Downloads GSOC student forms."""
import argparse
import os
import re
import shutil
# sets up the environment so we need to do this before importing some
# other modules
import interactive # pylint: disable=relative-import
from google.appengine.datastore import datastore_query
from google.appengine.ext import blobstore
from soc.modules.gsoc.models import program as program_model
def WriteBlobAsFile(blob, filename):
  """Write the data from blobstore to local disk.

  Any existing file at filename is overwritten.

  Args:
    blob: a google.appengine.ext.blobstore.BlobInfo object.
    filename: path to the filename to write to
  """
  # BlobInfo.open() hands back a file-like reader over the blob contents.
  # Open the source first so that a failure to reach blobstore does not
  # leave an empty file behind on disk.
  src = blob.open()
  try:
    # Uploaded forms are arbitrary binary payloads (PDFs, images, ...), so
    # write in binary mode to keep them byte-for-byte intact on platforms
    # that translate newlines in text mode.
    with open(filename, "wb") as dst:
      shutil.copyfileobj(src, dst)
  finally:
    src.close()
def WriteBlobAsFileUnlessExists(blob, filename):
  """Write the data from blobstore to local disk, if it doesn't already exist.

  Only the presence of a regular file at filename is checked; its contents
  are not compared against the blob.

  Args:
    blob: a google.appengine.ext.blobstore.BlobInfo object.
    filename: path to the filename to write to
  """
  # TODO(robert): check size of file vs size of blob?
  if not os.path.isfile(filename):
    WriteBlobAsFile(blob, filename)
def MakeFilename(student, blob, directory):
  """Create the filename for a blob.

  Files are named based on the username of the student, with the
  extension stored in blobstore.

  The filename will consist of dash (-) separated components:
    - legal name of student
    - username of student

  Characters outside of [0-9A-Za-z-] will be replaced with _.

  Args:
    student: a Student object
    blob: a google.appengine.ext.blobstore.BlobInfo object.
    directory: directory to put file in

  Returns:
    string containing name of file to write to.
  """
  # Only the extension of the uploaded file is kept; the rest of the name
  # is rebuilt from the student's details.
  _, ext = os.path.splitext(blob.filename)
  outfile = student.legal_name + "-" + student.profile_id
  # The extension is appended after sanitizing, so its leading dot (and any
  # other character it contains) is preserved as uploaded.
  outfile = re.sub(r"[^a-zA-Z0-9-]", "_", outfile)
  return os.path.join(directory, outfile + ext)
def BulkDownloadTaxForms(students, directory):
  """Bulk download the tax forms for a batch of students to local disk.

  All the BlobInfo objects for the batch are fetched in one blobstore call
  instead of one call per student, which is probably more efficient.

  Args:
    students: Student objects to download the forms for. Each one is
      expected to have student_data.tax_form set.
    directory: Where to put the files.
  """
  if not students:
    # Nothing to fetch; skip the blobstore round trip entirely.
    return

  tax_form_blobs = [s.student_data.tax_form for s in students]
  blobs = blobstore.get(tax_form_blobs)
  for student, blob in zip(students, blobs):
    if blob is None:
      # blobstore.get() puts None in the result list for keys that have no
      # BlobInfo record; report those and carry on with the rest of the batch.
      # pylint: disable=print-statement
      print("No blob found for tax form of '%s'; skipping." %
            student.profile_id)
      continue
    path = MakeFilename(student, blob, directory)
    # A single parenthesized argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    # pylint: disable=print-statement
    print("Downloading form to '%s'..." % path)
    WriteBlobAsFileUnlessExists(blob, path)
def DownloadStudentTaxForms(program_path, outputdir):
  """Download all the student tax forms for a program to disk.

  Prints a message and returns without downloading anything if the output
  directory cannot be created or the program cannot be found.

  Args:
    program_path: the name of the program. "google/gsoc2014"
    outputdir: directory to write the forms to. will be created if
      it doesn't exist.
  """
  outputdir = os.path.abspath(outputdir)
  if not os.path.exists(outputdir):
    try:
      os.makedirs(outputdir)
    except OSError:
      # Deliberately not re-raised: the isdir() check below reports the
      # failure (and also covers a path that exists but is not a directory).
      pass
  if not os.path.isdir(outputdir):
    # pylint: disable=print-statement
    print("Could not create output dir: %s" % outputdir)
    return

  program = program_model.GSoCProgram.get_by_key_name(program_path)
  if not program:
    # pylint: disable=print-statement
    print('Could not find GSoC program "%s"' % program_path)
    return

  # this can't be imported at the top of the file because it triggers
  # initialization of the site singleton which requires the DB
  # connection to be already setup.
  import summerofcode.logic.profile as profile_logic

  query = profile_logic.queryAcceptedStudentsForProgram(program.key())

  # Page through the accepted students 100 at a time, resuming each fetch
  # from the cursor returned by the previous one.
  cursor = datastore_query.Cursor()
  more = True
  while more:
    results, cursor, more = query.fetch_page(100, start_cursor=cursor)
    students_with_forms = [s for s in results if s.student_data.tax_form]
    BulkDownloadTaxForms(students_with_forms, outputdir)
def main():
  """Parse the command line and download the tax forms for one program."""
  args = argparse.ArgumentParser(description="Download Tax Forms")
  args.add_argument("--output", "-o", dest="outputdir", default="/tmp/taxforms",
                    help="write files to target DIR", metavar="DIR")
  args.add_argument("--program", "-p", dest="program_name", default="",
                    help='full program key name (such as "google/gsoc2014")',
                    metavar="PROGRAM", required=True)
  # NOTE(review): only the help text of this option was present; the flag
  # names and dest are a reconstruction and must be confirmed against the
  # upstream script. Nothing in this function consumes the parsed value, so
  # also confirm whether it should be passed to the environment setup.
  args.add_argument("--app", "-a", dest="app_name", default="",
                    help="Name of the application. i.e. s~google-melange")
  options = args.parse_args()
  DownloadStudentTaxForms(options.program_name, options.outputdir)
if __name__ == "__main__":