blob: af4b7c92ec1ddcb6229b6f81e9b1f0c29941f4b8 [file] [log] [blame]
# Copyright 2012 the Melange authors.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Downloads GCI student forms."""
import argparse
import os
import re
import shutil
# The interactive module sets up the environment, so it must be imported
# before some of the other modules below.
import interactive # pylint: disable=relative-import
from google.appengine.datastore import datastore_query
from google.appengine.ext import blobstore
from google.appengine.ext import ndb
from melange.models import profile as profile_model
from soc.modules.gci.models import program as program_model
def WriteBlobAsFile(blob, filename):
    """Write the data from blobstore to local disk.

    Any existing file at `filename` is overwritten.

    Args:
      blob: a google.appengine.ext.blobstore.BlobInfo object.
      filename: path to the filename to write to
    """
    # BlobInfo.open() returns a file-like reader over the blob's contents.
    src = blob.open()
    try:
        # Forms are arbitrary uploads (PDFs, images, ...), so write them in
        # binary mode to keep the bytes intact on every platform.
        with open(filename, "wb") as dst:
            shutil.copyfileobj(src, dst)
    finally:
        # Release the reader even if the local write fails.
        src.close()
def WriteBlobAsFileUnlessExists(blob, filename):
    """Write the data from blobstore to local disk, if it doesn't already exist.

    Only the presence of a regular file at `filename` is checked; its
    contents are not compared against the blob.

    Args:
      blob: a google.appengine.ext.blobstore.BlobInfo object.
      filename: path to the filename to write to
    """
    # TODO(robert): check size of file vs size of blob?
    if os.path.isfile(filename):
        # Already downloaded (e.g. by an earlier run); leave it untouched.
        return
    WriteBlobAsFile(blob, filename)
def MakeFilename(student, suffix, blob, directory):
    """Create the filename for a blob.

    Files are named after the student, with the extension of the filename
    stored in blobstore.

    The filename will consist of dash (-) separated components:
      - legal name of student (student.legal_name)
      - profile id of student (student.profile_id)
      - an optional suffix

    Characters outside of [0-9A-Za-z-] will be replaced with _. The
    extension is appended afterwards and is kept exactly as stored.

    Args:
      student: a Student object
      suffix: suffix to add to the filename; skipped when empty
      blob: a google.appengine.ext.blobstore.BlobInfo object.
      directory: directory to put file in

    Returns:
      string containing name of file to write to.
    """
    _, ext = os.path.splitext(blob.filename)
    components = [student.legal_name, student.profile_id]
    if suffix:
        components.append(suffix)
    # Sanitize only the name part so the "." of the extension survives.
    basename = re.sub(r"[^a-zA-Z0-9-]", "_", "-".join(components))
    return os.path.join(directory, basename + ext)
def BulkDownloadForms(students, key_function, kind, directory):
    """Bulk download one kind of form for a set of students to local disk.

    All the BlobInfo objects are fetched in one call rather than one call
    per student, which is probably more efficient. Files that already exist
    locally are not downloaded again.

    Args:
      students: Student objects to download the forms for
      key_function: a function that takes one parameter (a student), and
          returns the BlobKey to download
      kind: what kind of form is this downloading. will be used as a suffix
          on the filename.
      directory: Where to put the files.
    """
    # Materialize once: the students are walked twice (keys, then zip).
    students = list(students)
    if not students:
        # Nothing to fetch; avoid a pointless blobstore round trip.
        return

    form_blobkeys = [key_function(s) for s in students]
    blobs = blobstore.BlobInfo.get(form_blobkeys)
    for student, blob in zip(students, blobs):
        if blob is None:
            # BlobInfo.get() yields None for keys that no longer exist in
            # blobstore; report and carry on with the remaining students.
            print("No %s form blob found for student '%s'; skipping."
                  % (kind, student.profile_id))
            continue
        path = MakeFilename(student, kind, blob, directory)
        # A single parenthesized argument prints the same text whether
        # print is a statement (Python 2) or a function.
        print("Downloading form to '%s'..." % path)
        WriteBlobAsFileUnlessExists(blob, path)
def DownloadStudentForms(program_path, outputdir):
    """Download all the student forms for a program to disk.

    Enrollment and consent forms are downloaded for every student profile
    of the program that has at least one completed task. Problems with the
    output directory or the program lookup are reported on stdout and the
    function returns without downloading anything.

    Args:
      program_path: the name of the program. "google/gci2013"
      outputdir: directory to write the forms to. will be created if
          it doesn't exist.
    """
    outputdir = os.path.abspath(outputdir)
    if not os.path.exists(outputdir):
        try:
            os.makedirs(outputdir)
        except OSError:
            # Reported by the isdir() check right below.
            pass
    if not os.path.isdir(outputdir):
        print("Could not create output dir: %s" % outputdir)
        return

    program = program_model.GCIProgram.get_by_key_name(program_path)
    if not program:
        print('Could not find GCI program "%s"' % program_path)
        # Without this return, program.key() below would fail on None.
        return

    program_key = ndb.Key.from_old_key(program.key())
    query = profile_model.Profile.query(
        profile_model.Profile.program == program_key,
        # "== True" builds an ndb filter node; it must not be rewritten as
        # "is True" or dropped.
        # pylint: disable=singleton-comparison
        profile_model.Profile.is_student == True,
        profile_model.Profile.student_data.number_of_completed_tasks > 0)

    # (filename suffix, accessor returning the form's BlobKey), in the
    # order the forms are downloaded for each page.
    form_kinds = (
        ("enrollment", lambda s: s.student_data.enrollment_form),
        ("consent", lambda s: s.student_data.consent_form),
    )

    # Page through the profiles 100 at a time.
    cursor = datastore_query.Cursor()
    more = True
    while more:
        results, cursor, more = query.fetch_page(100, start_cursor=cursor)
        for kind, get_form_key in form_kinds:
            # Only students that actually uploaded this form have a key.
            students_with_form = [s for s in results if get_form_key(s)]
            BulkDownloadForms(
                students_with_form, get_form_key, kind, outputdir)
def main():
    """Parse the command line and download the forms of the given program.

    --program is mandatory; --output defaults to /tmp/gciforms.
    """
    args = argparse.ArgumentParser(description="Download Tax Forms")
    args.add_argument("--output", "-o", dest="outputdir",
                      default="/tmp/gciforms",
                      help="write files to target DIR", metavar="DIR")
    args.add_argument("--program", "-p", dest="program_name", default="",
                      help='full program key name (such as "google/gci2013")',
                      metavar="PROGRAM", required=True)
    # NOTE(review): only the help text of this option was present in the
    # reviewed text; the flag and dest names are reconstructed, and the
    # parsed value is not consumed below. Confirm against the original.
    args.add_argument("--app", "-a", dest="app_name",
                      help="Name of the application. i.e. s~google-melange")
    options = args.parse_args()
    DownloadStudentForms(options.program_name, options.outputdir)
if __name__ == "__main__":