| #!/usr/bin/python2.7 |
| # |
| # Copyright 2012 the Melange authors. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """Downloads GCI student forms.""" |
| |
| import argparse |
| import os |
| import re |
| import shutil |
| |
| # sets up the environment so we need to do this before importing some |
| # other modules |
| import interactive # pylint: disable=relative-import |
| interactive.setup() |
| interactive.setDjango() |
| |
| from google.appengine.datastore import datastore_query |
| from google.appengine.ext import blobstore |
| from google.appengine.ext import ndb |
| from melange.models import profile as profile_model |
| from soc.modules.gci.models import program as program_model |
| |
| |
def WriteBlobAsFile(blob, filename):
    """Write the data from blobstore to local disk.

    The blob contents are copied byte for byte, so the destination is opened
    in binary mode. Both the blob reader and the local file are closed when
    the copy finishes, whether or not it succeeded.

    Args:
      blob: a google.appengine.ext.blobstore.BlobInfo object.
      filename: path of the local file to write to. An existing file at this
        path is overwritten.
    """
    # Open the blob before creating the local file, so that a blob that
    # cannot be opened does not leave an empty file behind.
    src = blob.open()
    try:
        with open(filename, "wb") as dst:
            shutil.copyfileobj(src, dst)
    finally:
        src.close()
| |
| |
def WriteBlobAsFileUnlessExists(blob, filename):
    """Download a blob to local disk, skipping files that are already there.

    Args:
      blob: a google.appengine.ext.blobstore.BlobInfo object.
      filename: path of the local file to write to.
    """
    # TODO(robert): check size of file vs size of blob?
    if os.path.isfile(filename):
        # Only the presence of the file is checked, not its contents.
        return
    WriteBlobAsFile(blob, filename)
| |
| |
def MakeFilename(student, suffix, blob, directory):
    """Build the local path that a student's form should be saved under.

    The base name is made of dash (-) separated components:
      - the legal name of the student
      - the profile id of the student
      - the suffix, when one is given
    Every character of the base name outside of [0-9A-Za-z-] is replaced
    with _. The extension of the filename stored in blobstore is then
    appended unchanged.

    Args:
      student: a student profile; its legal_name and profile_id are used.
      suffix: optional text appended to the base name (skipped when empty).
      blob: a google.appengine.ext.blobstore.BlobInfo object; only its
        filename is read, to pick the extension.
      directory: directory to put the file in.

    Returns:
      string containing the path of the file to write to.
    """
    extension = os.path.splitext(blob.filename)[1]
    components = [student.legal_name, student.profile_id]
    if suffix:
        components.append(suffix)
    basename = re.sub("[^a-zA-Z0-9-]", "_", "-".join(components))
    return os.path.join(directory, basename + extension)
| |
| |
def BulkDownloadForms(students, key_function, kind, directory):
    """Bulk download one kind of form for a group of students to local disk.

    The BlobInfo objects for all the students are fetched from blobstore in
    one call instead of one call per student. Each form is then written
    under the name chosen by MakeFilename, unless that file already exists.

    Students whose form can no longer be found in blobstore are reported and
    skipped, so one missing blob does not abort the whole download.

    Args:
      students: Student objects to download the forms for
      key_function: a function that takes one parameter (a student), and
        returns the BlobKey to download
      kind: what kind of form is this downloading. will be used as a suffix
        on the filename.
      directory: Where to put the files.
    """
    # Materialize the input so it can be walked twice (once to collect the
    # keys, once to pair students with blobs) even if it is a one-shot
    # iterable.
    students = list(students)
    if not students:
        # Nothing to fetch; avoid a pointless blobstore call.
        return

    form_blobkeys = [key_function(s) for s in students]
    blobs = blobstore.BlobInfo.get(form_blobkeys)
    for student, blob in zip(students, blobs):
        if blob is None:
            # BlobInfo.get() yields None for keys that are not in blobstore.
            print("Could not find %s form for '%s', skipping."
                  % (kind, student.profile_id))
            continue
        path = MakeFilename(student, kind, blob, directory)
        print("Downloading form to '%s'..." % path)
        WriteBlobAsFileUnlessExists(blob, path)
| |
| |
def DownloadStudentForms(program_path, outputdir):
    """Download all the student forms for a program to disk.

    Student profiles of the program that have at least one completed task
    are fetched in pages. For each page, the enrollment forms and the
    consent forms that have been uploaded are downloaded to outputdir.

    Args:
      program_path: the name of the program. "google/gci2013"
      outputdir: directory to write the forms to. will be created (along
        with any missing parent directories) if it doesn't exist.

    Returns:
      None. Problems with the output directory or an unknown program are
      reported on standard output and end the download early.
    """
    outputdir = os.path.abspath(outputdir)
    if not os.path.exists(outputdir):
        os.makedirs(outputdir)
    if not os.path.isdir(outputdir):
        # The path exists but is something other than a directory; there is
        # nowhere to write the forms, so stop before querying the datastore.
        print("Could not create output dir: %s" % outputdir)
        return

    program = program_model.GCIProgram.get_by_key_name(program_path)
    if not program:
        print('Could not find GCI program "%s"' % program_path)
        return

    # The program is a db entity while profiles are ndb models, so the key
    # is converted before it is used in the ndb query.
    program_key = ndb.Key.from_old_key(program.key())
    # "== True" is intentional: ndb builds query filters from the comparison
    # operators applied to model properties.
    query = profile_model.Profile.query(
        profile_model.Profile.program == program_key,
        profile_model.Profile.is_student == True,
        profile_model.Profile.student_data.number_of_completed_tasks > 0)

    cursor = datastore_query.Cursor()
    more = True
    while more:
        results, cursor, more = query.fetch_page(100, start_cursor=cursor)
        students_with_enrollment_forms = [
            s for s in results if s.student_data.enrollment_form]
        students_with_consent_forms = [
            s for s in results if s.student_data.consent_form]

        BulkDownloadForms(students_with_enrollment_forms,
                          lambda s: s.student_data.enrollment_form,
                          "enrollment", outputdir)

        BulkDownloadForms(students_with_consent_forms,
                          lambda s: s.student_data.consent_form,
                          "consent", outputdir)
| |
| |
def main():
    """Parse the command line, connect to the app and download the forms."""
    parser = argparse.ArgumentParser(description="Download Tax Forms")
    parser.add_argument(
        "--output", "-o", dest="outputdir", default="/tmp/gciforms",
        metavar="DIR", help="write files to target DIR")
    parser.add_argument(
        "--program", "-p", dest="program_name", default="", required=True,
        metavar="PROGRAM",
        help='full program key name (such as "google/gci2013")')
    parser.add_argument(
        "app_id", help="Name of the application. i.e. s~google-melange")
    options = parser.parse_args()

    # The remote connection has to be in place before any datastore access.
    interactive.setupRemote(options.app_id)
    DownloadStudentForms(options.program_name, options.outputdir)


if __name__ == "__main__":
    main()