| #!/usr/bin/python2.7 |
| # |
| # Copyright 2011 the Melange authors. |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """Downloads GSOC student forms.""" |
| |
| import argparse |
| import os |
| import re |
| import shutil |
| |
| # sets up the environment so we need to do this before importing some |
| # other modules |
| import interactive # pylint: disable=relative-import |
| interactive.setup() |
| interactive.setDjango() |
| |
| from google.appengine.datastore import datastore_query |
| from google.appengine.ext import blobstore |
| from soc.modules.gsoc.models import program as program_model |
| |
| |
def WriteBlobAsFile(blob, filename):
  """Write the data from blobstore to local disk.

  The destination is opened in binary mode so the bytes read from the blob
  are copied verbatim. Both the blob reader and the local file are closed
  once the copy finishes or fails.

  Args:
    blob: a google.appengine.ext.blobstore.BlobInfo object.
    filename: path to the filename to write to
  """
  # Open the blob first: if that fails, no empty file is left behind on
  # disk to be mistaken for a finished download.
  src = blob.open()
  try:
    with open(filename, "wb") as dst:
      shutil.copyfileobj(src, dst)
  finally:
    src.close()
| |
| |
def WriteBlobAsFileUnlessExists(blob, filename):
  """Download a blob to disk, leaving a file already at that path untouched.

  Args:
    blob: a google.appengine.ext.blobstore.BlobInfo object.
    filename: path to the filename to write to
  """
  # TODO(robert): check size of file vs size of blob?
  already_on_disk = os.path.isfile(filename)
  if already_on_disk:
    return
  WriteBlobAsFile(blob, filename)
| |
| |
def MakeFilename(student, blob, directory):
  """Build the local path that a student's blob should be saved under.

  The base name consists of dash (-) separated components:
    - legal name of student
    - profile id of student
  Characters outside of [0-9A-Za-z-] in the base name are replaced with _.
  The extension of the filename stored in blobstore is appended unchanged.

  Args:
    student: a Student object
    blob: a google.appengine.ext.blobstore.BlobInfo object.
    directory: directory to put file in

  Returns:
    string containing name of file to write to.
  """
  extension = os.path.splitext(blob.filename)[1]
  raw_name = "-".join((student.legal_name, student.profile_id))
  safe_name = re.sub("[^a-zA-Z0-9-]", "_", raw_name)
  return os.path.join(directory, safe_name + extension)
| |
| |
def BulkDownloadTaxForms(students, directory):
  """Bulk download the tax forms for a list of students to local disk.

  All the BlobInfo objects are fetched from blobstore in a single call
  instead of one call per student, which is probably more efficient.

  A student whose tax form cannot be found in blobstore is reported and
  skipped, so one missing blob does not abort the rest of the batch.

  Args:
    students: Student objects to download the forms for. Each one is read
      for its student_data.tax_form.
    directory: Where to put the files.
  """
  tax_form_blobs = [s.student_data.tax_form for s in students]
  if not tax_form_blobs:
    # Nothing to download; don't bother blobstore with an empty request.
    return

  blobs = blobstore.get(tax_form_blobs)
  for student, blob in zip(students, blobs):
    if blob is None:
      # blobstore.get() puts None in the list for a key it cannot find.
      # pylint: disable=print-statement
      print("No tax form blob found for '%s', skipping." % student.profile_id)
      continue
    path = MakeFilename(student, blob, directory)
    # pylint: disable=print-statement
    print("Downloading form to '%s'..." % path)
    WriteBlobAsFileUnlessExists(blob, path)
| |
| |
def DownloadStudentTaxForms(program_path, outputdir):
  """Download all the student tax forms for a program to disk.

  Prints a message and returns without downloading anything when the
  output directory is unusable or the program cannot be found.

  Args:
    program_path: the name of the program. "google/gsoc2014"
    outputdir: directory to write the forms to. will be created (along
      with any missing parent directories) if it doesn't exist.
  """
  outputdir = os.path.abspath(outputdir)
  if not os.path.exists(outputdir):
    # makedirs rather than mkdir so that a nested output path works too.
    os.makedirs(outputdir)
  if not os.path.isdir(outputdir):
    # The path exists but is not a directory; there is nowhere to write.
    # pylint: disable=print-statement
    print("Could not create output dir: %s" % outputdir)
    return

  program = program_model.GSoCProgram.get_by_key_name(program_path)
  if not program:
    # pylint: disable=print-statement
    print('Could not find GSoC program "%s"' % program_path)
    return

  # this can't be imported at the top of the file because it triggers
  # initialization of the site singleton which requires the DB
  # connection to be already setup.
  import summerofcode.logic.profile as profile_logic

  query = profile_logic.queryAcceptedStudentsForProgram(program.key())

  # Walk the query results one page of 100 at a time, resuming each fetch
  # from the cursor returned by the previous one.
  cursor = datastore_query.Cursor()
  more = True
  while more:
    results, cursor, more = query.fetch_page(100, start_cursor=cursor)
    students_with_forms = [s for s in results if s.student_data.tax_form]
    BulkDownloadTaxForms(students_with_forms, outputdir)
| |
| |
def main():
  """Parse the command line and download the tax forms it asks for."""
  parser = argparse.ArgumentParser(description="Download Tax Forms")
  parser.add_argument(
      "--output", "-o", dest="outputdir", default="/tmp/taxforms",
      help="write files to target DIR", metavar="DIR")
  parser.add_argument(
      "--program", "-p", dest="program_name", default="",
      help='full program key name (such as "google/gsoc2014")',
      metavar="PROGRAM", required=True)
  parser.add_argument(
      "app_id", help="Name of the application. i.e. s~google-melange")
  options = parser.parse_args()

  # Set up the remote connection for the given app id, then run the download.
  interactive.setupRemote(options.app_id)
  DownloadStudentTaxForms(options.program_name, options.outputdir)


if __name__ == "__main__":
  main()