blob: c53f0c8964437d7a8fafad935578ac2495f724ac [file] [log] [blame]
#!/usr/bin/env python2.5
#
# Copyright 2011 the Melange authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tasks related to the collection of statistics for GSoC programs."""
from google.appengine.ext import db
from google.appengine.ext import deferred
from google.appengine.runtime import DeadlineExceededError
from django.utils import simplejson
from django.conf.urls.defaults import url
from soc.models import countries
from soc.models.statistic_info import StatisticInfo
from soc.modules.gsoc.models.profile import GSoCProfile
from soc.modules.gsoc.models.program import GSoCProgram
from soc.modules.gsoc.models.proposal import GSoCProposal
from soc.modules.gsoc.models.statistic_data import GSoCStatisticData
from soc.modules.gsoc.models.statistic_info import GSoCStatisticInfo
from soc.modules.gsoc.statistics import mapping
# Key names of the statistics which are collected by iterating over
# GSoCProfile entities.
PROFILE_SPECIFIC = [
    'profiles',
    'students',
    'students_per_country',
    'mentors',
    'mentors_per_country',
    'admins',
    'proposals_per_student',
    'students_with_proposals',
    'students_with_proposals_per_country',
]

# Key names of the statistics which are collected by iterating over
# GSoCProposal entities.
PROPOSAL_SPECIFIC = [
    'proposals',
    'proposals_per_organization',
]
class NotImplementedStatistic(Exception):
    """Raised when initial data is requested for an unknown statistic name."""
class AbstractStatisticService(object):
    """Base class for statistic services which are exposed as Django views.

    Instances are callable, so they can be passed directly as the view of
    a URL pattern. Each call is forwarded to processRequest(), which has to
    be implemented by subclasses.
    """

    def __call__(self, request, *args, **kwargs):
        """Handles a request by delegating it to processRequest().

        Args:
          request: the request object passed by the URL dispatcher
          args: positional arguments captured from the URL
          kwargs: keyword arguments captured from the URL

        Returns:
          The value returned by processRequest(). When that value is None,
          an empty HttpResponse is returned instead, because Django raises
          ValueError for a view which does not return a response object.
        """
        response = self.processRequest(request, args, kwargs)
        if response is None:
            # imported lazily, only when a handler did not build a response
            from django.http import HttpResponse
            response = HttpResponse()
        return response

    def processRequest(self, request, args, kwargs):
        """Processes the request; subclasses must override this method.

        Args:
          request: the request object passed by the URL dispatcher
          args: tuple of positional arguments captured from the URL
          kwargs: dictionary of keyword arguments captured from the URL

        Raises:
          NotImplementedError: always, in this base implementation
        """
        raise NotImplementedError(
            'The process request method not implemented.')
class CreateStatisticInfoService(AbstractStatisticService):
    """Service which registers the known statistics in GSoCStatisticInfo."""

    def djangoURLPatterns(self):
        """Returns the list of URL patterns served by this service."""
        pattern = url(r'^gsoc/statistic_info/create$', self)
        return [pattern]

    def processRequest(self, request, args, kwargs):
        """Appends a Statistic for each mapped name and saves the result.

        One StatisticInfo.Statistic is appended to the GSoCStatisticInfo
        instance for every name in mapping.STATISTIC_NAMES, then the
        instance is stored.
        """
        info = GSoCStatisticInfo.getInstance()

        for name in mapping.STATISTIC_NAMES:
            # False is passed so that, by default, the statistic is hidden
            new_statistic = StatisticInfo.Statistic(name, False)
            info.appendStatistic(new_statistic)

        info.put()
class CollectStatisticService(AbstractStatisticService):
    """Base service which collects statistics by iterating over a model.

    The entities of _model are processed in batches of _batch_size. Each
    batch is handled by its own deferred task, which schedules the next one
    when it is done.

    Subclasses set _model and _model_specific and implement
    _initializeStatistics() and _processEntity().
    """

    # model class whose entities are iterated over
    _model = None

    # key names of the GSoCStatisticData entities updated by the service
    _model_specific = []

    # number of entities fetched and processed by a single task
    _batch_size = 10

    # empty default only; initialize() binds a fresh dictionary on the
    # instance so that state is never shared through the class
    _statistics = {}

    def initialize(self):
        """Loads the statistic entities and decodes their JSON data.

        The working state is stored in a dictionary created for this
        instance. Writing into the class level dictionary would share the
        state between all subclasses and instances living in one process,
        so that finalize() could save stale entries left by another run.
        """
        entities = GSoCStatisticData.get_by_key_name(self._model_specific)

        statistics = {}
        for entity in entities:
            statistics[entity.key().name()] = {
                'entity': entity,
                'data': simplejson.loads(entity.data),
                'dirty': False,
            }
        self._statistics = statistics

    def finalize(self):
        """Serializes and stores the statistics which are flagged as dirty."""
        to_put = []
        for statistic in self._statistics.itervalues():
            if statistic['dirty']:
                entity = statistic['entity']
                entity.data = simplejson.dumps(statistic['data'])
                to_put.append(entity)

        # there is no need to call the datastore when nothing has changed
        if to_put:
            db.put(to_put)

    def processRequest(self, request, args, kwargs):
        """Resets the statistics and starts the first collecting task."""
        # first, we need to create or clear statistic entities
        self._initializeStatistics()

        # start the first of the tasks which will collect the actual data
        deferred.defer(self._continue, None)

    def _initializeStatistics(self):
        """Creates or clears the statistic entities; subclass hook."""
        raise NotImplementedError('Subclasses should implement this method.')

    def _processEntity(self, entity):
        """Updates the loaded statistics for one entity; subclass hook.

        Args:
          entity: an entity of _model fetched by _continue()
        """
        raise NotImplementedError(
            'The process entity method not implemented.')

    def _continue(self, start_key):
        """Processes one batch of entities and schedules the next task.

        Args:
          start_key: key of the last entity processed by the previous task
              or None if the processing should start from the beginning
        """
        self.initialize()

        query = self._model.all()
        if start_key:
            query.filter('__key__ > ', start_key)

        try:
            entities = query.fetch(self._batch_size)

            if not entities:
                # all the entities have been processed
                return

            for entity in entities:
                self._processEntity(entity)

            self.finalize()

            # start_key is advanced only after the batch has been saved
            start_key = entities[-1].key()
        except DeadlineExceededError:
            # nothing happens, we just try processing the same batch again
            pass

        # The bound method is serialized together with this instance, so
        # the loaded entities are dropped to keep the task payload small.
        # The next task reloads them in initialize().
        self._statistics = {}

        # delegate work to the next task
        deferred.defer(self._continue, start_key)
class CreateStatisticService(AbstractStatisticService):
    """Base service which creates or resets GSoCStatisticData entities.

    Subclasses set _model_specific and implement _createInitialData().
    """

    # key names of the statistics handled by the concrete service
    _model_specific = []

    def __init__(self):
        """Fetches up to 1000 programs used to build the initial data."""
        program_query = GSoCProgram.all()
        self.programs = program_query.fetch(1000)

    def processRequest(self, request, args, kwargs):
        """Creates the statistic named in kwargs or all of them.

        Args:
          request: the request object (not used)
          args: positional URL arguments (not used)
          kwargs: keyword URL arguments; may contain 'key_name'
        """
        requested = kwargs.get('key_name')

        # if specific key_name is not specified, initialize all statistic
        if requested:
            key_names = [requested]
        else:
            key_names = self._model_specific

        self._createStatistics(key_names)

    def createStatistics(self):
        """Creates or resets every statistic listed in _model_specific."""
        self._createStatistics(self._model_specific)

    def _createStatistics(self, key_names):
        """Builds the entities for the given key names and stores them."""
        db.put([self._createStatistic(name) for name in key_names])

    def _createStatistic(self, key_name):
        """Returns an unsaved entity holding the initial data.

        An entity which already exists under key_name is reused and its
        data is overwritten; otherwise a new entity is constructed.
        """
        initial_data = self._createInitialData(key_name)

        existing = GSoCStatisticData.get_by_key_name(key_name)
        if existing:
            existing.data = initial_data
            return existing

        return GSoCStatisticData(key_name=key_name, data=initial_data)

    def _createInitialData(self, key_name):
        """Returns the initial data for the statistic; subclass hook."""
        raise NotImplementedError('Subclasses should implement this method.')

    def _createPerProgramInitialData(self):
        """Returns a dictionary mapping each program key name to zero."""
        return dict(
            (program.key().name(), 0) for program in self.programs)

    def _createPerProgramPerCountryInitialData(self):
        """Returns zeroed per country counters for each program key name."""
        return dict(
            (program.key().name(),
             dict((country, 0)
                  for country in countries.COUNTRIES_AND_TERRITORIES))
            for program in self.programs)
class CreateProfileSpecificStatisticService(CreateStatisticService):
    """Service which creates the initial data of profile statistics."""

    _model_specific = PROFILE_SPECIFIC

    def djangoURLPatterns(self):
        """Returns the list of URL patterns served by this service."""
        create_all = url(r'^gsoc/statistic/create/profile$', self)
        create_one = url(
            r'^gsoc/statistic/create/profile/(?P<key_name>(\w+))$', self)
        return [create_all, create_one]

    def _createInitialData(self, key_name):
        """Returns the JSON encoded initial data for the given statistic.

        Raises:
          NotImplementedStatistic: if key_name is not a known statistic
        """
        factories = {
            'admins': self._createAdmins,
            'profiles': self._createProfiles,
            'mentors': self._createMentors,
            'mentors_per_country': self._createMentorsPerCountry,
            'proposals_per_student': self._createProposalsPerStudent,
            'students': self._createStudents,
            'students_per_country': self._createStudentsPerCountry,
            'students_with_proposals': self._createStudentsWithProposals,
            'students_with_proposals_per_country':
                self._createStudentsWithProposalsPerCountry,
        }

        factory = factories.get(key_name)
        if factory is None:
            raise NotImplementedStatistic(
                'The statistic with name %s has not been defined.' % key_name)

        return simplejson.dumps(factory())

    def _createAdmins(self):
        """Returns zeroed per program counters."""
        return self._createPerProgramInitialData()

    def _createMentors(self):
        """Returns zeroed per program counters."""
        return self._createPerProgramInitialData()

    def _createMentorsPerCountry(self):
        """Returns zeroed per program, per country counters."""
        return self._createPerProgramPerCountryInitialData()

    def _createProfiles(self):
        """Returns zeroed per program counters."""
        return self._createPerProgramInitialData()

    def _createProposalsPerStudent(self):
        """Returns zeroed per program counters for 0 to 25 proposals."""
        return self._createPerProgramPerCounter(25)

    def _createStudents(self):
        """Returns zeroed per program counters."""
        return self._createPerProgramInitialData()

    def _createStudentsPerCountry(self):
        """Returns zeroed per program, per country counters."""
        return self._createPerProgramPerCountryInitialData()

    def _createStudentsWithProposals(self):
        """Returns zeroed per program counters."""
        return self._createPerProgramInitialData()

    def _createStudentsWithProposalsPerCountry(self):
        """Returns zeroed per program, per country counters."""
        return self._createPerProgramPerCountryInitialData()

    def _createPerProgramPerCounter(self, limit):
        """Returns zeroed counters for 0..limit for each program key name.

        Args:
          limit: the highest counter value (inclusive) to create
        """
        return dict(
            (program.key().name(),
             dict((value, 0) for value in xrange(limit + 1)))
            for program in self.programs)
class CreateProposalSpecificStatisticService(CreateStatisticService):
    """Service which creates the initial data of proposal statistics."""

    _model_specific = PROPOSAL_SPECIFIC

    def djangoURLPatterns(self):
        """Returns the list of URL patterns served by this service."""
        create_all = url(r'^gsoc/statistic/create/proposal$', self)
        create_one = url(
            r'^gsoc/statistic/create/proposal/(?P<key_name>(\w+))$', self)
        return [create_all, create_one]

    def _createInitialData(self, key_name):
        """Returns the JSON encoded initial data for the given statistic.

        Raises:
          NotImplementedStatistic: if key_name is not a known statistic
        """
        factories = {
            'proposals': self._createProposals,
            'proposals_per_organization':
                self._createProposalsPerOrganization,
        }

        factory = factories.get(key_name)
        if factory is None:
            raise NotImplementedStatistic(
                'The statistic with name %s has not been defined.' % key_name)

        return simplejson.dumps(factory())

    def _createProposals(self):
        """Returns zeroed per program counters."""
        return self._createPerProgramInitialData()

    def _createProposalsPerOrganization(self):
        """Returns an empty dictionary for each program key name."""
        return dict(
            (program.key().name(), {}) for program in self.programs)
class CollectProfileSpecificStatistics(CollectStatisticService):
    """Service which collects the statistics based on GSoCProfile entities."""

    _model = GSoCProfile
    _model_specific = PROFILE_SPECIFIC

    def djangoURLPatterns(self):
        """Returns the list of URL patterns served by this service."""
        return [
            url(r'^gsoc/statistic/collect/profile$', self)
        ]

    def _initializeStatistics(self):
        """Creates or resets all the profile specific statistics."""
        CreateProfileSpecificStatisticService().createStatistics()

    def _processEntity(self, entity):
        """Updates the statistics for a single profile.

        Args:
          entity: the profile entity to process
        """
        # check if the profile is active
        if entity.status == 'invalid':
            return

        self._collectProfiles(entity)

        # check if the profile represents a student
        is_student = entity.student_info is not None
        if is_student:
            self._collectStudentSpecificStatistics(entity)
        elif entity.is_mentor or entity.is_org_admin:
            # organization admins are counted as mentors as well
            self._collectMentorSpecificStatistics(entity)
            if entity.is_org_admin:
                self._collectAdminSpecificStatistics(entity)

    def _collectAdmins(self, entity):
        """Counts the profile in the 'admins' statistic."""
        self._collectPerProgramStatistic(entity, 'admins')

    def _collectMentors(self, entity):
        """Counts the profile in the 'mentors' statistic."""
        self._collectPerProgramStatistic(entity, 'mentors')

    def _collectMentorsPerCountry(self, entity):
        """Counts the profile in the 'mentors_per_country' statistic."""
        self._collectPerProgramPerCountryStatistic(
            entity, 'mentors_per_country')

    def _collectProfiles(self, entity):
        """Counts the profile in the 'profiles' statistic."""
        self._collectPerProgramStatistic(entity, 'profiles')

    def _collectProposalsPerStudent(self, entity):
        """Counts the student in the bucket of their number of proposals.

        Args:
          entity: a profile whose student_info is set
        """
        program_key_name = entity.scope.key().name()

        # the buckets are keyed by strings after the JSON round trip
        number = str(entity.student_info.number_of_proposals)

        statistic = self._statistics['proposals_per_student']
        buckets = statistic['data'][program_key_name]

        # a bucket may be missing for a number above the pre-created range
        buckets[number] = buckets.get(number, 0) + 1

        # without this flag finalize() would never save the statistic
        statistic['dirty'] = True

    def _collectStudents(self, entity):
        """Counts the profile in the 'students' statistic."""
        self._collectPerProgramStatistic(entity, 'students')

    def _collectStudentsPerCountry(self, entity):
        """Counts the profile in the 'students_per_country' statistic."""
        self._collectPerProgramPerCountryStatistic(
            entity, 'students_per_country')

    def _collectStudentsWithProposals(self, entity):
        """Counts the profile in the 'students_with_proposals' statistic."""
        self._collectPerProgramStatistic(entity, 'students_with_proposals')

    def _collectStudentsWithProposalsPerCountry(self, entity):
        """Counts the profile in 'students_with_proposals_per_country'."""
        self._collectPerProgramPerCountryStatistic(
            entity, 'students_with_proposals_per_country')

    def _collectAdminSpecificStatistics(self, entity):
        """Updates all the statistics which concern organization admins."""
        self._collectAdmins(entity)

    def _collectMentorSpecificStatistics(self, entity):
        """Updates all the statistics which concern mentors."""
        self._collectMentors(entity)
        self._collectMentorsPerCountry(entity)

    def _collectStudentSpecificStatistics(self, entity):
        """Updates all the statistics which concern students.

        Args:
          entity: a profile whose student_info is set
        """
        self._collectStudents(entity)
        self._collectStudentsPerCountry(entity)

        student_info = entity.student_info
        if student_info.number_of_proposals:
            self._collectStudentsWithProposals(entity)
            self._collectStudentsWithProposalsPerCountry(entity)

        # every student is counted here; the initial data contains a
        # bucket for zero proposals
        self._collectProposalsPerStudent(entity)

    def _collectPerProgramStatistic(self, entity, key_name):
        """Increments the counter of the profile's program.

        Args:
          entity: the profile; its scope identifies the program
          key_name: key name of the statistic to update
        """
        program_key_name = entity.scope.key().name()
        statistic = self._statistics[key_name]
        statistic['data'][program_key_name] += 1
        statistic['dirty'] = True

    def _collectPerProgramPerCountryStatistic(self, entity, key_name):
        """Increments the counter of the profile's program and country.

        Args:
          entity: the profile; its scope identifies the program and its
              res_country identifies the country
          key_name: key name of the statistic to update
        """
        program_key_name = entity.scope.key().name()
        country = entity.res_country
        statistic = self._statistics[key_name]
        statistic['data'][program_key_name][country] += 1
        statistic['dirty'] = True

    def _collectPerProgramPerCounterStatistic(self, entity, key_name):
        """Unfinished helper; it does not update any statistic.

        NOTE(review): nothing in this module calls this method and it only
        reads the program key name. Complete or remove it.
        """
        program_key_name = entity.scope.key().name()
class CollectProposalSpecificStatistics(CollectStatisticService):
    """Service which collects the statistics based on GSoCProposal entities."""

    _model = GSoCProposal
    _model_specific = PROPOSAL_SPECIFIC

    def djangoURLPatterns(self):
        """Returns the list of URL patterns served by this service."""
        pattern = url(r'^gsoc/statistic/collect/proposal$', self)
        return [pattern]

    def _initializeStatistics(self):
        """Creates or resets all the proposal specific statistics."""
        creator = CreateProposalSpecificStatisticService()
        creator.createStatistics()

    def _processEntity(self, entity):
        """Updates both proposal statistics for a single proposal.

        Args:
          entity: the proposal entity to process
        """
        self._collectProposals(entity)
        self._collectProposalsPerOrganization(entity)

    def _collectProposals(self, entity):
        """Counts the proposal in the 'proposals' statistic."""
        self._collectPerProgramStatistic(entity, 'proposals')

    def _collectProposalsPerOrganization(self, entity):
        """Increments the counter of the proposal's organization.

        The counter of an organization which has not been seen for the
        program yet starts from one.
        """
        program_key_name = entity.program.key().name()
        org_key_name = entity.org.key().name()

        record = self._statistics['proposals_per_organization']
        per_org = record['data'][program_key_name]
        per_org[org_key_name] = per_org.get(org_key_name, 0) + 1

        record['dirty'] = True

    def _collectPerProgramStatistic(self, entity, key_name):
        """Increments the counter of the proposal's program.

        Args:
          entity: the proposal; its program property identifies the program
          key_name: key name of the statistic to update
        """
        record = self._statistics[key_name]
        program_key_name = entity.program.key().name()
        record['data'][program_key_name] += 1
        record['dirty'] = True