blob: 9238af7a59536f2ac154134aef2bef3f87a3d959 [file] [log] [blame]
#!/usr/bin/env python
import datetime
import interactive # pylint: disable=relative-import
import itertools
import sys
import zipfile
def _batch(iterable, size=100):
"""Returns an iterator for the given iterable object that yields iterators
to go through the specified sequence in batches.
Example usage:
sequence = xrange(11)
for batchit in _batch(sequence, size=3):
for item in batchit:
print item,
print
Will output:
0 1 2
3 4 5
6 7 8
9 10
Args:
iterable: An iterable object:
size: An int which specifies size of a single batch.
Returns:
An iterator which returns iterators that split the given collection
in batches.
"""
global_it = iter(iterable)
while True:
next_slice_it = itertools.islice(global_it, size)
yield itertools.chain([next_slice_it.next()], next_slice_it)
def sanitize_html(fragment):
  """Sanitizes the specified fragment of HTML.

  The fragment is parsed by html5lib parser which uses HTMLSanitizer from
  melange.utils.htmlsanitizer as its tokenizer. The top level nodes of
  the parsed fragment are serialized with toxml() and concatenated.

  Args:
    fragment: A string containing HTML fragment.

  Returns:
    A string containing sanitized HTML fragment.
  """
  from html5lib import html5parser
  from melange.utils import htmlsanitizer

  parser = html5parser.HTMLParser(tokenizer=htmlsanitizer.HTMLSanitizer)
  parsed_fragment = parser.parseFragment(fragment)

  serialized_nodes = [node.toxml() for node in parsed_fragment.childNodes]
  return ''.join(serialized_nodes)
def sanitizeProposals():
  """Writes the current and the sanitized content of all proposals to files.

  For each proposal returned by interactive.deepFetchNDB for the Proposal
  query, two UTF-8 encoded files are created relative to the current working
  directory: '<name>.old' with the content as it is stored and '<name>.new'
  with the content after sanitization. <name> is the URL-safe string of
  the proposal key truncated to 50 characters. The proposals themselves are
  not changed by this function.
  """
  from summerofcode.models import proposal as proposal_model

  it = interactive.deepFetchNDB(lambda: proposal_model.Proposal.query())
  for proposal in it:
    # slicing is a no-op for names which are already short enough
    # NOTE(review): truncated names of different keys may collide, in which
    # case the later proposal overwrites the files of the earlier one.
    file_name = proposal.key.urlsafe()[:50]

    # 'with' guarantees that the files are closed also when encoding,
    # sanitizing or writing fails half way through
    with open('%s.old' % file_name, 'w') as old_file:
      old_file.write(proposal.content.encode('utf8'))

    with open('%s.new' % file_name, 'w') as new_file:
      new_file.write(sanitize_html(proposal.content).encode('utf8'))
def sampleProposals(key_strings):
  """Writes the current and the sanitized content of the specified proposals
  to files.

  For each proposal two UTF-8 encoded files are created relative to
  the current working directory: '<name>.old' with the content as it is
  stored and '<name>.new' with the content after sanitization. <name> is
  the URL-safe string of the proposal key truncated to 50 characters.
  The proposals themselves are not changed by this function.

  Args:
    key_strings: An iterable of URL-safe strings of proposal keys. Keys for
      which no entity exists are reported and skipped.
  """
  from google.appengine.ext import ndb
  from summerofcode.models import proposal as proposal_model

  # the strings are needed twice: to build the keys and to report the ones
  # for which nothing is found, so one-shot iterables are materialized first
  key_strings = list(key_strings)
  proposals = ndb.get_multi(
      [ndb.Key(urlsafe=key_string) for key_string in key_strings])

  for key_string, proposal in zip(key_strings, proposals):
    # get_multi returns None in place of entities which do not exist
    if proposal is None:
      # pylint: disable=print-statement
      print('No proposal found for key %s, skipping.' % key_string)
      # pylint: enable=print-statement
      continue

    # slicing is a no-op for names which are already short enough
    file_name = proposal.key.urlsafe()[:50]

    # 'with' guarantees that the files are closed also when encoding,
    # sanitizing or writing fails half way through
    with open('%s.old' % file_name, 'w') as old_file:
      old_file.write(proposal.content.encode('utf8'))

    with open('%s.new' % file_name, 'w') as new_file:
      new_file.write(sanitize_html(proposal.content).encode('utf8'))
def sanitizeOrganizationDescription(dry_run=True):
  """Replaces descriptions of existing organizations with their sanitized
  versions and backs up the previous descriptions in a zip file.

  The zip file is created and filled also in a dry run; the only difference
  is that the updated organizations are not put in that mode.

  Args:
    dry_run: A bool specifying whether the function is run in a dry run mode
      or not.
  """
  from melange.models import organization as org_model

  timestamp = datetime.datetime.utcnow().strftime('%Y-%m-%d-%H:%M')
  zipname = 'org-descriptions-%s.zip' % timestamp

  with zipfile.ZipFile(zipname, 'w') as archive:
    # pylint: disable=print-statement
    print('Existing content will be saved in %s file.' % zipname)
    # pylint: enable=print-statement

    orgs = interactive.deepFetchNDB(lambda: org_model.Organization.query())
    for org in orgs:
      # back up the description the way it is now, one entry per organization
      entry_name = '%s.txt' % org.key.id().replace('/', '-')
      previous_description = org.description
      archive.writestr(
          entry_name, previous_description.encode('utf-8'),
          zipfile.ZIP_DEFLATED)

      org.description = sanitize_html(previous_description)
      if dry_run:
        continue
      org.put()
def sanitizeProposalContent(dry_run=True):
  """Sanitizes content of the latest revision of each existing proposal.
  The old content is saved in a zip file.

  Proposals are processed in batches. Only revisions whose content is
  actually changed by sanitization are backed up and updated. The zip file
  is created and filled also in a dry run; the only difference is that
  the updated revisions are not put in that mode.

  Args:
    dry_run: A bool specifying whether the function is run in a dry run mode
      or not.
  """
  from google.appengine.ext import ndb
  from summerofcode.models import proposal as proposal_model

  zipname = 'proposals-%s.zip' % (
      datetime.datetime.utcnow().strftime('%Y-%m-%d-%H:%M'))
  with zipfile.ZipFile(zipname, 'w') as zfile:
    # pylint: disable=print-statement
    print('Existing content will be saved in %s file.' % zipname)
    # pylint: enable=print-statement

    it = interactive.deepFetchNDB(lambda: proposal_model.Proposal.query())
    for batch_it in _batch(it):
      # The batch is materialized so that each revision can be paired with
      # the proposal it belongs to. Relying on the loop variable which leaks
      # from a list comprehension would name every zip entry after the last
      # proposal in the batch.
      proposals = list(batch_it)
      revisions = ndb.get_multi(
          [proposal.latest_revision for proposal in proposals])

      to_put = []
      for proposal, revision in zip(proposals, revisions):
        # get_multi returns None in place of entities which do not exist
        if revision is None:
          continue

        sanitized_content = sanitize_html(revision.content)
        if revision.content == sanitized_content:
          continue

        # add current revision content to a zip file
        zfile.writestr(
            '%s-%d-rev%s.txt' % (
                proposal.key.parent().id().replace('/', '-'),
                proposal.key.id(), revision.key.id()),
            revision.content.encode('utf-8'), zipfile.ZIP_DEFLATED)
        revision.content = sanitized_content
        to_put.append(revision)

      if to_put and not dry_run:
        ndb.put_multi(to_put)
def main():
  """Sets up the interactive environment and starts a remote session in
  which the sanitization functions are available under short aliases.

  The aliases are: 'sod' for sanitizeOrganizationDescription, 'sp' for
  sanitizeProposals, 'spc' for sanitizeProposalContent and 'sample' for
  sampleProposals. Command line arguments, except for the script name,
  are passed on to interactive.remote.
  """
  interactive.setup()
  interactive.setDjango()

  context = dict(
      sod=sanitizeOrganizationDescription,
      sp=sanitizeProposals,
      spc=sanitizeProposalContent,
      sample=sampleProposals)
  remote_args = sys.argv[1:]

  interactive.remote(remote_args, context)
  interactive.main()
if __name__ == '__main__':
  # at least one argument (app_id) has to follow the script name
  if not sys.argv[1:]:
    # pylint: disable=print-statement
    print("Usage: %s app_id [host]" % (sys.argv[0],))
    sys.exit(1)

  # pylint: disable=print-statement
  print("Now you're at Python prompt, and you probably want to re-import "
        "this script and run functions from it")
  main()