# tubestation/taskcluster/taskgraph/task/transform.py

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from __future__ import absolute_import, print_function, unicode_literals

import codecs
import logging
import os
import yaml

from . import base
from ..util.python_path import find_object
from ..transforms.base import TransformSequence, TransformConfig

logger = logging.getLogger(__name__)


class TransformTask(base.Task):
    """
    Tasks of this class are generated by applying transformations to a sequence
    of input entities.  By default, it gets those inputs from YAML data in the
    kind directory, but subclasses may override `get_inputs` to produce them
    in some other way.
    """

    @classmethod
    def get_inputs(cls, kind, path, config, params, loaded_tasks):
        """
        Get the input elements that will be transformed into tasks.  The
        elements themselves are free-form, and become the input to the first
        transform.

        By default, this reads jobs from the `jobs` key, or from yaml files
        named by `jobs-from`, but can be overridden in subclasses.  The
        entities are read from mappings, and the keys to those mappings are
        added in the `name` key of each entity.

        This is a generator: nothing is read until it is iterated.  Note that
        each job mapping is modified in place when its `name` key is set.
        """
        def named_jobs():
            # Jobs given inline in the kind configuration come first ...
            for name, job in config.get('jobs', {}).items():
                yield name, job
            # ... followed by the jobs from each file listed in `jobs-from`,
            # in the order the files are listed.
            for filename in config.get('jobs-from', []):
                for name, job in load_yaml(path, filename).items():
                    yield name, job

        for name, job in named_jobs():
            job['name'] = name
            # Pass the values as logging arguments so the message is only
            # formatted when debug logging is actually enabled.
            logger.debug("Generating tasks for %s %s", kind, name)
            yield job

    @classmethod
    def load_tasks(cls, kind, path, config, params, loaded_tasks):
        """
        Build the tasks for this kind.

        The inputs from `get_inputs` are run through the transforms listed
        (as object paths resolved with `find_object`) under the required
        `transforms` key of `config`, and every resulting task description is
        wrapped in an instance of `cls`.  Returns the list of instances.
        """
        inputs = cls.get_inputs(kind, path, config, params, loaded_tasks)

        transforms = TransformSequence()
        for xform_path in config['transforms']:
            transforms.add(find_object(xform_path))

        # perform the transformations
        trans_config = TransformConfig(kind, path, config, params)
        return [cls(kind, t) for t in transforms(trans_config, inputs)]

    def __init__(self, kind, task):
        """
        Create a task from a transformed task description.

        `task` must be a mapping with the keys `dependencies` (kept on this
        instance), `label`, `attributes` and `task` (all three handed on to
        the base class constructor).
        """
        self.dependencies = task['dependencies']
        super(TransformTask, self).__init__(kind, task['label'],
                                            task['attributes'], task['task'])

    def get_dependencies(self, taskgraph):
        """
        Return this task's dependencies as a list of `(label, name)` tuples,
        built from the `name -> label` mapping in `self.dependencies`.  The
        `taskgraph` argument is not consulted.
        """
        return [(label, name) for name, label in self.dependencies.items()]

    def optimize(self):
        """
        Always return `(False, None)`.

        NOTE(review): presumably this means "not optimized away, no
        replacement task" -- confirm against the base class contract.
        """
        return False, None


def load_yaml(path, name):
    """
    Parse the YAML file `name` found in the kind directory `path`.

    The file is decoded as UTF-8 and the parsed document is returned.
    """
    yaml_path = os.path.join(path, name)
    with codecs.open(yaml_path, 'rb', 'utf-8') as stream:
        # NOTE(review): yaml.load is called without an explicit Loader.  The
        # kind files are presumably trusted in-tree data, but newer PyYAML
        # releases expect a Loader argument (or safe_load) -- confirm before
        # upgrading the library.
        parsed = yaml.load(stream)
    return parsed