Before, we'd open files and feed bytes to yaml.load(). When a str is fed to yaml.load(), it attempts to guess the encoding. It defaults to UTF-8 unless somebody set us up the BOM. This is probably OK. Except if the file isn't valid UTF-8, the exception will be raised in the bowels of YAML parsing and it may not be obvious the failure is due to invalid UTF-8 input versus say Python str/unicode coercion foo. We change all call sites that load YAML from a file to use codecs.open() to open the file in UTF-8 and perform UTF-8 decoding/validation at file read time. This should make any UTF-8 failures more obvious. Furthermore, it reinforces that our YAML files are UTF-8 and not some other encoding. I discovered this issue as part of trying to get emoji symbols to render on Treeherder. Unfortunately, it appears pyyaml detects many emoji as unprintable characters and refuses to load them. This makes me sad and makes me want to abandon pyyaml/YAML in favor of something that supports emoji :P MozReview-Commit-ID: AOvAruZFfnK
83 lines
2.9 KiB
Python
83 lines
2.9 KiB
Python
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
from __future__ import absolute_import, print_function, unicode_literals
|
|
|
|
import codecs
|
|
import logging
|
|
import os
|
|
import yaml
|
|
|
|
from . import base
|
|
from ..util.python_path import find_object
|
|
from ..transforms.base import TransformSequence, TransformConfig
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class TransformTask(base.Task):
    """
    Tasks of this class are generated by applying transformations to a sequence
    of input entities.  By default, it gets those inputs from YAML data in the
    kind directory, but subclasses may override `get_inputs` to produce them
    in some other way.
    """

    @classmethod
    def get_inputs(cls, kind, path, config, params, loaded_tasks):
        """
        Get the input elements that will be transformed into tasks.  The
        elements themselves are free-form, and become the input to the first
        transform.

        By default, this reads jobs from the `jobs` key of `config`, followed
        by the jobs in each YAML file named by `jobs-from` (resolved relative
        to `path`), but can be overridden in subclasses.  The entities are
        read from mappings, and the keys to those mappings are added in the
        `name` key of each entity.

        This is a generator: entities are yielded lazily, and each yielded
        entity is the original mapping object with `name` set on it in place.
        `params` and `loaded_tasks` are not used by this default
        implementation.
        """
        def jobs():
            # Use items() rather than the Python 2-only iteritems() so this
            # works on both Python 2 and Python 3, and matches
            # get_dependencies() below.
            for name, job in config.get('jobs', {}).items():
                yield name, job
            for filename in config.get('jobs-from', {}):
                # Each listed file is expected to contain a mapping of
                # job name -> job definition.
                file_jobs = load_yaml(path, filename)
                for name, job in file_jobs.items():
                    yield name, job

        for name, job in jobs():
            job['name'] = name
            logger.debug("Generating tasks for {} {}".format(kind, name))
            yield job

    @classmethod
    def load_tasks(cls, kind, path, config, params, loaded_tasks):
        """
        Build the list of tasks for this kind.

        The inputs from `get_inputs` are passed through every transform
        listed in `config['transforms']` (each entry is resolved with
        `find_object`), and each resulting item is wrapped in an instance of
        `cls`.  Returns the list of those instances.
        """
        inputs = cls.get_inputs(kind, path, config, params, loaded_tasks)

        transforms = TransformSequence()
        for xform_path in config['transforms']:
            transform = find_object(xform_path)
            transforms.add(transform)

        # perform the transformations
        trans_config = TransformConfig(kind, path, config, params)
        tasks = [cls(kind, t) for t in transforms(trans_config, inputs)]
        return tasks

    def __init__(self, kind, task):
        """
        Create a task from a transformed item.

        `task` must provide the keys `dependencies`, `label`, `attributes`
        and `task`; the latter three are forwarded to the base class
        constructor and `dependencies` is kept on the instance.
        """
        self.dependencies = task['dependencies']
        super(TransformTask, self).__init__(kind, task['label'],
                                            task['attributes'], task['task'])

    def get_dependencies(self, taskgraph):
        """
        Return this task's dependencies as a list of `(label, name)` tuples,
        built from the `name -> label` mapping in `self.dependencies`.
        `taskgraph` is not consulted.
        """
        return [(label, name) for name, label in self.dependencies.items()]

    def optimize(self):
        """
        Always return `(False, None)`.

        NOTE(review): presumably this means "not optimized away, no
        replacement task" -- confirm against the base class contract.
        """
        return False, None
|
|
|
|
|
|
def load_yaml(path, name):
    """
    Convenience method to load a YAML file in the kind directory.

    `name` is joined onto `path` to locate the file.  The file is opened
    through `codecs.open` with the UTF-8 codec, so the content is decoded
    while it is being read rather than inside the YAML parser.  Returns
    whatever `yaml.load` produces for the document.
    """
    yaml_path = os.path.join(path, name)
    with codecs.open(yaml_path, 'rb', 'utf-8') as stream:
        # NOTE(review): yaml.load is called without an explicit Loader;
        # newer PyYAML releases warn about or reject this form -- confirm
        # against the PyYAML version in use before changing it.
        document = yaml.load(stream)
    return document
|