Bug 1341214 - Add a small API to handle taskcluster queue and index requests. r=dustin

Various modules under taskcluster are doing ad-hoc URL formatting or requests to taskcluster services. While we could use the taskcluster client python module, it's kind of overkill for the simple requests done here. So instead of vendoring that module, create a smaller one with a limited set of functions we need. This changes the behavior of the get_artifact function to return a file-like object when the file is neither JSON nor YAML, but that branch was never used (and was actually returning an unassigned variable, so it was broken anyway). At the same time, make the function that does HTTP requests more error-resistant, using urllib3's Retry with a backoff factor. Also add a function that retrieves the list of artifacts, which, while currently unused, will be used by `mach artifact` shortly.
2017-02-17 12:04:48 +09:00
parent eb5916da95
commit c7945d8ea1
11 changed files with 110 additions and 70 deletions
--- a/taskcluster/taskgraph/util/taskcluster.py
+++ b/taskcluster/taskgraph/util/taskcluster.py
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import functools
+import os
+import yaml
+import requests
+from mozbuild.util import memoize
+from requests.packages.urllib3.util.retry import Retry
+from requests.adapters import HTTPAdapter
+
+
+# Outside of a task (no TASK_ID in the environment), hit the index and queue
+# services directly; inside a task, prefer the taskcluster proxy
+# (http://taskcluster/).
+if not os.environ.get('TASK_ID'):
+    INDEX_URL = 'https://index.taskcluster.net/v1/task/{}'
+    ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
+else:
+    INDEX_URL = 'http://taskcluster/index/v1/task/{}'
+    ARTIFACT_URL = 'http://taskcluster/queue/v1/task/{}/artifacts/{}'
+
+
+@memoize
+def _get_session():
+    """
+    Returns a requests session whose http:// and https:// adapters use a
+    urllib3 Retry policy (total=5, backoff factor 0.1) that also retries on
+    500, 502, 503 and 504 responses.
+    """
+    policy = Retry(total=5, backoff_factor=0.1,
+                   status_forcelist=[500, 502, 503, 504])
+    http = requests.Session()
+    # One adapter instance per scheme, both sharing the same retry policy.
+    for scheme in ('http://', 'https://'):
+        http.mount(scheme, HTTPAdapter(max_retries=policy))
+    return http
+
+
+def _do_request(url):
+    # Streamed GET through the session from _get_session(). The response
+    # status is not checked here; that is left to the caller.
+    return _get_session().get(url, stream=True)
+
+
+def get_artifact_url(task_id, path):
+    """
+    Returns the url of the artifact with the given path for the given task
+    id, obtained by filling in the ARTIFACT_URL template.
+    """
+    url = ARTIFACT_URL.format(task_id, path)
+    return url
+
+
+def get_artifact(task_id, path):
+    """
+    Returns the artifact with the given path for the given task id.
+
+    If the path ends with ".json" or ".yml", the content is deserialized as,
+    respectively, json or yaml, and the corresponding python data (usually
+    dict) is returned.
+    For other types of content, a file-like object is returned.
+
+    An HTTP error status is turned into an exception by
+    `response.raise_for_status()` before any content is looked at.
+    """
+    response = _do_request(get_artifact_url(task_id, path))
+    response.raise_for_status()
+    if path.endswith('.json'):
+        return response.json()
+    if path.endswith('.yml'):
+        # The document was fetched over the network: use the safe loader so
+        # that it cannot construct arbitrary python objects, which a plain
+        # `yaml.load` without an explicit Loader allows (and which newer
+        # PyYAML releases refuse to run at all).
+        return yaml.safe_load(response.text)
+    # Neither json nor yaml: hand back the underlying raw stream, with its
+    # `read` wrapped so that every call passes decode_content=True.
+    response.raw.read = functools.partial(response.raw.read,
+                                          decode_content=True)
+    return response.raw
+
+
+def list_artifacts(task_id):
+    """
+    Returns the value of the "artifacts" key of the json document served at
+    the artifacts url of the given task id.
+    """
+    # Formatting an empty path leaves a trailing slash on the url; strip it
+    # to address the artifact listing itself.
+    listing_url = get_artifact_url(task_id, '').rstrip('/')
+    reply = _do_request(listing_url)
+    reply.raise_for_status()
+    payload = reply.json()
+    return payload['artifacts']
+
+
+def find_task_id(index_path):
+    """
+    Returns the "taskId" value of the json document served by the index for
+    the given index path.
+    """
+    lookup_url = INDEX_URL.format(index_path)
+    reply = _do_request(lookup_url)
+    reply.raise_for_status()
+    document = reply.json()
+    return document['taskId']