阅读(3.9k) 书签 (0)

TensorFlow如何定义脚本语言运算符

2018-12-05 10:30 更新

#版权所有2015 TensorFlow作者.版权所有.

#根据Apache许可证版本2.0(“许可证”)获得许可;

#除了符合许可证外,您不得使用此文件.

#您可以在获得许可证副本

#http://www.apache.org/licenses/LICENSE-2.0

#除非适用法律要求或书面同意,软件

根据许可证分发的#按“现状”分发,

#没有任何形式的明示或暗示保证或条件.

#请参阅许可证以了解特定语言的管理权限和权限

#许可证下的#限制.

# ==============================================================================

""脚本语言运算符.请参阅@ {$ python / script_ops}指南.

@@ py_func

""

# pylint: disable=g-bad-name from __future__ import absolute_import from __future__ import division from __future__ import print_function import threading import numpy as np import six from tensorflow.python import pywrap_tensorflow from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.ops import gen_script_ops class FuncRegistry(object): """A helper class to keep track of registered py functions. FuncRegistry keeps a map from unique tokens (string) to python functions, which takes numpy arrays and outputs numpy arrays. """ def __init__(self): self._lock = threading.Lock() self._unique_id = 0 # GUARDED_BY(self._lock) self._funcs = {} def insert(self, func): """Registers `func` and returns a unique token for this entry.""" token = self._next_unique_token() self._funcs[token] = func return token def remove(self, token): """Removes the registered function corresponding to `token`.""" self._funcs.pop(token, None) @staticmethod def _convert(value, dtype=None): """Converts an arg to numpy, avoiding dangerous string and unicode dtypes. Numpy pads with zeros when using string and unicode dtypes if different components of a tensor have different lengths. This is bad: ignoring the padding is wrong for text data, and removing the padding is wrong for binary data. To avoid this bug, we redo the conversion using an object dtype. Additionally, we convert unicode strings to (byte-)strings for Python3 compatibility. Args: value: Value to convert to a numpy array. dtype: (Optional.) Desired NumPy type for the returned value. Returns: A numpy array. """ result = np.asarray(value, dtype=dtype, order="C") if result.dtype.char == "S" and result is not value: return np.asarray(value, order="C", dtype=object) elif result.dtype.char == "U" and result is not value: value = np.vectorize(lambda x: x.encode())(value) return np.asarray(value, order="C", dtype=object) elif result.dtype.char == "U": return result.astype(np.bytes_) else: return result def __call__(self, token, args): """Calls the registered function for `token` with args.""" func = self._funcs[token] if func is None: raise ValueError("callback %s is not found" % token) ret = func(*args) # Strings seem to lead to a memory leak here if they're not wrapped in a # list. if isinstance(ret, six.binary_type): ret = [ret] # Ensures that we return either a single numpy array or a list of numpy # arrays. if isinstance(ret, (tuple, list)): return [self._convert(x) for x in ret] else: return self._convert(ret) def size(self): """Returns how many functions are currently registered.""" return len(self._funcs) def _next_unique_token(self): """Returns a unique token.""" with self._lock: uid = self._unique_id self._unique_id += 1 return "pyfunc_%d" % uid # Global registry for py functions. _py_funcs = FuncRegistry() pywrap_tensorflow.InitializePyTrampoline(_py_funcs) class CleanupFunc(object): """A helper class to remove a registered function from _py_funcs.""" def __init__(self, token): self._token = token def __del__(self): _py_funcs.remove(self._token) def py_func(func, inp, Tout, stateful=True, name=None): """Wraps a python function and uses it as a TensorFlow op. Given a python function `func`, which takes numpy arrays as its inputs and returns numpy arrays as its outputs, wrap this function as an operation in a TensorFlow graph. The following snippet constructs a simple TensorFlow graph that invokes the `np.sinh()` NumPy function as a operation in the graph: ```python def my_func(x): # x will be a numpy array with the contents of the placeholder below return np.sinh(x) inp = tf.placeholder(tf.float32) y = tf.py_func(my_func, [inp], tf.float32) ``` **N.B.** The `tf.py_func()` operation has the following known limitations: * The body of the function (i.e. `func`) will not be serialized in a `GraphDef`. Therefore, you should not use this function if you need to serialize your model and restore it in a different environment. * The operation must run in the same address space as the Python program that calls `tf.py_func()`. If you are using distributed TensorFlow, you must run a `tf.train.Server` in the same process as the program that calls `tf.py_func()` and you must pin the created operation to a device in that server (e.g. using `with tf.device():`). Args: func: A Python function, which accepts a list of NumPy `ndarray` objects having element types that match the corresponding `tf.Tensor` objects in `inp`, and returns a list of `ndarray` objects (or a single `ndarray`) having element types that match the corresponding values in `Tout`. inp: A list of `Tensor` objects. Tout: A list or tuple of tensorflow data types or a single tensorflow data type if there is only one, indicating what `func` returns. stateful: (Boolean.) If True, the function should be considered stateful. If a function is stateless, when given the same input it will return the same output and have no observable side effects. Optimizations such as common subexpression elimination are only performed on stateless operations. name: A name for the operation (optional). Returns: A list of `Tensor` or a single `Tensor` which `func` computes. """ token = _py_funcs.insert(func) # We tie the registered function's life-time with the current # default graph. I.e., when the current graph is destroyed, we # should remove its py funcs. g = ops.get_default_graph() # pylint: disable=protected-access while isinstance(g, function._FuncGraph): # If the py_func was declared inside a _FuncGraph, its lifetime should be # bound to that of the outer graph instead. g = g._outer_graph cleanup = CleanupFunc(token) # TODO(zhifengc): Consider adding a Graph method to collect # `cleanup` objects in one of its member. if not hasattr(g, "_cleanup_py_funcs_used_in_graph"): g._cleanup_py_funcs_used_in_graph = [] # When g is destroyed, elements in _cleanup_py_funcs_used_in_graph # will be destroyed and their __del__ will remove the 'token' from # the funcs registry. g._cleanup_py_funcs_used_in_graph.append(cleanup) # pylint: enable=protected-access if isinstance(Tout, (list, tuple)): is_list_or_tuple = True else: Tout = [Tout] is_list_or_tuple = False # pylint: disable=protected-access if stateful: result = gen_script_ops._py_func( input=inp, token=token, Tout=Tout, name=name) else: result = gen_script_ops._py_func_stateless( input=inp, token=token, Tout=Tout, name=name) # pylint: enable=protected-access return result if is_list_or_tuple else result[0] ops.NotDifferentiable("PyFunc") ops.NotDifferentiable("PyFuncStateless")