Error
I tried to build a large language model (gpt2-large) using the Neuron SDK with TensorFlow 2.x,
but it failed because the serialized graph exceeds the protobuf maximum size limit of 2 GB.
The same script works with the small GPT-2 model.
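For reference, gpt2-large has roughly 774M parameters, so once the variables are frozen into graph constants the weights alone take well over 2 GB in fp32. A quick, illustrative check before tracing (this sketch assumes the default fp32 weights, 4 bytes per parameter):
# Rough estimate of the constant data that freezing bakes into the GraphDef.
# Illustrative only; the serialized GraphDef can be even larger, as the
# ~9.3 GB figure in the log below shows.
import tensorflow as tf
from transformers.models.gpt2.modeling_tf_gpt2 import TFGPT2LMHeadModel

model = TFGPT2LMHeadModel.from_pretrained("gpt2-large")
num_params = sum(int(tf.size(w)) for w in model.weights)
print(f"{num_params} parameters, ~{num_params * 4 / 1e9:.1f} GB as fp32 constants")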
Available Solution
onnx/tensorflow-onnx#1090 fixed a similar issue by saving large tensors as external data instead of embedding them in the protobuf message.
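For illustration only, this is roughly what the external-data mechanism looks like on the ONNX side; file names are placeholders and it assumes a reasonably recent onnx package that provides onnx.external_data_helper:
# Sketch of the technique the tf2onnx fix relies on: move large tensors out of
# the protobuf message into a side file so the serialized proto stays < 2 GB.
# File names are placeholders, not from this project.
import onnx
from onnx.external_data_helper import convert_model_to_external_data

model = onnx.load("gpt2_large.onnx")  # assumes the model proto is still parseable
convert_model_to_external_data(
    model,
    all_tensors_to_one_file=True,
    location="gpt2_large_weights.bin",  # raw tensor bytes are written here
    size_threshold=1024,                # only externalize tensors larger than 1 KB
)
onnx.save(model, "gpt2_large_external.onnx")
Something equivalent for the frozen GraphDef produced inside tfn.trace would make it possible to compile models of this size.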
Code
import argparse
import os

import tensorflow as tf
import tensorflow.neuron as tfn
from transformers.models.gpt2.modeling_tf_gpt2 import TFGPT2LMHeadModel

os.environ["NEURON_CC_FLAGS"] = "--dynamic-batch-size"

# fmt: off
parser = argparse.ArgumentParser()
parser.add_argument("--batch-size", type=int, help="batch size", default=1)
parser.add_argument("--context-length", type=int, help="context length", default=8)
parser.add_argument("--slow-model-dir", type=str, help="neuron traced path", required=True)
# fmt: on


class GPTWrapper(tf.keras.Model):
    def __init__(self) -> None:
        super().__init__()
        self.model = TFGPT2LMHeadModel.from_pretrained("gpt2-large")
        self.model.config.use_cache = False

    def call(self, inputs):
        model_output = self.model(input_ids=inputs[0], attention_mask=inputs[1])
        model_output = self.model.serving_output(model_output)
        return model_output


def main(args: argparse.Namespace):
    model = GPTWrapper()
    print("trace slow model")
    slow_input_ids = tf.random.uniform(shape=[args.batch_size, args.context_length], minval=1, maxval=10, dtype=tf.int64)
    slow_attention_mask = tf.ones_like(slow_input_ids, dtype=tf.float32)
    # tfn.trace freezes the variables into graph constants, which is where the 2 GB protobuf limit is hit
    slow_model_neuron = tfn.trace(model, [slow_input_ids, slow_attention_mask])
    slow_model_neuron.save(args.slow_model_dir)
    print("done!")


if __name__ == "__main__":
    main(parser.parse_args())
Full Log
2022-03-23 02:39:20.027966: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
2022-03-23 02:39:21.013037: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2022-03-23 02:39:21.111699: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-03-23 02:39:21.111742: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ip-172-31-7-159): /proc/driver/nvidia/version does not exist
2022-03-23 02:39:21.283985: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-03-23 02:39:21.302856: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
trace slow model
WARNING:tensorflow:From /home/ubuntu/generation-serving/env/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py:5049: calling gather (from tensorflow.python.ops.array_ops) with validate_indices is deprecated and will be removed in a future version.
Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.
2022-03-23 02:39:33.403105: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2022-03-23 02:39:33.403261: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2022-03-23 02:39:33.420927: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 3000020000 Hz
Traceback (most recent call last):
File "build.py", line 76, in <module>
main(parser.parse_args())
File "build.py", line 53, in main
slow_model_neuron = tfn.trace(model, [slow_input_ids, slow_attention_mask])
File "/home/ubuntu/generation-serving/env/lib/python3.6/site-packages/tensorflow_neuron/python/_trace.py", line 176, in trace
cfunc = convert_variables_to_constants_v2(func, aggressive_inlining=False)
File "/home/ubuntu/generation-serving/env/lib/python3.6/site-packages/tensorflow/python/framework/convert_to_constants.py", line 1084, in convert_variables_to_constants_v2
converted_input_indices)
File "/home/ubuntu/generation-serving/env/lib/python3.6/site-packages/tensorflow/python/framework/convert_to_constants.py", line 1010, in _construct_concrete_function
new_output_names)
File "/home/ubuntu/generation-serving/env/lib/python3.6/site-packages/tensorflow/python/eager/wrap_function.py", line 650, in function_from_graph_def
wrapped_import = wrap_function(_imports_graph_def, [])
File "/home/ubuntu/generation-serving/env/lib/python3.6/site-packages/tensorflow/python/eager/wrap_function.py", line 628, in wrap_function
collections={}),
File "/home/ubuntu/generation-serving/env/lib/python3.6/site-packages/tensorflow/python/framework/func_graph.py", line 999, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/home/ubuntu/generation-serving/env/lib/python3.6/site-packages/tensorflow/python/eager/wrap_function.py", line 87, in __call__
return self.call_with_variable_creator_scope(self._fn)(*args, **kwargs)
File "/home/ubuntu/generation-serving/env/lib/python3.6/site-packages/tensorflow/python/eager/wrap_function.py", line 93, in wrapped
return fn(*args, **kwargs)
File "/home/ubuntu/generation-serving/env/lib/python3.6/site-packages/tensorflow/python/eager/wrap_function.py", line 648, in _imports_graph_def
importer.import_graph_def(graph_def, name="")
File "/home/ubuntu/generation-serving/env/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 535, in new_func
return func(*args, **kwargs)
File "/home/ubuntu/generation-serving/env/lib/python3.6/site-packages/tensorflow/python/framework/importer.py", line 405, in import_graph_def
producer_op_list=producer_op_list)
File "/home/ubuntu/generation-serving/env/lib/python3.6/site-packages/tensorflow/python/framework/importer.py", line 494, in _import_graph_def_internal
with c_api_util.tf_buffer(graph_def.SerializeToString()) as serialized:
ValueError: Message tensorflow.GraphDef exceeds maximum protobuf size of 2GB: 9339557543
Environment
tensorflow==2.5.2
tensorflow-estimator==2.5.0
tensorflow-neuron==2.5.2.2.1.14.0