diff options
8 files changed, 21 insertions, 710 deletions
diff --git a/chromium/third_party/tflite_support/patches/0014-remove-whitespace-tokenizer.patch b/chromium/third_party/tflite_support/patches/0014-remove-whitespace-tokenizer.patch new file mode 100644 index 00000000000..149356526f2 --- /dev/null +++ b/chromium/third_party/tflite_support/patches/0014-remove-whitespace-tokenizer.patch @@ -0,0 +1,21 @@ +From 3e2574d49dd6a93efef8de6c5256a428c9d9c784 Mon Sep 17 00:00:00 2001 +From: Robert Ogden <robertogden@chromium.org> +Date: Mon, 17 Oct 2022 13:09:01 -0700 +Subject: [PATCH] remove whitespace tokenizer + +--- + .../custom_ops/kernel/whitespace_tokenizer.cc | 227 ------------------ + .../custom_ops/kernel/whitespace_tokenizer.h | 31 --- + .../whitespace_tokenizer_op_resolver.cc | 32 --- + .../kernel/whitespace_tokenizer_op_resolver.h | 34 --- + ...hitespace_tokenizer_op_resolver_wrapper.cc | 29 --- + .../kernel/whitespace_tokenizer_test.cc | 189 --------------- + .../kernel/whitespace_tokenizer_test.py | 167 ------------- + 7 files changed, 709 deletions(-) + delete mode 100644 third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer.cc + delete mode 100644 third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer.h + delete mode 100644 third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_op_resolver.cc + delete mode 100644 third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_op_resolver.h + delete mode 100644 third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_op_resolver_wrapper.cc + delete mode 100644 third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_test.cc + diff --git a/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer.cc b/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer.cc deleted file mode 100644 index 8096a5008bd..00000000000 --- a/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer.cc +++ /dev/null @@ -1,227 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer.h" - -#include <algorithm> -#include <utility> -#include <vector> - -#include "libutf/utf.h" -#include "tensorflow/lite/context.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/string_util.h" - -constexpr int kInput = 0; -constexpr int kOutputValues = 0; -constexpr int kOutputRowSplitsStart = 1; - -namespace tflite { -namespace ops { -namespace custom { -namespace whitespace_tokenizer { - -// This TFLite op implements a whitespace tokenizer, and can output the -// tokens as either a padded tensor or a ragged tensor. -// -// If we're outputting a padded tensor, our outputs are: -// * A string tensor -// -// If we're outputting a ragged tensor, our outputs are: -// * A string tensor (the innermost values of the ragged tensor) -// * N int64 tensors (the row_splits of the ragged tensor, where N is the -// rank of the input tensor) - -inline bool OutputIsPaddedTensor(TfLiteNode* node) { - return NumOutputs(node) == 1; -} - -inline int charntorune(Rune* r, const char* s, int n) { - const int bytes_read = chartorune(r, const_cast<char*>(s)); - if (bytes_read > n) { - *r = Runeerror; - return 0; - } - return bytes_read; -} - -std::vector<std::pair<const char*, int>> Tokenize(StringRef str) { - const char* p = str.str; - int n = str.len; - - std::vector<std::pair<const char*, int>> tokens; - const char* start = nullptr; - while (n > 0) { - Rune r; - int c = charntorune(&r, p, n); - if (r == Runeerror) - break; - - if (isspacerune(r)) { - if (start != nullptr) { - tokens.push_back({start, p - start}); - } - start = nullptr; - } else { - if (start == nullptr) { - start = p; - } - } - - p += c; - n -= c; - } - if (start != nullptr) { - tokens.push_back({start, p - start}); - } - - return tokens; -} - -TfLiteStatus WritePaddedOutput( - const std::vector<std::vector<std::pair<const char*, int>>>& list_of_tokens, - const TfLiteTensor* input, - TfLiteTensor* output_values) { - TfLiteIntArray* output_shape = TfLiteIntArrayCreate(NumDimensions(input) + 1); - for (int i = 0; i < NumDimensions(input); ++i) { - output_shape->data[i] = SizeOfDimension(input, i); - } - - size_t max_tokens = 0; - for (const auto& tokens : list_of_tokens) { - max_tokens = std::max(max_tokens, tokens.size()); - } - - output_shape->data[NumDimensions(input)] = max_tokens; - DynamicBuffer buffer; - for (const auto& tokens : list_of_tokens) { - for (const auto& token : tokens) { - buffer.AddString(token.first, token.second); - } - for (int i = tokens.size(); i < max_tokens; ++i) { - buffer.AddString(nullptr, 0); - } - } - buffer.WriteToTensor(output_values, output_shape); - return kTfLiteOk; -} - -TfLiteStatus WriteRaggedOutput( - const std::vector<std::vector<std::pair<const char*, int>>>& list_of_tokens, - const TfLiteTensor* input, - TfLiteTensor* output_values, - std::vector<TfLiteTensor*> nested_row_splits) { - // The outer dimensions of the ragged tensor are all non-ragged. - for (int i = 0; i < nested_row_splits.size() - 1; ++i) { - int row_splits_step = SizeOfDimension(input, i + 1); - TfLiteTensor* row_splits = nested_row_splits[i]; - for (int j = 0; j < SizeOfDimension(row_splits, 0); ++j) { - row_splits->data.i64[j] = j * row_splits_step; - } - } - - // Generate the innermost row_splits and values tensors. - TfLiteTensor* row_splits = nested_row_splits.back(); - TfLiteIntArray* output_shape = TfLiteIntArrayCreate(1); - DynamicBuffer buffer; - int token_index = 0; - int row_splits_index = 0; - for (const auto& tokens : list_of_tokens) { - row_splits->data.i64[row_splits_index] = token_index; - for (const auto& token : tokens) { - buffer.AddString(token.first, token.second); - ++token_index; - } - ++row_splits_index; - } - row_splits->data.i64[row_splits_index] = token_index; - output_shape->data[0] = token_index; - buffer.WriteToTensor(output_values, output_shape); - return kTfLiteOk; -} - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - TfLiteTensor* output_values = GetOutput(context, node, kOutputValues); - SetTensorToDynamic(output_values); - - if (OutputIsPaddedTensor(node)) { - return kTfLiteOk; - } - - const TfLiteTensor* input = GetInput(context, node, kInput); - TF_LITE_ENSURE(context, NumDimensions(input) == - (NumOutputs(node) - kOutputRowSplitsStart)); - - // Resize the row_splits tensors. We're just adding a ragged inner - // dimension to the shape of the input tensor, so the size of the - // row_splits tensors can be calculated using the input tensor's shape. - int input_size = 1; - for (int i = 0; i < NumDimensions(input); ++i) { - input_size *= SizeOfDimension(input, i); - - TfLiteIntArray* row_splits_shape = TfLiteIntArrayCreate(1); - row_splits_shape->data[0] = input_size + 1; - TfLiteTensor* row_splits = - GetOutput(context, node, kOutputRowSplitsStart + i); - TF_LITE_ENSURE_STATUS( - context->ResizeTensor(context, row_splits, row_splits_shape)); - } - - return kTfLiteOk; -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const TfLiteTensor* input = GetInput(context, node, kInput); - int input_size = 1; - for (int i = 0; i < NumDimensions(input); ++i) { - input_size *= SizeOfDimension(input, i); - } - - std::vector<std::vector<std::pair<const char*, int>>> list_of_tokens; - list_of_tokens.reserve(input_size); - for (int i = 0; i < input_size; ++i) { - list_of_tokens.emplace_back(Tokenize(GetString(input, i))); - } - - TfLiteTensor* output_values = GetOutput(context, node, kOutputValues); - TF_LITE_ENSURE(context, IsDynamicTensor(output_values)); - - if (OutputIsPaddedTensor(node)) { - return WritePaddedOutput(list_of_tokens, input, output_values); - } - - std::vector<TfLiteTensor*> nested_row_splits; - nested_row_splits.reserve(NumDimensions(input)); - for (int i = 0; i < NumDimensions(input); ++i) { - TfLiteTensor* output_row_splits = - GetOutput(context, node, kOutputRowSplitsStart + i); - nested_row_splits.push_back(output_row_splits); - } - return WriteRaggedOutput(list_of_tokens, input, output_values, - nested_row_splits); -} - -} // namespace whitespace_tokenizer - -TfLiteRegistration* Register_tftext_WhitespaceTokenizer() { - static TfLiteRegistration r = {nullptr, nullptr, - whitespace_tokenizer::Prepare, - whitespace_tokenizer::Eval}; - return &r; -} - -} // namespace custom -} // namespace ops -} // namespace tflite diff --git a/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer.h b/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer.h deleted file mode 100644 index b190248087d..00000000000 --- a/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer.h +++ /dev/null @@ -1,31 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_SUPPORT_CUSTOM_OPS_KERNEL_WHITESPACE_TOKENIZER_H_ -#define TENSORFLOW_LITE_SUPPORT_CUSTOM_OPS_KERNEL_WHITESPACE_TOKENIZER_H_ - -#include "tensorflow/lite/context.h" - -namespace tflite { -namespace ops { -namespace custom { - -TfLiteRegistration* Register_tftext_WhitespaceTokenizer(); - -} // namespace custom -} // namespace ops -} // namespace tflite - -#endif // TENSORFLOW_LITE_SUPPORT_CUSTOM_OPS_KERNEL_WHITESPACE_TOKENIZER_H_ diff --git a/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_op_resolver.cc b/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_op_resolver.cc deleted file mode 100644 index 6166bc149bc..00000000000 --- a/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_op_resolver.cc +++ /dev/null @@ -1,32 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_op_resolver.h" - -#include "tensorflow/lite/mutable_op_resolver.h" -#include "tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer.h" - -namespace tflite { -namespace ops { -namespace custom { - -void AddWhitespaceTokenizerCustomOp(MutableOpResolver* resolver) { - resolver->AddCustom("tftext:WhitespaceTokenizer", - Register_tftext_WhitespaceTokenizer()); -} - -} // namespace custom -} // namespace ops -} // namespace tflite diff --git a/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_op_resolver.h b/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_op_resolver.h deleted file mode 100644 index 4f57d8d8010..00000000000 --- a/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_op_resolver.h +++ /dev/null @@ -1,34 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_LITE_SUPPORT_CUSTOM_OPS_KERNEL_WHITESPACE_TOKENIZER_OP_RESOLVER_H_ -#define TENSORFLOW_LITE_SUPPORT_CUSTOM_OPS_KERNEL_WHITESPACE_TOKENIZER_OP_RESOLVER_H_ - -#include "tensorflow/lite/mutable_op_resolver.h" - -namespace tflite { -namespace ops { -namespace custom { - -// Adds the WhitespaceTokenizer custom op to an op resolver. -// This function can be loaded using dlopen. Since C++ function names get -// mangled, declare this function as extern C, so its name is unchanged. -extern "C" void AddWhitespaceTokenizerCustomOp(MutableOpResolver* resolver); - -} // namespace custom -} // namespace ops -} // namespace tflite - -#endif // LETENSORFLOW_LITE_SUPPORT_CUSTOM_OPS_KERNEL_WHITESPACE_TOKENIZER_OP_RESOLVER_H_ diff --git a/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_op_resolver_wrapper.cc b/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_op_resolver_wrapper.cc deleted file mode 100644 index 03d3ba89939..00000000000 --- a/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_op_resolver_wrapper.cc +++ /dev/null @@ -1,29 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "pybind11/pybind11.h" -#include "tensorflow/lite/mutable_op_resolver.h" -#include "tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_op_resolver.h" - -PYBIND11_MODULE(_pywrap_whitespace_tokenizer_op_resolver, m) { - m.doc() = "_pywrap_whitespace_tokenizer_op_resolver"; - m.def( - "AddWhitespaceTokenizerCustomOp", - [](uintptr_t resolver) { - tflite::ops::custom::AddWhitespaceTokenizerCustomOp( - reinterpret_cast<tflite::MutableOpResolver*>(resolver)); - }, - "Op registerer function for the tftext:WhitespaceTokenizer custom op."); -} diff --git a/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_test.cc b/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_test.cc deleted file mode 100644 index 4654e46c4a2..00000000000 --- a/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_test.cc +++ /dev/null @@ -1,189 +0,0 @@ -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer.h" - -#include <string> -#include <vector> - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "tensorflow/lite/kernels/test_util.h" -#include "tensorflow/lite/schema/schema_generated.h" -#include "tensorflow/lite/string_util.h" - -namespace tflite { -namespace ops { -namespace custom { -namespace whitespace_tokenizer { -namespace test { -namespace { - -using ::testing::ElementsAre; -using ::testing::ElementsAreArray; - -} // namespace - -enum OutputType { PADDED, RAGGED }; - -class WhitespaceTokenizerModel : public SingleOpModel { - public: - WhitespaceTokenizerModel(OutputType output_type, - const std::vector<std::string>& input_values, - const std::vector<int>& input_shape) - : input_shape_(input_shape) { - input_ = AddInput(TensorType_STRING); - output_values_ = AddOutput(TensorType_STRING); - if (output_type == RAGGED) { - for (int i = 0; i < input_shape_.size(); ++i) { - output_row_splits_.push_back(AddOutput(TensorType_INT64)); - } - } - SetCustomOp("WhitespaceTokenizer", {}, Register_tftext_WhitespaceTokenizer); - - BuildInterpreter({input_shape}); - PopulateStringTensor(input_, input_values); - Invoke(); - } - - std::vector<int> GetValuesTensorShape() { - return GetTensorShape(output_values_); - } - - std::vector<std::string> ExtractValuesTensorVector() { - std::vector<std::string> r; - TfLiteTensor* tensor = interpreter_->tensor(output_values_); - int n = GetStringCount(tensor); - for (int i = 0; i < n; ++i) { - StringRef ref = GetString(tensor, i); - r.emplace_back(ref.str, ref.len); - } - return r; - } - - void CheckRowSplits(const std::vector<int>& token_counts) { - int size = 1; - for (int i = 0; i < input_shape_.size(); ++i) { - size *= input_shape_[i]; - EXPECT_THAT(GetTensorShape(output_row_splits_[i]), ElementsAre(size + 1)) - << "row_splits " << i << " has the wrong shape"; - - std::vector<int64_t> expected_values(size + 1); - if (i == input_shape_.size() - 1) { - ASSERT_EQ(token_counts.size(), size); - - int index = 0; - expected_values[0] = index; - for (int j = 0; j < size; ++j) { - index += token_counts[j]; - expected_values[j + 1] = index; - } - } else { - for (int j = 0; j <= size; ++j) { - expected_values[j] = j * input_shape_[i + 1]; - } - } - EXPECT_THAT(ExtractVector<int64_t>(output_row_splits_[i]), - ElementsAreArray(expected_values)) - << "row_splits " << i << " has an incorrect value/index"; - } - } - - private: - int input_; - std::vector<int> input_shape_; - int output_values_; - std::vector<int> output_row_splits_; -}; // namespace test - -TEST(WhitespaceTokenizerTest, SingleStringPaddedOutput) { - WhitespaceTokenizerModel m(PADDED, {"this is a test"}, {1}); - EXPECT_THAT(m.GetValuesTensorShape(), ElementsAre(1, 4)); - EXPECT_THAT(m.ExtractValuesTensorVector(), - ElementsAre("this", "is", "a", "test")); -} - -TEST(WhitespaceTokenizerTest, SingleStringRaggedOutput) { - WhitespaceTokenizerModel m(RAGGED, {"this is a test"}, {1}); - m.CheckRowSplits({4}); - EXPECT_THAT(m.ExtractValuesTensorVector(), - ElementsAre("this", "is", "a", "test")); -} - -TEST(WhitespaceTokenizerTest, VectorPaddedOutput) { - WhitespaceTokenizerModel m(PADDED, - {"this is a test", // - "three token sentence", // - "many more tokens than that sentence"}, - {3}); - EXPECT_THAT(m.GetValuesTensorShape(), ElementsAre(3, 6)); - EXPECT_THAT( - m.ExtractValuesTensorVector(), - ElementsAre("this", "is", "a", "test", "", "", // - "three", "token", "sentence", "", "", "", // - "many", "more", "tokens", "than", "that", "sentence")); -} - -TEST(WhitespaceTokenizerTest, VectorRaggedOutput) { - WhitespaceTokenizerModel m(RAGGED, - {"this is a test", // - "three token sentence", // - "many more tokens than that sentence"}, - {3}); - m.CheckRowSplits({4, 3, 6}); - EXPECT_THAT( - m.ExtractValuesTensorVector(), - ElementsAre("this", "is", "a", "test", // - "three", "token", "sentence", // - "many", "more", "tokens", "than", "that", "sentence")); -} - -TEST(WhitespaceTokenizerTest, MatrixPaddedOutput) { - WhitespaceTokenizerModel m(PADDED, - {"a b c", "d e f", // - "g h", "i j k l", // - "m", "n o p q r"}, - {3, 2}); - EXPECT_THAT(m.GetValuesTensorShape(), ElementsAre(3, 2, 5)); - EXPECT_THAT(m.ExtractValuesTensorVector(), - ElementsAre("a", "b", "c", "", "", // - "d", "e", "f", "", "", // - "g", "h", "", "", "", // - "i", "j", "k", "l", "", // - "m", "", "", "", "", // - "n", "o", "p", "q", "r")); -} - -TEST(WhitespaceTokenizerTest, MatrixRAGGEDOutput) { - WhitespaceTokenizerModel m(RAGGED, - {"a b c", "d e f", // - "g h", "i j k l", // - "m", "n o p q r"}, - {3, 2}); - m.CheckRowSplits({3, 3, 2, 4, 1, 5}); - EXPECT_THAT(m.ExtractValuesTensorVector(), - ElementsAre("a", "b", "c", // - "d", "e", "f", // - "g", "h", // - "i", "j", "k", "l", // - "m", // - "n", "o", "p", "q", "r")); -} - -} // namespace test -} // namespace whitespace_tokenizer -} // namespace custom -} // namespace ops -} // namespace tflite diff --git a/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_test.py b/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_test.py deleted file mode 100644 index b6a1a67d74b..00000000000 --- a/chromium/third_party/tflite_support/src/tensorflow_lite_support/custom_ops/kernel/whitespace_tokenizer_test.py +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright 2020 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# Lint as: python3 -"""Tests for tensorflow_lite_support.custom_ops.kernel.whitespace_tokenizer.""" - -import os -import sys -import timeit - -from absl import logging -from absl.testing import parameterized -import numpy as np -import tensorflow as tf -import tensorflow_text as tf_text -# pylint: disable=g-direct-tensorflow-import -from tensorflow.lite.python import interpreter as interpreter_wrapper -from tensorflow.python.platform import resource_loader - -# Force loaded shared object symbols to be globally visible. This is needed so -# that the interpreter_wrapper, in one .so file, can see the op resolver -# in a different .so file. Note that this may already be set by default. -# pylint: disable=g-import-not-at-top,g-bad-import-order,unused-import -if hasattr(sys, 'setdlopenflags') and hasattr(sys, 'getdlopenflags'): - sys.setdlopenflags(sys.getdlopenflags() | os.RTLD_GLOBAL) -from tensorflow_lite_support.custom_ops.kernel import _pywrap_whitespace_tokenizer_op_resolver - -TEST_CASES = [ - ['this is a test'], - ['extra spaces in here'], - ['a four token sentence', 'a five token sentence thing.'], - [['a multi dimensional test case', 'a b c d', 'e f g'], - ['h i j', 'k l m 2 3', 'n o p'], ['q r s 0 1', 't u v', 'w x y z']], -] - -INVOKES_FOR_SINGLE_OP_BENCHMARK = 1000 -INVOKES_FOR_FLEX_DELEGATE_BENCHMARK = 10 - - -@tf.function -def _call_whitespace_tokenizer_to_tensor(test_case): - tokenizer = tf_text.WhitespaceTokenizer() - return tokenizer.tokenize(test_case).to_tensor() - - -@tf.function -def _call_whitespace_tokenizer_to_ragged(test_case): - tokenizer = tf_text.WhitespaceTokenizer() - return tokenizer.tokenize(test_case) - - -class WhitespaceTokenizerTest(parameterized.TestCase): - - @parameterized.parameters([t] for t in TEST_CASES) - def testToTensorEquivalence(self, test_case): - tf_output = _call_whitespace_tokenizer_to_tensor(test_case) - - model_filename = resource_loader.get_path_to_datafile( - 'testdata/whitespace_tokenizer_to_tensor.tflite') - with open(model_filename, 'rb') as file: - model = file.read() - interpreter = interpreter_wrapper.InterpreterWithCustomOps( - model_content=model, - custom_op_registerers=['AddWhitespaceTokenizerCustomOp']) - - np_test_case = np.array(test_case, dtype=np.str) - interpreter.resize_tensor_input(0, np_test_case.shape) - interpreter.allocate_tensors() - interpreter.set_tensor(interpreter.get_input_details()[0]['index'], - np_test_case) - interpreter.invoke() - tflite_output = interpreter.get_tensor( - interpreter.get_output_details()[0]['index']) - - self.assertEqual(tf_output.numpy().tolist(), tflite_output.tolist()) - - @parameterized.parameters([t] for t in TEST_CASES) - def testToRaggedEquivalence(self, test_case): - tf_output = _call_whitespace_tokenizer_to_ragged(test_case) - - np_test_case = np.array(test_case, dtype=np.str) - rank = len(np_test_case.shape) - - model_filename = resource_loader.get_path_to_datafile( - 'testdata/whitespace_tokenizer_to_ragged_{}d_input.tflite'.format(rank)) - with open(model_filename, 'rb') as file: - model = file.read() - interpreter = interpreter_wrapper.InterpreterWithCustomOps( - model_content=model, - custom_op_registerers=['AddWhitespaceTokenizerCustomOp']) - interpreter.resize_tensor_input(0, np_test_case.shape) - interpreter.allocate_tensors() - interpreter.set_tensor(interpreter.get_input_details()[0]['index'], - np_test_case) - interpreter.invoke() - - # Traverse the nested row_splits/values of the ragged tensor. - for i in range(rank): - tflite_output_cur_row_splits = interpreter.get_tensor( - interpreter.get_output_details()[1 + i]['index']) - self.assertEqual(tf_output.row_splits.numpy().tolist(), - tflite_output_cur_row_splits.tolist()) - tf_output = tf_output.values - - tflite_output_values = interpreter.get_tensor( - interpreter.get_output_details()[0]['index']) - self.assertEqual(tf_output.numpy().tolist(), tflite_output_values.tolist()) - - def testSingleOpLatency(self): - model_filename = resource_loader.get_path_to_datafile( - 'testdata/whitespace_tokenizer_to_tensor.tflite') - with open(model_filename, 'rb') as file: - model = file.read() - interpreter = interpreter_wrapper.InterpreterWithCustomOps( - model_content=model, - custom_op_registerers=['AddWhitespaceTokenizerCustomOp']) - - latency = 0.0 - for test_case in TEST_CASES: - np_test_case = np.array(test_case, dtype=np.str) - interpreter.resize_tensor_input(0, np_test_case.shape) - interpreter.allocate_tensors() - interpreter.set_tensor(interpreter.get_input_details()[0]['index'], - np_test_case) - start_time = timeit.default_timer() - for _ in range(INVOKES_FOR_SINGLE_OP_BENCHMARK): - interpreter.invoke() - latency = latency + timeit.default_timer() - start_time - - latency = latency / (INVOKES_FOR_SINGLE_OP_BENCHMARK * len(TEST_CASES)) - logging.info('Latency: %fms', latency * 1000.0) - - def testFlexDelegateLatency(self): - model_filename = resource_loader.get_path_to_datafile( - 'testdata/whitespace_tokenizer_flex_delegate.tflite') - with open(model_filename, 'rb') as file: - model = file.read() - interpreter = interpreter_wrapper.Interpreter(model_content=model) - - latency = 0.0 - for test_case in TEST_CASES: - np_test_case = np.array(test_case, dtype=np.str) - interpreter.resize_tensor_input(0, np_test_case.shape) - interpreter.allocate_tensors() - interpreter.set_tensor(interpreter.get_input_details()[0]['index'], - np_test_case) - start_time = timeit.default_timer() - for _ in range(INVOKES_FOR_FLEX_DELEGATE_BENCHMARK): - interpreter.invoke() - latency = latency + timeit.default_timer() - start_time - - latency = latency / (INVOKES_FOR_FLEX_DELEGATE_BENCHMARK * len(TEST_CASES)) - logging.info('Latency: %fms', latency * 1000.0) - - -if __name__ == '__main__': - tf.test.main() |