diff --git a/genai/count_tokens/counttoken_localtokenizer_compute_with_txt.py b/genai/count_tokens/counttoken_localtokenizer_compute_with_txt.py new file mode 100644 index 0000000000..889044e63a --- /dev/null +++ b/genai/count_tokens/counttoken_localtokenizer_compute_with_txt.py @@ -0,0 +1,36 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def counttoken_localtokenizer_compute_with_txt() -> list: + # [START googlegenaisdk_counttoken_localtokenizer_compute_with_txt] + from google.genai.local_tokenizer import LocalTokenizer + + tokenizer = LocalTokenizer(model_name="gemini-2.5-flash") + response = tokenizer.compute_tokens("What's the longest word in the English language?") + print(response) + # Example output: + # tokens_info=[TokensInfo( + # role='user', + # token_ids=[3689, 236789, 236751, 506, + # 27801, 3658, 528, 506, 5422, 5192, 236881], + # tokens=[b'What', b"'", b's', b' the', b' longest', + # b' word', b' in', b' the', b' English', b' language', b'?'] + # )] + # [END googlegenaisdk_counttoken_localtokenizer_compute_with_txt] + return response.tokens_info + + +if __name__ == "__main__": + counttoken_localtokenizer_compute_with_txt() diff --git a/genai/count_tokens/counttoken_localtokenizer_with_txt.py b/genai/count_tokens/counttoken_localtokenizer_with_txt.py new file mode 100644 index 0000000000..e784d393c9 --- --- /dev/null +++ b/genai/count_tokens/counttoken_localtokenizer_with_txt.py @@ -0,0 +1,30 @@ +# Copyright 2025 Google 
LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def counttoken_localtokenizer_with_txt() -> int: + # [START googlegenaisdk_counttoken_localtokenizer_with_txt] + from google.genai.local_tokenizer import LocalTokenizer + + tokenizer = LocalTokenizer(model_name="gemini-2.5-flash") + response = tokenizer.count_tokens("What's the highest mountain in Africa?") + print(response) + # Example output: + # total_tokens=10 + # [END googlegenaisdk_counttoken_localtokenizer_with_txt] + return response.total_tokens + + +if __name__ == "__main__": + counttoken_localtokenizer_with_txt() diff --git a/genai/count_tokens/counttoken_with_txt.py b/genai/count_tokens/counttoken_with_txt.py index 84464c5cf8..fcbf948408 100644 --- a/genai/count_tokens/counttoken_with_txt.py +++ b/genai/count_tokens/counttoken_with_txt.py @@ -25,7 +25,7 @@ def count_tokens() -> int: ) print(response) # Example output: - # total_tokens=10 + # total_tokens=9 # cached_content_token_count=None # [END googlegenaisdk_counttoken_with_txt] return response.total_tokens diff --git a/genai/count_tokens/requirements.txt b/genai/count_tokens/requirements.txt index 1efe7b29db..726dd09178 100644 --- a/genai/count_tokens/requirements.txt +++ b/genai/count_tokens/requirements.txt @@ -1 +1,2 @@ google-genai==1.42.0 +sentencepiece==0.2.1 diff --git a/genai/count_tokens/test_count_tokens_examples.py b/genai/count_tokens/test_count_tokens_examples.py index b654ff872d..e83f20cd14 100644 --- 
a/genai/count_tokens/test_count_tokens_examples.py +++ b/genai/count_tokens/test_count_tokens_examples.py @@ -19,6 +19,8 @@ import os import counttoken_compute_with_txt +import counttoken_localtokenizer_compute_with_txt +import counttoken_localtokenizer_with_txt import counttoken_resp_with_txt import counttoken_with_txt import counttoken_with_txt_vid @@ -43,3 +45,11 @@ def test_counttoken_with_txt() -> None: def test_counttoken_with_txt_vid() -> None: assert counttoken_with_txt_vid.count_tokens() + + +def test_counttoken_localtokenizer_with_txt() -> None: + assert counttoken_localtokenizer_with_txt.counttoken_localtokenizer_with_txt() + + +def test_counttoken_localtokenizer_compute_with_txt() -> None: + assert counttoken_localtokenizer_compute_with_txt.counttoken_localtokenizer_compute_with_txt()