From fb4d072f94716335c370471d91a52eb604e1e7e8 Mon Sep 17 00:00:00 2001
From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com>
Date: Mon, 18 Dec 2023 14:51:27 -0800
Subject: [PATCH] Skip decoding special tokens

---
 pgml-extension/src/bindings/transformers/transformers.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pgml-extension/src/bindings/transformers/transformers.py b/pgml-extension/src/bindings/transformers/transformers.py
index eed9cede7..83608ed48 100644
--- a/pgml-extension/src/bindings/transformers/transformers.py
+++ b/pgml-extension/src/bindings/transformers/transformers.py
@@ -314,6 +314,7 @@ def stream(self, input, timeout=None, **kwargs):
                 self.tokenizer,
                 timeout=timeout,
                 skip_prompt=True,
+                skip_special_tokens=True
             )
             if "chat_template" in kwargs:
                 input = self.tokenizer.apply_chat_template(
@@ -338,6 +339,7 @@ def stream(self, input, timeout=None, **kwargs):
             streamer = TextIteratorStreamer(
                 self.tokenizer,
                 timeout=timeout,
+                skip_special_tokens=True
             )
             input = self.tokenizer(input, return_tensors="pt", padding=True).to(
                 self.model.device