
Commit

Fixes #249
pseudotensor committed Jun 8, 2023
1 parent 4f4fd98 commit 839b7f5
Showing 1 changed file with 36 additions and 13 deletions.
h2oai_pipeline.py: 36 additions & 13 deletions (49 changes)
@@ -50,24 +50,47 @@ def __init__(self, *args, debug=False, chat=False, stream_output=False,
         self.max_input_tokens = max_input_tokens  # not for generate, so ok that not kwargs
 
     def preprocess(self, prompt_text, prefix="", handle_long_generation=None, **generate_kwargs):
-        if self.prompt_type not in [PromptType.plain.name, PromptType.plain.value] and \
-                hasattr(self.tokenizer, 'model_max_length'):
+        if hasattr(self.tokenizer, 'model_max_length'):
             # model_max_length only defined for generate.py, not raw use of h2oai_pipeline.py
             model_max_length = self.tokenizer.model_max_length
-            verbose = False  # FIXME: debug
+        else:
+            # unknown
+            model_max_length = None
+
+        verbose = False  # FIXME: debug
+        if model_max_length is not None:
+            num_prompt_tokens = None
             # can't wait for "hole" if not plain prompt_type, since would lose prefix like <human>:
             # For https://github.com/h2oai/h2ogpt/issues/192
-            prompt_tokens = self.tokenizer(prompt_text)['input_ids']
-            num_prompt_tokens = len(prompt_tokens)
-            if num_prompt_tokens > model_max_length:
-                # conservative by using int()
-                chars_per_token = int(len(prompt_text) / num_prompt_tokens)
-                prompt_text = prompt_text[-model_max_length * chars_per_token:]
+            for trial in range(0, 3):
+                prompt_tokens = self.tokenizer(prompt_text)['input_ids']
+                num_prompt_tokens = len(prompt_tokens)
+                if num_prompt_tokens > model_max_length:
+                    # conservative by using int()
+                    chars_per_token = int(len(prompt_text) / num_prompt_tokens)
+                    prompt_text = prompt_text[-model_max_length * chars_per_token:]
+                    if verbose:
+                        print("reducing %s tokens, assuming average of %s chars/token for %s characters" % (
+                            num_prompt_tokens, chars_per_token, len(prompt_text)), flush=True)
+                else:
+                    if verbose:
+                        print("using %s tokens with %s chars" % (num_prompt_tokens, len(prompt_text)), flush=True)
+                    break
+
+            # if input prompt is some number of tokens, despite user request, can't have max_new_tokens more
+            #
+            if self.prompt_type not in [PromptType.plain.name, PromptType.plain.value]:
+                # then give room for prompt
+                fudge = 20
+            else:
+                fudge = 0
+            assert num_prompt_tokens is not None
+            max_new_tokens = max(0, min(generate_kwargs['max_new_tokens'],
+                                        model_max_length - (num_prompt_tokens + fudge)))
+            if max_new_tokens < generate_kwargs['max_new_tokens']:
                 if verbose:
-                    print("reducing tokens, assuming average of %s chars/token: %s" % chars_per_token, flush=True)
-                prompt_tokens2 = self.tokenizer(prompt_text)['input_ids']
-                num_prompt_tokens2 = len(prompt_tokens2)
-                print("reduced tokens from %d -> %d" % (num_prompt_tokens, num_prompt_tokens2), flush=True)
+                    print("Reduced max_new_tokens from %s -> %s" % (generate_kwargs['max_new_tokens'], max_new_tokens))
+                generate_kwargs['max_new_tokens'] = max_new_tokens
 
         data_point = dict(context='', instruction=prompt_text, input='')
         if self.prompter is not None:
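
Note: the gist of the change is to (a) retry tokenize-and-truncate up to three times, using an estimated chars-per-token ratio, until the prompt fits model_max_length, and (b) cap max_new_tokens to whatever context budget remains. A minimal standalone sketch of that strategy follows; it is illustrative only, not code from this commit, and the toy_tokenize and fit_prompt names are hypothetical stand-ins for the pipeline's real Hugging Face tokenizer and preprocess method.

# Illustrative sketch only (hypothetical helpers, not from h2oai_pipeline.py).

def toy_tokenize(text):
    # Stand-in for a real tokenizer: one token per whitespace-separated word.
    return text.split()


def fit_prompt(prompt_text, requested_new_tokens, model_max_length, fudge=20, max_trials=3):
    # Retry truncation a few times: the chars-per-token estimate is approximate,
    # so a single cut may not get the prompt under the model's context length.
    num_prompt_tokens = len(toy_tokenize(prompt_text))
    for _ in range(max_trials):
        if num_prompt_tokens <= model_max_length:
            break
        # int() rounds the estimate down, which makes the cut conservative (removes a bit more).
        chars_per_token = int(len(prompt_text) / num_prompt_tokens)
        prompt_text = prompt_text[-model_max_length * chars_per_token:]
        num_prompt_tokens = len(toy_tokenize(prompt_text))
    # Cap generation to the budget left after the prompt, minus a small fudge for prompt wrapping.
    max_new_tokens = max(0, min(requested_new_tokens,
                                model_max_length - (num_prompt_tokens + fudge)))
    return prompt_text, max_new_tokens


# Example: a 1800-"token" prompt against a 2048-token window with 512 tokens requested;
# the prompt fits, but generation is capped to the 228 tokens that remain.
text = "word " * 1800
prompt, new_tokens = fit_prompt(text, requested_new_tokens=512, model_max_length=2048)
print(len(toy_tokenize(prompt)), new_tokens)  # 1800 228

In the commit itself this capping is skipped entirely when the tokenizer exposes no model_max_length, which the new else branch (model_max_length = None) makes explicit.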
