Skip to content

RuntimeError: CUDA error: device-side assert triggered #15

@sameeravithana

Description

@sameeravithana

Hi, we face the following issue when executing the script below:

from sacrerouge.metrics import QAEval

# This line will load the generation and answer models into memory, so it may take some time to complete.
NP_CHUNKS_STRATEGY = 'np-chunks'
MAX_NP_STRATEGY = 'max-np'
NER_STRATEGY = 'ner'
ALL_STRATEGY = 'all'
qaeval = QAEval(answer_selection_strategy=ALL_STRATEGY)

summaries=['Submarine is a game','the strength of many comments']
references=[['Submarine is a game'],['the strength of many comments']]

results = qaeval.score_all(summaries, references, return_qa_pairs=True)
allennlp.models.archival - archival.load_archive.L209 - INFO: loading archive file ../.sacrerouge/metrics/qaeval/models/generation/model.tar.gz
allennlp.models.archival - archival.extracted_archive.L300 - INFO: extracting archive file ...sacrerouge/metrics/qaeval/models/generation/tmp/tmp5uzny9w_
allennlp.common.params - params.pop.L221 - INFO: dataset_reader.type = question_generation
allennlp.common.params - params.pop.L221 - INFO: dataset_reader.model_name = facebook/bart-large
allennlp.common.params - params.pop.L221 - INFO: dataset_reader.lazy = False
allennlp.common.params - params.pop.L221 - INFO: dataset_reader.type = question_generation
allennlp.common.params - params.pop.L221 - INFO: dataset_reader.model_name = facebook/bart-large
allennlp.common.params - params.pop.L221 - INFO: dataset_reader.lazy = False
allennlp.common.params - params.pop.L221 - INFO: type = from_instances
allennlp.data.vocabulary - vocabulary.from_files.L349 - INFO: Loading token dictionary from /tmp/tmp5uzny9w_/vocabulary.
allennlp.common.params - params.pop.L221 - INFO: model.type = question_generation
allennlp.common.params - params.pop.L221 - INFO: model.model_name = facebook/bart-large
allennlp.common.params - params.pop.L221 - INFO: model.max_decoding_steps = 100
allennlp.common.params - params.pop.L221 - INFO: model.beam_size = 4
allennlp.models.archival - archival.load_archive.L243 - INFO: removing temporary unarchived model dir at /tmp/tmp5uzny9w_
qaeval.metric - metric._generate_qa_pairs.L138 - INFO: Selecting answers from 2 distinct summaries
qaeval.metric - metric._generate_qa_pairs.L144 - INFO: Selected 4 answers in total
qaeval.metric - metric._generate_qa_pairs.L155 - INFO: Generating questions for 4 answers
Traceback (most recent call last):
    results = qaeval.score_all(privatized_docs, gold_referenced_docs, return_qa_pairs=True)
  File "../sacrerouge/metrics/metric.py", line 83, in score_all
    return super().score_all(summaries, references_list, **kwargs)
  File "../sacrerouge/metrics/metric.py", line 56, in score_all
    metrics_lists = self.score_multi_all(summaries_list, *args, **kwargs)
  File "../sacrerouge/metrics/qaeval.py", line 87, in score_multi_all
    ungrouped_metrics = self.metric.score_batch(ungrouped_summaries, ungrouped_references_list, return_qa_pairs=return_qa_pairs)
  File "../qaeval/metric.py", line 368, in score_batch
    qa_pairs_lists = self._generate_qa_pairs(references_list)
  File "../qaeval/metric.py", line 156, in _generate_qa_pairs
    question_list = self.question_generator.generate_all(generation_inputs)
  File "../qaeval/generation/model.py", line 59, in generate_all
    outputs.extend(self.predictor.predict_batch_json(batch))
  File "../allennlp/predictors/predictor.py", line 292, in predict_batch_json
    return self.predict_batch_instance(instances)
  File "../allennlp/predictors/predictor.py", line 297, in predict_batch_instance
    outputs = self._model.forward_on_instances(instances)
  File "../allennlp/models/model.py", line 217, in forward_on_instances
    outputs = self.make_output_human_readable(self(**model_input))
  File "../torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "../qaeval/generation/model.py", line 140, in forward
    beam_result = self._beam_search.search(
  File "../torch/autograd/grad_mode.py", line 28, in decorate_context
    return func(*args, **kwargs)
  File "../allennlp/nn/beam_search.py", line 870, in search
    return self._search(
  File "../allennlp/nn/beam_search.py", line 982, in _search
    class_log_probabilities, state = step(last_predictions, state, timestep + 1)
  File "../qaeval/generation/model.py", line 218, in take_step
    outputs = self.bart(
  File "../torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "../transformers/models/bart/modeling_bart.py", line 1353, in forward
    outputs = self.model(
  File "../torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "../transformers/models/bart/modeling_bart.py", line 1240, in forward
    decoder_outputs = self.decoder(
  File "../torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "../transformers/models/bart/modeling_bart.py", line 1039, in forward
    positions = self.embed_positions(input_shape, past_key_values_length)
  File "../torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "../transformers/models/bart/modeling_bart.py", line 137, in forward
    return super().forward(positions + self.offset)
  File "../torch/nn/modules/sparse.py", line 158, in forward
    return F.embedding(
  File "../torch/nn/functional.py", line 2044, in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
RuntimeError: CUDA error: device-side assert triggered

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions