-
Notifications
You must be signed in to change notification settings - Fork 8
Risk dashboard UI #46
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3194a97
f6e6059
6dc2b39
123f33e
5e60bdc
2eb9546
df7bb3b
5bd7db2
2ee77b0
6db0dac
a948f3a
e870b52
17e78e5
7658c33
331e7db
fc66711
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -89,6 +89,8 @@ venv/ | |
| ENV/ | ||
| env.bak/ | ||
| venv.bak/ | ||
| venv310 | ||
| cache | ||
|
|
||
| # Spyder project settings | ||
| .spyderproject | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,57 @@ | ||
| from flask_sqlalchemy import SQLAlchemy | ||
|
|
||
# Shared Flask-SQLAlchemy handle: every model below derives from `db.Model`
# and the persistence helpers go through `db.session`.
db = SQLAlchemy()
|
|
||
|
|
||
class TargetModel(db.Model):
    """A target model that can be attacked by various attacks."""

    __tablename__ = 'target_models'

    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)
    # Required, and unique across all target models.
    name = db.Column(db.String, nullable=False, unique=True)
    # Optional free-text description.
    description = db.Column(db.String)
|
|
||
|
|
||
class Attack(db.Model):
    """An attack that can be performed on a target model."""

    __tablename__ = 'attacks'

    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)
    # Required, and unique across all attacks.
    name = db.Column(db.String, unique=True, nullable=False)
    # Required weight of the attack; defaults to 1 both on the Python side
    # (`default`) and in the table definition (`server_default`).
    weight = db.Column(
        db.Integer,
        nullable=False,
        default=1,
        server_default="1",
    )
|
|
||
|
|
||
class SubAttack(db.Model):
    """A sub-attack that is part of a larger attack."""

    __tablename__ = 'sub_attacks'

    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)
    # Required name (not constrained to be unique).
    name = db.Column(db.String, nullable=False)
    # Optional free-text description.
    description = db.Column(db.String)
    # Required reference to the parent row in `attacks`.
    attack_id = db.Column(
        db.Integer,
        db.ForeignKey('attacks.id'),
        nullable=False,
    )
|
|
||
|
|
||
class AttackResult(db.Model):
    """The result of each single attack on a target model."""

    __tablename__ = 'attack_results'

    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)
    # Required reference to the attacked row in `target_models`.
    attack_model_id = db.Column(
        db.Integer,
        db.ForeignKey('target_models.id'),
        nullable=False,
    )
    # Required reference to the row in `attacks` that produced this result.
    attack_id = db.Column(
        db.Integer,
        db.ForeignKey('attacks.id'),
        nullable=False,
    )
    # Required success flag of the attack.
    success = db.Column(db.Boolean, nullable=False)
    # Optional vulnerability label.
    vulnerability_type = db.Column(db.String, nullable=True)
    # Optional JSON field holding the attack details.
    details = db.Column(db.JSON, nullable=True)
|
marcorosa marked this conversation as resolved.
|
||
|
|
||
|
|
||
class ModelAttackScore(db.Model):
    """
    The global attack success rate of an attack on a target model,
    including the total number of attacks and successful attacks.
    """

    __tablename__ = 'model_attack_scores'

    # At most one score row per (target model, attack) pair.
    __table_args__ = (
        db.UniqueConstraint(
            'attack_model_id',
            'attack_id',
            name='uix_model_attack',
        ),
    )

    # Surrogate primary key.
    id = db.Column(db.Integer, primary_key=True)
    # Required reference to the scored row in `target_models`.
    attack_model_id = db.Column(
        db.Integer,
        db.ForeignKey('target_models.id'),
        nullable=False,
    )
    # Required reference to the scored row in `attacks`.
    attack_id = db.Column(
        db.Integer,
        db.ForeignKey('attacks.id'),
        nullable=False,
    )
    # Required running totals for this (target model, attack) pair.
    total_number_of_attack = db.Column(db.Integer, nullable=False)
    total_success = db.Column(db.Integer, nullable=False)
|
|
||
|
|
||
# Configure all ORM mappers now, at import time, instead of lazily on first
# use (presumably so that mapping errors surface early -- confirm intent).
db.configure_mappers()
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also in this file, please add some comments. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
| import logging | ||
|
|
||
| from .models import ( | ||
| Attack as AttackDB, | ||
| db, | ||
| TargetModel as TargetModelDB, | ||
| AttackResult as AttackResultDB, | ||
| ModelAttackScore as ModelAttackScoreDB, | ||
| ) | ||
|
|
||
| from status import status | ||
|
|
||
# Module-level logger: records are forwarded to the handler exposed by the
# `status` module, and everything from DEBUG upwards is let through.
logger = logging.getLogger(__name__)
logger.addHandler(status.trace_logging)
logger.setLevel(logging.DEBUG)
|
|
||
|
|
||
def _get_or_create_by_name(model_cls, name, **defaults):
    """Return the row of *model_cls* named *name*, creating it if absent."""
    record = model_cls.query.filter_by(name=name).first()
    if record is None:
        record = model_cls(name=name, **defaults)
        db.session.add(record)
        # Flush so the new row receives its primary key before other rows
        # reference it within the same transaction.
        db.session.flush()
    return record


# Persist the attack result into the database for each attack.
def save_to_db(attack_results: AttackResultDB) -> list[AttackResultDB]:
    """
    Persist one attack result and update the matching model/attack score.

    The target model (taken from ``details['target_model']``) and the attack
    are looked up by lower-cased name and created when missing (new attacks
    get weight 1). One ``AttackResult`` row is added, and the
    ``ModelAttackScore`` row for the (target model, attack) pair is created
    or incremented with ``details['total_attacks']`` and
    ``details['number_successful_attacks']`` (missing values count as 0).

    Args:
        attack_results: Object exposing ``attack``, ``success``,
            ``vulnerability_type`` and ``details`` attributes.
            NOTE(review): the annotation names the DB model, but the
            attributes read here do not match its columns -- the argument
            looks like the in-memory attack result; confirm and fix the
            annotation.

    Returns:
        The records added in this call (the ``AttackResult`` row followed by
        the ``ModelAttackScore`` row), or an empty list when the result has
        no target model name or the database operation fails.
    """
    # Read the details first: without a target model name nothing is saved,
    # so the other fields must not be able to crash before that check.
    details = attack_results.details or {}
    target_name = (details.get('target_model') or '').lower()

    # If target model name is not provided, skip saving
    if not target_name:
        logger.info("Skipping result: missing target model name.")
        return []

    attack_name = attack_results.attack.lower()
    # The column is nullable, so tolerate a missing vulnerability type.
    vulnerability_type = attack_results.vulnerability_type
    if vulnerability_type is not None:
        vulnerability_type = vulnerability_type.lower()
    total_attacks = details.get('total_attacks') or 0
    total_success = details.get('number_successful_attacks') or 0

    inserted_records = []
    # Queries and flushes can fail too, so the whole unit of work is guarded
    # and rolled back as one transaction.
    try:
        target_model = _get_or_create_by_name(TargetModelDB, target_name)
        attack = _get_or_create_by_name(AttackDB, attack_name, weight=1)

        # Add the attack result to inserted_records
        db_record = AttackResultDB(
            attack_model_id=target_model.id,
            attack_id=attack.id,
            success=attack_results.success,
            vulnerability_type=vulnerability_type,
            details=attack_results.details,
        )
        db.session.add(db_record)
        inserted_records.append(db_record)

        # If model_attack_score does not exist, create it
        # otherwise, update the existing record
        model_attack_score = ModelAttackScoreDB.query.filter_by(
            attack_model_id=target_model.id,
            attack_id=attack.id,
        ).first()
        if model_attack_score is None:
            model_attack_score = ModelAttackScoreDB(
                attack_model_id=target_model.id,
                attack_id=attack.id,
                total_number_of_attack=total_attacks,
                total_success=total_success,
            )
        else:
            model_attack_score.total_number_of_attack += total_attacks
            model_attack_score.total_success += total_success
        db.session.add(model_attack_score)
        inserted_records.append(model_attack_score)

        # Commit the session to save all changes to the database
        db.session.commit()
    except Exception as e:
        # Best-effort persistence: never let a DB failure break the attack
        # run, but undo the partial work and keep the traceback in the log.
        db.session.rollback()
        logger.exception("Error while saving to the database: %s", e)
        return []

    logger.info("Results successfully saved to the database.")
    return inserted_records
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,9 +17,10 @@ | |
| import logging | ||
| import re | ||
|
|
||
| from app.db.utils import save_to_db | ||
| from attack_result import AttackResult | ||
| from llm import LLM | ||
| from status import status, Step | ||
| from status import Step, status | ||
|
|
||
|
|
||
| COUNT_PROMPTS = 2 | ||
|
|
@@ -305,7 +306,7 @@ def start_prompt_map(target_model: LLM, parameters: dict) -> AttackResult: | |
| target_system_prompt) | ||
| except Exception as e: | ||
| logger.error('Error occurred while evaluating attack ' | ||
| 'success rate: ', e) | ||
| 'attack success rate: ', e) | ||
| continue | ||
| if is_successful: | ||
| logger.info('* Prompt attack successful!') | ||
|
|
@@ -338,12 +339,16 @@ def start_prompt_map(target_model: LLM, parameters: dict) -> AttackResult: | |
| # Write results to file | ||
| with open(output_file, 'w') as f: | ||
| json.dump(successful_attacks_json, f) | ||
| return AttackResult( | ||
| result = AttackResult( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, here the target model is not returned as part of the attack result.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This needs to be modified once promptmap works again in the agent. |
||
| 'promptmap', | ||
| security_failed > 0, | ||
| 'prompt-injection', | ||
| { | ||
| 'total_attacks': total_attack_count, | ||
| 'number_successful_attacks': len(successful_attacks), | ||
| 'successful_attacks': successful_attacks_json, | ||
| 'attack_description': DESCRIPTION | ||
| } | ||
| ) | ||
| save_to_db(result) | ||
| return result | ||
Uh oh!
There was an error while loading. Please reload this page.