Loading models from disk
In this notebook, we load the models from disk instead of pulling them from the Hugging Face Hub. This is useful when you deploy LLM Guard on a server and want to share the models across instances.
Pull models from Hugging Face
First, we pull the models from the Hugging Face Hub and save them to disk. You can also obtain them from other sources.
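If you prefer a git-free pull, huggingface_hub's snapshot_download fetches a repository into a local directory; a minimal sketch for one of the repos cloned below (the target folder simply mirrors the clone layout):

from huggingface_hub import snapshot_download

# Download a single repository into a local folder.
snapshot_download(
    repo_id="protectai/deberta-v3-base-prompt-injection-v2",
    local_dir="./deberta-v3-base-prompt-injection-v2",
)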
In [ ]:
!git lfs install
!git clone git@hf.co:protectai/deberta-v3-base-prompt-injection-v2
!git clone git@hf.co:MoritzLaurer/deberta-v3-base-zeroshot-v1.1-all-33
!git clone git@hf.co:tomaarsen/span-marker-bert-base-orgs
!git clone git@hf.co:unitary/unbiased-toxic-roberta
!git clone git@hf.co:philomath-1209/programming-language-identification
!git clone git@hf.co:madhurjindal/autonlp-Gibberish-Detector-492513457
!git clone git@hf.co:papluca/xlm-roberta-base-language-detection
!git clone git@hf.co:Isotonic/deberta-v3-base_finetuned_ai4privacy_v2
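Note that the SSH URLs above require an SSH key linked to your Hugging Face account; the same repositories can also be cloned anonymously over HTTPS, for example:

!git clone https://huggingface.co/protectai/deberta-v3-base-prompt-injection-v2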
Note: If you use only the ONNX versions of the models, you can delete the other weight formats to save disk space, as sketched below.
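For example, a cleanup sketch for one repo, assuming the PyTorch weights sit at the repository root (layouts vary, so inspect each clone before deleting anything):

# Inspect the layout first, then drop the PyTorch weights and keep the ONNX export.
!ls ./deberta-v3-base-prompt-injection-v2
!rm -f ./deberta-v3-base-prompt-injection-v2/model.safetensors ./deberta-v3-base-prompt-injection-v2/pytorch_model.bin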
Use local models in LLM Guard
Now, we will use the local models in LLM Guard.
In [ ]:
!pip install llm_guard@git+https://github.com/protectai/llm-guard.git
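If the target server has no internet access at all, you can stage the package the same way as the models: download wheels on a connected machine and install from the local directory. This sketch assumes the PyPI release rather than the git checkout above; the paths are illustrative:

# On a machine with internet access:
!pip download llm-guard -d ./wheels
# On the offline server:
!pip install --no-index --find-links ./wheels llm-guard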
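As an additional safeguard against accidental downloads, you can put the Hugging Face libraries into offline mode globally; both environment variables below are honored by huggingface_hub and transformers and should be set before importing them:

import os

# Force all model resolution to happen from local paths and the local cache.
os.environ["HF_HUB_OFFLINE"] = "1"
os.environ["TRANSFORMERS_OFFLINE"] = "1"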
In [11]:
from llm_guard import scan_prompt
from llm_guard.input_scanners import (
Anonymize,
BanCompetitors,
BanTopics,
Code,
Gibberish,
Language,
PromptInjection,
Toxicity,
)
from llm_guard.input_scanners.anonymize_helpers import DEBERTA_AI4PRIVACY_v2_CONF
from llm_guard.input_scanners.ban_competitors import MODEL_BASE as BAN_COMPETITORS_MODEL
from llm_guard.input_scanners.ban_topics import MODEL_DEBERTA_BASE_V2 as BAN_TOPICS_MODEL
from llm_guard.input_scanners.code import DEFAULT_MODEL as CODE_MODEL
from llm_guard.input_scanners.gibberish import DEFAULT_MODEL as GIBBERISH_MODEL
from llm_guard.input_scanners.language import DEFAULT_MODEL as LANGUAGE_MODEL
from llm_guard.input_scanners.prompt_injection import V2_MODEL as PROMPT_INJECTION_MODEL
from llm_guard.input_scanners.toxicity import DEFAULT_MODEL as TOXICITY_MODEL
from llm_guard.vault import Vault
# Point each scanner's model at its local clone and pass local_files_only=True
# so transformers never reaches out to the Hub.
PROMPT_INJECTION_MODEL.path = "./deberta-v3-base-prompt-injection-v2"
PROMPT_INJECTION_MODEL.kwargs["local_files_only"] = True

DEBERTA_AI4PRIVACY_v2_CONF["DEFAULT_MODEL"].path = "./deberta-v3-base_finetuned_ai4privacy_v2"
DEBERTA_AI4PRIVACY_v2_CONF["DEFAULT_MODEL"].kwargs["local_files_only"] = True

BAN_TOPICS_MODEL.path = "./deberta-v3-base-zeroshot-v1.1-all-33"
BAN_TOPICS_MODEL.kwargs["local_files_only"] = True

TOXICITY_MODEL.path = "./unbiased-toxic-roberta"
TOXICITY_MODEL.kwargs["local_files_only"] = True

BAN_COMPETITORS_MODEL.path = "./span-marker-bert-base-orgs"
BAN_COMPETITORS_MODEL.kwargs["local_files_only"] = True

CODE_MODEL.path = "./programming-language-identification"
CODE_MODEL.kwargs["local_files_only"] = True

GIBBERISH_MODEL.path = "./autonlp-Gibberish-Detector-492513457"
GIBBERISH_MODEL.kwargs["local_files_only"] = True

LANGUAGE_MODEL.path = "./xlm-roberta-base-language-detection"
LANGUAGE_MODEL.kwargs["local_files_only"] = True
vault = Vault()

# Assemble the input scanners, each backed by a locally configured model.
input_scanners = [
Anonymize(vault, recognizer_conf=DEBERTA_AI4PRIVACY_v2_CONF),
BanTopics(["politics", "religion"], model=BAN_TOPICS_MODEL),
BanCompetitors(["google", "facebook"], model=BAN_COMPETITORS_MODEL),
Toxicity(model=TOXICITY_MODEL),
Code(["Python", "PHP"], model=CODE_MODEL),
Gibberish(model=GIBBERISH_MODEL),
Language(["en"], model=LANGUAGE_MODEL),
PromptInjection(model=PROMPT_INJECTION_MODEL),
]
sanitized_prompt, results_valid, results_score = scan_prompt(
input_scanners,
"I am happy",
)
print(sanitized_prompt)
print(results_valid)
print(results_score)
2024-03-21 12:39:44 [debug ] No entity types provided, using default default_entities=['CREDIT_CARD', 'CRYPTO', 'EMAIL_ADDRESS', 'IBAN_CODE', 'IP_ADDRESS', 'PERSON', 'PHONE_NUMBER', 'US_SSN', 'US_BANK_NUMBER', 'CREDIT_CARD_RE', 'UUID', 'EMAIL_ADDRESS_RE', 'US_SSN_RE']
2024-03-21 12:39:46 [debug ] Initialized NER model device=device(type='mps') model=Model(path='./deberta-v3-base_finetuned_ai4privacy_v2', subfolder='', onnx_path='Isotonic/deberta-v3-base_finetuned_ai4privacy_v2', onnx_subfolder='onnx', onnx_filename='model.onnx', kwargs={'local_files_only': True}, pipeline_kwargs={'aggregation_strategy': 'simple', 'ignore_labels': ['O', 'CARDINAL']})
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=CREDIT_CARD_RE
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=UUID
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=EMAIL_ADDRESS_RE
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=US_SSN_RE
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=BTC_ADDRESS
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=URL_RE
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=CREDIT_CARD
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=EMAIL_ADDRESS_RE
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=PHONE_NUMBER_ZH
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=PHONE_NUMBER_WITH_EXT
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=DATE_RE
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=TIME_RE
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=HEX_COLOR
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=PRICE_RE
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=PO_BOX_RE
2024-03-21 12:39:48 [debug ] Initialized classification model device=device(type='mps') model=Model(path='./deberta-v3-base-zeroshot-v1.1-all-33', subfolder='', onnx_path='MoritzLaurer/deberta-v3-base-zeroshot-v1.1-all-33', onnx_subfolder='onnx', onnx_filename='model.onnx', kwargs={'local_files_only': True, 'max_length': 1000000000000000019884624838656}, pipeline_kwargs={'max_length': 512, 'truncation': True})
2024-03-21 12:39:55 [debug ] Initialized classification model device=device(type='mps') model=Model(path='./unbiased-toxic-roberta', subfolder='', onnx_path='ProtectAI/unbiased-toxic-roberta-onnx', onnx_subfolder='', onnx_filename='model.onnx', kwargs={'local_files_only': True, 'max_length': 512}, pipeline_kwargs={'padding': 'max_length', 'top_k': None, 'function_to_apply': 'sigmoid', 'truncation': True})
2024-03-21 12:39:56 [debug ] Initialized classification model device=device(type='mps') model=Model(path='./programming-language-identification', subfolder='', onnx_path='philomath-1209/programming-language-identification-onnx', onnx_subfolder='onnx', onnx_filename='model.onnx', kwargs={'local_files_only': True, 'max_length': 512}, pipeline_kwargs={'truncation': True})
2024-03-21 12:39:57 [debug ] Initialized classification model device=device(type='mps') model=Model(path='./autonlp-Gibberish-Detector-492513457', subfolder='', onnx_path='madhurjindal/autonlp-Gibberish-Detector-492513457', onnx_subfolder='onnx', onnx_filename='model.onnx', kwargs={'local_files_only': True, 'max_length': 512}, pipeline_kwargs={'truncation': True})
2024-03-21 12:40:01 [debug ] Initialized classification model device=device(type='mps') model=Model(path='./xlm-roberta-base-language-detection', subfolder='', onnx_path='ProtectAI/xlm-roberta-base-language-detection-onnx', onnx_subfolder='', onnx_filename='model.onnx', kwargs={'local_files_only': True, 'max_length': 512}, pipeline_kwargs={'max_length': 512, 'truncation': True, 'top_k': None})
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
2024-03-21 12:40:04 [debug ] Prompt does not have sensitive data to replace risk_score=0.0
2024-03-21 12:40:04 [debug ] Scanner completed elapsed_time_seconds=1.366613 is_valid=True scanner=Anonymize
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
2024-03-21 12:40:05 [debug ] No banned topics detected scores={'religion': 0.5899404287338257, 'politics': 0.4100596308708191}
2024-03-21 12:40:05 [debug ] Scanner completed elapsed_time_seconds=0.911 is_valid=True scanner=BanTopics
2024-03-21 12:40:05 [debug ] None of the competitors were detected
2024-03-21 12:40:05 [debug ] Scanner completed elapsed_time_seconds=0.569812 is_valid=True scanner=BanCompetitors
2024-03-21 12:40:06 [debug ] Not toxicity found in the text results=[[{'label': 'toxicity', 'score': 0.0003712967736646533}, {'label': 'male', 'score': 0.00016587311984039843}, {'label': 'female', 'score': 0.00012892877566628158}, {'label': 'insult', 'score': 0.00011079442629124969}, {'label': 'christian', 'score': 0.0001087861746782437}, {'label': 'psychiatric_or_mental_illness', 'score': 9.981756011256948e-05}, {'label': 'muslim', 'score': 7.031546556390822e-05}, {'label': 'white', 'score': 4.716941839433275e-05}, {'label': 'jewish', 'score': 3.9232210838235915e-05}, {'label': 'identity_attack', 'score': 2.9348657335503958e-05}, {'label': 'homosexual_gay_or_lesbian', 'score': 2.922919338743668e-05}, {'label': 'threat', 'score': 2.9109109163982794e-05}, {'label': 'black', 'score': 2.897163540183101e-05}, {'label': 'obscene', 'score': 2.86914873868227e-05}, {'label': 'sexual_explicit', 'score': 1.7762333300197497e-05}, {'label': 'severe_toxicity', 'score': 1.1558224741747836e-06}]]
2024-03-21 12:40:06 [debug ] Scanner completed elapsed_time_seconds=0.392971 is_valid=True scanner=Toxicity
2024-03-21 12:40:06 [debug ] No Markdown code blocks found in the output
2024-03-21 12:40:06 [debug ] Scanner completed elapsed_time_seconds=0.000252 is_valid=True scanner=Code
2024-03-21 12:40:06 [debug ] Gibberish detection finished results=[{'label': 'clean', 'score': 0.4235343933105469}]
2024-03-21 12:40:06 [debug ] No gibberish in the text highest_score=0.58 threshold=0.7
2024-03-21 12:40:06 [debug ] Scanner completed elapsed_time_seconds=0.104569 is_valid=True scanner=Gibberish
2024-03-21 12:40:06 [debug ] Only valid languages are found in the text.
2024-03-21 12:40:06 [debug ] Scanner completed elapsed_time_seconds=0.177882 is_valid=True scanner=Language
2024-03-21 12:40:06 [info ] Scanned prompt elapsed_time_seconds=3.525234 scores={'Anonymize': 0.0, 'BanTopics': 0.0, 'BanCompetitors': 0.0, 'Toxicity': 0.0, 'Code': 0.0, 'Gibberish': 0.0, 'Language': 0.0}
I am happy
{'Anonymize': True, 'BanTopics': True, 'BanCompetitors': True, 'Toxicity': True, 'Code': True, 'Gibberish': True, 'Language': True}
{'Anonymize': 0.0, 'BanTopics': 0.0, 'BanCompetitors': 0.0, 'Toxicity': 0.0, 'Code': 0.0, 'Gibberish': 0.0, 'Language': 0.0}
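scan_prompt returns the sanitized prompt together with a per-scanner validity flag and risk score, as printed above. In a deployment you would typically gate on the flags; a minimal sketch:

# Reject the prompt if any scanner marked it invalid.
if not all(results_valid.values()):
    failed = [name for name, valid in results_valid.items() if not valid]
    raise ValueError(f"Prompt rejected by scanners: {failed}")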