Loading models from disk
In this notebook, we load the models from disk instead of pulling them from the Hugging Face Hub. This is useful when you deploy LLM Guard on a server and want to share the models across instances.
Pull models from Hugging Face
First, we pull the models from the Hugging Face Hub and save them to disk. You can also obtain them from other sources.
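If you prefer a git-free pull, huggingface_hub's snapshot_download fetches a repository into a local directory; a minimal sketch for one of the repos cloned below (the target folder simply mirrors the clone layout):

from huggingface_hub import snapshot_download

# Download a single repository into a local folder.
snapshot_download(
    repo_id="protectai/deberta-v3-base-prompt-injection-v2",
    local_dir="./deberta-v3-base-prompt-injection-v2",
)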
In [ ]:
!git lfs install
!git clone git@hf.co:protectai/deberta-v3-base-prompt-injection-v2
!git clone git@hf.co:MoritzLaurer/deberta-v3-base-zeroshot-v1.1-all-33
!git clone git@hf.co:tomaarsen/span-marker-bert-base-orgs
!git clone git@hf.co:unitary/unbiased-toxic-roberta
!git clone git@hf.co:philomath-1209/programming-language-identification
!git clone git@hf.co:madhurjindal/autonlp-Gibberish-Detector-492513457
!git clone git@hf.co:papluca/xlm-roberta-base-language-detection
!git clone git@hf.co:Isotonic/deberta-v3-base_finetuned_ai4privacy_v2
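Note that the SSH URLs above require an SSH key linked to your Hugging Face account; the same repositories can also be cloned anonymously over HTTPS, for example:

!git clone https://huggingface.co/protectai/deberta-v3-base-prompt-injection-v2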
Note: If you use only the ONNX versions of the models, you can delete the other weight formats to save disk space, as sketched below.
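For example, a cleanup sketch for one repo, assuming the PyTorch weights sit at the repository root (layouts vary, so inspect each clone before deleting anything):

# Inspect the layout first, then drop the PyTorch weights and keep the ONNX export.
!ls ./deberta-v3-base-prompt-injection-v2
!rm -f ./deberta-v3-base-prompt-injection-v2/model.safetensors ./deberta-v3-base-prompt-injection-v2/pytorch_model.bin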
Use local models in LLM Guard
Now, we will use the local models in LLM Guard.
In [ ]:
!pip install llm_guard@git+https://github.com/protectai/llm-guard.git
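If the target server has no internet access at all, you can stage the package the same way as the models: download wheels on a connected machine and install from the local directory. This sketch assumes the PyPI release rather than the git checkout above; the paths are illustrative:

# On a machine with internet access:
!pip download llm-guard -d ./wheels
# On the offline server:
!pip install --no-index --find-links ./wheels llm-guard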
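As an additional safeguard against accidental downloads, you can put the Hugging Face libraries into offline mode globally; both environment variables below are honored by huggingface_hub and transformers and should be set before importing them:

import os

# Force all model resolution to happen from local paths and the local cache.
os.environ["HF_HUB_OFFLINE"] = "1"
os.environ["TRANSFORMERS_OFFLINE"] = "1"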
In [11]:
from llm_guard import scan_prompt
from llm_guard.input_scanners import (
Anonymize,
BanCompetitors,
BanTopics,
Code,
Gibberish,
Language,
PromptInjection,
Toxicity,
)
from llm_guard.input_scanners.anonymize_helpers import DEBERTA_AI4PRIVACY_v2_CONF
from llm_guard.input_scanners.ban_competitors import MODEL_BASE as BAN_COMPETITORS_MODEL
from llm_guard.input_scanners.ban_topics import MODEL_DEBERTA_BASE_V2 as BAN_TOPICS_MODEL
from llm_guard.input_scanners.code import DEFAULT_MODEL as CODE_MODEL
from llm_guard.input_scanners.gibberish import DEFAULT_MODEL as GIBBERISH_MODEL
from llm_guard.input_scanners.language import DEFAULT_MODEL as LANGUAGE_MODEL
from llm_guard.input_scanners.prompt_injection import V2_MODEL as PROMPT_INJECTION_MODEL
from llm_guard.input_scanners.toxicity import DEFAULT_MODEL as TOXICITY_MODEL
from llm_guard.vault import Vault
# Point each scanner's model at its local clone and pass local_files_only=True
# so transformers never reaches out to the Hub.
PROMPT_INJECTION_MODEL.path = "./deberta-v3-base-prompt-injection-v2"
PROMPT_INJECTION_MODEL.kwargs["local_files_only"] = True

DEBERTA_AI4PRIVACY_v2_CONF["DEFAULT_MODEL"].path = "./deberta-v3-base_finetuned_ai4privacy_v2"
DEBERTA_AI4PRIVACY_v2_CONF["DEFAULT_MODEL"].kwargs["local_files_only"] = True

BAN_TOPICS_MODEL.path = "./deberta-v3-base-zeroshot-v1.1-all-33"
BAN_TOPICS_MODEL.kwargs["local_files_only"] = True

TOXICITY_MODEL.path = "./unbiased-toxic-roberta"
TOXICITY_MODEL.kwargs["local_files_only"] = True

BAN_COMPETITORS_MODEL.path = "./span-marker-bert-base-orgs"
BAN_COMPETITORS_MODEL.kwargs["local_files_only"] = True

CODE_MODEL.path = "./programming-language-identification"
CODE_MODEL.kwargs["local_files_only"] = True

GIBBERISH_MODEL.path = "./autonlp-Gibberish-Detector-492513457"
GIBBERISH_MODEL.kwargs["local_files_only"] = True

LANGUAGE_MODEL.path = "./xlm-roberta-base-language-detection"
LANGUAGE_MODEL.kwargs["local_files_only"] = True
vault = Vault()

# Assemble the input scanners, each backed by a locally configured model.
input_scanners = [
Anonymize(vault, recognizer_conf=DEBERTA_AI4PRIVACY_v2_CONF),
BanTopics(["politics", "religion"], model=BAN_TOPICS_MODEL),
BanCompetitors(["google", "facebook"], model=BAN_COMPETITORS_MODEL),
Toxicity(model=TOXICITY_MODEL),
Code(["Python", "PHP"], model=CODE_MODEL),
Gibberish(model=GIBBERISH_MODEL),
Language(["en"], model=LANGUAGE_MODEL),
PromptInjection(model=PROMPT_INJECTION_MODEL),
]
sanitized_prompt, results_valid, results_score = scan_prompt(
input_scanners,
"I am happy",
)
print(sanitized_prompt)
print(results_valid)
print(results_score)
2024-03-21 12:39:44 [debug ] No entity types provided, using default default_entities=['CREDIT_CARD', 'CRYPTO', 'EMAIL_ADDRESS', 'IBAN_CODE', 'IP_ADDRESS', 'PERSON', 'PHONE_NUMBER', 'US_SSN', 'US_BANK_NUMBER', 'CREDIT_CARD_RE', 'UUID', 'EMAIL_ADDRESS_RE', 'US_SSN_RE']
2024-03-21 12:39:46 [debug ] Initialized NER model device=device(type='mps') model=Model(path='./deberta-v3-base_finetuned_ai4privacy_v2', subfolder='', onnx_path='Isotonic/deberta-v3-base_finetuned_ai4privacy_v2', onnx_subfolder='onnx', onnx_filename='model.onnx', kwargs={'local_files_only': True}, pipeline_kwargs={'aggregation_strategy': 'simple', 'ignore_labels': ['O', 'CARDINAL']})
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=CREDIT_CARD_RE
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=UUID
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=EMAIL_ADDRESS_RE
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=US_SSN_RE
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=BTC_ADDRESS
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=URL_RE
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=CREDIT_CARD
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=EMAIL_ADDRESS_RE
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=PHONE_NUMBER_ZH
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=PHONE_NUMBER_WITH_EXT
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=DATE_RE
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=TIME_RE
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=HEX_COLOR
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=PRICE_RE
2024-03-21 12:39:47 [debug ] Loaded regex pattern group_name=PO_BOX_RE
2024-03-21 12:39:48 [debug ] Initialized classification model device=device(type='mps') model=Model(path='./deberta-v3-base-zeroshot-v1.1-all-33', subfolder='', onnx_path='MoritzLaurer/deberta-v3-base-zeroshot-v1.1-all-33', onnx_subfolder='onnx', onnx_filename='model.onnx', kwargs={'local_files_only': True, 'max_length': 1000000000000000019884624838656}, pipeline_kwargs={'max_length': 512, 'truncation': True})
2024-03-21 12:39:55 [debug ] Initialized classification model device=device(type='mps') model=Model(path='./unbiased-toxic-roberta', subfolder='', onnx_path='ProtectAI/unbiased-toxic-roberta-onnx', onnx_subfolder='', onnx_filename='model.onnx', kwargs={'local_files_only': True, 'max_length': 512}, pipeline_kwargs={'padding': 'max_length', 'top_k': None, 'function_to_apply': 'sigmoid', 'truncation': True})
2024-03-21 12:39:56 [debug ] Initialized classification model device=device(type='mps') model=Model(path='./programming-language-identification', subfolder='', onnx_path='philomath-1209/programming-language-identification-onnx', onnx_subfolder='onnx', onnx_filename='model.onnx', kwargs={'local_files_only': True, 'max_length': 512}, pipeline_kwargs={'truncation': True})
2024-03-21 12:39:57 [debug ] Initialized classification model device=device(type='mps') model=Model(path='./autonlp-Gibberish-Detector-492513457', subfolder='', onnx_path='madhurjindal/autonlp-Gibberish-Detector-492513457', onnx_subfolder='onnx', onnx_filename='model.onnx', kwargs={'local_files_only': True, 'max_length': 512}, pipeline_kwargs={'truncation': True})
2024-03-21 12:40:01 [debug ] Initialized classification model device=device(type='mps') model=Model(path='./xlm-roberta-base-language-detection', subfolder='', onnx_path='ProtectAI/xlm-roberta-base-language-detection-onnx', onnx_subfolder='', onnx_filename='model.onnx', kwargs={'local_files_only': True, 'max_length': 512}, pipeline_kwargs={'max_length': 512, 'truncation': True, 'top_k': None})
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
2024-03-21 12:40:04 [debug ] Prompt does not have sensitive data to replace risk_score=0.0
2024-03-21 12:40:04 [debug ] Scanner completed elapsed_time_seconds=1.366613 is_valid=True scanner=Anonymize
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
2024-03-21 12:40:05 [debug ] No banned topics detected scores={'religion': 0.5899404287338257, 'politics': 0.4100596308708191}
2024-03-21 12:40:05 [debug ] Scanner completed elapsed_time_seconds=0.911 is_valid=True scanner=BanTopics
2024-03-21 12:40:05 [debug ] None of the competitors were detected
2024-03-21 12:40:05 [debug ] Scanner completed elapsed_time_seconds=0.569812 is_valid=True scanner=BanCompetitors
2024-03-21 12:40:06 [debug ] Not toxicity found in the text results=[[{'label': 'toxicity', 'score': 0.0003712967736646533}, {'label': 'male', 'score': 0.00016587311984039843}, {'label': 'female', 'score': 0.00012892877566628158}, {'label': 'insult', 'score': 0.00011079442629124969}, {'label': 'christian', 'score': 0.0001087861746782437}, {'label': 'psychiatric_or_mental_illness', 'score': 9.981756011256948e-05}, {'label': 'muslim', 'score': 7.031546556390822e-05}, {'label': 'white', 'score': 4.716941839433275e-05}, {'label': 'jewish', 'score': 3.9232210838235915e-05}, {'label': 'identity_attack', 'score': 2.9348657335503958e-05}, {'label': 'homosexual_gay_or_lesbian', 'score': 2.922919338743668e-05}, {'label': 'threat', 'score': 2.9109109163982794e-05}, {'label': 'black', 'score': 2.897163540183101e-05}, {'label': 'obscene', 'score': 2.86914873868227e-05}, {'label': 'sexual_explicit', 'score': 1.7762333300197497e-05}, {'label': 'severe_toxicity', 'score': 1.1558224741747836e-06}]]
2024-03-21 12:40:06 [debug ] Scanner completed elapsed_time_seconds=0.392971 is_valid=True scanner=Toxicity
2024-03-21 12:40:06 [debug ] No Markdown code blocks found in the output
2024-03-21 12:40:06 [debug ] Scanner completed elapsed_time_seconds=0.000252 is_valid=True scanner=Code
2024-03-21 12:40:06 [debug ] Gibberish detection finished results=[{'label': 'clean', 'score': 0.4235343933105469}]
2024-03-21 12:40:06 [debug ] No gibberish in the text highest_score=0.58 threshold=0.7
2024-03-21 12:40:06 [debug ] Scanner completed elapsed_time_seconds=0.104569 is_valid=True scanner=Gibberish
2024-03-21 12:40:06 [debug ] Only valid languages are found in the text.
2024-03-21 12:40:06 [debug ] Scanner completed elapsed_time_seconds=0.177882 is_valid=True scanner=Language
2024-03-21 12:40:06 [info ] Scanned prompt elapsed_time_seconds=3.525234 scores={'Anonymize': 0.0, 'BanTopics': 0.0, 'BanCompetitors': 0.0, 'Toxicity': 0.0, 'Code': 0.0, 'Gibberish': 0.0, 'Language': 0.0}
I am happy
{'Anonymize': True, 'BanTopics': True, 'BanCompetitors': True, 'Toxicity': True, 'Code': True, 'Gibberish': True, 'Language': True}
{'Anonymize': 0.0, 'BanTopics': 0.0, 'BanCompetitors': 0.0, 'Toxicity': 0.0, 'Code': 0.0, 'Gibberish': 0.0, 'Language': 0.0}
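scan_prompt returns the sanitized prompt together with a per-scanner validity flag and risk score, as printed above. In a deployment you would typically gate on the flags; a minimal sketch:

# Reject the prompt if any scanner marked it invalid.
if not all(results_valid.values()):
    failed = [name for name, valid in results_valid.items() if not valid]
    raise ValueError(f"Prompt rejected by scanners: {failed}")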