This content originally appeared on DEV Community and was authored by Guillermo Alcántara
Get the data
import pandas as pd
# Sample rule descriptions: several near-duplicate password-reset rules plus
# one unrelated admin-access rule.
descripciones = [
    "All users must reset passwords every 90 days.",
    "Passwords need to be reset by all users every 90 days.",
    "Admin access should be restricted.",
    "Passwords must change for users every 90 days.",
    "Passwords must change for users every 80 days.",
]

# Load the dataset: one row per description, with sequential 1-based rule IDs.
data = pd.DataFrame(
    {
        "Rule_ID": range(1, 1 + len(descripciones)),
        "Description": descripciones,
    }
)
Lexical similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Requires scikit-learn; in a notebook it can be installed with
# `!pip install scikit-learn`.
# Vectorize the descriptions with TF-IDF. Despite its name, `vectorizer` holds
# the document-term matrix returned by fit_transform(), not the
# TfidfVectorizer object itself.
vectorizer = TfidfVectorizer().fit_transform(data['Description'])
# Compute the pairwise cosine-similarity matrix between all descriptions.
cosine_sim_matrix = cosine_similarity(vectorizer)
# Crear un diccionario para almacenar las relaciones sin duplicados
def find_related_rules(matrix, rule_ids, threshold=0.8):
    """Group rules whose pairwise similarity reaches ``threshold``.

    Only the strict upper triangle of ``matrix`` is scanned (``j > i``), so
    every unordered pair of rules is examined exactly once and the diagonal
    (self-similarity) is skipped. A matching pair is reported under the rule
    that appears first in ``rule_ids``. Because each pair is visited once,
    no extra "already seen" bookkeeping is needed.

    Args:
        matrix: Square similarity matrix supporting ``len(matrix)`` and
            ``matrix[i, j]`` indexing (e.g. a 2-D NumPy array).
        rule_ids: Rule identifiers aligned with the rows/columns of
            ``matrix``. They are used as dictionary keys, so they should be
            unique; a repeated identifier overwrites the earlier entry.
        threshold: Minimum similarity (inclusive) for two rules to be
            considered related.

    Returns:
        A dict mapping a rule ID to a list of ``(related_rule_id, score)``
        tuples, where ``score`` is a built-in ``float`` rounded to two
        decimals. Rules without any related rule are omitted.
    """
    size = len(matrix)
    related_rules = {}
    for i in range(size):
        # Start at i + 1 so (A, B) is never reported again as (B, A).
        related = [
            # float() turns NumPy scalars into plain Python floats.
            (rule_ids[j], round(float(matrix[i, j]), 2))
            for j in range(i + 1, size)
            if matrix[i, j] >= threshold
        ]
        if related:
            related_rules[rule_ids[i]] = related
    return related_rules
# Apply the function to find related rules from the similarity matrix.
related_rules = find_related_rules(
    cosine_sim_matrix, data['Rule_ID'].tolist(), threshold=0.8
)
# Display the related rules: one header line per rule, then its matches.
print("Reglas relacionadas por similitud:")
for rule, relations in related_rules.items():
    print(f"Rule {rule} es similar a:")
    for related_rule, score in relations:
        print(f" - Rule {related_rule} con similitud de {score}")
Semantic similarity
!pip install sentence-transformers
from sentence_transformers import SentenceTransformer, util
# Load the pre-trained model for generating embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')
# Generate sentence embeddings for each rule description. The column is
# converted to a plain list of strings first: encode() is documented for a
# str or a list of str, and a pandas Series appears to work only when its
# index is the default 0..n-1 (verify against the installed
# sentence-transformers version).
embeddings = model.encode(data['Description'].tolist(), convert_to_tensor=True)
# Compute the semantic similarity matrix, then move it to the CPU and convert
# it to a NumPy array so it can be indexed as matrix[i, j].
# NOTE: this rebinds `cosine_sim_matrix`, replacing the TF-IDF matrix above.
cosine_sim_matrix = util.cos_sim(embeddings, embeddings).cpu().numpy()
# Function to find related rules based on semantic similarity
def find_related_rules(matrix, rule_ids, threshold=0.8):
    """Group rules whose pairwise similarity reaches ``threshold``.

    Only the strict upper triangle of ``matrix`` is scanned (``j > i``), so
    every unordered pair of rules is examined exactly once and the diagonal
    (self-similarity) is skipped. A matching pair is reported under the rule
    that appears first in ``rule_ids``. Because each pair is visited once,
    no extra "already seen" bookkeeping is needed.

    Args:
        matrix: Square similarity matrix supporting ``len(matrix)`` and
            ``matrix[i, j]`` indexing (e.g. a 2-D NumPy array).
        rule_ids: Rule identifiers aligned with the rows/columns of
            ``matrix``. They are used as dictionary keys, so they should be
            unique; a repeated identifier overwrites the earlier entry.
        threshold: Minimum similarity (inclusive) for two rules to be
            considered related.

    Returns:
        A dict mapping a rule ID to a list of ``(related_rule_id, score)``
        tuples, where ``score`` is a built-in ``float`` rounded to two
        decimals. Rules without any related rule are omitted.
    """
    size = len(matrix)
    related_rules = {}
    for i in range(size):
        # Only consider the upper triangle so (A, B) is never repeated as (B, A).
        related = [
            # float() turns NumPy scalars (e.g. float32) into plain floats.
            (rule_ids[j], round(float(matrix[i, j]), 2))
            for j in range(i + 1, size)
            if matrix[i, j] >= threshold
        ]
        if related:
            related_rules[rule_ids[i]] = related
    return related_rules
# Apply the function to find related rules from the semantic similarity matrix.
related_rules = find_related_rules(
    cosine_sim_matrix, data['Rule_ID'].tolist(), threshold=0.8
)
# Display the related rules: one header line per rule, then its matches.
print("Reglas relacionadas por similitud semántica:")
for rule, relations in related_rules.items():
    print(f"Rule {rule} es similar a:")
    for related_rule, score in relations:
        print(f" - Rule {related_rule} con similitud de {score}")
This content originally appeared on DEV Community and was authored by Guillermo Alcántara
Print
Share
Comment
Cite
Upload
Translate
Updates
There are no updates yet.
Click the Upload button above to add an update.
APA
MLA
Guillermo Alcántara | Sciencx (2024-10-16T18:51:25+00:00) Implementing similarity search algotithms. Retrieved from https://www.scien.cx/2024/10/16/implementing-similarity-search-algotithms/
" » Implementing similarity search algotithms." Guillermo Alcántara | Sciencx - Wednesday October 16, 2024, https://www.scien.cx/2024/10/16/implementing-similarity-search-algotithms/
HARVARD: Guillermo Alcántara | Sciencx Wednesday October 16, 2024 » Implementing similarity search algotithms., viewed ,<https://www.scien.cx/2024/10/16/implementing-similarity-search-algotithms/>
VANCOUVER: Guillermo Alcántara | Sciencx - » Implementing similarity search algotithms. [Internet]. [Accessed ]. Available from: https://www.scien.cx/2024/10/16/implementing-similarity-search-algotithms/
CHICAGO: " » Implementing similarity search algotithms." Guillermo Alcántara | Sciencx - Accessed . https://www.scien.cx/2024/10/16/implementing-similarity-search-algotithms/
IEEE: " » Implementing similarity search algotithms." Guillermo Alcántara | Sciencx [Online]. Available: https://www.scien.cx/2024/10/16/implementing-similarity-search-algotithms/. [Accessed: ]
rf:citation » Implementing similarity search algotithms | Guillermo Alcántara | Sciencx | https://www.scien.cx/2024/10/16/implementing-similarity-search-algotithms/ |
Please log in to upload a file.
There are no updates yet.
Click the Upload button above to add an update.