File size: 3,946 Bytes
f3cb94f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
"""
Test script for Supabase and Embedding services
"""
import sys
import os
from pathlib import Path
# Add parent directory to path
sys.path.append(str(Path(__file__).parent.parent))
# Set .env path explicitly
os.environ.setdefault('ENV_FILE', str(Path(__file__).parent.parent / '.env'))
from dotenv import load_dotenv
load_dotenv(Path(__file__).parent.parent / '.env')
from config import get_settings
from services.embedding_service import VietnameseEmbeddingService
from services.vector_db_service import SupabaseVectorDB
def test_embedding_service():
    """Smoke-test the Vietnamese embedding service.

    Embeds one sentence, embeds a small batch, and prints the similarity
    scores of two phrase pairs so they can be compared by eye. Nothing is
    asserted; the function only fails if one of the service calls raises.

    Returns:
        The ``VietnameseEmbeddingService`` instance created for the test.
    """
    print("\n=== Testing Vietnamese Embedding Service ===")
    service = VietnameseEmbeddingService()

    # Single-text embedding: report its length and a short preview.
    sample = "Sâm cau chữa tê thấp, đau khớp"
    print(f"\nTest text: '{sample}'")
    vector = service.embed_text(sample)
    print(f"Embedding dimension: {len(vector)}")
    print(f"First 5 values: {vector[:5]}")

    # Batch embedding: one call for several texts.
    phrases = ["Công dụng y học", "Cách dùng", "Lưu ý khi sử dụng"]
    print(f"\nBatch embedding {len(phrases)} texts...")
    vectors = service.embed_batch(phrases)
    print(f"Generated {len(vectors)} embeddings")

    # Similarity: embed three phrases (in this order), then compare the
    # first one against each of the other two.
    anchor, near, far = [
        service.embed_text(phrase)
        for phrase in ("chữa ho", "trị ho", "bổ thận")
    ]
    score_near = service.similarity(anchor, near)
    score_far = service.similarity(anchor, far)
    print(f"\nSimilarity 'chữa ho' vs 'trị ho': {score_near:.4f}")
    print(f"Similarity 'chữa ho' vs 'bổ thận': {score_far:.4f}")

    print("\n✅ Embedding service test passed!\n")
    return service
def test_vector_db():
    """Smoke-test the Supabase vector DB: count, insert, search, clean up.

    Connects with ``supabase_url`` / ``supabase_anon_key`` taken from
    ``get_settings()``, prints the current node count, inserts one hypernode
    whose ``plant_name`` is ``TEST_PLANT``, and runs a value-embedding search.

    The cleanup step runs in a ``finally`` clause, so the test row is removed
    even when the insert result, the search, or the result printing raises.
    Cleanup deletes from the ``hypernodes`` table filtered on
    ``plant_name == 'TEST_PLANT'``, so any other row using that plant name
    would presumably be removed as well.

    Returns:
        The ``SupabaseVectorDB`` instance created for the test.
    """
    print("\n=== Testing Supabase Vector DB ===")
    settings = get_settings()
    vector_db = SupabaseVectorDB(
        url=settings.supabase_url,
        key=settings.supabase_anon_key,
    )

    # Test count
    count = vector_db.count_nodes()
    print(f"Current node count: {count}")

    # Build the test node before touching the table, so an embedding failure
    # cannot leave anything behind.
    print("\nInserting test hypernode...")
    embed_service = VietnameseEmbeddingService()
    test_node = {
        "key": "Tên",
        "value": "Sâm cau TEST",
        "key_embedding": embed_service.embed_text("Tên"),
        "value_embedding": embed_service.embed_text("Sâm cau TEST"),
        "plant_name": "TEST_PLANT",
        "section": "Basic Info",
        "chunk_id": 0,
        "is_chunked": False,
    }

    try:
        # Test insert
        inserted = vector_db.insert_hypernode(test_node)
        print(f"Inserted node ID: {inserted['id']}")

        # Test search
        print("\nTesting vector search...")
        query_embedding = embed_service.embed_text("Sâm cau")
        results = vector_db.search_by_value(
            query_embedding=query_embedding,
            top_k=5,
            threshold=0.3,
        )
        print(f"Found {len(results)} results:")
        for i, result in enumerate(results[:3], 1):
            print(f"{i}. {result['plant_name']} - {result['key']}: {result['value'][:50]}... (sim: {result['similarity']:.4f})")
    finally:
        # Cleanup always runs: a failed step above must not leave the test
        # row in the table.
        print("\nCleaning up test node...")
        vector_db.client.table('hypernodes').delete().eq('plant_name', 'TEST_PLANT').execute()

    print("\n✅ Vector DB test passed!\n")
    return vector_db
if __name__ == "__main__":
    # Script entry point: run both service checks in order and report the
    # outcome through the process exit status as well as on stdout.
    print("=" * 60)
    print("Testing Plant Medicine RAG Backend Services")
    print("=" * 60)
    try:
        # Test embedding
        embed_service = test_embedding_service()
        # Test vector DB
        vector_db = test_vector_db()
    except Exception as e:
        print(f"\n❌ Test failed: {str(e)}")
        import traceback
        traceback.print_exc()
        # Exit non-zero so a shell or CI job can tell the run failed;
        # otherwise the caught exception would leave the status at 0.
        sys.exit(1)
    else:
        print("=" * 60)
        print("✅ ALL TESTS PASSED!")
        print("=" * 60)
|