File size: 3,946 Bytes
f3cb94f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
"""
Test script for Supabase and Embedding services
"""
import sys
import os
from pathlib import Path

# Make the directory one level above this script's folder importable.
# Path(__file__).parent is the folder containing this file; .parent.parent is
# the folder above it. Presumably that is where the `config` module and
# `services` package imported below live - confirm against the repo layout.
sys.path.append(str(Path(__file__).parent.parent))

# Point ENV_FILE at the .env file in that same directory. setdefault() only
# writes the variable when it is not already present in the environment, so an
# externally supplied ENV_FILE is left unchanged.
os.environ.setdefault('ENV_FILE', str(Path(__file__).parent.parent / '.env'))

# Load variables from that .env file before the project modules are imported.
from dotenv import load_dotenv
load_dotenv(Path(__file__).parent.parent / '.env')

from config import get_settings
from services.embedding_service import VietnameseEmbeddingService
from services.vector_db_service import SupabaseVectorDB


def test_embedding_service():
    """Smoke-test the Vietnamese embedding service.

    Embeds a single sentence, embeds a small batch, and prints the
    similarity scores of two phrase pairs. Nothing is asserted; the
    output is printed for manual inspection.

    Returns:
        The VietnameseEmbeddingService instance that was exercised.
    """
    print("\n=== Testing Vietnamese Embedding Service ===")

    service = VietnameseEmbeddingService()

    # Single-text embedding: report its length and a short preview.
    sample = "Sâm cau chữa tê thấp, đau khớp"
    print(f"\nTest text: '{sample}'")
    vector = service.embed_text(sample)
    print(f"Embedding dimension: {len(vector)}")
    print(f"First 5 values: {vector[:5]}")

    # Batch embedding: report how many embeddings came back.
    batch = ["Công dụng y học", "Cách dùng", "Lưu ý khi sử dụng"]
    print(f"\nBatch embedding {len(batch)} texts...")
    vectors = service.embed_batch(batch)
    print(f"Generated {len(vectors)} embeddings")

    # Similarity: compare one reference phrase against two other phrases.
    reference, second, third = [
        service.embed_text(phrase)
        for phrase in ("chữa ho", "trị ho", "bổ thận")
    ]
    score_second = service.similarity(reference, second)
    score_third = service.similarity(reference, third)

    print(f"\nSimilarity 'chữa ho' vs 'trị ho': {score_second:.4f}")
    print(f"Similarity 'chữa ho' vs 'bổ thận': {score_third:.4f}")

    print("\n✅ Embedding service test passed!\n")
    return service


def test_vector_db():
    """Smoke-test the Supabase vector DB: count, insert, search, cleanup.

    Inserts a temporary hypernode whose ``plant_name`` is ``TEST_PLANT``,
    runs a value-embedding search, prints up to three of the results, and
    then deletes every ``hypernodes`` row with that ``plant_name``.

    The cleanup runs in a ``finally`` block so the temporary row is removed
    even when a step after the insert raises; the exception still
    propagates to the caller afterwards.

    Returns:
        The SupabaseVectorDB instance that was exercised.
    """
    print("\n=== Testing Supabase Vector DB ===")

    settings = get_settings()
    vector_db = SupabaseVectorDB(
        url=settings.supabase_url,
        key=settings.supabase_anon_key,
    )

    # Test count
    count = vector_db.count_nodes()
    print(f"Current node count: {count}")

    # Test insert
    print("\nInserting test hypernode...")
    embed_service = VietnameseEmbeddingService()

    test_node = {
        "key": "Tên",
        "value": "Sâm cau TEST",
        "key_embedding": embed_service.embed_text("Tên"),
        "value_embedding": embed_service.embed_text("Sâm cau TEST"),
        "plant_name": "TEST_PLANT",
        "section": "Basic Info",
        "chunk_id": 0,
        "is_chunked": False,
    }

    inserted = vector_db.insert_hypernode(test_node)
    try:
        print(f"Inserted node ID: {inserted['id']}")

        # Test search
        print("\nTesting vector search...")
        query_embedding = embed_service.embed_text("Sâm cau")

        results = vector_db.search_by_value(
            query_embedding=query_embedding,
            top_k=5,
            threshold=0.3,
        )

        print(f"Found {len(results)} results:")
        # Only the first three hits are printed.
        for i, result in enumerate(results[:3], 1):
            print(f"{i}. {result['plant_name']} - {result['key']}: {result['value'][:50]}... (sim: {result['similarity']:.4f})")
    finally:
        # Always remove the temporary row, even if a step above failed,
        # so a broken run does not leave test data in the table.
        print("\nCleaning up test node...")
        vector_db.client.table('hypernodes').delete().eq('plant_name', 'TEST_PLANT').execute()

    print("\n✅ Vector DB test passed!\n")
    return vector_db


if __name__ == "__main__":
    # Script entry point: run both smoke tests in order and report the result.
    print("=" * 60)
    print("Testing Plant Medicine RAG Backend Services")
    print("=" * 60)

    try:
        # Test embedding
        test_embedding_service()

        # Test vector DB
        test_vector_db()
    except Exception as e:
        print(f"\n❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
        # Exit non-zero so shells and CI pipelines can detect the failure;
        # without this the script reports success (status 0) after an error.
        sys.exit(1)
    else:
        print("=" * 60)
        print("✅ ALL TESTS PASSED!")
        print("=" * 60)