Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -96,10 +96,13 @@ def fetch_html_content(url, timeout=10):
|
|
| 96 |
"""Fetch HTML content from URL (Raw HTML for Model)"""
|
| 97 |
try:
|
| 98 |
headers = {
|
| 99 |
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/… [removed line truncated by the diff view]
|
|
|
|
|
|
|
|
|
|
| 100 |
}
|
| 101 |
# verify=False is intentional for phishing detection, warning suppressed globally
|
| 102 |
-
response = requests.get(url, headers=headers, timeout=… [removed line truncated by the diff view]
|
| 103 |
response.raise_for_status()
|
| 104 |
|
| 105 |
# Return FULL RAW HTML content instead of stripped text
|
|
|
|
| 96 |
"""Fetch HTML content from URL (Raw HTML for Model)"""
|
| 97 |
try:
|
| 98 |
headers = {
|
| 99 |
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
| 100 |
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
|
| 101 |
+
'Accept-Language': 'en-US,en;q=0.9,vi;q=0.8',
|
| 102 |
+
'Referer': 'https://www.google.com/'
|
| 103 |
}
|
| 104 |
# verify=False is intentional for phishing detection, warning suppressed globally
|
| 105 |
+
response = requests.get(url, headers=headers, timeout=15, verify=False)
|
| 106 |
response.raise_for_status()
|
| 107 |
|
| 108 |
# Return FULL RAW HTML content instead of stripped text
|