dungeon29 committed on
Commit
869484c
·
verified ·
1 Parent(s): 273e5e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -96,10 +96,13 @@ def fetch_html_content(url, timeout=10):
96
  """Fetch HTML content from URL (Raw HTML for Model)"""
97
  try:
98
  headers = {
99
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
 
 
 
100
  }
101
  # verify=False is intentional for phishing detection, warning suppressed globally
102
- response = requests.get(url, headers=headers, timeout=timeout, verify=False)
103
  response.raise_for_status()
104
 
105
  # Return FULL RAW HTML content instead of stripped text
 
96
  """Fetch HTML content from URL (Raw HTML for Model)"""
97
  try:
98
  headers = {
99
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
100
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
101
+ 'Accept-Language': 'en-US,en;q=0.9,vi;q=0.8',
102
+ 'Referer': 'https://www.google.com/'
103
  }
104
  # verify=False is intentional for phishing detection, warning suppressed globally
105
+ response = requests.get(url, headers=headers, timeout=15, verify=False)
106
  response.raise_for_status()
107
 
108
  # Return FULL RAW HTML content instead of stripped text