严格反代理检测解决方案
方案1:使用Nginx + sub_filter(推荐)
配置要点:
- Host头处理:
# Send the upstream's hostname in the Host request header.
# Every nginx directive must be terminated with ';'.
proxy_set_header Host b.com;
- 内容替换:使用sub_filter替换所有域名引用
- JavaScript注入:注入脚本修复域名检测
适用场景:
- 静态网站
- 简单的动态网站
- 没有复杂JavaScript检测的网站
方案2:使用Node.js代理服务器
const express = require('express');
const { createProxyMiddleware } = require('http-proxy-middleware');
const cheerio = require('cheerio');

const app = express();

// Script appended to <head> of every rewritten HTML page.
// NOTE(review): the HTML Standard marks Location's attributes as unforgeable
// (non-configurable), so browsers are expected to reject the three
// `window.location` redefinitions below with a TypeError, which would also
// stop the `document.domain` override from running. Verify in target browsers.
const DOMAIN_FIX_SCRIPT = `
<script>
// 修复域名检测
Object.defineProperty(window.location, 'hostname', {
get: function() { return 'b.com'; }
});
Object.defineProperty(window.location, 'host', {
get: function() { return 'b.com'; }
});
Object.defineProperty(window.location, 'origin', {
get: function() { return 'https://b.com'; }
});
Object.defineProperty(document, 'domain', {
get: function() { return 'b.com'; },
set: function() { return 'b.com'; }
});
</script>
`;

/**
 * Rewrite b.com references in link and image URLs to a.com and append the
 * domain fix-up script to <head>.
 *
 * @param {string} html - Decoded HTML document received from the upstream.
 * @returns {string} The serialized, rewritten document.
 */
function rewriteHtml(html) {
  const $ = cheerio.load(html);

  $('a[href*="b.com"]').each(function () {
    $(this).attr('href', $(this).attr('href').replace(/b\.com/g, 'a.com'));
  });
  $('img[src*="b.com"]').each(function () {
    $(this).attr('src', $(this).attr('src').replace(/b\.com/g, 'a.com'));
  });

  $('head').append(DOMAIN_FIX_SCRIPT);
  return $.html();
}

/**
 * Relay the upstream response unchanged (status, headers and body).
 *
 * @param {import('http').IncomingMessage} proxyRes - Upstream response.
 * @param {import('http').ServerResponse} res - Response to the client.
 */
function passThrough(proxyRes, res) {
  res.writeHead(proxyRes.statusCode, proxyRes.headers);
  proxyRes.pipe(res);
}

app.use('/', createProxyMiddleware({
  target: 'https://b.com',
  changeOrigin: true,
  // We write the client response ourselves in onProxyRes. Without this flag
  // the proxy also pipes the upstream body to `res`, so the response would be
  // written twice.
  selfHandleResponse: true,
  onProxyReq(proxyReq) {
    // Ask for an uncompressed body so the HTML can be parsed as text.
    proxyReq.setHeader('accept-encoding', 'identity');
  },
  onProxyRes(proxyRes, req, res) {
    const contentType = proxyRes.headers['content-type'] || '';
    const contentEncoding = proxyRes.headers['content-encoding'];
    const isPlainBody = !contentEncoding || contentEncoding === 'identity';

    // Only uncompressed HTML is rewritten; everything else is relayed as-is.
    if (!contentType.includes('text/html') || !isPlainBody) {
      passThrough(proxyRes, res);
      return;
    }

    // Collect raw Buffers and decode once at the end: decoding chunk by chunk
    // corrupts multi-byte characters that straddle a chunk boundary.
    const chunks = [];
    proxyRes.on('data', (chunk) => {
      chunks.push(chunk);
    });

    proxyRes.on('end', () => {
      const html = rewriteHtml(Buffer.concat(chunks).toString('utf8'));

      // The body length changed, so the upstream framing headers no longer
      // apply; Node computes the framing for the new body.
      const headers = Object.assign({}, proxyRes.headers);
      delete headers['content-length'];
      delete headers['transfer-encoding'];

      res.writeHead(proxyRes.statusCode, headers);
      res.end(html);
    });

    proxyRes.on('error', () => {
      // Upstream stream failed mid-body: report a gateway error if nothing
      // has been sent yet, then close the client response.
      if (!res.headersSent) {
        res.writeHead(502);
      }
      res.end();
    });
  },
}));

app.listen(3000);
方案3:使用Python代理服务器
from flask import Flask, request, Response
import requests
from bs4 import BeautifulSoup
import re

app = Flask(__name__)

# Request headers we do not forward: Host is replaced explicitly, and
# Accept-Encoding is left to `requests` so it only advertises encodings it can
# actually decode.
_SKIPPED_REQUEST_HEADERS = ('host', 'accept-encoding')

# Upstream response headers that describe the upstream transfer, not the body
# we return: `requests` has already decompressed the body and the HTML branch
# changes its length.
_SKIPPED_RESPONSE_HEADERS = (
    'content-encoding',
    'content-length',
    'transfer-encoding',
    'connection',
)

# Script inserted before </head> of every rewritten HTML page.
# NOTE(review): the HTML Standard marks Location's attributes as unforgeable,
# so browsers are expected to reject the `window.location` redefinitions with
# a TypeError -- verify in target browsers.
JS_FIX = '''
<script>
Object.defineProperty(window.location, 'hostname', {
get: function() { return 'b.com'; }
});
Object.defineProperty(window.location, 'host', {
get: function() { return 'b.com'; }
});
Object.defineProperty(window.location, 'origin', {
get: function() { return 'https://b.com'; }
});
Object.defineProperty(document, 'domain', {
get: function() { return 'b.com'; },
set: function() { return 'b.com'; }
});
</script>
'''


@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def proxy(path):
    """Fetch ``https://b.com/<path>`` and relay it to the client.

    HTML responses have absolute and protocol-relative b.com URLs rewritten
    to a.com and ``JS_FIX`` inserted before ``</head>``; all other responses
    are relayed with their body unchanged.

    Args:
        path: Request path below the site root ('' for the root itself).

    Returns:
        A Flask ``Response`` carrying the upstream status code, the upstream
        headers minus the transfer-level ones, and the (possibly rewritten)
        body.
    """
    target_url = f'https://b.com/{path}'
    # Flask strips the query string from `path`; put it back so the upstream
    # sees the same URL the client asked for.
    if request.query_string:
        target_url += '?' + request.query_string.decode('utf-8', errors='replace')

    # Forward the client's headers, presenting the upstream's own hostname.
    headers = {
        name: value
        for name, value in request.headers.items()
        if name.lower() not in _SKIPPED_REQUEST_HEADERS
    }
    headers['Host'] = 'b.com'

    # A timeout keeps a stalled upstream from tying up this worker forever.
    resp = requests.get(target_url, headers=headers, timeout=30)

    response_headers = [
        (name, value)
        for name, value in resp.headers.items()
        if name.lower() not in _SKIPPED_RESPONSE_HEADERS
    ]

    if 'text/html' not in resp.headers.get('content-type', ''):
        # Relay non-HTML content without touching the body.
        return Response(resp.content, status=resp.status_code, headers=response_headers)

    # errors='replace' keeps a page with stray non-UTF-8 bytes from turning
    # into a 500.
    content = resp.content.decode('utf-8', errors='replace')

    # Rewrite domain references.
    content = re.sub(r'https?://b\.com', 'https://a.com', content)
    content = re.sub(r'//b\.com', '//a.com', content)

    # Inject the fix-up script.
    content = content.replace('</head>', JS_FIX + '</head>')
    return Response(content, status=resp.status_code, headers=response_headers)


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
方案4:使用浏览器自动化(最后手段)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from flask import Flask, request, Response
import time

app = Flask(__name__)


@app.route('/')
def proxy():
    """Render https://b.com in headless Chrome and return the resulting HTML.

    A fresh Chrome instance is started for each request and always shut down
    afterwards. Absolute b.com URLs in the rendered page source are rewritten
    to https://a.com.

    Returns:
        A Flask ``Response`` with mimetype ``text/html`` containing the
        rewritten page source.
    """
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get('https://b.com')
        time.sleep(2)  # give client-side scripts time to finish rendering

        # No `location` / `document.domain` override is executed here. The
        # headless browser really is on b.com, so those values are already
        # correct. Location's attributes are also unforgeable, so the
        # `Object.defineProperty(window.location, ...)` call is expected to
        # raise a TypeError in Chrome, which `execute_script` would surface
        # as an exception and fail the request.

        page_source = driver.page_source
        # Rewrite domain references.
        page_source = page_source.replace('https://b.com', 'https://a.com')
        page_source = page_source.replace('http://b.com', 'https://a.com')
        return Response(page_source, mimetype='text/html')
    finally:
        # Always release the browser process, even when rendering failed.
        driver.quit()


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
常见反代理检测及解决方案
1. Host头检测
# Send b.com as the Host request header on proxied requests.
proxy_set_header Host b.com;
2. Referer检测
# Send a fixed Referer of https://b.com on proxied requests.
proxy_set_header Referer https://b.com;
3. User-Agent检测
# Send a fixed User-Agent string on proxied requests.
proxy_set_header User-Agent "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
4. JavaScript域名检测
// Inject the fix-up script: make `location.hostname` read as 'b.com'.
// NOTE(review): the HTML Standard marks Location's attributes as unforgeable
// (non-configurable), so browsers are expected to throw a TypeError here --
// verify in target browsers.
Object.defineProperty(window.location, 'hostname', {
get: function() { return 'b.com'; }
});
5. CSP (Content Security Policy) 绕过
# Remove or modify the CSP header.
# proxy_hide_header stops the upstream's Content-Security-Policy header from
# being passed to the client; add_header then adds a policy whose
# frame-ancestors allows 'self' and https://a.com.
proxy_hide_header Content-Security-Policy;
add_header Content-Security-Policy "frame-ancestors 'self' https://a.com";
6. X-Frame-Options 绕过
# Remove the X-Frame-Options header: the upstream's value is not passed on to
# the client.
proxy_hide_header X-Frame-Options;
部署建议
- 先测试简单网站:验证基本代理功能
- 逐步添加功能:从基础代理开始,逐步添加内容替换
- 监控日志:密切关注错误日志和访问日志
- 性能优化:添加缓存和压缩
- 安全考虑:确保代理服务器安全配置
注意事项
- 代理某些网站可能违反其服务条款
- 需要确保合法使用
- 考虑性能和资源消耗
- 定期更新和维护代理配置