第05章:搜索查询优化:从SQL到结果排序
复杂SQL查询实现毫秒级搜索响应,排序算法决定用户体验
📝 TL;DR (核心要点速览)
– 查询核心:复杂JOIN + 聚合函数实现多维度权重计算
– 性能关键:参数化查询、索引覆盖、结果集控制
– 排序算法:多因子权重计算 + 长度惩罚 + 多样性奖励
– 安全防护:DoS防护、参数验证、查询限制
1. 搜索查询架构设计
1.1 从用户输入到SQL查询
用户输入:"advanced database techniques"
↓ [Tokenization]
Tokens:["advanced", "database", "datab", "techniques", "techniqu"]
↓ [权重计算]
Weighted Tokens:[
{token: "advanced", weight: 10, type: "word"},
{token: "datab", weight: 3, type: "prefix"},
{token: "techniques", weight: 8, type: "singular"}
]
↓ [SQL生成]
Optimized SQL Query
↓ [数据库执行]
Result Set with Scores
↓ [后处理]
Sorted Search Results
1.2 查询性能的三个层次
L1: 基础查询(0.1-1秒)
- 简单的token匹配
- 基础权重计算
- 有限的结果数量
L2: 优化查询(50-200ms)
- 复杂的权重计算
- 多因子排序算法
- 查询结果缓存
L3: 高性能查询(10-50ms)
- 查询预编译和参数化
- 索引覆盖优化
- 结果集预计算
2. 核心搜索查询实现
2.1 基础搜索查询
-- 核心搜索查询:支持多token、多字段、多权重
SELECT
d.id as document_id,
d.title,
d.description,
d.length,
d.created_at,
-- 基础匹配得分:所有匹配token的权重之和
SUM(t.weight * ie.field_weight) as base_score,
-- 匹配多样性:不同tokenizer类型的数量
COUNT(DISTINCT t.tokenizer_type) as tokenizer_diversity,
-- 字段覆盖度:匹配的字段数量
COUNT(DISTINCT ie.field_name) as field_coverage,
-- 平均权重:用于质量评估
AVG(t.weight * ie.field_weight) as avg_weight,
-- 位置奖励:token在文档中的位置
AVG(CASE
WHEN ie.position_in_field <= 10 THEN 1.5 -- 前部匹配高奖励
WHEN ie.position_in_field <= 50 THEN 1.2 -- 中部匹配中奖励
WHEN ie.position_in_field <= 100 THEN 1.0 -- 后部匹配无奖励
ELSE 0.8 -- 远距离匹配惩罚
END) as position_bonus,
-- 长度惩罚因子
CASE
WHEN d.length <= 100 THEN 1.3 -- 短文档奖励
WHEN d.length <= 500 THEN 1.1 -- 中等文档
WHEN d.length <= 2000 THEN 1.0 -- 标准文档
WHEN d.length <= 5000 THEN 0.8 -- 长文档轻度惩罚
ELSE 0.6 -- 超长文档重度惩罚
END as length_factor,
-- 新鲜度奖励
CASE
WHEN d.created_at >= DATE_SUB(NOW(), INTERVAL 7 DAY) THEN 1.2 -- 一周内
WHEN d.created_at >= DATE_SUB(NOW(), INTERVAL 30 DAY) THEN 1.1 -- 一月内
WHEN d.created_at >= DATE_SUB(NOW(), INTERVAL 90 DAY) THEN 1.0 -- 三月内
ELSE 0.9 -- 旧文档
END as freshness_factor
FROM documents d
INNER JOIN index_entries ie ON d.id = ie.document_id
INNER JOIN index_tokens t ON ie.token_id = t.id
WHERE d.status = 'active'
AND t.token IN (?, ?, ?, ?, ?) -- 参数化的搜索tokens
GROUP BY d.id, d.title, d.description, d.length, d.created_at
HAVING base_score > 0 -- 只返回有匹配的文档
-- 复杂排序:综合考虑多个因子
ORDER BY (
-- 主要得分因子
base_score *
tokenizer_diversity_factor *
field_coverage_factor *
avg_weight_factor *
position_bonus *
length_factor *
freshness_factor
) DESC,
-- 次要排序:按创建时间降序(同分时新文档优先)
d.created_at DESC,
-- 第三排序:按长度升序(同分时短文档优先)
d.length ASC
LIMIT ? OFFSET ?; -- 分页参数
2.2 参数化查询实现
class SearchQueryBuilder
{
private PDO $db;
private array $searchTokens = [];
private array $searchParams = [];
private string $sqlTemplate;
private int $maxResults = 100;
private int $defaultLimit = 20;
public function __construct(PDO $db)
{
$this->db = $db;
$this->initializeSqlTemplate();
}
/**
* 构建搜索查询
*/
public function buildQuery(string $userQuery, array $options = []): array
{
// 1. 处理用户输入,生成tokens
$this->processSearchInput($userQuery);
// 2. 构建参数化查询
$query = $this->buildParameterizedQuery($options);
// 3. 预处理查询语句
$stmt = $this->prepareQuery($query);
// 4. 绑定参数
$this->bindParameters($stmt, $options);
return ['query' => $query, 'statement' => $stmt, 'params' => $this->searchParams];
}
/**
* 处理搜索输入
*/
private function processSearchInput(string $userQuery): void
{
// 使用tokenizer处理输入
$tokenizer = new CompositeTokenizer([
'word' => true,
'prefix' => true,
'ngrams' => true,
'singular' => true
]);
$tokens = $tokenizer->tokenize($userQuery);
// 去重并按权重排序
$this->searchTokens = $this->deduplicateAndSort($tokens);
// 准备SQL参数
$this->searchParams = array_map(fn($t) => $t['token'], $this->searchTokens);
// 限制token数量,防止查询过大
if (count($this->searchParams) > 50) {
$this->searchParams = array_slice($this->searchParams, 0, 50);
}
}
/**
* 构建参数化查询SQL
*/
private function buildParameterizedQuery(array $options): string
{
$limit = min($options['limit'] ?? $this->defaultLimit, $this->maxResults);
$offset = ($options['page'] ?? 1 - 1) * $limit;
// 构建IN子句的占位符
$tokenPlaceholders = implode(',', array_fill(0, count($this->searchParams), '?'));
// 插入动态参数
$sql = str_replace(
['{TOKENS}', '{LIMIT}', '{OFFSET}'],
[$tokenPlaceholders, $limit, $offset],
$this->sqlTemplate
);
return $sql;
}
/**
* 预处理查询语句
*/
private function prepareQuery(string $query): PDOStatement
{
try {
return $this->db->prepare($query);
} catch (PDOException $e) {
throw new SearchException("Query preparation failed: " . $e->getMessage());
}
}
/**
* 绑定参数
*/
private function bindParameters(PDOStatement $stmt, array $options): void
{
// 绑定搜索tokens
$paramIndex = 1;
foreach ($this->searchParams as $token) {
$stmt->bindValue($paramIndex++, $token, PDO::PARAM_STR);
}
// 绑定分页参数
$limit = min($options['limit'] ?? $this->defaultLimit, $this->maxResults);
$offset = ($options['page'] ?? 1 - 1) * $limit;
$stmt->bindValue($paramIndex++, $limit, PDO::PARAM_INT);
$stmt->bindValue($paramIndex, $offset, PDO::PARAM_INT);
}
/**
* 初始化SQL模板
*/
private function initializeSqlTemplate(): void
{
$this->sqlTemplate = "
SELECT
d.id as document_id,
d.title,
d.description,
d.length,
d.created_at,
d.priority,
-- 得分计算
SUM(t.weight * ie.field_weight) as base_score,
COUNT(DISTINCT t.tokenizer_type) as tokenizer_diversity,
COUNT(DISTINCT ie.field_name) as field_coverage,
AVG(t.weight * ie.field_weight) as avg_weight,
-- 位置奖励
AVG(CASE
WHEN ie.position_in_field <= 10 THEN 1.5
WHEN ie.position_in_field <= 50 THEN 1.2
WHEN ie.position_in_field <= 100 THEN 1.0
ELSE 0.8
END) as position_bonus,
-- 长度因子
CASE
WHEN d.length <= 100 THEN 1.3
WHEN d.length <= 500 THEN 1.1
WHEN d.length <= 2000 THEN 1.0
WHEN d.length <= 5000 THEN 0.8
ELSE 0.6
END as length_factor,
-- 新鲜度因子
CASE
WHEN d.created_at >= DATE_SUB(NOW(), INTERVAL 7 DAY) THEN 1.2
WHEN d.created_at >= DATE_SUB(NOW(), INTERVAL 30 DAY) THEN 1.1
WHEN d.created_at >= DATE_SUB(NOW(), INTERVAL 90 DAY) THEN 1.0
ELSE 0.9
END as freshness_factor,
-- 计算综合得分
(
SUM(t.weight * ie.field_weight) *
(1 + 0.1 * LOG(1 + COUNT(DISTINCT t.tokenizer_type))) *
(1 + 0.05 * COUNT(DISTINCT ie.field_name)) *
(AVG(t.weight * ie.field_weight) / 10) *
AVG(CASE
WHEN ie.position_in_field <= 10 THEN 1.5
WHEN ie.position_in_field <= 50 THEN 1.2
WHEN ie.position_in_field <= 100 THEN 1.0
ELSE 0.8
END) *
CASE
WHEN d.length <= 100 THEN 1.3
WHEN d.length <= 500 THEN 1.1
WHEN d.length <= 2000 THEN 1.0
WHEN d.length <= 5000 THEN 0.8
ELSE 0.6
END *
CASE
WHEN d.created_at >= DATE_SUB(NOW(), INTERVAL 7 DAY) THEN 1.2
WHEN d.created_at >= DATE_SUB(NOW(), INTERVAL 30 DAY) THEN 1.1
WHEN d.created_at >= DATE_SUB(NOW(), INTERVAL 90 DAY) THEN 1.0
ELSE 0.9
END
) as final_score
FROM documents d
INNER JOIN index_entries ie ON d.id = ie.document_id
INNER JOIN index_tokens t ON ie.token_id = t.id
WHERE d.status = 'active'
AND t.token IN ({TOKENS})
GROUP BY d.id, d.title, d.description, d.length, d.created_at, d.priority
HING base_score > 0
ORDER BY final_score DESC, d.created_at DESC, d.length ASC
LIMIT {LIMIT} OFFSET {OFFSET}
";
}
/**
* 去重和排序tokens
*/
private function deduplicateAndSort(array $tokens): array
{
$uniqueTokens = [];
$seen = [];
foreach ($tokens as $token) {
$key = $token['token'] . '|' . $token['source'];
if (!isset($seen[$key])) {
$seen[$key] = true;
$uniqueTokens[] = $token;
}
}
// 按权重降序排序
usort($uniqueTokens, function($a, $b) {
return $b['weight'] - $a['weight'];
});
return $uniqueTokens;
}
}
3. 高级查询优化
3.1 分层查询策略
class LayeredSearchQuery
{
private PDO $db;
private SearchQueryBuilder $queryBuilder;
private CacheManager $cacheManager;
public function __construct(PDO $db, SearchQueryBuilder $queryBuilder, CacheManager $cacheManager)
{
$this->db = $db;
$this->queryBuilder = $queryBuilder;
$this->cacheManager = $cacheManager;
}
/**
* 分层搜索:快速响应 + 精确结果
*/
public function layeredSearch(string $query, array $options = []): array
{
$startTime = microtime(true);
// L1: 快速预搜索(缓存优先)
$quickResults = $this->quickSearch($query, $options);
if (count($quickResults) >= $options['limit'] ?? 20) {
return $this->formatResults($quickResults, 'quick', $startTime);
}
// L2: 完整搜索
$fullResults = $this->fullSearch($query, $options);
// L3: 后处理和排序优化
$optimizedResults = $this->optimizeResults($fullResults, $options);
return $this->formatResults($optimizedResults, 'full', $startTime);
}
/**
* 快速搜索:使用缓存和简化查询
*/
private function quickSearch(string $query, array $options): array
{
$cacheKey = $this->generateCacheKey('quick', $query, $options);
// 检查缓存
$cachedResults = $this->cacheManager->get($cacheKey);
if ($cachedResults !== null) {
return $cachedResults;
}
// 简化查询:只使用word tokenizer,限制结果数量
$quickOptions = array_merge($options, [
'tokenizer_types' => ['word'],
'limit' => min($options['limit'] ?? 20, 50),
'max_execution_time' => 50 // 50ms超时
]);
$results = $this->executeSimplifiedQuery($query, $quickOptions);
// 缓存结果(1分钟)
$this->cacheManager->set($cacheKey, $results, 60);
return $results;
}
/**
* 完整搜索:使用所有tokenizer和完整权重计算
*/
private function fullSearch(string $query, array $options): array
{
$cacheKey = $this->generateCacheKey('full', $query, $options);
// 检查缓存
$cachedResults = $this->cacheManager->get($cacheKey);
if ($cachedResults !== null) {
return $cachedResults;
}
// 构建完整查询
$queryData = $this->queryBuilder->buildQuery($query, $options);
// 执行查询
$results = $this->executeQuery($queryData['statement']);
// 缓存结果(5分钟)
$this->cacheManager->set($cacheKey, $results, 300);
return $results;
}
/**
* 执行简化查询
*/
private function executeSimplifiedQuery(string $query, array $options): array
{
// 只处理最重要的tokens
$tokenizer = new WordTokenizer();
$tokens = $tokenizer->tokenize($query);
$topTokens = array_slice($tokens, 0, 10);
if (empty($topTokens)) {
return [];
}
$tokenPlaceholders = implode(',', array_fill(0, count($topTokens), '?'));
$limit = $options['limit'] ?? 20;
$sql = "
SELECT
d.id as document_id,
d.title,
d.description,
(SUM(t.weight * ie.field_weight)) as score
FROM documents d
INNER JOIN index_entries ie ON d.id = ie.document_id
INNER JOIN index_tokens t ON ie.token_id = t.id
WHERE d.status = 'active'
AND t.token IN ({$tokenPlaceholders})
AND t.tokenizer_type = 'word'
GROUP BY d.id, d.title, d.description
HAVING score > 0
ORDER BY score DESC, d.created_at DESC
LIMIT {$limit}
";
$stmt = $this->db->prepare($sql);
foreach ($topTokens as $i => $token) {
$stmt->bindValue($i + 1, $token, PDO::PARAM_STR);
}
$stmt->execute();
return $stmt->fetchAll(PDO::FETCH_ASSOC);
}
/**
* 执行完整查询
*/
private function executeQuery(PDOStatement $stmt): array
{
try {
$stmt->execute();
return $stmt->fetchAll(PDO::FETCH_ASSOC);
} catch (PDOException $e) {
// 记录错误
error_log("Search query failed: " . $e->getMessage());
// 返回空结果而不是抛出异常
return [];
}
}
/**
* 结果后处理和优化
*/
private function optimizeResults(array $results, array $options): array
{
if (empty($results)) {
return $results;
}
// 1. 结果去重(如果有重复的document_id)
$results = $this->deduplicateResults($results);
// 2. 重新计算得分(应用业务规则)
$results = $this->recalculateScores($results, $options);
// 3. 应用个性化排序
if (!empty($options['user_id'])) {
$results = $this->applyPersonalization($results, $options['user_id']);
}
// 4. 应用多样性算法
if ($options['diversify'] ?? false) {
$results = $this->diversifyResults($results);
}
// 5. 最终排序
usort($results, function($a, $b) {
return $b['final_score'] <=> $a['final_score'];
});
return $results;
}
/**
* 结果去重
*/
private function deduplicateResults(array $results): array
{
$seen = [];
$deduplicated = [];
foreach ($results as $result) {
$docId = $result['document_id'];
if (!isset($seen[$docId])) {
$seen[$docId] = true;
$deduplicated[] = $result;
}
}
return $deduplicated;
}
/**
* 重新计算得分
*/
private function recalculateScores(array $results, array $options): array
{
foreach ($results as &$result) {
$baseScore = $result['final_score'] ?? $result['base_score'] ?? 0;
// 应用业务规则调整
if (!empty($options['category_boost']) && isset($result['category_id'])) {
$categoryBoost = $options['category_boost'][$result['category_id']] ?? 1.0;
$baseScore *= $categoryBoost;
}
// 应用优先级
if (isset($result['priority']) && $result['priority'] > 0) {
$baseScore *= (1 + $result['priority'] * 0.1);
}
$result['final_score'] = $baseScore;
}
return $results;
}
/**
* 应用个性化排序
*/
private function applyPersonalization(array $results, int $userId): array
{
// 获取用户偏好
$userPreferences = $this->getUserPreferences($userId);
foreach ($results as &$result) {
$personalizationScore = 1.0;
// 基于用户历史行为调整得分
if (!empty($userPreferences['categories']) && isset($result['category_id'])) {
$categoryPref = $userPreferences['categories'][$result['category_id']] ?? 1.0;
$personalizationScore *= $categoryPref;
}
if (!empty($userPreferences['authors']) && isset($result['author_id'])) {
$authorPref = $userPreferences['authors'][$result['author_id']] ?? 1.0;
$personalizationScore *= $authorPref;
}
$result['personalized_score'] = $result['final_score'] * $personalizationScore;
}
return $results;
}
/**
* 结果多样性算法
*/
private function diversifyResults(array $results): array
{
$diversified = [];
$categoryCount = [];
$maxPerCategory = 3; // 每个类别最多3个结果
foreach ($results as $result) {
$category = $result['category_id'] ?? 'unknown';
if (!isset($categoryCount[$category]) || $categoryCount[$category] < $maxPerCategory) {
$diversified[] = $result;
$categoryCount[$category] = ($categoryCount[$category] ?? 0) + 1;
if (count($diversified) >= 20) { // 限制总结果数
break;
}
}
}
return $diversified;
}
/**
* 格式化最终结果
*/
private function formatResults(array $results, string $searchType, float $startTime): array
{
$executionTime = (microtime(true) - $startTime) * 1000; // ms
return [
'results' => $results,
'meta' => [
'total' => count($results),
'search_type' => $searchType,
'execution_time_ms' => round($executionTime, 2),
'cached' => $executionTime < 10 // 小于10ms认为可能来自缓存
]
];
}
private function generateCacheKey(string $prefix, string $query, array $options): string
{
return $prefix . ':' . md5($query . serialize($options));
}
private function getUserPreferences(int $userId): array
{
// 占位符:实际应该从数据库获取用户偏好
return [
'categories' => [
1 => 1.2, // 技术类偏好
2 => 0.8 // 其他类别
],
'authors' => []
];
}
}
3.2 查询性能监控
class SearchPerformanceMonitor
{
private PDO $db;
private array $slowQueryThreshold = [
'quick' => 50, // 50ms
'full' => 200, // 200ms
'complex' => 500 // 500ms
];
public function __construct(PDO $db)
{
$this->db = $db;
}
/**
* 监控搜索查询性能
*/
public function monitorSearchQuery(
string $query,
float $executionTime,
int $resultCount,
string $searchType = 'full'
): void {
$this->recordQueryMetrics($query, $executionTime, $resultCount, $searchType);
if ($executionTime > $this->slowQueryThreshold[$searchType]) {
$this->handleSlowQuery($query, $executionTime, $searchType);
}
$this->updatePerformanceStats($searchType, $executionTime, $resultCount);
}
/**
* 记录查询指标
*/
private function recordQueryMetrics(string $query, float $time, int $results, string $type): void
{
$sql = "INSERT INTO search_metrics
(query_hash, query_length, execution_time_ms, result_count, search_type, timestamp)
VALUES (?, ?, ?, ?, ?, NOW())";
$stmt = $this->db->prepare($sql);
$stmt->execute([
md5($query),
strlen($query),
$time,
$results,
$type
]);
}
/**
* 处理慢查询
*/
private function handleSlowQuery(string $query, float $time, string $type): void
{
// 记录慢查询日志
error_log("Slow search query detected: {$type}, {$time}ms, query: {$query}");
// 分析慢查询原因
$analysis = $this->analyzeSlowQuery($query, $time);
// 生成优化建议
$recommendations = $this->generateOptimizationRecommendations($analysis);
// 发送警报(如果需要)
if ($time > $this->slowQueryThreshold[$type] * 2) {
$this->sendSlowQueryAlert($query, $time, $recommendations);
}
// 记录到慢查询表
$this->recordSlowQuery($query, $time, $type, $analysis, $recommendations);
}
/**
* 分析慢查询
*/
private function analyzeSlowQuery(string $query, float $time): array
{
$analysis = [];
// 分析查询复杂度
$tokenCount = str_word_count($query);
if ($tokenCount > 10) {
$analysis['complexity'] = 'high_token_count';
}
// 检查是否有特殊字符
if (preg_match('/[^a-zA-Z0-9\s]/', $query)) {
$analysis['special_chars'] = true;
}
// 检查查询长度
if (strlen($query) > 100) {
$analysis['long_query'] = true;
}
// 分析时间模式
if ($time > 1000) {
$analysis['severe_slowness'] = true;
} elseif ($time > 500) {
$analysis['moderate_slowness'] = true;
}
return $analysis;
}
/**
* 生成优化建议
*/
private function generateOptimizationRecommendations(array $analysis): array
{
$recommendations = [];
foreach ($analysis as $issue => $value) {
switch ($issue) {
case 'high_token_count':
$recommendations[] = "Consider limiting token count or using prefix matching";
break;
case 'special_chars':
$recommendations[] = "Implement better input sanitization";
break;
case 'long_query':
$recommendations[] = "Consider query length limits or suggestion system";
break;
case 'severe_slowness':
$recommendations[] = "Immediate optimization required - check indexes and query structure";
break;
case 'moderate_slowness':
$recommendations[] = "Consider query optimization or caching";
break;
}
}
return $recommendations;
}
/**
* 发送慢查询警报
*/
private function sendSlowQueryAlert(string $query, float $time, array $recommendations): void
{
$alert = [
'type' => 'slow_search_query',
'timestamp' => date('Y-m-d H:i:s'),
'execution_time' => $time,
'query' => $query,
'recommendations' => $recommendations
];
// 这里可以集成实际的警报系统(邮件、Slack等)
error_log("ALERT: " . json_encode($alert));
}
/**
* 记录慢查询详情
*/
private function recordSlowQuery(string $query, float $time, string $type, array $analysis, array $recommendations): void
{
$sql = "INSERT INTO slow_search_queries
(query, execution_time_ms, search_type, analysis, recommendations, created_at)
VALUES (?, ?, ?, ?, ?, NOW())";
$stmt = $this->db->prepare($sql);
$stmt->execute([
$query,
$time,
$type,
json_encode($analysis),
json_encode($recommendations)
]);
}
/**
* 更新性能统计
*/
private function updatePerformanceStats(string $type, float $time, int $results): void
{
$sql = "INSERT INTO search_performance_stats
(search_type, avg_time_ms, total_queries, total_results, last_updated)
VALUES (?, ?, 1, ?, NOW())
ON DUPLICATE KEY UPDATE
avg_time_ms = (avg_time_ms * total_queries + ?) / (total_queries + 1),
total_queries = total_queries + 1,
total_results = total_results + ?,
last_updated = NOW()";
$stmt = $this->db->prepare($sql);
$stmt->execute([$type, $time, $results, $time, $results]);
}
/**
* 获取性能报告
*/
public function getPerformanceReport(array $options = []): array
{
$hours = $options['hours'] ?? 24;
$sql = "SELECT
search_type,
COUNT(*) as query_count,
AVG(execution_time_ms) as avg_time,
MIN(execution_time_ms) as min_time,
MAX(execution_time_ms) as max_time,
AVG(result_count) as avg_results,
COUNT(*) - SUM(CASE WHEN execution_time_ms <= ? THEN 1 ELSE 0 END) as slow_queries
FROM search_metrics
WHERE timestamp >= DATE_SUB(NOW(), INTERVAL ? HOUR)
GROUP BY search_type
ORDER BY avg_time DESC";
$stmt = $this->db->prepare($sql);
$threshold = $this->slowQueryThreshold['full'];
$stmt->execute([$threshold, $hours]);
return $stmt->fetchAll(PDO::FETCH_ASSOC);
}
}
4. 安全防护实现
4.1 DoS攻击防护
class SearchDoSProtection
{
private Redis $redis;
private array $rateLimits = [
'per_ip' => ['requests' => 100, 'window' => 60], // 每IP每分钟100次
'per_user' => ['requests' => 500, 'window' => 60], // 每用户每分钟500次
'global' => ['requests' => 10000, 'window' => 60] // 全局每分钟10000次
];
private array $blockedIps = [];
public function __construct(Redis $redis)
{
$this->redis = $redis;
}
/**
* 检查搜索请求是否被允许
*/
public function isSearchAllowed(string $clientIp, ?int $userId = null): array
{
$result = [
'allowed' => true,
'reason' => '',
'retry_after' => 0
];
// 1. 检查IP是否被封禁
if ($this->isIpBlocked($clientIp)) {
$result['allowed'] = false;
$result['reason'] = 'IP blocked';
return $result;
}
// 2. 检查IP速率限制
$ipCheck = $this->checkRateLimit('per_ip', $clientIp);
if (!$ipCheck['allowed']) {
$result['allowed'] = false;
$result['reason'] = 'IP rate limit exceeded';
$result['retry_after'] = $ipCheck['retry_after'];
return $result;
}
// 3. 检查用户速率限制
if ($userId) {
$userCheck = $this->checkRateLimit('per_user', "user:{$userId}");
if (!$userCheck['allowed']) {
$result['allowed'] = false;
$result['reason'] = 'User rate limit exceeded';
$result['retry_after'] = $userCheck['retry_after'];
return $result;
}
}
// 4. 检查全局速率限制
$globalCheck = $this->checkRateLimit('global', 'global');
if (!$globalCheck['allowed']) {
$result['allowed'] = false;
$result['reason'] = 'Global rate limit exceeded';
$result['retry_after'] = $globalCheck['retry_after'];
return $result;
}
// 5. 记录请求
$this->recordSearchRequest($clientIp, $userId);
return $result;
}
/**
* 检查速率限制
*/
private function checkRateLimit(string $limitType, string $key): array
{
$config = $this->rateLimits[$limitType];
$redisKey = "search_rate_limit:{$limitType}:{$key}";
$current = $this->redis->incr($redisKey);
if ($current === 1) {
// 第一次请求,设置过期时间
$this->redis->expire($redisKey, $config['window']);
}
$ttl = $this->redis->ttl($redisKey);
if ($current > $config['requests']) {
return [
'allowed' => false,
'current' => $current,
'limit' => $config['requests'],
'retry_after' => $ttl
];
}
return [
'allowed' => true,
'current' => $current,
'limit' => $config['requests'],
'remaining' => $config['requests'] - $current
];
}
/**
* 记录搜索请求
*/
private function recordSearchRequest(string $clientIp, ?int $userId = null): void
{
$this->redis->incr("search_requests:total");
$this->redis->incr("search_requests:ip:{$clientIp}");
if ($userId) {
$this->redis->incr("search_requests:user:{$userId}");
}
}
/**
* 检查IP是否被封禁
*/
private function isIpBlocked(string $clientIp): bool
{
if (in_array($clientIp, $this->blockedIps)) {
return true;
}
$blocked = $this->redis->get("search_blocked_ip:{$clientIp}");
return $blocked !== false;
}
/**
* 封禁IP
*/
public function blockIp(string $clientIp, int $duration = 3600): void
{
$this->redis->setex("search_blocked_ip:{$clientIp}", $duration, '1');
$this->blockedIps[] = $clientIp;
}
/**
* 检查查询复杂度
*/
public function validateQueryComplexity(string $query): array
{
$validation = [
'valid' => true,
'reason' => '',
'suggestions' => []
];
// 检查查询长度
if (strlen($query) > 500) {
$validation['valid'] = false;
$validation['reason'] = 'Query too long';
$validation['suggestions'][] = 'Please shorten your search query';
}
// 检查token数量
$tokenCount = str_word_count($query);
if ($tokenCount > 20) {
$validation['valid'] = false;
$validation['reason'] = 'Too many search terms';
$validation['suggestions'][] = 'Use fewer and more specific terms';
}
// 检查特殊字符比例
$specialCharCount = preg_match_all('/[^a-zA-Z0-9\s]/', $query);
$specialCharRatio = $specialCharCount / strlen($query);
if ($specialCharRatio > 0.3) {
$validation['valid'] = false;
$validation['reason'] = 'Too many special characters';
$validation['suggestions'][] = 'Use only letters and numbers';
}
return $validation;
}
/**
* 清理过期数据
*/
public function cleanup(): int
{
$cleaned = 0;
// 清理过期的速率限制数据
$patterns = [
'search_rate_limit:*',
'search_requests:*'
];
foreach ($patterns as $pattern) {
$keys = $this->redis->keys($pattern);
foreach ($keys as $key) {
if ($this->redis->ttl($key) === -1) {
$this->redis->del($key);
$cleaned++;
}
}
}
return $cleaned;
}
}
5. 搜索服务架构
5.1 完整搜索服务
class SearchService
{
private PDO $db;
private SearchQueryBuilder $queryBuilder;
private LayeredSearchQuery $layeredSearch;
private SearchPerformanceMonitor $monitor;
private SearchDoSProtection $dosProtection;
private CacheManager $cacheManager;
public function __construct(
PDO $db,
SearchQueryBuilder $queryBuilder,
LayeredSearchQuery $layeredSearch,
SearchPerformanceMonitor $monitor,
SearchDoSProtection $dosProtection,
CacheManager $cacheManager
) {
$this->db = $db;
$this->queryBuilder = $queryBuilder;
$this->layeredSearch = $layeredSearch;
$this->monitor = $monitor;
$this->dosProtection = $dosProtection;
$this->cacheManager = $cacheManager;
}
/**
* 主搜索方法
*/
public function search(string $query, array $options = []): array
{
$startTime = microtime(true);
try {
// 1. 输入验证
$validation = $this->validateSearchRequest($query, $options);
if (!$validation['valid']) {
return $this->errorResponse($validation['reason'], 400);
}
// 2. DoS防护
$ipCheck = $this->dosProtection->isSearchAllowed(
$options['client_ip'] ?? 'unknown',
$options['user_id'] ?? null
);
if (!$ipCheck['allowed']) {
return $this->errorResponse($ipCheck['reason'], 429, [
'retry_after' => $ipCheck['retry_after']
]);
}
// 3. 查询复杂度检查
$complexityCheck = $this->dosProtection->validateQueryComplexity($query);
if (!$complexityCheck['valid']) {
return $this->errorResponse($complexityCheck['reason'], 400, [
'suggestions' => $complexityCheck['suggestions']
]);
}
// 4. 执行搜索
$results = $this->layeredSearch->layeredSearch($query, $options);
// 5. 性能监控
$executionTime = (microtime(true) - $startTime) * 1000;
$this->monitor->monitorSearchQuery(
$query,
$executionTime,
count($results['results']),
$results['meta']['search_type'] ?? 'full'
);
// 6. 记录搜索日志
$this->logSearch($query, $options, $results, $executionTime);
return $this->successResponse($results);
} catch (Exception $e) {
// 记录错误
$this->logError($query, $options, $e);
// 返回错误响应
return $this->errorResponse('Internal server error', 500);
}
}
/**
* 搜索建议功能
*/
public function suggest(string $query, array $options = []): array
{
$startTime = microtime(true);
try {
// 基础验证
if (strlen($query) < 2) {
return $this->successResponse(['suggestions' => []]);
}
// 获取建议
$suggestions = $this->generateSuggestions($query, $options);
$executionTime = (microtime(true) - $startTime) * 1000;
return $this->successResponse([
'suggestions' => $suggestions,
'execution_time_ms' => round($executionTime, 2)
]);
} catch (Exception $e) {
$this->logError("Suggest: {$query}", $options, $e);
return $this->errorResponse('Suggestion service unavailable', 503);
}
}
/**
* 生成搜索建议
*/
private function generateSuggestions(string $query, array $options): array
{
$suggestions = [];
// 1. 前缀匹配建议
$prefixSuggestions = $this->getPrefixSuggestions($query, $options['limit'] ?? 5);
$suggestions = array_merge($suggestions, $prefixSuggestions);
// 2. 拼写纠错建议
$spellSuggestions = $this->getSpellSuggestions($query, 3);
$suggestions = array_merge($suggestions, $spellSuggestions);
// 3. 热门搜索建议
$popularSuggestions = $this->getPopularSuggestions(2);
$suggestions = array_merge($suggestions, $popularSuggestions);
// 去重并限制数量
$suggestions = array_unique($suggestions);
return array_slice($suggestions, 0, $options['limit'] ?? 10);
}
/**
* 获取前缀匹配建议
*/
private function getPrefixSuggestions(string $query, int $limit): array
{
$sql = "SELECT DISTINCT token, COUNT(*) as frequency
FROM index_tokens
WHERE token LIKE ?
AND tokenizer_type IN ('word', 'prefix')
ORDER BY frequency DESC, token_length ASC
LIMIT {$limit}";
$stmt = $this->db->prepare($sql);
$stmt->execute([$query . '%']);
$suggestions = [];
foreach ($stmt->fetchAll(PDO::FETCH_ASSOC) as $row) {
$suggestions[] = $row['token'];
}
return $suggestions;
}
/**
* 获取拼写纠错建议
*/
private function getSpellSuggestions(string $query, int $limit): array
{
// 使用编辑距离算法查找相似的token
$suggestions = [];
$sql = "SELECT token,
(LENGTH(token) - LENGTH(?)) AS length_diff
FROM index_tokens
WHERE tokenizer_type = 'word'
AND token != ?
AND (token LIKE ? OR ? LIKE CONCAT('%', token, '%'))
ORDER BY length_diff ASC, LENGTH(token) ASC
LIMIT {$limit}";
$stmt = $this->db->prepare($sql);
$stmt->execute([$query, $query, "%{$query}%", $query]);
foreach ($stmt->fetchAll(PDO::FETCH_ASSOC) as $row) {
if (levenshtein($query, $row['token']) <= 2) { // 编辑距离不超过2
$suggestions[] = $row['token'];
}
}
return $suggestions;
}
/**
* 获取热门搜索建议
*/
private function getPopularSuggestions(int $limit): array
{
$cacheKey = 'popular_search_suggestions';
$cached = $this->cacheManager->get($cacheKey);
if ($cached !== null) {
return $cached;
}
$sql = "SELECT query, COUNT(*) as frequency
FROM search_metrics
WHERE timestamp >= DATE_SUB(NOW(), INTERVAL 24 HOUR)
AND result_count > 0
GROUP BY query
ORDER BY frequency DESC
LIMIT {$limit}";
$suggestions = array_column($this->db->query($sql)->fetchAll(PDO::FETCH_ASSOC), 'query');
// 缓存1小时
$this->cacheManager->set($cacheKey, $suggestions, 3600);
return $suggestions;
}
/**
* 验证搜索请求
*/
private function validateSearchRequest(string $query, array $options): array
{
$validation = ['valid' => true, 'reason' => ''];
// 检查查询长度
if (strlen(trim($query)) < 1) {
$validation['valid'] = false;
$validation['reason'] = 'Search query is required';
return $validation;
}
if (strlen($query) > 500) {
$validation['valid'] = false;
$validation['reason'] = 'Search query too long';
return $validation;
}
// 检查选项
$limit = $options['limit'] ?? 20;
if ($limit < 1 || $limit > 100) {
$validation['valid'] = false;
$validation['reason'] = 'Invalid limit parameter';
return $validation;
}
$page = $options['page'] ?? 1;
if ($page < 1 || $page > 1000) {
$validation['valid'] = false;
$validation['reason'] = 'Invalid page parameter';
return $validation;
}
return $validation;
}
/**
* 记录搜索日志
*/
private function logSearch(string $query, array $options, array $results, float $executionTime): void
{
$sql = "INSERT INTO search_logs
(query, user_id, client_ip, results_count, execution_time_ms, search_type, created_at)
VALUES (?, ?, ?, ?, ?, ?, NOW())";
$stmt = $this->db->prepare($sql);
$stmt->execute([
$query,
$options['user_id'] ?? null,
$options['client_ip'] ?? null,
count($results['results'] ?? []),
$executionTime,
$results['meta']['search_type'] ?? 'full'
]);
}
/**
* 记录错误日志
*/
private function logError(string $query, array $options, Exception $e): void
{
$sql = "INSERT INTO search_error_logs
(query, user_id, client_ip, error_message, error_trace, created_at)
VALUES (?, ?, ?, ?, ?, NOW())";
$stmt = $this->db->prepare($sql);
$stmt->execute([
$query,
$options['user_id'] ?? null,
$options['client_ip'] ?? null,
$e->getMessage(),
$e->getTraceAsString()
]);
}
/**
* 成功响应格式
*/
private function successResponse(array $data): array
{
return [
'success' => true,
'data' => $data,
'timestamp' => time()
];
}
/**
* 错误响应格式
*/
private function errorResponse(string $message, int $code, array $extra = []): array
{
$response = [
'success' => false,
'error' => [
'message' => $message,
'code' => $code
],
'timestamp' => time()
];
if (!empty($extra)) {
$response['error'] = array_merge($response['error'], $extra);
}
return $response;
}
}
6. 本章总结
6.1 核心收获
查询优化:
– 掌握复杂SQL查询的设计和优化
– 理解参数化查询的安全性和性能优势
– 学会分层查询策略实现毫秒级响应
– 掌握查询结果的后处理和排序算法
性能监控:
– 建立完善的查询性能监控体系
– 实现慢查询检测和自动优化建议
– 掌握查询缓存和结果预计算策略
– 理解查询复杂度对性能的影响
安全防护:
– 实现多层次的DoS攻击防护
– 掌握速率限制和IP封禁机制
– 学会查询复杂度验证和输入净化
– 建立完整的错误监控和日志系统
架构设计:
– 构建可扩展的搜索服务架构
– 实现搜索建议和自动补全功能
– 掌握缓存策略和性能优化技巧
– 建立完善的监控和诊断体系
6.2 下章预告
最后一章我们将深入生产环境部署,学习如何:
- 配置和调优生产环境的数据库参数
- 实现高可用和负载均衡的搜索架构
- 建立监控、告警和故障恢复机制
- 制定容量规划和扩展策略
实践作业:实现一个完整的搜索服务,包含搜索、建议、监控和安全防护功能。
上一篇 → 第04章:索引系统架构 | 下一篇 → 第06章:生产环境部署






最新评论
照片令人惊艳。万分感谢 温暖。
氛围绝佳。由衷感谢 感受。 你的博客让人一口气读完。敬意 真诚。
实用的 杂志! 越来越好!
又到年底了,真快!
研究你的文章, 我体会到美好的心情。
感谢激励。由衷感谢
好久没见过, 如此温暖又有信息量的博客。敬意。
很稀有, 这么鲜明的文字。谢谢。