# SPDX-License-Identifier: AGPL-3.0-or-later
# lint: pylint
"""
DuckDuckGo Lite
~~~~~~~~~~~~~~~
"""

from typing import TYPE_CHECKING
import re
from urllib.parse import urlencode
import json
import babel
import lxml.html

from searx import (
    locales,
    redislib,
    external_bang,
)
from searx.utils import (
    eval_xpath,
    eval_xpath_getindex,
    extract_text,
)
from searx.network import get  # see https://github.com/searxng/searxng/issues/762
from searx import redisdb
from searx.enginelib.traits import EngineTraits

if TYPE_CHECKING:
    import logging

    logger: logging.Logger

traits: EngineTraits

about = {
    "website": 'https://lite.duckduckgo.com/lite/',
    "wikidata_id": 'Q12805',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

send_accept_language_header = True
"""DuckDuckGo-Lite tries to guess user's preferred language from the HTTP
``Accept-Language``.  Optionally the user can select a region filter (but not a
language).
"""

# engine dependent config
categories = ['general', 'web']
paging = True
time_range_support = True
safesearch = True  # user can't select but the results are filtered

url = 'https://lite.duckduckgo.com/lite/'
# url_ping = 'https://duckduckgo.com/t/sl_l'

time_range_dict = {'day': 'd', 'week': 'w', 'month': 'm', 'year': 'y'}
form_data = {'v': 'l', 'api': 'd.js', 'o': 'json'}


def cache_vqd(query, value):
    """Caches a ``vqd`` value from a query.

    The value is stored under a key built from the ``SearXNG_ddg_vqd`` prefix
    and ``redislib.secret_hash(query)``, so the plain query text is not used as
    the key.  The entry is written with ``ex=600``.  If ``redisdb.client()``
    returns nothing usable, the function does nothing.

    :param query: search term the ``vqd`` value belongs to
    :param value: ``vqd`` value to store
    """
    c = redisdb.client()
    if c:
        logger.debug("cache vqd value: %s", value)
        key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
        c.set(key, value, ex=600)


def get_vqd(query):
    """Returns the ``vqd`` that fits to the *query*.  If there is no ``vqd`` cached
    (:py:obj:`cache_vqd`) the query is sent to DDG to get a vqd value from the
    response.

    .. hint::

       If an empty string is returned there are no results for the ``query`` and
       therefore no ``vqd`` value.

    DDG's bot detection is sensitive to the ``vqd`` value.  For some search terms
    (such as extremely long search terms that are often sent by bots), no ``vqd``
    value can be determined.

    If SearXNG cannot determine a ``vqd`` value, then no request should go out
    to DDG:

        A request with a wrong ``vqd`` value leads to DDG temporarily putting
        SearXNG's IP on a block list.

        Requests from IPs in this block list run into timeouts.

    Not sure, but it seems the block list is a sliding window: to get my IP rid
    from the bot list I had to cool down my IP for 1h (send no requests from
    that IP to DDG).

    TL;DR; the ``vqd`` value is needed to pass DDG's bot protection and is used
    by all request to DDG:

    - DuckDuckGo Lite: ``https://lite.duckduckgo.com/lite`` (POST form data)
    - DuckDuckGo Web: ``https://links.duckduckgo.com/d.js?q=...&vqd=...``
    - DuckDuckGo Images: ``https://duckduckgo.com/i.js??q=...&vqd=...``
    - DuckDuckGo Videos: ``https://duckduckgo.com/v.js??q=...&vqd=...``
    - DuckDuckGo News: ``https://duckduckgo.com/news.js??q=...&vqd=...``

    """
    value = ''
    c = redisdb.client()
    if c:
        key = 'SearXNG_ddg_vqd' + redislib.secret_hash(query)
        value = c.get(key)
    if value or value == b'':
        value = value.decode('utf-8')
        logger.debug("re-use cached vqd value: %s", value)
        return value

    # Backport of upstream fix d97b84bea (2024-03-03): vqd is no longer in the
    # form on lite.duckduckgo.com (which now also returns 202
    # to server IPs). Fetch duckduckgo.com root instead and extract vqd from a
    #