跳转至

Search

BaiduSearch

BaiduSearch(
    api_key: Optional[str] = None,
    key_name: Optional[str] = "BAIDU_SEARCH",
    logger: Optional[Callable] = None,
    verbose: Optional[bool] = False,
)

百度搜索工具

Parameters:

Name Type Description Default
api_key Optional[str]
None
key_name Optional[str]
'BAIDU_SEARCH'
logger Optional[Callable]
None
verbose Optional[bool]
False
Source code in lumix\agent\tools\search\baidu.py
def __init__(
        self,
        api_key: Optional[str] = None,
        key_name: Optional[str] = "BAIDU_SEARCH",
        logger: Optional[Callable] = None,
        verbose: Optional[bool] = False,
):
    """Baidu search tool.

    Args:
        api_key: Credential forwarded to ``set_api_key``, which stores it
            as the ``Cookie`` request header.
        key_name: Name of the environment variable that ``set_api_key``
            falls back to when ``api_key`` is ``None``.
        logger: Stored on the instance as ``self.logger``.
        verbose: Stored on the instance as ``self.verbose``.

    Raises:
        ValueError: Propagated from ``set_api_key`` when no credential
            can be resolved.
    """
    # Logging attributes are set first, then the credential is resolved.
    self.logger, self.verbose = logger, verbose
    self.set_api_key(api_key, key_name)

set_api_key

set_api_key(
    api_key: Optional[str] = None,
    key_name: Optional[str] = None,
)
Source code in lumix\agent\tools\search\baidu.py
def set_api_key(self, api_key: Optional[str] = None, key_name: Optional[str] = None):
    """Store the credential as the ``Cookie`` request header.

    An explicit ``api_key`` takes precedence; otherwise the value of the
    environment variable named by ``key_name`` is used.

    Args:
        api_key: Value to send as the ``Cookie`` header.
        key_name: Name of the environment variable to fall back to.

    Raises:
        ValueError: If ``api_key`` is ``None`` and the environment variable
            is not named, unset, or empty.
    """
    if api_key is not None:
        cookie = api_key
    else:
        # Read the environment once; unset and empty both count as missing.
        cookie = os.getenv(key_name) if key_name is not None else None
        if not cookie:
            raise ValueError("Please set api_key or key_name")

    # ``self.headers`` is not assigned in ``__init__``, so it is presumably a
    # class-level dict shared by all instances. Rebind a per-instance copy
    # instead of mutating it in place, otherwise one instance's cookie would
    # silently replace the cookie of every other instance.
    self.headers = {**self.headers, "Cookie": cookie}

make_params

make_params(query: str, page: int = 0) -> dict
Source code in lumix\agent\tools\search\baidu.py
def make_params(self, query: str, page: int = 0) -> dict:
    """Build the query-string parameters for one result page.

    Args:
        query: Search text; sent as ``wd``, ``base_query`` and ``oq``.
        page: Page index; sent as ``pn`` after being multiplied by 10.

    Returns:
        A dict of string parameters, including freshly generated
        ``rsv_pq`` and ``rsv_t`` tokens.
    """
    # rsv_pq: MD5 hex digest of the current time joined with a random float.
    seed = f"{time.time()}{random.random()}"
    rsv_pq = hashlib.md5(seed.encode()).hexdigest()

    # rsv_t: millisecond timestamp followed by the first 8 base64 characters
    # of the SHA-1 digest of another random float.
    millis = int(time.time() * 1000)
    sha1_digest = hashlib.sha1(str(random.random()).encode()).digest()
    suffix = base64.b64encode(sha1_digest).decode()[:8]

    params = {"ie": "utf-8", "tn": "baidu"}
    params.update(dict.fromkeys(("wd", "base_query", "oq"), query))
    params["pn"] = str(int(page * 10))
    params["rsv_pq"] = rsv_pq
    params["rsv_t"] = f"{millis}{suffix}"
    return params

parse_metadata

parse_metadata(div: Tag)
Source code in lumix\agent\tools\search\baidu.py
def parse_metadata(self, div: Tag):
    """Extract the link, title and abstract from one result container.

    Args:
        div: Element for a single search result; it is queried with
            ``find`` and ``select``.

    Returns:
        A dict with the keys ``url``, ``title`` and ``abstract``. When the
        container has no ``<a>`` element, ``url`` is ``None`` and ``title``
        is ``""``. ``abstract`` is ``""`` when no ``<span>`` whose class
        starts with ``content-right_`` is found.
    """
    # Look the anchor up once. ``find`` returns None when there is no <a>;
    # dereferencing it unconditionally raised AttributeError and aborted the
    # parsing of every other result on the page.
    anchor = div.find(name="a")
    if anchor is None:
        url, title = None, ""
    else:
        url, title = anchor.get("href"), anchor.text

    spans = div.select('span[class^="content-right_"]')
    abstract = spans[0].text if spans else ""
    return {"url": url, "title": title, "abstract": abstract}

parse_html

parse_html(soup: BeautifulSoup) -> List[Dict]
Source code in lumix\agent\tools\search\baidu.py
def parse_html(self, soup: BeautifulSoup) -> List[Dict]:
    """Collect the metadata of every result container in a parsed page.

    Args:
        soup: Parsed results page; queried with the CSS selector
            ``div[class="c-container"]``.

    Returns:
        One ``parse_metadata`` result per selected element, in document
        order as returned by ``select``.
    """
    extract = self.parse_metadata
    containers = soup.select('div[class="c-container"]')
    return [extract(container) for container in containers]

web_content

web_content(url) -> str
Source code in lumix\agent\tools\search\baidu.py
def web_content(self, url) -> str:
    """Download a page and return the text of its parsed HTML.

    The encoding of the response body is guessed with ``chardet`` and
    handed to the HTML parser. The call is best-effort: any failure is
    reported through ``self.warning`` and an empty string is returned.

    Args:
        url: Address of the page to download.

    Returns:
        The text extracted from the page, or ``""`` if anything went wrong.
    """
    try:
        # Bound the wait: without a timeout ``requests`` can block forever on
        # an unresponsive host, defeating the best-effort handling below.
        response = requests.get(url, headers=NORMAL_HEADERS, timeout=10)
        detected = chardet.detect(response.content)
        soup = BeautifulSoup(response.content, from_encoding=detected['encoding'], features="html.parser")
        return soup.text
    except Exception as e:
        self.warning(msg=f"[{__class__.__name__}] URL: {url}, Error: {str(e)}")
        return ""

search

search(
    query: str, pages: Optional[int] = 10
) -> List[DocumentPage]
Source code in lumix\agent\tools\search\baidu.py
def search(self, query: str, pages: Optional[int] = 10) -> List[DocumentPage]:
    """Search Baidu and download the content of the top results.

    Result pages are requested one after another until ``pages`` results
    have been collected or a result page yields nothing. The content of
    each kept result is then fetched with ``web_content``.

    Args:
        query: Search text.
        pages: Maximum number of results to return. ``None`` is treated as
            the default of 10.

    Returns:
        One ``DocumentPage`` per kept result, built from the downloaded
        page text and the result's metadata dict.
    """
    if pages is None:
        # The annotation allows None, but comparing an int with None raises.
        pages = 10

    metadata = []
    page = 0
    # Strict "<": with "<=" one more result page was requested even though
    # exactly ``pages`` results had already been collected.
    while len(metadata) < pages:
        params = self.make_params(query, page=page)
        url = assemble_url(self.base_url, params)
        response = requests.get(url, headers=self.headers)
        soup = BeautifulSoup(response.text, features="html.parser")
        _metadata = self.parse_html(soup)
        metadata.extend(_metadata)
        page += 1
        if not _metadata:
            # An empty page means there is nothing further to collect.
            break
    metadata = metadata[:pages]
    self.info(msg=f"[{__class__.__name__}] Find {len(metadata)} pages")
    web_data = []
    for item in metadata:
        page_content = self.web_content(item.get("url"))
        web_data.append(DocumentPage(page_content, metadata=item))
    return web_data
baidu_search(
    query: Annotated[str, "The query to search for", True],
    pages: Annotated[
        Optional[int],
        "Number of pages to search, and you can",
        False,
    ] = 5,
) -> str

Search for a query on Baidu and return the results as a string.

Parameters:

Name Type Description Default
query Annotated[str, 'The query to search for', True]

The query to search for

required
pages Annotated[Optional[int], 'Number of pages to search, and you can', False]

Number of pages to search. You can search multiple pages at the same time to ensure the information is accurate. The default is 5 pages.

5

Returns:

Type Description
str

A string containing the search results

Source code in lumix\agent\tools\search\baidu.py
def baidu_search(
        query: Annotated[str, "The query to search for", True],
        pages: Annotated[Optional[int], "Number of pages to search, and you can", False] = 5,
) -> str:
    """Search Baidu and return the results as a string.

    Args:
        query: The query to search for.
        pages:
            Number of pages to search; forwarded to ``BaiduSearch.search``.
            Searching several pages can help ensure the information is
            accurate. The default is 5.

    Returns:
        The ``str()`` of a list holding one ``model_dump()`` dict per result.
    """
    # NOTE(review): the description string annotated on ``pages`` ends
    # mid-sentence ("..., and you can"); it is a runtime value, so it is kept
    # as-is here — confirm the intended wording.
    engine = BaiduSearch(verbose=True)
    dumped = []
    for document in engine.search(query=query, pages=pages):
        # Clean the page text before it is serialised.
        document.page_content = drop_multi_mark(text=document.page_content)
        dumped.append(document.model_dump())
    return str(dumped)

BaiduImageSearch

BaiduImageSearch(
    api_key: Optional[str] = None,
    key_name: Optional[str] = "BAIDU_SEARCH",
    quality: Literal["high", "low"] = "low",
    logger: Optional[Callable] = None,
    verbose: Optional[bool] = False,
)

百度图片搜索工具

Parameters:

Name Type Description Default
api_key Optional[str]

百度图片搜索的API Key

None
key_name Optional[str]

API Key的配置文件键名

'BAIDU_SEARCH'
quality Literal['high', 'low']

图片质量,可选"high"或"low",默认"low"

'low'
logger Optional[Callable]

日志记录器

None
verbose Optional[bool]

是否打印详细信息

False

Examples:

import matplotlib.pyplot as plt
from lumix.agent.tools import BaiduImageSearch

baidu = BaiduImageSearch(verbose=True)
images = baidu.search(query="cat")

for i, image in enumerate(images):
    plt.imshow(image.image)
    plt.axis('off')
    plt.show()
Source code in lumix\agent\tools\search\baidu_image.py
def __init__(
        self,
        api_key: Optional[str] = None,
        key_name: Optional[str] = "BAIDU_SEARCH",
        quality: Literal["high", "low"] = "low",
        logger: Optional[Callable] = None,
        verbose: Optional[bool] = False,
):
    """Baidu image search tool.

    Args:
        api_key: API key for Baidu image search.
        key_name: Key name under which the API key is configured.
        quality: Image quality, either "high" or "low"; defaults to "low".
        logger: Logger.
        verbose: Whether to print detailed information.

    Examples:
        ```python
        import matplotlib.pyplot as plt
        from lumix.agent.tools import BaiduImageSearch

        baidu = BaiduImageSearch(verbose=True)
        images = baidu.search(query="cat")

        for i, image in enumerate(images):
            plt.imshow(image.image)
            plt.axis('off')
            plt.show()
        ```
    """
    # Plain attributes are set first, then the credential is resolved.
    self.quality = quality
    self.logger, self.verbose = logger, verbose
    self.set_api_key(api_key, key_name)

set_api_key

set_api_key(
    api_key: Optional[str] = None,
    key_name: Optional[str] = None,
)
Source code in lumix\agent\tools\search\baidu_image.py
def set_api_key(self, api_key: Optional[str] = None, key_name: Optional[str] = None):
    """Store the credential as the ``Cookie`` request header.

    An explicit ``api_key`` takes precedence; otherwise the value of the
    environment variable named by ``key_name`` is used.

    Args:
        api_key: Value to send as the ``Cookie`` header.
        key_name: Name of the environment variable to fall back to.

    Raises:
        ValueError: If ``api_key`` is ``None`` and the environment variable
            is not named, unset, or empty.
    """
    if api_key is not None:
        cookie = api_key
    else:
        # Read the environment once; unset and empty both count as missing.
        cookie = os.getenv(key_name) if key_name is not None else None
        if not cookie:
            raise ValueError("Please set api_key or key_name")

    # ``self.headers`` is not assigned in ``__init__``, so it is presumably a
    # class-level dict shared by all instances. Rebind a per-instance copy
    # instead of mutating it in place, otherwise one instance's cookie would
    # silently replace the cookie of every other instance.
    self.headers = {**self.headers, "Cookie": cookie}

make_params

make_params(query: str, page: int = 0) -> dict

Parameters:

Name Type Description Default
query str
required
page int
0

Returns:

Source code in lumix\agent\tools\search\baidu_image.py
def make_params(self, query: str, page: int = 0) -> dict:
    """Build the query-string parameters for an image search request.

    Args:
        query: Search text; sent as both ``word`` and ``queryWord``.
        page: Page index; sent as ``pn`` after being multiplied by 10.

    Returns:
        A dict of string parameters for the request.
    """
    # NOTE(review): ``rn`` is fixed at "30" while ``pn`` advances by 10 per
    # page; presumably these are page size and offset — TODO confirm that
    # the mismatch is intended.
    offset = str(int(page * 10))
    return dict(
        tn="resultjson_com",
        ipn="rj",
        ct="201326592",
        fp="result",
        word=query,
        queryWord=query,
        ie="utf-8",
        oe="utf-8",
        pn=offset,
        rn="30",
        gsm="3c",
    )

fetch_images

fetch_images(metadata: List[Dict]) -> List[SearchedImage]
Source code in lumix\agent\tools\search\baidu_image.py
def fetch_images(self, metadata: List[Dict]) -> List[SearchedImage]:
    """Download the image referenced by each metadata entry.

    Empty entries are dropped first. For the rest, the URL is taken from
    ``image_url`` when ``self.quality`` is "low" and from ``object_url``
    when it is "high". A failed download is reported through
    ``self.warning`` and skipped, so the result may be shorter than the
    input.

    Args:
        metadata: Metadata dicts such as those produced by
            ``fetch_metadata``.

    Returns:
        A ``SearchedImage`` for every image that was downloaded and opened.

    Raises:
        ValueError: If ``self.quality`` is neither "low" nor "high" and at
            least one non-empty entry is processed.
    """
    images = []
    metadata = [item for item in metadata if item]
    for _metadata in metadata:
        if self.quality == "low":
            image_url = _metadata.get("image_url")
        elif self.quality == "high":
            image_url = _metadata.get("object_url")
        else:
            raise ValueError("Please set quality to 'low' or 'high'")

        try:
            # Bound the wait: without a timeout a single unresponsive image
            # host blocks the whole batch indefinitely.
            # NOTE(review): ``self.headers`` (including the Cookie) is sent to
            # whichever host serves the image — confirm this is intended.
            response = requests.get(image_url, headers=self.headers, timeout=10)
            image = Image.open(io.BytesIO(response.content))
            images.append(SearchedImage(image=image, metadata=_metadata))
        except Exception as e:
            self.warning(msg=f"[{__class__.__name__}] Error fetching image from {image_url}: {str(e)}")

    self.info(msg=f"[{__class__.__name__}] Fetched {len(images)} / {len(metadata)} images")
    return images

fetch_metadata

fetch_metadata(data: Optional[Dict] = None) -> Dict
Source code in lumix\agent\tools\search\baidu_image.py
def fetch_metadata(self, data: Optional[Dict] = None) -> Dict:
    """Reduce one raw search-result item to the fields used downstream.

    Args:
        data: Raw result item, or ``None``.

    Returns:
        A dict with the keys ``image_url``, ``object_url``, ``from_url`` and
        ``from_title``, or an empty dict when ``data`` is ``None`` or lacks
        ``thumbURL``, ``fromPageTitle`` or a non-empty ``replaceUrl``.
    """
    # Short-circuit with ``or``. The former ``any([...])`` built the whole
    # list before testing it, so ``data.get`` was still called when ``data``
    # was None and raised AttributeError. ``not ...`` on ``replaceUrl`` also
    # covers an empty list, which used to raise IndexError below.
    if (
        data is None
        or data.get("thumbURL") is None
        or not data.get("replaceUrl")
        or data.get("fromPageTitle") is None
    ):
        return {}

    origin_url = data.get("replaceUrl")[0]
    return {
        "image_url": data.get("thumbURL"),
        "object_url": origin_url.get("ObjURL"),
        "from_url": origin_url.get("FromURL"),
        "from_title": data.get("fromPageTitle"),
    }

search

search(query: str) -> List[SearchedImage]
Source code in lumix\agent\tools\search\baidu_image.py
def search(self, query: str) -> List[SearchedImage]:
    """Run an image search and download the images it returns.

    Args:
        query: Search text.

    Returns:
        The ``SearchedImage`` objects produced by ``fetch_images`` for the
        items listed under ``data`` in the JSON response.
    """
    request_params = self.make_params(query)
    request_url = assemble_url(self.base_url, request_params)
    reply = requests.get(request_url, headers=self.headers)
    # Reduce every raw item to its metadata dict before downloading.
    to_metadata = self.fetch_metadata
    entries = [to_metadata(raw) for raw in reply.json().get("data")]
    return self.fetch_images(entries)
baidu_image_search(
    query: Annotated[str, "The query to search for", True]
) -> List[Image]

Search for images on Baidu and return the results as a list of images.

Parameters:

Name Type Description Default
query Annotated[str, 'The query to search for', True]

The query to search for

required

Returns:

Type Description
List[Image]

A list of images found for the query

Examples:

images = baidu_image_search("cat")
Source code in lumix\agent\tools\search\baidu_image.py
def baidu_image_search(
        query: Annotated[str, "The query to search for", True],
) -> List[Image.Image]:
    """Search Baidu for images and return them as a list.

    Args:
        query: The query to search for

    Returns:
        The ``image`` attribute of every result returned by
        ``BaiduImageSearch.search``.

    Examples:
        ```python
        images = baidu_image_search("cat")
        ```
    """
    searcher = BaiduImageSearch(verbose=True)
    # Keep only the image objects and drop the accompanying metadata.
    return [found.image for found in searcher.search(query=query)]