jq安装失败-慕课网

半夏草 2024-05-16 18:14:57

参考了老师提供的官方 GitHub 仓库里相似问题的解法：https://github.com/langchain-ai/langchain/issues/4396，但仍然有问题。于是我把代码和报错信息一起贴给了 GPT-4，根据它给出的提示修复后，代码如下：

import json
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union

from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader

class JSONLoader(BaseLoader):
    """Load a JSON or JSON Lines file into ``Document`` objects without ``jq``.

    Each top-level record becomes one ``Document``. A top-level JSON object is
    treated as a single record; a top-level JSON array contributes one record
    per element.
    """

    def __init__(
        self,
        file_path: Union[str, Path],
        content_key: Optional[str] = None,
        metadata_func: Optional[Callable[[Dict, Dict], Dict]] = None,
        text_content: bool = False,
        json_lines: bool = False,
    ):
        """Store the loader configuration.

        :param file_path: path of the JSON / JSON Lines file; resolved to an
            absolute path.
        :param content_key: key whose value becomes ``page_content``; when
            ``None`` the whole record is used.
        :param metadata_func: optional callable ``(record, metadata) -> dict``
            that builds the final metadata for a record.
        :param text_content: when ``True``, the extracted content must already
            be a ``str``; otherwise ``ValueError`` is raised.
        :param json_lines: when ``True``, every non-blank line of the file is
            parsed as a separate JSON value.
        """
        self.file_path = Path(file_path).resolve()
        self._content_key = content_key
        self._metadata_func = metadata_func
        self._text_content = text_content
        self._json_lines = json_lines

    def load(self) -> List[Document]:
        """Load and return documents from the JSON file."""
        docs: List[Document] = []
        if self._json_lines:
            with self.file_path.open(encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if line:  # skip blank lines
                        self._parse(line, docs)
        else:
            self._parse(self.file_path.read_text(encoding="utf-8"), docs)
        return docs

    def _parse(self, content: str, docs: List[Document]) -> None:
        """Parse ``content`` as JSON and append one document per record to ``docs``.

        :raises ValueError: if the parsed value is neither a dict nor a list.
        """
        data = json.loads(content)

        # A single object is wrapped in a list so both shapes share one code path.
        if isinstance(data, dict):
            data = [data]

        if not isinstance(data, list):
            raise ValueError("Data is not a list!")

        # seq_num continues from the documents already collected, so numbering
        # stays 1-based and contiguous across JSON Lines.
        for i, sample in enumerate(data, len(docs) + 1):
            text = self._get_text(sample=sample)
            metadata = self._get_metadata(sample=sample, source=str(self.file_path), seq_num=i)
            docs.append(Document(page_content=text, metadata=metadata))

    def _get_text(self, sample: Any) -> str:
        """Return the ``page_content`` string for one record.

        When ``content_key`` is set, ``sample`` is expected to offer ``.get``
        (i.e. be a dict); a missing key yields an empty string.

        :raises ValueError: if ``text_content`` is ``True`` and the extracted
            content is not a ``str``.
        """
        if self._content_key is not None:
            content = sample.get(self._content_key)
        else:
            content = sample

        if self._text_content and not isinstance(content, str):
            # Adjacent literals instead of backslash continuations, so the
            # source indentation does not leak into the message.
            raise ValueError(
                f"Expected page_content is string, got {type(content)} instead. "
                "Set `text_content=False` if the desired input for "
                "`page_content` is not a string"
            )

        if isinstance(content, str):
            return content
        if isinstance(content, dict):
            # An empty dict becomes "" rather than "{}".
            return json.dumps(content) if content else ""
        # None becomes ""; anything else is stringified.
        return str(content) if content is not None else ""

    def _get_metadata(self, sample: Dict[str, Any], **additional_fields: Any) -> Dict[str, Any]:
        """Return the metadata dictionary for one record.

        :param sample: single data payload
        :param additional_fields: keyword arguments used as the base metadata
        :return: the result of ``metadata_func(sample, additional_fields)``
            when a ``metadata_func`` was given, otherwise ``additional_fields``.
        """
        if self._metadata_func is not None:
            return self._metadata_func(sample, additional_fields)
        return additional_fields

    def _validate_content_key(self, data: Any) -> None:
        """Check that ``content_key`` exists in the first record of ``data``.

        Not called by ``load()`` in this class; available for explicit checks.

        :raises ValueError: if ``data`` is empty or not a list, if its first
            element is not a dict, or if the key is missing from it.
        """
        if not (isinstance(data, list) and data):
            raise ValueError("Data is empty or not a list!")

        # Only the first record is inspected.
        sample = data[0]
        if not isinstance(sample, dict):
            # The f-prefix must sit on the fragment that holds the placeholder.
            raise ValueError(
                "Expected the data schema to result in a list of objects (dict), "
                f"so sample must be a dict but got `{type(sample)}`."
            )

        if self._content_key not in sample:
            raise ValueError(
                f"The content key `{self._content_key}` is missing in the sample data."
            )

    def _validate_metadata_func(self, data: Any) -> None:
        """Check that ``metadata_func`` returns a dict for the first record.

        Not called by ``load()`` in this class; available for explicit checks.

        :raises ValueError: if ``data`` is empty or not a list, or if
            ``metadata_func`` returns something other than a dict.
        """
        if not (isinstance(data, list) and data):
            raise ValueError("Data is empty or not a list!")

        sample = data[0]
        if self._metadata_func is not None:
            sample_metadata = self._metadata_func(sample, {})
            if not isinstance(sample_metadata, dict):
                raise ValueError(
                    f"Expected the metadata_func to return a dict but got `{type(sample_metadata)}`."
                )

def item_metadata_func(record: dict, metadata: dict) -> dict:
    """Copy the prompt fields of ``record`` into ``metadata`` and return it.

    ``metadata`` is updated in place; a field absent from ``record`` is
    stored as ``None``. The record's ``_type`` field is not copied.
    """
    for field_name in ("input_variables", "template"):
        metadata[field_name] = record.get(field_name)
    return metadata

# Use the `description` field of each record in simple_prompt.json as the
# page content and attach the prompt fields as metadata, then show the result.
loader = JSONLoader(
    file_path='simple_prompt.json',
    content_key='description',
    metadata_func=item_metadata_func,
)
data = loader.load()
print(data)

2 回复有任何疑惑可以回复我~

收起回答

潜水员戴夫 #1
```
这个json loader代码也太长了~
```
回复有任何疑惑可以回复我~ 2024-05-18 18:24:15

潜水员戴夫 #2

生成了一个类似的方法：
import json
import jmespath
from pathlib import Path

class CustomJSONLoader:
    """Read a JSON file and extract part of it with a JMESPath expression."""

    def __init__(self, file_path, jq_schema, text_content=True):
        """Store the loader configuration.

        :param file_path: path of the JSON file; resolved to an absolute path.
        :param jq_schema: expression passed to ``jmespath.search`` (a JMESPath
            expression, despite the parameter name).
        :param text_content: when true, ``load`` wraps the result in a dict
            under the ``"content"`` key.
        """
        self.file_path = Path(file_path).resolve()
        self.jq_schema = jq_schema
        self.text_content = text_content

    def load(self):
        """Parse the file and return the data selected by ``jq_schema``.

        :return: ``{"content": result}`` when ``text_content`` is true,
            otherwise the bare search result.
        """
        # Decode explicitly as UTF-8 so non-ASCII JSON does not depend on the
        # platform's default encoding.
        with self.file_path.open("r", encoding="utf-8") as f:
            data = json.load(f)

        # Evaluate the expression against the parsed JSON data.
        result = jmespath.search(self.jq_schema, data)

        if self.text_content:
            return {"content": result}
        return result

# Use the custom loader class to pull the `template` field out of
# simple_prompt.json and print the wrapped result.
loader = CustomJSONLoader(
    file_path="simple_prompt.json",
    jq_schema="template",
    text_content=True,
)
data = loader.load()
print(data)

回复有任何疑惑可以回复我~ 2024-05-18 18:53:17

慕雪7278592 #3
```
太强辣
```
回复有任何疑惑可以回复我~ 2024-06-04 22:53:33

jq安装失败

正在回答回答被采纳积分+3

2回答

相似问题

请选择置顶位置

本课精华内容

shuziren Dockerfile 少加了一个斜杠/无法build

我用阿里千问的api，调用时跟你给的代码不一致

程序好像运行不了了

openai 调用无效令牌

macos m1 环境安装 pyenv 报错

热搜

最近搜索清空

jq安装失败

正在回答 回答被采纳积分+3

2回答

相似问题

请选择置顶位置

本课精华内容

shuziren Dockerfile 少加了一个斜杠/无法build

我用阿里千问的api，调用时跟你给的代码不一致

程序好像运行不了了

openai 调用 无效令牌

macos m1 环境安装 pyenv 报错

正在回答回答被采纳积分+3

openai 调用无效令牌