Skip to content

TableSemanticParserSchema

Bases: BaseSchema

ソースコード位置: src/yomitoku/schemas/table_semantic_parser.py
class TableSemanticParserSchema(BaseSchema):
    tables: List[TableSemanticContentsSchema] = Field(
        ...,
        description="List of tables with semantic information",
    )

    paragraphs: List[Element] = Field(
        ...,
        description="List of recognized paragraphs in the document",
    )

    words: List[WordPrediction] = Field(
        ...,
        description="List of recognized words in the document",
    )

    def search_words_by_position(self, bbox) -> str:
        """
        Search for words by their bounding box.
        位置情報(bounding box)に対応する文字列を返す

        Args:
            box (List[int]): 検索するバウンディングボックス [x1, y1, x2, y2]
        """
        words = []
        for word in self.words:
            word_box = quad_to_xyxy(word.points)
            if is_contained(bbox, word_box, threshold=0.5):
                word = ParagraphSchema(
                    box=word_box,
                    contents=word.content,
                    direction=word.direction,
                    role=None,
                    order=None,
                    indent_level=None,
                )

                words.append(word)

        word_direction = [word.direction for word in words]
        cnt_horizontal = word_direction.count("horizontal")
        cnt_vertical = word_direction.count("vertical")

        element_direction = (
            "horizontal" if cnt_horizontal > cnt_vertical else "vertical"
        )
        order = "left2right" if element_direction == "horizontal" else "right2left"
        words = prediction_reading_order(words, order)
        words = sorted(words, key=lambda x: x.order)

        return "".join([word.contents for word in words])

    @classmethod
    def load_json(self, json_path: str) -> "TableSemanticParserSchema":
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        return TableSemanticParserSchema.model_validate(data)

    def to_csv(self, outdir):
        for table in self.tables:
            table.export.grids_to_csv(
                out_path=f"{outdir}/table_{table.id}.csv",
            )

    def to_dict(self, merge_values=False, separator="\n"):
        results = {}
        for table in self.tables:
            result = {
                "kv_items": table.view.kv_items_to_dict(
                    merge_values=merge_values, separator=separator
                ),
                "grids": table.view.grids_to_dict(),
            }
            results[table.id] = result

        return results

    def find_table_by_id(
        self, table_id: str
    ) -> Union[TableSemanticContentsSchema, None]:
        """
        Search for a table by its ID.
        テーブルIDに対応するテーブルを返す

        Args:
            table_id (str): 検索するテーブルID
        """
        for table in self.tables:
            if table.id == str(table_id):
                return table

    def find_table_by_position(
        self, box: List[int]
    ) -> Union[TableSemanticContentsSchema, None]:
        """
        Search for a table by its bounding box.
        テーブルの位置情報(bounding box)に対応するテーブルを返す

        Args:
            box (List[int]): 検索するバウンディングボックス [x1, y1, x2, y2]
        """
        ratios = []
        for table in self.tables:
            overlap_ratio = calc_overlap_ratio(box, table.box)[0]
            ratios.append(overlap_ratio)

        if not ratios:
            return None

        max_idx = ratios.index(max(ratios))
        return self.tables[max_idx] if ratios[max_idx] > 0.5 else None

    def search_kv_items_by_key(self, key: str) -> List[dict]:
        """
        search for key-value items or grid cells where the key matches the query string.
        クエリーに部分一致するキーを持つKVアイテムおよびグリッドセルを返す

        Args:
            key (str): 検索するクエリ文字列. キー部分に部分一致するものを検索
        """

        results: List[dict] = []
        for table in self.tables:
            table_results = table.search_kv_items_by_key(key)
            results.extend(table_results)

        return results

    def load_template_json(self, template_path: str) -> "TableSemanticParserSchema":
        with open(template_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        template = TableSemanticParserTemplateSchema.model_validate(data)
        return apply_table_template(self, template)

    def save_template_json(
        self, out_path: str, include_kv: bool = True, include_grids: bool = True
    ):
        template_tables: List[TableSemanticContentsTemplateSchema] = []

        for t in self.tables:
            tmp_cells: Dict[str, CellTemplateSchema] = {}
            for cid, c in t.cells.items():
                if c.role == "group":
                    continue

                tmp_cells[str(cid)] = CellTemplateSchema(
                    id=str(c.id) if c.id is not None else str(cid),
                    box=list(c.box) if c.box is not None else None,
                    role=c.role,
                    contents=c.contents,
                )

            template_tables.append(
                TableSemanticContentsTemplateSchema(
                    id=t.id,
                    style=t.style,
                    box=list(t.box),
                    cells=tmp_cells,
                    kv_items=t.kv_items if include_kv else None,
                    grids=t.grids if include_grids else None,
                )
            )

        template = TableSemanticParserTemplateSchema(
            meta=TemplateMetaSchema(),
            tables=template_tables,
        )

        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(
                template.model_dump(exclude_none=True), f, ensure_ascii=False, indent=4
            )

find_table_by_id(table_id)

Search for a table by its ID. テーブルIDに対応するテーブルを返す

引数:

名前 タイプ デスクリプション デフォルト
table_id str

検索するテーブルID

必須
ソースコード位置: src/yomitoku/schemas/table_semantic_parser.py
def find_table_by_id(
    self, table_id: str
) -> Union[TableSemanticContentsSchema, None]:
    """
    Search for a table by its ID.
    テーブルIDに対応するテーブルを返す

    Args:
        table_id (str): 検索するテーブルID
    """
    for table in self.tables:
        if table.id == str(table_id):
            return table

find_table_by_position(box)

Search for a table by its bounding box. テーブルの位置情報(bounding box)に対応するテーブルを返す

引数:

名前 タイプ デスクリプション デフォルト
box List[int]

検索するバウンディングボックス [x1, y1, x2, y2]

必須
ソースコード位置: src/yomitoku/schemas/table_semantic_parser.py
def find_table_by_position(
    self, box: List[int]
) -> Union[TableSemanticContentsSchema, None]:
    """
    Search for a table by its bounding box.
    テーブルの位置情報(bounding box)に対応するテーブルを返す

    Args:
        box (List[int]): 検索するバウンディングボックス [x1, y1, x2, y2]
    """
    ratios = []
    for table in self.tables:
        overlap_ratio = calc_overlap_ratio(box, table.box)[0]
        ratios.append(overlap_ratio)

    if not ratios:
        return None

    max_idx = ratios.index(max(ratios))
    return self.tables[max_idx] if ratios[max_idx] > 0.5 else None

search_kv_items_by_key(key)

Search for key-value items or grid cells where the key matches the query string. クエリーに部分一致するキーを持つKVアイテムおよびグリッドセルを返す

引数:

名前 タイプ デスクリプション デフォルト
key str

検索するクエリ文字列. キー部分に部分一致するものを検索

必須
ソースコード位置: src/yomitoku/schemas/table_semantic_parser.py
def search_kv_items_by_key(self, key: str) -> List[dict]:
    """
    search for key-value items or grid cells where the key matches the query string.
    クエリーに部分一致するキーを持つKVアイテムおよびグリッドセルを返す

    Args:
        key (str): 検索するクエリ文字列. キー部分に部分一致するものを検索
    """

    results: List[dict] = []
    for table in self.tables:
        table_results = table.search_kv_items_by_key(key)
        results.extend(table_results)

    return results

search_words_by_position(bbox)

Search for words by their bounding box. 位置情報(bounding box)に対応する文字列を返す

引数:

名前 タイプ デスクリプション デフォルト
bbox List[int]

検索するバウンディングボックス [x1, y1, x2, y2]

必須
ソースコード位置: src/yomitoku/schemas/table_semantic_parser.py
def search_words_by_position(self, bbox) -> str:
    """
    Search for words by their bounding box.
    位置情報(bounding box)に対応する文字列を返す

    Args:
        box (List[int]): 検索するバウンディングボックス [x1, y1, x2, y2]
    """
    words = []
    for word in self.words:
        word_box = quad_to_xyxy(word.points)
        if is_contained(bbox, word_box, threshold=0.5):
            word = ParagraphSchema(
                box=word_box,
                contents=word.content,
                direction=word.direction,
                role=None,
                order=None,
                indent_level=None,
            )

            words.append(word)

    word_direction = [word.direction for word in words]
    cnt_horizontal = word_direction.count("horizontal")
    cnt_vertical = word_direction.count("vertical")

    element_direction = (
        "horizontal" if cnt_horizontal > cnt_vertical else "vertical"
    )
    order = "left2right" if element_direction == "horizontal" else "right2left"
    words = prediction_reading_order(words, order)
    words = sorted(words, key=lambda x: x.order)

    return "".join([word.contents for word in words])