Table Semantic Parser¶

End-to-end table semantic parsing pipeline for a document image.

This class detects tables and paragraphs, detects and recognizes text (OCR), detects table cells, and then builds a semantic representation of each table, which includes:

- grid tables (row/column structure)
- key-value items inferred from adjacency heuristics
- all cells with aggregated OCR content

It also returns sorted paragraphs and global OCR words.

属性：

名前	タイプ	デスクリプション
`layout_parser`	`LayoutParser`	Detects tables and paragraph regions.
`cell_detector`	`CellDetector`	Extracts cell candidates inside tables.
`text_detector`	`TextDetector`	Detects word-level text regions.
`text_recognizer`	`TextRecognizer`	Recognizes text for detected regions.
`visualize`	`bool`	Whether to produce visualization images.
`grid_only`	`bool`	Not stored on the instance; it is a per-call argument of `__call__`. If True, skips clustering and attempts to parse the full table as a grid.
`merge_same_column_values`	`bool`	Passed to grid parser to optionally merge values.

Notes

The device argument is passed to submodules that support GPU/accelerator execution.
The output is a TableSemanticParserSchema containing tables/paragraphs/words.

ソースコード位置： src/yomitoku/table_semantic_parser.py

class TableSemanticParser:
    """
    End-to-end table semantic parsing pipeline for a document image.

    This class detects tables/paragraphs, detects and recognizes text (OCR), detects
    table cells, and then builds a semantic representation of each table including:
    - grid tables (row/column structure)
    - key-value items inferred from adjacency heuristics
    - all cells with aggregated OCR content
    It also returns sorted paragraphs and global OCR words.

    Attributes:
        layout_parser (LayoutParser): Detects tables and paragraph regions.
        cell_detector (CellDetector): Extracts cell candidates inside tables.
        text_detector (TextDetector): Detects word-level text regions.
        text_recognizer (TextRecognizer): Recognizes text for detected regions.
        enable_preprocess (bool): Whether the input image is run through
            `preprocessor` before parsing.
        preprocessor (Preprocessor): Only created when `enable_preprocess` is True.
        visualize (bool): Whether to produce visualization images.
        merge_same_column_values (bool): Passed to the grid parser to optionally
            merge values. Initialized to False.

    Notes:
        - The `device` argument is passed to submodules that support GPU/accelerator execution.
        - The output is a `TableSemanticParserSchema` containing tables/paragraphs/words.
        - `grid_only` and `kv_only` are per-call arguments of `__call__`; they are
          not stored on the instance.
    """

    def __init__(
        self,
        configs={},
        device="cuda:0",
        visualize=True,
        enable_preprocess=False,
    ):
        """
        Initialize the table semantic parser and its submodules.

        Args:
            configs (dict): Configuration overrides for submodules. Expected keys (optional):
                - "table_detector": kwargs for LayoutParser
                - "table_cell_parser": kwargs for CellDetector
                - "text_detector": kwargs for TextDetector
                - "text_recognizer": kwargs for TextRecognizer
                - "preprocess": configs for Preprocessor (only read when
                  `enable_preprocess` is True)
            device (str): Device identifier passed to submodules (e.g., "cuda:0", "cpu").
            visualize (bool): If True, the parser will generate debug visualization images.
            enable_preprocess (bool): If True, a Preprocessor is created and applied
                to every input image in `__call__`.

        Raises:
            ValueError: If `configs` is not a dict.
        """
        # NOTE: `configs` is only read, never mutated, so the shared `{}` default
        # is harmless here; it is kept to leave the public signature unchanged.
        if not isinstance(configs, dict):
            raise ValueError(
                "configs must be a dict. See the https://kotaro-kinoshita.github.io/yomitoku-dev/usage/"
            )

        # Defaults for each submodule; user overrides are merged on top.
        table_detector_kwargs = {
            "device": device,
            "visualize": visualize,
        }
        table_cell_parser_kwargs = {
            "device": device,
            "visualize": visualize,
        }
        text_detector_kwargs = {
            "device": device,
        }
        text_recognizer_kwargs = {
            "device": device,
        }

        overrides = (
            ("table_detector", table_detector_kwargs),
            ("table_cell_parser", table_cell_parser_kwargs),
            ("text_detector", text_detector_kwargs),
            ("text_recognizer", text_recognizer_kwargs),
        )
        for key, kwargs in overrides:
            if key in configs:
                kwargs.update(configs[key])

        self.layout_parser = LayoutParser(
            **table_detector_kwargs,
        )
        self.cell_detector = CellDetector(
            **table_cell_parser_kwargs,
        )
        self.text_detector = TextDetector(
            **text_detector_kwargs,
        )
        self.text_recognizer = TextRecognizer(
            **text_recognizer_kwargs,
        )

        self.enable_preprocess = enable_preprocess
        if self.enable_preprocess:
            preprocess_configs = {
                "rotate_detector": {
                    "device": device,
                },
            }
            # Shallow merge: a user-supplied "rotate_detector" entry replaces the
            # default one (including its "device") as a whole.
            if "preprocess" in configs:
                preprocess_configs.update(configs["preprocess"])
            self.preprocessor = Preprocessor(
                configs=preprocess_configs,
            )

        self.visualize = visualize

        self.merge_same_column_values = False

    def aggregate(self, ocr_res, cells, overlap_th=0.2):
        """
        Write the recognized text into the `contents` field of each cell.

        Each OCR word is assigned to the single non-"group" cell with the largest
        overlap ratio, provided that ratio is at least `overlap_th`. The words of a
        cell are put into reading order and concatenated without separators; cells
        that receive no word get an empty string.

        Args:
            ocr_res: OCR result exposing `words`; every word provides `points`,
                `content` and `direction`.
            cells: Sequence of elements exposing `role`, `box` and a writable
                `contents` (table cells or paragraphs). Modified in place.
            overlap_th (float): Minimum overlap ratio for a word to be assigned.

        Returns:
            None
        """
        from collections import defaultdict

        # Words are bucketed by the cell's position in `cells`, not by `cell.id`:
        # ids are not guaranteed to be unique (demoted tables are created with
        # id=None), and an id-keyed bucket would hand the same text to every
        # element that shares an id.
        words_by_index = defaultdict(list)

        for word in ocr_res.words:
            word_box = quad_to_xyxy(word.points)
            best_index = None
            best_ratio = 0

            for index, cell in enumerate(cells):
                if cell.role == "group":
                    continue
                ratio, _ = calc_overlap_ratio(cell.box, word_box)
                if ratio > best_ratio:
                    best_ratio = ratio
                    best_index = index

            if best_index is None or best_ratio < overlap_th:
                continue

            words_by_index[best_index].append(
                ParagraphSchema(
                    box=word_box,
                    contents=word.content,
                    direction=word.direction,
                    order=0,
                    role=None,
                )
            )

        for index, cell in enumerate(cells):
            contained = words_by_index.get(index, [])
            if not contained:
                cell.contents = ""
                continue

            # Majority vote on the writing direction; ties count as horizontal.
            directions = [w.direction for w in contained]
            is_horizontal = directions.count("horizontal") >= directions.count(
                "vertical"
            )
            order = "left2right" if is_horizontal else "right2left"
            prediction_reading_order(contained, order)
            contained = sorted(contained, key=lambda w: w.order)

            # Words are concatenated directly and embedded newlines are removed.
            text = "".join(w.contents for w in contained)
            cell.contents = text.replace("\n", "").strip()

    def replace_table_to_paragraphs(self, tables, paragraphs):
        """
        Demote tables with fewer than two "cell"/"header" cells to paragraphs.

        Args:
            tables: Detected tables, each exposing `cells` and `box`.
            paragraphs (list): Paragraph list; one empty `Element` covering the
                table box is appended in place for every demoted table.

        Returns:
            list: The tables that were kept, in their original order.
        """
        kept_tables = []
        for table in tables:
            content_cells = sum(
                1 for cell in table.cells if cell.role in ("cell", "header")
            )
            if content_cells >= 2:
                kept_tables.append(table)
                continue

            paragraphs.append(
                Element(
                    id=None,
                    box=table.box,
                    contents="",
                    score=1.0,
                    role=None,
                )
            )

        return kept_tables

    async def run_models(self, img):
        """
        Run detection, cell extraction and text recognition on one image.

        The text detector and the layout parser are executed concurrently in two
        worker threads. The cell detector then runs on all detected tables, tables
        with too few cells are demoted to paragraphs, and finally the detected text
        regions are recognized.

        Args:
            img: Input image passed unchanged to every submodule.

        Returns:
            tuple: `(results_ocr, results_table, paragraphs)` where `results_ocr`
            is an `OCRSchema`, `results_table` the remaining tables and
            `paragraphs` the layout paragraphs (including demoted tables).
        """
        with ThreadPoolExecutor(max_workers=2) as executor:
            loop = asyncio.get_running_loop()
            detection = loop.run_in_executor(executor, self.text_detector, img)
            layout = loop.run_in_executor(executor, self.layout_parser, img)
            results = await asyncio.gather(detection, layout)

        results_det, _ = results[0]
        results_layout, _ = results[1]

        # NOTE: the separate borderless-table path is disabled; every detected
        # table goes through the cell detector.
        bordered_table = list(results_layout.tables)
        results_table = self.cell_detector(img, bordered_table)

        # Appends to `results_layout.paragraphs` in place for demoted tables.
        results_table = self.replace_table_to_paragraphs(
            results_table, results_layout.paragraphs
        )

        results_rec = self.text_recognizer(img, results_det.points)
        results_ocr = OCRSchema(words=ocr_aggregate(results_rec))

        return (results_ocr, results_table, results_layout.paragraphs)

    def visualizer_ocr(self, img, semantic_info):
        """
        Render the OCR visualization for `semantic_info` onto `img`.

        Font size, font path and color are read from the text recognizer's
        visualize config; the color is passed with its channel order reversed.

        Returns:
            The image returned by the OCR visualizer.
        """
        vis_cfg = self.text_recognizer._cfg.visualize
        vis_ocr = _ocr_visualizer(
            img,
            semantic_info,
            font_size=vis_cfg.font_size,
            font_color=tuple(vis_cfg.color[::-1]),
            font_path=vis_cfg.font,
        )

        return vis_ocr

    def visualizer_layout(self, img, semantic_info):
        """
        Draw tables, paragraphs, cells, key-value boxes and grid boxes.

        Drawing happens on a copy of `img`. Key-value item boxes use color
        (0, 0, 255) and grid boxes use color (255, 0, 0), both 3 px thick.

        Returns:
            The annotated image.
        """
        vis_layout = img.copy()

        vis_layout = _layout_visualizer(
            semantic_info.tables,
            vis_layout,
            prefix="Table",
        )

        vis_layout = _layout_visualizer(
            semantic_info.paragraphs,
            vis_layout,
            prefix="Paragraph",
        )

        for results_table in semantic_info.tables:
            vis_layout, _ = cell_detector_visualizer(
                vis_layout,
                vis_layout,
                results_table.cells.values(),
            )

            for kv_item in results_table.kv_items:
                box = kv_item.box
                cv2.rectangle(
                    vis_layout,
                    (box[0], box[1]),
                    (box[2], box[3]),
                    (0, 0, 255),
                    3,
                )

            for grid in results_table.grids:
                box = grid.box
                cv2.rectangle(
                    vis_layout,
                    (box[0], box[1]),
                    (box[2], box[3]),
                    (255, 0, 0),
                    3,
                )

        return vis_layout

    def __call__(self, img, template=None, id=None, grid_only=False, kv_only=False):
        """
        Parse an input document image and return table semantics + visualizations.

        Steps:
        1) Run layout detection, text detection, cell detection, OCR recognition.
        2) Aggregate OCR results into each cell and paragraph (`contents` field).
        3) For each table:
            - cluster cells using weak connectivity graph (unless `grid_only`)
            - classify each cluster as grid vs kv based on adjacency structure
            - parse grids or kv_items accordingly
            - ensure all cells are included in the output
            - sort kv/grids and normalize IDs
        4) Sort tables and paragraphs and wrap into `TableSemanticParserSchema`.
        5) Optionally load a template JSON to align output with a predefined structure.
        6) Optionally generate visualization images.

        Args:
            img (np.ndarray): Input image (OpenCV ndarray).
            template (Optional[dict|str]): Template definition loaded into the schema.
                If provided, `semantic_info.load_template_json(template)` is called
                and the clustering / grid / key-value parsing of step 3 is skipped
                (tables then only carry their cells).
            id (Optional[str]): Reserved for future use (currently not used).
            grid_only (bool): If True, clustering is skipped and all nodes of a
                table are handled as a single cluster.
            kv_only (bool): If True, no cluster is parsed as a grid; every cluster
                is parsed as key-value items.

        Returns:
            Tuple[TableSemanticParserSchema, np.ndarray, np.ndarray]:
                - semantic_info: tables/paragraphs/words semantic structure
                - vis_layout: visualization image for layout/cells/grids/kv boxes
                - vis_ocr: visualization image for OCR polygons/text

        Raises:
            The error built by `make_error(ErrorCode.GPU_OUT_OF_MEMORY)` when the
            models raise `torch.cuda.OutOfMemoryError`.

        Notes:
            - If `self.visualize` is False, `vis_layout` and `vis_ocr` are still returned
            as copies of the (optionally preprocessed) image but without overlays.
            - This method uses `asyncio.run(...)` internally; calling it from an already
            running event loop (e.g., inside async frameworks) may require refactoring
            (e.g., exposing an async entrypoint).
        """

        if self.enable_preprocess:
            _, img = self.preprocessor(img)

        try:
            results_ocr, results_table, paragraphs = asyncio.run(self.run_models(img))
        except torch.cuda.OutOfMemoryError as e:
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            logger.error("GPU out of memory in TableSemanticParser: %s", e)
            raise make_error(ErrorCode.GPU_OUT_OF_MEMORY) from e

        for table in results_table:
            self.aggregate(results_ocr, table.cells)

        self.aggregate(results_ocr, paragraphs)

        vis_layout = img.copy()
        vis_ocr = img.copy()

        tables = []
        cell_offset = 0
        for table_index, table in enumerate(results_table):
            # Index the table's cells by id, converting detector cells to CellSchema.
            cells = {}
            for cell in table.cells:
                if isinstance(cell, TableCellSchema):
                    cell = CellSchema(
                        meta={},
                        id=cell.id,
                        box=cell.box,
                        role=cell.role,
                        row=cell.row,
                        col=cell.col,
                        row_span=cell.row_span,
                        col_span=cell.col_span,
                        contents=cell.contents,
                    )

                cells[cell.id] = cell

            table_information = {
                "id": f"t{table_index}",
                "box": table.box,
                "cells": {},
                "style": "border",
                "kv_items": [],
                "grids": [],
            }
            if template is None:
                nodes = _split_nodes_with_role(table.cells)

                if grid_only:
                    cluster_nodes_list = [nodes]
                else:
                    clusters, _ = _weakly_cluster_nodes_with_graph(nodes)
                    cluster_nodes_list = _get_cluster_nodes(clusters, nodes)

                for clustered_nodes in cluster_nodes_list:
                    if not kv_only and is_grid_cluster(clustered_nodes):
                        grid, grid_cells, _ = parse_grid_from_bottom_up(
                            cells, clustered_nodes, self.merge_same_column_values
                        )

                        # A cluster that cannot be parsed as a grid is dropped here;
                        # its cells are still added by the catch-all loop below.
                        if grid is None:
                            continue

                        table_information["grids"].append(grid)
                        table_information["cells"].update(grid_cells)

                    else:
                        kv_items, _, kv_cells = parse_kv_items(
                            clustered_nodes,
                            nodes,
                            cells,
                        )

                        table_information["kv_items"].extend(kv_items)
                        table_information["cells"].update(kv_cells)

            # Make sure every cell is part of the output, parsed or not.
            for cell in cells.values():
                if cell.id not in table_information["cells"]:
                    table_information["cells"][cell.id] = cell

            # Order kv items by the top edge of their value cell, grids by their top edge.
            table_information["kv_items"] = sorted(
                table_information["kv_items"],
                key=lambda kv: table_information["cells"][kv.value].box[1],
            )

            table_information["grids"] = sorted(
                table_information["grids"],
                key=lambda g: g.box[1],
            )

            for grid_index, grid in enumerate(table_information["grids"]):
                grid.id = f"g{grid_index}"

            for kv_index, kv in enumerate(table_information["kv_items"]):
                kv.id = f"kv{kv_index}"

            cell_offset = _assign_ids(table_information, cell_offset)

            tables.append(TableSemanticContentsSchema(**table_information))

        tables = _sort_elements(tables, prefix="t")
        paragraphs = _sort_elements(paragraphs, prefix="p")

        semantic_info = TableSemanticParserSchema(
            tables=tables,
            paragraphs=paragraphs,
            words=results_ocr.words,
        )

        if template is not None:
            semantic_info.load_template_json(template)

        if self.visualize:
            vis_layout = self.visualizer_layout(vis_layout, semantic_info)
            vis_ocr = self.visualizer_ocr(vis_ocr, semantic_info)

        return semantic_info, vis_layout, vis_ocr

`__call__(img, template=None, id=None, grid_only=False, kv_only=False)` ¶

Parse an input document image and return table semantics + visualizations.

Steps:

1. Run layout detection, text detection, cell detection, and OCR recognition.
2. Aggregate OCR results into each cell and paragraph (`contents` field).
3. For each table:
    - cluster cells using a weak connectivity graph (unless `grid_only`)
    - classify each cluster as grid vs. kv based on adjacency structure
    - parse grids or kv_items accordingly
    - ensure all cells are included in the output
    - sort kv/grids and normalize IDs
4. Sort tables and paragraphs and wrap them into `TableSemanticParserSchema`.
5. Optionally load a template JSON to align the output with a predefined structure.
6. Optionally generate visualization images.

引数：

名前	タイプ	デスクリプション	デフォルト
`img`	`ndarray`	Input image (OpenCV ndarray).	必須
`template`	`Optional[dict \| str]`	Template definition loaded into the schema. If provided, `semantic_info.load_template_json(template)` is called.	`None`
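`id`	`Optional[str]`	Reserved for future use (currently not used).	`None`
`grid_only`	`bool`	If True, clustering is skipped and all nodes of a table are handled as a single cluster.	`False`
`kv_only`	`bool`	If True, no cluster is parsed as a grid; every cluster is parsed as key-value items.	`False`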

戻り値：

タイプ	デスクリプション
`Tuple[TableSemanticParserSchema, np.ndarray, np.ndarray]`	`semantic_info`: tables/paragraphs/words semantic structure; `vis_layout`: visualization image for layout/cells/grids/kv boxes; `vis_ocr`: visualization image for OCR polygons/text.

Notes

If self.visualize is False, vis_layout and vis_ocr are still returned as copies of the input image but without overlays.
This method uses asyncio.run(...) internally; calling it from an already running event loop (e.g., inside async frameworks) may require refactoring (e.g., exposing an async entrypoint).

ソースコード位置： src/yomitoku/table_semantic_parser.py

def __call__(self, img, template=None, id=None, grid_only=False, kv_only=False):
    """
    Parse an input document image and return table semantics + visualizations.

    Steps:
    1) Run layout detection, text detection, cell detection, OCR recognition.
    2) Aggregate OCR results into each cell and paragraph (`contents` field).
    3) For each table:
        - cluster cells using weak connectivity graph (unless `grid_only`)
        - classify each cluster as grid vs kv based on adjacency structure
        - parse grids or kv_items accordingly
        - ensure all cells are included in the output
        - sort kv/grids and normalize IDs
    4) Sort tables and paragraphs and wrap into `TableSemanticParserSchema`.
    5) Optionally load a template JSON to align output with a predefined structure.
    6) Optionally generate visualization images.

    Args:
        img (np.ndarray): Input image (OpenCV ndarray).
        template (Optional[dict|str]): Template definition loaded into the schema.
            If provided, `semantic_info.load_template_json(template)` is called
            and the clustering / grid / key-value parsing of step 3 is skipped
            (tables then only carry their cells).
        id (Optional[str]): Reserved for future use (currently not used).
        grid_only (bool): If True, clustering is skipped and all nodes of a
            table are handled as a single cluster.
        kv_only (bool): If True, no cluster is parsed as a grid; every cluster
            is parsed as key-value items.

    Returns:
        Tuple[TableSemanticParserSchema, np.ndarray, np.ndarray]:
            - semantic_info: tables/paragraphs/words semantic structure
            - vis_layout: visualization image for layout/cells/grids/kv boxes
            - vis_ocr: visualization image for OCR polygons/text

    Raises:
        The error built by `make_error(ErrorCode.GPU_OUT_OF_MEMORY)` when the
        models raise `torch.cuda.OutOfMemoryError`.

    Notes:
        - If `self.visualize` is False, `vis_layout` and `vis_ocr` are still returned
        as copies of the (optionally preprocessed) image but without overlays.
        - This method uses `asyncio.run(...)` internally; calling it from an already
        running event loop (e.g., inside async frameworks) may require refactoring
        (e.g., exposing an async entrypoint).
    """

    if self.enable_preprocess:
        _, img = self.preprocessor(img)

    try:
        results_ocr, results_table, paragraphs = asyncio.run(self.run_models(img))
    except torch.cuda.OutOfMemoryError as e:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        logger.error("GPU out of memory in TableSemanticParser: %s", e)
        raise make_error(ErrorCode.GPU_OUT_OF_MEMORY) from e

    for table in results_table:
        self.aggregate(results_ocr, table.cells)

    self.aggregate(results_ocr, paragraphs)

    vis_layout = img.copy()
    vis_ocr = img.copy()

    tables = []
    cell_offset = 0
    for table_index, table in enumerate(results_table):
        # Index the table's cells by id, converting detector cells to CellSchema.
        cells = {}
        for cell in table.cells:
            if isinstance(cell, TableCellSchema):
                cell = CellSchema(
                    meta={},
                    id=cell.id,
                    box=cell.box,
                    role=cell.role,
                    row=cell.row,
                    col=cell.col,
                    row_span=cell.row_span,
                    col_span=cell.col_span,
                    contents=cell.contents,
                )

            cells[cell.id] = cell

        table_information = {
            "id": f"t{table_index}",
            "box": table.box,
            "cells": {},
            "style": "border",
            "kv_items": [],
            "grids": [],
        }
        if template is None:
            nodes = _split_nodes_with_role(table.cells)

            if grid_only:
                cluster_nodes_list = [nodes]
            else:
                clusters, _ = _weakly_cluster_nodes_with_graph(nodes)
                cluster_nodes_list = _get_cluster_nodes(clusters, nodes)

            for clustered_nodes in cluster_nodes_list:
                if not kv_only and is_grid_cluster(clustered_nodes):
                    grid, grid_cells, _ = parse_grid_from_bottom_up(
                        cells, clustered_nodes, self.merge_same_column_values
                    )

                    # A cluster that cannot be parsed as a grid is dropped here;
                    # its cells are still added by the catch-all loop below.
                    if grid is None:
                        continue

                    table_information["grids"].append(grid)
                    table_information["cells"].update(grid_cells)

                else:
                    kv_items, _, kv_cells = parse_kv_items(
                        clustered_nodes,
                        nodes,
                        cells,
                    )

                    table_information["kv_items"].extend(kv_items)
                    table_information["cells"].update(kv_cells)

        # Make sure every cell is part of the output, parsed or not.
        for cell in cells.values():
            if cell.id not in table_information["cells"]:
                table_information["cells"][cell.id] = cell

        # Order kv items by the top edge of their value cell, grids by their top edge.
        table_information["kv_items"] = sorted(
            table_information["kv_items"],
            key=lambda kv: table_information["cells"][kv.value].box[1],
        )

        table_information["grids"] = sorted(
            table_information["grids"],
            key=lambda g: g.box[1],
        )

        for grid_index, grid in enumerate(table_information["grids"]):
            grid.id = f"g{grid_index}"

        for kv_index, kv in enumerate(table_information["kv_items"]):
            kv.id = f"kv{kv_index}"

        cell_offset = _assign_ids(table_information, cell_offset)

        tables.append(TableSemanticContentsSchema(**table_information))

    tables = _sort_elements(tables, prefix="t")
    paragraphs = _sort_elements(paragraphs, prefix="p")

    semantic_info = TableSemanticParserSchema(
        tables=tables,
        paragraphs=paragraphs,
        words=results_ocr.words,
    )

    if template is not None:
        semantic_info.load_template_json(template)

    if self.visualize:
        vis_layout = self.visualizer_layout(vis_layout, semantic_info)
        vis_ocr = self.visualizer_ocr(vis_ocr, semantic_info)

    return semantic_info, vis_layout, vis_ocr

`__init__(configs={}, device='cuda:0', visualize=True, enable_preprocess=False)` ¶

Initialize the table semantic parser and its submodules.

引数：

名前	タイプ	デスクリプション	デフォルト
`configs`	`dict`	Configuration overrides for submodules. Expected keys (all optional): "table_detector" (kwargs for LayoutParser), "table_cell_parser" (kwargs for CellDetector), "text_detector" (kwargs for TextDetector), "text_recognizer" (kwargs for TextRecognizer), and "preprocess" (configs for Preprocessor, read only when `enable_preprocess` is True).	`{}`
`device`	`str`	Device identifier passed to submodules (e.g., "cuda:0", "cpu").	`'cuda:0'`
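`visualize`	`bool`	If True, the parser will generate debug visualization images.	`True`
`enable_preprocess`	`bool`	If True, a Preprocessor is created and applied to every input image before parsing.	`False`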

発生：

タイプ	デスクリプション
`ValueError`	If `configs` is not a dict.

ソースコード位置： src/yomitoku/table_semantic_parser.py

def __init__(
    self,
    configs={},
    device="cuda:0",
    visualize=True,
    enable_preprocess=False,
):
    """
    Initialize the table semantic parser and its submodules.

    Args:
        configs (dict): Configuration overrides for submodules. Expected keys (optional):
            - "table_detector": kwargs for LayoutParser
            - "table_cell_parser": kwargs for CellDetector
            - "text_detector": kwargs for TextDetector
            - "text_recognizer": kwargs for TextRecognizer
            - "preprocess": configs for Preprocessor (only read when
              `enable_preprocess` is True)
        device (str): Device identifier passed to submodules (e.g., "cuda:0", "cpu").
        visualize (bool): If True, the parser will generate debug visualization images.
        enable_preprocess (bool): If True, a Preprocessor is created and applied
            to every input image in `__call__`.

    Raises:
        ValueError: If `configs` is not a dict.
    """
    # NOTE: `configs` is only read, never mutated, so the shared `{}` default
    # is harmless here; it is kept to leave the public signature unchanged.
    if not isinstance(configs, dict):
        raise ValueError(
            "configs must be a dict. See the https://kotaro-kinoshita.github.io/yomitoku-dev/usage/"
        )

    # Defaults for each submodule; user overrides are merged on top.
    table_detector_kwargs = {
        "device": device,
        "visualize": visualize,
    }
    table_cell_parser_kwargs = {
        "device": device,
        "visualize": visualize,
    }
    text_detector_kwargs = {
        "device": device,
    }
    text_recognizer_kwargs = {
        "device": device,
    }

    overrides = (
        ("table_detector", table_detector_kwargs),
        ("table_cell_parser", table_cell_parser_kwargs),
        ("text_detector", text_detector_kwargs),
        ("text_recognizer", text_recognizer_kwargs),
    )
    for key, kwargs in overrides:
        if key in configs:
            kwargs.update(configs[key])

    self.layout_parser = LayoutParser(
        **table_detector_kwargs,
    )
    self.cell_detector = CellDetector(
        **table_cell_parser_kwargs,
    )
    self.text_detector = TextDetector(
        **text_detector_kwargs,
    )
    self.text_recognizer = TextRecognizer(
        **text_recognizer_kwargs,
    )

    self.enable_preprocess = enable_preprocess
    if self.enable_preprocess:
        preprocess_configs = {
            "rotate_detector": {
                "device": device,
            },
        }
        # Shallow merge: a user-supplied "rotate_detector" entry replaces the
        # default one (including its "device") as a whole.
        if "preprocess" in configs:
            preprocess_configs.update(configs["preprocess"])
        self.preprocessor = Preprocessor(
            configs=preprocess_configs,
        )

    self.visualize = visualize

    self.merge_same_column_values = False

Table Semantic Parser¶

__call__(img, template=None, id=None, grid_only=False, kv_only=False) ¶

__init__(configs={}, device='cuda:0', visualize=True, enable_preprocess=False) ¶

`__call__(img, template=None, id=None, grid_only=False, kv_only=False)` ¶

`__init__(configs={}, device='cuda:0', visualize=True, enable_preprocess=False)` ¶