# Source code for sail_on_client.feedback.document_transcription_feedback

"""Document Transcription Feedback."""

import pandas as pd
from sail_on_client.harness.local_harness import LocalHarness
from sail_on_client.harness.par_harness import ParHarness
from sail_on_client.feedback.feedback import Feedback

from typing import Union, Dict

# Feedback types that DocumentTranscriptionFeedback accepts; any other value
# passed as ``feedback_type`` is rejected with a ValueError in its constructor.
SUPPORTED_FEEDBACK = [
    "classification",
    "score",
    "transcription",
]


class DocumentTranscriptionFeedback(Feedback):
    """
    Feedback for document transcription.

    Validates the requested feedback type against ``SUPPORTED_FEEDBACK`` and
    adds a "transcription" feedback path (levenshtein feedback) on top of the
    feedback functions provided by the ``Feedback`` base class.
    """

    def __init__(
        self,
        first_budget: int,
        income_per_batch: int,
        maximum_budget: int,
        interface: Union[LocalHarness, ParHarness],
        session_id: str,
        test_id: str,
        feedback_type: str,
    ) -> None:
        """
        Initialize document transcription feedback object.

        Args:
            first_budget: Initial budget
            income_per_batch: Additional labels added after every batch
            maximum_budget: Max labels that can be requested
            interface: An instance of evaluation interface
            session_id: Session identifier
            test_id: Test identifier
            feedback_type: Type of feedback that can be requested, must be
                one of the entries in ``SUPPORTED_FEEDBACK``

        Returns:
            None

        Raises:
            ValueError: If ``feedback_type`` is not in ``SUPPORTED_FEEDBACK``
        """
        if feedback_type not in SUPPORTED_FEEDBACK:
            raise ValueError(f"Unsupported feedback_type {feedback_type}")
        super().__init__(
            first_budget,
            income_per_batch,
            maximum_budget,
            interface,
            session_id,
            test_id,
            feedback_type,
        )
        # -1 guarantees that the first requested round (id >= 0) counts as new.
        self.current_round: int = -1
        # Number of images for which feedback can still be requested.
        self.budget: int = first_budget

    def get_levenshtein_feedback(
        self, round_id: int, images_id_list: list, image_names: list
    ) -> Union[pd.DataFrame, None]:
        """
        Get levenshtein feedback for the round.

        Args:
            round_id: Round identifier
            images_id_list: List of indices for images
            image_names: List of image names for the round

        Return:
            A dataframe read from the feedback returned by the interface, or
            None if feedback is requested for a round that is not newer than
            the last round seen

        Raises:
            ValueError: If more images are requested than the remaining
                budget allows
        """
        if round_id <= self.current_round:
            return None

        # Income is deposited and the round is advanced before the budget
        # check, so an over-budget request still consumes the round.
        self.deposit_income()
        self.current_round = round_id

        num_requested = len(images_id_list)
        if num_requested > self.budget:
            raise ValueError(
                f"Requested feedback for {num_requested} images exceeds "
                f"the remaining budget of {self.budget}"
            )

        self.budget -= num_requested
        # Translate the positional indices into the names of the images.
        image_ids = [image_names[int(idx)] for idx in images_id_list]
        feedback_file = self.interface.get_feedback_request(
            image_ids,
            self.feedback_type,
            self.test_id,
            round_id,
            self.session_id,
        )
        # The feedback has no header row, so columns are numbered from 0.
        return pd.read_csv(feedback_file, delimiter=",", header=None)

    def get_feedback(
        self, round_id: int, images_id_list: list, image_names: list
    ) -> Union[pd.DataFrame, Dict, None]:
        """
        Get feedback for the round.

        Dispatches to the feedback function matching ``self.feedback_type``.

        Args:
            round_id: Round identifier
            images_id_list: List of indices for images
            image_names: List of image names for the round

        Return:
            Either a dataframe or dictionary with score if the request is valid
            for the current round.

        Raises:
            ValueError: If ``self.feedback_type`` is not a supported type
        """
        if self.feedback_type == "classification":
            feedback_fn = self.get_labeled_feedback
        elif self.feedback_type == "score":
            feedback_fn = self.get_score_feedback
        elif self.feedback_type == "transcription":
            feedback_fn = self.get_levenshtein_feedback
        else:
            raise ValueError(
                f"Unsupported feedback type {self.feedback_type} specified"
            )
        return feedback_fn(round_id, images_id_list, image_names)