Source code for sail_on_client.feedback.document_transcription_feedback

"""Document Transcription Feedback."""

import pandas as pd
from sail_on_client.harness.local_harness import LocalHarness
from sail_on_client.harness.par_harness import ParHarness
from sail_on_client.feedback.feedback import Feedback

from typing import Union, Dict

SUPPORTED_FEEDBACK = ["classification", "score", "transcription"]


class DocumentTranscriptionFeedback(Feedback):
    """Feedback for document transcription."""

    def __init__(
        self,
        first_budget: int,
        income_per_batch: int,
        maximum_budget: int,
        interface: Union[LocalHarness, ParHarness],
        session_id: str,
        test_id: str,
        feedback_type: str,
    ) -> None:
        """
        Initialize document transcription feedback object.

        Args:
            first_budget: Initial budget
            income_per_batch: Additional labels added after every batch
            maximum_budget: Max labels that can be requested
            interface: An instance of evaluation interface
            session_id: Session identifier
            test_id: Test identifier
            feedback_type: Type of feedback that can be requested

        Returns:
            None
        """
        if feedback_type not in SUPPORTED_FEEDBACK:
            raise ValueError(f"Unsupported feedback_type {feedback_type}")
        super(DocumentTranscriptionFeedback, self).__init__(
            first_budget,
            income_per_batch,
            maximum_budget,
            interface,
            session_id,
            test_id,
            feedback_type,
        )
        self.current_round: int = -1
        self.budget: int = first_budget
    def get_levenshtein_feedback(
        self, round_id: int, images_id_list: list, image_names: list
    ) -> Union[pd.DataFrame, None]:
        """
        Get Levenshtein feedback for the round.

        Args:
            round_id: Round identifier
            images_id_list: List of indices for images
            image_names: List of image names for the round

        Returns:
            A dataframe containing Levenshtein scores, or None if feedback is
            requested for an older round
        """
        if round_id > self.current_round:
            self.deposit_income()
            self.current_round = round_id
            if len(images_id_list) <= self.budget:
                self.budget = self.budget - len(images_id_list)
                image_ids = [image_names[int(idx)] for idx in images_id_list]
                feedback_file = self.interface.get_feedback_request(
                    image_ids,
                    self.feedback_type,
                    self.test_id,
                    round_id,
                    self.session_id,
                )
                df = pd.read_csv(feedback_file, delimiter=",", header=None)
                return df
            else:
                raise ValueError(
                    "Requested more feedback labels than the available budget"
                )
        else:
            return None
    def get_feedback(
        self, round_id: int, images_id_list: list, image_names: list
    ) -> Union[pd.DataFrame, Dict, None]:
        """
        Get feedback for the round.

        Args:
            round_id: Round identifier
            images_id_list: List of indices for images
            image_names: List of image names for the round

        Returns:
            Either a dataframe or a dictionary with scores if the request is
            valid for the current round, otherwise None.
        """
        if self.feedback_type == "classification":
            feedback_fn = self.get_labeled_feedback
        elif self.feedback_type == "score":
            feedback_fn = self.get_score_feedback
        elif self.feedback_type == "transcription":
            feedback_fn = self.get_levenshtein_feedback
        else:
            raise ValueError(
                f"Unsupported feedback type {self.feedback_type} specified"
            )
        return feedback_fn(round_id, images_id_list, image_names)
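
# ---------------------------------------------------------------------------
# Illustrative usage sketch, not part of the original module: a minimal
# example of driving DocumentTranscriptionFeedback from client code. The
# budget values, session/test identifiers, and image names below are
# hypothetical placeholders, and the harness is taken as a parameter because
# constructing and configuring one is environment specific.
def _example_transcription_feedback(harness: Union[LocalHarness, ParHarness]) -> None:
    """Sketch of requesting transcription feedback for the first round."""
    feedback = DocumentTranscriptionFeedback(
        first_budget=10,
        income_per_batch=5,
        maximum_budget=50,
        interface=harness,
        session_id="session_0",  # hypothetical identifier
        test_id="test_0",  # hypothetical identifier
        feedback_type="transcription",
    )
    image_names = [f"image_{idx:03d}.jpg" for idx in range(20)]  # hypothetical names
    # With feedback_type == "transcription", get_feedback dispatches to
    # get_levenshtein_feedback, which deposits the per-batch income for the
    # new round and charges the three requested labels against the budget.
    result = feedback.get_feedback(
        round_id=0,
        images_id_list=[0, 1, 2],
        image_names=image_names,
    )
    print(result)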