Skip to main content

Step 6: Edit processor.py

This file implements a class that brings the file factory and parser together.

Explanation

The processor.py file uses the file factory to list all files that should be processed, then instantiates a parser to parse each of these files.

Hands on

  1. The template most likely generated the entire file correctly. All you need to do is remove the triple quotes and test the processor for correctness.

  2. Stage processor.py and commit it.

Result

processor.py
import logging
from typing import Sequence

from parsomics_core.entities.files.protein_annotation.file.models import (
ProteinAnnotationFile,
ProteinAnnotationFileDemand,
)
from parsomics_core.entities.files.protein_annotation.file.transactions import (
ProteinAnnotationFileTransactions,
)
from parsomics_core.processors._helpers import retrieve_genome_key
from pydantic import BaseModel
from sqlalchemy import Engine
from sqlmodel import Session

from .file_factory import InterproTsvFileFactory
from .parser import InterproTsvParser
from .validated_file import InterproTsvValidatedFile


class InterproOutputProcessor(BaseModel):
    """Bring the Interpro TSV file factory and the parser together.

    The file factory lists the files that should be processed; each file is
    then demanded as a ``ProteinAnnotationFile`` and handed to an
    ``InterproTsvParser``.
    """

    # Passed, together with dereplicated_genomes, to InterproTsvFileFactory.
    output_directory: str
    dereplicated_genomes: Sequence[str]
    # Used to retrieve each file's genome key and forwarded to the parser.
    assembly_key: int
    # Stored on every ProteinAnnotationFileDemand built by this processor.
    run_key: int
    # Forwarded to the parser.
    tool_key: int

    def process_interpro_tsv_files(self, engine: Engine) -> None:
        """Parse every Interpro TSV file assembled by the file factory.

        Args:
            engine: SQLAlchemy engine used to retrieve genome keys, to open
                the session in which each file is demanded, and by the
                parser itself.
        """
        interpro_tsv_file_factory: InterproTsvFileFactory = InterproTsvFileFactory(
            self.output_directory,
            self.dereplicated_genomes,
        )

        interpro_tsv_files: list[InterproTsvValidatedFile] = (
            interpro_tsv_file_factory.assemble()
        )
        for f in interpro_tsv_files:
            genome_key = retrieve_genome_key(engine, f, self.assembly_key)

            protein_annotation_file_demand_model = ProteinAnnotationFileDemand(
                path=f.path,
                run_key=self.run_key,
                genome_key=genome_key,
            )

            # The result of demand() is validated into a ProteinAnnotationFile
            # while the session is still open; the validated model is what is
            # used after the session closes.
            # NOTE(review): demand() presumably fetches or creates the file
            # record -- confirm against ProteinAnnotationFileTransactions.
            with Session(engine) as session:
                protein_annotation_file: ProteinAnnotationFile = (
                    ProteinAnnotationFile.model_validate(
                        ProteinAnnotationFileTransactions().demand(
                            session,
                            protein_annotation_file_demand_model,
                        )
                    )
                )

            # The parser receives the engine rather than the session above,
            # so it runs once the session block has been left.
            interpro_tsv_parser = InterproTsvParser(
                file=protein_annotation_file,
                assembly_key=self.assembly_key,
                tool_key=self.tool_key,
            )
            interpro_tsv_parser.parse(engine)

        # Logged once, after every file has been handed to the parser.
        logging.info(
            f"Finished adding all Interpro files on {self.output_directory} to the database."
        )