Skip to content

Commit

Permalink
Merge pull request #60 from OpenPecha/fix-pecha_db_serializer
Browse files (browse the repository at this point in the history)
updated BaseSerializer and added source_type flag
  • Loading branch information
tenzin3 authored Oct 16, 2024
2 parents 2091324 + c1c5c73 commit 4d64775
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 17 deletions.
1 change: 1 addition & 0 deletions src/openpecha/pecha/serializers/__init__.py
Original file line number Diff line number Diff line change
@@ -11,5 +11,6 @@ def name(self):
def serialize(
self,
pecha_path: Path,
source_type: str,
):
pass
35 changes: 19 additions & 16 deletions src/openpecha/pecha/serializers/pecha_db.py
Original file line number Diff line number Diff line change
@@ -15,7 +15,7 @@ def __init__(self, output_path: Path = PECHAS_PATH):
self.mappings: Dict[str, List[int]] = {}
self.chapter: List[str] = []

def create_content_list(self, pecha):
def create_dharmanexus_content_list(self, pecha):
for _, layers in pecha.layers.items():
chapter_num = 1
for ann_store in layers[LayerEnum.meaning_segment]:
@@ -29,20 +29,23 @@ def create_content_list(self, pecha):
self.contents.append(self.chapter)
self.chapter = []

def serialize(self, pecha_path: Path):
def serialize(self, pecha_path: Path, source_type: str):
pecha_id = pecha_path.stem
pecha = Pecha(pecha_id=pecha_id, pecha_path=pecha_path)
self.create_content_list(pecha)
pecha_json = {
"title": pecha.metadata.title,
"language": pecha.metadata.language.value,
"versionSource": " ",
"completestatus": "done",
"content": self.contents,
"direction": "ltr",
}
pecha_db_json_path = f"{self.output_path}/{pecha_id}.json"
mapping_path = f"{self.output_path}/{pecha_id}_mapping.json"
write_json(pecha_db_json_path, pecha_json)
write_json(mapping_path, self.mappings)
return pecha_db_json_path, mapping_path
if source_type == "dharmanexus":
self.create_dharmanexus_content_list(pecha)
pecha_json = {
"title": pecha.metadata.title,
"language": pecha.metadata.language.value,
"versionSource": " ",
"completestatus": "done",
"content": self.contents,
"direction": "ltr",
}
pecha_db_json_path = f"{self.output_path}/{pecha_id}.json"
mapping_path = f"{self.output_path}/{pecha_id}_mapping.json"
write_json(pecha_db_json_path, pecha_json)
write_json(mapping_path, self.mappings)
return pecha_db_json_path, mapping_path
elif source_type == "pedurma":
pass
4 changes: 3 additions & 1 deletion tests/pecha/serializers/pecha_db/test_pecha_db_serializer.py
Original file line number Diff line number Diff line change
@@ -10,8 +10,10 @@ def test_pecha_db_serializer():
output_path = Path(tmpdirname)
serializer = PechaDBSerializer(output_path=output_path)
pecha_path = Path("tests/pecha/serializers/pecha_db/data/IB1A4B480")
source_type = "dharmanexus"
pecha_db_json_path, mapping_json_path = serializer.serialize(
pecha_path=pecha_path
pecha_path=pecha_path,
source_type=source_type,
)
assert read_json(pecha_db_json_path) == read_json(
"tests/pecha/serializers/pecha_db/data/expected_pecha_db.json"
Expand Down

0 comments on commit 4d64775

Please sign in to comment.