Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 49 additions & 7 deletions runtime/datamate-python/app/module/annotation/interface/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -1202,9 +1202,9 @@ async def sync_auto_task_annotations_to_database(

行为:
- 根据自动标注任务找到或创建对应的 Label Studio 项目(与 /sync-label-studio-back 相同的解析逻辑);
- 遍历项目下所有 task,按 task.data.file_id 定位 t_dm_dataset_files 记录
- 把 annotations + predictions 写入 annotation 字段,并抽取标签写入 tags,更新 tags_updated_at
返回成功更新的文件数量
- 先执行 DM -> Label Studio 的文件差异同步(确保任务文件集合最新)
- 再执行双向标签同步(LS <-> DM),按时间戳保留较新标注
返回同步变更数量(LS->DM + DM->LS)
"""

# 1. 获取并校验自动标注任务
Expand All @@ -1217,16 +1217,19 @@ async def sync_auto_task_annotations_to_database(
mappings = await mapping_service.get_mappings_by_dataset_id(task.dataset_id)

project_id: Optional[str] = None
selected_mapping = None
for m in mappings:
cfg = getattr(m, "configuration", None) or {}
if isinstance(cfg, dict) and cfg.get("autoTaskId") == task.id:
project_id = str(m.labeling_project_id)
selected_mapping = m
break

if project_id is None:
for m in mappings:
if m.name == task.name:
project_id = str(m.labeling_project_id)
selected_mapping = m
break

if project_id is None:
Expand Down Expand Up @@ -1263,13 +1266,52 @@ async def sync_auto_task_annotations_to_database(
),
)

# 3. 调用通用的 LS -> DM 同步服务
selected_mapping = await mapping_service.get_mapping_by_labeling_project_id(project_id)

if selected_mapping is None and project_id is not None:
selected_mapping = await mapping_service.get_mapping_by_labeling_project_id(project_id)

if selected_mapping is None:
raise HTTPException(
status_code=500,
detail="Failed to resolve mapping for auto task when syncing files to Label Studio.",
)

ls_client = LabelStudioClient(
base_url=settings.label_studio_base_url,
token=settings.label_studio_user_token,
)
sync_service = LSAnnotationSyncService(db, ls_client)

updated = await sync_service.sync_project_annotations_to_dm(project_id=str(project_id))
# 3. 先执行文件差异同步,确保 LS 工程任务集合与当前自动标注任务文件集合一致
dm_client = DatasetManagementService(db)
sync_orchestrator = SyncService(dm_client, ls_client, mapping_service)

Comment on lines +1285 to +1288
Copy link

Copilot AI Mar 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LSAnnotationSyncService is imported in this module but is no longer referenced after switching the /sync-db flow to SyncService orchestration. Please remove the unused import to avoid lint/type-check failures.

Copilot uses AI. Check for mistakes.
allowed_file_ids = None
if task.file_ids:
allowed_file_ids = {str(fid) for fid in task.file_ids}

file_sync_result = await sync_orchestrator.sync_files(
selected_mapping,
batch_size=50,
allowed_file_ids=allowed_file_ids,
delete_orphans=True,
)
logger.info(
"Auto sync-db pre file-sync done: task_id=%s, created=%s, deleted=%s, total=%s",
task_id,
file_sync_result.get("created", 0),
file_sync_result.get("deleted", 0),
file_sync_result.get("total", 0),
)

# 4. 执行双向标签同步(基于时间戳决策覆盖)
annotation_sync_result = await sync_orchestrator.sync_annotations_bidirectional(
selected_mapping,
batch_size=50,
overwrite=True,
overwrite_ls=True,
sync_files_first=False,
)

return StandardResponse(code="0", message="success", data=updated)
total_synced = annotation_sync_result.synced_to_dm + annotation_sync_result.synced_to_ls
return StandardResponse(code="0", message="success", data=total_synced)
38 changes: 26 additions & 12 deletions runtime/datamate-python/app/module/annotation/interface/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,14 +612,13 @@ async def sync_manual_annotations_to_database(
):
"""从 Label Studio 项目同步当前手动标注结果到 DM 数据库。

行为:
- 基于 mapping_id 定位 Label Studio 项目;
- 遍历项目下所有 task,按 task.data.file_id 找到对应 t_dm_dataset_files 记录;
- 读取每个 task 的 annotations + predictions,写入:
* tags: 从 result 中提取的标签概要,供 DM 列表/预览展示;
* annotation: 完整原始 JSON 结果;
* tags_updated_at: 当前时间戳。
返回值为成功更新的文件数量。
行为:
- 基于 mapping_id 定位 Label Studio 项目;
- 先执行 DM -> Label Studio 的文件差异同步(增量创建新文件任务、删除孤儿任务);
- 再执行双向标签同步(LS <-> DM),按时间戳保留较新标注:
* LS 更新时间更新 -> 覆盖 DM;
* DM tags_updated_at 更新 -> 覆盖 LS。
返回值为同步变更数量(LS->DM + DM->LS)。
"""

mapping_service = DatasetMappingService(db)
Expand All @@ -631,13 +630,28 @@ async def sync_manual_annotations_to_database(
base_url=settings.label_studio_base_url,
token=settings.label_studio_user_token,
)
sync_service = LSAnnotationSyncService(db, ls_client)

updated = await sync_service.sync_project_annotations_to_dm(
project_id=str(mapping.labeling_project_id),
dm_client = DatasetManagementService(db)
sync_orchestrator = SyncService(dm_client, ls_client, mapping_service)
file_sync_result = await sync_orchestrator.sync_files(mapping, batch_size=50)
Comment on lines +634 to +636
Copy link

Copilot AI Mar 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This endpoint now uses SyncService for manual sync, so LSAnnotationSyncService appears unused in this module (imported at the top but not referenced). Please remove the unused import to avoid lint/type-check failures and keep dependencies clear.

Copilot uses AI. Check for mistakes.
logger.info(
"Manual sync-db pre file-sync done: mapping=%s, created=%s, deleted=%s, total=%s",
mapping_id,
file_sync_result.get("created", 0),
file_sync_result.get("deleted", 0),
file_sync_result.get("total", 0),
)

annotation_sync_result = await sync_orchestrator.sync_annotations_bidirectional(
mapping,
batch_size=50,
overwrite=True,
overwrite_ls=True,
sync_files_first=False,
)

return StandardResponse(code="0", message="success", data=updated)
total_synced = annotation_sync_result.synced_to_dm + annotation_sync_result.synced_to_ls
return StandardResponse(code="0", message="success", data=total_synced)

@router.get("", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
async def list_mappings(
Expand Down
18 changes: 12 additions & 6 deletions runtime/datamate-python/app/module/annotation/interface/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,20 +72,23 @@ async def sync_dataset_content(
await sync_service.sync_annotations_from_ls_to_dm(
mapping,
request.batch_size,
request.overwrite
request.overwrite,
sync_files_first=False,
)
elif request.annotation_direction == "dm_to_ls":
await sync_service.sync_annotations_from_dm_to_ls(
mapping,
request.batch_size,
request.overwrite_labeling_project
request.overwrite_labeling_project,
sync_files_first=False,
)
elif request.annotation_direction == "bidirectional":
await sync_service.sync_annotations_bidirectional(
mapping,
request.batch_size,
request.overwrite,
request.overwrite_labeling_project
request.overwrite_labeling_project,
sync_files_first=False,
)

logger.info(f"Sync completed: {result.synced_files}/{result.total_files} files")
Expand Down Expand Up @@ -139,20 +142,23 @@ async def sync_annotations(
result = await sync_service.sync_annotations_from_ls_to_dm(
mapping,
request.batch_size,
request.overwrite
request.overwrite,
sync_files_first=True,
)
elif request.direction == "dm_to_ls":
result = await sync_service.sync_annotations_from_dm_to_ls(
mapping,
request.batch_size,
request.overwrite_labeling_project
request.overwrite_labeling_project,
sync_files_first=True,
)
elif request.direction == "bidirectional":
result = await sync_service.sync_annotations_bidirectional(
mapping,
request.batch_size,
request.overwrite,
request.overwrite_labeling_project
request.overwrite_labeling_project,
sync_files_first=True,
)
else:
raise HTTPException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -194,20 +194,34 @@ def iter_results() -> List[Dict[str, Any]]:

for r in results:
r_type = r.get("type")
if isinstance(r_type, str):
r_type = r_type.strip().lower()

from_name = r.get("from_name") or r.get("fromName")
to_name = r.get("to_name") or r.get("toName")
value_obj = r.get("value") or {}
if not isinstance(value_obj, dict):
continue

# 将 Label Studio 的 value 映射为 values,方便前端统一解析
values: Dict[str, Any] = {}
for key, v in value_obj.items():
values[key] = v
normalized_key = str(key).strip().lower() if key is not None else ""
if normalized_key:
values[normalized_key] = v

if isinstance(r_type, str) and r_type:
if r_type in values:
values = {r_type: values[r_type]}
elif len(values) == 1:
only_value = next(iter(values.values()))
values = {r_type: only_value}

tag = {
"id": r.get("id"),
"type": r_type,
"from_name": from_name,
"to_name": to_name,
"values": values,
}
Comment on lines 220 to 226
Copy link

Copilot AI Mar 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_extract_tags_from_results now includes to_name in the normalized tag payload, but the docstring above still lists the output fields as id/type/from_name/values only. Update the docstring to include to_name so callers and future maintainers have an accurate contract.

Copilot uses AI. Check for mistakes.
normalized.append(tag)
Expand Down
Loading
Loading