浏览代码

V5.6 first version of data mocking scripts

ysl2007 1 月之前
父节点
当前提交
c239ac4e0c

+ 556 - 0
data_mock/card_blueprints.py

@@ -0,0 +1,556 @@
+from __future__ import annotations
+
+
+# Level-1 department name -> business domain key.
+# infer_domain() in mock_cards.py consults this table first; every value here
+# is also a key of DOMAIN_KEYWORDS, CARD_BLUEPRINT_NAMES and DOMAIN_PROFILES.
+LEVEL1_DOMAIN_MAP = {
+    "零售金融部": "零售",
+    "公司金融部": "对公",
+    "信用卡中心": "信用卡",
+    "风险管理部": "风险",
+    "运营管理部": "运营",
+    "计划财务部": "财务",
+    "网络经营服务部": "渠道",
+    "普惠金融部": "普惠",
+    "金融市场部": "金融市场",
+    "国际业务部": "国际业务",
+    "法律合规部": "合规",
+    "人力资源部": "人力",
+}
+
+
+# Domain key -> substrings that hint at that domain.
+# Used by infer_domain() in mock_cards.py as a fallback when the level-1
+# department is not in LEVEL1_DOMAIN_MAP. The dict is scanned in insertion
+# order and the first domain with any matching keyword wins, so the order of
+# the entries below is significant.
+DOMAIN_KEYWORDS = {
+    "零售": ["零售", "高价值", "私人银行", "私钻", "财富", "养老金", "AUM", "存款", "消费贷", "理财", "基金", "保险", "MGM"],
+    "对公": ["对公", "公司", "战略客户", "机构客户", "上市公司", "保证金", "代发", "项目融资", "供应链", "信用证", "保函", "票据", "交易银行"],
+    "信用卡": ["信用卡", "发卡", "分期", "额度"],
+    "风险": ["风险", "不良", "逾期", "拨备", "授信", "贷后", "资产质量", "抵质押"],
+    "运营": ["运营", "柜面", "账户", "支付结算", "反洗钱", "投诉", "集中作业", "现金库存", "回单"],
+    "财务": ["财务", "利润", "FTP", "费用", "收入", "资本", "预算", "资产负债", "税务", "EVA", "收益率"],
+    "渠道": ["渠道", "手机银行", "网银", "网点", "自助设备", "开放银行", "数字人民币", "预约到店", "活跃", "渗透率"],
+    "普惠": ["普惠", "小微", "首贷", "涉农", "科创", "个体工商户"],
+    "金融市场": ["金融市场", "债券", "同业", "资金头寸", "外汇交易", "衍生品", "转贴现", "投资"],
+    "国际业务": ["国际", "跨境", "贸易融资", "进口", "出口", "外汇", "结售汇"],
+    "合规": ["合规", "受益所有人", "尽职调查", "名单命中", "可疑交易", "监管报送", "制裁"],
+    "人力": ["人力", "人均产能", "客户经理绩效", "奖金", "编制", "培训", "人才盘点"],
+}
+
+
+# Domain key -> card display names available for that domain.
+# get_card_blueprints() builds one spec per name. The names are not just
+# labels: the _choose_* helpers derive the dataset, metrics, grouping fields,
+# filters and sort order from substrings of each name (e.g. "TOP10", "各机构",
+# "月度趋势", "率"), so renaming a card changes the generated spec.
+CARD_BLUEPRINT_NAMES = {
+    "零售": [
+        "当月零售客户数", "当月高价值客户数", "当月私人银行客户数", "零售客户较上月净增", "零售客户月度增长趋势",
+        "高价值客户月度增长趋势", "AUM余额", "AUM余额月度走势图", "各机构AUM余额排名", "各机构高价值客户数及达成率",
+        "客户资产分层结构", "产品持仓结构", "理财产品销售额", "基金产品保有量", "零售贷款余额",
+        "消费贷放款额月度趋势", "客户经理AUM贡献TOP10", "流失预警客户清单", "本机构零售客户数", "本机构客户资产提升率",
+    ],
+    "对公": [
+        "当月对公客户数", "战略客户数", "对公客户较上月净增", "对公存款余额", "对公存款日均余额",
+        "活期存款占比", "对公存款余额月度趋势", "各机构对公存款余额排名", "公司贷款余额", "当月公司贷款投放额",
+        "公司贷款投放月度趋势", "行业投向结构", "重点客户综合贡献TOP10", "代发客户数", "代发金额月度趋势",
+        "信用证余额", "保函余额", "票据贴现金额", "交易银行活跃客户数", "流失预警对公客户清单",
+    ],
+    "信用卡": [
+        "当月新增发卡量", "累计发卡量", "当月活跃客户数", "活跃率", "交易金额",
+        "交易金额月度趋势", "交易笔数月度趋势", "各机构交易金额排名", "分期金额", "分期转化率",
+        "分期手续费收入", "渠道获客结构", "额度使用率", "逾期客户数", "逾期余额",
+        "逾期率月度趋势", "风险等级分布", "睡眠客户唤醒数", "权益活动参与客户数", "商户交易额TOP10",
+    ],
+    "风险": [
+        "贷款余额", "不良贷款余额", "不良率", "关注类贷款余额", "逾期余额",
+        "逾期率", "资产质量月度趋势", "不良贷款迁徙趋势", "各机构不良率排名", "行业风险分布",
+        "风险预警客户数", "预警处置完成率", "贷后检查完成率", "授信集中度", "大额风险暴露客户清单",
+        "抵质押品价值变动趋势", "拨备覆盖率", "五级分类结构", "本机构不良贷款余额", "本机构风险预警处置清单",
+    ],
+    "运营": [
+        "当日柜面业务笔数", "当月柜面业务笔数", "业务金额", "平均处理时长", "超时业务笔数",
+        "业务量月度趋势", "各机构业务量排名", "账户开立数", "账户异常数", "企业账户年检完成率",
+        "支付结算笔数", "支付结算金额月度趋势", "运营风险事件数", "风险事件处理完成率", "客户投诉数",
+        "投诉处理及时率", "集中作业处理时效分布", "现金库存余额", "电子回单使用率", "本机构待处理事项清单",
+    ],
+    "财务": [
+        "营业收入", "净利息收入", "中间业务收入", "营业支出", "税前利润",
+        "经济利润EVA", "利润贡献月度趋势", "各机构利润贡献排名", "收入结构", "费用预算",
+        "费用执行率", "费用执行月度趋势", "产品收益率排名", "资产收益率", "负债成本率",
+        "资产负债结构", "资本占用金额", "FTP净收入", "本机构利润贡献", "本机构费用超预算项目",
+    ],
+    "渠道": [
+        "当月活跃客户数", "活跃率", "累计活跃客户数", "渠道渗透率", "活跃率月度走势图",
+        "渗透率月度走势图", "各机构当月活跃客户数及活跃率", "各机构累计活跃客户数及活跃率", "手机银行登录次数", "网银交易金额",
+        "数字人民币交易笔数", "线上预约到店客户数", "预约到店转化率", "活跃用户构成", "新增用户月度走势图",
+        "流失用户数", "渠道交易金额结构", "自助设备可用率", "开放银行接口调用量", "活跃TOP10用户名称",
+    ],
+    "普惠": [
+        "普惠贷款余额", "普惠贷款投放额", "普惠贷款余额月度趋势", "各机构普惠贷款余额排名", "小微客户数",
+        "小微客户较上月净增", "首贷户数", "首贷户拓展月度趋势", "涉农贷款余额", "科技型企业贷款余额",
+        "个体工商户贷款余额", "贷款加权利率", "利率定价分布", "普惠不良贷款余额", "普惠不良率",
+        "风险补偿金额", "延期还本金额", "本机构普惠贷款投放额", "客户经理普惠贷款贡献TOP10", "普惠风险客户清单",
+    ],
+    "金融市场": [
+        "投资余额", "投资市值", "估值损益", "估值损益月度趋势", "投资组合结构",
+        "债券品种结构", "久期分布", "同业负债余额", "同业负债成本率", "资金头寸缺口",
+        "资金头寸缺口趋势", "外汇交易损益", "衍生品估值损益", "交易对手集中度", "限额占用率",
+        "限额超限预警清单", "票据转贴现金额", "理财投资资产穿透结构", "市场价格波动预警", "本机构同业业务余额",
+    ],
+    "国际业务": [
+        "跨境结算金额", "跨境结算笔数", "跨境结算月度趋势", "贸易融资余额", "贸易融资投放额",
+        "进口信用证金额", "出口托收金额", "外汇存款余额", "结售汇金额", "结售汇客户贡献TOP10",
+        "跨境人民币金额", "币种分布", "国家地区分布", "各机构国际业务收入排名", "国际业务风险预警笔数",
+        "风险预警处置率", "本机构跨境结算金额", "本机构贸易融资余额", "重点客户国际业务贡献", "业务品种结构",
+    ],
+    "合规": [
+        "待尽调客户数", "已尽调客户数", "尽调完成率", "尽调完成率月度趋势", "受益所有人信息缺失客户数",
+        "名单命中客户数", "可疑交易笔数", "可疑交易金额", "可疑交易月度趋势", "监管报送差错数",
+        "报送质量月度趋势", "合规检查问题数", "整改完成率", "整改逾期问题清单", "制裁筛查处理及时率",
+        "员工异常行为线索数", "各机构合规问题数排名", "风险等级分布", "本机构待处理合规事项", "本机构整改完成率",
+    ],
+    "人力": [
+        "在岗人数", "客户经理人数", "人均产能", "人均产能月度趋势", "各机构人均产能排名",
+        "客户经理绩效得分", "客户经理绩效TOP10", "奖金金额", "奖金分配结构", "岗位编制缺口",
+        "人员流入流出趋势", "培训完成率", "培训完成率月度趋势", "人才梯队人数", "一线人员工作量",
+        "管户客户数", "管户负载分布", "本机构人均产能", "本机构编制缺口", "本机构培训未完成人员清单",
+    ],
+}
+
+
+# Domain key -> profile consumed by build_card_spec() and the _choose_* helpers.
+# Keys of each profile:
+#   prefix            dataset prefix copied into the spec as "dataset_prefix"
+#   date_field        date column used for the date filter and trend grouping
+#   default_dataset   dataset name used when no "datasets" rule matches
+#   datasets          ordered (keywords, dataset name) rules; first match wins
+#   metrics           ordered (keywords, [(field, alias, agg), ...]) rules; all
+#                     matching rules contribute, capped at three metrics
+#   default_metric    (field, alias, agg) used when no "metrics" rule matches
+#   top_metric        (field, alias, agg) used for "TOP10" cards
+#   structure_field   grouping field for structure/distribution cards
+#   top_field         grouping field for "TOP10" cards
+#   list_fields       columns shown by list-style cards
+DOMAIN_PROFILES = {
+    "零售": {
+        "prefix": "RTL",
+        "date_field": "统计日期",
+        "default_dataset": "零售客户经营宽表",
+        "datasets": [
+            (["AUM", "资产", "高价值", "私人银行", "客户经理"], "零售AUM日均汇总"),
+            (["理财", "基金", "产品", "持仓"], "零售产品销售明细"),
+            (["贷款", "消费贷"], "零售贷款余额快照"),
+        ],
+        "metrics": [
+            (["高价值"], [("客户ID", "高价值客户数", "CNT_DISTINCT")]),
+            (["私人银行"], [("客户ID", "私人银行客户数", "CNT_DISTINCT")]),
+            (["AUM", "资产"], [("AUM余额", "AUM余额", "SUM")]),
+            (["理财"], [("理财销售额", "理财产品销售额", "SUM")]),
+            (["基金"], [("基金保有量", "基金产品保有量", "SUM")]),
+            (["贷款"], [("贷款余额", "零售贷款余额", "SUM")]),
+            (["消费贷"], [("放款金额", "消费贷放款额", "SUM")]),
+            (["净增", "新增"], [("客户ID", "新增客户数", "CNT_DISTINCT")]),
+            (["流失"], [("客户ID", "流失客户数", "CNT_DISTINCT")]),
+            (["提升率", "达成率"], [("达成率", "达成率", "AVG")]),
+        ],
+        "default_metric": ("客户ID", "零售客户数", "CNT_DISTINCT"),
+        "top_metric": ("AUM余额", "AUM贡献", "SUM"),
+        "structure_field": "客户层级",
+        "top_field": "客户经理",
+        "list_fields": ["客户名称", "客户层级", "AUM余额", "客户经理"],
+    },
+    "对公": {
+        "prefix": "CORP",
+        "date_field": "统计日期",
+        "default_dataset": "对公客户经营汇总",
+        "datasets": [
+            (["存款", "活期"], "对公存款余额明细"),
+            (["贷款", "投放", "行业投向"], "公司信贷业务余额"),
+            (["代发", "交易银行"], "交易银行业务统计"),
+            (["信用证", "保函", "票据"], "对公表外及票据业务明细"),
+        ],
+        "metrics": [
+            (["战略客户"], [("客户ID", "战略客户数", "CNT_DISTINCT")]),
+            (["存款"], [("存款余额", "对公存款余额", "SUM")]),
+            (["日均"], [("日均余额", "对公存款日均余额", "SUM")]),
+            (["活期"], [("活期存款余额", "活期存款余额", "SUM"), ("存款余额", "存款余额", "SUM")]),
+            (["贷款"], [("贷款余额", "公司贷款余额", "SUM")]),
+            (["投放"], [("投放金额", "贷款投放额", "SUM")]),
+            (["代发"], [("代发金额", "代发金额", "SUM")]),
+            (["信用证"], [("信用证余额", "信用证余额", "SUM")]),
+            (["保函"], [("保函余额", "保函余额", "SUM")]),
+            (["票据"], [("贴现金额", "票据贴现金额", "SUM")]),
+            (["交易银行", "活跃"], [("客户ID", "活跃客户数", "CNT_DISTINCT")]),
+            (["综合贡献"], [("综合贡献", "综合贡献", "SUM")]),
+            (["净增"], [("客户ID", "新增客户数", "CNT_DISTINCT")]),
+            (["流失"], [("客户ID", "流失客户数", "CNT_DISTINCT")]),
+        ],
+        "default_metric": ("客户ID", "对公客户数", "CNT_DISTINCT"),
+        "top_metric": ("综合贡献", "综合贡献", "SUM"),
+        "structure_field": "行业门类",
+        "top_field": "客户名称",
+        "list_fields": ["客户名称", "客户等级", "行业门类", "管户客户经理"],
+    },
+    "信用卡": {
+        "prefix": "CARD",
+        "date_field": "统计日期",
+        "default_dataset": "信用卡客户交易汇总",
+        "datasets": [
+            (["分期"], "信用卡分期业务明细"),
+            (["逾期", "风险"], "信用卡风险客户监测"),
+            (["渠道", "获客"], "信用卡渠道获客统计"),
+            (["商户", "交易"], "信用卡客户交易汇总"),
+        ],
+        "metrics": [
+            (["发卡"], [("卡号", "发卡量", "CNT_DISTINCT")]),
+            (["活跃"], [("客户ID", "活跃客户数", "CNT_DISTINCT")]),
+            (["交易金额", "商户"], [("交易金额", "交易金额", "SUM")]),
+            (["交易笔数"], [("交易流水号", "交易笔数", "CNT_DISTINCT")]),
+            (["分期金额"], [("分期金额", "分期金额", "SUM")]),
+            (["转化率", "活跃率", "使用率", "逾期率"], [("比率值", "比率", "AVG")]),
+            (["手续费"], [("手续费收入", "分期手续费收入", "SUM")]),
+            (["逾期余额"], [("逾期余额", "逾期余额", "SUM")]),
+            (["逾期客户"], [("客户ID", "逾期客户数", "CNT_DISTINCT")]),
+            (["唤醒"], [("客户ID", "唤醒客户数", "CNT_DISTINCT")]),
+        ],
+        "default_metric": ("客户ID", "信用卡客户数", "CNT_DISTINCT"),
+        "top_metric": ("交易金额", "交易金额", "SUM"),
+        "structure_field": "渠道名称",
+        "top_field": "商户名称",
+        "list_fields": ["客户名称", "卡产品", "风险等级", "逾期余额"],
+    },
+    "风险": {
+        "prefix": "RISK",
+        "date_field": "统计日期",
+        "default_dataset": "信贷资产质量汇总",
+        "datasets": [
+            (["不良", "迁徙", "五级分类"], "不良贷款迁徙明细"),
+            (["预警", "清单"], "预警客户处置台账"),
+            (["授信", "集中度", "大额"], "授信集中度监控表"),
+            (["抵质押"], "抵质押品重估明细"),
+        ],
+        "metrics": [
+            (["不良率", "逾期率"], [("比率值", "风险比率", "AVG")]),
+            (["不良"], [("不良贷款余额", "不良贷款余额", "SUM")]),
+            (["关注"], [("关注类贷款余额", "关注类贷款余额", "SUM")]),
+            (["逾期"], [("逾期余额", "逾期余额", "SUM")]),
+            (["预警"], [("客户ID", "风险预警客户数", "CNT_DISTINCT")]),
+            (["完成率", "覆盖率"], [("比率值", "完成率", "AVG")]),
+            (["授信"], [("授信余额", "授信集中度", "SUM")]),
+            (["拨备"], [("拨备覆盖率", "拨备覆盖率", "AVG")]),
+        ],
+        "default_metric": ("贷款余额", "贷款余额", "SUM"),
+        "top_metric": ("贷款余额", "贷款余额", "SUM"),
+        "structure_field": "五级分类",
+        "top_field": "客户名称",
+        "list_fields": ["客户名称", "风险等级", "贷款余额", "处置状态"],
+    },
+    "运营": {
+        "prefix": "OPS",
+        "date_field": "统计日期",
+        "default_dataset": "网点运营业务量汇总",
+        "datasets": [
+            (["账户"], "账户质量监控明细"),
+            (["支付结算"], "支付结算业务统计"),
+            (["风险事件", "投诉", "待处理"], "运营风险事件台账"),
+            (["现金"], "现金库存调拨明细"),
+        ],
+        "metrics": [
+            (["笔数", "业务量"], [("业务流水号", "业务笔数", "CNT_DISTINCT")]),
+            (["金额", "库存"], [("业务金额", "业务金额", "SUM")]),
+            (["时长"], [("处理时长", "平均处理时长", "AVG")]),
+            (["超时"], [("业务流水号", "超时业务笔数", "CNT_DISTINCT")]),
+            (["开立"], [("账户ID", "账户开立数", "CNT_DISTINCT")]),
+            (["异常"], [("账户ID", "账户异常数", "CNT_DISTINCT")]),
+            (["完成率", "及时率", "使用率"], [("比率值", "比率", "AVG")]),
+            (["投诉"], [("投诉编号", "客户投诉数", "CNT_DISTINCT")]),
+            (["风险事件"], [("事件编号", "运营风险事件数", "CNT_DISTINCT")]),
+        ],
+        "default_metric": ("业务流水号", "业务笔数", "CNT_DISTINCT"),
+        "top_metric": ("业务笔数", "业务笔数", "SUM"),
+        "structure_field": "业务类型",
+        "top_field": "机构名称",
+        "list_fields": ["事项编号", "业务类型", "处理状态", "责任团队"],
+    },
+    "财务": {
+        "prefix": "FIN",
+        "date_field": "统计月份",
+        "default_dataset": "管理会计利润汇总",
+        "datasets": [
+            (["费用", "预算"], "费用预算执行表"),
+            (["收益率", "产品"], "产品收益率测算表"),
+            (["资产负债", "资本", "FTP"], "资产负债结构快照"),
+        ],
+        "metrics": [
+            (["收入"], [("收入金额", "营业收入", "SUM")]),
+            (["支出"], [("支出金额", "营业支出", "SUM")]),
+            (["利润", "EVA"], [("利润金额", "利润贡献", "SUM")]),
+            (["预算"], [("预算金额", "费用预算", "SUM")]),
+            (["执行率", "收益率", "成本率"], [("比率值", "比率", "AVG")]),
+            (["资本"], [("资本占用金额", "资本占用金额", "SUM")]),
+            (["FTP"], [("FTP净收入", "FTP净收入", "SUM")]),
+        ],
+        "default_metric": ("财务金额", "财务金额", "SUM"),
+        "top_metric": ("利润金额", "利润贡献", "SUM"),
+        "structure_field": "产品条线",
+        "top_field": "机构名称",
+        "list_fields": ["项目名称", "费用科目", "预算金额", "执行金额"],
+    },
+    "渠道": {
+        "prefix": "CHNL",
+        "date_field": "统计日期",
+        "default_dataset": "数字渠道活跃客户统计",
+        "datasets": [
+            (["网点", "预约"], "网点客流业务统计"),
+            (["自助设备"], "自助设备运行明细"),
+            (["营销"], "渠道协同营销明细"),
+            (["接口", "开放银行"], "开放银行接口调用明细"),
+            (["数字人民币"], "数字人民币交易统计"),
+        ],
+        "metrics": [
+            (["活跃客户", "累计活跃"], [("用户ID", "活跃用户数", "CNT_DISTINCT")]),
+            (["活跃率", "渗透率", "转化率", "可用率"], [("比率值", "比率", "AVG")]),
+            (["登录"], [("登录次数", "登录次数", "SUM")]),
+            (["交易金额"], [("交易金额", "交易金额", "SUM")]),
+            (["交易笔数"], [("交易流水号", "交易笔数", "CNT_DISTINCT")]),
+            (["预约"], [("客户ID", "预约客户数", "CNT_DISTINCT")]),
+            (["新增"], [("用户ID", "新增用户数", "CNT_DISTINCT")]),
+            (["流失"], [("用户ID", "流失用户数", "CNT_DISTINCT")]),
+            (["接口"], [("调用次数", "接口调用量", "SUM")]),
+        ],
+        "default_metric": ("用户ID", "用户数", "CNT_DISTINCT"),
+        "top_metric": ("登录次数", "登录次数", "SUM"),
+        "structure_field": "用户状态",
+        "top_field": "用户名称",
+        "list_fields": ["用户名称", "所属机构", "登录次数", "最近登录时间"],
+    },
+    "普惠": {
+        "prefix": "INCL",
+        "date_field": "统计日期",
+        "default_dataset": "普惠贷款业务汇总",
+        "datasets": [
+            (["小微", "客户经理"], "小微客户经营宽表"),
+            (["涉农"], "涉农贷款投放明细"),
+            (["风险补偿", "延期"], "普惠风险补偿台账"),
+        ],
+        "metrics": [
+            (["投放"], [("投放金额", "普惠贷款投放额", "SUM")]),
+            (["小微"], [("客户ID", "小微客户数", "CNT_DISTINCT")]),
+            (["首贷"], [("客户ID", "首贷户数", "CNT_DISTINCT")]),
+            (["涉农"], [("贷款余额", "涉农贷款余额", "SUM")]),
+            (["科技型"], [("贷款余额", "科技型企业贷款余额", "SUM")]),
+            (["利率", "不良率"], [("比率值", "比率", "AVG")]),
+            (["不良"], [("不良贷款余额", "不良贷款余额", "SUM")]),
+            (["风险补偿"], [("补偿金额", "风险补偿金额", "SUM")]),
+            (["延期"], [("延期金额", "延期还本金额", "SUM")]),
+        ],
+        "default_metric": ("贷款余额", "普惠贷款余额", "SUM"),
+        "top_metric": ("贷款余额", "普惠贷款贡献", "SUM"),
+        "structure_field": "贷款产品",
+        "top_field": "客户经理",
+        "list_fields": ["客户名称", "贷款产品", "贷款余额", "风险等级"],
+    },
+    "金融市场": {
+        "prefix": "MKT",
+        "date_field": "统计日期",
+        "default_dataset": "金融市场投资组合明细",
+        "datasets": [
+            (["同业"], "同业负债成本统计"),
+            (["资金头寸"], "资金头寸预测表"),
+            (["外汇", "衍生品"], "外汇衍生交易估值表"),
+            (["票据"], "票据转贴现业务分析表"),
+        ],
+        "metrics": [
+            (["市值"], [("市值", "投资市值", "SUM")]),
+            (["损益"], [("损益金额", "损益金额", "SUM")]),
+            (["久期", "成本率", "占用率"], [("比率值", "比率", "AVG")]),
+            (["同业"], [("同业负债余额", "同业负债余额", "SUM")]),
+            (["缺口"], [("资金缺口", "资金头寸缺口", "SUM")]),
+            (["票据"], [("贴现金额", "票据转贴现金额", "SUM")]),
+        ],
+        "default_metric": ("投资余额", "投资余额", "SUM"),
+        "top_metric": ("投资余额", "投资余额", "SUM"),
+        "structure_field": "交易品种",
+        "top_field": "交易对手",
+        "list_fields": ["交易对手", "交易品种", "限额占用率", "预警等级"],
+    },
+    "国际业务": {
+        "prefix": "INTL",
+        "date_field": "统计日期",
+        "default_dataset": "国际结算业务统计",
+        "datasets": [
+            (["贸易融资"], "贸易融资业务明细"),
+            (["结售汇"], "结售汇客户贡献表"),
+            (["跨境人民币"], "跨境人民币业务汇总"),
+            (["信用证", "托收"], "国际结算业务统计"),
+        ],
+        "metrics": [
+            (["笔数"], [("业务流水号", "业务笔数", "CNT_DISTINCT")]),
+            (["贸易融资"], [("融资余额", "贸易融资余额", "SUM")]),
+            (["投放"], [("投放金额", "贸易融资投放额", "SUM")]),
+            (["信用证"], [("信用证金额", "进口信用证金额", "SUM")]),
+            (["托收"], [("托收金额", "出口托收金额", "SUM")]),
+            (["外汇存款"], [("存款余额", "外汇存款余额", "SUM")]),
+            (["结售汇"], [("结售汇金额", "结售汇金额", "SUM")]),
+            (["人民币"], [("业务金额", "跨境人民币金额", "SUM")]),
+            (["处置率"], [("比率值", "风险预警处置率", "AVG")]),
+            (["风险预警"], [("预警编号", "风险预警笔数", "CNT_DISTINCT")]),
+        ],
+        "default_metric": ("结算金额", "跨境结算金额", "SUM"),
+        "top_metric": ("业务金额", "国际业务贡献", "SUM"),
+        "structure_field": "业务品种",
+        "top_field": "客户名称",
+        "list_fields": ["客户名称", "业务品种", "币种", "业务金额"],
+    },
+    "合规": {
+        "prefix": "COMP",
+        "date_field": "统计日期",
+        "default_dataset": "反洗钱客户监测表",
+        "datasets": [
+            (["尽调", "受益所有人"], "客户尽职调查台账"),
+            (["监管报送"], "监管报送质量统计"),
+            (["整改", "检查"], "合规检查整改明细"),
+            (["名单", "可疑交易", "制裁"], "反洗钱客户监测表"),
+        ],
+        "metrics": [
+            (["待尽调"], [("客户ID", "待尽调客户数", "CNT_DISTINCT")]),
+            (["已尽调"], [("客户ID", "已尽调客户数", "CNT_DISTINCT")]),
+            (["完成率", "及时率"], [("比率值", "比率", "AVG")]),
+            (["缺失"], [("客户ID", "信息缺失客户数", "CNT_DISTINCT")]),
+            (["名单"], [("客户ID", "名单命中客户数", "CNT_DISTINCT")]),
+            (["交易金额"], [("交易金额", "可疑交易金额", "SUM")]),
+            (["可疑交易"], [("交易流水号", "可疑交易笔数", "CNT_DISTINCT")]),
+            (["差错"], [("报送批次号", "报送差错数", "CNT_DISTINCT")]),
+            (["问题"], [("问题编号", "合规检查问题数", "CNT_DISTINCT")]),
+            (["线索"], [("线索编号", "异常行为线索数", "CNT_DISTINCT")]),
+        ],
+        "default_metric": ("客户ID", "客户数", "CNT_DISTINCT"),
+        "top_metric": ("问题编号", "合规问题数", "CNT_DISTINCT"),
+        "structure_field": "风险等级",
+        "top_field": "机构名称",
+        "list_fields": ["事项编号", "问题类型", "处理状态", "责任部门"],
+    },
+    "人力": {
+        "prefix": "HR",
+        "date_field": "统计月份",
+        "default_dataset": "机构人员产能统计",
+        "datasets": [
+            (["绩效", "客户经理"], "客户经理绩效明细"),
+            (["奖金"], "奖金分配测算表"),
+            (["培训"], "培训完成情况表"),
+            (["编制", "人才", "人员"], "人员编制与流动统计"),
+        ],
+        "metrics": [
+            (["人数", "在岗", "客户经理", "人才"], [("员工ID", "人数", "CNT_DISTINCT")]),
+            (["产能"], [("人均产能", "人均产能", "AVG")]),
+            (["绩效"], [("绩效得分", "绩效得分", "AVG")]),
+            (["奖金"], [("奖金金额", "奖金金额", "SUM")]),
+            (["缺口"], [("编制缺口", "编制缺口", "SUM")]),
+            (["流入流出"], [("员工ID", "人员变动数", "CNT_DISTINCT")]),
+            (["培训完成率"], [("比率值", "培训完成率", "AVG")]),
+            (["管户"], [("客户ID", "管户客户数", "CNT_DISTINCT")]),
+        ],
+        "default_metric": ("员工ID", "人数", "CNT_DISTINCT"),
+        "top_metric": ("绩效得分", "绩效得分", "AVG"),
+        "structure_field": "岗位名称",
+        "top_field": "员工姓名",
+        "list_fields": ["员工姓名", "岗位名称", "团队名称", "完成状态"],
+    },
+}
+
+
+def get_card_blueprints(domain: str) -> list[dict[str, object]]:
+    """Build one validated card spec for every blueprint name of ``domain``.
+
+    Specs are returned in the order the names are listed in
+    CARD_BLUEPRINT_NAMES. A ``domain`` that is missing from
+    CARD_BLUEPRINT_NAMES raises KeyError.
+    """
+    blueprints: list[dict[str, object]] = []
+    for blueprint_name in CARD_BLUEPRINT_NAMES[domain]:
+        blueprints.append(build_card_spec(domain, blueprint_name))
+    return blueprints
+
+
+def build_card_spec(domain: str, card_name: str) -> dict[str, object]:
+    """Derive the full spec of one card from its name and its domain profile.
+
+    The returned dict carries the card name, dataset prefix and dataset name,
+    the grouping fields, the filters, the metrics (as field/alias/agg dicts)
+    and the sort order. The spec is passed through validate_card_spec()
+    before it is returned.
+    """
+    profile = DOMAIN_PROFILES[domain]
+    # Resolve every part from the card name before assembling the spec.
+    metric_tuples = _choose_metrics(profile, card_name)
+    group_fields = _choose_fields(profile, card_name)
+    filter_items = _choose_filters(profile, card_name)
+    sort_items = _choose_sort(profile, card_name, group_fields, metric_tuples)
+    dataset_name = _choose_dataset(profile, card_name)
+
+    spec: dict[str, object] = {}
+    spec["card_name"] = card_name
+    spec["dataset_prefix"] = profile["prefix"]
+    spec["dataset"] = dataset_name
+    spec["fields"] = group_fields
+    spec["filters"] = filter_items
+    metric_items = []
+    for field, alias, agg in metric_tuples:
+        metric_items.append({"field": field, "alias": alias, "agg": agg})
+    spec["metrics"] = metric_items
+    spec["sort"] = sort_items
+
+    validate_card_spec(spec)
+    return spec
+
+
+def validate_card_spec(spec: dict[str, object]) -> None:
+    """Check that every metric, filter and sort entry of a card spec is complete.
+
+    The previous implementation compared the lengths of lists that were all
+    derived from the same source list, so the checks could never fail. Each
+    entry is now inspected individually:
+
+    * a metric needs a non-empty ``field`` and ``agg``;
+    * a filter needs a non-empty ``field`` and one value source, i.e. one of
+      the keys ``value``, ``value_from`` or ``value_kind`` (the three forms
+      resolve_filter_value() in mock_cards.py understands);
+    * a sort item needs a non-empty ``field`` and ``way``.
+
+    Args:
+        spec: card spec with ``metrics``, ``filters`` and ``sort`` lists of
+            dicts, as produced by build_card_spec().
+
+    Raises:
+        ValueError: if any entry is incomplete; the message names the card.
+    """
+    # Only used in error messages, so a spec without a name can still pass.
+    card_name = spec.get("card_name")
+
+    for metric in spec["metrics"]:
+        if not metric.get("field") or not metric.get("agg"):
+            raise ValueError(f"metric binding mismatch for {card_name}")
+
+    value_keys = ("value", "value_from", "value_kind")
+    for item in spec["filters"]:
+        has_value_source = any(key in item for key in value_keys)
+        if not item.get("field") or not has_value_source:
+            raise ValueError(f"filter binding mismatch for {card_name}")
+
+    for item in spec["sort"]:
+        if not item.get("field") or not item.get("way"):
+            raise ValueError(f"sort binding mismatch for {card_name}")
+
+
+def _choose_dataset(profile: dict[str, object], card_name: str) -> str:
+    """Pick the dataset name for a card.
+
+    The profile's ``datasets`` rules are tried in order; the first rule with
+    a keyword that occurs in ``card_name`` decides. Without any match the
+    profile's ``default_dataset`` is returned.
+    """
+    for triggers, dataset_name in profile["datasets"]:
+        for trigger in triggers:
+            if trigger in card_name:
+                return dataset_name
+    return profile["default_dataset"]
+
+
+def _choose_metrics(profile: dict[str, object], card_name: str) -> list[tuple[str, str, str]]:
+    """Select up to three (field, alias, agg) metrics for a card.
+
+    Rules, in priority order:
+
+    1. A name containing "率" or "占比" but not "及" is a pure ratio card and
+       gets a single AVG metric on "比率值" aliased with the card name.
+    2. A "TOP10" card uses the profile's ``top_metric``.
+    3. Otherwise every ``metrics`` rule whose keyword occurs in the name
+       contributes its metrics (duplicates dropped, rule order kept); with no
+       match the ``default_metric`` is used. A compound name ("及") that
+       resolved to only one metric gets a generic ratio metric added.
+    """
+    is_compound = "及" in card_name
+    looks_like_rate = "率" in card_name or "占比" in card_name
+    if looks_like_rate and not is_compound:
+        return [("比率值", card_name, "AVG")]
+    if "TOP10" in card_name:
+        return [profile["top_metric"]]
+
+    selected: list[tuple[str, str, str]] = []
+    for triggers, candidates in profile["metrics"]:
+        if not any(trigger in card_name for trigger in triggers):
+            continue
+        for candidate in candidates:
+            if candidate in selected:
+                continue
+            selected.append(candidate)
+
+    if len(selected) == 0:
+        selected = [profile["default_metric"]]
+    if is_compound and len(selected) == 1:
+        selected.append(("比率值", "比率", "AVG"))
+    return selected[:3]
+
+
+def _choose_fields(profile: dict[str, object], card_name: str) -> list[str]:
+    """Choose the grouping/display fields implied by the card name.
+
+    Trend cards group by the profile's date field, per-institution cards by
+    "机构名称", TOP10 cards by ``top_field``, structure/distribution cards by
+    ``structure_field``, and list-style cards show a copy of ``list_fields``.
+    Any other card is a single-value card and gets no fields.
+    """
+
+    def mentions(*tokens: str) -> bool:
+        return any(token in card_name for token in tokens)
+
+    date_field = profile["date_field"]
+    if mentions("月度趋势", "走势图"):
+        return [date_field]
+    if mentions("各机构", "各分行"):
+        return ["机构名称"]
+    if mentions("TOP10"):
+        return [profile["top_field"]]
+    if mentions("结构", "分布", "构成"):
+        return [profile["structure_field"]]
+    if mentions("清单", "超预算项目", "未完成人员"):
+        return list(profile["list_fields"])
+    return []
+
+
+def _choose_filters(profile: dict[str, object], card_name: str) -> list[dict[str, str]]:
+    """Build the filter list for a card.
+
+    Every card gets a month-end filter on the profile's date field and an
+    institution filter bound to the dashboard's ``bbk_id`` (on "上级机构ID"
+    for per-institution cards, otherwise on "机构ID"). Further fixed-value
+    filters are appended, in the order of the rule table below, for each
+    rule whose keyword occurs in the card name.
+    """
+    per_institution = "各机构" in card_name or "各分行" in card_name
+    org_field = "上级机构ID" if per_institution else "机构ID"
+    filters = [
+        {"field": profile["date_field"], "value_kind": "month_end"},
+        {"field": org_field, "value_from": "dashboard.bbk_id"},
+    ]
+
+    # (keywords, filter field, filter value); order defines output order.
+    keyword_rules = (
+        (("本机构",), "机构层级", "本机构"),
+        (("当月",), "统计周期", "当月"),
+        (("累计",), "统计口径", "年累计"),
+        (("流失",), "客户状态", "流失"),
+        (("活跃",), "活跃标识", "是"),
+        (("风险", "预警"), "风险等级", "中高风险"),
+    )
+    for triggers, field, value in keyword_rules:
+        if any(trigger in card_name for trigger in triggers):
+            filters.append({"field": field, "value": value})
+    return filters
+
+
+def _choose_sort(
+    profile: dict[str, object],
+    card_name: str,
+    fields: list[str],
+    metrics: list[tuple[str, str, str]],
+) -> list[dict[str, str]]:
+    """Derive the sort order of a card from its name.
+
+    * Trend cards sort ascending by the profile's date field.
+    * Ranking, TOP10, list, per-institution and structure/distribution cards
+      sort descending by the field of the first metric.
+    * Any other card with grouping fields sorts ascending by the first field.
+    * Otherwise no sort is applied.
+
+    The ranking branch used to index ``metrics[0]`` without checking for an
+    empty list (unlike the structure branch), raising IndexError. Both
+    branches now share the same guard, and a card without metrics falls
+    through to the field-based rule.
+
+    Args:
+        profile: domain profile; only ``date_field`` is read.
+        card_name: display name whose keywords select the rule.
+        fields: grouping fields already chosen for the card.
+        metrics: (field, alias, agg) tuples already chosen for the card.
+
+    Returns:
+        A list with at most one ``{"field": ..., "way": ...}`` item.
+    """
+    if any(keyword in card_name for keyword in ["月度趋势", "走势图"]):
+        return [{"field": profile["date_field"], "way": "ASC"}]
+
+    metric_sorted_keywords = ["排名", "TOP10", "清单", "各机构", "各分行", "结构", "分布", "构成"]
+    if metrics and any(keyword in card_name for keyword in metric_sorted_keywords):
+        return [{"field": metrics[0][0], "way": "DESC"}]
+
+    if fields:
+        return [{"field": fields[0], "way": "ASC"}]
+    return []

+ 286 - 0
data_mock/mock_cards.py

@@ -0,0 +1,286 @@
+from __future__ import annotations
+
+import argparse
+import hashlib
+import json
+import random
+from pathlib import Path
+
+import pandas as pd
+
+from card_blueprints import DOMAIN_KEYWORDS, LEVEL1_DOMAIN_MAP, get_card_blueprints
+
+
+# parents[1] of this file's resolved path, i.e. the directory above the one
+# that contains this script; default data paths are built from it.
+ROOT = Path(__file__).resolve().parents[1]
+# Default for --dashboard-input.
+DASHBOARD_PATH = ROOT / "data" / "dashboard.parquet"
+# Default for --output.
+OUTPUT_PATH = ROOT / "data" / "card_info.parquet"
+# Mixed into every id produced by make_id() and used to seed the RNG in
+# make_cards(), so repeated runs generate the same data.
+RANDOM_SEED = 20260507
+
+
+# Column order of the DataFrame built by make_cards(); the names match the
+# keys of the record dict returned by build_card_record().
+CARD_COLUMNS = [
+    "card_id",
+    "bbk_id",
+    "bbk_name",
+    "card_name",
+    "ds_id",
+    "ds_name",
+    "dash_id",
+    "dash_name",
+    "card_type_cd",
+    "field_alias",
+    "field_name",
+    "filters_field_name",
+    "filters_field_value",
+    "sort_field_name",
+    "sort_way",
+    "num_value_field_name",
+    "num_value_field_alias",
+    "num_value_field_merge_way",
+    "folder_name",
+    "folder_all_route",
+]
+
+
+def to_json(values: list[str]) -> str:
+    """Serialize ``values`` to a JSON string, keeping non-ASCII text readable."""
+    encoded = json.dumps(values, ensure_ascii=False)
+    return encoded
+
+
+def make_id(prefix: str, ordinal: int) -> str:
+    """Return a deterministic 24-character hex id for ``prefix`` and ``ordinal``.
+
+    The id is the start of the MD5 hex digest of "<prefix>-<ordinal>-<seed>",
+    where the seed is the module-level RANDOM_SEED. MD5 serves only as a
+    stable fingerprint here.
+    """
+    seed_text = "{}-{}-{}".format(prefix, ordinal, RANDOM_SEED)
+    digest = hashlib.md5(seed_text.encode("utf-8")).hexdigest()
+    return digest[:24]
+
+
+def parse_branch_names(raw_branch_names: list[str] | None, available_branch_names: set[str]) -> list[str] | None:
+    """Normalize the --branch-name values into a de-duplicated list.
+
+    Each raw value may hold several comma-separated names. Names are
+    stripped, blanks are dropped and first-seen order is kept.
+
+    Returns:
+        None when no raw values were given, otherwise the list of names.
+
+    Raises:
+        ValueError: if a name is not in ``available_branch_names``.
+    """
+    if not raw_branch_names:
+        return None
+
+    # A dict keeps insertion order and drops repeats.
+    ordered: dict[str, None] = {}
+    for raw_value in raw_branch_names:
+        for piece in raw_value.split(","):
+            candidate = piece.strip()
+            if candidate:
+                ordered.setdefault(candidate, None)
+    branch_names = list(ordered)
+
+    unknown_branch_names = [name for name in branch_names if name not in available_branch_names]
+    if not unknown_branch_names:
+        return branch_names
+
+    supported = "、".join(sorted(available_branch_names))
+    unknown = "、".join(unknown_branch_names)
+    raise ValueError(f"unsupported branch name: {unknown}. Available branch names in dashboards: {supported}")
+
+
+def infer_domain(dashboard: pd.Series) -> str:
+    """Determine the business domain of a dashboard row.
+
+    The level-1 department name is looked up in LEVEL1_DOMAIN_MAP first.
+    Failing that, the department name, dashboard display name and folder
+    route are scanned for DOMAIN_KEYWORDS, and the first domain (in dict
+    order) with a matching keyword wins. "零售" is the final fallback.
+    """
+    level1_name = str(dashboard.get("level1_dept_name", ""))
+    try:
+        return LEVEL1_DOMAIN_MAP[level1_name]
+    except KeyError:
+        pass
+
+    dash_name = str(dashboard.get("dash_dsply_name", ""))
+    folder_route = str(dashboard.get("folder_all_route", ""))
+    haystack = " ".join((level1_name, dash_name, folder_route))
+    keyword_hits = (
+        domain
+        for domain, keywords in DOMAIN_KEYWORDS.items()
+        if any(keyword in haystack for keyword in keywords)
+    )
+    return next(keyword_hits, "零售")
+
+
+def make_dataset_id(dataset_prefix: str, dataset_name: str) -> str:
+    """Return the deterministic id of a dataset, derived from its prefix and name."""
+    id_prefix = "dataset-{}-{}".format(dataset_prefix, dataset_name)
+    return make_id(id_prefix, 1)
+
+
+def resolve_filter_value(filter_spec: dict[str, str], dashboard: pd.Series) -> str:
+    """Turn one filter spec into the concrete filter value string.
+
+    Three forms are supported:
+
+    * ``value``: a literal, returned as a string;
+    * ``value_from``: "dashboard.bbk_id" or "dashboard.bbk_name", read from
+      the dashboard row;
+    * ``value_kind`` "month_end": a fixed month ("2026-05") when the field
+      name contains "月份", otherwise a fixed date ("2026-05-31").
+
+    Raises:
+        ValueError: if the spec matches none of these forms.
+    """
+    if "value" in filter_spec:
+        return str(filter_spec["value"])
+
+    source = filter_spec.get("value_from")
+    dashboard_sources = (
+        ("dashboard.bbk_id", "bbk_id"),
+        ("dashboard.bbk_name", "bbk_name"),
+    )
+    for source_key, column in dashboard_sources:
+        if source == source_key:
+            return str(dashboard[column])
+
+    kind = filter_spec.get("value_kind")
+    field_name = str(filter_spec["field"])
+    if kind != "month_end":
+        raise ValueError(f"unsupported filter spec: {filter_spec}")
+    if "月份" in field_name:
+        return "2026-05"
+    return "2026-05-31"
+
+
+def build_card_record(
+    global_ordinal: int,
+    dashboard: pd.Series,
+    spec: dict[str, object],
+) -> dict[str, str]:
+    """Flatten one card spec into an output row for the given dashboard.
+
+    List-valued parts of the spec (fields, filters, sort, metrics) are stored
+    as JSON strings; filter values are resolved against the dashboard row.
+
+    ``folder_name`` is the last non-empty segment of the dashboard's
+    ``folder_all_route``. ``str.split("/")`` never returns an empty list, so
+    the former ``infer_domain`` fallback was unreachable and an empty or
+    slash-terminated route produced an empty folder name; empty segments are
+    now discarded so the fallback applies when no segment is left.
+
+    Args:
+        global_ordinal: running card number; the card id is derived from it.
+        dashboard: dashboard row providing bbk_id, bbk_name, dash_id,
+            dash_dsply_name and folder_all_route.
+        spec: card spec as produced by build_card_spec().
+
+    Returns:
+        A dict with one string value per CARD_COLUMNS entry.
+    """
+    dataset_prefix = str(spec["dataset_prefix"])
+    dataset_name = str(spec["dataset"])
+    fields = list(spec["fields"])
+    metrics = list(spec["metrics"])
+    filters = list(spec["filters"])
+    sort = list(spec["sort"])
+
+    filter_names = [str(item["field"]) for item in filters]
+    filter_values = [resolve_filter_value(item, dashboard) for item in filters]
+    metric_names = [str(item["field"]) for item in metrics]
+    metric_aliases = [str(item["alias"]) for item in metrics]
+    metric_aggs = [str(item["agg"]) for item in metrics]
+    sort_names = [str(item["field"]) for item in sort]
+    sort_ways = [str(item["way"]) for item in sort]
+
+    folder_route = str(dashboard["folder_all_route"])
+    # Drop empty segments caused by an empty route or leading/trailing "/".
+    folder_parts = [part for part in folder_route.split("/") if part]
+    folder_name = folder_parts[-1] if folder_parts else infer_domain(dashboard)
+
+    return {
+        "card_id": make_id("card", global_ordinal),
+        "bbk_id": str(dashboard["bbk_id"]),
+        "bbk_name": str(dashboard["bbk_name"]),
+        "card_name": str(spec["card_name"]),
+        "ds_id": make_dataset_id(dataset_prefix, dataset_name),
+        "ds_name": f"{dataset_prefix}_{str(dashboard['bbk_id'])}_{dataset_name}",
+        "dash_id": str(dashboard["dash_id"]),
+        "dash_name": str(dashboard["dash_dsply_name"]),
+        "card_type_cd": "图表",
+        # No separate aliases exist for grouping fields, so alias == name.
+        "field_alias": to_json(fields),
+        "field_name": to_json(fields),
+        "filters_field_name": to_json(filter_names),
+        "filters_field_value": to_json(filter_values),
+        "sort_field_name": to_json(sort_names),
+        "sort_way": to_json(sort_ways),
+        "num_value_field_name": to_json(metric_names),
+        "num_value_field_alias": to_json(metric_aliases),
+        "num_value_field_merge_way": to_json(metric_aggs),
+        "folder_name": folder_name,
+        "folder_all_route": folder_route,
+    }
+
+
+def select_dashboards(dashboards: pd.DataFrame, branch_names: list[str] | None, count: int | None) -> pd.DataFrame:
+    """Restrict the dashboards to the requested branches and row count.
+
+    Rows are first filtered by ``bbk_name`` (when branch names are given),
+    then truncated to the first ``count`` rows (when a count is given). The
+    result always has a fresh 0-based index.
+    """
+    if branch_names:
+        in_branch = dashboards["bbk_name"].isin(branch_names)
+        picked = dashboards.loc[in_branch]
+    else:
+        picked = dashboards
+    if count is not None:
+        picked = picked.iloc[:count]
+    return picked.reset_index(drop=True)
+
+
+def choose_card_specs(rng: random.Random, domain: str) -> list[dict[str, object]]:
+    """Randomly pick the card specs to generate for one dashboard.
+
+    Between 10 and 20 specs are sampled without replacement from the
+    domain's blueprints. The lower bound is clamped to the number of
+    available specs: with fewer than 10 blueprints the former
+    ``randint(10, len(specs))`` call had an empty range and raised
+    ValueError. Domains with at least 10 blueprints consume the RNG exactly
+    as before, so previously generated data is unchanged.
+
+    Args:
+        rng: random generator that drives both the count and the sample.
+        domain: domain key understood by get_card_blueprints().
+    """
+    specs = get_card_blueprints(domain)
+    upper = min(20, len(specs))
+    lower = min(10, upper)
+    card_count = rng.randint(lower, upper)
+    return rng.sample(specs, card_count)
+
+
+def make_cards(
+    dashboards: pd.DataFrame,
+    start_ordinal: int = 1,
+) -> pd.DataFrame:
+    """Generate the card rows for all given dashboards.
+
+    Each dashboard gets a random selection of specs for its inferred domain.
+    Cards are numbered consecutively from ``start_ordinal``, and the RNG
+    seed is offset by the same amount, so a different starting ordinal
+    yields different ids and a different random selection. The frame is
+    validated before it is returned.
+    """
+    rng = random.Random(RANDOM_SEED + start_ordinal - 1)
+    records: list[dict[str, str]] = []
+
+    for _, dashboard in dashboards.iterrows():
+        specs = choose_card_specs(rng, infer_domain(dashboard))
+        for spec in specs:
+            # The next ordinal is the start plus the cards emitted so far.
+            next_ordinal = start_ordinal + len(records)
+            records.append(build_card_record(next_ordinal, dashboard, spec))
+
+    cards = pd.DataFrame(records, columns=CARD_COLUMNS)
+    validate_generated_cards(cards)
+    return cards
+
+
+def validate_generated_cards(cards: pd.DataFrame) -> None:
+    """Verify that the parallel JSON list columns of every card line up.
+
+    For each row, the metric columns (name/alias/merge way), the filter
+    columns (name/value) and the sort columns (name/way) must decode to
+    lists of equal length within their group.
+
+    Raises:
+        ValueError: naming the group and card id of the first mismatch.
+    """
+    column_groups = (
+        ("metric", ("num_value_field_name", "num_value_field_alias", "num_value_field_merge_way")),
+        ("filter", ("filters_field_name", "filters_field_value")),
+        ("sort", ("sort_field_name", "sort_way")),
+    )
+    for row in cards.itertuples(index=False):
+        # Decode every column of the row before comparing any lengths.
+        decoded = [
+            (label, [json.loads(getattr(row, column)) for column in columns])
+            for label, columns in column_groups
+        ]
+        for label, lists in decoded:
+            if len({len(values) for values in lists}) != 1:
+                raise ValueError(f"{label} length mismatch in card {row.card_id}")
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line options of the card mocking script.
+
+    Options: --mode (overwrite/append), --count, --output, --branch-name
+    (repeatable) and --dashboard-input.
+    """
+    # (flag, add_argument keyword arguments), registered in this order.
+    option_specs = [
+        (
+            "--mode",
+            {
+                "choices": ["overwrite", "append"],
+                "default": "overwrite",
+                "help": "overwrite replaces the parquet file; append inserts generated rows into the existing parquet file.",
+            },
+        ),
+        (
+            "--count",
+            {
+                "type": int,
+                "default": None,
+                "help": "number of dashboards to generate cards for. Defaults to all selected dashboards.",
+            },
+        ),
+        (
+            "--output",
+            {
+                "type": Path,
+                "default": OUTPUT_PATH,
+                "help": "target parquet path.",
+            },
+        ),
+        (
+            "--branch-name",
+            {
+                "action": "append",
+                "help": (
+                    "branch names to generate, separated by comma or passed repeatedly. "
+                    "Defaults to all branches in dashboard parquet."
+                ),
+            },
+        ),
+        (
+            "--dashboard-input",
+            {
+                "type": Path,
+                "default": DASHBOARD_PATH,
+                "help": "source dashboard parquet path.",
+            },
+        ),
+    ]
+
+    parser = argparse.ArgumentParser(description="Generate mock BI card parquet data.")
+    for flag, options in option_specs:
+        parser.add_argument(flag, **options)
+    return parser.parse_args()
+
+
+def main() -> None:
+    """Generate mock cards for the selected dashboards and write the parquet file.
+
+    Steps: parse and check the options, load the dashboards, select the
+    requested branches/rows, generate cards (continuing the numbering after
+    the existing rows in append mode), write the result and print a summary.
+
+    Raises:
+        ValueError: for a non-positive --count, an unknown branch name, or
+            when no dashboard is selected.
+        FileNotFoundError: when the dashboard parquet does not exist.
+    """
+    args = parse_args()
+    if args.count is not None and args.count <= 0:
+        raise ValueError("--count must be a positive integer")
+
+    dashboard_path = args.dashboard_input.resolve()
+    if not dashboard_path.exists():
+        raise FileNotFoundError(f"dashboard parquet not found: {dashboard_path}")
+
+    dashboards = pd.read_parquet(dashboard_path)
+    # Requested branch names are checked against the names in the input.
+    branch_names = parse_branch_names(args.branch_name, set(dashboards["bbk_name"]))
+    selected_dashboards = select_dashboards(dashboards, branch_names, args.count)
+    if selected_dashboards.empty:
+        raise ValueError("no dashboards selected for card generation")
+
+    output_path = args.output.resolve()
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    # In append mode the existing rows are kept; a missing output file is
+    # treated like an empty one.
+    existing = pd.DataFrame()
+    if args.mode == "append" and output_path.exists():
+        existing = pd.read_parquet(output_path)
+
+    # Continue numbering after the existing rows so new card ids differ from
+    # the ids generated for earlier ordinals.
+    new_cards = make_cards(selected_dashboards, start_ordinal=len(existing) + 1)
+    cards = pd.concat([existing, new_cards], ignore_index=True) if not existing.empty else new_cards
+    # Re-validate the combined frame, which also covers the pre-existing rows.
+    validate_generated_cards(cards)
+    cards.to_parquet(output_path, index=False)
+
+    # Summary statistics for the console report below.
+    duplicate_card_id = int(cards["card_id"].duplicated().sum())
+    card_count_by_dash = new_cards.groupby("dash_id")["card_id"].count()
+    selected_branch_names = branch_names or sorted(selected_dashboards["bbk_name"].unique())
+
+    print(f"mode: {args.mode}")
+    print(f"dashboard input: {dashboard_path}")
+    print(f"branch names: {', '.join(selected_branch_names)}")
+    print(f"selected dashboards: {len(selected_dashboards)}")
+    print(f"generated rows: {len(new_cards)}")
+    print(f"wrote total rows: {len(cards)} to {output_path}")
+    print(f"duplicate card_id: {duplicate_card_id}")
+    print(f"cards per dashboard: {int(card_count_by_dash.min())} {int(card_count_by_dash.max())}")
+    print("bbk_name:", new_cards["bbk_name"].value_counts().to_dict())
+
+
+if __name__ == "__main__":
+    main()

+ 569 - 0
data_mock/mock_dashboards.py

@@ -0,0 +1,569 @@
+from __future__ import annotations
+
+import argparse
+import hashlib
+import random
+from datetime import datetime, timedelta
+from pathlib import Path
+
+import pandas as pd
+
+
+# Parent of the directory that contains this file.
+ROOT = Path(__file__).resolve().parents[1]
+# Default parquet target; used as the default value of the --output option.
+OUTPUT_PATH = ROOT / "data" / "dashboard.parquet"
+# Base seed: mixed into every generated ID and used to seed the row generator.
+RANDOM_SEED = 20260506
+
+
+# Branch display name -> branch number written to the ``bbk_id`` column.
+BRANCH_ID_MAP = {
+    "总行": "00100",
+    "北京分行": "00110",
+    "广东分行": "00120",
+}
+
+
+# Branch display name -> short code written to the ``dm_nm`` column.
+# The keys also act as the list of branch names accepted by parse_branch_names.
+BRANCH_DM_MAP = {
+    "总行": "zh",
+    "北京分行": "bjfh",
+    "广东分行": "gdfh",
+}
+
+
+# Branches used when no --branch-name option is supplied.
+DEFAULT_BRANCH_NAMES = ["总行", "北京分行", "广东分行"]
+
+
+# Per-domain generation templates. Each entry supplies the department names
+# (level1..level3), the candidate dashboard topics and the folder name that
+# build_record / build_folder_route read when producing a mock dashboard row.
+# make_dashboards flattens these into (config, topic) pairs.
+DOMAIN_CONFIGS = [
+    {
+        "domain": "零售",
+        "level1": "零售金融部",
+        "level2": "经营分析室",
+        "level3": "客户经营团队",
+        "topics": [
+            "零售客户经营总览",
+            "高价值客户增长分析",
+            "私人银行客户资产监测",
+            "私钻客户AUM提升分析",
+            "财富客户资产配置看板",
+            "养老金客户经营跟踪",
+            "代发客群转化跟踪",
+            "零售存款日均分析",
+            "储蓄存款客群结构分析",
+            "零售贷款投放监控",
+            "个人住房贷款质量看板",
+            "消费贷获客转化分析",
+            "汽车分期贷款经营分析",
+            "理财产品销售分析",
+            "基金产品持仓透视",
+            "保险产品销售监测",
+            "客户资产提升作战图",
+            "网点零售业绩排名",
+            "MGM获客转化分析",
+        ],
+        "folder": "零售数据门户",
+    },
+    {
+        "domain": "对公",
+        "level1": "公司金融部",
+        "level2": "公司客户经营室",
+        "level3": "对公经营分析团队",
+        "topics": [
+            "对公客户经营总览",
+            "战略客户贡献分析",
+            "机构客户综合贡献分析",
+            "上市公司客群经营看板",
+            "对公存款增长看板",
+            "单位活期存款结构分析",
+            "保证金存款监测",
+            "代发业务拓展监测",
+            "对公信贷投放监控",
+            "项目融资投放分析",
+            "小微企业贷款分析",
+            "供应链金融业务看板",
+            "信用证业务分析",
+            "保函业务风险监测",
+            "票据贴现经营分析",
+            "现金管理客户活跃分析",
+            "交易银行客户活跃分析",
+            "公司客户流失预警",
+        ],
+        "folder": "公司金融数据门户",
+    },
+    {
+        "domain": "信用卡",
+        "level1": "信用卡中心",
+        "level2": "经营管理部",
+        "level3": "数据分析团队",
+        "topics": [
+            "信用卡新增客户分析",
+            "信用卡交易额监测",
+            "信用卡分期业务看板",
+            "信用卡账单分期转化分析",
+            "信用卡逾期风险预警",
+            "信用卡活跃客户经营",
+            "信用卡渠道获客分析",
+            "信用卡额度使用分析",
+            "信用卡客群画像看板",
+            "信用卡权益活动效果分析",
+            "信用卡商户交易监测",
+            "信用卡睡眠客户唤醒看板",
+        ],
+        "folder": "信用卡经营分析",
+    },
+    {
+        "domain": "风险",
+        "level1": "风险管理部",
+        "level2": "组合风险管理室",
+        "level3": "模型监测团队",
+        "topics": [
+            "信贷资产质量监测",
+            "不良贷款迁徙分析",
+            "逾期客户风险预警",
+            "关注类贷款压降监控",
+            "拨备覆盖率经营看板",
+            "重点行业风险排查",
+            "授信集中度监控",
+            "贷后检查进度跟踪",
+            "资产分类变动分析",
+            "抵质押品价值重估分析",
+            "风险预警信号处置看板",
+            "大额风险暴露监测",
+        ],
+        "folder": "风险管理驾驶舱",
+    },
+    {
+        "domain": "运营",
+        "level1": "运营管理部",
+        "level2": "运营监控室",
+        "level3": "流程管理团队",
+        "topics": [
+            "网点运营效率看板",
+            "柜面业务量监测",
+            "远程银行服务分析",
+            "账户开立质量监测",
+            "企业账户年检进度监控",
+            "支付结算业务分析",
+            "反洗钱可疑交易跟踪",
+            "客户投诉处理看板",
+            "运营风险事件监控",
+            "集中作业处理时效分析",
+            "现金库存与调拨监测",
+            "电子回单服务分析",
+        ],
+        "folder": "运营管理专区",
+    },
+    {
+        "domain": "财务",
+        "level1": "计划财务部",
+        "level2": "管理会计室",
+        "level3": "财务分析团队",
+        "topics": [
+            "分行利润贡献分析",
+            "FTP收支测算看板",
+            "费用预算执行监控",
+            "中间业务收入分析",
+            "净利息收入贡献分析",
+            "资本占用收益分析",
+            "经营计划完成率看板",
+            "资产负债结构分析",
+            "税务成本监测看板",
+            "经济利润EVA分析",
+            "分产品收益率分析",
+            "管理会计分摊结果看板",
+        ],
+        "folder": "财务管理驾驶舱",
+    },
+    {
+        "domain": "渠道",
+        "level1": "网络经营服务部",
+        "level2": "渠道管理室",
+        "level3": "数字渠道团队",
+        "topics": [
+            "手机银行活跃分析",
+            "网银交易监测",
+            "网点客流热力分析",
+            "自助设备运行看板",
+            "远程渠道转化分析",
+            "企业网银客户经营",
+            "开放银行接口监测",
+            "渠道协同营销看板",
+            "数字人民币交易分析",
+            "小程序渠道转化分析",
+            "线上预约到店分析",
+            "渠道客户体验监测",
+        ],
+        "folder": "渠道经营分析",
+    },
+    {
+        "domain": "普惠",
+        "level1": "普惠金融部",
+        "level2": "普惠经营管理室",
+        "level3": "小微客户团队",
+        "topics": [
+            "普惠贷款投放监控",
+            "小微客户增长看板",
+            "普惠首贷户拓展分析",
+            "涉农贷款经营监测",
+            "科技型企业贷款分析",
+            "普惠风险补偿跟踪",
+            "普惠延期还本付息监测",
+            "科创小微客户经营分析",
+            "个体工商户贷款看板",
+            "普惠贷款利率定价分析",
+        ],
+        "folder": "普惠金融专区",
+    },
+    {
+        "domain": "金融市场",
+        "level1": "金融市场部",
+        "level2": "投资交易管理室",
+        "level3": "市场风险与经营分析团队",
+        "topics": [
+            "债券投资组合分析",
+            "同业负债成本监测",
+            "资金头寸预测看板",
+            "外汇交易损益分析",
+            "衍生品估值监测",
+            "票据转贴现业务分析",
+            "理财投资资产穿透看板",
+            "金融市场限额占用监控",
+            "市场价格波动预警",
+        ],
+        "folder": "金融市场经营专区",
+    },
+    {
+        "domain": "国际业务",
+        "level1": "国际业务部",
+        "level2": "跨境金融管理室",
+        "level3": "国际结算分析团队",
+        "topics": [
+            "跨境结算业务分析",
+            "贸易融资投放监控",
+            "进口信用证业务看板",
+            "出口托收业务跟踪",
+            "外汇存款结构分析",
+            "结售汇客户贡献分析",
+            "跨境人民币业务监测",
+            "国际业务风险预警",
+        ],
+        "folder": "国际业务分析专区",
+    },
+    {
+        "domain": "合规",
+        "level1": "法律合规部",
+        "level2": "反洗钱与制裁合规管理团队",
+        "level3": "合规数据应用室",
+        "topics": [
+            "受益所有人信息缺失监测",
+            "客户尽职调查进度看板",
+            "反洗钱名单命中分析",
+            "可疑交易报告质效分析",
+            "员工异常行为排查",
+            "监管报送质量监控",
+            "合规检查问题整改跟踪",
+            "制裁筛查处理时效分析",
+        ],
+        "folder": "合规管理专区",
+    },
+    {
+        "domain": "人力",
+        "level1": "人力资源部",
+        "level2": "绩效薪酬管理室",
+        "level3": "人力数据分析团队",
+        "topics": [
+            "机构人均产能分析",
+            "客户经理绩效看板",
+            "支行奖金分配测算",
+            "岗位编制与人员缺口分析",
+            "员工培训完成率监测",
+            "人才盘点与梯队建设看板",
+            "一线人员工作量分析",
+        ],
+        "folder": "人力资源分析专区",
+    },
+]
+
+
+# Suffixes appended to a topic when building a dashboard display name; one is
+# picked at random per dashboard. The empty string means "no suffix".
+VERSION_SUFFIXES = [
+    "",
+    " V1.0",
+    " V2.0",
+    " V3.0",
+    " V4.0",
+    "(机构版)",
+    "(管理端)",
+    "(支行版)",
+    "(客户经理版)",
+    "(测试版)",
+    "月报",
+    "日报",
+    "周报",
+    "快报",
+    "大屏",
+]
+
+
+def make_id(prefix: str, ordinal: int) -> str:
+    raw = f"{prefix}-{ordinal}-{RANDOM_SEED}".encode("utf-8")
+    return hashlib.md5(raw).hexdigest()[:24]
+
+
+def parse_branch_names(raw_branch_names: list[str] | None) -> list[str]:
+    if not raw_branch_names:
+        return DEFAULT_BRANCH_NAMES
+
+    branch_names: list[str] = []
+    for raw_value in raw_branch_names:
+        for branch_name in raw_value.split(","):
+            branch_name = branch_name.strip()
+            if branch_name and branch_name not in branch_names:
+                branch_names.append(branch_name)
+
+    unknown_branch_names = [name for name in branch_names if name not in BRANCH_DM_MAP]
+    if unknown_branch_names:
+        supported = "、".join(BRANCH_DM_MAP)
+        unknown = "、".join(unknown_branch_names)
+        raise ValueError(f"unsupported branch name: {unknown}. Supported branch names: {supported}")
+    return branch_names
+
+
+def build_branch_pool(branch_names: list[str]) -> list[tuple[str, str, str]]:
+    return [(BRANCH_ID_MAP[name], name, BRANCH_DM_MAP[name]) for name in branch_names]
+
+
+def choose_branch(
+    rng: random.Random,
+    domain: str,
+    branch_pool: list[tuple[str, str, str]],
+) -> tuple[str, str, str]:
+    total_branch = next((branch for branch in branch_pool if branch[1] == "总行"), None)
+    if total_branch and domain in {"风险", "财务", "信用卡"} and rng.random() < 0.36:
+        return total_branch
+    return rng.choice(branch_pool)
+
+
+def make_timestamp(rng: random.Random, start: datetime, end: datetime) -> str:
+    seconds = int((end - start).total_seconds())
+    value = start + timedelta(seconds=rng.randint(0, seconds))
+    microsecond = rng.choice([0, 152000, 281000, 371000, 495000, 659000, 749000, 911000])
+    return value.replace(microsecond=microsecond).strftime("%Y-%m-%d %H:%M:%S.%f")
+
+
+def build_folder_route(rng: random.Random, branch_name: str, config: dict[str, object], topic: str) -> str:
+    roots = [
+        f"根目录/{config['folder']}/{config['domain']}/{topic}",
+        f"根目录/业务网仪表板/{config['domain']}条线/{branch_name}/{topic}",
+        f"根目录/总行/经营管理驾驶舱/{config['folder']}/{topic}",
+        f"根目录/用户开发报表区/{branch_name}/{config['folder']}/{topic}",
+        f"根目录/应用市场下载目录/分析模板/{config['domain']}经营/{topic}",
+    ]
+    return rng.choice(roots)
+
+
+def make_dashboard_name(
+    rng: random.Random,
+    ordinal: int,
+    branch_name: str,
+    topic: str,
+    used_names: set[str],
+) -> str:
+    suffix = rng.choice(VERSION_SUFFIXES)
+    branch_prefix = branch_name if rng.random() < 0.34 and branch_name != "总行" else ""
+    base_name = f"{branch_prefix}{topic}{suffix}"
+    dash_name = base_name
+    if dash_name in used_names:
+        dash_name = f"{base_name}(第{ordinal:03d}期)"
+
+    retry_index = 2
+    while dash_name in used_names:
+        dash_name = f"{base_name}(第{ordinal:03d}-{retry_index}期)"
+        retry_index += 1
+
+    used_names.add(dash_name)
+    return dash_name
+
+
+def build_record(
+    rng: random.Random,
+    ordinal: int,
+    config: dict[str, object],
+    topic: str,
+    branch_pool: list[tuple[str, str, str]],
+    used_names: set[str],
+) -> dict[str, object]:
+    """Build one mock dashboard row as a column-name -> value dict.
+
+    Args:
+        rng: Random generator that supplies every random value in the row.
+        ordinal: Row number; feeds the dashboard ID and the name
+            de-duplication marker.
+        config: One entry of ``DOMAIN_CONFIGS``.
+        topic: Dashboard topic used for the display name and folder path.
+        branch_pool: Candidate ``(branch id, branch name, short code)`` tuples.
+        used_names: Display names already taken; updated in place by
+            ``make_dashboard_name``.
+
+    Returns:
+        A dict holding the dashboard columns.
+
+    Note:
+        Values are drawn from ``rng`` in a fixed sequence. Reordering the
+        draws changes the output produced by an identically seeded generator.
+    """
+    bbk_id, bbk_name, dm_nm = choose_branch(rng, str(config["domain"]), branch_pool)
+    dash_name = make_dashboard_name(rng, ordinal, bbk_name, topic, used_names)
+
+    level1 = str(config["level1"])
+    level2 = str(config["level2"])
+    level3 = str(config["level3"])
+    # The empty string stands for "no fourth-level department".
+    level4_candidates = ["数据应用室", "经营推动组", "业务支持组", "指标管理组", ""]
+    level4 = rng.choice(level4_candidates)
+    dept_fname_parts = ["XX银行", bbk_name, level1, level2, level3]
+    if level4:
+        dept_fname_parts.append(level4)
+
+    chart_cnt = rng.randint(20, 50)
+    author_cnt = rng.randint(8, 360)
+    first_visit = make_timestamp(rng, datetime(2023, 1, 1), datetime(2026, 2, 28))
+    # The most recent visit is drawn from at least one day after the first visit.
+    recent_start = datetime.strptime(first_visit, "%Y-%m-%d %H:%M:%S.%f") + timedelta(days=1)
+    recent_end = datetime(2026, 5, 5, 23, 59, 59)
+    recent_visit = make_timestamp(rng, recent_start, recent_end) if recent_start < recent_end else first_visit
+
+    # The efficiency rating determines the range of the 3-month visit count.
+    efficiency = rng.choices(["高效", "中效", "低效"], weights=[0.34, 0.46, 0.20], k=1)[0]
+    visit_ranges = {
+        "高效": (360, 6800),
+        "中效": (80, 1200),
+        "低效": (0, 180),
+    }
+    visit_low, visit_high = visit_ranges[efficiency]
+
+    # Dict values are evaluated top to bottom, so the rng calls below run in
+    # the order they are written.
+    return {
+        "dash_id": make_id("dash", ordinal),
+        "dash_dsply_name": dash_name,
+        "folder_all_route": build_folder_route(rng, bbk_name, config, topic),
+        "build_main": rng.choices(["总行建设", "分行自建"], weights=[0.48, 0.52], k=1)[0],
+        "dept_cls": rng.choices(["业务部门", "信息技术部", "风险合规部门", "管理部门"], weights=[0.66, 0.18, 0.09, 0.07], k=1)[0],
+        "dash_rat": efficiency,
+        "bbk_name": bbk_name,
+        "bbk_id": bbk_id,
+        "dept_fname": "/".join(dept_fname_parts),
+        "level1_dept_name": level1,
+        "level2_dept_name": level2,
+        "level3_dept_name": level3,
+        "level4_dept_name": level4,
+        "chart_cnt": chart_cnt,
+        "be_author_cnt": author_cnt,
+        "fir_be_visit_tm": first_visit,
+        "recnt_be_visit_tm": recent_visit,
+        "m3_vis_cnt": rng.randint(visit_low, visit_high),
+        "mon_add_idf": rng.choices(["是", "否"], weights=[0.08, 0.92], k=1)[0],
+        "dm_nm": dm_nm,
+    }
+
+
+def make_dashboards(
+    target_count: int = 200,
+    start_ordinal: int = 1,
+    branch_pool: list[tuple[str, str, str]] | None = None,
+    used_names: set[str] | None = None,
+) -> pd.DataFrame:
+    rng = random.Random(RANDOM_SEED + start_ordinal - 1)
+    branch_pool = branch_pool or build_branch_pool(DEFAULT_BRANCH_NAMES)
+    used_names = used_names if used_names is not None else set()
+    records: list[dict[str, object]] = []
+    topic_pairs: list[tuple[dict[str, object], str]] = []
+    for config in DOMAIN_CONFIGS:
+        for topic in config["topics"]:
+            topic_pairs.append((config, topic))
+
+    while len(records) < target_count:
+        rng.shuffle(topic_pairs)
+        for config, topic in topic_pairs:
+            records.append(
+                build_record(
+                    rng,
+                    start_ordinal + len(records),
+                    config,
+                    topic,
+                    branch_pool,
+                    used_names,
+                )
+            )
+            if len(records) >= target_count:
+                break
+
+    columns = [
+        "dash_id",
+        "dash_dsply_name",
+        "folder_all_route",
+        "build_main",
+        "dept_cls",
+        "dash_rat",
+        "bbk_name",
+        "bbk_id",
+        "dept_fname",
+        "level1_dept_name",
+        "level2_dept_name",
+        "level3_dept_name",
+        "level4_dept_name",
+        "chart_cnt",
+        "be_author_cnt",
+        "fir_be_visit_tm",
+        "recnt_be_visit_tm",
+        "m3_vis_cnt",
+        "mon_add_idf",
+        "dm_nm",
+    ]
+    return pd.DataFrame(records, columns=columns)
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Generate mock BI dashboard parquet data.")
+    parser.add_argument(
+        "--mode",
+        choices=["overwrite", "append"],
+        default="overwrite",
+        help="overwrite replaces the parquet file; append inserts generated rows into the existing parquet file.",
+    )
+    parser.add_argument(
+        "--count",
+        type=int,
+        default=200,
+        help="number of dashboard rows to generate in this run.",
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=OUTPUT_PATH,
+        help="target parquet path.",
+    )
+    parser.add_argument(
+        "--branch-name",
+        action="append",
+        help=(
+            "branch names to generate, separated by comma or passed repeatedly. "
+            "Defaults to 总行,北京分行,广东分行."
+        ),
+    )
+    return parser.parse_args()
+
+
+def main() -> None:
+    args = parse_args()
+    if args.count <= 0:
+        raise ValueError("--count must be a positive integer")
+
+    output_path = args.output.resolve()
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    branch_names = parse_branch_names(args.branch_name)
+    branch_pool = build_branch_pool(branch_names)
+
+    existing = pd.DataFrame()
+    if args.mode == "append" and output_path.exists():
+        existing = pd.read_parquet(output_path)
+
+    used_names = set(existing["dash_dsply_name"]) if not existing.empty else set()
+    new_dashboards = make_dashboards(
+        args.count,
+        start_ordinal=len(existing) + 1,
+        branch_pool=branch_pool,
+        used_names=used_names,
+    )
+    dashboards = pd.concat([existing, new_dashboards], ignore_index=True) if not existing.empty else new_dashboards
+    dashboards.to_parquet(output_path, index=False)
+
+    duplicate_count = int(dashboards["dash_id"].duplicated().sum())
+    duplicate_name_count = int(dashboards["dash_dsply_name"].duplicated().sum())
+    print(f"mode: {args.mode}")
+    print(f"branch names: {', '.join(branch_names)}")
+    print(f"generated rows: {len(new_dashboards)}")
+    print(f"wrote total rows: {len(dashboards)} to {output_path}")
+    print(f"duplicate dash_id: {duplicate_count}")
+    print(f"duplicate dash_dsply_name: {duplicate_name_count}")
+    print(f"columns: {', '.join(dashboards.columns)}")
+    print("dash_rat:", dashboards["dash_rat"].value_counts().to_dict())
+    print("chart_cnt:", int(dashboards["chart_cnt"].min()), int(dashboards["chart_cnt"].max()))
+    print("level1_dept_name:", dashboards["level1_dept_name"].value_counts().to_dict())
+
+
+# Run the generator only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

+ 21 - 0
data_schema/card_info.txt

@@ -0,0 +1,21 @@
+字段名称 | 字段注释 | 字段类型 | 备注
+CARD_ID | 卡片ID | string | 24位数字字母组合
+BBK_ID | 分行编号 | string | 5位数字
+BBK_NAME | 分行名称 | string | 省级分行名称
+CARD_NAME | 卡片名称 | string | 
+DS_ID | 数据集ID | string | 24位数字字母组合
+DS_NAME | 数据集名称 | string | 
+DASH_ID | 仪表板ID | string | 24位数字字母组合
+DASH_NAME | 仪表板名称 | string | 
+CARD_TYPE_CD | 卡片类型代码 | string | 全部为“图表”
+FIELD_ALIAS | 维度字段别名 | string | JSON列表,内含字段名称
+FIELD_NAME | 维度字段名称 | string | JSON列表,内含字段名称
+FILTERS_FIELD_NAME | 筛选字段名称 | string | JSON列表,内含字段名称
+FILTERS_FIELD_VALUE | 筛选字段值 | string | JSON列表,内含字段对应取值
+SORT_FIELD_NAME | 排序字段名称 | string | JSON列表,内含字段名称
+SORT_WAY | 排序方式 | string | JSON列表,ASC或DESC
+NUM_VALUE_FIELD_NAME | 数值字段名称 | string | JSON列表,内含字段名称
+NUM_VALUE_FIELD_ALIAS | 数值字段别名 | string | JSON列表,内含字段名称
+NUM_VALUE_FIELD_MERGE_WAY | 数值字段聚合方式 | string | JSON列表,内含SUM、MAX等SQL聚合函数
+FOLDER_NAME | 文件夹名称 | string | 
+FOLDER_ALL_ROUTE | 文件夹全路径 | string | 

文件差异内容过多而无法显示
+ 1 - 0
data_schema/card_info_examples.txt


+ 11 - 0
data_schema/dashboard_examples.txt

@@ -0,0 +1,11 @@
+| dash_id | dash_dsply_name | folder_all_route | build_main | dept_cls | dash_rat | bbk_name | bbk_id | dept_fname | level1_dept_name | level2_dept_name | level3_dept_name | level4_dept_name | chart_cnt | be_author_cnt | fir_be_visit_tm | recnt_be_visit_tm | m3_vis_cnt | mon_add_idf | dm_nm | 
+| v07ecdbf9e08a4b3298abc43 | 超预算情况报表 V1.0 | 根目录/SYSTEM_CREATE_DASHBOARD_TOP_DIR/SYSTEM_CREATE_PAGE_XGB31807 | 分行自建 | | 无效 | 代发产品 | | | | | | | 5 | 0 | 2025-03-17 14:13:26.000000 | 2025-05-05 08:45:04.000000 | 0 | 否 | zh | 
+| v07ed3c52574244508ea664e | 3.2企业画像(管户客群)_测试版 | 根目录/应用市场下载目录/分析模板/团策应用测试版 | 分行自建 | 信息技术部 | 无效 | 江苏分行 | 512 | XX银行/江苏分行/信息技术部/数据分析团队 | 信息技术部 | 数据分析团队 | | | 28 | 34 | 2023-08-25 13:38:23.371000 | 2025-04-25 14:58:09.749000 | 0 | 否 | jsfh | 
+| v07f5588d83404c178c54351 | 考核-受益所有人信息缺失 | 根目录/用户开发报表区/BCK-占敏/340924 | 分行自建 | 业务部门 | 无效 | 浙江分行 | 571 | XX银行/浙江分行/法律合规部/反洗钱与制裁合规管理团队 | 法律合规部 | 反洗钱与制裁合规管理团队 | | | 0 | 9 | 2022-08-03 11:18:14.546000 | 2022-08-03 11:19:22.281000 | 0 | 否 | zjfh | 
+| v07f5929cc0d94f67a8b1669 | 复杂报表-资产负债表 | 根目录/信息技术部/通用报表区/观远2.0使用指南/培训演示/表格类型与复杂报表 | 总行建设 | 信息技术部 | 低效 | 总行 | 100 | XX银行/总行/信息技术部/数据资产与平台研发中心/大数据应用支撑开发团队/分行数据服务一室 | 信息技术部 | 数据资产与平台研发中心 | 大数据应用支撑开发团队 | 分行数据服务一室 | 3 | 291 | 2022-04-26 20:16:24.911000 | 2026-04-22 14:57:10.764000 | 1 | 否 | zh | 
+| v0830d4341fc4415da6c823e | 一般性存款 | 根目录/应用市场下载目录/高层领导管理报表-福建分行_2025-06-20 16:32:26+0800 | 分行自建 | 业务部门 | 无效 | 安徽分行 | 551 | XX银行/安徽分行/运营管理部/系统开发与科技应用室 | 运营管理部 | 系统开发与科技应用室 | | | 13 | 8 | | | 0 | 否 | ahfh | 
+| v083d5887a3fd4aa1bbad0b1 | 湖北分行零售信贷监控大屏 | 根目录/数据大屏/2022参赛作品/000559 | 总行建设 | 信息技术部 | 无效 | 总行 | 100 | XX银行/湖北分行/信息技术部/数据与平台开发团队 | 信息技术部 | 数据与平台开发团队 | | | 0 | 0 | 2022-06-24 16:59:17.417000 | 2023-10-09 16:21:45.659000 | 0 | 否 | hbfh | 
+| v0872fee09f554e0b99ad229 | 实时业绩快报(机构版) | 根目录/仪表板_IT开发(开发,非生产请最小粒度赋权仪表盘)/零售RTL/零售金融事业部(已废弃)/全视图业绩查询(请去零售专区-热门高频应用查看) | 分行自建 | 业务部门 | 无效 | 上海分行 | 121 | XX银行/上海分行/私人银行部/产品管理室 | 私人银行部 | 产品管理室 | | | 0 | 30 | 2021-12-23 11:30:14.495000 | 2022-01-11 16:52:50.152000 | 0 | 否 | shfh | 
+| v08768256b2be4288b6eadd6 | 部门账单-唐山港集团股份有限公司 V4.0 | 根目录/SYSTEM_CREATE_DASHBOARD_TOP_DIR/SYSTEM_PRJ_336993970246909952 | 总行建设 | | 低效 | 代发产品 | | | | | | | 12 | 0 | 2026-02-10 08:57:08.000000 | 2026-04-02 14:18:07.000000 | 3 | 否 | zh | 
+| v08998fccb45d491283e3baa | 用餐明细表 V7.0 | 根目录/SYSTEM_CREATE_DASHBOARD_TOP_DIR/SYSTEM_PRJ_358474665062236160 | 分行自建 | | 高效 | 代发产品 | | | | | | | 6 | 0 | 2026-04-28 16:32:43.000000 | 2026-04-29 17:21:39.000000 | 15 | 否 | zh | 
+| v08cc53082d674315aa42054 | 资产不达标率高 | 根目录/业务网仪表板/零售条线/网点取数/团体金融/团体金融拓展目标客户/7月重点代发潜力挖掘名单 | 分行自建 | 业务部门 | 无效 | 重庆分行 | 123 | XX银行/重庆分行/九龙坡支行(综合支行)/市场拓展团队 | 九龙坡支行(综合支行) | 市场拓展团队 | | | 0 | 2 | 2022-07-11 16:39:16.957000 | 2023-02-07 14:21:20.663000 | 0 | 否 | cqfh | 

+ 21 - 0
data_schema/dashboard_info.txt

@@ -0,0 +1,21 @@
+字段名称 | 中文名 | 类型 | 备注
+DASH_ID | 仪表盘ID | string | 24位数字字母组合
+DASH_DSPLY_NAME | 仪表盘显示名称 | string | 中文名称
+FOLDER_ALL_ROUTE | 文件夹全路径 | string | 
+BUILD_MAIN | 建设主体描述 | string | 
+DEPT_CLS | 部门分类描述 | string | 
+DASH_RAT | 仪表盘效率描述 | string | 高效/中效/低效
+BBK_NAME | 分行名称 | string | 省级分行名称
+BBK_ID | 分行编号 | string | 5位数字
+DEPT_FNAME | 部门全称 | string | 
+LEVEL1_DEPT_NAME | 一级部门名称 | string | 
+LEVEL2_DEPT_NAME | 二级部门名称 | string | 
+LEVEL3_DEPT_NAME | 三级部门名称 | string | 
+LEVEL4_DEPT_NAME | 四级部门名称 | string | 
+CHART_CNT | 包含图表数量 | int | 
+BE_AUTHOR_CNT | 被授权人数 | int | 
+FIR_BE_VISIT_TM | 首次被访问时间 | string | 
+RECNT_BE_VISIT_TM | 最近被访问时间 | string | 
+M3_VIS_CNT | 近三个月访问次数 | int | 
+MON_ADD_IDF | 本月新增标识 | string | 
+DM_NM | 域名 | string | 分行首字母简写

部分文件因为文件数量过多而无法显示