|
|
@@ -173,7 +173,7 @@ def build_with_part(new_date_fields, new_dimension_fields, dataset_fid_name_map,
|
|
|
return sql_part
|
|
|
|
|
|
# 处理计算字段
|
|
|
-def process_calculation_fields(measure_fields, measure_aggs, calculation_fields, card_id, card_name):
|
|
|
+def process_measure_fields(measure_fields, measure_aggs, calculation_fields, card_id, card_name):
|
|
|
## 数值字段数量 小于 聚合函数数量,不合法
|
|
|
if len(measure_fields) < len(measure_aggs):
|
|
|
print(f"警告: 卡片 {card_id} {card_name}: 数值字段数量小于聚合函数数量,不合法")
|
|
|
@@ -214,6 +214,17 @@ def process_calculation_fields(measure_fields, measure_aggs, calculation_fields,
|
|
|
new_measure_aggs.append(measure_aggs.pop(0))
|
|
|
return new_measure_fields, new_measure_aggs, agg_flag
|
|
|
|
|
|
+# sql部分去重
|
|
|
def dedupe_sql_parts(parts):
    """Return the SQL fragments in ``parts`` without blanks or repeats.

    The first occurrence of each fragment is kept and the original order is
    preserved, so the result can be joined directly into a clause such as
    ``GROUP BY``.

    Args:
        parts: Iterable of SQL fragment strings. ``None`` is treated as an
            empty iterable.

    Returns:
        A new list holding each non-blank fragment once, in first-seen
        order. The input is not modified.
    """
    if parts is None:
        return []

    deduped = []
    seen = set()
    for part in parts:
        # Drop None/"" and whitespace-only strings: joining them with ", "
        # would leave a dangling comma in the generated clause.
        if not part or (isinstance(part, str) and not part.strip()):
            continue
        if part in seen:
            continue
        seen.add(part)
        deduped.append(part)
    return deduped
|
|
|
+
|
|
|
def quote_identifier(identifier, formula=False):
|
|
|
if not QUOTE_FLAG:
|
|
|
return identifier
|
|
|
@@ -492,6 +503,10 @@ def build_sql_query(card_data, added_fields_info, dataset_fid_name_map):
|
|
|
|
|
|
# 处理字段重命名关系
|
|
|
fields_rename_map = get_fields_rename_map(card_data.get("field_info", ""))
|
|
|
+
|
|
|
+ # 处理field_id与重命名关系,用于筛选Order by子句中的字段
|
|
|
+ # 需要处理的只有日期转换类型,将转换前的原始字段名加入map
|
|
|
+ # 只需要更新有重命名的字段即可
|
|
|
selected_fid_alias_map = dict(zip(dimension_fids+measure_fids, dimension_fields+measure_fields))
|
|
|
|
|
|
# 构建WITH
|
|
|
@@ -516,6 +531,7 @@ def build_sql_query(card_data, added_fields_info, dataset_fid_name_map):
|
|
|
# 构建SELECT
|
|
|
select_parts = []
|
|
|
has_aggregation = False
|
|
|
+ non_aggregated_select_parts = []
|
|
|
|
|
|
# 添加维度字段
|
|
|
for field in dimension_fields:
|
|
|
@@ -529,7 +545,7 @@ def build_sql_query(card_data, added_fields_info, dataset_fid_name_map):
|
|
|
selected_fid_alias_map[fid] = field
|
|
|
|
|
|
# 加工计算字段
|
|
|
- new_measure_fields, measure_aggs, agg_flag = process_calculation_fields(measure_fields, measure_aggs, added_fields_info, card_id, card_name)
|
|
|
+ new_measure_fields, measure_aggs, agg_flag = process_measure_fields(measure_fields, measure_aggs, added_fields_info, card_id, card_name)
|
|
|
if agg_flag:
|
|
|
has_aggregation = True
|
|
|
for i, field in enumerate(new_measure_fields):
|
|
|
@@ -540,6 +556,8 @@ def build_sql_query(card_data, added_fields_info, dataset_fid_name_map):
|
|
|
if not alias or alias == "null":
|
|
|
alias = measure_fields[i]
|
|
|
select_parts.append(f"{field} AS {quote_identifier(alias)}")
|
|
|
+ if field and re.search(r"\b(sum|avg|count|max|min|stddev|variance|collect_list|collect_set|percentile|percentile_approx)|\s*\(", field, flags=re.IGNORECASE) is None:
|
|
|
+ non_aggregated_select_parts.append(field)
|
|
|
selected_fid_alias_map[fid] = alias
|
|
|
else:
|
|
|
has_aggregation = True
|
|
|
@@ -578,9 +596,12 @@ def build_sql_query(card_data, added_fields_info, dataset_fid_name_map):
|
|
|
|
|
|
# 构建GROUPBY
|
|
|
group_by_clause = ""
|
|
|
- if has_aggregation and dimension_fields:
|
|
|
+ if has_aggregation:
|
|
|
group_by_parts = [quote_identifier(field) for field in dimension_fields]
|
|
|
- group_by_clause = "GROUP BY " + ", ".join(group_by_parts)
|
|
|
+ group_by_parts.extend(non_aggregated_select_parts)
|
|
|
+ group_by_parts = dedupe_sql_parts(group_by_parts)
|
|
|
+ if group_by_parts:
|
|
|
+ group_by_clause = "GROUP BY " + ", ".join(group_by_parts)
|
|
|
|
|
|
# 构建ORDERBY
|
|
|
order_by_clause = ""
|
|
|
@@ -597,7 +618,7 @@ def build_sql_query(card_data, added_fields_info, dataset_fid_name_map):
|
|
|
|
|
|
# 组装SQL
|
|
|
sql_parts = [with_part, select_clause, from_clause]
|
|
|
-
|
|
|
+ # 返回 select, where, groupby, orderby
|
|
|
return ("\n".join(sql_parts)).strip(), json.dumps(filter_conditions, ensure_ascii=False), group_by_clause, order_by_clause
|
|
|
|
|
|
def generate():
|
|
|
@@ -638,3 +659,4 @@ def generate():
|
|
|
if __name__ == "__main__":
    import os

    df = generate()

    # Writing to a path whose directory is missing fails, so make sure the
    # target directory exists before either export runs.
    os.makedirs("output", exist_ok=True)

    # Export the same frame twice: Parquet, plus a spreadsheet copy.
    # NOTE(review): df is presumably a pandas DataFrame; to_excel then needs
    # an Excel writer engine (e.g. openpyxl) installed — confirm.
    df.to_parquet("output/sql.parquet")
    df.to_excel("output/sql.xlsx")
|