分享一个强大的PowerQuery自定义函数,轻松转换父子结构数据

教育   2024-09-04 12:08   河南  
前面介绍了如何利用DAX中的父子函数来分析父子层级结构的数据:
利用Power BI父子函数,进行特殊数据结构的分析
它利用了几个父子函数的功能将父子层级结构转换为扁平的数据结构。
这里再分享一个PowerQuery的做法,是国外大神用自定义函数来实现的,原贴见:https://www.thebiccountant.com/2017/02/14/dynamically-flatten-parent-child-hierarchies-in-dax-and-powerbi/

仍然以前面文章的数据为例,添加编号的父子层级数据表是这样的:

要将上表转换后扁平结构,下面来看看用PQ自定义函数具体是如何操作的。

1、创建自定义函数
在pq中新建查询>空查询,打开高级编辑器,将该空查询中的公式全部删除,然后将下面的公式复制进去:
///*let func =   (ParChTable as table,ChildKey as text,ParentKey as text,LevelColumnName as text) =>//*/
let/*/Debug ParametersParChTable = SourceData_Loop,ChildKey = "NodeKey",ParentKey = "ParentKey",LevelColumnName = "Name",*/
SelectRelevantColumns = Table.SelectColumns(ParChTable, {ChildKey, ParentKey, LevelColumnName}), #"Changed Type" = Table.TransformColumnTypes(SelectRelevantColumns ,{{ChildKey, type text}, {ParentKey, type text}}), ReplaceNulls = Table.ReplaceValue(#"Changed Type",null,"",Replacer.ReplaceValue,{ParentKey}), // CleanParChTable = Table.Distinct(ReplaceNulls , {ChildKey, ParentKey}), MissingParents = List.Buffer(List.Select(List.Difference(List.Distinct(Table.Column(ReplaceNulls , ParentKey)), List.Distinct(Table.Column(ReplaceNulls , ChildKey))), each _ <> "")), AddMissingParents = Table.Buffer(Table.Combine({ReplaceNulls , #table({ChildKey, LevelColumnName, ParentKey}, List.Transform(MissingParents, each {_, "Unknown TopLevel"& Text.From(List.PositionOf(MissingParents, _)), ""}))})), #"Merged Queries0" = Table.NestedJoin(AddMissingParents,{ChildKey},AddMissingParents,{ParentKey},"SelectRelevantColumns",JoinKind.LeftOuter), CheckIfIsLeaf = Table.AddColumn(#"Merged Queries0", "IsLeaf", each if Table.IsEmpty([SelectRelevantColumns]) then "yes" else "no"), #"Replaced Value1" = Table.ReplaceValue(CheckIfIsLeaf,null,"",Replacer.ReplaceValue,{ParentKey, LevelColumnName}), AddStartPath = Table.AddColumn(#"Replaced Value1", "Path", each Text.Trim(Record.Field(_, ChildKey)&"|"&Record.Field(_,ParentKey), "|")), #"Duplicated Column" = Table.DuplicateColumn(AddStartPath, LevelColumnName, "FirstName"), Feed = Table.DuplicateColumn(#"Duplicated Column", ParentKey, "FirstParentKey"),
// Retrieve all parents per row fnAllParents = List.Generate(()=> [Result= Feed, Level=1, EndlessLoop = false, StopEndlessLoop = false], each Table.RowCount([Result]) > 0 and not [StopEndlessLoop], each [ Result= let #"Merged Queries" = Table.NestedJoin([Result],{ParentKey},AddMissingParents,{ChildKey},"Added Custom",JoinKind.Inner), #"Removed Columns1" = Table.RemoveColumns(#"Merged Queries",{ParentKey}), #"Expanded Added Custom" = Table.ExpandTableColumn(#"Removed Columns1", "Added Custom", {ParentKey, LevelColumnName}, {"ParentKey.1", "Name.1"}), #"Duplicated Column" = Table.DuplicateColumn(#"Expanded Added Custom", "ParentKey.1", ParentKey), #"Merged Columns" = Table.CombineColumns(#"Duplicated Column",{"Path", "ParentKey.1"},Combiner.CombineTextByDelimiter("|", QuoteStyle.None),"Path"), #"Merged Columns2" = Table.CombineColumns( #"Merged Columns" ,{LevelColumnName, "Name.1"},Combiner.CombineTextByDelimiter("|", QuoteStyle.None),LevelColumnName) in Table.Buffer(#"Merged Columns2"), Level = [Level]+1, EndlessLoop = List.Sort(List.Distinct(Table.Column(Result, ChildKey))) = List.Sort(List.Distinct(Table.Column([Result], ChildKey))), StopEndlessLoop = [EndlessLoop] ]), ConvertToTable = Table.FromList(fnAllParents, Splitter.SplitByNothing(), null, null, ExtraValues.Error), ExpandLevel = Table.ExpandRecordColumn(ConvertToTable, "Column1", {"Result", "Level", "EndlessLoop"}, {"Result", "Level", "EndlessLoop"}), ExpandLG = Table.ExpandTableColumn(ExpandLevel, "Result", {LevelColumnName, ParentKey, ChildKey, "Path", "FirstName", "FirstParentKey"}, {"Name", "ParentKey", "NodeKey", "Path", "FirstName", "FirstParentKey"}), FilterParents = Table.SelectRows(ExpandLG, each ([ParentKey] = null or [ParentKey] = "")), #"Removed Columns" = Table.RemoveColumns(FilterParents,{"ParentKey"}), #"Trimmed Text" = Table.TransformColumns(#"Removed Columns",{{"Path", each Text.Trim(_, "|")}}), ReverseOrderName = Table.TransformColumns(#"Trimmed Text",{{"Name", each Text.Combine(List.Reverse(Text.Split(_, "|")), "|")}}), ReverseOrderPath = Table.TransformColumns(ReverseOrderName,{{"Path", each Text.Combine(List.Reverse(Text.Split(_, "|")), "|")}}), #"Reordered Columns" = Table.ReorderColumns(ReverseOrderPath,{"NodeKey", "FirstParentKey", "Path", "FirstName", "Level", "Name"}), #"Split Column by Delimiter" = Table.SplitColumn(#"Reordered Columns", "Name", Splitter.SplitTextByDelimiter("|", QuoteStyle.Csv), List.Transform({1..Table.RowCount(ConvertToTable)}, each "Level "&Text.From(_))), #"Merged Queries" = Table.NestedJoin(#"Split Column by Delimiter",{"NodeKey", "FirstParentKey"},ParChTable ,{ChildKey, ParentKey},"Split Column by Delimiter",JoinKind.LeftOuter), #"Expanded Split Column by Delimiter" = Table.ExpandTableColumn(#"Merged Queries", "Split Column by Delimiter", List.Difference(Table.ColumnNames(ParChTable), Table.ColumnNames(#"Replaced Value1"))), Rename = Table.RenameColumns(#"Expanded Split Column by Delimiter",{{"Level", "HierarchyDepth"}}), Parents = List.Buffer(Rename[FirstParentKey]), IsLeaf = Table.AddColumn(Rename, "IsLeaf", each not List.Contains(Parents, [NodeKey])), NoOfInterations = List.Count(fnAllParents), LastIteration = Table.SelectRows(ExpandLG, each ([Level] = NoOfInterations)), EndlessLoops = LastIteration[EndlessLoop], IsEndlessLoop = EndlessLoops{0}, RemainingResults = Table.NestedJoin(IsLeaf, {ChildKey}, LastIteration, {ChildKey}, "x", JoinKind.LeftAnti), Custom1 = if IsEndlessLoop then [Message= "The data is in an endless loop. Check Table in ""Endless Loop""", #" Endless Loop"= LastIteration] meta [ResultsSoFar = RemainingResults] else IsLeafin Custom1// /*, documentation = [Documentation.Name = " Table.SolveParentChild", Documentation.Description = " Creates columns for all parents, multiple parents are supported" , Documentation.LongDescription = " Creates columns for all parents, multiple parents are supported", Documentation.Category = " Table", Documentation.Version = " 2.0: Checking for endless loops ", Documentation.Source = " local", Documentation.Author = " Imke Feldmann: www.TheBIccountant.com", Documentation.Examples = {[Description = " See: http://wp.me/p6lgsG-sl for more details" , Code = " ", Result = " "]}] in Value.ReplaceType(func, Value.ReplaceMetadata(Value.Type(func), documentation)) //*/
公式很长,你可以不用管具体细节,直接复制粘贴即可。
这个自定义函数建好之后,可以重命名,这里我命名为层级转换。

2、调用自定义函数
点击该自定义函数,可以看到它需要输入4个参数,从上往下依次把我们之前的结构表的表名和相应字段放进去,如下:

然后点击“调用”,即可看到转换好的扁平结构数据:

转换后的结构和我们前面用DAX父子函数的效果一样,不过用这个自定义函数要简单很多,如果你有这样的需求,可以尝试用这个方法实现。




PowerBI星球的最新版内容合辑,值得你收藏学习:
「PowerBI星球」内容合集


如果你想深入学习Power BI,欢迎加入PowerBI星球学习社群,获取更多学习资源,和6k+ 深度学习者一起精进~

PowerBI星球
海量干货,帮你轻松上手 Power BI
 最新文章