Thursday, February 12, 2026

How to Identify the duplicate rows in a table using Power Query

How to Identify the duplicate Values in a Column using Power Query
Scenario:
Suppose, we have a dataset(ds_Sample) as below with duplicate values in a Prod_ID column and some duplicate rows:
let
    Source = #table (
        {"Trans_ID", "Prod_ID", "Prod_Name"},
        {
            {12345, 123, "Laptop"},
            {12346, 124, "Radio"},
            {12347, 125, "Keyboard"},
            {12348, 126, "Television"},
            {12349, 127, "Printer"},
            {12350, 128, "Scanner"},
            {12351, 129, "Camera"},
            {12352, 130, "Tripod"},
            {12345, 123, "Laptop"},     // duplicate row
            {12348, 126, "Television"}, // duplicate row
            {12351, 129, "Camera"},     // duplicate row
            {12356, 127, "Printer"},
            {12357, 124, "Radio"},
            {12358, 126, "Television"}
        }
    ),
    ChangeType = Table.TransformColumnTypes(Source, {
        {"Trans_ID", Int64.Type},
        {"Prod_ID", Int64.Type},
        {"Prod_Name", type text}
    })
in
    ChangeType

Now can identify and flag the duplicate values and rows using below methods.

Method-1: Using Text.Combine method with Composite Key based on specific Columns:

let
    /* Source dataset */
    Source = ds_Sample,

    /* Columns to include in Composite Key */
    SelectedCols = {"Trans_ID","Prod_ID"},

    /* Composite Key from selected columns */
    Def_CompositeKey = Table.AddColumn(Source, "Composite_Key", 
        each Text.Combine(
            List.Transform(
                Record.ToList(Record.SelectFields(_, SelectedCols)), 
                each Text.From(_)
                ), 
                "|"
            )
        ),

    /* Flag duplicate rows using Composite Key */
    Flag_DupRow = Table.AddColumn(Def_CompositeKey, "IsDuplicate_Row", 
        each if List.Count(List.Select(Def_CompositeKey[Composite_Key], (x) => x = [Composite_Key])) > 1 
        then "Yes" else "No"
        ),

    /* Flag duplicate products by Prod_ID */
    Flag_DupProd = Table.AddColumn(Flag_DupRow, "IsDuplicate_Prod", 
        each if List.Count(List.Select(Source[Prod_ID], (x) => x = [Prod_ID])) > 1 
        then "Yes" else "No"
        )
in
    Flag_DupProd

Result:
------------------------------------------------
Method-2: Using Text.ToBinary method and Composite Key (Binary Encoded) based on specific Columns:

let
    // Source dataset
    Source = ds_Sample,

    // Columns to include in Composite_Hash_Key
    SelectedCols = {"Trans_ID","Prod_ID"},

    // Generate Composite_Hash_Key from selected columns only
    Def_CompositeKey = Table.AddColumn(Source, "Composite_Hash_Key", 
        each Binary.ToText(
            Text.ToBinary(
                Text.Combine(
                    List.Transform(
                        Record.ToList(Record.SelectFields(_, SelectedCols)), 
                        each Text.From(_)
                        ), 
                        "|"
                    )
                ),
                BinaryEncoding.Base64
            )
        ),

    // Flag duplicate rows using Composite_Hash_Key
    Flag_DupRow = Table.AddColumn(Def_CompositeKey, "IsDuplicate_Row", 
        each if List.Count(List.Select(Def_CompositeKey[Composite_Hash_Key], (x) => x = [Composite_Hash_Key])) > 1 
        then "Yes" else "No"
        ),

    // Flag duplicate products by ProdID
    Flag_DupProd = Table.AddColumn(Flag_DupRow, "IsDuplicate_Prod", 
        each if List.Count(List.Select(Source[Prod_ID], (x) => x = [Prod_ID])) > 1 
        then "Yes" else "No"
        )
in
    Flag_DupProd

Result:
------------------------------------------------
Method-3: Using Table.Group method to identify the Duplicate values based on Prod_ID:

let
    Source = ds_Sample,
    // Count how many times each ProdID appears
    GroupByProd = Table.Group(Source, {"Prod_ID"}, {{"Prod_Count", each Table.RowCount(_), Int64.Type}}),
    // Join back to original details table
    MergeToDetails = Table.NestedJoin(Source, {"Prod_ID"}, GroupByProd, {"Prod_ID"}, "ProdCount"),
    ExpandTable = Table.ExpandTableColumn(MergeToDetails, "ProdCount", {"Prod_Count"}),
    // Add a column to flag duplicate values
    FlagDup_Prod = Table.AddColumn(ExpandTable, "IsDuplicate_Prod", each if [Prod_Count] > 1 then "Yes" else "No")
in
    FlagDup_Prod

Result:
------------------------------------------------
Method-4: Using Table.Group method to return the no. of Duplicate rows and products:

let
    Source = ds_Sample,
    // 1. Count duplicate rows
    DistinctRows = Table.Distinct(Source),
    DuplicateRowCount = Table.RowCount(Source) - Table.RowCount(DistinctRows),

    // 2. Count duplicate products (ProdID)
    GroupByProd = Table.Group(Source, {"Prod_ID"}, {{"Count", each Table.RowCount(_), Int64.Type}}),
    DuplicateProd = Table.RowCount(Table.SelectRows(GroupByProd, each [Count] > 1)),

    // 3. Return summary of results
    Summary= #table(
        {"Metric", "Value"},
        {
            {"Duplicate Rows", DuplicateRowCount},
            {"Duplicate Products", DuplicateProd}
        }
    )
in
    Summary

Result:

--------------------------------------------------------------------------------------------------------
Thanks
--------------------------------------------------------------------------------------------------------

No comments:

Post a Comment

Hi User, Thank You for visiting My Blog. If you wish, please share your genuine Feedback or comments only related to this Blog Posts. It is my humble request that, please do not post any Spam comments or Advertising kind of comments, which will be Ignored.

Featured Post from this Blog

How to compare Current Snapshot Data with Previous Snapshot in Power BI

How to Dynamically compare two Snapshots Data in Power BI Scenario: Suppose we have a sample Sales data, which is stored with Monthly Snapsh...

Popular Posts from this Blog