Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Linear cost dedupe #144

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ module github.com/minio/pkg/v3
go 1.23

require (
github.com/cespare/xxhash/v2 v2.3.0
github.com/cheggaaa/pb v1.0.29
github.com/fatih/color v1.18.0
github.com/fatih/structs v1.1.0
github.com/go-ldap/ldap/v3 v3.4.8
github.com/lestrrat-go/jwx v1.2.30
github.com/lestrrat-go/jwx/v2 v2.1.3
github.com/mattn/go-colorable v0.1.13
github.com/mattn/go-isatty v0.0.20
Expand All @@ -16,7 +16,9 @@ require (
github.com/minio/mux v1.8.2
github.com/montanaflynn/stats v0.7.1
github.com/rjeczalik/notify v0.9.3
github.com/secure-io/sio-go v0.3.1
github.com/tinylib/msgp v1.2.5
github.com/zeebo/xxh3 v1.0.2
go.etcd.io/etcd/client/v3 v3.5.17
golang.org/x/crypto v0.31.0
golang.org/x/sys v0.28.0
Expand All @@ -39,7 +41,6 @@ require (
github.com/google/uuid v1.6.0 // indirect
github.com/klauspost/cpuid/v2 v2.2.9 // indirect
github.com/kr/pretty v0.2.1 // indirect
github.com/lestrrat-go/backoff/v2 v2.0.8 // indirect
github.com/lestrrat-go/blackmagic v1.0.2 // indirect
github.com/lestrrat-go/httpcc v1.0.1 // indirect
github.com/lestrrat-go/httprc v1.0.6 // indirect
Expand All @@ -51,7 +52,6 @@ require (
github.com/minio/md5-simd v1.1.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/philhofer/fwd v1.1.3-0.20240916144458-20a13a1f6b7c // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.61.0 // indirect
Expand All @@ -61,7 +61,6 @@ require (
github.com/rivo/uniseg v0.4.7 // indirect
github.com/rs/xid v1.6.0 // indirect
github.com/safchain/ethtool v0.5.9 // indirect
github.com/secure-io/sio-go v0.3.1 // indirect
github.com/segmentio/asm v1.2.0 // indirect
github.com/shirou/gopsutil/v3 v3.24.5 // indirect
github.com/shoenig/go-m1cpu v0.1.6 // indirect
Expand Down
13 changes: 6 additions & 7 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+
github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU=
github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa h1:LHTHcTQiSGT7VVbI0o4wBRNQIgn917usHWOd6VAffYI=
github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cheggaaa/pb v1.0.29 h1:FckUN5ngEk2LpvuG0fw1GEFx6LtyY2pWI/Z2QgCnEYo=
github.com/cheggaaa/pb v1.0.29/go.mod h1:W40334L7FMC5JKWldsTWbdGjLo0RxUKK73K+TuPxX30=
github.com/coreos/go-semver v0.3.1 h1:yi21YpKnrx1gt5R+la8n5WgS0kCrsPp33dmEyHReZr4=
Expand Down Expand Up @@ -77,8 +79,6 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lestrrat-go/backoff/v2 v2.0.8 h1:oNb5E5isby2kiro9AgdHLv5N5tint1AnDVVf2E2un5A=
github.com/lestrrat-go/backoff/v2 v2.0.8/go.mod h1:rHP/q/r9aT27n24JQLa7JhSQZCKBBOiM/uP402WwN8Y=
github.com/lestrrat-go/blackmagic v1.0.2 h1:Cg2gVSc9h7sz9NOByczrbUvLopQmXrfFx//N+AkAr5k=
github.com/lestrrat-go/blackmagic v1.0.2/go.mod h1:UrEqBzIR2U6CnzVyUtfM6oZNMt/7O7Vohk2J0OGSAtU=
github.com/lestrrat-go/httpcc v1.0.1 h1:ydWCStUeJLkpYyjLDHihupbn2tYmZ7m22BGkcvZZrIE=
Expand All @@ -87,11 +87,8 @@ github.com/lestrrat-go/httprc v1.0.6 h1:qgmgIRhpvBqexMJjA/PmwSvhNk679oqD1RbovdCG
github.com/lestrrat-go/httprc v1.0.6/go.mod h1:mwwz3JMTPBjHUkkDv/IGJ39aALInZLrhBp0X7KGUZlo=
github.com/lestrrat-go/iter v1.0.2 h1:gMXo1q4c2pHmC3dn8LzRhJfP1ceCbgSiT9lUydIzltI=
github.com/lestrrat-go/iter v1.0.2/go.mod h1:Momfcq3AnRlRjI5b5O8/G5/BvpzrhoFTZcn06fEOPt4=
github.com/lestrrat-go/jwx v1.2.30 h1:VKIFrmjYn0z2J51iLPadqoHIVLzvWNa1kCsTqNDHYPA=
github.com/lestrrat-go/jwx v1.2.30/go.mod h1:vMxrwFhunGZ3qddmfmEm2+uced8MSI6QFWGTKygjSzQ=
github.com/lestrrat-go/jwx/v2 v2.1.3 h1:Ud4lb2QuxRClYAmRleF50KrbKIoM1TddXgBrneT5/Jo=
github.com/lestrrat-go/jwx/v2 v2.1.3/go.mod h1:q6uFgbgZfEmQrfJfrCo90QcQOcXFMfbI/fO0NqRtvZo=
github.com/lestrrat-go/option v1.0.0/go.mod h1:5ZHFbivi4xwXxhxY9XHDe2FHo6/Z7WWmtT7T5nBBp3I=
github.com/lestrrat-go/option v1.0.1 h1:oAzP2fvZGQKWkvHa1/SAcFolBEca1oN+mQ7eooNBEYU=
github.com/lestrrat-go/option v1.0.1/go.mod h1:5ZHFbivi4xwXxhxY9XHDe2FHo6/Z7WWmtT7T5nBBp3I=
github.com/lufia/plan9stats v0.0.0-20240909124753-873cd0166683 h1:7UMa6KCCMjZEMDtTVdcGu0B1GmmC7QJKiCCjyTAWQy0=
Expand Down Expand Up @@ -123,8 +120,6 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/philhofer/fwd v1.1.3-0.20240916144458-20a13a1f6b7c h1:dAMKvw0MlJT1GshSTtih8C2gDs04w8dReiOGXrGLNoY=
github.com/philhofer/fwd v1.1.3-0.20240916144458-20a13a1f6b7c/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
Expand Down Expand Up @@ -180,6 +175,10 @@ github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9dec
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/yusufpapurcu/wmi v1.2.4 h1:zFUKzehAFReQwLys1b/iSMl+JQGSCSjtVqQn9bBrPo0=
github.com/yusufpapurcu/wmi v1.2.4/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ=
github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0=
github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA=
go.etcd.io/etcd/api/v3 v3.5.17 h1:cQB8eb8bxwuxOilBpMJAEo8fAONyrdXTHUNcMd8yT1w=
go.etcd.io/etcd/api/v3 v3.5.17/go.mod h1:d1hvkRuXkts6PmaYk2Vrgqbv7H4ADfAKhyJqHNLJCB4=
go.etcd.io/etcd/client/pkg/v3 v3.5.17 h1:XxnDXAWq2pnxqx76ljWwiQ9jylbpC4rvkAeRVOUKKVw=
Expand Down
31 changes: 31 additions & 0 deletions policy/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
package policy

import (
"crypto/rand"
"encoding/binary"
"encoding/json"
"io"
"strings"
Expand Down Expand Up @@ -242,6 +244,11 @@ func MergePolicies(inputs ...Policy) Policy {
}

func (iamp *Policy) dropDuplicateStatements() {
// Switch to the O(N) hash-based version when there are more than 10 statements,
// instead of the slower loop below.
if len(iamp.Statements) > 10 {
iamp.dropDuplicateStatementsMany()
return
}
dups := make(map[int]struct{})
for i := range iamp.Statements {
if _, ok := dups[i]; ok {
Expand Down Expand Up @@ -271,6 +278,30 @@ func (iamp *Policy) dropDuplicateStatements() {
iamp.Statements = iamp.Statements[:c]
}

// dropDuplicateStatementsMany removes duplicate statements in a single pass.
//
// Each statement is reduced to a 128-bit seeded hash (see Statement.hash) and
// only the first statement producing a given hash is kept. The relative order
// of the surviving statements is preserved and iamp.Statements is compacted
// in place, reusing its backing array.
//
// Statements are compared by hash only; no follow-up equality check is done.
func (iamp *Policy) dropDuplicateStatementsMany() {
	// Set of hashes already seen.
	found := make(map[[16]byte]struct{}, len(iamp.Statements))

	// Derive a per-call random seed so hash values cannot be predicted by
	// whoever supplies the statements.
	var baseSeed [8]byte
	// The error is deliberately ignored: this function has no way to report
	// it, and if the read fails the seed simply stays at whatever bytes were
	// filled in (zero by default), which still yields a correct dedupe.
	_, _ = rand.Read(baseSeed[:])
	seed := binary.LittleEndian.Uint64(baseSeed[:])

	writeAt := 0
	for _, s := range iamp.Statements {
		h := s.hash(seed)
		if _, ok := found[h]; ok {
			// duplicate, do not write.
			continue
		}
		found[h] = struct{}{}
		// writeAt never exceeds the read position, so this cannot clobber
		// a statement that has not been visited yet.
		iamp.Statements[writeAt] = s
		writeAt++
	}
	iamp.Statements = iamp.Statements[:writeAt]
}

// UnmarshalJSON - decodes JSON data to Iamp.
func (iamp *Policy) UnmarshalJSON(data []byte) error {
// subtype to avoid recursive call to UnmarshalJSON()
Expand Down
153 changes: 149 additions & 4 deletions policy/policy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"bytes"
"encoding/json"
"net"
"strconv"
"strings"
"testing"
"time"
Expand Down Expand Up @@ -1543,11 +1544,155 @@ func TestMergePolicies(t *testing.T) {
},
},
},
{
inputs: []Policy{p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3},
expected: Policy{
Version: DefaultVersion,
Statements: []Statement{
NewStatement(
"",
Deny,
NewActionSet(AllAdminActions),
ResourceSet{},
condition.NewFunctions(),
),
NewStatement(
"",
Allow,
NewActionSet(AllActions),
NewResourceSet(NewResource("*")),
condition.NewFunctions(),
),
NewStatement(
"",
Allow,
NewActionSet(GetBucketLocationAction),
NewResourceSet(NewResource("mybucket")),
condition.NewFunctions(),
),
},
},
},
}
for i, testCase := range testCases {
got := MergePolicies(testCase.inputs...)
if !got.Equals(testCase.expected) {
t.Errorf("Case %d: expected: %v, got %v", i+1, got, testCase.expected)
}
t.Run(strconv.Itoa(i), func(t *testing.T) {
got := MergePolicies(testCase.inputs...)
if !got.Equals(testCase.expected) {
t.Errorf("Case %d: expected: %v, got %v", i, testCase.expected, got)
}
})
}
}

// BenchmarkDedupe measures dropDuplicateStatements on a statement list made
// by concatenating clones of the statements from a repeated sequence of three
// overlapping policies. The merge itself happens before the timer is reset,
// so only the deduplication is timed.
func BenchmarkDedupe(b *testing.B) {
	// Collect every key of the supported action tables as plain Actions.
	var every, everyAdmin []Action
	for a := range supportedActions {
		every = append(every, a)
	}
	for a := range supportedAdminActions {
		everyAdmin = append(everyAdmin, Action(a))
	}

	// Builders for the two resource sets used below. Each call returns a
	// freshly built set so no statement shares a set with another.
	sixBuckets := func() ResourceSet {
		return NewResourceSet(NewResource("bucket0"), NewResource("bucket1"), NewResource("bucket2"), NewResource("bucket3"), NewResource("bucket4"), NewResource("bucket5"))
	}
	manyBuckets := func() ResourceSet {
		return NewResourceSet(NewResource("mybucketA"), NewResource("mybucketB"), NewResource("mybucketC"), NewResource("mybucketD"), NewResource("mybucketE"), NewResource("mybucketF"), NewResource("mybucketG"), NewResource("mybucketH"), NewResource("mybucketI"), NewResource("mybucketJ"), NewResource("mybucketK"), NewResource("mybucketL"), NewResource("mybucketM"), NewResource("mybucketN"), NewResource("mybucketO"), NewResource("mybucketP"), NewResource("mybucketQ"), NewResource("mybucketR"), NewResource("mybucketS"), NewResource("mybucketS"), NewResource("mybucketU"), NewResource("mybucketV"), NewResource("mybucketX"))
	}

	// Builders for the statement shapes used by the policies.
	denyAdmin := func() Statement {
		return NewStatement("", Deny, NewActionSet(everyAdmin...), sixBuckets(), condition.NewFunctions())
	}
	allowEvery := func(rs func() ResourceSet) Statement {
		return NewStatement("", Allow, NewActionSet(every...), rs(), condition.NewFunctions())
	}

	p1 := Policy{
		Version:    DefaultVersion,
		Statements: []Statement{denyAdmin(), allowEvery(sixBuckets)},
	}

	// p2 is a subset of p1
	p2 := Policy{
		Version:    DefaultVersion,
		Statements: []Statement{denyAdmin()},
	}

	p3 := Policy{
		ID:         "MyPolicyForMyBucket1",
		Version:    DefaultVersion,
		Statements: []Statement{allowEvery(manyBuckets)},
	}

	type benchCase struct {
		inputs   []Policy
		expected Policy
	}
	cases := []benchCase{
		{
			inputs: []Policy{p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3, p1, p2, p3},
			expected: Policy{
				Version: DefaultVersion,
				Statements: []Statement{
					denyAdmin(),
					allowEvery(sixBuckets),
					allowEvery(manyBuckets),
				},
			},
		},
	}

	for idx, tc := range cases {
		b.Run(strconv.Itoa(idx), func(b *testing.B) {
			// Concatenate clones of all input statements; the version is
			// taken from the first input that sets it.
			var merged Policy
			for _, in := range tc.inputs {
				if merged.Version == "" {
					merged.Version = in.Version
				}
				for _, st := range in.Statements {
					merged.Statements = append(merged.Statements, st.Clone())
				}
			}
			b.ResetTimer()
			b.ReportAllocs()
			for n := 0; n < b.N; n++ {
				// Copy the Policy header only; the statement slice still
				// points at merged's backing array.
				work := merged
				work.dropDuplicateStatements()
			}
		})
	}
}
40 changes: 40 additions & 0 deletions policy/statement.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
package policy

import (
"encoding/binary"
"strings"

"github.com/minio/pkg/v3/policy/condition"
"github.com/zeebo/xxh3"
)

// Statement - iam policy statement.
Expand Down Expand Up @@ -203,6 +205,44 @@ func (statement Statement) Equals(st Statement) bool {
return true
}

// hash returns a seeded 128-bit digest of the statement.
//
// The digest covers Effect, Actions, NotActions, Resources and Conditions;
// no other field is read. Every collection contributes its length plus one
// hash per element, all folded together with XOR, so the result does not
// depend on the order in which elements are visited. Each contribution uses
// its own seed offset (seed+1 .. seed+8) so that identical strings appearing
// in different fields hash differently.
func (statement Statement) hash(seed uint64) [16]byte {
	// The effect forms the starting value; everything else is XOR-ed in.
	sum := xxh3.HashString128Seed(string(statement.Effect), seed)

	// mix folds one 128-bit value into the running digest.
	mix := func(v xxh3.Uint128) {
		sum.Lo ^= v.Lo
		sum.Hi ^= v.Hi
	}
	// mixLen folds a collection length, hashed under seed s, into the digest.
	mixLen := func(n int, s uint64) {
		var buf [8]byte
		binary.LittleEndian.PutUint64(buf[:], uint64(n))
		mix(xxh3.Hash128Seed(buf[:], s))
	}

	mixLen(len(statement.Actions), seed+1)
	for a := range statement.Actions {
		mix(xxh3.HashString128Seed(string(a), seed+2))
	}

	mixLen(len(statement.NotActions), seed+3)
	for a := range statement.NotActions {
		mix(xxh3.HashString128Seed(string(a), seed+4))
	}

	mixLen(len(statement.Resources), seed+5)
	for r := range statement.Resources {
		mix(xxh3.HashString128Seed(r.Pattern+r.Type.String(), seed+6))
	}

	mixLen(len(statement.Conditions), seed+7)
	for _, c := range statement.Conditions {
		mix(xxh3.HashString128Seed(c.String(), seed+8))
	}

	return sum.Bytes()
}

// Clone clones Statement structure
func (statement Statement) Clone() Statement {
return Statement{
Expand Down