feat: add setting to block disposable emails

- Add a new setting `EMAIL_DOMAIN_BLOCK_DISPOSABLE` that, when enabled, appends
  to the email domain blocklist a list of domains known to be used by
  temporary or disposable email services.

- Add a utility to automatically download and format the list of domains from
  the disposable-email-domains project on GitHub.

  (https://github.com/disposable-email-domains/disposable-email-domains)
  license: CC0 1.0 Universal (CC0 1.0) [Public Domain]

  from README:
  """
  This repo contains a list of disposable and temporary email address domains often used to register dummy users in order to spam or abuse some services.

  We cannot guarantee all of these can still be considered disposable but we do basic checking so chances are they were disposable at one point in time.
  """
This commit is contained in:
James Hatfield 2024-11-03 10:47:25 -06:00
parent 7015bdfa48
commit 16d06705b3
5 changed files with 4173 additions and 11 deletions

View File

@ -51,6 +51,9 @@ GOMOCK_PACKAGE ?= go.uber.org/mock/mockgen@v0.4.0 # renovate: datasource=go
GOPLS_PACKAGE ?= golang.org/x/tools/gopls@v0.16.2 # renovate: datasource=go GOPLS_PACKAGE ?= golang.org/x/tools/gopls@v0.16.2 # renovate: datasource=go
RENOVATE_NPM_PACKAGE ?= renovate@39.19.1 # renovate: datasource=docker packageName=code.forgejo.org/forgejo-contrib/renovate RENOVATE_NPM_PACKAGE ?= renovate@39.19.1 # renovate: datasource=docker packageName=code.forgejo.org/forgejo-contrib/renovate
# https://github.com/disposable-email-domains/disposable-email-domains/commits/main/
DISPOSABLE_EMAILS_SHA ?= 0c27e671231d27cf66370034d7f6818037416989 # renovate: ...
ifeq ($(HAS_GO), yes) ifeq ($(HAS_GO), yes)
CGO_EXTRA_CFLAGS := -DSQLITE_MAX_VARIABLE_NUMBER=32766 CGO_EXTRA_CFLAGS := -DSQLITE_MAX_VARIABLE_NUMBER=32766
CGO_CFLAGS ?= $(shell $(GO) env CGO_CFLAGS) $(CGO_EXTRA_CFLAGS) CGO_CFLAGS ?= $(shell $(GO) env CGO_CFLAGS) $(CGO_EXTRA_CFLAGS)
@ -417,10 +420,10 @@ lint-frontend: lint-js lint-css
lint-frontend-fix: lint-js-fix lint-css-fix lint-frontend-fix: lint-js-fix lint-css-fix
.PHONY: lint-backend .PHONY: lint-backend
lint-backend: lint-go lint-go-vet lint-editorconfig lint-renovate lint-locale lint-backend: lint-go lint-go-vet lint-editorconfig lint-renovate lint-locale lint-disposable-emails
.PHONY: lint-backend-fix .PHONY: lint-backend-fix
lint-backend-fix: lint-go-fix lint-go-vet lint-editorconfig lint-backend-fix: lint-go-fix lint-go-vet lint-editorconfig lint-disposable-emails-fix
.PHONY: lint-codespell .PHONY: lint-codespell
lint-codespell: lint-codespell:
@ -511,6 +514,14 @@ lint-go-gopls:
lint-editorconfig: lint-editorconfig:
$(GO) run $(EDITORCONFIG_CHECKER_PACKAGE) templates .forgejo/workflows $(GO) run $(EDITORCONFIG_CHECKER_PACKAGE) templates .forgejo/workflows
# Run the generator in -check mode: it compares the committed domain data file
# with the list generated from upstream at the pinned DISPOSABLE_EMAILS_SHA and
# fails when they differ.
.PHONY: lint-disposable-emails
lint-disposable-emails:
$(GO) run build/generate-disposable-email.go -check -r $(DISPOSABLE_EMAILS_SHA)
# Regenerate the domain data file from upstream at the pinned
# DISPOSABLE_EMAILS_SHA, overwriting the committed copy.
.PHONY: lint-disposable-emails-fix
lint-disposable-emails-fix:
$(GO) run build/generate-disposable-email.go -r $(DISPOSABLE_EMAILS_SHA)
.PHONY: lint-templates .PHONY: lint-templates
lint-templates: .venv node_modules lint-templates: .venv node_modules
@node tools/lint-templates-svg.js @node tools/lint-templates-svg.js

View File

@ -0,0 +1,203 @@
// Copyright 2024 James Hatfield
// SPDX-License-Identifier: MIT
//go:build ignore
package main
import (
"bufio"
"bytes"
"crypto"
"flag"
"fmt"
"go/format"
"io"
"log"
"net/http"
"os"
"regexp"
"strings"
)
// disposableEmailListURL is the raw download location of the upstream
// blocklist. Its single %s verb is filled in with a git reference.
const disposableEmailListURL = "https://raw.githubusercontent.com/disposable-email-domains/disposable-email-domains/%s/disposable_email_blocklist.conf"

// Command-line flags of the generator.
var (
	// gitRef selects the upstream revision of the list to download.
	gitRef = flag.String("r", "master", "Git reference of the domain list version")
	// outPat is the path of the generated Go source file.
	outPat = flag.String("o", "modules/setting/disposable_email_domain_data.go", "Output path")
	// check switches from (re)writing the output file to verifying it.
	check = flag.Bool("check", false, "Check if the current output file matches the current upstream list")
)
// main parses the command-line flags and then either regenerates the domain
// data file or, when -check is given, verifies that the file on disk still
// matches what would be generated from the upstream list. Any failure
// terminates the program through log.Fatalf.
func main() {
	flag.Parse()

	if !*check {
		// Regeneration mode: build the Go source (array of domains) and write
		// it to the output path.
		src, err := generate()
		if err != nil {
			log.Fatalf("Generation Error: %v", err)
		}
		if err := os.WriteFile(*outPat, src, 0o644); err != nil {
			log.Fatalf("File Write Error: %v", err)
		}
		return
	}

	// Check mode: load the local copy of the domain list ...
	onDisk, err := get_local_file()
	if err != nil {
		log.Fatalf("File Read Error: %v", err)
	}

	// ... and generate a fresh copy from the remote list.
	upstream, err := generate()
	if err != nil {
		log.Fatalf("Generation Error: %v", err)
	}

	// Comments are stripped from both copies before hashing so that a mere
	// difference in the recorded git reference does not count as a mismatch.
	onDiskSum, err := hash(strip_comments(onDisk))
	if err != nil {
		log.Fatalf("Local Hash Generation Error: %v", err)
	}
	upstreamSum, err := hash(strip_comments(upstream))
	if err != nil {
		log.Fatalf("Remote Hash Generation Error: %v", err)
	}

	// Differing hashes mean the local copy has to be regenerated.
	if onDiskSum != upstreamSum {
		log.Fatalf("Disposable email domain list needs to be updated!! \"make lint-disposable-emails-fix\"")
	}
}
// commentLineRE matches a line that consists solely of a `//` comment,
// optionally preceded by whitespace. It is compiled once instead of on every
// call.
var commentLineRE = regexp.MustCompile(`^\s*//.*$`)

// strip_comments returns a copy of data with every whole-line `//` comment
// removed.
//
// Only lines whose first non-whitespace characters are `//` are dropped; a
// `//` that follows other content (for example inside a quoted string) leaves
// the line untouched. The remaining lines are concatenated without their line
// separators, so the result is suitable for comparing two inputs but is not a
// faithful reformatting of the source.
func strip_comments(data []byte) []byte {
	result := make([]byte, 0, len(data))
	for _, line := range bytes.Split(data, []byte("\n")) {
		if commentLineRE.Match(line) {
			continue
		}
		result = append(result, line...)
	}
	return result
}
// hash returns the lowercase hexadecimal SHA3-256 digest of data.
//
// crypto.Hash.New panics when the requested implementation has not been
// linked into the binary. This file does not import a SHA-3 package itself,
// so whether one is available depends on what else is linked in; an
// unavailable implementation is therefore reported as an error instead of
// crashing the program.
func hash(data []byte) (string, error) {
	if !crypto.SHA3_256.Available() {
		return "", fmt.Errorf("hash function %v is not linked into the binary", crypto.SHA3_256)
	}

	digest := crypto.SHA3_256.New()
	if _, err := digest.Write(data); err != nil {
		return "", err
	}
	return fmt.Sprintf("%x", digest.Sum(nil)), nil
}
// get_local_file reads and returns the complete contents of the file at the
// configured output path (the -o flag). On any open or read failure it
// returns a nil slice together with the error.
func get_local_file() ([]byte, error) {
	file, openErr := os.Open(*outPat)
	if openErr != nil {
		return nil, openErr
	}
	defer file.Close()

	contents, readErr := io.ReadAll(file)
	if readErr != nil {
		return nil, readErr
	}
	return contents, nil
}
// get_remote downloads the upstream blocklist at the configured git reference
// (the -r flag) and returns its entries, one per non-blank line, with
// surrounding whitespace removed.
//
// An error is returned when the request fails, when the server answers with
// anything other than 200 OK, or when the response cannot be read in full.
func get_remote() ([]string, error) {
	url := fmt.Sprintf(disposableEmailListURL, *gitRef)

	// download the domain list
	res, err := http.Get(url)
	if err != nil {
		return nil, err
	}
	defer res.Body.Close()

	// A non-200 response (for example for a git reference that does not
	// exist) carries an error body rather than the list; without this check
	// that body would be emitted as if it were a domain.
	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("downloading %s: unexpected status %s", url, res.Status)
	}

	body, err := io.ReadAll(res.Body)
	if err != nil {
		return nil, err
	}

	// go through all entries (1 domain per line)
	scanner := bufio.NewScanner(bytes.NewReader(body))
	var arrDomains []string
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			// A blank line would otherwise become an empty-string entry.
			continue
		}
		arrDomains = append(arrDomains, line)
	}
	// Scan returning false can also mean a read error (e.g. an over-long
	// line); surface it rather than returning a truncated list.
	if err := scanner.Err(); err != nil {
		return nil, err
	}
	return arrDomains, nil
}
func generate() ([]byte, error) {
var err error
var url string = fmt.Sprintf(disposableEmailListURL, *gitRef)
// download the domains list
arrDomains, err := get_remote()
if err != nil {
return nil, err
}
// build the string in a readable way
var sb strings.Builder
_, err = sb.WriteString("[]string{\n")
if err != nil {
return nil, err
}
for _, item := range arrDomains {
_, err = sb.WriteString(fmt.Sprintf("\t%q,\n", item))
if err != nil {
return nil, err
}
}
_, err = sb.WriteString("}")
if err != nil {
return nil, err
}
// insert the values into file
final := fmt.Sprintf(hdr, url, sb.String())
return format.Source([]byte(final))
}
// hdr is the template of the generated data file. It takes two %s arguments,
// in order: the URL the list was downloaded from (recorded in a comment) and
// the Go expression returned by DisposableEmailDomains (the []string literal).
// The filled-in text is passed through format.Source by generate, so the
// layout here does not need to be gofmt-clean.
const hdr = `
// Copyright 2024 James Hatfield
// SPDX-License-Identifier: MIT
//
// Code generated by build/generate-disposable-email.go. DO NOT EDIT
// Sourced from %s
package setting
import "sync"
var DisposableEmailDomains = sync.OnceValue(func() []string {
return %s
})
`

File diff suppressed because it is too large Load Diff

View File

@ -5,6 +5,7 @@ package setting
import ( import (
"regexp" "regexp"
"slices"
"strings" "strings"
"time" "time"
@ -37,6 +38,7 @@ var Service = struct {
RegisterManualConfirm bool RegisterManualConfirm bool
EmailDomainAllowList []glob.Glob EmailDomainAllowList []glob.Glob
EmailDomainBlockList []glob.Glob EmailDomainBlockList []glob.Glob
EmailDomainBlockDisposable bool
DisableRegistration bool DisableRegistration bool
AllowOnlyInternalRegistration bool AllowOnlyInternalRegistration bool
AllowOnlyExternalRegistration bool AllowOnlyExternalRegistration bool
@ -156,6 +158,22 @@ func loadServiceFrom(rootCfg ConfigProvider) {
} }
Service.EmailDomainAllowList = CompileEmailGlobList(sec, "EMAIL_DOMAIN_WHITELIST", "EMAIL_DOMAIN_ALLOWLIST") Service.EmailDomainAllowList = CompileEmailGlobList(sec, "EMAIL_DOMAIN_WHITELIST", "EMAIL_DOMAIN_ALLOWLIST")
Service.EmailDomainBlockList = CompileEmailGlobList(sec, "EMAIL_DOMAIN_BLOCKLIST") Service.EmailDomainBlockList = CompileEmailGlobList(sec, "EMAIL_DOMAIN_BLOCKLIST")
Service.EmailDomainBlockDisposable = sec.Key("EMAIL_DOMAIN_BLOCK_DISPOSABLE").MustBool(false)
if Service.EmailDomainBlockDisposable {
toAdd := make([]glob.Glob, 0, len(DisposableEmailDomains()))
for _, domain := range DisposableEmailDomains() {
domain = strings.ToLower(domain)
// Only add domains that aren't blocked yet.
if !slices.ContainsFunc(Service.EmailDomainBlockList, func(g glob.Glob) bool { return g.Match(domain) }) {
if g, err := glob.Compile(domain); err != nil {
log.Error("Error in disposable domain %s: %v", domain, err)
} else {
toAdd = append(toAdd, g)
}
}
}
Service.EmailDomainBlockList = append(Service.EmailDomainBlockList, toAdd...)
}
Service.ShowRegistrationButton = sec.Key("SHOW_REGISTRATION_BUTTON").MustBool(!(Service.DisableRegistration || Service.AllowOnlyExternalRegistration)) Service.ShowRegistrationButton = sec.Key("SHOW_REGISTRATION_BUTTON").MustBool(!(Service.DisableRegistration || Service.AllowOnlyExternalRegistration))
Service.ShowMilestonesDashboardPage = sec.Key("SHOW_MILESTONES_DASHBOARD_PAGE").MustBool(true) Service.ShowMilestonesDashboardPage = sec.Key("SHOW_MILESTONES_DASHBOARD_PAGE").MustBool(true)
Service.RequireSignInView = sec.Key("REQUIRE_SIGNIN_VIEW").MustBool() Service.RequireSignInView = sec.Key("REQUIRE_SIGNIN_VIEW").MustBool()

View File

@ -4,6 +4,9 @@
package setting package setting
import ( import (
"fmt"
"sort"
"strings"
"testing" "testing"
"code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/modules/structs"
@ -11,8 +14,18 @@ import (
"github.com/gobwas/glob" "github.com/gobwas/glob"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"golang.org/x/net/publicsuffix"
) )
// match reports whether s is matched by at least one of the given globs.
// It returns false for an empty glob list.
func match(globs []glob.Glob, s string) bool {
	for i := range globs {
		if globs[i].Match(s) {
			return true
		}
	}
	return false
}
func TestLoadServices(t *testing.T) { func TestLoadServices(t *testing.T) {
oldService := Service oldService := Service
defer func() { defer func() {
@ -28,15 +41,6 @@ EMAIL_DOMAIN_BLOCKLIST = d3, *.b
require.NoError(t, err) require.NoError(t, err)
loadServiceFrom(cfg) loadServiceFrom(cfg)
match := func(globs []glob.Glob, s string) bool {
for _, g := range globs {
if g.Match(s) {
return true
}
}
return false
}
assert.True(t, match(Service.EmailDomainAllowList, "d1")) assert.True(t, match(Service.EmailDomainAllowList, "d1"))
assert.True(t, match(Service.EmailDomainAllowList, "foo.w")) assert.True(t, match(Service.EmailDomainAllowList, "foo.w"))
assert.True(t, match(Service.EmailDomainAllowList, "d2")) assert.True(t, match(Service.EmailDomainAllowList, "d2"))
@ -48,6 +52,121 @@ EMAIL_DOMAIN_BLOCKLIST = d3, *.b
assert.False(t, match(Service.EmailDomainBlockList, "d1")) assert.False(t, match(Service.EmailDomainBlockList, "d1"))
} }
// TestLoadServiceBlockDisposable loads a config that only enables
// EMAIL_DOMAIN_BLOCK_DISPOSABLE and checks that the resulting block list
// matches every bundled disposable domain, contains exactly one entry per
// bundled domain, and does not match a handful of mainstream providers.
func TestLoadServiceBlockDisposable(t *testing.T) {
	// Restore the global Service value once the test is done.
	saved := Service
	defer func() { Service = saved }()

	cfg, err := NewConfigProviderFromData(`
[service]
EMAIL_DOMAIN_BLOCK_DISPOSABLE = true
`)
	require.NoError(t, err)
	loadServiceFrom(cfg)

	disposable := DisposableEmailDomains()
	for _, d := range disposable {
		require.True(t, match(Service.EmailDomainBlockList, d))
	}

	// No other block list is configured, so nothing besides the bundled
	// domains may have been added.
	require.Len(t, Service.EmailDomainBlockList, len(disposable))

	// Well-known regular mail providers must not be blocked.
	for _, d := range []string{
		"aol.com",
		"gmx.com",
		"mail.com",
		"zoho.com",
		"proton.me",
		"gmail.com",
		"yahoo.com",
		"icloud.com",
		"outlook.com",
		"protonmail.com",
	} {
		require.False(t, match(Service.EmailDomainBlockList, d))
	}
}
// TestLoadServiceBlockDisposableWithExistingGlobs checks that disposable
// domains already covered by a user-configured block-list glob are not added
// a second time.
//
// It picks the two most frequent public suffixes among the bundled domains,
// blocks them with "*.<suffix>" globs, and then expects the final block list
// to hold those two globs plus one entry for every bundled domain that neither
// glob covers.
func TestLoadServiceBlockDisposableWithExistingGlobs(t *testing.T) {
	oldService := Service
	defer func() {
		Service = oldService
	}()

	// Count how many bundled domains fall under each public suffix.
	tldCounts := make(map[string]int)
	for _, domain := range DisposableEmailDomains() {
		tld, _ := publicsuffix.PublicSuffix(domain)
		tldCounts[tld]++
	}

	type tldkv struct {
		Tld   string
		Count int
	}

	sortedTldCounts := make([]tldkv, 0, len(tldCounts))
	for tld, count := range tldCounts {
		sortedTldCounts = append(sortedTldCounts, tldkv{tld, count})
	}

	// Most frequent first; ties are broken by name so the chosen suffixes do
	// not depend on map iteration order.
	sort.Slice(sortedTldCounts, func(i, j int) bool {
		if sortedTldCounts[i].Count != sortedTldCounts[j].Count {
			return sortedTldCounts[i].Count > sortedTldCounts[j].Count
		}
		return sortedTldCounts[i].Tld < sortedTldCounts[j].Tld
	})
	require.GreaterOrEqual(t, len(sortedTldCounts), 2)

	first, second := sortedTldCounts[0].Tld, sortedTldCounts[1].Tld
	blockString := fmt.Sprintf("*.%s,*.%s", first, second)

	cfg, err := NewConfigProviderFromData(fmt.Sprintf(`
[service]
EMAIL_DOMAIN_BLOCKLIST = %s
EMAIL_DOMAIN_BLOCK_DISPOSABLE = true
`, blockString))
	require.NoError(t, err)
	loadServiceFrom(cfg)

	for _, domain := range DisposableEmailDomains() {
		require.True(t, match(Service.EmailDomainBlockList, domain))
	}

	// A domain is redundant when one of the configured "*.<suffix>" globs
	// already covers it. The leading dot is part of the glob, so it must be
	// part of the suffix test as well; otherwise "x.telecom" would wrongly be
	// counted as covered by "*.com".
	// NOTE(review): assumes the configured globs are compiled without
	// separators, so that "*" also spans dots - confirm against
	// CompileEmailGlobList.
	redundant := 0
	for _, val := range DisposableEmailDomains() {
		if strings.HasSuffix(val, "."+first) ||
			strings.HasSuffix(val, "."+second) {
			redundant++
		}
	}

	// The two configured globs plus every bundled domain they do not cover.
	expected := len(DisposableEmailDomains()) - redundant + 2
	require.Len(t, Service.EmailDomainBlockList, expected)
}
// TestLoadServiceBlockDisposableWithComplementGlobs configures a block-list
// glob ("*.random") alongside EMAIL_DOMAIN_BLOCK_DISPOSABLE and expects the
// final block list to match every bundled disposable domain and to contain
// one entry per bundled domain plus the configured glob.
func TestLoadServiceBlockDisposableWithComplementGlobs(t *testing.T) {
	// Restore the global Service value once the test is done.
	saved := Service
	defer func() { Service = saved }()

	cfg, err := NewConfigProviderFromData(`
[service]
EMAIL_DOMAIN_BLOCKLIST = *.random
EMAIL_DOMAIN_BLOCK_DISPOSABLE = true
`)
	require.NoError(t, err)
	loadServiceFrom(cfg)

	disposable := DisposableEmailDomains()
	for _, d := range disposable {
		require.True(t, match(Service.EmailDomainBlockList, d))
	}

	// One entry for the configured glob, one for each bundled domain.
	require.Len(t, Service.EmailDomainBlockList, len(disposable)+1)
}
func TestLoadServiceVisibilityModes(t *testing.T) { func TestLoadServiceVisibilityModes(t *testing.T) {
oldService := Service oldService := Service
defer func() { defer func() {