From f1f20eb9705566f861330f8da7e2f2a84dae46af Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 20 Feb 2019 10:03:46 +0100
Subject: [PATCH 01/68] drm/tegra: sor: Move register programming out of
 ->init()

The hardware is not guaranteed to be enabled during execution of the
tegra_sor_init() function, which can lead to a crash on some Tegra SoCs.
Fix this by moving all register programming into code that is guaranteed
to only be executed when the hardware is enabled.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index fbbb974c1e1a..91d5c5041d2c 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -2161,6 +2161,15 @@ static void tegra_sor_audio_prepare(struct tegra_sor *sor)
 {
 	u32 value;
 
+	/*
+	 * Enable and unmask the HDA codec SCRATCH0 register interrupt. This
+	 * is used for interoperability between the HDA codec driver and the
+	 * HDMI/DP driver.
+	 */
+	value = SOR_INT_CODEC_SCRATCH1 | SOR_INT_CODEC_SCRATCH0;
+	tegra_sor_writel(sor, value, SOR_INT_ENABLE);
+	tegra_sor_writel(sor, value, SOR_INT_MASK);
+
 	tegra_sor_write_eld(sor);
 
 	value = SOR_AUDIO_HDA_PRESENSE_ELDV | SOR_AUDIO_HDA_PRESENSE_PD;
@@ -2170,6 +2179,8 @@ static void tegra_sor_audio_prepare(struct tegra_sor *sor)
 static void tegra_sor_audio_unprepare(struct tegra_sor *sor)
 {
 	tegra_sor_writel(sor, 0, SOR_AUDIO_HDA_PRESENSE);
+	tegra_sor_writel(sor, 0, SOR_INT_MASK);
+	tegra_sor_writel(sor, 0, SOR_INT_ENABLE);
 }
 
 static int tegra_sor_hdmi_enable_audio_infoframe(struct tegra_sor *sor)
@@ -2811,7 +2822,6 @@ static int tegra_sor_init(struct host1x_client *client)
 	struct tegra_sor *sor = host1x_client_to_sor(client);
 	int connector = DRM_MODE_CONNECTOR_Unknown;
 	int encoder = DRM_MODE_ENCODER_NONE;
-	u32 value;
 	int err;
 
 	if (!sor->aux) {
@@ -2914,15 +2924,6 @@ static int tegra_sor_init(struct host1x_client *client)
 	if (err < 0)
 		return err;
 
-	/*
-	 * Enable and unmask the HDA codec SCRATCH0 register interrupt. This
-	 * is used for interoperability between the HDA codec driver and the
-	 * HDMI/DP driver.
-	 */
-	value = SOR_INT_CODEC_SCRATCH1 | SOR_INT_CODEC_SCRATCH0;
-	tegra_sor_writel(sor, value, SOR_INT_ENABLE);
-	tegra_sor_writel(sor, value, SOR_INT_MASK);
-
 	return 0;
 }
 
@@ -2931,9 +2932,6 @@ static int tegra_sor_exit(struct host1x_client *client)
 	struct tegra_sor *sor = host1x_client_to_sor(client);
 	int err;
 
-	tegra_sor_writel(sor, 0, SOR_INT_MASK);
-	tegra_sor_writel(sor, 0, SOR_INT_ENABLE);
-
 	tegra_output_exit(&sor->output);
 
 	if (sor->aux) {

From 051172e8c1ceef8749f19faacc1d3bef65d20d8d Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 25 Sep 2019 13:26:59 +0200
Subject: [PATCH 02/68] drm/tegra: Fix ordering of cleanup code

Commit Fixes: b9f8b09ce256 ("drm/tegra: Setup shared IOMMU domain after
initialization") changed the initialization order of the IOMMU related
bits but didn't update the cleanup path accordingly. This asymmetry can
cause failures during error recovery.

Fixes: b9f8b09ce256 ("drm/tegra: Setup shared IOMMU domain after initialization")
Signed-off-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Tested-by: Dmitry Osipenko <digetx@gmail.com>
---
 drivers/gpu/drm/tegra/drm.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 6fb7d74ff553..bc7cc32140f8 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -201,19 +201,19 @@ hub:
 	if (tegra->hub)
 		tegra_display_hub_cleanup(tegra->hub);
 device:
-	host1x_device_exit(device);
-fbdev:
-	drm_kms_helper_poll_fini(drm);
-	tegra_drm_fb_free(drm);
-config:
-	drm_mode_config_cleanup(drm);
-
 	if (tegra->domain) {
 		mutex_destroy(&tegra->mm_lock);
 		drm_mm_takedown(&tegra->mm);
 		put_iova_domain(&tegra->carveout.domain);
 		iova_cache_put();
 	}
+
+	host1x_device_exit(device);
+fbdev:
+	drm_kms_helper_poll_fini(drm);
+	tegra_drm_fb_free(drm);
+config:
+	drm_mode_config_cleanup(drm);
 domain:
 	if (tegra->domain)
 		iommu_domain_free(tegra->domain);

From 004e822a6faa807f4413ac066fa29ea433550070 Mon Sep 17 00:00:00 2001
From: Dariusz Marcinkiewicz <darekm@google.com>
Date: Wed, 14 Aug 2019 12:45:05 +0200
Subject: [PATCH 03/68] drm/tegra: Use cec_notifier_conn_(un)register()

Use the new cec_notifier_conn_(un)register() functions to
(un)register the notifier for the HDMI connector, and fill in
the cec_connector_info.

Signed-off-by: Dariusz Marcinkiewicz <darekm@google.com>
Tested-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/output.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c
index bdcaa4c7168c..34373734ff68 100644
--- a/drivers/gpu/drm/tegra/output.c
+++ b/drivers/gpu/drm/tegra/output.c
@@ -70,6 +70,11 @@ tegra_output_connector_detect(struct drm_connector *connector, bool force)
 
 void tegra_output_connector_destroy(struct drm_connector *connector)
 {
+	struct tegra_output *output = connector_to_output(connector);
+
+	if (output->cec)
+		cec_notifier_conn_unregister(output->cec);
+
 	drm_connector_unregister(connector);
 	drm_connector_cleanup(connector);
 }
@@ -163,18 +168,11 @@ int tegra_output_probe(struct tegra_output *output)
 		disable_irq(output->hpd_irq);
 	}
 
-	output->cec = cec_notifier_get(output->dev);
-	if (!output->cec)
-		return -ENOMEM;
-
 	return 0;
 }
 
 void tegra_output_remove(struct tegra_output *output)
 {
-	if (output->cec)
-		cec_notifier_put(output->cec);
-
 	if (output->hpd_gpio)
 		free_irq(output->hpd_irq, output);
 
@@ -184,6 +182,7 @@ void tegra_output_remove(struct tegra_output *output)
 
 int tegra_output_init(struct drm_device *drm, struct tegra_output *output)
 {
+	int connector_type;
 	int err;
 
 	if (output->panel) {
@@ -199,6 +198,21 @@ int tegra_output_init(struct drm_device *drm, struct tegra_output *output)
 	if (output->hpd_gpio)
 		enable_irq(output->hpd_irq);
 
+	connector_type = output->connector.connector_type;
+	/*
+	 * Create a CEC notifier for HDMI connector.
+	 */
+	if (connector_type == DRM_MODE_CONNECTOR_HDMIA ||
+	    connector_type == DRM_MODE_CONNECTOR_HDMIB) {
+		struct cec_connector_info conn_info;
+
+		cec_fill_conn_info_from_drm(&conn_info, &output->connector);
+		output->cec = cec_notifier_conn_register(output->dev, NULL,
+							 &conn_info);
+		if (!output->cec)
+			return -ENOMEM;
+	}
+
 	return 0;
 }
 

From d98914ebc2af4504e2abc266610c29b4131598a3 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 9 Sep 2019 14:28:46 +0200
Subject: [PATCH 04/68] gpu: host1x: Do not limit DMA segment size

host1x nor any its clients have any limitations on the DMA segment size,
so don't pretend that they do.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/bus.c | 2 +-
 drivers/gpu/host1x/dev.c | 3 +++
 drivers/gpu/host1x/dev.h | 2 ++
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 742aa9ff21b8..2c8559ff3481 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -445,7 +445,7 @@ static int host1x_device_add(struct host1x *host1x,
 	of_dma_configure(&device->dev, host1x->dev->of_node, true);
 
 	device->dev.dma_parms = &device->dma_parms;
-	dma_set_max_seg_size(&device->dev, SZ_4M);
+	dma_set_max_seg_size(&device->dev, UINT_MAX);
 
 	err = host1x_device_parse_dt(device, driver);
 	if (err < 0) {
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 5a3f797240d4..e5fdeea27158 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -237,6 +237,9 @@ static int host1x_probe(struct platform_device *pdev)
 			return PTR_ERR(host->hv_regs);
 	}
 
+	host->dev->dma_parms = &host->dma_parms;
+	dma_set_max_seg_size(host->dev, UINT_MAX);
+
 	dma_set_mask_and_coherent(host->dev, host->info->dma_mask);
 
 	if (host->info->init) {
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index ff56f5e23a02..abafde7c665e 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -140,6 +140,8 @@ struct host1x {
 	struct list_head devices;
 
 	struct list_head list;
+
+	struct device_dma_parameters dma_parms;
 };
 
 void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v);

From b9cd7b954a6dc2329458599222d994837530a350 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 18 Jun 2018 14:01:10 +0200
Subject: [PATCH 05/68] gpu: host1x: Remove gratuitous blank line

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/intr.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 26f3c741d085..9245add23b5d 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -105,7 +105,6 @@ static void action_submit_complete(struct host1x_waitlist *waiter)
 	/*  Add nr_completed to trace */
 	trace_host1x_channel_submit_complete(dev_name(channel->dev),
 					     waiter->count, waiter->thresh);
-
 }
 
 static void action_wakeup(struct host1x_waitlist *waiter)

From 8f45f5071ad2be6ace6f77892e0015e89fd42028 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 5 Sep 2019 11:39:05 +0200
Subject: [PATCH 06/68] gpu: host1x: Explicitly initialize host1x_info
 structures

It's technically not required to explicitly initialize the fields that
will be zero by default, but it's easier to read these structures if
they are all initialized uniformly.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/dev.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index e5fdeea27158..452ee5d64021 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -77,6 +77,9 @@ static const struct host1x_info host1x01_info = {
 	.init = host1x01_init,
 	.sync_offset = 0x3000,
 	.dma_mask = DMA_BIT_MASK(32),
+	.has_hypervisor = false,
+	.num_sid_entries = 0,
+	.sid_table = NULL,
 };
 
 static const struct host1x_info host1x02_info = {
@@ -87,6 +90,9 @@ static const struct host1x_info host1x02_info = {
 	.init = host1x02_init,
 	.sync_offset = 0x3000,
 	.dma_mask = DMA_BIT_MASK(32),
+	.has_hypervisor = false,
+	.num_sid_entries = 0,
+	.sid_table = NULL,
 };
 
 static const struct host1x_info host1x04_info = {
@@ -97,6 +103,9 @@ static const struct host1x_info host1x04_info = {
 	.init = host1x04_init,
 	.sync_offset = 0x2100,
 	.dma_mask = DMA_BIT_MASK(34),
+	.has_hypervisor = false,
+	.num_sid_entries = 0,
+	.sid_table = NULL,
 };
 
 static const struct host1x_info host1x05_info = {
@@ -107,6 +116,9 @@ static const struct host1x_info host1x05_info = {
 	.init = host1x05_init,
 	.sync_offset = 0x2100,
 	.dma_mask = DMA_BIT_MASK(34),
+	.has_hypervisor = false,
+	.num_sid_entries = 0,
+	.sid_table = NULL,
 };
 
 static const struct host1x_sid_entry tegra186_sid_table[] = {

From caccddcfc4b4de75930df2e8f7fd0c66556b13ff Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 18 Jun 2018 14:01:51 +0200
Subject: [PATCH 07/68] gpu: host1x: Request channels for clients, not devices

A struct device doesn't carry much information that a channel might be
interested in, but the client very much does. Request channels for the
clients rather than their parent devices and store a pointer to them
in order to have that information available when needed.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gr2d.c |  2 +-
 drivers/gpu/drm/tegra/gr3d.c |  2 +-
 drivers/gpu/drm/tegra/vic.c  |  2 +-
 drivers/gpu/host1x/channel.c | 13 +++++++------
 drivers/gpu/host1x/channel.h |  1 +
 include/linux/host1x.h       |  2 +-
 6 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 641299cc85b8..3cbb4a029c41 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -40,7 +40,7 @@ static int gr2d_init(struct host1x_client *client)
 	struct gr2d *gr2d = to_gr2d(drm);
 	int err;
 
-	gr2d->channel = host1x_channel_request(client->dev);
+	gr2d->channel = host1x_channel_request(client);
 	if (!gr2d->channel)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index 8b9a35b1cbb3..87a386134cc4 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -49,7 +49,7 @@ static int gr3d_init(struct host1x_client *client)
 	struct gr3d *gr3d = to_gr3d(drm);
 	int err;
 
-	gr3d->channel = host1x_channel_request(client->dev);
+	gr3d->channel = host1x_channel_request(client);
 	if (!gr3d->channel)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index cd0399fd8c63..c97a61c877af 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -198,7 +198,7 @@ static int vic_init(struct host1x_client *client)
 		vic->domain = tegra->domain;
 	}
 
-	vic->channel = host1x_channel_request(client->dev);
+	vic->channel = host1x_channel_request(client);
 	if (!vic->channel) {
 		err = -ENOMEM;
 		goto detach;
diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 1436295aa450..4cd212bb570d 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -115,14 +115,14 @@ static struct host1x_channel *acquire_unused_channel(struct host1x *host)
 
 /**
  * host1x_channel_request() - Allocate a channel
- * @device: Host1x unit this channel will be used to send commands to
+ * @client: Host1x client this channel will be used to send commands to
  *
- * Allocates a new host1x channel for @device. May return NULL if CDMA
+ * Allocates a new host1x channel for @client. May return NULL if CDMA
  * initialization fails.
  */
-struct host1x_channel *host1x_channel_request(struct device *dev)
+struct host1x_channel *host1x_channel_request(struct host1x_client *client)
 {
-	struct host1x *host = dev_get_drvdata(dev->parent);
+	struct host1x *host = dev_get_drvdata(client->dev->parent);
 	struct host1x_channel_list *chlist = &host->channel_list;
 	struct host1x_channel *channel;
 	int err;
@@ -133,7 +133,8 @@ struct host1x_channel *host1x_channel_request(struct device *dev)
 
 	kref_init(&channel->refcount);
 	mutex_init(&channel->submitlock);
-	channel->dev = dev;
+	channel->client = client;
+	channel->dev = client->dev;
 
 	err = host1x_hw_channel_init(host, channel, channel->id);
 	if (err < 0)
@@ -148,7 +149,7 @@ struct host1x_channel *host1x_channel_request(struct device *dev)
 fail:
 	clear_bit(channel->id, chlist->allocated_channels);
 
-	dev_err(dev, "failed to initialize channel\n");
+	dev_err(client->dev, "failed to initialize channel\n");
 
 	return NULL;
 }
diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h
index 4fd694834f74..39044ff6c3aa 100644
--- a/drivers/gpu/host1x/channel.h
+++ b/drivers/gpu/host1x/channel.h
@@ -26,6 +26,7 @@ struct host1x_channel {
 	unsigned int id;
 	struct mutex submitlock;
 	void __iomem *regs;
+	struct host1x_client *client;
 	struct device *dev;
 	struct host1x_cdma cdma;
 };
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index e6eea45e1154..4396cd566a33 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -158,7 +158,7 @@ u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base);
 struct host1x_channel;
 struct host1x_job;
 
-struct host1x_channel *host1x_channel_request(struct device *dev);
+struct host1x_channel *host1x_channel_request(struct host1x_client *client);
 struct host1x_channel *host1x_channel_get(struct host1x_channel *channel);
 void host1x_channel_put(struct host1x_channel *channel);
 int host1x_job_submit(struct host1x_job *job);

From 33904487f1ab1ec1f8103b1ec5dd3d26f94d9711 Mon Sep 17 00:00:00 2001
From: "Ben Dooks (Codethink)" <ben.dooks@codethink.co.uk>
Date: Thu, 17 Oct 2019 12:04:27 +0100
Subject: [PATCH 08/68] gpu: host1x: Make host1x_cdma_wait_pushbuffer_space()
 static

The host1x_cdma_wait_pushbuffer_space() function is not declared or
directly called from outside the file it is in, so make it static.

Fixes the following sparse warning:

    drivers/gpu/host1x/cdma.c:235:5: warning: symbol 'host1x_cdma_wait_pushbuffer_space' was not declared. Should it be static?

Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/cdma.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 48c84c48299c..e8d3fda91d8a 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -232,9 +232,9 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma,
  *
  * Must be called with the cdma lock held.
  */
-int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x,
-				      struct host1x_cdma *cdma,
-				      unsigned int needed)
+static int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x,
+					     struct host1x_cdma *cdma,
+					     unsigned int needed)
 {
 	while (true) {
 		struct push_buffer *pb = &cdma->push_buffer;

From 47b15779b03bf70ca5a315775d2b171c7913ebc3 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 9 Sep 2019 14:25:45 +0200
Subject: [PATCH 09/68] drm/tegra: Inherit device DMA parameters from host1x

The display controllers and VIC don't have any limitations on the
DMA segment size. Inherit the DMA parameters from the parent device,
which also doesn't have any such limitations.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dc.c  | 9 +++++++++
 drivers/gpu/drm/tegra/vic.c | 9 +++++++++
 2 files changed, 18 insertions(+)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index fbf57bc3cdab..f7f7984bb749 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -2074,6 +2074,12 @@ static int tegra_dc_init(struct host1x_client *client)
 		goto cleanup;
 	}
 
+	/*
+	 * Inherit the DMA parameters (such as maximum segment size) from the
+	 * parent device.
+	 */
+	client->dev->dma_parms = client->parent->dma_parms;
+
 	return 0;
 
 cleanup:
@@ -2097,6 +2103,9 @@ static int tegra_dc_exit(struct host1x_client *client)
 	if (!tegra_dc_has_window_groups(dc))
 		return 0;
 
+	/* avoid a dangling pointer just in case this disappears */
+	client->dev->dma_parms = NULL;
+
 	devm_free_irq(dc->dev, dc->irq, dc);
 
 	err = tegra_dc_rgb_exit(dc);
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index c97a61c877af..8d98b0cfc47b 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -214,6 +214,12 @@ static int vic_init(struct host1x_client *client)
 	if (err < 0)
 		goto free_syncpt;
 
+	/*
+	 * Inherit the DMA parameters (such as maximum segment size) from the
+	 * parent device.
+	 */
+	client->dev->dma_parms = client->parent->dma_parms;
+
 	return 0;
 
 free_syncpt:
@@ -236,6 +242,9 @@ static int vic_exit(struct host1x_client *client)
 	struct vic *vic = to_vic(drm);
 	int err;
 
+	/* avoid a dangling pointer just in case this disappears */
+	client->dev->dma_parms = NULL;
+
 	err = tegra_drm_unregister_client(tegra, drm);
 	if (err < 0)
 		return err;

From 0301196b5719341ba98223eed9a0e6ea4eab615a Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 4 Sep 2019 13:00:30 +0200
Subject: [PATCH 10/68] drm/tegra: Use DRM_DEBUG_DRIVER for driver messages

The driver-specific messages should use the DRM_UT_DRIVER category so
that they can be properly filtered.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index bc7cc32140f8..498d22a65616 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -163,10 +163,10 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
 		drm_mm_init(&tegra->mm, gem_start, gem_end - gem_start + 1);
 		mutex_init(&tegra->mm_lock);
 
-		DRM_DEBUG("IOMMU apertures:\n");
-		DRM_DEBUG("  GEM: %#llx-%#llx\n", gem_start, gem_end);
-		DRM_DEBUG("  Carveout: %#llx-%#llx\n", carveout_start,
-			  carveout_end);
+		DRM_DEBUG_DRIVER("IOMMU apertures:\n");
+		DRM_DEBUG_DRIVER("  GEM: %#llx-%#llx\n", gem_start, gem_end);
+		DRM_DEBUG_DRIVER("  Carveout: %#llx-%#llx\n", carveout_start,
+				 carveout_end);
 	}
 
 	if (tegra->hub) {

From dd631e8ac90f08c37636fe9e6d12fc7bdf7f429a Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 8 Feb 2019 13:09:49 +0100
Subject: [PATCH 11/68] drm/tegra: vic: Skip stream ID programming without
 IOMMU

If VIC is not behind an IOMMU, don't touch any of the registers related
to stream ID programming.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/vic.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index 8d98b0cfc47b..748798f2cdc8 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -97,6 +97,9 @@ static int vic_runtime_suspend(struct device *dev)
 
 static int vic_boot(struct vic *vic)
 {
+#ifdef CONFIG_IOMMU_API
+	struct iommu_fwspec *spec = dev_iommu_fwspec_get(vic->dev);
+#endif
 	u32 fce_ucode_size, fce_bin_data_offset;
 	void *hdr;
 	int err = 0;
@@ -105,15 +108,14 @@ static int vic_boot(struct vic *vic)
 		return 0;
 
 #ifdef CONFIG_IOMMU_API
-	if (vic->config->supports_sid) {
-		struct iommu_fwspec *spec = dev_iommu_fwspec_get(vic->dev);
+	if (vic->config->supports_sid && spec) {
 		u32 value;
 
 		value = TRANSCFG_ATT(1, TRANSCFG_SID_FALCON) |
 			TRANSCFG_ATT(0, TRANSCFG_SID_HW);
 		vic_writel(vic, value, VIC_TFBIF_TRANSCFG);
 
-		if (spec && spec->num_ids > 0) {
+		if (spec->num_ids > 0) {
 			value = spec->ids[0] & 0xffff;
 
 			vic_writel(vic, value, VIC_THI_STREAMID0);

From d5ad0e3dfe74f465a218bde84efb25c72b3e51c4 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 8 Feb 2019 13:10:41 +0100
Subject: [PATCH 12/68] drm/tegra: vic: Inherit DMA mask from host1x

VIC, just like all other host1x clients, has the same addressing range
as its parent host1x device. Inherit the DMA mask to reflect that.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/vic.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index 748798f2cdc8..d60e479cde92 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -385,6 +385,13 @@ static int vic_probe(struct platform_device *pdev)
 	struct vic *vic;
 	int err;
 
+	/* inherit DMA mask from host1x parent */
+	err = dma_coerce_mask_and_coherent(dev, *dev->parent->dma_mask);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err);
+		return err;
+	}
+
 	vic = devm_kzalloc(dev, sizeof(*vic), GFP_KERNEL);
 	if (!vic)
 		return -ENOMEM;

From 7baa943e0bb22454887edd1fffb2d00a4b842fe5 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 8 Feb 2019 14:15:44 +0100
Subject: [PATCH 13/68] drm/tegra: vic: Use common IOMMU attach/detach code

Reuse common code to attach to or detach from an IOMMU domain.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/vic.c | 27 ++++++++-------------------
 1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index d60e479cde92..e4b17c7ce708 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -34,7 +34,7 @@ struct vic {
 	void __iomem *regs;
 	struct tegra_drm_client client;
 	struct host1x_channel *channel;
-	struct iommu_domain *domain;
+	struct iommu_group *group;
 	struct device *dev;
 	struct clk *clk;
 	struct reset_control *rst;
@@ -183,21 +183,16 @@ static const struct falcon_ops vic_falcon_ops = {
 static int vic_init(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct iommu_group *group = iommu_group_get(client->dev);
 	struct drm_device *dev = dev_get_drvdata(client->parent);
 	struct tegra_drm *tegra = dev->dev_private;
 	struct vic *vic = to_vic(drm);
 	int err;
 
-	if (group && tegra->domain) {
-		err = iommu_attach_group(tegra->domain, group);
-		if (err < 0) {
-			dev_err(vic->dev, "failed to attach to domain: %d\n",
-				err);
-			return err;
-		}
-
-		vic->domain = tegra->domain;
+	vic->group = host1x_client_iommu_attach(client, false);
+	if (IS_ERR(vic->group)) {
+		err = PTR_ERR(vic->group);
+		dev_err(vic->dev, "failed to attach to domain: %d\n", err);
+		return err;
 	}
 
 	vic->channel = host1x_channel_request(client);
@@ -229,8 +224,7 @@ free_syncpt:
 free_channel:
 	host1x_channel_put(vic->channel);
 detach:
-	if (group && tegra->domain)
-		iommu_detach_group(tegra->domain, group);
+	host1x_client_iommu_detach(client, vic->group);
 
 	return err;
 }
@@ -238,7 +232,6 @@ detach:
 static int vic_exit(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct iommu_group *group = iommu_group_get(client->dev);
 	struct drm_device *dev = dev_get_drvdata(client->parent);
 	struct tegra_drm *tegra = dev->dev_private;
 	struct vic *vic = to_vic(drm);
@@ -253,11 +246,7 @@ static int vic_exit(struct host1x_client *client)
 
 	host1x_syncpt_free(client->syncpts[0]);
 	host1x_channel_put(vic->channel);
-
-	if (vic->domain) {
-		iommu_detach_group(vic->domain, group);
-		vic->domain = NULL;
-	}
+	host1x_client_iommu_detach(client, vic->group);
 
 	return 0;
 }

From aacdf19849734d1be5e407932228ae101ba5b92f Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 8 Feb 2019 14:35:13 +0100
Subject: [PATCH 14/68] drm/tegra: Move IOMMU group into host1x client

Handling of the IOMMU group attachment is common to all clients, so move
the group into the client to simplify code.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dc.c   |  9 ++++-----
 drivers/gpu/drm/tegra/dc.h   |  2 --
 drivers/gpu/drm/tegra/drm.c  | 22 +++++++++++-----------
 drivers/gpu/drm/tegra/drm.h  |  6 ++----
 drivers/gpu/drm/tegra/gr2d.c | 10 ++++------
 drivers/gpu/drm/tegra/gr3d.c | 10 ++++------
 drivers/gpu/drm/tegra/vic.c  | 10 ++++------
 include/linux/host1x.h       |  3 +++
 8 files changed, 32 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index f7f7984bb749..3ac535bd99ca 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -2014,9 +2014,8 @@ static int tegra_dc_init(struct host1x_client *client)
 	if (!dc->syncpt)
 		dev_warn(dc->dev, "failed to allocate syncpoint\n");
 
-	dc->group = host1x_client_iommu_attach(client, true);
-	if (IS_ERR(dc->group)) {
-		err = PTR_ERR(dc->group);
+	err = host1x_client_iommu_attach(client, true);
+	if (err < 0) {
 		dev_err(client->dev, "failed to attach to domain: %d\n", err);
 		return err;
 	}
@@ -2089,7 +2088,7 @@ cleanup:
 	if (!IS_ERR(primary))
 		drm_plane_cleanup(primary);
 
-	host1x_client_iommu_detach(client, dc->group);
+	host1x_client_iommu_detach(client);
 	host1x_syncpt_free(dc->syncpt);
 
 	return err;
@@ -2114,7 +2113,7 @@ static int tegra_dc_exit(struct host1x_client *client)
 		return err;
 	}
 
-	host1x_client_iommu_detach(client, dc->group);
+	host1x_client_iommu_detach(client);
 	host1x_syncpt_free(dc->syncpt);
 
 	return 0;
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
index 0c4d17851f47..3d8ddccd758f 100644
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/drm/tegra/dc.h
@@ -90,8 +90,6 @@ struct tegra_dc {
 	struct drm_info_list *debugfs_files;
 
 	const struct tegra_dc_soc_info *soc;
-
-	struct iommu_group *group;
 };
 
 static inline struct tegra_dc *
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 498d22a65616..b74362cb63eb 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1068,8 +1068,7 @@ int tegra_drm_unregister_client(struct tegra_drm *tegra,
 	return 0;
 }
 
-struct iommu_group *host1x_client_iommu_attach(struct host1x_client *client,
-					       bool shared)
+int host1x_client_iommu_attach(struct host1x_client *client, bool shared)
 {
 	struct drm_device *drm = dev_get_drvdata(client->parent);
 	struct tegra_drm *tegra = drm->dev_private;
@@ -1080,7 +1079,7 @@ struct iommu_group *host1x_client_iommu_attach(struct host1x_client *client,
 		group = iommu_group_get(client->dev);
 		if (!group) {
 			dev_err(client->dev, "failed to get IOMMU group\n");
-			return ERR_PTR(-ENODEV);
+			return -ENODEV;
 		}
 
 		if (!shared || (shared && (group != tegra->group))) {
@@ -1095,7 +1094,7 @@ struct iommu_group *host1x_client_iommu_attach(struct host1x_client *client,
 			err = iommu_attach_group(tegra->domain, group);
 			if (err < 0) {
 				iommu_group_put(group);
-				return ERR_PTR(err);
+				return err;
 			}
 
 			if (shared && !tegra->group)
@@ -1103,22 +1102,23 @@ struct iommu_group *host1x_client_iommu_attach(struct host1x_client *client,
 		}
 	}
 
-	return group;
+	client->group = group;
+
+	return 0;
 }
 
-void host1x_client_iommu_detach(struct host1x_client *client,
-				struct iommu_group *group)
+void host1x_client_iommu_detach(struct host1x_client *client)
 {
 	struct drm_device *drm = dev_get_drvdata(client->parent);
 	struct tegra_drm *tegra = drm->dev_private;
 
-	if (group) {
-		if (group == tegra->group) {
-			iommu_detach_group(tegra->domain, group);
+	if (client->group) {
+		if (client->group == tegra->group) {
+			iommu_detach_group(tegra->domain, client->group);
 			tegra->group = NULL;
 		}
 
-		iommu_group_put(group);
+		iommu_group_put(client->group);
 	}
 }
 
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 29911eff9ceb..6a06d636e930 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -100,10 +100,8 @@ int tegra_drm_register_client(struct tegra_drm *tegra,
 			      struct tegra_drm_client *client);
 int tegra_drm_unregister_client(struct tegra_drm *tegra,
 				struct tegra_drm_client *client);
-struct iommu_group *host1x_client_iommu_attach(struct host1x_client *client,
-					       bool shared);
-void host1x_client_iommu_detach(struct host1x_client *client,
-				struct iommu_group *group);
+int host1x_client_iommu_attach(struct host1x_client *client, bool shared);
+void host1x_client_iommu_detach(struct host1x_client *client);
 
 int tegra_drm_init(struct tegra_drm *tegra, struct drm_device *drm);
 int tegra_drm_exit(struct tegra_drm *tegra);
diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 3cbb4a029c41..5d5af9a05c18 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -17,7 +17,6 @@ struct gr2d_soc {
 };
 
 struct gr2d {
-	struct iommu_group *group;
 	struct tegra_drm_client client;
 	struct host1x_channel *channel;
 	struct clk *clk;
@@ -51,9 +50,8 @@ static int gr2d_init(struct host1x_client *client)
 		goto put;
 	}
 
-	gr2d->group = host1x_client_iommu_attach(client, false);
-	if (IS_ERR(gr2d->group)) {
-		err = PTR_ERR(gr2d->group);
+	err = host1x_client_iommu_attach(client, false);
+	if (err < 0) {
 		dev_err(client->dev, "failed to attach to domain: %d\n", err);
 		goto free;
 	}
@@ -67,7 +65,7 @@ static int gr2d_init(struct host1x_client *client)
 	return 0;
 
 detach:
-	host1x_client_iommu_detach(client, gr2d->group);
+	host1x_client_iommu_detach(client);
 free:
 	host1x_syncpt_free(client->syncpts[0]);
 put:
@@ -87,7 +85,7 @@ static int gr2d_exit(struct host1x_client *client)
 	if (err < 0)
 		return err;
 
-	host1x_client_iommu_detach(client, gr2d->group);
+	host1x_client_iommu_detach(client);
 	host1x_syncpt_free(client->syncpts[0]);
 	host1x_channel_put(gr2d->channel);
 
diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index 87a386134cc4..c249a6bd8d51 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -23,7 +23,6 @@ struct gr3d_soc {
 };
 
 struct gr3d {
-	struct iommu_group *group;
 	struct tegra_drm_client client;
 	struct host1x_channel *channel;
 	struct clk *clk_secondary;
@@ -60,9 +59,8 @@ static int gr3d_init(struct host1x_client *client)
 		goto put;
 	}
 
-	gr3d->group = host1x_client_iommu_attach(client, false);
-	if (IS_ERR(gr3d->group)) {
-		err = PTR_ERR(gr3d->group);
+	err = host1x_client_iommu_attach(client, false);
+	if (err < 0) {
 		dev_err(client->dev, "failed to attach to domain: %d\n", err);
 		goto free;
 	}
@@ -76,7 +74,7 @@ static int gr3d_init(struct host1x_client *client)
 	return 0;
 
 detach:
-	host1x_client_iommu_detach(client, gr3d->group);
+	host1x_client_iommu_detach(client);
 free:
 	host1x_syncpt_free(client->syncpts[0]);
 put:
@@ -95,7 +93,7 @@ static int gr3d_exit(struct host1x_client *client)
 	if (err < 0)
 		return err;
 
-	host1x_client_iommu_detach(client, gr3d->group);
+	host1x_client_iommu_detach(client);
 	host1x_syncpt_free(client->syncpts[0]);
 	host1x_channel_put(gr3d->channel);
 
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index e4b17c7ce708..d34b1ada422c 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -34,7 +34,6 @@ struct vic {
 	void __iomem *regs;
 	struct tegra_drm_client client;
 	struct host1x_channel *channel;
-	struct iommu_group *group;
 	struct device *dev;
 	struct clk *clk;
 	struct reset_control *rst;
@@ -188,9 +187,8 @@ static int vic_init(struct host1x_client *client)
 	struct vic *vic = to_vic(drm);
 	int err;
 
-	vic->group = host1x_client_iommu_attach(client, false);
-	if (IS_ERR(vic->group)) {
-		err = PTR_ERR(vic->group);
+	err = host1x_client_iommu_attach(client, false);
+	if (err < 0) {
 		dev_err(vic->dev, "failed to attach to domain: %d\n", err);
 		return err;
 	}
@@ -224,7 +222,7 @@ free_syncpt:
 free_channel:
 	host1x_channel_put(vic->channel);
 detach:
-	host1x_client_iommu_detach(client, vic->group);
+	host1x_client_iommu_detach(client);
 
 	return err;
 }
@@ -246,7 +244,7 @@ static int vic_exit(struct host1x_client *client)
 
 	host1x_syncpt_free(client->syncpts[0]);
 	host1x_channel_put(vic->channel);
-	host1x_client_iommu_detach(client, vic->group);
+	host1x_client_iommu_detach(client);
 
 	return 0;
 }
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 4396cd566a33..df6e613ba715 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -18,6 +18,7 @@ enum host1x_class {
 };
 
 struct host1x_client;
+struct iommu_group;
 
 /**
  * struct host1x_client_ops - host1x client operations
@@ -34,6 +35,7 @@ struct host1x_client_ops {
  * @list: list node for the host1x client
  * @parent: pointer to struct device representing the host1x controller
  * @dev: pointer to struct device backing this host1x client
+ * @group: IOMMU group that this client is a member of
  * @ops: host1x client operations
  * @class: host1x class represented by this client
  * @channel: host1x channel associated with this client
@@ -44,6 +46,7 @@ struct host1x_client {
 	struct list_head list;
 	struct device *parent;
 	struct device *dev;
+	struct iommu_group *group;
 
 	const struct host1x_client_ops *ops;
 

From 7e3c53a096a9e75b12e69f93ef1fbc7cb1b27297 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 4 Jun 2018 17:36:50 +0200
Subject: [PATCH 15/68] drm/tegra: gem: Rename paddr -> iova

The address can refer to either physical memory or IO virtual memory.
If referring to IO virtual memory, there will always be an associated
physical memory address. Rename this variable to "iova" to clarify in
all cases that this is the IO virtual memory, which in the absence of
an IOMMU is identical to the physical address.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dc.c  |  6 +++---
 drivers/gpu/drm/tegra/fb.c  |  4 ++--
 drivers/gpu/drm/tegra/gem.c | 18 +++++++++---------
 drivers/gpu/drm/tegra/gem.h |  2 +-
 drivers/gpu/drm/tegra/hub.c |  2 +-
 5 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 3ac535bd99ca..54966f538141 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -717,7 +717,7 @@ static void tegra_plane_atomic_update(struct drm_plane *plane,
 	for (i = 0; i < fb->format->num_planes; i++) {
 		struct tegra_bo *bo = tegra_fb_get_plane(fb, i);
 
-		window.base[i] = bo->paddr + fb->offsets[i];
+		window.base[i] = bo->iova + fb->offsets[i];
 
 		/*
 		 * Tegra uses a shared stride for UV planes. Framebuffers are
@@ -869,11 +869,11 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane,
 		return;
 	}
 
-	value |= (bo->paddr >> 10) & 0x3fffff;
+	value |= (bo->iova >> 10) & 0x3fffff;
 	tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR);
 
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	value = (bo->paddr >> 32) & 0x3;
+	value = (bo->iova >> 32) & 0x3;
 	tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR_HI);
 #endif
 
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index e34325c83d28..7cea89f29a5c 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -269,10 +269,10 @@ static int tegra_fbdev_probe(struct drm_fb_helper *helper,
 		}
 	}
 
-	drm->mode_config.fb_base = (resource_size_t)bo->paddr;
+	drm->mode_config.fb_base = (resource_size_t)bo->iova;
 	info->screen_base = (void __iomem *)bo->vaddr + offset;
 	info->screen_size = size;
-	info->fix.smem_start = (unsigned long)(bo->paddr + offset);
+	info->fix.smem_start = (unsigned long)(bo->iova + offset);
 	info->fix.smem_len = size;
 
 	return 0;
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index fb7667c8dd4c..7412e23500d1 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -33,7 +33,7 @@ static dma_addr_t tegra_bo_pin(struct host1x_bo *bo, struct sg_table **sgt)
 
 	*sgt = obj->sgt;
 
-	return obj->paddr;
+	return obj->iova;
 }
 
 static void tegra_bo_unpin(struct host1x_bo *bo, struct sg_table *sgt)
@@ -133,9 +133,9 @@ static int tegra_bo_iommu_map(struct tegra_drm *tegra, struct tegra_bo *bo)
 		goto unlock;
 	}
 
-	bo->paddr = bo->mm->start;
+	bo->iova = bo->mm->start;
 
-	bo->size = iommu_map_sg(tegra->domain, bo->paddr, bo->sgt->sgl,
+	bo->size = iommu_map_sg(tegra->domain, bo->iova, bo->sgt->sgl,
 				bo->sgt->nents, prot);
 	if (!bo->size) {
 		dev_err(tegra->drm->dev, "failed to map buffer\n");
@@ -161,7 +161,7 @@ static int tegra_bo_iommu_unmap(struct tegra_drm *tegra, struct tegra_bo *bo)
 		return 0;
 
 	mutex_lock(&tegra->mm_lock);
-	iommu_unmap(tegra->domain, bo->paddr, bo->size);
+	iommu_unmap(tegra->domain, bo->iova, bo->size);
 	drm_mm_remove_node(bo->mm);
 	mutex_unlock(&tegra->mm_lock);
 
@@ -209,7 +209,7 @@ static void tegra_bo_free(struct drm_device *drm, struct tegra_bo *bo)
 		sg_free_table(bo->sgt);
 		kfree(bo->sgt);
 	} else if (bo->vaddr) {
-		dma_free_wc(drm->dev, bo->gem.size, bo->vaddr, bo->paddr);
+		dma_free_wc(drm->dev, bo->gem.size, bo->vaddr, bo->iova);
 	}
 }
 
@@ -264,7 +264,7 @@ static int tegra_bo_alloc(struct drm_device *drm, struct tegra_bo *bo)
 	} else {
 		size_t size = bo->gem.size;
 
-		bo->vaddr = dma_alloc_wc(drm->dev, size, &bo->paddr,
+		bo->vaddr = dma_alloc_wc(drm->dev, size, &bo->iova,
 					 GFP_KERNEL | __GFP_NOWARN);
 		if (!bo->vaddr) {
 			dev_err(drm->dev,
@@ -365,7 +365,7 @@ static struct tegra_bo *tegra_bo_import(struct drm_device *drm,
 			goto detach;
 		}
 
-		bo->paddr = sg_dma_address(bo->sgt->sgl);
+		bo->iova = sg_dma_address(bo->sgt->sgl);
 	}
 
 	bo->gem.import_attach = attach;
@@ -461,7 +461,7 @@ int __tegra_gem_mmap(struct drm_gem_object *gem, struct vm_area_struct *vma)
 		vma->vm_flags &= ~VM_PFNMAP;
 		vma->vm_pgoff = 0;
 
-		err = dma_mmap_wc(gem->dev->dev, vma, bo->vaddr, bo->paddr,
+		err = dma_mmap_wc(gem->dev->dev, vma, bo->vaddr, bo->iova,
 				  gem->size);
 		if (err < 0) {
 			drm_gem_vm_close(vma);
@@ -523,7 +523,7 @@ tegra_gem_prime_map_dma_buf(struct dma_buf_attachment *attach,
 		if (sg_alloc_table(sgt, 1, GFP_KERNEL))
 			goto free;
 
-		sg_dma_address(sgt->sgl) = bo->paddr;
+		sg_dma_address(sgt->sgl) = bo->iova;
 		sg_dma_len(sgt->sgl) = gem->size;
 	}
 
diff --git a/drivers/gpu/drm/tegra/gem.h b/drivers/gpu/drm/tegra/gem.h
index 83ffb1e14ca3..fafb5724499b 100644
--- a/drivers/gpu/drm/tegra/gem.h
+++ b/drivers/gpu/drm/tegra/gem.h
@@ -31,7 +31,7 @@ struct tegra_bo {
 	struct host1x_bo base;
 	unsigned long flags;
 	struct sg_table *sgt;
-	dma_addr_t paddr;
+	dma_addr_t iova;
 	void *vaddr;
 
 	struct drm_mm_node *mm;
diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c
index 839b49c40e51..104115e42190 100644
--- a/drivers/gpu/drm/tegra/hub.c
+++ b/drivers/gpu/drm/tegra/hub.c
@@ -457,7 +457,7 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane,
 	tegra_plane_writel(p, 0, DC_WINBUF_CDE_CONTROL);
 
 	bo = tegra_fb_get_plane(fb, 0);
-	base = bo->paddr;
+	base = bo->iova;
 
 	tegra_plane_writel(p, state->format, DC_WIN_COLOR_DEPTH);
 	tegra_plane_writel(p, 0, DC_WIN_PRECOMP_WGRP_PARAMS);

From d81f3431e61771028acb69a23601ef5379e39f2a Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 8 Jun 2018 14:56:04 +0200
Subject: [PATCH 16/68] drm/tegra: gem: Use dma_get_sgtable()

Rather than manually creating an SG table in an incorrect way, let the
standard dma_get_sgtable() function do it.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gem.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 7412e23500d1..3345cce14662 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -520,11 +520,9 @@ tegra_gem_prime_map_dma_buf(struct dma_buf_attachment *attach,
 		if (dma_map_sg(attach->dev, sgt->sgl, sgt->nents, dir) == 0)
 			goto free;
 	} else {
-		if (sg_alloc_table(sgt, 1, GFP_KERNEL))
+		if (dma_get_sgtable(attach->dev, sgt, bo->vaddr, bo->iova,
+				    gem->size) < 0)
 			goto free;
-
-		sg_dma_address(sgt->sgl) = bo->iova;
-		sg_dma_len(sgt->sgl) = gem->size;
 	}
 
 	return sgt;

From 8b5a3c17a2778fba817c38beb545bc48228b8da5 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 8 Jun 2018 14:59:13 +0200
Subject: [PATCH 17/68] drm/tegra: gem: Always map SG tables for DMA-BUFs

When an importer wants to map a DMA-BUF, make sure to always actually
map it, irrespective of whether the buffer is contiguous or not.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gem.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 3345cce14662..00701cadaceb 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -516,15 +516,15 @@ tegra_gem_prime_map_dma_buf(struct dma_buf_attachment *attach,
 
 		for_each_sg(sgt->sgl, sg, bo->num_pages, i)
 			sg_set_page(sg, bo->pages[i], PAGE_SIZE, 0);
-
-		if (dma_map_sg(attach->dev, sgt->sgl, sgt->nents, dir) == 0)
-			goto free;
 	} else {
 		if (dma_get_sgtable(attach->dev, sgt, bo->vaddr, bo->iova,
 				    gem->size) < 0)
 			goto free;
 	}
 
+	if (dma_map_sg(attach->dev, sgt->sgl, sgt->nents, dir) == 0)
+		goto free;
+
 	return sgt;
 
 free:

From acf6b77c4c3dc38de1e435d114c267cd8dedc913 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 8 Jun 2018 15:00:05 +0200
Subject: [PATCH 18/68] drm/tegra: gem: Use sg_alloc_table_from_pages()

Instead of manually creating the SG table for a discontiguous buffer,
use the existing sg_alloc_table_from_pages(). Note that this is not safe
to be used with the ARM DMA/IOMMU integration code because that will not
ensure that the whole buffer is mapped contiguously. Depending on the
size of the individual entries the mapping may end up containing holes
to ensure alignment.

However, we only ever use these buffers with explicit IOMMU API usage
and know how to avoid these holes.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gem.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 00701cadaceb..d2f88cc3134f 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -508,14 +508,9 @@ tegra_gem_prime_map_dma_buf(struct dma_buf_attachment *attach,
 		return NULL;
 
 	if (bo->pages) {
-		struct scatterlist *sg;
-		unsigned int i;
-
-		if (sg_alloc_table(sgt, bo->num_pages, GFP_KERNEL))
+		if (sg_alloc_table_from_pages(sgt, bo->pages, bo->num_pages,
+					      0, gem->size, GFP_KERNEL) < 0)
 			goto free;
-
-		for_each_sg(sgt->sgl, sg, bo->num_pages, i)
-			sg_set_page(sg, bo->pages[i], PAGE_SIZE, 0);
 	} else {
 		if (dma_get_sgtable(attach->dev, sgt, bo->vaddr, bo->iova,
 				    gem->size) < 0)

From 5e881f6b29fe69726d0aa11f846c438a5cb7ddb0 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 3 Aug 2015 14:08:34 +0200
Subject: [PATCH 19/68] drm/tegra: dpaux: Support monitor hotplugging

The dpaux driver has a quirk built-in that will delay initialization of
the display driver for a short while, trying to detect an eDP panel. The
reason for this quirk is that the panel may not report as connected
until after the display driver has initialized, at which point the fbdev
emulation will have fallen back to 1024x768 as default resolution, which
will likely not be the eDP panel's native resolution.

With upcoming DisplayPort support, the code needs to be able to cope
with hotpluggable monitors as well. Waiting for a panel to show up is no
longer going to work because the monitor may not be attached on boot. If
the output runs in DisplayPort mode, skip waiting for the panel to show
up.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dpaux.c | 47 +++++++++++++++++++++--------------
 1 file changed, 29 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
index 1144605c9737..4a35a6e0e2aa 100644
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c
@@ -691,21 +691,26 @@ int drm_dp_aux_attach(struct drm_dp_aux *aux, struct tegra_output *output)
 	if (err < 0)
 		return err;
 
-	timeout = jiffies + msecs_to_jiffies(250);
-
-	while (time_before(jiffies, timeout)) {
+	if (output->panel) {
 		enum drm_connector_status status;
 
-		status = drm_dp_aux_detect(aux);
-		if (status == connector_status_connected) {
-			enable_irq(dpaux->irq);
-			return 0;
+		timeout = jiffies + msecs_to_jiffies(250);
+
+		while (time_before(jiffies, timeout)) {
+			status = drm_dp_aux_detect(aux);
+
+			if (status == connector_status_connected)
+				break;
+
+			usleep_range(1000, 2000);
 		}
 
-		usleep_range(1000, 2000);
+		if (status != connector_status_connected)
+			return -ETIMEDOUT;
 	}
 
-	return -ETIMEDOUT;
+	enable_irq(dpaux->irq);
+	return 0;
 }
 
 int drm_dp_aux_detach(struct drm_dp_aux *aux)
@@ -720,21 +725,27 @@ int drm_dp_aux_detach(struct drm_dp_aux *aux)
 	if (err < 0)
 		return err;
 
-	timeout = jiffies + msecs_to_jiffies(250);
-
-	while (time_before(jiffies, timeout)) {
+	if (dpaux->output->panel) {
 		enum drm_connector_status status;
 
-		status = drm_dp_aux_detect(aux);
-		if (status == connector_status_disconnected) {
-			dpaux->output = NULL;
-			return 0;
+		timeout = jiffies + msecs_to_jiffies(250);
+
+		while (time_before(jiffies, timeout)) {
+			status = drm_dp_aux_detect(aux);
+
+			if (status == connector_status_disconnected)
+				break;
+
+			usleep_range(1000, 2000);
 		}
 
-		usleep_range(1000, 2000);
+		if (status != connector_status_disconnected)
+			return -ETIMEDOUT;
+
+		dpaux->output = NULL;
 	}
 
-	return -ETIMEDOUT;
+	return 0;
 }
 
 enum drm_connector_status drm_dp_aux_detect(struct drm_dp_aux *aux)

From 245ce70cd466d2e003861788d13726417557eb39 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 3 Mar 2016 15:32:13 +0100
Subject: [PATCH 20/68] drm/tegra: dpaux: Retry on transfer size mismatch

When a transfer didn't complete transmission of the requested number of
bytes, signal that the transaction should be retried.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dpaux.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
index 4a35a6e0e2aa..819fdd9b4413 100644
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c
@@ -121,6 +121,7 @@ static ssize_t tegra_dpaux_transfer(struct drm_dp_aux *aux,
 	struct tegra_dpaux *dpaux = to_dpaux(aux);
 	unsigned long status;
 	ssize_t ret = 0;
+	u8 reply = 0;
 	u32 value;
 
 	/* Tegra has 4x4 byte DP AUX transmit and receive FIFOs. */
@@ -215,23 +216,23 @@ static ssize_t tegra_dpaux_transfer(struct drm_dp_aux *aux,
 
 	switch ((value & DPAUX_DP_AUXSTAT_REPLY_TYPE_MASK) >> 16) {
 	case 0x00:
-		msg->reply = DP_AUX_NATIVE_REPLY_ACK;
+		reply = DP_AUX_NATIVE_REPLY_ACK;
 		break;
 
 	case 0x01:
-		msg->reply = DP_AUX_NATIVE_REPLY_NACK;
+		reply = DP_AUX_NATIVE_REPLY_NACK;
 		break;
 
 	case 0x02:
-		msg->reply = DP_AUX_NATIVE_REPLY_DEFER;
+		reply = DP_AUX_NATIVE_REPLY_DEFER;
 		break;
 
 	case 0x04:
-		msg->reply = DP_AUX_I2C_REPLY_NACK;
+		reply = DP_AUX_I2C_REPLY_NACK;
 		break;
 
 	case 0x08:
-		msg->reply = DP_AUX_I2C_REPLY_DEFER;
+		reply = DP_AUX_I2C_REPLY_DEFER;
 		break;
 	}
 
@@ -239,14 +240,24 @@ static ssize_t tegra_dpaux_transfer(struct drm_dp_aux *aux,
 		if (msg->request & DP_AUX_I2C_READ) {
 			size_t count = value & DPAUX_DP_AUXSTAT_REPLY_MASK;
 
-			if (WARN_ON(count != msg->size))
-				count = min_t(size_t, count, msg->size);
+			/*
+			 * There might be a smarter way to do this, but since
+			 * the DP helpers will already retry transactions for
+			 * an -EBUSY return value, simply reuse that instead.
+			 */
+			if (count != msg->size) {
+				ret = -EBUSY;
+				goto out;
+			}
 
 			tegra_dpaux_read_fifo(dpaux, msg->buffer, count);
 			ret = count;
 		}
 	}
 
+	msg->reply = reply;
+
+out:
 	return ret;
 }
 

From 6c79f09fce4dd03f6b623b1d49e1b8a968822f60 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 24 Jun 2019 13:30:24 +0200
Subject: [PATCH 21/68] drm/tegra: dpaux: Fix crash if VDD supply is absent

In order to properly make the VDD supply optional, all accesses to the
regulator need to be ignored, because the regulator core doesn't treat
NULL special.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dpaux.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
index 819fdd9b4413..1b3acb5852a0 100644
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c
@@ -505,6 +505,8 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 
 			return PTR_ERR(dpaux->vdd);
 		}
+
+		dpaux->vdd = NULL;
 	}
 
 	platform_set_drvdata(pdev, dpaux);
@@ -698,13 +700,15 @@ int drm_dp_aux_attach(struct drm_dp_aux *aux, struct tegra_output *output)
 	output->connector.polled = DRM_CONNECTOR_POLL_HPD;
 	dpaux->output = output;
 
-	err = regulator_enable(dpaux->vdd);
-	if (err < 0)
-		return err;
-
 	if (output->panel) {
 		enum drm_connector_status status;
 
+		if (dpaux->vdd) {
+			err = regulator_enable(dpaux->vdd);
+			if (err < 0)
+				return err;
+		}
+
 		timeout = jiffies + msecs_to_jiffies(250);
 
 		while (time_before(jiffies, timeout)) {
@@ -732,13 +736,15 @@ int drm_dp_aux_detach(struct drm_dp_aux *aux)
 
 	disable_irq(dpaux->irq);
 
-	err = regulator_disable(dpaux->vdd);
-	if (err < 0)
-		return err;
-
 	if (dpaux->output->panel) {
 		enum drm_connector_status status;
 
+		if (dpaux->vdd) {
+			err = regulator_disable(dpaux->vdd);
+			if (err < 0)
+				return err;
+		}
+
 		timeout = jiffies + msecs_to_jiffies(250);
 
 		while (time_before(jiffies, timeout)) {

From fc4ebe52872665a42028450d720923695e9d4e20 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 27 Jun 2019 12:24:41 +0200
Subject: [PATCH 22/68] drm/tegra: dpaux: Parameterize CMH, DRVZ and DRVI

The CMH, DRVZ and DRVI values vary depending on the SoC generation. Move
them into SoC specific structures so that DT compatible string matching
can be used to select the right parameters and write them to hardware at
the right time.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dpaux.c | 48 +++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
index 1b3acb5852a0..883ed2f025c3 100644
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c
@@ -9,6 +9,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/of_gpio.h>
 #include <linux/pinctrl/pinconf-generic.h>
 #include <linux/pinctrl/pinctrl.h>
@@ -30,10 +31,18 @@
 static DEFINE_MUTEX(dpaux_lock);
 static LIST_HEAD(dpaux_list);
 
+struct tegra_dpaux_soc {
+	unsigned int cmh;
+	unsigned int drvz;
+	unsigned int drvi;
+};
+
 struct tegra_dpaux {
 	struct drm_dp_aux aux;
 	struct device *dev;
 
+	const struct tegra_dpaux_soc *soc;
+
 	void __iomem *regs;
 	int irq;
 
@@ -322,9 +331,9 @@ static int tegra_dpaux_pad_config(struct tegra_dpaux *dpaux, unsigned function)
 
 	switch (function) {
 	case DPAUX_PADCTL_FUNC_AUX:
-		value = DPAUX_HYBRID_PADCTL_AUX_CMH(2) |
-			DPAUX_HYBRID_PADCTL_AUX_DRVZ(4) |
-			DPAUX_HYBRID_PADCTL_AUX_DRVI(0x18) |
+		value = DPAUX_HYBRID_PADCTL_AUX_CMH(dpaux->soc->cmh) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVZ(dpaux->soc->drvz) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVI(dpaux->soc->drvi) |
 			DPAUX_HYBRID_PADCTL_AUX_INPUT_RCV |
 			DPAUX_HYBRID_PADCTL_MODE_AUX;
 		break;
@@ -332,9 +341,9 @@ static int tegra_dpaux_pad_config(struct tegra_dpaux *dpaux, unsigned function)
 	case DPAUX_PADCTL_FUNC_I2C:
 		value = DPAUX_HYBRID_PADCTL_I2C_SDA_INPUT_RCV |
 			DPAUX_HYBRID_PADCTL_I2C_SCL_INPUT_RCV |
-			DPAUX_HYBRID_PADCTL_AUX_CMH(2) |
-			DPAUX_HYBRID_PADCTL_AUX_DRVZ(4) |
-			DPAUX_HYBRID_PADCTL_AUX_DRVI(0x18) |
+			DPAUX_HYBRID_PADCTL_AUX_CMH(dpaux->soc->cmh) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVZ(dpaux->soc->drvz) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVI(dpaux->soc->drvi) |
 			DPAUX_HYBRID_PADCTL_MODE_I2C;
 		break;
 
@@ -448,6 +457,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 	if (!dpaux)
 		return -ENOMEM;
 
+	dpaux->soc = of_device_get_match_data(&pdev->dev);
 	INIT_WORK(&dpaux->work, tegra_dpaux_hotplug);
 	init_completion(&dpaux->complete);
 	INIT_LIST_HEAD(&dpaux->list);
@@ -655,11 +665,29 @@ static const struct dev_pm_ops tegra_dpaux_pm_ops = {
 	SET_RUNTIME_PM_OPS(tegra_dpaux_suspend, tegra_dpaux_resume, NULL)
 };
 
+static const struct tegra_dpaux_soc tegra124_dpaux_soc = {
+	.cmh = 0x02,
+	.drvz = 0x04,
+	.drvi = 0x18,
+};
+
+static const struct tegra_dpaux_soc tegra210_dpaux_soc = {
+	.cmh = 0x02,
+	.drvz = 0x04,
+	.drvi = 0x30,
+};
+
+static const struct tegra_dpaux_soc tegra194_dpaux_soc = {
+	.cmh = 0x02,
+	.drvz = 0x04,
+	.drvi = 0x2c,
+};
+
 static const struct of_device_id tegra_dpaux_of_match[] = {
-	{ .compatible = "nvidia,tegra194-dpaux", },
-	{ .compatible = "nvidia,tegra186-dpaux", },
-	{ .compatible = "nvidia,tegra210-dpaux", },
-	{ .compatible = "nvidia,tegra124-dpaux", },
+	{ .compatible = "nvidia,tegra194-dpaux", .data = &tegra194_dpaux_soc },
+	{ .compatible = "nvidia,tegra186-dpaux", .data = &tegra210_dpaux_soc },
+	{ .compatible = "nvidia,tegra210-dpaux", .data = &tegra210_dpaux_soc },
+	{ .compatible = "nvidia,tegra124-dpaux", .data = &tegra124_dpaux_soc },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, tegra_dpaux_of_match);

From 0fa5c1bdd2f7dfc3cc5a9b22470fb4ffdbe22272 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 5 Feb 2018 14:31:27 +0100
Subject: [PATCH 23/68] drm/tegra: Add missing kerneldoc for struct drm_dp_link

The drm_dp_link structure tracks capabilities on the DP link. Add some
kerneldoc to explain what each of its fields means.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/tegra/dp.h b/drivers/gpu/drm/tegra/dp.h
index 88842fd25abf..1cf252e7309a 100644
--- a/drivers/gpu/drm/tegra/dp.h
+++ b/drivers/gpu/drm/tegra/dp.h
@@ -11,6 +11,13 @@ struct drm_dp_aux;
 
 #define DP_LINK_CAP_ENHANCED_FRAMING (1 << 0)
 
+/**
+ * struct drm_dp_link - DP link capabilities
+ * @revision: DP specification revision supported on the link
+ * @rate: maximum clock rate supported on the link
+ * @num_lanes: maximum number of lanes supported on the link
+ * @capabilities: bitmask of capabilities supported on the link
+ */
 struct drm_dp_link {
 	unsigned char revision;
 	unsigned int rate;

From 1abd6b3304d47ff055063e0d59fc03bb27e0e196 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 3 Dec 2015 11:44:17 +0100
Subject: [PATCH 24/68] drm/tegra: dp: Add drm_dp_link_reset() implementation

Subsequent patches will add non-volatile fields to struct drm_dp_link,
so introduce a function to zero out only the volatile fields.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index 50ba967ebcbd..c19060b8753a 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -8,6 +8,17 @@
 
 #include "dp.h"
 
+static void drm_dp_link_reset(struct drm_dp_link *link)
+{
+	if (!link)
+		return;
+
+	link->revision = 0;
+	link->rate = 0;
+	link->num_lanes = 0;
+	link->capabilities = 0;
+}
+
 /**
  * drm_dp_link_probe() - probe a DisplayPort link for capabilities
  * @aux: DisplayPort AUX channel
@@ -24,7 +35,7 @@ int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
 	u8 values[3];
 	int err;
 
-	memset(link, 0, sizeof(*link));
+	drm_dp_link_reset(link);
 
 	err = drm_dp_dpcd_read(aux, DP_DPCD_REV, values, sizeof(values));
 	if (err < 0)

From c728e2d4a6546905f1179a8237860d8d276aaadc Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 21 Jul 2015 16:33:48 +0200
Subject: [PATCH 25/68] drm/tegra: dp: Track link capabilities alongside
 settings

Store capabilities in max_* fields and add separate fields for the
currently selected settings.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.c    | 16 +++++++++++-----
 drivers/gpu/drm/tegra/dp.h    | 15 ++++++++++-----
 drivers/gpu/drm/tegra/dpaux.c |  8 ++++----
 drivers/gpu/drm/tegra/sor.c   | 28 ++++++++++++++--------------
 4 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index c19060b8753a..e55efd46a7d9 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -14,9 +14,12 @@ static void drm_dp_link_reset(struct drm_dp_link *link)
 		return;
 
 	link->revision = 0;
-	link->rate = 0;
-	link->num_lanes = 0;
+	link->max_rate = 0;
+	link->max_lanes = 0;
 	link->capabilities = 0;
+
+	link->rate = 0;
+	link->lanes = 0;
 }
 
 /**
@@ -42,12 +45,15 @@ int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
 		return err;
 
 	link->revision = values[0];
-	link->rate = drm_dp_bw_code_to_link_rate(values[1]);
-	link->num_lanes = values[2] & DP_MAX_LANE_COUNT_MASK;
+	link->max_rate = drm_dp_bw_code_to_link_rate(values[1]);
+	link->max_lanes = values[2] & DP_MAX_LANE_COUNT_MASK;
 
 	if (values[2] & DP_ENHANCED_FRAME_CAP)
 		link->capabilities |= DP_LINK_CAP_ENHANCED_FRAMING;
 
+	link->rate = link->max_rate;
+	link->lanes = link->max_lanes;
+
 	return 0;
 }
 
@@ -131,7 +137,7 @@ int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link)
 	int err;
 
 	values[0] = drm_dp_link_rate_to_bw_code(link->rate);
-	values[1] = link->num_lanes;
+	values[1] = link->lanes;
 
 	if (link->capabilities & DP_LINK_CAP_ENHANCED_FRAMING)
 		values[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
diff --git a/drivers/gpu/drm/tegra/dp.h b/drivers/gpu/drm/tegra/dp.h
index 1cf252e7309a..ec0342d4c95e 100644
--- a/drivers/gpu/drm/tegra/dp.h
+++ b/drivers/gpu/drm/tegra/dp.h
@@ -12,17 +12,22 @@ struct drm_dp_aux;
 #define DP_LINK_CAP_ENHANCED_FRAMING (1 << 0)
 
 /**
- * struct drm_dp_link - DP link capabilities
+ * struct drm_dp_link - DP link capabilities and configuration
  * @revision: DP specification revision supported on the link
- * @rate: maximum clock rate supported on the link
- * @num_lanes: maximum number of lanes supported on the link
+ * @max_rate: maximum clock rate supported on the link
+ * @max_lanes: maximum number of lanes supported on the link
  * @capabilities: bitmask of capabilities supported on the link
+ * @rate: currently configured link rate
+ * @lanes: currently configured number of lanes
  */
 struct drm_dp_link {
 	unsigned char revision;
-	unsigned int rate;
-	unsigned int num_lanes;
+	unsigned int max_rate;
+	unsigned int max_lanes;
 	unsigned long capabilities;
+
+	unsigned int rate;
+	unsigned int lanes;
 };
 
 int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link);
diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
index 883ed2f025c3..bd3361cea49b 100644
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c
@@ -849,14 +849,14 @@ int drm_dp_aux_train(struct drm_dp_aux *aux, struct drm_dp_link *link,
 	if (tp == DP_TRAINING_PATTERN_DISABLE)
 		return 0;
 
-	for (i = 0; i < link->num_lanes; i++)
+	for (i = 0; i < link->lanes; i++)
 		values[i] = DP_TRAIN_MAX_PRE_EMPHASIS_REACHED |
 			    DP_TRAIN_PRE_EMPH_LEVEL_0 |
 			    DP_TRAIN_MAX_SWING_REACHED |
 			    DP_TRAIN_VOLTAGE_SWING_LEVEL_0;
 
 	err = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_SET, values,
-				link->num_lanes);
+				link->lanes);
 	if (err < 0)
 		return err;
 
@@ -868,13 +868,13 @@ int drm_dp_aux_train(struct drm_dp_aux *aux, struct drm_dp_link *link,
 
 	switch (tp) {
 	case DP_TRAINING_PATTERN_1:
-		if (!drm_dp_clock_recovery_ok(status, link->num_lanes))
+		if (!drm_dp_clock_recovery_ok(status, link->lanes))
 			return -EAGAIN;
 
 		break;
 
 	case DP_TRAINING_PATTERN_2:
-		if (!drm_dp_channel_eq_ok(status, link->num_lanes))
+		if (!drm_dp_channel_eq_ok(status, link->lanes))
 			return -EAGAIN;
 
 		break;
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 91d5c5041d2c..dca71250d88c 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -650,7 +650,7 @@ static int tegra_sor_dp_train_fast(struct tegra_sor *sor,
 	if (err < 0)
 		return err;
 
-	for (i = 0, value = 0; i < link->num_lanes; i++) {
+	for (i = 0, value = 0; i < link->lanes; i++) {
 		unsigned long lane = SOR_DP_TPG_CHANNEL_CODING |
 				     SOR_DP_TPG_SCRAMBLER_NONE |
 				     SOR_DP_TPG_PATTERN_TRAIN1;
@@ -671,7 +671,7 @@ static int tegra_sor_dp_train_fast(struct tegra_sor *sor,
 	value |= SOR_DP_SPARE_MACRO_SOR_CLK;
 	tegra_sor_writel(sor, value, SOR_DP_SPARE0);
 
-	for (i = 0, value = 0; i < link->num_lanes; i++) {
+	for (i = 0, value = 0; i < link->lanes; i++) {
 		unsigned long lane = SOR_DP_TPG_CHANNEL_CODING |
 				     SOR_DP_TPG_SCRAMBLER_NONE |
 				     SOR_DP_TPG_PATTERN_TRAIN2;
@@ -686,7 +686,7 @@ static int tegra_sor_dp_train_fast(struct tegra_sor *sor,
 	if (err < 0)
 		return err;
 
-	for (i = 0, value = 0; i < link->num_lanes; i++) {
+	for (i = 0, value = 0; i < link->lanes; i++) {
 		unsigned long lane = SOR_DP_TPG_CHANNEL_CODING |
 				     SOR_DP_TPG_SCRAMBLER_GALIOS |
 				     SOR_DP_TPG_PATTERN_NONE;
@@ -913,11 +913,11 @@ static int tegra_sor_compute_config(struct tegra_sor *sor,
 	u32 num_syms_per_line;
 	unsigned int i;
 
-	if (!link_rate || !link->num_lanes || !pclk || !config->bits_per_pixel)
+	if (!link_rate || !link->lanes || !pclk || !config->bits_per_pixel)
 		return -EINVAL;
 
-	output = link_rate * 8 * link->num_lanes;
 	input = pclk * config->bits_per_pixel;
+	output = link_rate * 8 * link->lanes;
 
 	if (input >= output)
 		return -ERANGE;
@@ -960,7 +960,7 @@ static int tegra_sor_compute_config(struct tegra_sor *sor,
 	watermark = div_u64(watermark + params.error, f);
 	config->watermark = watermark + (config->bits_per_pixel / 8) + 2;
 	num_syms_per_line = (mode->hdisplay * config->bits_per_pixel) *
-			    (link->num_lanes * 8);
+			    (link->lanes * 8);
 
 	if (config->watermark > 30) {
 		config->watermark = 30;
@@ -980,12 +980,12 @@ static int tegra_sor_compute_config(struct tegra_sor *sor,
 	if (link->capabilities & DP_LINK_CAP_ENHANCED_FRAMING)
 		config->hblank_symbols -= 3;
 
-	config->hblank_symbols -= 12 / link->num_lanes;
+	config->hblank_symbols -= 12 / link->lanes;
 
 	/* compute the number of symbols per vertical blanking interval */
 	num = (mode->hdisplay - 25) * link_rate;
 	config->vblank_symbols = div_u64(num, pclk);
-	config->vblank_symbols -= 36 / link->num_lanes + 4;
+	config->vblank_symbols -= 36 / link->lanes + 4;
 
 	dev_dbg(sor->dev, "blank symbols: H:%u V:%u\n", config->hblank_symbols,
 		config->vblank_symbols);
@@ -1831,17 +1831,17 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	/* power DP lanes */
 	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
 
-	if (link.num_lanes <= 2)
+	if (link.lanes <= 2)
 		value &= ~(SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_2);
 	else
 		value |= SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_2;
 
-	if (link.num_lanes <= 1)
+	if (link.lanes <= 1)
 		value &= ~SOR_DP_PADCTL_PD_TXD_1;
 	else
 		value |= SOR_DP_PADCTL_PD_TXD_1;
 
-	if (link.num_lanes == 0)
+	if (link.lanes == 0)
 		value &= ~SOR_DP_PADCTL_PD_TXD_0;
 	else
 		value |= SOR_DP_PADCTL_PD_TXD_0;
@@ -1850,7 +1850,7 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 
 	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
 	value &= ~SOR_DP_LINKCTL_LANE_COUNT_MASK;
-	value |= SOR_DP_LINKCTL_LANE_COUNT(link.num_lanes);
+	value |= SOR_DP_LINKCTL_LANE_COUNT(link.lanes);
 	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
 
 	/* start lane sequencer */
@@ -1907,7 +1907,7 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 		dev_err(sor->dev, "failed to configure eDP link: %d\n", err);
 
 	rate = drm_dp_link_rate_to_bw_code(link.rate);
-	lanes = link.num_lanes;
+	lanes = link.lanes;
 
 	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
 	value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK;
@@ -1925,7 +1925,7 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 
 	/* disable training pattern generator */
 
-	for (i = 0; i < link.num_lanes; i++) {
+	for (i = 0; i < link.lanes; i++) {
 		unsigned long lane = SOR_DP_TPG_CHANNEL_CODING |
 				     SOR_DP_TPG_SCRAMBLER_GALIOS |
 				     SOR_DP_TPG_PATTERN_NONE;

From 27ba465ce3397c4705f87c1f73e6d67c1b48ef0f Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 3 Dec 2015 12:45:45 +0100
Subject: [PATCH 26/68] drm/tegra: dp: Turn link capabilities into booleans

Rather than storing capabilities as flags in an integer, use a separate
boolean per capability. This simplifies the code that checks for these
capabilities.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.c  | 18 +++++++++++++++---
 drivers/gpu/drm/tegra/dp.h  | 22 +++++++++++++++++++---
 drivers/gpu/drm/tegra/sor.c |  4 ++--
 3 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index e55efd46a7d9..e7602fc39a4a 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -8,6 +8,17 @@
 
 #include "dp.h"
 
+static void drm_dp_link_caps_reset(struct drm_dp_link_caps *caps)
+{
+	caps->enhanced_framing = false;
+}
+
+void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
+			   const struct drm_dp_link_caps *src)
+{
+	dest->enhanced_framing = src->enhanced_framing;
+}
+
 static void drm_dp_link_reset(struct drm_dp_link *link)
 {
 	if (!link)
@@ -16,7 +27,8 @@ static void drm_dp_link_reset(struct drm_dp_link *link)
 	link->revision = 0;
 	link->max_rate = 0;
 	link->max_lanes = 0;
-	link->capabilities = 0;
+
+	drm_dp_link_caps_reset(&link->caps);
 
 	link->rate = 0;
 	link->lanes = 0;
@@ -49,7 +61,7 @@ int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
 	link->max_lanes = values[2] & DP_MAX_LANE_COUNT_MASK;
 
 	if (values[2] & DP_ENHANCED_FRAME_CAP)
-		link->capabilities |= DP_LINK_CAP_ENHANCED_FRAMING;
+		link->caps.enhanced_framing = true;
 
 	link->rate = link->max_rate;
 	link->lanes = link->max_lanes;
@@ -139,7 +151,7 @@ int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link)
 	values[0] = drm_dp_link_rate_to_bw_code(link->rate);
 	values[1] = link->lanes;
 
-	if (link->capabilities & DP_LINK_CAP_ENHANCED_FRAMING)
+	if (link->caps.enhanced_framing)
 		values[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
 
 	err = drm_dp_dpcd_write(aux, DP_LINK_BW_SET, values, sizeof(values));
diff --git a/drivers/gpu/drm/tegra/dp.h b/drivers/gpu/drm/tegra/dp.h
index ec0342d4c95e..6246f9afb5fe 100644
--- a/drivers/gpu/drm/tegra/dp.h
+++ b/drivers/gpu/drm/tegra/dp.h
@@ -7,16 +7,31 @@
 #ifndef DRM_TEGRA_DP_H
 #define DRM_TEGRA_DP_H 1
 
+#include <linux/types.h>
+
 struct drm_dp_aux;
 
-#define DP_LINK_CAP_ENHANCED_FRAMING (1 << 0)
+/**
+ * struct drm_dp_link_caps - DP link capabilities
+ */
+struct drm_dp_link_caps {
+	/**
+	 * @enhanced_framing:
+	 *
+	 * enhanced framing capability (mandatory as of DP 1.2)
+	 */
+	bool enhanced_framing;
+};
+
+void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
+			   const struct drm_dp_link_caps *src);
 
 /**
  * struct drm_dp_link - DP link capabilities and configuration
  * @revision: DP specification revision supported on the link
  * @max_rate: maximum clock rate supported on the link
  * @max_lanes: maximum number of lanes supported on the link
- * @capabilities: bitmask of capabilities supported on the link
+ * @caps: capabilities supported on the link (see &drm_dp_link_caps)
  * @rate: currently configured link rate
  * @lanes: currently configured number of lanes
  */
@@ -24,7 +39,8 @@ struct drm_dp_link {
 	unsigned char revision;
 	unsigned int max_rate;
 	unsigned int max_lanes;
-	unsigned long capabilities;
+
+	struct drm_dp_link_caps caps;
 
 	unsigned int rate;
 	unsigned int lanes;
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index dca71250d88c..dd118366455b 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -977,7 +977,7 @@ static int tegra_sor_compute_config(struct tegra_sor *sor,
 	num = ((mode->htotal - mode->hdisplay) - 7) * link_rate;
 	config->hblank_symbols = div_u64(num, pclk);
 
-	if (link->capabilities & DP_LINK_CAP_ENHANCED_FRAMING)
+	if (link->caps.enhanced_framing)
 		config->hblank_symbols -= 3;
 
 	config->hblank_symbols -= 12 / link->lanes;
@@ -1918,7 +1918,7 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	value &= ~SOR_DP_LINKCTL_LANE_COUNT_MASK;
 	value |= SOR_DP_LINKCTL_LANE_COUNT(lanes);
 
-	if (link.capabilities & DP_LINK_CAP_ENHANCED_FRAMING)
+	if (link.caps.enhanced_framing)
 		value |= SOR_DP_LINKCTL_ENHANCED_FRAME;
 
 	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);

From 480770440ad1681e5100e9719fc5f9bb8bc46ca8 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 3 Dec 2015 13:02:52 +0100
Subject: [PATCH 27/68] drm/tegra: dp: Probe link using existing parsing
 helpers

Use existing parsing helpers to probe a DisplayPort link.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index e7602fc39a4a..f9234f66062d 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -47,21 +47,20 @@ static void drm_dp_link_reset(struct drm_dp_link *link)
  */
 int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
 {
-	u8 values[3];
+	u8 dpcd[DP_RECEIVER_CAP_SIZE];
 	int err;
 
 	drm_dp_link_reset(link);
 
-	err = drm_dp_dpcd_read(aux, DP_DPCD_REV, values, sizeof(values));
+	err = drm_dp_dpcd_read(aux, DP_DPCD_REV, dpcd, sizeof(dpcd));
 	if (err < 0)
 		return err;
 
-	link->revision = values[0];
-	link->max_rate = drm_dp_bw_code_to_link_rate(values[1]);
-	link->max_lanes = values[2] & DP_MAX_LANE_COUNT_MASK;
+	link->revision = dpcd[DP_DPCD_REV];
+	link->max_rate = drm_dp_max_link_rate(dpcd);
+	link->max_lanes = drm_dp_max_lane_count(dpcd);
 
-	if (values[2] & DP_ENHANCED_FRAME_CAP)
-		link->caps.enhanced_framing = true;
+	link->caps.enhanced_framing = drm_dp_enhanced_frame_cap(dpcd);
 
 	link->rate = link->max_rate;
 	link->lanes = link->max_lanes;

From cb072eebfa038361b4f578b65a205ad0abc6fe88 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 3 Dec 2015 13:07:43 +0100
Subject: [PATCH 28/68] drm/tegra: dp: Read fast training capability from link

While probing the DisplayPort link, query the fast training capability.
If supported, drivers can use the fast link training sequence instead of
the more involved full link training sequence.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.c | 3 +++
 drivers/gpu/drm/tegra/dp.h | 7 +++++++
 2 files changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index f9234f66062d..97fc0225483f 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -11,12 +11,14 @@
 static void drm_dp_link_caps_reset(struct drm_dp_link_caps *caps)
 {
 	caps->enhanced_framing = false;
+	caps->fast_training = false;
 }
 
 void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
 			   const struct drm_dp_link_caps *src)
 {
 	dest->enhanced_framing = src->enhanced_framing;
+	dest->fast_training = src->fast_training;
 }
 
 static void drm_dp_link_reset(struct drm_dp_link *link)
@@ -61,6 +63,7 @@ int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
 	link->max_lanes = drm_dp_max_lane_count(dpcd);
 
 	link->caps.enhanced_framing = drm_dp_enhanced_frame_cap(dpcd);
+	link->caps.fast_training = drm_dp_fast_training_cap(dpcd);
 
 	link->rate = link->max_rate;
 	link->lanes = link->max_lanes;
diff --git a/drivers/gpu/drm/tegra/dp.h b/drivers/gpu/drm/tegra/dp.h
index 6246f9afb5fe..d6ae477bab5c 100644
--- a/drivers/gpu/drm/tegra/dp.h
+++ b/drivers/gpu/drm/tegra/dp.h
@@ -21,6 +21,13 @@ struct drm_dp_link_caps {
 	 * enhanced framing capability (mandatory as of DP 1.2)
 	 */
 	bool enhanced_framing;
+
+	/**
+	 * @fast_training:
+	 *
+	 * AUX CH handshake not required for link training
+	 */
+	bool fast_training;
 };
 
 void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,

From db199502fa8b62afddde5379d94cac0439202111 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 7 Jul 2015 20:52:07 +0200
Subject: [PATCH 29/68] drm/tegra: dp: Read TPS3 capability from sink

The TPS3 capability can be exposed by DP 1.2 and later sinks if they
support the alternative training pattern for channel equalization.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.c | 3 +++
 drivers/gpu/drm/tegra/dp.h | 7 +++++++
 2 files changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index 97fc0225483f..e22ebab677b9 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -11,6 +11,7 @@
 static void drm_dp_link_caps_reset(struct drm_dp_link_caps *caps)
 {
 	caps->enhanced_framing = false;
+	caps->tps3_supported = false;
 	caps->fast_training = false;
 }
 
@@ -18,6 +19,7 @@ void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
 			   const struct drm_dp_link_caps *src)
 {
 	dest->enhanced_framing = src->enhanced_framing;
+	dest->tps3_supported = src->tps3_supported;
 	dest->fast_training = src->fast_training;
 }
 
@@ -63,6 +65,7 @@ int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
 	link->max_lanes = drm_dp_max_lane_count(dpcd);
 
 	link->caps.enhanced_framing = drm_dp_enhanced_frame_cap(dpcd);
+	link->caps.tps3_supported = drm_dp_tps3_supported(dpcd);
 	link->caps.fast_training = drm_dp_fast_training_cap(dpcd);
 
 	link->rate = link->max_rate;
diff --git a/drivers/gpu/drm/tegra/dp.h b/drivers/gpu/drm/tegra/dp.h
index d6ae477bab5c..999078812943 100644
--- a/drivers/gpu/drm/tegra/dp.h
+++ b/drivers/gpu/drm/tegra/dp.h
@@ -22,6 +22,13 @@ struct drm_dp_link_caps {
 	 */
 	bool enhanced_framing;
 
+	/**
+	 * tps3_supported:
+	 *
+	 * training pattern sequence 3 supported for equalization
+	 */
+	bool tps3_supported;
+
 	/**
 	 * @fast_training:
 	 *

From 6c651b13e436030f996bcfb2f76833af94e44531 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 5 Feb 2018 14:07:57 +0100
Subject: [PATCH 30/68] drm/tegra: dp: Read channel coding capability from sink

Parse from the sink capabilities whether or not it supports ANSI 8B/10B
channel coding as specified in ANSI X3.230-1994, clause 11.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.c | 3 +++
 drivers/gpu/drm/tegra/dp.h | 7 +++++++
 2 files changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index e22ebab677b9..0bd87cff4575 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -13,6 +13,7 @@ static void drm_dp_link_caps_reset(struct drm_dp_link_caps *caps)
 	caps->enhanced_framing = false;
 	caps->tps3_supported = false;
 	caps->fast_training = false;
+	caps->channel_coding = false;
 }
 
 void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
@@ -21,6 +22,7 @@ void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
 	dest->enhanced_framing = src->enhanced_framing;
 	dest->tps3_supported = src->tps3_supported;
 	dest->fast_training = src->fast_training;
+	dest->channel_coding = src->channel_coding;
 }
 
 static void drm_dp_link_reset(struct drm_dp_link *link)
@@ -67,6 +69,7 @@ int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
 	link->caps.enhanced_framing = drm_dp_enhanced_frame_cap(dpcd);
 	link->caps.tps3_supported = drm_dp_tps3_supported(dpcd);
 	link->caps.fast_training = drm_dp_fast_training_cap(dpcd);
+	link->caps.channel_coding = drm_dp_channel_coding_supported(dpcd);
 
 	link->rate = link->max_rate;
 	link->lanes = link->max_lanes;
diff --git a/drivers/gpu/drm/tegra/dp.h b/drivers/gpu/drm/tegra/dp.h
index 999078812943..984dac21568e 100644
--- a/drivers/gpu/drm/tegra/dp.h
+++ b/drivers/gpu/drm/tegra/dp.h
@@ -35,6 +35,13 @@ struct drm_dp_link_caps {
 	 * AUX CH handshake not required for link training
 	 */
 	bool fast_training;
+
+	/**
+	 * @channel_coding:
+	 *
+	 * ANSI 8B/10B channel coding capability
+	 */
+	bool channel_coding;
 };
 
 void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,

From 4ff9ba5674d16857372b936a8d08920a9851d1cd Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 5 Feb 2018 15:16:18 +0100
Subject: [PATCH 31/68] drm/tegra: dp: Read alternate scrambler reset
 capability from sink

Parse from the sink capabilities whether or not the eDP alternate
scrambler reset value of 0xfffe is supported.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.c | 5 +++++
 drivers/gpu/drm/tegra/dp.h | 7 +++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index 0bd87cff4575..1f48c2190e3b 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -14,6 +14,7 @@ static void drm_dp_link_caps_reset(struct drm_dp_link_caps *caps)
 	caps->tps3_supported = false;
 	caps->fast_training = false;
 	caps->channel_coding = false;
+	caps->alternate_scrambler_reset = false;
 }
 
 void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
@@ -23,6 +24,7 @@ void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
 	dest->tps3_supported = src->tps3_supported;
 	dest->fast_training = src->fast_training;
 	dest->channel_coding = src->channel_coding;
+	dest->alternate_scrambler_reset = src->alternate_scrambler_reset;
 }
 
 static void drm_dp_link_reset(struct drm_dp_link *link)
@@ -71,6 +73,9 @@ int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
 	link->caps.fast_training = drm_dp_fast_training_cap(dpcd);
 	link->caps.channel_coding = drm_dp_channel_coding_supported(dpcd);
 
+	if (drm_dp_alternate_scrambler_reset_cap(dpcd))
+		link->caps.alternate_scrambler_reset = true;
+
 	link->rate = link->max_rate;
 	link->lanes = link->max_lanes;
 
diff --git a/drivers/gpu/drm/tegra/dp.h b/drivers/gpu/drm/tegra/dp.h
index 984dac21568e..45e8ff18ab6a 100644
--- a/drivers/gpu/drm/tegra/dp.h
+++ b/drivers/gpu/drm/tegra/dp.h
@@ -42,6 +42,13 @@ struct drm_dp_link_caps {
 	 * ANSI 8B/10B channel coding capability
 	 */
 	bool channel_coding;
+
+	/**
+	 * @alternate_scrambler_reset:
+	 *
+	 * eDP alternate scrambler reset capability
+	 */
+	bool alternate_scrambler_reset;
 };
 
 void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,

From 7aa3cc540d00b0be7d225202fa5c2d0c8e99f3f1 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 7 Jul 2015 20:59:22 +0200
Subject: [PATCH 32/68] drm/tegra: dp: Read eDP version from DPCD

If the sink supports eDP, read the eDP revision from it's DPCD.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.c | 18 ++++++++++++++++--
 drivers/gpu/drm/tegra/dp.h |  2 ++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index 1f48c2190e3b..2be0a47ecbec 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -5,9 +5,12 @@
  */
 
 #include <drm/drm_dp_helper.h>
+#include <drm/drm_print.h>
 
 #include "dp.h"
 
+static const u8 drm_dp_edp_revisions[] = { 0x11, 0x12, 0x13, 0x14 };
+
 static void drm_dp_link_caps_reset(struct drm_dp_link_caps *caps)
 {
 	caps->enhanced_framing = false;
@@ -37,6 +40,7 @@ static void drm_dp_link_reset(struct drm_dp_link *link)
 	link->max_lanes = 0;
 
 	drm_dp_link_caps_reset(&link->caps);
+	link->edp = 0;
 
 	link->rate = 0;
 	link->lanes = 0;
@@ -55,7 +59,7 @@ static void drm_dp_link_reset(struct drm_dp_link *link)
  */
 int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
 {
-	u8 dpcd[DP_RECEIVER_CAP_SIZE];
+	u8 dpcd[DP_RECEIVER_CAP_SIZE], value;
 	int err;
 
 	drm_dp_link_reset(link);
@@ -73,9 +77,19 @@ int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
 	link->caps.fast_training = drm_dp_fast_training_cap(dpcd);
 	link->caps.channel_coding = drm_dp_channel_coding_supported(dpcd);
 
-	if (drm_dp_alternate_scrambler_reset_cap(dpcd))
+	if (drm_dp_alternate_scrambler_reset_cap(dpcd)) {
 		link->caps.alternate_scrambler_reset = true;
 
+		err = drm_dp_dpcd_readb(aux, DP_EDP_DPCD_REV, &value);
+		if (err < 0)
+			return err;
+
+		if (value >= ARRAY_SIZE(drm_dp_edp_revisions))
+			DRM_ERROR("unsupported eDP version: %02x\n", value);
+		else
+			link->edp = drm_dp_edp_revisions[value];
+	}
+
 	link->rate = link->max_rate;
 	link->lanes = link->max_lanes;
 
diff --git a/drivers/gpu/drm/tegra/dp.h b/drivers/gpu/drm/tegra/dp.h
index 45e8ff18ab6a..681cbd0a0094 100644
--- a/drivers/gpu/drm/tegra/dp.h
+++ b/drivers/gpu/drm/tegra/dp.h
@@ -60,6 +60,7 @@ void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
  * @max_rate: maximum clock rate supported on the link
  * @max_lanes: maximum number of lanes supported on the link
  * @caps: capabilities supported on the link (see &drm_dp_link_caps)
+ * @edp: eDP revision (0x11: eDP 1.1, 0x12: eDP 1.2, ...)
  * @rate: currently configured link rate
  * @lanes: currently configured number of lanes
  */
@@ -69,6 +70,7 @@ struct drm_dp_link {
 	unsigned int max_lanes;
 
 	struct drm_dp_link_caps caps;
+	unsigned char edp;
 
 	unsigned int rate;
 	unsigned int lanes;

From ad7f2dda38911698deb2cc9ea45362f9a127e3f4 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 7 Jul 2015 21:01:26 +0200
Subject: [PATCH 33/68] drm/tegra: dp: Read AUX read interval from DPCD

Store the AUX read interval from DPCD, so that it can be used to wait
for the durations given in the specification during link training.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.c | 31 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/tegra/dp.h | 11 +++++++++++
 2 files changed, 42 insertions(+)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index 2be0a47ecbec..757a0256592f 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -40,6 +40,8 @@ static void drm_dp_link_reset(struct drm_dp_link *link)
 	link->max_lanes = 0;
 
 	drm_dp_link_caps_reset(&link->caps);
+	link->aux_rd_interval.cr = 0;
+	link->aux_rd_interval.ce = 0;
 	link->edp = 0;
 
 	link->rate = 0;
@@ -60,6 +62,7 @@ static void drm_dp_link_reset(struct drm_dp_link *link)
 int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
 {
 	u8 dpcd[DP_RECEIVER_CAP_SIZE], value;
+	unsigned int rd_interval;
 	int err;
 
 	drm_dp_link_reset(link);
@@ -90,6 +93,34 @@ int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
 			link->edp = drm_dp_edp_revisions[value];
 	}
 
+	/*
+	 * The DPCD stores the AUX read interval in units of 4 ms. There are
+	 * two special cases:
+	 *
+	 *   1) if the TRAINING_AUX_RD_INTERVAL field is 0, the clock recovery
+	 *      and channel equalization should use 100 us or 400 us AUX read
+	 *      intervals, respectively
+	 *
+	 *   2) for DP v1.4 and above, clock recovery should always use 100 us
+	 *      AUX read intervals
+	 */
+	rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
+			   DP_TRAINING_AUX_RD_MASK;
+
+	if (rd_interval > 4) {
+		DRM_DEBUG_KMS("AUX interval %u out of range (max. 4)\n",
+			      rd_interval);
+		rd_interval = 4;
+	}
+
+	rd_interval *= 4 * USEC_PER_MSEC;
+
+	if (rd_interval == 0 || link->revision >= DP_DPCD_REV_14)
+		link->aux_rd_interval.cr = 100;
+
+	if (rd_interval == 0)
+		link->aux_rd_interval.ce = 400;
+
 	link->rate = link->max_rate;
 	link->lanes = link->max_lanes;
 
diff --git a/drivers/gpu/drm/tegra/dp.h b/drivers/gpu/drm/tegra/dp.h
index 681cbd0a0094..1fe2d4f45ba3 100644
--- a/drivers/gpu/drm/tegra/dp.h
+++ b/drivers/gpu/drm/tegra/dp.h
@@ -60,6 +60,7 @@ void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
  * @max_rate: maximum clock rate supported on the link
  * @max_lanes: maximum number of lanes supported on the link
  * @caps: capabilities supported on the link (see &drm_dp_link_caps)
+ * @aux_rd_interval: AUX read interval to use for training (in microseconds)
  * @edp: eDP revision (0x11: eDP 1.1, 0x12: eDP 1.2, ...)
  * @rate: currently configured link rate
  * @lanes: currently configured number of lanes
@@ -70,6 +71,16 @@ struct drm_dp_link {
 	unsigned int max_lanes;
 
 	struct drm_dp_link_caps caps;
+
+	/**
+	 * @cr: clock recovery read interval
+	 * @ce: channel equalization read interval
+	 */
+	struct {
+		unsigned int cr;
+		unsigned int ce;
+	} aux_rd_interval;
+
 	unsigned char edp;
 
 	unsigned int rate;

From 553769ff8d8c452cc81a5fe5b0a68cc456c31db3 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 10 Jun 2015 16:35:44 +0200
Subject: [PATCH 34/68] drm/tegra: dp: Set channel coding on link configuration

Make use of ANSI 8B/10B channel coding if the DisplayPort sink supports
it.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index 757a0256592f..ca287b50fad8 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -203,7 +203,7 @@ int drm_dp_link_power_down(struct drm_dp_aux *aux, struct drm_dp_link *link)
  */
 int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link)
 {
-	u8 values[2];
+	u8 values[2], value;
 	int err;
 
 	values[0] = drm_dp_link_rate_to_bw_code(link->rate);
@@ -216,5 +216,14 @@ int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link)
 	if (err < 0)
 		return err;
 
+	if (link->caps.channel_coding)
+		value = DP_SET_ANSI_8B10B;
+	else
+		value = 0;
+
+	err = drm_dp_dpcd_writeb(aux, DP_MAIN_LINK_CHANNEL_CODING_SET, value);
+	if (err < 0)
+		return err;
+
 	return 0;
 }

From c4a27288520d35e7e6acc6e36fba4585e1bddde6 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 7 Jul 2015 21:14:12 +0200
Subject: [PATCH 35/68] drm/tegra: dp: Enable alternate scrambler reset when
 supported

If the sink is eDP and supports the alternate scrambler reset, enable
it.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index ca287b50fad8..638081b568f4 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -225,5 +225,12 @@ int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link)
 	if (err < 0)
 		return err;
 
+	if (link->caps.alternate_scrambler_reset) {
+		err = drm_dp_dpcd_writeb(aux, DP_EDP_CONFIGURATION_SET,
+					 DP_ALTERNATE_SCRAMBLER_RESET_ENABLE);
+		if (err < 0)
+			return err;
+	}
+
 	return 0;
 }

From 01f09f242eb5cb194a88cef669a099fa10fcb3f0 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 21 Jul 2015 16:38:11 +0200
Subject: [PATCH 36/68] drm/tegra: dp: Add drm_dp_link_choose() helper

This helper chooses an appropriate configuration, according to the
bitrate requirements of the video mode and the capabilities of the
DisplayPort sink.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.c | 55 ++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/tegra/dp.h |  5 ++++
 2 files changed, 60 insertions(+)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index 638081b568f4..e9a5db77a2dc 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -4,6 +4,7 @@
  * Copyright (C) 2015 Rob Clark
  */
 
+#include <drm/drm_crtc.h>
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_print.h>
 
@@ -234,3 +235,57 @@ int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link)
 
 	return 0;
 }
+
+/**
+ * drm_dp_link_choose() - choose the lowest possible configuration for a mode
+ * @link: DRM DP link object
+ * @mode: DRM display mode
+ * @info: DRM display information
+ *
+ * According to the eDP specification, a source should select a configuration
+ * with the lowest number of lanes and the lowest possible link rate that can
+ * match the bitrate requirements of a video mode. However it must ensure not
+ * to exceed the capabilities of the sink.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_choose(struct drm_dp_link *link,
+		       const struct drm_display_mode *mode,
+		       const struct drm_display_info *info)
+{
+	/* available link symbol clock rates */
+	static const unsigned int rates[3] = { 162000, 270000, 540000 };
+	/* available number of lanes */
+	static const unsigned int lanes[3] = { 1, 2, 4 };
+	unsigned long requirement, capacity;
+	unsigned int rate = link->max_rate;
+	unsigned int i, j;
+
+	/* bandwidth requirement */
+	requirement = mode->clock * info->bpc * 3;
+
+	for (i = 0; i < ARRAY_SIZE(lanes) && lanes[i] <= link->max_lanes; i++) {
+		for (j = 0; j < ARRAY_SIZE(rates) && rates[j] <= rate; j++) {
+			/*
+			 * Capacity for this combination of lanes and rate,
+			 * factoring in the ANSI 8B/10B encoding.
+			 *
+			 * Link rates in the DRM DP helpers are really link
+			 * symbol frequencies, so a tenth of the actual rate
+			 * of the link.
+			 */
+			capacity = lanes[i] * (rates[j] * 10) * 8 / 10;
+
+			if (capacity >= requirement) {
+				DRM_DEBUG_KMS("using %u lanes at %u kHz (%lu/%lu kbps)\n",
+					      lanes[i], rates[j], requirement,
+					      capacity);
+				link->lanes = lanes[i];
+				link->rate = rates[j];
+				return 0;
+			}
+		}
+	}
+
+	return -ERANGE;
+}
diff --git a/drivers/gpu/drm/tegra/dp.h b/drivers/gpu/drm/tegra/dp.h
index 1fe2d4f45ba3..e07b9a1e43d8 100644
--- a/drivers/gpu/drm/tegra/dp.h
+++ b/drivers/gpu/drm/tegra/dp.h
@@ -9,6 +9,8 @@
 
 #include <linux/types.h>
 
+struct drm_display_info;
+struct drm_display_mode;
 struct drm_dp_aux;
 
 /**
@@ -91,5 +93,8 @@ int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link);
 int drm_dp_link_power_up(struct drm_dp_aux *aux, struct drm_dp_link *link);
 int drm_dp_link_power_down(struct drm_dp_aux *aux, struct drm_dp_link *link);
 int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link);
+int drm_dp_link_choose(struct drm_dp_link *link,
+		       const struct drm_display_mode *mode,
+		       const struct drm_display_info *info);
 
 #endif

From 6a127160c4883abf3a54d97024eda8118849fd5c Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 1 Feb 2018 17:46:42 +0100
Subject: [PATCH 37/68] drm/tegra: dp: Add support for eDP link rates

Parses additional link rates from DPCD if the sink supports eDP 1.4.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.c | 127 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/tegra/dp.h |   9 +++
 2 files changed, 136 insertions(+)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index e9a5db77a2dc..bcf9df965ef8 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -33,6 +33,8 @@ void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
 
 static void drm_dp_link_reset(struct drm_dp_link *link)
 {
+	unsigned int i;
+
 	if (!link)
 		return;
 
@@ -47,6 +49,111 @@ static void drm_dp_link_reset(struct drm_dp_link *link)
 
 	link->rate = 0;
 	link->lanes = 0;
+
+	for (i = 0; i < DP_MAX_SUPPORTED_RATES; i++)
+		link->rates[i] = 0;
+
+	link->num_rates = 0;
+}
+
+/**
+ * drm_dp_link_add_rate() - add a rate to the list of supported rates
+ * @link: the link to add the rate to
+ * @rate: the rate to add
+ *
+ * Add a link rate to the list of supported link rates.
+ *
+ * Returns:
+ * 0 on success or one of the following negative error codes on failure:
+ * - ENOSPC if the maximum number of supported rates has been reached
+ * - EEXISTS if the link already supports this rate
+ *
+ * See also:
+ * drm_dp_link_remove_rate()
+ */
+int drm_dp_link_add_rate(struct drm_dp_link *link, unsigned long rate)
+{
+	unsigned int i, pivot;
+
+	if (link->num_rates == DP_MAX_SUPPORTED_RATES)
+		return -ENOSPC;
+
+	for (pivot = 0; pivot < link->num_rates; pivot++)
+		if (rate <= link->rates[pivot])
+			break;
+
+	if (pivot != link->num_rates && rate == link->rates[pivot])
+		return -EEXIST;
+
+	for (i = link->num_rates; i > pivot; i--)
+		link->rates[i] = link->rates[i - 1];
+
+	link->rates[pivot] = rate;
+	link->num_rates++;
+
+	return 0;
+}
+
+/**
+ * drm_dp_link_remove_rate() - remove a rate from the list of supported rates
+ * @link: the link from which to remove the rate
+ * @rate: the rate to remove
+ *
+ * Removes a link rate from the list of supported link rates.
+ *
+ * Returns:
+ * 0 on success or one of the following negative error codes on failure:
+ * - EINVAL if the specified rate is not among the supported rates
+ *
+ * See also:
+ * drm_dp_link_add_rate()
+ */
+int drm_dp_link_remove_rate(struct drm_dp_link *link, unsigned long rate)
+{
+	unsigned int i;
+
+	for (i = 0; i < link->num_rates; i++)
+		if (rate == link->rates[i])
+			break;
+
+	if (i == link->num_rates)
+		return -EINVAL;
+
+	link->num_rates--;
+
+	while (i < link->num_rates) {
+		link->rates[i] = link->rates[i + 1];
+		i++;
+	}
+
+	return 0;
+}
+
+/**
+ * drm_dp_link_update_rates() - normalize the supported link rates array
+ * @link: the link for which to normalize the supported link rates
+ *
+ * Users should call this function after they've manually modified the array
+ * of supported link rates. This function removes any stale entries, compacts
+ * the array and updates the supported link rate count. Note that calling the
+ * drm_dp_link_remove_rate() function already does this janitorial work.
+ *
+ * See also:
+ * drm_dp_link_add_rate(), drm_dp_link_remove_rate()
+ */
+void drm_dp_link_update_rates(struct drm_dp_link *link)
+{
+	unsigned int i, count = 0;
+
+	for (i = 0; i < link->num_rates; i++) {
+		if (link->rates[i] != 0)
+			link->rates[count++] = link->rates[i];
+	}
+
+	for (i = count; i < link->num_rates; i++)
+		link->rates[i] = 0;
+
+	link->num_rates = count;
 }
 
 /**
@@ -125,6 +232,26 @@ int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
 	link->rate = link->max_rate;
 	link->lanes = link->max_lanes;
 
+	/* Parse SUPPORTED_LINK_RATES from eDP 1.4 */
+	if (link->edp >= 0x14) {
+		u8 supported_rates[DP_MAX_SUPPORTED_RATES * 2];
+		unsigned int i;
+		u16 rate;
+
+		err = drm_dp_dpcd_read(aux, DP_SUPPORTED_LINK_RATES,
+				       supported_rates,
+				       sizeof(supported_rates));
+		if (err < 0)
+			return err;
+
+		for (i = 0; i < DP_MAX_SUPPORTED_RATES; i++) {
+			rate = supported_rates[i * 2 + 1] << 8 |
+			       supported_rates[i * 2 + 0];
+
+			drm_dp_link_add_rate(link, rate * 200);
+		}
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/tegra/dp.h b/drivers/gpu/drm/tegra/dp.h
index e07b9a1e43d8..a20ee9f1f1b6 100644
--- a/drivers/gpu/drm/tegra/dp.h
+++ b/drivers/gpu/drm/tegra/dp.h
@@ -66,6 +66,8 @@ void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
  * @edp: eDP revision (0x11: eDP 1.1, 0x12: eDP 1.2, ...)
  * @rate: currently configured link rate
  * @lanes: currently configured number of lanes
+ * @rates: additional supported link rates in kHz (eDP 1.4)
+ * @num_rates: number of additional supported link rates (eDP 1.4)
  */
 struct drm_dp_link {
 	unsigned char revision;
@@ -87,8 +89,15 @@ struct drm_dp_link {
 
 	unsigned int rate;
 	unsigned int lanes;
+
+	unsigned long rates[DP_MAX_SUPPORTED_RATES];
+	unsigned int num_rates;
 };
 
+int drm_dp_link_add_rate(struct drm_dp_link *link, unsigned long rate);
+int drm_dp_link_remove_rate(struct drm_dp_link *link, unsigned long rate);
+void drm_dp_link_update_rates(struct drm_dp_link *link);
+
 int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link);
 int drm_dp_link_power_up(struct drm_dp_aux *aux, struct drm_dp_link *link);
 int drm_dp_link_power_down(struct drm_dp_aux *aux, struct drm_dp_link *link);

From 078c445733c1e8092e23391b251cad6b12f6156e Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 7 Jul 2015 21:21:48 +0200
Subject: [PATCH 38/68] drm/tegra: dp: Add DisplayPort link training helper

Add a helper that will perform link training as described in the
DisplayPort specification.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.c | 456 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/tegra/dp.h |  68 ++++++
 2 files changed, 524 insertions(+)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index bcf9df965ef8..5b6765d653b4 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -334,6 +334,14 @@ int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link)
 	u8 values[2], value;
 	int err;
 
+	if (link->ops && link->ops->configure) {
+		err = link->ops->configure(link);
+		if (err < 0) {
+			DRM_ERROR("failed to configure DP link: %d\n", err);
+			return err;
+		}
+	}
+
 	values[0] = drm_dp_link_rate_to_bw_code(link->rate);
 	values[1] = link->lanes;
 
@@ -416,3 +424,451 @@ int drm_dp_link_choose(struct drm_dp_link *link,
 
 	return -ERANGE;
 }
+
+/**
+ * DOC: Link training
+ *
+ * These functions contain common logic and helpers to implement DisplayPort
+ * link training.
+ */
+
+/**
+ * drm_dp_link_train_init() - initialize DisplayPort link training state
+ * @train: DisplayPort link training state
+ */
+void drm_dp_link_train_init(struct drm_dp_link_train *train)
+{
+	struct drm_dp_link_train_set *request = &train->request;
+	struct drm_dp_link_train_set *adjust = &train->adjust;
+	unsigned int i;
+
+	for (i = 0; i < 4; i++) {
+		request->voltage_swing[i] = 0;
+		adjust->voltage_swing[i] = 0;
+
+		request->pre_emphasis[i] = 0;
+		adjust->pre_emphasis[i] = 0;
+
+		request->post_cursor[i] = 0;
+		adjust->post_cursor[i] = 0;
+	}
+
+	train->pattern = DP_TRAINING_PATTERN_DISABLE;
+	train->clock_recovered = false;
+	train->channel_equalized = false;
+}
+
+static bool drm_dp_link_train_valid(const struct drm_dp_link_train *train)
+{
+	return train->clock_recovered && train->channel_equalized;
+}
+
+static int drm_dp_link_apply_training(struct drm_dp_link *link)
+{
+	struct drm_dp_link_train_set *request = &link->train.request;
+	unsigned int lanes = link->lanes, *vs, *pe, *pc, i;
+	struct drm_dp_aux *aux = link->aux;
+	u8 values[4], pattern = 0;
+	int err;
+
+	err = link->ops->apply_training(link);
+	if (err < 0) {
+		DRM_ERROR("failed to apply link training: %d\n", err);
+		return err;
+	}
+
+	vs = request->voltage_swing;
+	pe = request->pre_emphasis;
+	pc = request->post_cursor;
+
+	/* write currently selected voltage-swing and pre-emphasis levels */
+	for (i = 0; i < lanes; i++)
+		values[i] = DP_TRAIN_VOLTAGE_SWING_LEVEL(vs[i]) |
+			    DP_TRAIN_PRE_EMPHASIS_LEVEL(pe[i]);
+
+	err = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_SET, values, lanes);
+	if (err < 0) {
+		DRM_ERROR("failed to set training parameters: %d\n", err);
+		return err;
+	}
+
+	/* write currently selected post-cursor level (if supported) */
+	if (link->revision >= 0x12 && link->rate == 540000) {
+		values[0] = values[1] = 0;
+
+		for (i = 0; i < lanes; i++)
+			values[i / 2] |= DP_LANE_POST_CURSOR(i, pc[i]);
+
+		err = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_1_SET2, values,
+					DIV_ROUND_UP(lanes, 2));
+		if (err < 0) {
+			DRM_ERROR("failed to set post-cursor: %d\n", err);
+			return err;
+		}
+	}
+
+	/* write link pattern */
+	if (link->train.pattern != DP_TRAINING_PATTERN_DISABLE)
+		pattern |= DP_LINK_SCRAMBLING_DISABLE;
+
+	pattern |= link->train.pattern;
+
+	err = drm_dp_dpcd_writeb(aux, DP_TRAINING_PATTERN_SET, pattern);
+	if (err < 0) {
+		DRM_ERROR("failed to set training pattern: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+static void drm_dp_link_train_wait(struct drm_dp_link *link)
+{
+	unsigned long min = 0;
+
+	switch (link->train.pattern) {
+	case DP_TRAINING_PATTERN_1:
+		min = link->aux_rd_interval.cr;
+		break;
+
+	case DP_TRAINING_PATTERN_2:
+	case DP_TRAINING_PATTERN_3:
+		min = link->aux_rd_interval.ce;
+		break;
+
+	default:
+		break;
+	}
+
+	if (min > 0)
+		usleep_range(min, 2 * min);
+}
+
+static void drm_dp_link_get_adjustments(struct drm_dp_link *link,
+					u8 status[DP_LINK_STATUS_SIZE])
+{
+	struct drm_dp_link_train_set *adjust = &link->train.adjust;
+	unsigned int i;
+
+	for (i = 0; i < link->lanes; i++) {
+		adjust->voltage_swing[i] =
+			drm_dp_get_adjust_request_voltage(status, i) >>
+				DP_TRAIN_VOLTAGE_SWING_SHIFT;
+
+		adjust->pre_emphasis[i] =
+			drm_dp_get_adjust_request_pre_emphasis(status, i) >>
+				DP_TRAIN_PRE_EMPHASIS_SHIFT;
+
+		adjust->post_cursor[i] =
+			drm_dp_get_adjust_request_post_cursor(status, i);
+	}
+}
+
+static void drm_dp_link_train_adjust(struct drm_dp_link_train *train)
+{
+	struct drm_dp_link_train_set *request = &train->request;
+	struct drm_dp_link_train_set *adjust = &train->adjust;
+	unsigned int i;
+
+	for (i = 0; i < 4; i++)
+		if (request->voltage_swing[i] != adjust->voltage_swing[i])
+			request->voltage_swing[i] = adjust->voltage_swing[i];
+
+	for (i = 0; i < 4; i++)
+		if (request->pre_emphasis[i] != adjust->pre_emphasis[i])
+			request->pre_emphasis[i] = adjust->pre_emphasis[i];
+
+	for (i = 0; i < 4; i++)
+		if (request->post_cursor[i] != adjust->post_cursor[i])
+			request->post_cursor[i] = adjust->post_cursor[i];
+}
+
+static int drm_dp_link_recover_clock(struct drm_dp_link *link)
+{
+	u8 status[DP_LINK_STATUS_SIZE];
+	int err;
+
+	err = drm_dp_link_apply_training(link);
+	if (err < 0)
+		return err;
+
+	drm_dp_link_train_wait(link);
+
+	err = drm_dp_dpcd_read_link_status(link->aux, status);
+	if (err < 0) {
+		DRM_ERROR("failed to read link status: %d\n", err);
+		return err;
+	}
+
+	if (!drm_dp_clock_recovery_ok(status, link->lanes))
+		drm_dp_link_get_adjustments(link, status);
+	else
+		link->train.clock_recovered = true;
+
+	return 0;
+}
+
+static int drm_dp_link_clock_recovery(struct drm_dp_link *link)
+{
+	unsigned int repeat;
+	int err;
+
+	/* start clock recovery using training pattern 1 */
+	link->train.pattern = DP_TRAINING_PATTERN_1;
+
+	for (repeat = 1; repeat < 5; repeat++) {
+		err = drm_dp_link_recover_clock(link);
+		if (err < 0) {
+			DRM_ERROR("failed to recover clock: %d\n", err);
+			return err;
+		}
+
+		drm_dp_link_train_adjust(&link->train);
+
+		if (link->train.clock_recovered)
+			break;
+	}
+
+	return 0;
+}
+
+static int drm_dp_link_equalize_channel(struct drm_dp_link *link)
+{
+	struct drm_dp_aux *aux = link->aux;
+	u8 status[DP_LINK_STATUS_SIZE];
+	int err;
+
+	err = drm_dp_link_apply_training(link);
+	if (err < 0)
+		return err;
+
+	drm_dp_link_train_wait(link);
+
+	err = drm_dp_dpcd_read_link_status(aux, status);
+	if (err < 0) {
+		DRM_ERROR("failed to read link status: %d\n", err);
+		return err;
+	}
+
+	if (!drm_dp_clock_recovery_ok(status, link->lanes)) {
+		DRM_ERROR("clock recovery lost while equalizing channel\n");
+		link->train.clock_recovered = false;
+		return 0;
+	}
+
+	if (!drm_dp_channel_eq_ok(status, link->lanes))
+		drm_dp_link_get_adjustments(link, status);
+	else
+		link->train.channel_equalized = true;
+
+	return 0;
+}
+
+static int drm_dp_link_channel_equalization(struct drm_dp_link *link)
+{
+	unsigned int repeat;
+	int err;
+
+	/* start channel equalization using pattern 2 or 3 */
+	if (link->caps.tps3_supported)
+		link->train.pattern = DP_TRAINING_PATTERN_3;
+	else
+		link->train.pattern = DP_TRAINING_PATTERN_2;
+
+	for (repeat = 1; repeat < 5; repeat++) {
+		err = drm_dp_link_equalize_channel(link);
+		if (err < 0) {
+			DRM_ERROR("failed to equalize channel: %d\n", err);
+			return err;
+		}
+
+		drm_dp_link_train_adjust(&link->train);
+
+		if (link->train.channel_equalized)
+			break;
+	}
+
+	return 0;
+}
+
+static int drm_dp_link_downgrade(struct drm_dp_link *link)
+{
+	switch (link->rate) {
+	case 162000:
+		return -EINVAL;
+
+	case 270000:
+		link->rate = 162000;
+		break;
+
+	case 540000:
+		link->rate = 270000;
+		return 0;
+	}
+
+	return 0;
+}
+
+static void drm_dp_link_train_disable(struct drm_dp_link *link)
+{
+	int err;
+
+	link->train.pattern = DP_TRAINING_PATTERN_DISABLE;
+
+	err = drm_dp_link_apply_training(link);
+	if (err < 0)
+		DRM_ERROR("failed to disable link training: %d\n", err);
+}
+
+static int drm_dp_link_train_full(struct drm_dp_link *link)
+{
+	int err;
+
+retry:
+	DRM_DEBUG_KMS("full-training link: %u lane%s at %u MHz\n",
+		      link->lanes, (link->lanes > 1) ? "s" : "",
+		      link->rate / 100);
+
+	err = drm_dp_link_configure(link->aux, link);
+	if (err < 0) {
+		DRM_ERROR("failed to configure DP link: %d\n", err);
+		return err;
+	}
+
+	err = drm_dp_link_clock_recovery(link);
+	if (err < 0) {
+		DRM_ERROR("clock recovery failed: %d\n", err);
+		goto out;
+	}
+
+	if (!link->train.clock_recovered) {
+		DRM_ERROR("clock recovery failed, downgrading link\n");
+
+		err = drm_dp_link_downgrade(link);
+		if (err < 0)
+			goto out;
+
+		goto retry;
+	}
+
+	DRM_DEBUG_KMS("clock recovery succeeded\n");
+
+	err = drm_dp_link_channel_equalization(link);
+	if (err < 0) {
+		DRM_ERROR("channel equalization failed: %d\n", err);
+		goto out;
+	}
+
+	if (!link->train.channel_equalized) {
+		DRM_ERROR("channel equalization failed, downgrading link\n");
+
+		err = drm_dp_link_downgrade(link);
+		if (err < 0)
+			goto out;
+
+		goto retry;
+	}
+
+	DRM_DEBUG_KMS("channel equalization succeeded\n");
+
+out:
+	drm_dp_link_train_disable(link);
+	return err;
+}
+
+static int drm_dp_link_train_fast(struct drm_dp_link *link)
+{
+	u8 status[DP_LINK_STATUS_SIZE];
+	int err;
+
+	DRM_DEBUG_KMS("fast-training link: %u lane%s at %u MHz\n",
+		      link->lanes, (link->lanes > 1) ? "s" : "",
+		      link->rate / 100);
+
+	err = drm_dp_link_configure(link->aux, link);
+	if (err < 0) {
+		DRM_ERROR("failed to configure DP link: %d\n", err);
+		return err;
+	}
+
+	/* transmit training pattern 1 for 500 microseconds */
+	link->train.pattern = DP_TRAINING_PATTERN_1;
+
+	err = drm_dp_link_apply_training(link);
+	if (err < 0)
+		goto out;
+
+	usleep_range(500, 1000);
+
+	/* transmit training pattern 2 or 3 for 500 microseconds */
+	if (link->caps.tps3_supported)
+		link->train.pattern = DP_TRAINING_PATTERN_3;
+	else
+		link->train.pattern = DP_TRAINING_PATTERN_2;
+
+	err = drm_dp_link_apply_training(link);
+	if (err < 0)
+		goto out;
+
+	usleep_range(500, 1000);
+
+	err = drm_dp_dpcd_read_link_status(link->aux, status);
+	if (err < 0) {
+		DRM_ERROR("failed to read link status: %d\n", err);
+		goto out;
+	}
+
+	if (!drm_dp_clock_recovery_ok(status, link->lanes)) {
+		DRM_ERROR("clock recovery failed\n");
+		err = -EIO;
+	}
+
+	if (!drm_dp_channel_eq_ok(status, link->lanes)) {
+		DRM_ERROR("channel equalization failed\n");
+		err = -EIO;
+	}
+
+out:
+	drm_dp_link_train_disable(link);
+	return err;
+}
+
+/**
+ * drm_dp_link_train() - perform DisplayPort link training
+ * @link: a DP link object
+ *
+ * Uses the context stored in the DP link object to perform link training. It
+ * is expected that drivers will call drm_dp_link_probe() to obtain the link
+ * capabilities before performing link training.
+ *
+ * If the sink supports fast link training (no AUX CH handshake) and valid
+ * training settings are available, this function will try to perform fast
+ * link training and fall back to full link training on failure.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_train(struct drm_dp_link *link)
+{
+	int err;
+
+	if (link->caps.fast_training) {
+		if (drm_dp_link_train_valid(&link->train)) {
+			err = drm_dp_link_train_fast(link);
+			if (err < 0)
+				DRM_ERROR("fast link training failed: %d\n",
+					  err);
+			else
+				return 0;
+		} else {
+			DRM_DEBUG_KMS("training parameters not available\n");
+		}
+	} else {
+		DRM_DEBUG_KMS("fast link training not supported\n");
+	}
+
+	err = drm_dp_link_train_full(link);
+	if (err < 0)
+		DRM_ERROR("full link training failed: %d\n", err);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/tegra/dp.h b/drivers/gpu/drm/tegra/dp.h
index a20ee9f1f1b6..cb12ed0c54e7 100644
--- a/drivers/gpu/drm/tegra/dp.h
+++ b/drivers/gpu/drm/tegra/dp.h
@@ -12,6 +12,7 @@
 struct drm_display_info;
 struct drm_display_mode;
 struct drm_dp_aux;
+struct drm_dp_link;
 
 /**
  * struct drm_dp_link_caps - DP link capabilities
@@ -56,6 +57,55 @@ struct drm_dp_link_caps {
 void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
 			   const struct drm_dp_link_caps *src);
 
+/**
+ * struct drm_dp_link_ops - DP link operations
+ */
+struct drm_dp_link_ops {
+	/**
+	 * @apply_training:
+	 */
+	int (*apply_training)(struct drm_dp_link *link);
+
+	/**
+	 * @configure:
+	 */
+	int (*configure)(struct drm_dp_link *link);
+};
+
+#define DP_TRAIN_VOLTAGE_SWING_LEVEL(x) ((x) << 0)
+#define DP_TRAIN_PRE_EMPHASIS_LEVEL(x) ((x) << 3)
+#define DP_LANE_POST_CURSOR(i, x) (((x) & 0x3) << (((i) & 1) << 2))
+
+/**
+ * struct drm_dp_link_train_set - link training settings
+ * @voltage_swing: per-lane voltage swing
+ * @pre_emphasis: per-lane pre-emphasis
+ * @post_cursor: per-lane post-cursor
+ */
+struct drm_dp_link_train_set {
+	unsigned int voltage_swing[4];
+	unsigned int pre_emphasis[4];
+	unsigned int post_cursor[4];
+};
+
+/**
+ * struct drm_dp_link_train - link training state information
+ * @request: currently requested settings
+ * @adjust: adjustments requested by sink
+ * @pattern: currently requested training pattern
+ * @clock_recovered: flag to track if clock recovery has completed
+ * @channel_equalized: flag to track if channel equalization has completed
+ */
+struct drm_dp_link_train {
+	struct drm_dp_link_train_set request;
+	struct drm_dp_link_train_set adjust;
+
+	unsigned int pattern;
+
+	bool clock_recovered;
+	bool channel_equalized;
+};
+
 /**
  * struct drm_dp_link - DP link capabilities and configuration
  * @revision: DP specification revision supported on the link
@@ -92,6 +142,21 @@ struct drm_dp_link {
 
 	unsigned long rates[DP_MAX_SUPPORTED_RATES];
 	unsigned int num_rates;
+
+	/**
+	 * @ops: DP link operations
+	 */
+	const struct drm_dp_link_ops *ops;
+
+	/**
+	 * @aux: DP AUX channel
+	 */
+	struct drm_dp_aux *aux;
+
+	/**
+	 * @train: DP link training state
+	 */
+	struct drm_dp_link_train train;
 };
 
 int drm_dp_link_add_rate(struct drm_dp_link *link, unsigned long rate);
@@ -106,4 +171,7 @@ int drm_dp_link_choose(struct drm_dp_link *link,
 		       const struct drm_display_mode *mode,
 		       const struct drm_display_info *info);
 
+void drm_dp_link_train_init(struct drm_dp_link_train *train);
+int drm_dp_link_train(struct drm_dp_link *link);
+
 #endif

From c176393728c9fcd8f7ef842cb3e4cedda3f418a2 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 15 Oct 2019 14:57:42 +0200
Subject: [PATCH 39/68] drm/tegra: sor: Use DP link training helpers

Make use of the DP link training helpers to implement full and fast link
training. While at it, refactor some of the code and remove various code
sequences that are not necessary.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dpaux.c |  69 ----
 drivers/gpu/drm/tegra/drm.h   |   5 -
 drivers/gpu/drm/tegra/sor.c   | 704 +++++++++++++++++++++++-----------
 drivers/gpu/drm/tegra/sor.h   |   2 +
 4 files changed, 480 insertions(+), 300 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
index bd3361cea49b..622cdf1ad246 100644
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c
@@ -821,72 +821,3 @@ int drm_dp_aux_disable(struct drm_dp_aux *aux)
 
 	return 0;
 }
-
-int drm_dp_aux_prepare(struct drm_dp_aux *aux, u8 encoding)
-{
-	int err;
-
-	err = drm_dp_dpcd_writeb(aux, DP_MAIN_LINK_CHANNEL_CODING_SET,
-				 encoding);
-	if (err < 0)
-		return err;
-
-	return 0;
-}
-
-int drm_dp_aux_train(struct drm_dp_aux *aux, struct drm_dp_link *link,
-		     u8 pattern)
-{
-	u8 tp = pattern & DP_TRAINING_PATTERN_MASK;
-	u8 status[DP_LINK_STATUS_SIZE], values[4];
-	unsigned int i;
-	int err;
-
-	err = drm_dp_dpcd_writeb(aux, DP_TRAINING_PATTERN_SET, pattern);
-	if (err < 0)
-		return err;
-
-	if (tp == DP_TRAINING_PATTERN_DISABLE)
-		return 0;
-
-	for (i = 0; i < link->lanes; i++)
-		values[i] = DP_TRAIN_MAX_PRE_EMPHASIS_REACHED |
-			    DP_TRAIN_PRE_EMPH_LEVEL_0 |
-			    DP_TRAIN_MAX_SWING_REACHED |
-			    DP_TRAIN_VOLTAGE_SWING_LEVEL_0;
-
-	err = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_SET, values,
-				link->lanes);
-	if (err < 0)
-		return err;
-
-	usleep_range(500, 1000);
-
-	err = drm_dp_dpcd_read_link_status(aux, status);
-	if (err < 0)
-		return err;
-
-	switch (tp) {
-	case DP_TRAINING_PATTERN_1:
-		if (!drm_dp_clock_recovery_ok(status, link->lanes))
-			return -EAGAIN;
-
-		break;
-
-	case DP_TRAINING_PATTERN_2:
-		if (!drm_dp_channel_eq_ok(status, link->lanes))
-			return -EAGAIN;
-
-		break;
-
-	default:
-		dev_err(aux->dev, "unsupported training pattern %u\n", tp);
-		return -EINVAL;
-	}
-
-	err = drm_dp_dpcd_writeb(aux, DP_EDP_CONFIGURATION_SET, 0);
-	if (err < 0)
-		return err;
-
-	return 0;
-}
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 6a06d636e930..8b812bb52e5b 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -153,17 +153,12 @@ void tegra_output_connector_destroy(struct drm_connector *connector);
 void tegra_output_encoder_destroy(struct drm_encoder *encoder);
 
 /* from dpaux.c */
-struct drm_dp_link;
-
 struct drm_dp_aux *drm_dp_aux_find_by_of_node(struct device_node *np);
 enum drm_connector_status drm_dp_aux_detect(struct drm_dp_aux *aux);
 int drm_dp_aux_attach(struct drm_dp_aux *aux, struct tegra_output *output);
 int drm_dp_aux_detach(struct drm_dp_aux *aux);
 int drm_dp_aux_enable(struct drm_dp_aux *aux);
 int drm_dp_aux_disable(struct drm_dp_aux *aux);
-int drm_dp_aux_prepare(struct drm_dp_aux *aux, u8 encoding);
-int drm_dp_aux_train(struct drm_dp_aux *aux, struct drm_dp_link *link,
-		     u8 pattern);
 
 /* from fb.c */
 struct tegra_bo *tegra_fb_get_plane(struct drm_framebuffer *framebuffer,
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index dd118366455b..636807e047f0 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -383,6 +383,12 @@ struct tegra_sor_soc {
 	unsigned int num_settings;
 
 	const u8 *xbar_cfg;
+	const u8 *lane_map;
+
+	const u8 (*voltage_swing)[4][4];
+	const u8 (*pre_emphasis)[4][4];
+	const u8 (*post_cursor)[4][4];
+	const u8 (*tx_pu)[4][4];
 };
 
 struct tegra_sor;
@@ -413,6 +419,7 @@ struct tegra_sor {
 
 	u8 xbar_cfg[5];
 
+	struct drm_dp_link link;
 	struct drm_dp_aux *aux;
 
 	struct drm_info_list *debugfs_files;
@@ -598,112 +605,316 @@ static struct clk *tegra_clk_sor_pad_register(struct tegra_sor *sor,
 	return clk;
 }
 
-static int tegra_sor_dp_train_fast(struct tegra_sor *sor,
-				   struct drm_dp_link *link)
+static int tegra_sor_power_up_lanes(struct tegra_sor *sor, unsigned int lanes)
 {
-	unsigned int i;
-	u8 pattern;
+	unsigned long timeout;
 	u32 value;
-	int err;
-
-	/* setup lane parameters */
-	value = SOR_LANE_DRIVE_CURRENT_LANE3(0x40) |
-		SOR_LANE_DRIVE_CURRENT_LANE2(0x40) |
-		SOR_LANE_DRIVE_CURRENT_LANE1(0x40) |
-		SOR_LANE_DRIVE_CURRENT_LANE0(0x40);
-	tegra_sor_writel(sor, value, SOR_LANE_DRIVE_CURRENT0);
-
-	value = SOR_LANE_PREEMPHASIS_LANE3(0x0f) |
-		SOR_LANE_PREEMPHASIS_LANE2(0x0f) |
-		SOR_LANE_PREEMPHASIS_LANE1(0x0f) |
-		SOR_LANE_PREEMPHASIS_LANE0(0x0f);
-	tegra_sor_writel(sor, value, SOR_LANE_PREEMPHASIS0);
-
-	value = SOR_LANE_POSTCURSOR_LANE3(0x00) |
-		SOR_LANE_POSTCURSOR_LANE2(0x00) |
-		SOR_LANE_POSTCURSOR_LANE1(0x00) |
-		SOR_LANE_POSTCURSOR_LANE0(0x00);
-	tegra_sor_writel(sor, value, SOR_LANE_POSTCURSOR0);
-
-	/* disable LVDS mode */
-	tegra_sor_writel(sor, 0, SOR_LVDS);
 
+	/*
+	 * Clear or set the PD_TXD bit corresponding to each lane, depending
+	 * on whether it is used or not.
+	 */
 	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
-	value |= SOR_DP_PADCTL_TX_PU_ENABLE;
-	value &= ~SOR_DP_PADCTL_TX_PU_MASK;
-	value |= SOR_DP_PADCTL_TX_PU(2); /* XXX: don't hardcode? */
+
+	if (lanes <= 2)
+		value &= ~(SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[3]) |
+			   SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[2]));
+	else
+		value |= SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[3]) |
+			 SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[2]);
+
+	if (lanes <= 1)
+		value &= ~SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[1]);
+	else
+		value |= SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[1]);
+
+	if (lanes == 0)
+		value &= ~SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[0]);
+	else
+		value |= SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[0]);
+
 	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
 
+	/* start lane sequencer */
+	value = SOR_LANE_SEQ_CTL_TRIGGER | SOR_LANE_SEQ_CTL_SEQUENCE_DOWN |
+		SOR_LANE_SEQ_CTL_POWER_STATE_UP;
+	tegra_sor_writel(sor, value, SOR_LANE_SEQ_CTL);
+
+	timeout = jiffies + msecs_to_jiffies(250);
+
+	while (time_before(jiffies, timeout)) {
+		value = tegra_sor_readl(sor, SOR_LANE_SEQ_CTL);
+		if ((value & SOR_LANE_SEQ_CTL_TRIGGER) == 0)
+			break;
+
+		usleep_range(250, 1000);
+	}
+
+	if ((value & SOR_LANE_SEQ_CTL_TRIGGER) != 0)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int tegra_sor_power_down_lanes(struct tegra_sor *sor)
+{
+	unsigned long timeout;
+	u32 value;
+
+	/* power down all lanes */
 	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
-	value |= SOR_DP_PADCTL_CM_TXD_3 | SOR_DP_PADCTL_CM_TXD_2 |
-		 SOR_DP_PADCTL_CM_TXD_1 | SOR_DP_PADCTL_CM_TXD_0;
+	value &= ~(SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_0 |
+		   SOR_DP_PADCTL_PD_TXD_1 | SOR_DP_PADCTL_PD_TXD_2);
 	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
 
-	usleep_range(10, 100);
+	/* start lane sequencer */
+	value = SOR_LANE_SEQ_CTL_TRIGGER | SOR_LANE_SEQ_CTL_SEQUENCE_UP |
+		SOR_LANE_SEQ_CTL_POWER_STATE_DOWN;
+	tegra_sor_writel(sor, value, SOR_LANE_SEQ_CTL);
+
+	timeout = jiffies + msecs_to_jiffies(250);
+
+	while (time_before(jiffies, timeout)) {
+		value = tegra_sor_readl(sor, SOR_LANE_SEQ_CTL);
+		if ((value & SOR_LANE_SEQ_CTL_TRIGGER) == 0)
+			break;
+
+		usleep_range(25, 100);
+	}
+
+	if ((value & SOR_LANE_SEQ_CTL_TRIGGER) != 0)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static void tegra_sor_dp_precharge(struct tegra_sor *sor, unsigned int lanes)
+{
+	u32 value;
+
+	/* pre-charge all used lanes */
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
+
+	if (lanes <= 2)
+		value &= ~(SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[3]) |
+			   SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[2]));
+	else
+		value |= SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[3]) |
+			 SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[2]);
+
+	if (lanes <= 1)
+		value &= ~SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[1]);
+	else
+		value |= SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[1]);
+
+	if (lanes == 0)
+		value &= ~SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[0]);
+	else
+		value |= SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[0]);
+
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
+
+	usleep_range(15, 100);
 
 	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
 	value &= ~(SOR_DP_PADCTL_CM_TXD_3 | SOR_DP_PADCTL_CM_TXD_2 |
 		   SOR_DP_PADCTL_CM_TXD_1 | SOR_DP_PADCTL_CM_TXD_0);
 	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
+}
 
-	err = drm_dp_aux_prepare(sor->aux, DP_SET_ANSI_8B10B);
-	if (err < 0)
-		return err;
+static void tegra_sor_dp_term_calibrate(struct tegra_sor *sor)
+{
+	u32 mask = 0x08, adj = 0, value;
 
-	for (i = 0, value = 0; i < link->lanes; i++) {
-		unsigned long lane = SOR_DP_TPG_CHANNEL_CODING |
-				     SOR_DP_TPG_SCRAMBLER_NONE |
-				     SOR_DP_TPG_PATTERN_TRAIN1;
-		value = (value << 8) | lane;
+	/* enable pad calibration logic */
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
+	value &= ~SOR_DP_PADCTL_PAD_CAL_PD;
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
+
+	value = tegra_sor_readl(sor, sor->soc->regs->pll1);
+	value |= SOR_PLL1_TMDS_TERM;
+	tegra_sor_writel(sor, value, sor->soc->regs->pll1);
+
+	while (mask) {
+		adj |= mask;
+
+		value = tegra_sor_readl(sor, sor->soc->regs->pll1);
+		value &= ~SOR_PLL1_TMDS_TERMADJ_MASK;
+		value |= SOR_PLL1_TMDS_TERMADJ(adj);
+		tegra_sor_writel(sor, value, sor->soc->regs->pll1);
+
+		usleep_range(100, 200);
+
+		value = tegra_sor_readl(sor, sor->soc->regs->pll1);
+		if (value & SOR_PLL1_TERM_COMPOUT)
+			adj &= ~mask;
+
+		mask >>= 1;
 	}
 
-	tegra_sor_writel(sor, value, SOR_DP_TPG);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll1);
+	value &= ~SOR_PLL1_TMDS_TERMADJ_MASK;
+	value |= SOR_PLL1_TMDS_TERMADJ(adj);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll1);
 
-	pattern = DP_TRAINING_PATTERN_1;
+	/* disable pad calibration logic */
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
+	value |= SOR_DP_PADCTL_PAD_CAL_PD;
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
+}
 
-	err = drm_dp_aux_train(sor->aux, link, pattern);
-	if (err < 0)
-		return err;
+static int tegra_sor_dp_link_apply_training(struct drm_dp_link *link)
+{
+	struct tegra_sor *sor = container_of(link, struct tegra_sor, link);
+	u32 voltage_swing = 0, pre_emphasis = 0, post_cursor = 0;
+	const struct tegra_sor_soc *soc = sor->soc;
+	u32 pattern = 0, tx_pu = 0, value;
+	unsigned int i;
 
-	value = tegra_sor_readl(sor, SOR_DP_SPARE0);
-	value |= SOR_DP_SPARE_SEQ_ENABLE;
-	value &= ~SOR_DP_SPARE_PANEL_INTERNAL;
-	value |= SOR_DP_SPARE_MACRO_SOR_CLK;
-	tegra_sor_writel(sor, value, SOR_DP_SPARE0);
+	for (value = 0, i = 0; i < link->lanes; i++) {
+		u8 vs = link->train.request.voltage_swing[i];
+		u8 pe = link->train.request.pre_emphasis[i];
+		u8 pc = link->train.request.post_cursor[i];
+		u8 shift = sor->soc->lane_map[i] << 3;
 
-	for (i = 0, value = 0; i < link->lanes; i++) {
-		unsigned long lane = SOR_DP_TPG_CHANNEL_CODING |
-				     SOR_DP_TPG_SCRAMBLER_NONE |
-				     SOR_DP_TPG_PATTERN_TRAIN2;
-		value = (value << 8) | lane;
+		voltage_swing |= soc->voltage_swing[pc][vs][pe] << shift;
+		pre_emphasis |= soc->pre_emphasis[pc][vs][pe] << shift;
+		post_cursor |= soc->post_cursor[pc][vs][pe] << shift;
+
+		if (sor->soc->tx_pu[pc][vs][pe] > tx_pu)
+			tx_pu = sor->soc->tx_pu[pc][vs][pe];
+
+		switch (link->train.pattern) {
+		case DP_TRAINING_PATTERN_DISABLE:
+			value = SOR_DP_TPG_SCRAMBLER_GALIOS |
+				SOR_DP_TPG_PATTERN_NONE;
+			break;
+
+		case DP_TRAINING_PATTERN_1:
+			value = SOR_DP_TPG_SCRAMBLER_NONE |
+				SOR_DP_TPG_PATTERN_TRAIN1;
+			break;
+
+		case DP_TRAINING_PATTERN_2:
+			value = SOR_DP_TPG_SCRAMBLER_NONE |
+				SOR_DP_TPG_PATTERN_TRAIN2;
+			break;
+
+		case DP_TRAINING_PATTERN_3:
+			value = SOR_DP_TPG_SCRAMBLER_NONE |
+				SOR_DP_TPG_PATTERN_TRAIN3;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+
+		if (link->caps.channel_coding)
+			value |= SOR_DP_TPG_CHANNEL_CODING;
+
+		pattern = pattern << 8 | value;
 	}
 
-	tegra_sor_writel(sor, value, SOR_DP_TPG);
+	tegra_sor_writel(sor, voltage_swing, SOR_LANE_DRIVE_CURRENT0);
+	tegra_sor_writel(sor, pre_emphasis, SOR_LANE_PREEMPHASIS0);
 
-	pattern = DP_LINK_SCRAMBLING_DISABLE | DP_TRAINING_PATTERN_2;
+	if (link->caps.tps3_supported)
+		tegra_sor_writel(sor, post_cursor, SOR_LANE_POSTCURSOR0);
 
-	err = drm_dp_aux_train(sor->aux, link, pattern);
-	if (err < 0)
-		return err;
+	tegra_sor_writel(sor, pattern, SOR_DP_TPG);
 
-	for (i = 0, value = 0; i < link->lanes; i++) {
-		unsigned long lane = SOR_DP_TPG_CHANNEL_CODING |
-				     SOR_DP_TPG_SCRAMBLER_GALIOS |
-				     SOR_DP_TPG_PATTERN_NONE;
-		value = (value << 8) | lane;
-	}
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
+	value &= ~SOR_DP_PADCTL_TX_PU_MASK;
+	value |= SOR_DP_PADCTL_TX_PU_ENABLE;
+	value |= SOR_DP_PADCTL_TX_PU(tx_pu);
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
 
-	tegra_sor_writel(sor, value, SOR_DP_TPG);
-
-	pattern = DP_TRAINING_PATTERN_DISABLE;
-
-	err = drm_dp_aux_train(sor->aux, link, pattern);
-	if (err < 0)
-		return err;
+	usleep_range(20, 100);
 
 	return 0;
 }
 
+static int tegra_sor_dp_link_configure(struct drm_dp_link *link)
+{
+	struct tegra_sor *sor = container_of(link, struct tegra_sor, link);
+	unsigned int rate, lanes;
+	u32 value;
+	int err;
+
+	rate = drm_dp_link_rate_to_bw_code(link->rate);
+	lanes = link->lanes;
+
+	/* configure link speed and lane count */
+	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
+	value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK;
+	value |= SOR_CLK_CNTRL_DP_LINK_SPEED(rate);
+	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
+
+	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
+	value &= ~SOR_DP_LINKCTL_LANE_COUNT_MASK;
+	value |= SOR_DP_LINKCTL_LANE_COUNT(lanes);
+
+	if (link->caps.enhanced_framing)
+		value |= SOR_DP_LINKCTL_ENHANCED_FRAME;
+
+	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
+
+	usleep_range(400, 1000);
+
+	/* configure load pulse position adjustment */
+	value = tegra_sor_readl(sor, sor->soc->regs->pll1);
+	value &= ~SOR_PLL1_LOADADJ_MASK;
+
+	switch (rate) {
+	case DP_LINK_BW_1_62:
+		value |= SOR_PLL1_LOADADJ(0x3);
+		break;
+
+	case DP_LINK_BW_2_7:
+		value |= SOR_PLL1_LOADADJ(0x4);
+		break;
+
+	case DP_LINK_BW_5_4:
+		value |= SOR_PLL1_LOADADJ(0x6);
+		break;
+	}
+
+	tegra_sor_writel(sor, value, sor->soc->regs->pll1);
+
+	/* use alternate scrambler reset for eDP */
+	value = tegra_sor_readl(sor, SOR_DP_SPARE0);
+
+	if (link->edp == 0)
+		value &= ~SOR_DP_SPARE_PANEL_INTERNAL;
+	else
+		value |= SOR_DP_SPARE_PANEL_INTERNAL;
+
+	tegra_sor_writel(sor, value, SOR_DP_SPARE0);
+
+	err = tegra_sor_power_down_lanes(sor);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to power down lanes: %d\n", err);
+		return err;
+	}
+
+	/* power up and pre-charge lanes */
+	err = tegra_sor_power_up_lanes(sor, lanes);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to power up %u lane%s: %d\n",
+			lanes, (lanes != 1) ? "s" : "", err);
+		return err;
+	}
+
+	tegra_sor_dp_precharge(sor, lanes);
+
+	return 0;
+}
+
+static const struct drm_dp_link_ops tegra_sor_dp_link_ops = {
+	.apply_training = tegra_sor_dp_link_apply_training,
+	.configure = tegra_sor_dp_link_configure,
+};
+
 static void tegra_sor_super_update(struct tegra_sor *sor)
 {
 	tegra_sor_writel(sor, 0, SOR_SUPER_STATE0);
@@ -1201,29 +1412,6 @@ static int tegra_sor_power_down(struct tegra_sor *sor)
 		return err;
 	}
 
-	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
-	value &= ~(SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_0 |
-		   SOR_DP_PADCTL_PD_TXD_1 | SOR_DP_PADCTL_PD_TXD_2);
-	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
-
-	/* stop lane sequencer */
-	value = SOR_LANE_SEQ_CTL_TRIGGER | SOR_LANE_SEQ_CTL_SEQUENCE_UP |
-		SOR_LANE_SEQ_CTL_POWER_STATE_DOWN;
-	tegra_sor_writel(sor, value, SOR_LANE_SEQ_CTL);
-
-	timeout = jiffies + msecs_to_jiffies(250);
-
-	while (time_before(jiffies, timeout)) {
-		value = tegra_sor_readl(sor, SOR_LANE_SEQ_CTL);
-		if ((value & SOR_LANE_SEQ_CTL_TRIGGER) == 0)
-			break;
-
-		usleep_range(25, 100);
-	}
-
-	if ((value & SOR_LANE_SEQ_CTL_TRIGGER) != 0)
-		return -ETIMEDOUT;
-
 	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value |= SOR_PLL2_PORT_POWERDOWN;
 	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
@@ -1603,17 +1791,11 @@ static void tegra_sor_edp_disable(struct drm_encoder *encoder)
 	tegra_sor_writel(sor, 0, SOR_STATE1);
 	tegra_sor_update(sor);
 
-	/*
-	 * The following accesses registers of the display controller, so make
-	 * sure it's only executed when the output is attached to one.
-	 */
-	if (dc) {
-		value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
-		value &= ~SOR_ENABLE(0);
-		tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
+	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
+	value &= ~SOR_ENABLE(0);
+	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
 
-		tegra_dc_commit(dc);
-	}
+	tegra_dc_commit(dc);
 
 	err = tegra_sor_power_down(sor);
 	if (err < 0)
@@ -1679,19 +1861,20 @@ static int calc_h_ref_to_sync(const struct drm_display_mode *mode,
 
 static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 {
-	struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode;
 	struct tegra_output *output = encoder_to_output(encoder);
 	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
 	struct tegra_sor *sor = to_sor(output);
 	struct tegra_sor_config config;
 	struct tegra_sor_state *state;
-	struct drm_dp_link link;
-	u8 rate, lanes;
+	struct drm_display_mode *mode;
+	struct drm_display_info *info;
 	unsigned int i;
-	int err = 0;
 	u32 value;
+	int err;
 
 	state = to_sor_state(output->connector.state);
+	mode = &encoder->crtc->state->adjusted_mode;
+	info = &output->connector.display_info;
 
 	pm_runtime_get_sync(sor->dev);
 
@@ -1702,7 +1885,7 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	if (err < 0)
 		dev_err(sor->dev, "failed to enable DP: %d\n", err);
 
-	err = drm_dp_link_probe(sor->aux, &link);
+	err = drm_dp_link_probe(sor->aux, &sor->link);
 	if (err < 0) {
 		dev_err(sor->dev, "failed to probe eDP link: %d\n", err);
 		return;
@@ -1713,13 +1896,6 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	if (err < 0)
 		dev_err(sor->dev, "failed to set safe parent clock: %d\n", err);
 
-	memset(&config, 0, sizeof(config));
-	config.bits_per_pixel = state->bpc * 3;
-
-	err = tegra_sor_compute_config(sor, mode, &config, &link);
-	if (err < 0)
-		dev_err(sor->dev, "failed to compute configuration: %d\n", err);
-
 	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
 	value &= ~SOR_CLK_CNTRL_DP_CLK_SEL_MASK;
 	value |= SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_DPCLK;
@@ -1828,117 +2004,44 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	if (err < 0)
 		dev_err(sor->dev, "failed to set parent clock: %d\n", err);
 
-	/* power DP lanes */
-	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
+	/* use DP-A protocol */
+	value = tegra_sor_readl(sor, SOR_STATE1);
+	value &= ~SOR_STATE_ASY_PROTOCOL_MASK;
+	value |= SOR_STATE_ASY_PROTOCOL_DP_A;
+	tegra_sor_writel(sor, value, SOR_STATE1);
 
-	if (link.lanes <= 2)
-		value &= ~(SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_2);
-	else
-		value |= SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_2;
-
-	if (link.lanes <= 1)
-		value &= ~SOR_DP_PADCTL_PD_TXD_1;
-	else
-		value |= SOR_DP_PADCTL_PD_TXD_1;
-
-	if (link.lanes == 0)
-		value &= ~SOR_DP_PADCTL_PD_TXD_0;
-	else
-		value |= SOR_DP_PADCTL_PD_TXD_0;
-
-	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
-
-	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
-	value &= ~SOR_DP_LINKCTL_LANE_COUNT_MASK;
-	value |= SOR_DP_LINKCTL_LANE_COUNT(link.lanes);
-	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
-
-	/* start lane sequencer */
-	value = SOR_LANE_SEQ_CTL_TRIGGER | SOR_LANE_SEQ_CTL_SEQUENCE_DOWN |
-		SOR_LANE_SEQ_CTL_POWER_STATE_UP;
-	tegra_sor_writel(sor, value, SOR_LANE_SEQ_CTL);
-
-	while (true) {
-		value = tegra_sor_readl(sor, SOR_LANE_SEQ_CTL);
-		if ((value & SOR_LANE_SEQ_CTL_TRIGGER) == 0)
-			break;
-
-		usleep_range(250, 1000);
-	}
-
-	/* set link bandwidth */
-	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
-	value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK;
-	value |= drm_dp_link_rate_to_bw_code(link.rate) << 2;
-	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
-
-	tegra_sor_apply_config(sor, &config);
-
-	/* enable link */
+	/* enable port */
 	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
 	value |= SOR_DP_LINKCTL_ENABLE;
-	value |= SOR_DP_LINKCTL_ENHANCED_FRAME;
 	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
 
-	for (i = 0, value = 0; i < 4; i++) {
-		unsigned long lane = SOR_DP_TPG_CHANNEL_CODING |
-				     SOR_DP_TPG_SCRAMBLER_GALIOS |
-				     SOR_DP_TPG_PATTERN_NONE;
-		value = (value << 8) | lane;
+	/* calibrate termination resistance (XXX do this only on HPD) */
+	tegra_sor_dp_term_calibrate(sor);
+
+	err = drm_dp_link_train(&sor->link);
+	if (err < 0)
+		dev_err(sor->dev, "link training failed: %d\n", err);
+	else
+		dev_dbg(sor->dev, "link training succeeded\n");
+
+	err = drm_dp_link_power_up(sor->aux, &sor->link);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to power up eDP link: %d\n",
+			err);
+		return;
 	}
 
-	tegra_sor_writel(sor, value, SOR_DP_TPG);
+	/* compute configuration */
+	memset(&config, 0, sizeof(config));
+	config.bits_per_pixel = state->bpc * 3;
 
-	/* enable pad calibration logic */
-	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
-	value |= SOR_DP_PADCTL_PAD_CAL_PD;
-	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
-
-	err = drm_dp_link_probe(sor->aux, &link);
-	if (err < 0)
-		dev_err(sor->dev, "failed to probe eDP link: %d\n", err);
-
-	err = drm_dp_link_power_up(sor->aux, &link);
-	if (err < 0)
-		dev_err(sor->dev, "failed to power up eDP link: %d\n", err);
-
-	err = drm_dp_link_configure(sor->aux, &link);
-	if (err < 0)
-		dev_err(sor->dev, "failed to configure eDP link: %d\n", err);
-
-	rate = drm_dp_link_rate_to_bw_code(link.rate);
-	lanes = link.lanes;
-
-	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
-	value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK;
-	value |= SOR_CLK_CNTRL_DP_LINK_SPEED(rate);
-	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
-
-	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
-	value &= ~SOR_DP_LINKCTL_LANE_COUNT_MASK;
-	value |= SOR_DP_LINKCTL_LANE_COUNT(lanes);
-
-	if (link.caps.enhanced_framing)
-		value |= SOR_DP_LINKCTL_ENHANCED_FRAME;
-
-	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
-
-	/* disable training pattern generator */
-
-	for (i = 0; i < link.lanes; i++) {
-		unsigned long lane = SOR_DP_TPG_CHANNEL_CODING |
-				     SOR_DP_TPG_SCRAMBLER_GALIOS |
-				     SOR_DP_TPG_PATTERN_NONE;
-		value = (value << 8) | lane;
+	err = tegra_sor_compute_config(sor, mode, &config, &sor->link);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to compute configuration: %d\n", err);
+		return;
 	}
 
-	tegra_sor_writel(sor, value, SOR_DP_TPG);
-
-	err = tegra_sor_dp_train_fast(sor, &link);
-	if (err < 0)
-		dev_err(sor->dev, "DP fast link training failed: %d\n", err);
-
-	dev_dbg(sor->dev, "fast link training succeeded\n");
+	tegra_sor_apply_config(sor, &config);
 
 	err = tegra_sor_power_up(sor, 250);
 	if (err < 0)
@@ -2842,6 +2945,9 @@ static int tegra_sor_init(struct host1x_client *client)
 			connector = DRM_MODE_CONNECTOR_DisplayPort;
 			encoder = DRM_MODE_ENCODER_TMDS;
 		}
+
+		sor->link.ops = &tegra_sor_dp_link_ops;
+		sor->link.aux = sor->aux;
 	}
 
 	sor->output.dev = sor->dev;
@@ -3042,6 +3148,107 @@ static const struct tegra_sor_regs tegra124_sor_regs = {
 	.dp_padctl2 = 0x73,
 };
 
+/* Tegra124 and Tegra132 have lanes 0 and 2 swapped. */
+static const u8 tegra124_sor_lane_map[4] = {
+	2, 1, 0, 3,
+};
+
+static const u8 tegra124_sor_voltage_swing[4][4][4] = {
+	{
+		{ 0x13, 0x19, 0x1e, 0x28 },
+		{ 0x1e, 0x25, 0x2d, },
+		{ 0x28, 0x32, },
+		{ 0x3c, },
+	}, {
+		{ 0x12, 0x17, 0x1b, 0x25 },
+		{ 0x1c, 0x23, 0x2a, },
+		{ 0x25, 0x2f, },
+		{ 0x39, }
+	}, {
+		{ 0x12, 0x16, 0x1a, 0x22 },
+		{ 0x1b, 0x20, 0x27, },
+		{ 0x24, 0x2d, },
+		{ 0x36, },
+	}, {
+		{ 0x11, 0x14, 0x17, 0x1f },
+		{ 0x19, 0x1e, 0x24, },
+		{ 0x22, 0x2a, },
+		{ 0x32, },
+	},
+};
+
+static const u8 tegra124_sor_pre_emphasis[4][4][4] = {
+	{
+		{ 0x00, 0x09, 0x13, 0x25 },
+		{ 0x00, 0x0f, 0x1e, },
+		{ 0x00, 0x14, },
+		{ 0x00, },
+	}, {
+		{ 0x00, 0x0a, 0x14, 0x28 },
+		{ 0x00, 0x0f, 0x1e, },
+		{ 0x00, 0x14, },
+		{ 0x00 },
+	}, {
+		{ 0x00, 0x0a, 0x14, 0x28 },
+		{ 0x00, 0x0f, 0x1e, },
+		{ 0x00, 0x14, },
+		{ 0x00, },
+	}, {
+		{ 0x00, 0x0a, 0x14, 0x28 },
+		{ 0x00, 0x0f, 0x1e, },
+		{ 0x00, 0x14, },
+		{ 0x00, },
+	},
+};
+
+static const u8 tegra124_sor_post_cursor[4][4][4] = {
+	{
+		{ 0x00, 0x00, 0x00, 0x00 },
+		{ 0x00, 0x00, 0x00, },
+		{ 0x00, 0x00, },
+		{ 0x00, },
+	}, {
+		{ 0x02, 0x02, 0x04, 0x05 },
+		{ 0x02, 0x04, 0x05, },
+		{ 0x04, 0x05, },
+		{ 0x05, },
+	}, {
+		{ 0x04, 0x05, 0x08, 0x0b },
+		{ 0x05, 0x09, 0x0b, },
+		{ 0x08, 0x0a, },
+		{ 0x0b, },
+	}, {
+		{ 0x05, 0x09, 0x0b, 0x12 },
+		{ 0x09, 0x0d, 0x12, },
+		{ 0x0b, 0x0f, },
+		{ 0x12, },
+	},
+};
+
+static const u8 tegra124_sor_tx_pu[4][4][4] = {
+	{
+		{ 0x20, 0x30, 0x40, 0x60 },
+		{ 0x30, 0x40, 0x60, },
+		{ 0x40, 0x60, },
+		{ 0x60, },
+	}, {
+		{ 0x20, 0x20, 0x30, 0x50 },
+		{ 0x30, 0x40, 0x50, },
+		{ 0x40, 0x50, },
+		{ 0x60, },
+	}, {
+		{ 0x20, 0x20, 0x30, 0x40, },
+		{ 0x30, 0x30, 0x40, },
+		{ 0x40, 0x50, },
+		{ 0x60, },
+	}, {
+		{ 0x20, 0x20, 0x20, 0x40, },
+		{ 0x30, 0x30, 0x40, },
+		{ 0x40, 0x40, },
+		{ 0x60, },
+	},
+};
+
 static const struct tegra_sor_soc tegra124_sor = {
 	.supports_edp = true,
 	.supports_lvds = true,
@@ -3050,6 +3257,50 @@ static const struct tegra_sor_soc tegra124_sor = {
 	.regs = &tegra124_sor_regs,
 	.has_nvdisplay = false,
 	.xbar_cfg = tegra124_sor_xbar_cfg,
+	.lane_map = tegra124_sor_lane_map,
+	.voltage_swing = tegra124_sor_voltage_swing,
+	.pre_emphasis = tegra124_sor_pre_emphasis,
+	.post_cursor = tegra124_sor_post_cursor,
+	.tx_pu = tegra124_sor_tx_pu,
+};
+
+static const u8 tegra132_sor_pre_emphasis[4][4][4] = {
+	{
+		{ 0x00, 0x08, 0x12, 0x24 },
+		{ 0x01, 0x0e, 0x1d, },
+		{ 0x01, 0x13, },
+		{ 0x00, },
+	}, {
+		{ 0x00, 0x08, 0x12, 0x24 },
+		{ 0x00, 0x0e, 0x1d, },
+		{ 0x00, 0x13, },
+		{ 0x00 },
+	}, {
+		{ 0x00, 0x08, 0x12, 0x24 },
+		{ 0x00, 0x0e, 0x1d, },
+		{ 0x00, 0x13, },
+		{ 0x00, },
+	}, {
+		{ 0x00, 0x08, 0x12, 0x24 },
+		{ 0x00, 0x0e, 0x1d, },
+		{ 0x00, 0x13, },
+		{ 0x00, },
+	},
+};
+
+static const struct tegra_sor_soc tegra132_sor = {
+	.supports_edp = true,
+	.supports_lvds = true,
+	.supports_hdmi = false,
+	.supports_dp = false,
+	.regs = &tegra124_sor_regs,
+	.has_nvdisplay = false,
+	.xbar_cfg = tegra124_sor_xbar_cfg,
+	.lane_map = tegra124_sor_lane_map,
+	.voltage_swing = tegra124_sor_voltage_swing,
+	.pre_emphasis = tegra132_sor_pre_emphasis,
+	.post_cursor = tegra124_sor_post_cursor,
+	.tx_pu = tegra124_sor_tx_pu,
 };
 
 static const struct tegra_sor_regs tegra210_sor_regs = {
@@ -3067,18 +3318,20 @@ static const struct tegra_sor_regs tegra210_sor_regs = {
 	.dp_padctl2 = 0x73,
 };
 
+static const u8 tegra210_sor_xbar_cfg[5] = {
+	2, 1, 0, 3, 4
+};
+
 static const struct tegra_sor_soc tegra210_sor = {
 	.supports_edp = true,
 	.supports_lvds = false,
 	.supports_hdmi = false,
 	.supports_dp = false,
+
 	.regs = &tegra210_sor_regs,
 	.has_nvdisplay = false,
-	.xbar_cfg = tegra124_sor_xbar_cfg,
-};
 
-static const u8 tegra210_sor_xbar_cfg[5] = {
-	2, 1, 0, 3, 4
+	.xbar_cfg = tegra210_sor_xbar_cfg,
 };
 
 static const struct tegra_sor_soc tegra210_sor1 = {
@@ -3092,7 +3345,6 @@ static const struct tegra_sor_soc tegra210_sor1 = {
 
 	.num_settings = ARRAY_SIZE(tegra210_sor_hdmi_defaults),
 	.settings = tegra210_sor_hdmi_defaults,
-
 	.xbar_cfg = tegra210_sor_xbar_cfg,
 };
 
@@ -3134,7 +3386,6 @@ static const struct tegra_sor_soc tegra186_sor1 = {
 
 	.num_settings = ARRAY_SIZE(tegra186_sor_hdmi_defaults),
 	.settings = tegra186_sor_hdmi_defaults,
-
 	.xbar_cfg = tegra124_sor_xbar_cfg,
 };
 
@@ -3174,6 +3425,7 @@ static const struct of_device_id tegra_sor_of_match[] = {
 	{ .compatible = "nvidia,tegra186-sor", .data = &tegra186_sor },
 	{ .compatible = "nvidia,tegra210-sor1", .data = &tegra210_sor1 },
 	{ .compatible = "nvidia,tegra210-sor", .data = &tegra210_sor },
+	{ .compatible = "nvidia,tegra132-sor", .data = &tegra132_sor },
 	{ .compatible = "nvidia,tegra124-sor", .data = &tegra124_sor },
 	{ },
 };
diff --git a/drivers/gpu/drm/tegra/sor.h b/drivers/gpu/drm/tegra/sor.h
index f8efd8be4b7c..5333406c0401 100644
--- a/drivers/gpu/drm/tegra/sor.h
+++ b/drivers/gpu/drm/tegra/sor.h
@@ -283,10 +283,12 @@
 #define  SOR_DP_PADCTL_CM_TXD_2		(1 << 6)
 #define  SOR_DP_PADCTL_CM_TXD_1		(1 << 5)
 #define  SOR_DP_PADCTL_CM_TXD_0		(1 << 4)
+#define  SOR_DP_PADCTL_CM_TXD(x)	(1 << (4 + (x)))
 #define  SOR_DP_PADCTL_PD_TXD_3		(1 << 3)
 #define  SOR_DP_PADCTL_PD_TXD_0		(1 << 2)
 #define  SOR_DP_PADCTL_PD_TXD_1		(1 << 1)
 #define  SOR_DP_PADCTL_PD_TXD_2		(1 << 0)
+#define  SOR_DP_PADCTL_PD_TXD(x)	(1 << (0 + (x)))
 
 #define SOR_DP_PADCTL1 0x5d
 

From 6f684de537bcd813bcbdbc917c5b41b1fc5b7d8d Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 21 Jul 2015 16:59:28 +0200
Subject: [PATCH 40/68] drm/tegra: sor: Hook up I2C-over-AUX to output

This is necessary for the output abstraction to retrieve a list of valid
modes from the EDID of a connected panel/monitor. This will be useful in
conjunction with DisplayPort support that will be added in a subsequent
patch, so that the driver can read EDID via the AUX channel.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 636807e047f0..6f979c5fff7b 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -3524,6 +3524,8 @@ static int tegra_sor_probe(struct platform_device *pdev)
 
 		if (!sor->aux)
 			return -EPROBE_DEFER;
+
+		sor->output.ddc = &sor->aux->ddc;
 	}
 
 	if (!sor->aux) {

From 38b445bc135e7eb3aa7b05316020a1d5194554f9 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 3 Aug 2015 15:53:08 +0200
Subject: [PATCH 41/68] drm/tegra: sor: Stabilize eDP

Rework eDP code to correspond more closely to what's documented. This
also improves the reliability of modesets.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 148 +++++++++++++-----------------------
 1 file changed, 54 insertions(+), 94 deletions(-)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 6f979c5fff7b..2023244ad328 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -1878,119 +1878,80 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 
 	pm_runtime_get_sync(sor->dev);
 
-	if (output->panel)
-		drm_panel_prepare(output->panel);
-
-	err = drm_dp_aux_enable(sor->aux);
-	if (err < 0)
-		dev_err(sor->dev, "failed to enable DP: %d\n", err);
-
-	err = drm_dp_link_probe(sor->aux, &sor->link);
-	if (err < 0) {
-		dev_err(sor->dev, "failed to probe eDP link: %d\n", err);
-		return;
-	}
-
 	/* switch to safe parent clock */
 	err = tegra_sor_set_parent_clock(sor, sor->clk_safe);
 	if (err < 0)
 		dev_err(sor->dev, "failed to set safe parent clock: %d\n", err);
 
-	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
-	value &= ~SOR_CLK_CNTRL_DP_CLK_SEL_MASK;
-	value |= SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_DPCLK;
-	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
+	err = tegra_io_rail_power_on(TEGRA_IO_RAIL_LVDS);
+	if (err < 0)
+		dev_err(sor->dev, "failed to power on LVDS rail: %d\n", err);
+
+	usleep_range(20, 100);
+
+	err = drm_dp_aux_enable(sor->aux);
+	if (err < 0)
+		dev_err(sor->dev, "failed to enable DPAUX: %d\n", err);
+
+	err = drm_dp_link_probe(sor->aux, &sor->link);
+	if (err < 0)
+		dev_err(sor->dev, "failed to probe eDP link: %d\n", err);
+
+	err = drm_dp_link_choose(&sor->link, mode, info);
+	if (err < 0)
+		dev_err(sor->dev, "failed to choose link: %d\n", err);
+
+	if (output->panel)
+		drm_panel_prepare(output->panel);
 
 	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value &= ~SOR_PLL2_BANDGAP_POWERDOWN;
 	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
-	usleep_range(20, 100);
+
+	usleep_range(20, 40);
 
 	value = tegra_sor_readl(sor, sor->soc->regs->pll3);
 	value |= SOR_PLL3_PLL_VDD_MODE_3V3;
 	tegra_sor_writel(sor, value, sor->soc->regs->pll3);
 
-	value = SOR_PLL0_ICHPMP(0xf) | SOR_PLL0_VCOCAP_RST |
-		SOR_PLL0_PLLREG_LEVEL_V45 | SOR_PLL0_RESISTOR_EXT;
+	value = tegra_sor_readl(sor, sor->soc->regs->pll0);
+	value &= ~(SOR_PLL0_VCOPD | SOR_PLL0_PWR);
 	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
 
 	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-	value |= SOR_PLL2_SEQ_PLLCAPPD;
 	value &= ~SOR_PLL2_SEQ_PLLCAPPD_ENFORCE;
-	value |= SOR_PLL2_LVDS_ENABLE;
+	value |= SOR_PLL2_SEQ_PLLCAPPD;
 	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
 
-	value = SOR_PLL1_TERM_COMPOUT | SOR_PLL1_TMDS_TERM;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll1);
-
-	while (true) {
-		value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-		if ((value & SOR_PLL2_SEQ_PLLCAPPD_ENFORCE) == 0)
-			break;
-
-		usleep_range(250, 1000);
-	}
+	usleep_range(200, 400);
 
 	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value &= ~SOR_PLL2_POWERDOWN_OVERRIDE;
 	value &= ~SOR_PLL2_PORT_POWERDOWN;
 	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
 
-	/*
-	 * power up
-	 */
-
-	/* set safe link bandwidth (1.62 Gbps) */
 	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
-	value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK;
-	value |= SOR_CLK_CNTRL_DP_LINK_SPEED_G1_62;
+	value &= ~SOR_CLK_CNTRL_DP_CLK_SEL_MASK;
+	value |= SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_DPCLK;
 	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
 
-	/* step 1 */
-	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-	value |= SOR_PLL2_SEQ_PLLCAPPD_ENFORCE | SOR_PLL2_PORT_POWERDOWN |
-		 SOR_PLL2_BANDGAP_POWERDOWN;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
+	value = tegra_sor_readl(sor, SOR_DP_SPARE0);
+	/* XXX not in TRM */
+	value |= SOR_DP_SPARE_PANEL_INTERNAL;
+	value |= SOR_DP_SPARE_SEQ_ENABLE;
+	tegra_sor_writel(sor, value, SOR_DP_SPARE0);
+
+	/* XXX not in TRM */
+	tegra_sor_writel(sor, 0, SOR_LVDS);
 
 	value = tegra_sor_readl(sor, sor->soc->regs->pll0);
-	value |= SOR_PLL0_VCOPD | SOR_PLL0_PWR;
+	value &= ~SOR_PLL0_ICHPMP_MASK;
+	value &= ~SOR_PLL0_VCOCAP_MASK;
+	value |= SOR_PLL0_ICHPMP(0x1);
+	value |= SOR_PLL0_VCOCAP(0x3);
+	value |= SOR_PLL0_RESISTOR_EXT;
 	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
 
-	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
-	value &= ~SOR_DP_PADCTL_PAD_CAL_PD;
-	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
-
-	/* step 2 */
-	err = tegra_io_pad_power_enable(sor->pad);
-	if (err < 0)
-		dev_err(sor->dev, "failed to power on I/O pad: %d\n", err);
-
-	usleep_range(5, 100);
-
-	/* step 3 */
-	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-	value &= ~SOR_PLL2_BANDGAP_POWERDOWN;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
-
-	usleep_range(20, 100);
-
-	/* step 4 */
-	value = tegra_sor_readl(sor, sor->soc->regs->pll0);
-	value &= ~SOR_PLL0_VCOPD;
-	value &= ~SOR_PLL0_PWR;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
-
-	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-	value &= ~SOR_PLL2_SEQ_PLLCAPPD_ENFORCE;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
-
-	usleep_range(200, 1000);
-
-	/* step 5 */
-	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-	value &= ~SOR_PLL2_PORT_POWERDOWN;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
-
 	/* XXX not in TRM */
 	for (value = 0, i = 0; i < 5; i++)
 		value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->xbar_cfg[i]) |
@@ -2015,7 +1976,6 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	value |= SOR_DP_LINKCTL_ENABLE;
 	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
 
-	/* calibrate termination resistance (XXX do this only on HPD) */
 	tegra_sor_dp_term_calibrate(sor);
 
 	err = drm_dp_link_train(&sor->link);
@@ -2025,21 +1985,16 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 		dev_dbg(sor->dev, "link training succeeded\n");
 
 	err = drm_dp_link_power_up(sor->aux, &sor->link);
-	if (err < 0) {
-		dev_err(sor->dev, "failed to power up eDP link: %d\n",
-			err);
-		return;
-	}
+	if (err < 0)
+		dev_err(sor->dev, "failed to power up eDP link: %d\n", err);
 
 	/* compute configuration */
 	memset(&config, 0, sizeof(config));
 	config.bits_per_pixel = state->bpc * 3;
 
 	err = tegra_sor_compute_config(sor, mode, &config, &sor->link);
-	if (err < 0) {
+	if (err < 0)
 		dev_err(sor->dev, "failed to compute configuration: %d\n", err);
-		return;
-	}
 
 	tegra_sor_apply_config(sor, &config);
 
@@ -2067,19 +2022,24 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 
 	tegra_sor_update(sor);
 
+	err = tegra_sor_power_up(sor, 250);
+	if (err < 0)
+		dev_err(sor->dev, "failed to power up SOR: %d\n", err);
+
+	/* attach and wake up */
+	err = tegra_sor_attach(sor);
+	if (err < 0)
+		dev_err(sor->dev, "failed to attach SOR: %d\n", err);
+
 	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
 	value |= SOR_ENABLE(0);
 	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
 
 	tegra_dc_commit(dc);
 
-	err = tegra_sor_attach(sor);
-	if (err < 0)
-		dev_err(sor->dev, "failed to attach SOR: %d\n", err);
-
 	err = tegra_sor_wakeup(sor);
 	if (err < 0)
-		dev_err(sor->dev, "failed to enable DC: %d\n", err);
+		dev_err(sor->dev, "failed to wakeup SOR: %d\n", err);
 
 	if (output->panel)
 		drm_panel_enable(output->panel);

From c9533131feebc153feef497ad189d22c00462f4c Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 1 Feb 2018 17:47:07 +0100
Subject: [PATCH 42/68] drm/tegra: sor: Filter eDP rates

The SOR found on Tegra SoCs does not support all the rates potentially
advertised by eDP 1.4. Make sure that the rates that are not supported
are filtered out.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 2023244ad328..9e6a1ab7ef65 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -605,6 +605,30 @@ static struct clk *tegra_clk_sor_pad_register(struct tegra_sor *sor,
 	return clk;
 }
 
+static void tegra_sor_filter_rates(struct tegra_sor *sor)
+{
+	struct drm_dp_link *link = &sor->link;
+	unsigned int i;
+
+	/* Tegra only supports RBR, HBR and HBR2 */
+	for (i = 0; i < link->num_rates; i++) {
+		switch (link->rates[i]) {
+		case 1620000:
+		case 2700000:
+		case 5400000:
+			break;
+
+		default:
+			DRM_DEBUG_KMS("link rate %lu kHz not supported\n",
+				      link->rates[i]);
+			link->rates[i] = 0;
+			break;
+		}
+	}
+
+	drm_dp_link_update_rates(link);
+}
+
 static int tegra_sor_power_up_lanes(struct tegra_sor *sor, unsigned int lanes)
 {
 	unsigned long timeout;
@@ -1897,6 +1921,8 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	if (err < 0)
 		dev_err(sor->dev, "failed to probe eDP link: %d\n", err);
 
+	tegra_sor_filter_rates(sor);
+
 	err = drm_dp_link_choose(&sor->link, mode, info);
 	if (err < 0)
 		dev_err(sor->dev, "failed to choose link: %d\n", err);

From 0472c21b83192c61dbac7ba98abe8decacbd1d59 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Tue, 15 Oct 2019 14:59:37 +0200
Subject: [PATCH 43/68] drm/tegra: sor: Add DisplayPort support

Add support for regular DisplayPort on Tegra210 and Tegra186.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dp.c  |  10 +-
 drivers/gpu/drm/tegra/sor.c | 343 +++++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/tegra/sor.h |   1 +
 3 files changed, 348 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
index 5b6765d653b4..70dfb7d1dec5 100644
--- a/drivers/gpu/drm/tegra/dp.c
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -623,10 +623,10 @@ static int drm_dp_link_clock_recovery(struct drm_dp_link *link)
 			return err;
 		}
 
-		drm_dp_link_train_adjust(&link->train);
-
 		if (link->train.clock_recovered)
 			break;
+
+		drm_dp_link_train_adjust(&link->train);
 	}
 
 	return 0;
@@ -682,10 +682,10 @@ static int drm_dp_link_channel_equalization(struct drm_dp_link *link)
 			return err;
 		}
 
-		drm_dp_link_train_adjust(&link->train);
-
 		if (link->train.channel_equalized)
 			break;
+
+		drm_dp_link_train_adjust(&link->train);
 	}
 
 	return 0;
@@ -851,6 +851,8 @@ int drm_dp_link_train(struct drm_dp_link *link)
 {
 	int err;
 
+	drm_dp_link_train_init(&link->train);
+
 	if (link->caps.fast_training) {
 		if (drm_dp_link_train_valid(&link->train)) {
 			err = drm_dp_link_train_fast(link);
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 9e6a1ab7ef65..9cf008d7c67b 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -2904,6 +2904,236 @@ static const struct drm_encoder_helper_funcs tegra_sor_hdmi_helpers = {
 	.atomic_check = tegra_sor_encoder_atomic_check,
 };
 
+static void tegra_sor_dp_disable(struct drm_encoder *encoder)
+{
+	struct tegra_output *output = encoder_to_output(encoder);
+	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
+	struct tegra_sor *sor = to_sor(output);
+	u32 value;
+	int err;
+
+	err = drm_dp_link_power_down(sor->aux, &sor->link);
+	if (err < 0)
+		dev_err(sor->dev, "failed to power down link: %d\n", err);
+
+	err = tegra_sor_detach(sor);
+	if (err < 0)
+		dev_err(sor->dev, "failed to detach SOR: %d\n", err);
+
+	tegra_sor_writel(sor, 0, SOR_STATE1);
+	tegra_sor_update(sor);
+
+	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
+
+	if (!sor->soc->has_nvdisplay)
+		value &= ~(SOR1_TIMING_CYA | SOR_ENABLE(1));
+	else
+		value &= ~SOR_ENABLE(sor->index);
+
+	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
+	tegra_dc_commit(dc);
+
+	value = tegra_sor_readl(sor, SOR_STATE1);
+	value &= ~SOR_STATE_ASY_PROTOCOL_MASK;
+	value &= ~SOR_STATE_ASY_SUBOWNER_MASK;
+	value &= ~SOR_STATE_ASY_OWNER_MASK;
+	tegra_sor_writel(sor, value, SOR_STATE1);
+	tegra_sor_update(sor);
+
+	/* switch to safe parent clock */
+	err = tegra_sor_set_parent_clock(sor, sor->clk_safe);
+	if (err < 0)
+		dev_err(sor->dev, "failed to set safe clock: %d\n", err);
+
+	err = tegra_sor_power_down(sor);
+	if (err < 0)
+		dev_err(sor->dev, "failed to power down SOR: %d\n", err);
+
+	err = tegra_io_pad_power_disable(sor->pad);
+	if (err < 0)
+		dev_err(sor->dev, "failed to power off I/O pad: %d\n", err);
+
+	err = drm_dp_aux_disable(sor->aux);
+	if (err < 0)
+		dev_err(sor->dev, "failed disable DPAUX: %d\n", err);
+
+	pm_runtime_put(sor->dev);
+}
+
+static void tegra_sor_dp_enable(struct drm_encoder *encoder)
+{
+	struct tegra_output *output = encoder_to_output(encoder);
+	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
+	struct tegra_sor *sor = to_sor(output);
+	struct tegra_sor_config config;
+	struct tegra_sor_state *state;
+	struct drm_display_mode *mode;
+	struct drm_display_info *info;
+	unsigned int i;
+	u32 value;
+	int err;
+
+	state = to_sor_state(output->connector.state);
+	mode = &encoder->crtc->state->adjusted_mode;
+	info = &output->connector.display_info;
+
+	pm_runtime_get_sync(sor->dev);
+
+	/* switch to safe parent clock */
+	err = tegra_sor_set_parent_clock(sor, sor->clk_safe);
+	if (err < 0)
+		dev_err(sor->dev, "failed to set safe parent clock: %d\n", err);
+
+	err = tegra_io_pad_power_enable(sor->pad);
+	if (err < 0)
+		dev_err(sor->dev, "failed to power on LVDS rail: %d\n", err);
+
+	usleep_range(20, 100);
+
+	err = drm_dp_aux_enable(sor->aux);
+	if (err < 0)
+		dev_err(sor->dev, "failed to enable DPAUX: %d\n", err);
+
+	err = drm_dp_link_probe(sor->aux, &sor->link);
+	if (err < 0)
+		dev_err(sor->dev, "failed to probe DP link: %d\n", err);
+
+	err = drm_dp_link_choose(&sor->link, mode, info);
+	if (err < 0)
+		dev_err(sor->dev, "failed to choose link: %d\n", err);
+
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
+	value &= ~SOR_PLL2_BANDGAP_POWERDOWN;
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
+
+	usleep_range(20, 40);
+
+	value = tegra_sor_readl(sor, sor->soc->regs->pll3);
+	value |= SOR_PLL3_PLL_VDD_MODE_3V3;
+	tegra_sor_writel(sor, value, sor->soc->regs->pll3);
+
+	value = tegra_sor_readl(sor, sor->soc->regs->pll0);
+	value &= ~(SOR_PLL0_VCOPD | SOR_PLL0_PWR);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
+
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
+	value &= ~SOR_PLL2_SEQ_PLLCAPPD_ENFORCE;
+	value |= SOR_PLL2_SEQ_PLLCAPPD;
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
+
+	usleep_range(200, 400);
+
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
+	value &= ~SOR_PLL2_POWERDOWN_OVERRIDE;
+	value &= ~SOR_PLL2_PORT_POWERDOWN;
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
+
+	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
+	value &= ~SOR_CLK_CNTRL_DP_CLK_SEL_MASK;
+	value |= SOR_CLK_CNTRL_DP_CLK_SEL_DIFF_DPCLK;
+	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
+
+	usleep_range(200, 400);
+
+	value = tegra_sor_readl(sor, SOR_DP_SPARE0);
+	/* XXX not in TRM */
+	value &= ~SOR_DP_SPARE_PANEL_INTERNAL;
+	value |= SOR_DP_SPARE_SEQ_ENABLE;
+	tegra_sor_writel(sor, value, SOR_DP_SPARE0);
+
+	/* XXX not in TRM */
+	tegra_sor_writel(sor, 0, SOR_LVDS);
+
+	value = tegra_sor_readl(sor, sor->soc->regs->pll0);
+	value &= ~SOR_PLL0_ICHPMP_MASK;
+	value &= ~SOR_PLL0_VCOCAP_MASK;
+	value |= SOR_PLL0_ICHPMP(0x1);
+	value |= SOR_PLL0_VCOCAP(0x3);
+	value |= SOR_PLL0_RESISTOR_EXT;
+	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
+
+	/* XXX not in TRM */
+	for (value = 0, i = 0; i < 5; i++)
+		value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->soc->xbar_cfg[i]) |
+			 SOR_XBAR_CTRL_LINK1_XSEL(i, i);
+
+	tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL);
+	tegra_sor_writel(sor, value, SOR_XBAR_CTRL);
+
+	/* switch to DP parent clock */
+	err = tegra_sor_set_parent_clock(sor, sor->clk_pad);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to switch to pad clock: %d\n", err);
+		return;
+	}
+
+	err = clk_set_parent(sor->clk, sor->clk_parent);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to switch to parent clock: %d\n", err);
+		return;
+	}
+
+	/* use DP-A protocol */
+	value = tegra_sor_readl(sor, SOR_STATE1);
+	value &= ~SOR_STATE_ASY_PROTOCOL_MASK;
+	value |= SOR_STATE_ASY_PROTOCOL_DP_A;
+	tegra_sor_writel(sor, value, SOR_STATE1);
+
+	/* enable port */
+	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
+	value |= SOR_DP_LINKCTL_ENABLE;
+	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
+
+	tegra_sor_dp_term_calibrate(sor);
+
+	err = drm_dp_link_train(&sor->link);
+	if (err < 0)
+		dev_err(sor->dev, "link training failed: %d\n", err);
+	else
+		dev_dbg(sor->dev, "link training succeeded\n");
+
+	err = drm_dp_link_power_up(sor->aux, &sor->link);
+	if (err < 0)
+		dev_err(sor->dev, "failed to power up DP link: %d\n", err);
+
+	/* compute configuration */
+	memset(&config, 0, sizeof(config));
+	config.bits_per_pixel = state->bpc * 3;
+
+	err = tegra_sor_compute_config(sor, mode, &config, &sor->link);
+	if (err < 0)
+		dev_err(sor->dev, "failed to compute configuration: %d\n", err);
+
+	tegra_sor_apply_config(sor, &config);
+	tegra_sor_mode_set(sor, mode, state);
+	tegra_sor_update(sor);
+
+	err = tegra_sor_power_up(sor, 250);
+	if (err < 0)
+		dev_err(sor->dev, "failed to power up SOR: %d\n", err);
+
+	/* attach and wake up */
+	err = tegra_sor_attach(sor);
+	if (err < 0)
+		dev_err(sor->dev, "failed to attach SOR: %d\n", err);
+
+	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
+	value |= SOR_ENABLE(sor->index);
+	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
+
+	tegra_dc_commit(dc);
+
+	err = tegra_sor_wakeup(sor);
+	if (err < 0)
+		dev_err(sor->dev, "failed to wakeup SOR: %d\n", err);
+}
+
+static const struct drm_encoder_helper_funcs tegra_sor_dp_helpers = {
+	.disable = tegra_sor_dp_disable,
+	.enable = tegra_sor_dp_enable,
+	.atomic_check = tegra_sor_encoder_atomic_check,
+};
+
 static int tegra_sor_init(struct host1x_client *client)
 {
 	struct drm_device *drm = dev_get_drvdata(client->parent);
@@ -2930,6 +3160,7 @@ static int tegra_sor_init(struct host1x_client *client)
 		} else if (sor->soc->supports_dp) {
 			connector = DRM_MODE_CONNECTOR_DisplayPort;
 			encoder = DRM_MODE_ENCODER_TMDS;
+			helpers = &tegra_sor_dp_helpers;
 		}
 
 		sor->link.ops = &tegra_sor_dp_link_ops;
@@ -3115,6 +3346,43 @@ static const struct tegra_sor_ops tegra_sor_hdmi_ops = {
 	.remove = tegra_sor_hdmi_remove,
 };
 
+static int tegra_sor_dp_probe(struct tegra_sor *sor)
+{
+	int err;
+
+	sor->avdd_io_supply = devm_regulator_get(sor->dev, "avdd-io-hdmi-dp");
+	if (IS_ERR(sor->avdd_io_supply))
+		return PTR_ERR(sor->avdd_io_supply);
+
+	err = regulator_enable(sor->avdd_io_supply);
+	if (err < 0)
+		return err;
+
+	sor->vdd_pll_supply = devm_regulator_get(sor->dev, "vdd-hdmi-dp-pll");
+	if (IS_ERR(sor->vdd_pll_supply))
+		return PTR_ERR(sor->vdd_pll_supply);
+
+	err = regulator_enable(sor->vdd_pll_supply);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int tegra_sor_dp_remove(struct tegra_sor *sor)
+{
+	regulator_disable(sor->vdd_pll_supply);
+	regulator_disable(sor->avdd_io_supply);
+
+	return 0;
+}
+
+static const struct tegra_sor_ops tegra_sor_dp_ops = {
+	.name = "DP",
+	.probe = tegra_sor_dp_probe,
+	.remove = tegra_sor_dp_remove,
+};
+
 static const u8 tegra124_sor_xbar_cfg[5] = {
 	0, 1, 2, 3, 4
 };
@@ -3308,6 +3576,10 @@ static const u8 tegra210_sor_xbar_cfg[5] = {
 	2, 1, 0, 3, 4
 };
 
+static const u8 tegra210_sor_lane_map[4] = {
+	0, 1, 2, 3,
+};
+
 static const struct tegra_sor_soc tegra210_sor = {
 	.supports_edp = true,
 	.supports_lvds = false,
@@ -3318,6 +3590,11 @@ static const struct tegra_sor_soc tegra210_sor = {
 	.has_nvdisplay = false,
 
 	.xbar_cfg = tegra210_sor_xbar_cfg,
+	.lane_map = tegra210_sor_lane_map,
+	.voltage_swing = tegra124_sor_voltage_swing,
+	.pre_emphasis = tegra124_sor_pre_emphasis,
+	.post_cursor = tegra124_sor_post_cursor,
+	.tx_pu = tegra124_sor_tx_pu,
 };
 
 static const struct tegra_sor_soc tegra210_sor1 = {
@@ -3332,6 +3609,11 @@ static const struct tegra_sor_soc tegra210_sor1 = {
 	.num_settings = ARRAY_SIZE(tegra210_sor_hdmi_defaults),
 	.settings = tegra210_sor_hdmi_defaults,
 	.xbar_cfg = tegra210_sor_xbar_cfg,
+	.lane_map = tegra210_sor_lane_map,
+	.voltage_swing = tegra124_sor_voltage_swing,
+	.pre_emphasis = tegra124_sor_pre_emphasis,
+	.post_cursor = tegra124_sor_post_cursor,
+	.tx_pu = tegra124_sor_tx_pu,
 };
 
 static const struct tegra_sor_regs tegra186_sor_regs = {
@@ -3349,6 +3631,54 @@ static const struct tegra_sor_regs tegra186_sor_regs = {
 	.dp_padctl2 = 0x16a,
 };
 
+static const u8 tegra186_sor_voltage_swing[4][4][4] = {
+	{
+		{ 0x13, 0x19, 0x1e, 0x28 },
+		{ 0x1e, 0x25, 0x2d, },
+		{ 0x28, 0x32, },
+		{ 0x39, },
+	}, {
+		{ 0x12, 0x16, 0x1b, 0x25 },
+		{ 0x1c, 0x23, 0x2a, },
+		{ 0x25, 0x2f, },
+		{ 0x37, }
+	}, {
+		{ 0x12, 0x16, 0x1a, 0x22 },
+		{ 0x1b, 0x20, 0x27, },
+		{ 0x24, 0x2d, },
+		{ 0x35, },
+	}, {
+		{ 0x11, 0x14, 0x17, 0x1f },
+		{ 0x19, 0x1e, 0x24, },
+		{ 0x22, 0x2a, },
+		{ 0x32, },
+	},
+};
+
+static const u8 tegra186_sor_pre_emphasis[4][4][4] = {
+	{
+		{ 0x00, 0x08, 0x12, 0x24 },
+		{ 0x01, 0x0e, 0x1d, },
+		{ 0x01, 0x13, },
+		{ 0x00, },
+	}, {
+		{ 0x00, 0x08, 0x12, 0x24 },
+		{ 0x00, 0x0e, 0x1d, },
+		{ 0x00, 0x13, },
+		{ 0x00 },
+	}, {
+		{ 0x00, 0x08, 0x14, 0x24 },
+		{ 0x00, 0x0e, 0x1d, },
+		{ 0x00, 0x13, },
+		{ 0x00, },
+	}, {
+		{ 0x00, 0x08, 0x12, 0x24 },
+		{ 0x00, 0x0e, 0x1d, },
+		{ 0x00, 0x13, },
+		{ 0x00, },
+	},
+};
+
 static const struct tegra_sor_soc tegra186_sor = {
 	.supports_edp = false,
 	.supports_lvds = false,
@@ -3359,6 +3689,11 @@ static const struct tegra_sor_soc tegra186_sor = {
 	.has_nvdisplay = true,
 
 	.xbar_cfg = tegra124_sor_xbar_cfg,
+	.lane_map = tegra124_sor_lane_map,
+	.voltage_swing = tegra186_sor_voltage_swing,
+	.pre_emphasis = tegra186_sor_pre_emphasis,
+	.post_cursor = tegra124_sor_post_cursor,
+	.tx_pu = tegra124_sor_tx_pu,
 };
 
 static const struct tegra_sor_soc tegra186_sor1 = {
@@ -3373,6 +3708,11 @@ static const struct tegra_sor_soc tegra186_sor1 = {
 	.num_settings = ARRAY_SIZE(tegra186_sor_hdmi_defaults),
 	.settings = tegra186_sor_hdmi_defaults,
 	.xbar_cfg = tegra124_sor_xbar_cfg,
+	.lane_map = tegra124_sor_lane_map,
+	.voltage_swing = tegra186_sor_voltage_swing,
+	.pre_emphasis = tegra186_sor_pre_emphasis,
+	.post_cursor = tegra124_sor_post_cursor,
+	.tx_pu = tegra124_sor_tx_pu,
 };
 
 static const struct tegra_sor_regs tegra194_sor_regs = {
@@ -3530,8 +3870,7 @@ static int tegra_sor_probe(struct platform_device *pdev)
 			sor->ops = &tegra_sor_edp_ops;
 			sor->pad = TEGRA_IO_PAD_LVDS;
 		} else if (sor->soc->supports_dp) {
-			dev_err(&pdev->dev, "DisplayPort not supported yet\n");
-			return -ENODEV;
+			sor->ops = &tegra_sor_dp_ops;
 		} else {
 			dev_err(&pdev->dev, "unknown (DP) support\n");
 			return -ENODEV;
diff --git a/drivers/gpu/drm/tegra/sor.h b/drivers/gpu/drm/tegra/sor.h
index 5333406c0401..00e09d5dca30 100644
--- a/drivers/gpu/drm/tegra/sor.h
+++ b/drivers/gpu/drm/tegra/sor.h
@@ -39,6 +39,7 @@
 #define  SOR_STATE_ASY_CRC_MODE_NON_ACTIVE	(0x2 << 6)
 #define  SOR_STATE_ASY_CRC_MODE_COMPLETE	(0x1 << 6)
 #define  SOR_STATE_ASY_CRC_MODE_ACTIVE		(0x0 << 6)
+#define  SOR_STATE_ASY_SUBOWNER_MASK		(0x3 << 4)
 #define  SOR_STATE_ASY_OWNER_MASK		0xf
 #define  SOR_STATE_ASY_OWNER(x)			(((x) & 0xf) << 0)
 

From b9b9e19762ac167312e17cf93965a46a8c8c3197 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 19 Mar 2018 10:30:37 +0100
Subject: [PATCH 44/68] drm/tegra: sor: Remove tegra186-sor1 support

It turns out that SOR1 is just another instance of the same block as the
SOR0, so there is no need to distinguish them.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 9cf008d7c67b..4e46524f22e3 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -3680,23 +3680,6 @@ static const u8 tegra186_sor_pre_emphasis[4][4][4] = {
 };
 
 static const struct tegra_sor_soc tegra186_sor = {
-	.supports_edp = false,
-	.supports_lvds = false,
-	.supports_hdmi = false,
-	.supports_dp = true,
-
-	.regs = &tegra186_sor_regs,
-	.has_nvdisplay = true,
-
-	.xbar_cfg = tegra124_sor_xbar_cfg,
-	.lane_map = tegra124_sor_lane_map,
-	.voltage_swing = tegra186_sor_voltage_swing,
-	.pre_emphasis = tegra186_sor_pre_emphasis,
-	.post_cursor = tegra124_sor_post_cursor,
-	.tx_pu = tegra124_sor_tx_pu,
-};
-
-static const struct tegra_sor_soc tegra186_sor1 = {
 	.supports_edp = false,
 	.supports_lvds = false,
 	.supports_hdmi = true,
@@ -3747,7 +3730,6 @@ static const struct tegra_sor_soc tegra194_sor = {
 
 static const struct of_device_id tegra_sor_of_match[] = {
 	{ .compatible = "nvidia,tegra194-sor", .data = &tegra194_sor },
-	{ .compatible = "nvidia,tegra186-sor1", .data = &tegra186_sor1 },
 	{ .compatible = "nvidia,tegra186-sor", .data = &tegra186_sor },
 	{ .compatible = "nvidia,tegra210-sor1", .data = &tegra210_sor1 },
 	{ .compatible = "nvidia,tegra210-sor", .data = &tegra210_sor },

From 24e64f86da40e68c5f58af08796110f147b12193 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 24 Jul 2019 17:06:17 +0200
Subject: [PATCH 45/68] drm/tegra: sor: Use correct SOR index on Tegra210

The device tree bindings for the Tegra210 SOR don't require the
controller instance to be defined, since the instance can be derived
from the compatible string. The index is never used on Tegra210, so we
got away with it not getting set. However, subsequent patches will
change that, so make sure the proper index is used.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 4e46524f22e3..c7faf088cabc 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -3759,6 +3759,11 @@ static int tegra_sor_parse_dt(struct tegra_sor *sor)
 		 * earlier
 		 */
 		sor->pad = TEGRA_IO_PAD_HDMI_DP0 + sor->index;
+	} else {
+		if (sor->soc->supports_edp)
+			sor->index = 0;
+		else
+			sor->index = 1;
 	}
 
 	err = of_property_read_u32_array(np, "nvidia,xbar-cfg", xbar_cfg, 5);

From 4bdf4710e00a502347e33dc2454821fa9af2968b Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 24 Jun 2019 17:06:34 +0200
Subject: [PATCH 46/68] drm/tegra: sor: Implement pad clock for all SOR
 instances

So far the pad clock was only needed on the second SOR instance. The
clock does exist for all SOR instances, though, so make sure it is
always implemented. This prepares for further unification of the code
in subsequent patches.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index c7faf088cabc..71a7ed57cb4f 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -522,8 +522,9 @@ static inline struct tegra_clk_sor_pad *to_pad(struct clk_hw *hw)
 	return container_of(hw, struct tegra_clk_sor_pad, hw);
 }
 
-static const char * const tegra_clk_sor_pad_parents[] = {
-	"pll_d2_out0", "pll_dp"
+static const char * const tegra_clk_sor_pad_parents[2][2] = {
+	{ "pll_d_out0", "pll_dp" },
+	{ "pll_d2_out0", "pll_dp" },
 };
 
 static int tegra_clk_sor_pad_set_parent(struct clk_hw *hw, u8 index)
@@ -594,8 +595,8 @@ static struct clk *tegra_clk_sor_pad_register(struct tegra_sor *sor,
 
 	init.name = name;
 	init.flags = 0;
-	init.parent_names = tegra_clk_sor_pad_parents;
-	init.num_parents = ARRAY_SIZE(tegra_clk_sor_pad_parents);
+	init.parent_names = tegra_clk_sor_pad_parents[sor->index];
+	init.num_parents = ARRAY_SIZE(tegra_clk_sor_pad_parents[sor->index]);
 	init.ops = &tegra_clk_sor_pad_ops;
 
 	pad->hw.init = &init;
@@ -4016,6 +4017,8 @@ static int tegra_sor_probe(struct platform_device *pdev)
 	 * pad output clock.
 	 */
 	if (!sor->clk_pad) {
+		char *name;
+
 		err = pm_runtime_get_sync(&pdev->dev);
 		if (err < 0) {
 			dev_err(&pdev->dev, "failed to get runtime PM: %d\n",
@@ -4023,8 +4026,13 @@ static int tegra_sor_probe(struct platform_device *pdev)
 			goto remove;
 		}
 
-		sor->clk_pad = tegra_clk_sor_pad_register(sor,
-							  "sor1_pad_clkout");
+		name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "sor%u_pad_clkout", sor->index);
+		if (!name) {
+			err = -ENOMEM;
+			goto remove;
+		}
+
+		sor->clk_pad = tegra_clk_sor_pad_register(sor, name);
 		pm_runtime_put(&pdev->dev);
 	}
 

From 1c3cc0df8299d95a11421ab596b3e9cdcce1a77d Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 27 Jun 2019 12:29:56 +0200
Subject: [PATCH 47/68] drm/tegra: sor: Deduplicate connector type detection
 code

The connector type detection code is duplicated in two places. Keeping
both places in sync is an extra maintenance burden that can be avoided
by comparing the connector type operations that are set upon the first
detection.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 218 ++++++++++++++++++------------------
 1 file changed, 109 insertions(+), 109 deletions(-)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 71a7ed57cb4f..af234d5efbd7 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -3135,6 +3135,112 @@ static const struct drm_encoder_helper_funcs tegra_sor_dp_helpers = {
 	.atomic_check = tegra_sor_encoder_atomic_check,
 };
 
+static const struct tegra_sor_ops tegra_sor_edp_ops = {
+	.name = "eDP",
+};
+
+static int tegra_sor_hdmi_probe(struct tegra_sor *sor)
+{
+	int err;
+
+	sor->avdd_io_supply = devm_regulator_get(sor->dev, "avdd-io");
+	if (IS_ERR(sor->avdd_io_supply)) {
+		dev_err(sor->dev, "cannot get AVDD I/O supply: %ld\n",
+			PTR_ERR(sor->avdd_io_supply));
+		return PTR_ERR(sor->avdd_io_supply);
+	}
+
+	err = regulator_enable(sor->avdd_io_supply);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to enable AVDD I/O supply: %d\n",
+			err);
+		return err;
+	}
+
+	sor->vdd_pll_supply = devm_regulator_get(sor->dev, "vdd-pll");
+	if (IS_ERR(sor->vdd_pll_supply)) {
+		dev_err(sor->dev, "cannot get VDD PLL supply: %ld\n",
+			PTR_ERR(sor->vdd_pll_supply));
+		return PTR_ERR(sor->vdd_pll_supply);
+	}
+
+	err = regulator_enable(sor->vdd_pll_supply);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to enable VDD PLL supply: %d\n",
+			err);
+		return err;
+	}
+
+	sor->hdmi_supply = devm_regulator_get(sor->dev, "hdmi");
+	if (IS_ERR(sor->hdmi_supply)) {
+		dev_err(sor->dev, "cannot get HDMI supply: %ld\n",
+			PTR_ERR(sor->hdmi_supply));
+		return PTR_ERR(sor->hdmi_supply);
+	}
+
+	err = regulator_enable(sor->hdmi_supply);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to enable HDMI supply: %d\n", err);
+		return err;
+	}
+
+	INIT_DELAYED_WORK(&sor->scdc, tegra_sor_hdmi_scdc_work);
+
+	return 0;
+}
+
+static int tegra_sor_hdmi_remove(struct tegra_sor *sor)
+{
+	regulator_disable(sor->hdmi_supply);
+	regulator_disable(sor->vdd_pll_supply);
+	regulator_disable(sor->avdd_io_supply);
+
+	return 0;
+}
+
+static const struct tegra_sor_ops tegra_sor_hdmi_ops = {
+	.name = "HDMI",
+	.probe = tegra_sor_hdmi_probe,
+	.remove = tegra_sor_hdmi_remove,
+};
+
+static int tegra_sor_dp_probe(struct tegra_sor *sor)
+{
+	int err;
+
+	sor->avdd_io_supply = devm_regulator_get(sor->dev, "avdd-io-hdmi-dp");
+	if (IS_ERR(sor->avdd_io_supply))
+		return PTR_ERR(sor->avdd_io_supply);
+
+	err = regulator_enable(sor->avdd_io_supply);
+	if (err < 0)
+		return err;
+
+	sor->vdd_pll_supply = devm_regulator_get(sor->dev, "vdd-hdmi-dp-pll");
+	if (IS_ERR(sor->vdd_pll_supply))
+		return PTR_ERR(sor->vdd_pll_supply);
+
+	err = regulator_enable(sor->vdd_pll_supply);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int tegra_sor_dp_remove(struct tegra_sor *sor)
+{
+	regulator_disable(sor->vdd_pll_supply);
+	regulator_disable(sor->avdd_io_supply);
+
+	return 0;
+}
+
+static const struct tegra_sor_ops tegra_sor_dp_ops = {
+	.name = "DP",
+	.probe = tegra_sor_dp_probe,
+	.remove = tegra_sor_dp_remove,
+};
+
 static int tegra_sor_init(struct host1x_client *client)
 {
 	struct drm_device *drm = dev_get_drvdata(client->parent);
@@ -3145,7 +3251,7 @@ static int tegra_sor_init(struct host1x_client *client)
 	int err;
 
 	if (!sor->aux) {
-		if (sor->soc->supports_hdmi) {
+		if (sor->ops == &tegra_sor_hdmi_ops) {
 			connector = DRM_MODE_CONNECTOR_HDMIA;
 			encoder = DRM_MODE_ENCODER_TMDS;
 			helpers = &tegra_sor_hdmi_helpers;
@@ -3154,11 +3260,11 @@ static int tegra_sor_init(struct host1x_client *client)
 			encoder = DRM_MODE_ENCODER_LVDS;
 		}
 	} else {
-		if (sor->soc->supports_edp) {
+		if (sor->ops == &tegra_sor_edp_ops) {
 			connector = DRM_MODE_CONNECTOR_eDP;
 			encoder = DRM_MODE_ENCODER_TMDS;
 			helpers = &tegra_sor_edp_helpers;
-		} else if (sor->soc->supports_dp) {
+		} else {
 			connector = DRM_MODE_CONNECTOR_DisplayPort;
 			encoder = DRM_MODE_ENCODER_TMDS;
 			helpers = &tegra_sor_dp_helpers;
@@ -3278,112 +3384,6 @@ static const struct host1x_client_ops sor_client_ops = {
 	.exit = tegra_sor_exit,
 };
 
-static const struct tegra_sor_ops tegra_sor_edp_ops = {
-	.name = "eDP",
-};
-
-static int tegra_sor_hdmi_probe(struct tegra_sor *sor)
-{
-	int err;
-
-	sor->avdd_io_supply = devm_regulator_get(sor->dev, "avdd-io");
-	if (IS_ERR(sor->avdd_io_supply)) {
-		dev_err(sor->dev, "cannot get AVDD I/O supply: %ld\n",
-			PTR_ERR(sor->avdd_io_supply));
-		return PTR_ERR(sor->avdd_io_supply);
-	}
-
-	err = regulator_enable(sor->avdd_io_supply);
-	if (err < 0) {
-		dev_err(sor->dev, "failed to enable AVDD I/O supply: %d\n",
-			err);
-		return err;
-	}
-
-	sor->vdd_pll_supply = devm_regulator_get(sor->dev, "vdd-pll");
-	if (IS_ERR(sor->vdd_pll_supply)) {
-		dev_err(sor->dev, "cannot get VDD PLL supply: %ld\n",
-			PTR_ERR(sor->vdd_pll_supply));
-		return PTR_ERR(sor->vdd_pll_supply);
-	}
-
-	err = regulator_enable(sor->vdd_pll_supply);
-	if (err < 0) {
-		dev_err(sor->dev, "failed to enable VDD PLL supply: %d\n",
-			err);
-		return err;
-	}
-
-	sor->hdmi_supply = devm_regulator_get(sor->dev, "hdmi");
-	if (IS_ERR(sor->hdmi_supply)) {
-		dev_err(sor->dev, "cannot get HDMI supply: %ld\n",
-			PTR_ERR(sor->hdmi_supply));
-		return PTR_ERR(sor->hdmi_supply);
-	}
-
-	err = regulator_enable(sor->hdmi_supply);
-	if (err < 0) {
-		dev_err(sor->dev, "failed to enable HDMI supply: %d\n", err);
-		return err;
-	}
-
-	INIT_DELAYED_WORK(&sor->scdc, tegra_sor_hdmi_scdc_work);
-
-	return 0;
-}
-
-static int tegra_sor_hdmi_remove(struct tegra_sor *sor)
-{
-	regulator_disable(sor->hdmi_supply);
-	regulator_disable(sor->vdd_pll_supply);
-	regulator_disable(sor->avdd_io_supply);
-
-	return 0;
-}
-
-static const struct tegra_sor_ops tegra_sor_hdmi_ops = {
-	.name = "HDMI",
-	.probe = tegra_sor_hdmi_probe,
-	.remove = tegra_sor_hdmi_remove,
-};
-
-static int tegra_sor_dp_probe(struct tegra_sor *sor)
-{
-	int err;
-
-	sor->avdd_io_supply = devm_regulator_get(sor->dev, "avdd-io-hdmi-dp");
-	if (IS_ERR(sor->avdd_io_supply))
-		return PTR_ERR(sor->avdd_io_supply);
-
-	err = regulator_enable(sor->avdd_io_supply);
-	if (err < 0)
-		return err;
-
-	sor->vdd_pll_supply = devm_regulator_get(sor->dev, "vdd-hdmi-dp-pll");
-	if (IS_ERR(sor->vdd_pll_supply))
-		return PTR_ERR(sor->vdd_pll_supply);
-
-	err = regulator_enable(sor->vdd_pll_supply);
-	if (err < 0)
-		return err;
-
-	return 0;
-}
-
-static int tegra_sor_dp_remove(struct tegra_sor *sor)
-{
-	regulator_disable(sor->vdd_pll_supply);
-	regulator_disable(sor->avdd_io_supply);
-
-	return 0;
-}
-
-static const struct tegra_sor_ops tegra_sor_dp_ops = {
-	.name = "DP",
-	.probe = tegra_sor_dp_probe,
-	.remove = tegra_sor_dp_remove,
-};
-
 static const u8 tegra124_sor_xbar_cfg[5] = {
 	0, 1, 2, 3, 4
 };

From bae88815ad10c47d1cee96987733741795ef338d Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 27 Jun 2019 12:32:16 +0200
Subject: [PATCH 48/68] drm/tegra: sor: Support DisplayPort on Tegra194

Reuse parameters from earlier generations to support DisplayPort on
Tegra194.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index af234d5efbd7..fdbd76ec1cf7 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -3727,6 +3727,11 @@ static const struct tegra_sor_soc tegra194_sor = {
 	.settings = tegra194_sor_hdmi_defaults,
 
 	.xbar_cfg = tegra210_sor_xbar_cfg,
+	.lane_map = tegra124_sor_lane_map,
+	.voltage_swing = tegra186_sor_voltage_swing,
+	.pre_emphasis = tegra186_sor_pre_emphasis,
+	.post_cursor = tegra124_sor_post_cursor,
+	.tx_pu = tegra124_sor_tx_pu,
 };
 
 static const struct of_device_id tegra_sor_of_match[] = {

From 61417aaa11402f8363a806a7167c46041f44465e Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 24 Jul 2019 16:59:04 +0200
Subject: [PATCH 49/68] drm/tegra: sor: Unify clock setup for eDP, HDMI and DP

With the clocks modelled consistently across SoC generations, the clock
setup for eDP, HDMI and DP can now be unified.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 94 ++++++++++++++++++++++++++++++++-----
 1 file changed, 82 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index fdbd76ec1cf7..a245bbbd638a 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -527,6 +527,14 @@ static const char * const tegra_clk_sor_pad_parents[2][2] = {
 	{ "pll_d2_out0", "pll_dp" },
 };
 
+/*
+ * Implementing ->set_parent() here isn't really required because the parent
+ * will be explicitly selected in the driver code via the DP_CLK_SEL mux in
+ * the SOR_CLK_CNTRL register. This is primarily for compatibility with the
+ * Tegra186 and later SoC generations where the BPMP implements this clock
+ * and doesn't expose the mux via the common clock framework.
+ */
+
 static int tegra_clk_sor_pad_set_parent(struct clk_hw *hw, u8 index)
 {
 	struct tegra_clk_sor_pad *pad = to_pad(hw);
@@ -1987,10 +1995,36 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL);
 	tegra_sor_writel(sor, value, SOR_XBAR_CTRL);
 
-	/* switch to DP parent clock */
-	err = tegra_sor_set_parent_clock(sor, sor->clk_dp);
-	if (err < 0)
-		dev_err(sor->dev, "failed to set parent clock: %d\n", err);
+	/*
+	 * Switch the pad clock to the DP clock. Note that we cannot actually
+	 * do this because Tegra186 and later don't support clk_set_parent()
+	 * on the sorX_pad_clkout clocks. We already do the equivalent above
+	 * using the DP_CLK_SEL mux of the SOR_CLK_CNTRL register.
+	 */
+#if 0
+	err = clk_set_parent(sor->clk_pad, sor->clk_dp);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to select pad parent clock: %d\n",
+			err);
+		return;
+	}
+#endif
+
+	/* switch the SOR clock to the pad clock */
+	err = tegra_sor_set_parent_clock(sor, sor->clk_pad);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to select SOR parent clock: %d\n",
+			err);
+		return;
+	}
+
+	/* switch the output clock to the parent pixel clock */
+	err = clk_set_parent(sor->clk, sor->clk_parent);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to select output parent clock: %d\n",
+			err);
+		return;
+	}
 
 	/* use DP-A protocol */
 	value = tegra_sor_readl(sor, SOR_STATE1);
@@ -2661,16 +2695,34 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL);
 	tegra_sor_writel(sor, value, SOR_XBAR_CTRL);
 
-	/* switch to parent clock */
-	err = clk_set_parent(sor->clk, sor->clk_parent);
+	/*
+	 * Switch the pad clock to the DP clock. Note that we cannot actually
+	 * do this because Tegra186 and later don't support clk_set_parent()
+	 * on the sorX_pad_clkout clocks. We already do the equivalent above
+	 * using the DP_CLK_SEL mux of the SOR_CLK_CNTRL register.
+	 */
+#if 0
+	err = clk_set_parent(sor->clk_pad, sor->clk_dp);
 	if (err < 0) {
-		dev_err(sor->dev, "failed to set parent clock: %d\n", err);
+		dev_err(sor->dev, "failed to select pad parent clock: %d\n",
+			err);
+		return;
+	}
+#endif
+
+	/* switch the SOR clock to the pad clock */
+	err = tegra_sor_set_parent_clock(sor, sor->clk_pad);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to select SOR parent clock: %d\n",
+			err);
 		return;
 	}
 
-	err = tegra_sor_set_parent_clock(sor, sor->clk_pad);
+	/* switch the output clock to the parent pixel clock */
+	err = clk_set_parent(sor->clk, sor->clk_parent);
 	if (err < 0) {
-		dev_err(sor->dev, "failed to set pad clock: %d\n", err);
+		dev_err(sor->dev, "failed to select output parent clock: %d\n",
+			err);
 		return;
 	}
 
@@ -3061,16 +3113,34 @@ static void tegra_sor_dp_enable(struct drm_encoder *encoder)
 	tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL);
 	tegra_sor_writel(sor, value, SOR_XBAR_CTRL);
 
-	/* switch to DP parent clock */
+	/*
+	 * Switch the pad clock to the DP clock. Note that we cannot actually
+	 * do this because Tegra186 and later don't support clk_set_parent()
+	 * on the sorX_pad_clkout clocks. We already do the equivalent above
+	 * using the DP_CLK_SEL mux of the SOR_CLK_CNTRL register.
+	 */
+#if 0
+	err = clk_set_parent(sor->clk_pad, sor->clk_parent);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to select pad parent clock: %d\n",
+			err);
+		return;
+	}
+#endif
+
+	/* switch the SOR clock to the pad clock */
 	err = tegra_sor_set_parent_clock(sor, sor->clk_pad);
 	if (err < 0) {
-		dev_err(sor->dev, "failed to switch to pad clock: %d\n", err);
+		dev_err(sor->dev, "failed to select SOR parent clock: %d\n",
+			err);
 		return;
 	}
 
+	/* switch the output clock to the parent pixel clock */
 	err = clk_set_parent(sor->clk, sor->clk_parent);
 	if (err < 0) {
-		dev_err(sor->dev, "failed to switch to parent clock: %d\n", err);
+		dev_err(sor->dev, "failed to select output parent clock: %d\n",
+			err);
 		return;
 	}
 

From d23691f647855ff765c67d281ec73c59104d9eb1 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 24 Jul 2019 17:10:31 +0200
Subject: [PATCH 50/68] drm/tegra: sor: Use correct I/O pad for DP

The correct I/O pad needs to be powered up before DP can be used. Make
sure the correct default is set for Tegra generations where the I/O pad
cannot be derived from the SOR instance.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index a245bbbd638a..bf2e31199166 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -3934,6 +3934,7 @@ static int tegra_sor_probe(struct platform_device *pdev)
 			sor->pad = TEGRA_IO_PAD_LVDS;
 		} else if (sor->soc->supports_dp) {
 			sor->ops = &tegra_sor_dp_ops;
+			sor->pad = TEGRA_IO_PAD_LVDS;
 		} else {
 			dev_err(&pdev->dev, "unknown (DP) support\n");
 			return -ENODEV;

From d278e4a9714d9c52429e670c5a3cf2e7ad7e67f9 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 27 Jun 2019 12:34:57 +0200
Subject: [PATCH 51/68] drm/tegra: sor: Unify eDP and DP support

The SOR0 on Tegra210 does, contrary to what was previously assumed, in
fact support DisplayPort. The difference between SOR0 and SOR1 is that
the latter supports audio and HDCP over DP, whereas the former doesn't.

The code for eDP and DP is now almost identical and the differences can
easily be parameterized based on the presence of a panel. There is no
need any longer to duplicate the code.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 421 +++++++-----------------------------
 1 file changed, 74 insertions(+), 347 deletions(-)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index bf2e31199166..63831c37ad35 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -371,10 +371,11 @@ struct tegra_sor_regs {
 };
 
 struct tegra_sor_soc {
-	bool supports_edp;
 	bool supports_lvds;
 	bool supports_hdmi;
 	bool supports_dp;
+	bool supports_audio;
+	bool supports_hdcp;
 
 	const struct tegra_sor_regs *regs;
 	bool has_nvdisplay;
@@ -1806,306 +1807,6 @@ static const struct drm_encoder_funcs tegra_sor_encoder_funcs = {
 	.destroy = tegra_output_encoder_destroy,
 };
 
-static void tegra_sor_edp_disable(struct drm_encoder *encoder)
-{
-	struct tegra_output *output = encoder_to_output(encoder);
-	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
-	struct tegra_sor *sor = to_sor(output);
-	u32 value;
-	int err;
-
-	if (output->panel)
-		drm_panel_disable(output->panel);
-
-	err = tegra_sor_detach(sor);
-	if (err < 0)
-		dev_err(sor->dev, "failed to detach SOR: %d\n", err);
-
-	tegra_sor_writel(sor, 0, SOR_STATE1);
-	tegra_sor_update(sor);
-
-	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
-	value &= ~SOR_ENABLE(0);
-	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
-
-	tegra_dc_commit(dc);
-
-	err = tegra_sor_power_down(sor);
-	if (err < 0)
-		dev_err(sor->dev, "failed to power down SOR: %d\n", err);
-
-	if (sor->aux) {
-		err = drm_dp_aux_disable(sor->aux);
-		if (err < 0)
-			dev_err(sor->dev, "failed to disable DP: %d\n", err);
-	}
-
-	err = tegra_io_pad_power_disable(sor->pad);
-	if (err < 0)
-		dev_err(sor->dev, "failed to power off I/O pad: %d\n", err);
-
-	if (output->panel)
-		drm_panel_unprepare(output->panel);
-
-	pm_runtime_put(sor->dev);
-}
-
-#if 0
-static int calc_h_ref_to_sync(const struct drm_display_mode *mode,
-			      unsigned int *value)
-{
-	unsigned int hfp, hsw, hbp, a = 0, b;
-
-	hfp = mode->hsync_start - mode->hdisplay;
-	hsw = mode->hsync_end - mode->hsync_start;
-	hbp = mode->htotal - mode->hsync_end;
-
-	pr_info("hfp: %u, hsw: %u, hbp: %u\n", hfp, hsw, hbp);
-
-	b = hfp - 1;
-
-	pr_info("a: %u, b: %u\n", a, b);
-	pr_info("a + hsw + hbp = %u\n", a + hsw + hbp);
-
-	if (a + hsw + hbp <= 11) {
-		a = 1 + 11 - hsw - hbp;
-		pr_info("a: %u\n", a);
-	}
-
-	if (a > b)
-		return -EINVAL;
-
-	if (hsw < 1)
-		return -EINVAL;
-
-	if (mode->hdisplay < 16)
-		return -EINVAL;
-
-	if (value) {
-		if (b > a && a % 2)
-			*value = a + 1;
-		else
-			*value = a;
-	}
-
-	return 0;
-}
-#endif
-
-static void tegra_sor_edp_enable(struct drm_encoder *encoder)
-{
-	struct tegra_output *output = encoder_to_output(encoder);
-	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
-	struct tegra_sor *sor = to_sor(output);
-	struct tegra_sor_config config;
-	struct tegra_sor_state *state;
-	struct drm_display_mode *mode;
-	struct drm_display_info *info;
-	unsigned int i;
-	u32 value;
-	int err;
-
-	state = to_sor_state(output->connector.state);
-	mode = &encoder->crtc->state->adjusted_mode;
-	info = &output->connector.display_info;
-
-	pm_runtime_get_sync(sor->dev);
-
-	/* switch to safe parent clock */
-	err = tegra_sor_set_parent_clock(sor, sor->clk_safe);
-	if (err < 0)
-		dev_err(sor->dev, "failed to set safe parent clock: %d\n", err);
-
-	err = tegra_io_rail_power_on(TEGRA_IO_RAIL_LVDS);
-	if (err < 0)
-		dev_err(sor->dev, "failed to power on LVDS rail: %d\n", err);
-
-	usleep_range(20, 100);
-
-	err = drm_dp_aux_enable(sor->aux);
-	if (err < 0)
-		dev_err(sor->dev, "failed to enable DPAUX: %d\n", err);
-
-	err = drm_dp_link_probe(sor->aux, &sor->link);
-	if (err < 0)
-		dev_err(sor->dev, "failed to probe eDP link: %d\n", err);
-
-	tegra_sor_filter_rates(sor);
-
-	err = drm_dp_link_choose(&sor->link, mode, info);
-	if (err < 0)
-		dev_err(sor->dev, "failed to choose link: %d\n", err);
-
-	if (output->panel)
-		drm_panel_prepare(output->panel);
-
-	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-	value &= ~SOR_PLL2_BANDGAP_POWERDOWN;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
-
-	usleep_range(20, 40);
-
-	value = tegra_sor_readl(sor, sor->soc->regs->pll3);
-	value |= SOR_PLL3_PLL_VDD_MODE_3V3;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll3);
-
-	value = tegra_sor_readl(sor, sor->soc->regs->pll0);
-	value &= ~(SOR_PLL0_VCOPD | SOR_PLL0_PWR);
-	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
-
-	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-	value &= ~SOR_PLL2_SEQ_PLLCAPPD_ENFORCE;
-	value |= SOR_PLL2_SEQ_PLLCAPPD;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
-
-	usleep_range(200, 400);
-
-	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-	value &= ~SOR_PLL2_POWERDOWN_OVERRIDE;
-	value &= ~SOR_PLL2_PORT_POWERDOWN;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
-
-	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
-	value &= ~SOR_CLK_CNTRL_DP_CLK_SEL_MASK;
-	value |= SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_DPCLK;
-	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
-
-	value = tegra_sor_readl(sor, SOR_DP_SPARE0);
-	/* XXX not in TRM */
-	value |= SOR_DP_SPARE_PANEL_INTERNAL;
-	value |= SOR_DP_SPARE_SEQ_ENABLE;
-	tegra_sor_writel(sor, value, SOR_DP_SPARE0);
-
-	/* XXX not in TRM */
-	tegra_sor_writel(sor, 0, SOR_LVDS);
-
-	value = tegra_sor_readl(sor, sor->soc->regs->pll0);
-	value &= ~SOR_PLL0_ICHPMP_MASK;
-	value &= ~SOR_PLL0_VCOCAP_MASK;
-	value |= SOR_PLL0_ICHPMP(0x1);
-	value |= SOR_PLL0_VCOCAP(0x3);
-	value |= SOR_PLL0_RESISTOR_EXT;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
-
-	/* XXX not in TRM */
-	for (value = 0, i = 0; i < 5; i++)
-		value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->xbar_cfg[i]) |
-			 SOR_XBAR_CTRL_LINK1_XSEL(i, i);
-
-	tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL);
-	tegra_sor_writel(sor, value, SOR_XBAR_CTRL);
-
-	/*
-	 * Switch the pad clock to the DP clock. Note that we cannot actually
-	 * do this because Tegra186 and later don't support clk_set_parent()
-	 * on the sorX_pad_clkout clocks. We already do the equivalent above
-	 * using the DP_CLK_SEL mux of the SOR_CLK_CNTRL register.
-	 */
-#if 0
-	err = clk_set_parent(sor->clk_pad, sor->clk_dp);
-	if (err < 0) {
-		dev_err(sor->dev, "failed to select pad parent clock: %d\n",
-			err);
-		return;
-	}
-#endif
-
-	/* switch the SOR clock to the pad clock */
-	err = tegra_sor_set_parent_clock(sor, sor->clk_pad);
-	if (err < 0) {
-		dev_err(sor->dev, "failed to select SOR parent clock: %d\n",
-			err);
-		return;
-	}
-
-	/* switch the output clock to the parent pixel clock */
-	err = clk_set_parent(sor->clk, sor->clk_parent);
-	if (err < 0) {
-		dev_err(sor->dev, "failed to select output parent clock: %d\n",
-			err);
-		return;
-	}
-
-	/* use DP-A protocol */
-	value = tegra_sor_readl(sor, SOR_STATE1);
-	value &= ~SOR_STATE_ASY_PROTOCOL_MASK;
-	value |= SOR_STATE_ASY_PROTOCOL_DP_A;
-	tegra_sor_writel(sor, value, SOR_STATE1);
-
-	/* enable port */
-	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
-	value |= SOR_DP_LINKCTL_ENABLE;
-	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
-
-	tegra_sor_dp_term_calibrate(sor);
-
-	err = drm_dp_link_train(&sor->link);
-	if (err < 0)
-		dev_err(sor->dev, "link training failed: %d\n", err);
-	else
-		dev_dbg(sor->dev, "link training succeeded\n");
-
-	err = drm_dp_link_power_up(sor->aux, &sor->link);
-	if (err < 0)
-		dev_err(sor->dev, "failed to power up eDP link: %d\n", err);
-
-	/* compute configuration */
-	memset(&config, 0, sizeof(config));
-	config.bits_per_pixel = state->bpc * 3;
-
-	err = tegra_sor_compute_config(sor, mode, &config, &sor->link);
-	if (err < 0)
-		dev_err(sor->dev, "failed to compute configuration: %d\n", err);
-
-	tegra_sor_apply_config(sor, &config);
-
-	err = tegra_sor_power_up(sor, 250);
-	if (err < 0)
-		dev_err(sor->dev, "failed to power up SOR: %d\n", err);
-
-	/* CSTM (LVDS, link A/B, upper) */
-	value = SOR_CSTM_LVDS | SOR_CSTM_LINK_ACT_A | SOR_CSTM_LINK_ACT_B |
-		SOR_CSTM_UPPER;
-	tegra_sor_writel(sor, value, SOR_CSTM);
-
-	/* use DP-A protocol */
-	value = tegra_sor_readl(sor, SOR_STATE1);
-	value &= ~SOR_STATE_ASY_PROTOCOL_MASK;
-	value |= SOR_STATE_ASY_PROTOCOL_DP_A;
-	tegra_sor_writel(sor, value, SOR_STATE1);
-
-	tegra_sor_mode_set(sor, mode, state);
-
-	/* PWM setup */
-	err = tegra_sor_setup_pwm(sor, 250);
-	if (err < 0)
-		dev_err(sor->dev, "failed to setup PWM: %d\n", err);
-
-	tegra_sor_update(sor);
-
-	err = tegra_sor_power_up(sor, 250);
-	if (err < 0)
-		dev_err(sor->dev, "failed to power up SOR: %d\n", err);
-
-	/* attach and wake up */
-	err = tegra_sor_attach(sor);
-	if (err < 0)
-		dev_err(sor->dev, "failed to attach SOR: %d\n", err);
-
-	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
-	value |= SOR_ENABLE(0);
-	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
-
-	tegra_dc_commit(dc);
-
-	err = tegra_sor_wakeup(sor);
-	if (err < 0)
-		dev_err(sor->dev, "failed to wakeup SOR: %d\n", err);
-
-	if (output->panel)
-		drm_panel_enable(output->panel);
-}
-
 static int
 tegra_sor_encoder_atomic_check(struct drm_encoder *encoder,
 			       struct drm_crtc_state *crtc_state,
@@ -2155,12 +1856,6 @@ tegra_sor_encoder_atomic_check(struct drm_encoder *encoder,
 	return 0;
 }
 
-static const struct drm_encoder_helper_funcs tegra_sor_edp_helpers = {
-	.disable = tegra_sor_edp_disable,
-	.enable = tegra_sor_edp_enable,
-	.atomic_check = tegra_sor_encoder_atomic_check,
-};
-
 static inline u32 tegra_sor_hdmi_subpack(const u8 *ptr, size_t size)
 {
 	u32 value = 0;
@@ -2535,9 +2230,9 @@ static void tegra_sor_hdmi_disable(struct drm_encoder *encoder)
 	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
 
 	if (!sor->soc->has_nvdisplay)
-		value &= ~(SOR1_TIMING_CYA | SOR_ENABLE(1));
-	else
-		value &= ~SOR_ENABLE(sor->index);
+		value &= ~SOR1_TIMING_CYA;
+
+	value &= ~SOR_ENABLE(sor->index);
 
 	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
 
@@ -2928,9 +2623,9 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
 
 	if (!sor->soc->has_nvdisplay)
-		value |= SOR_ENABLE(1) | SOR1_TIMING_CYA;
-	else
-		value |= SOR_ENABLE(sor->index);
+		value |= SOR1_TIMING_CYA;
+
+	value |= SOR_ENABLE(sor->index);
 
 	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
 
@@ -2965,6 +2660,9 @@ static void tegra_sor_dp_disable(struct drm_encoder *encoder)
 	u32 value;
 	int err;
 
+	if (output->panel)
+		drm_panel_disable(output->panel);
+
 	err = drm_dp_link_power_down(sor->aux, &sor->link);
 	if (err < 0)
 		dev_err(sor->dev, "failed to power down link: %d\n", err);
@@ -2977,12 +2675,7 @@ static void tegra_sor_dp_disable(struct drm_encoder *encoder)
 	tegra_sor_update(sor);
 
 	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
-
-	if (!sor->soc->has_nvdisplay)
-		value &= ~(SOR1_TIMING_CYA | SOR_ENABLE(1));
-	else
-		value &= ~SOR_ENABLE(sor->index);
-
+	value &= ~SOR_ENABLE(sor->index);
 	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
 	tegra_dc_commit(dc);
 
@@ -3010,6 +2703,9 @@ static void tegra_sor_dp_disable(struct drm_encoder *encoder)
 	if (err < 0)
 		dev_err(sor->dev, "failed disable DPAUX: %d\n", err);
 
+	if (output->panel)
+		drm_panel_unprepare(output->panel);
+
 	pm_runtime_put(sor->dev);
 }
 
@@ -3051,10 +2747,15 @@ static void tegra_sor_dp_enable(struct drm_encoder *encoder)
 	if (err < 0)
 		dev_err(sor->dev, "failed to probe DP link: %d\n", err);
 
+	tegra_sor_filter_rates(sor);
+
 	err = drm_dp_link_choose(&sor->link, mode, info);
 	if (err < 0)
 		dev_err(sor->dev, "failed to choose link: %d\n", err);
 
+	if (output->panel)
+		drm_panel_prepare(output->panel);
+
 	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value &= ~SOR_PLL2_BANDGAP_POWERDOWN;
 	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
@@ -3083,14 +2784,23 @@ static void tegra_sor_dp_enable(struct drm_encoder *encoder)
 
 	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
 	value &= ~SOR_CLK_CNTRL_DP_CLK_SEL_MASK;
-	value |= SOR_CLK_CNTRL_DP_CLK_SEL_DIFF_DPCLK;
+
+	if (output->panel)
+		value |= SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_DPCLK;
+	else
+		value |= SOR_CLK_CNTRL_DP_CLK_SEL_DIFF_DPCLK;
+
 	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
 
 	usleep_range(200, 400);
 
 	value = tegra_sor_readl(sor, SOR_DP_SPARE0);
 	/* XXX not in TRM */
-	value &= ~SOR_DP_SPARE_PANEL_INTERNAL;
+	if (output->panel)
+		value |= SOR_DP_SPARE_PANEL_INTERNAL;
+	else
+		value &= ~SOR_DP_SPARE_PANEL_INTERNAL;
+
 	value |= SOR_DP_SPARE_SEQ_ENABLE;
 	tegra_sor_writel(sor, value, SOR_DP_SPARE0);
 
@@ -3177,6 +2887,19 @@ static void tegra_sor_dp_enable(struct drm_encoder *encoder)
 
 	tegra_sor_apply_config(sor, &config);
 	tegra_sor_mode_set(sor, mode, state);
+
+	if (output->panel) {
+		/* CSTM (LVDS, link A/B, upper) */
+		value = SOR_CSTM_LVDS | SOR_CSTM_LINK_ACT_A | SOR_CSTM_LINK_ACT_B |
+			SOR_CSTM_UPPER;
+		tegra_sor_writel(sor, value, SOR_CSTM);
+
+		/* PWM setup */
+		err = tegra_sor_setup_pwm(sor, 250);
+		if (err < 0)
+			dev_err(sor->dev, "failed to setup PWM: %d\n", err);
+	}
+
 	tegra_sor_update(sor);
 
 	err = tegra_sor_power_up(sor, 250);
@@ -3197,6 +2920,9 @@ static void tegra_sor_dp_enable(struct drm_encoder *encoder)
 	err = tegra_sor_wakeup(sor);
 	if (err < 0)
 		dev_err(sor->dev, "failed to wakeup SOR: %d\n", err);
+
+	if (output->panel)
+		drm_panel_enable(output->panel);
 }
 
 static const struct drm_encoder_helper_funcs tegra_sor_dp_helpers = {
@@ -3205,10 +2931,6 @@ static const struct drm_encoder_helper_funcs tegra_sor_dp_helpers = {
 	.atomic_check = tegra_sor_encoder_atomic_check,
 };
 
-static const struct tegra_sor_ops tegra_sor_edp_ops = {
-	.name = "eDP",
-};
-
 static int tegra_sor_hdmi_probe(struct tegra_sor *sor)
 {
 	int err;
@@ -3330,10 +3052,10 @@ static int tegra_sor_init(struct host1x_client *client)
 			encoder = DRM_MODE_ENCODER_LVDS;
 		}
 	} else {
-		if (sor->ops == &tegra_sor_edp_ops) {
+		if (sor->output.panel) {
 			connector = DRM_MODE_CONNECTOR_eDP;
 			encoder = DRM_MODE_ENCODER_TMDS;
-			helpers = &tegra_sor_edp_helpers;
+			helpers = &tegra_sor_dp_helpers;
 		} else {
 			connector = DRM_MODE_CONNECTOR_DisplayPort;
 			encoder = DRM_MODE_ENCODER_TMDS;
@@ -3575,10 +3297,11 @@ static const u8 tegra124_sor_tx_pu[4][4][4] = {
 };
 
 static const struct tegra_sor_soc tegra124_sor = {
-	.supports_edp = true,
 	.supports_lvds = true,
 	.supports_hdmi = false,
-	.supports_dp = false,
+	.supports_dp = true,
+	.supports_audio = false,
+	.supports_hdcp = false,
 	.regs = &tegra124_sor_regs,
 	.has_nvdisplay = false,
 	.xbar_cfg = tegra124_sor_xbar_cfg,
@@ -3614,10 +3337,11 @@ static const u8 tegra132_sor_pre_emphasis[4][4][4] = {
 };
 
 static const struct tegra_sor_soc tegra132_sor = {
-	.supports_edp = true,
 	.supports_lvds = true,
 	.supports_hdmi = false,
-	.supports_dp = false,
+	.supports_dp = true,
+	.supports_audio = false,
+	.supports_hdcp = false,
 	.regs = &tegra124_sor_regs,
 	.has_nvdisplay = false,
 	.xbar_cfg = tegra124_sor_xbar_cfg,
@@ -3652,10 +3376,11 @@ static const u8 tegra210_sor_lane_map[4] = {
 };
 
 static const struct tegra_sor_soc tegra210_sor = {
-	.supports_edp = true,
 	.supports_lvds = false,
 	.supports_hdmi = false,
-	.supports_dp = false,
+	.supports_dp = true,
+	.supports_audio = false,
+	.supports_hdcp = false,
 
 	.regs = &tegra210_sor_regs,
 	.has_nvdisplay = false,
@@ -3669,10 +3394,11 @@ static const struct tegra_sor_soc tegra210_sor = {
 };
 
 static const struct tegra_sor_soc tegra210_sor1 = {
-	.supports_edp = false,
 	.supports_lvds = false,
 	.supports_hdmi = true,
 	.supports_dp = true,
+	.supports_audio = true,
+	.supports_hdcp = true,
 
 	.regs = &tegra210_sor_regs,
 	.has_nvdisplay = false,
@@ -3751,10 +3477,11 @@ static const u8 tegra186_sor_pre_emphasis[4][4][4] = {
 };
 
 static const struct tegra_sor_soc tegra186_sor = {
-	.supports_edp = false,
 	.supports_lvds = false,
 	.supports_hdmi = true,
 	.supports_dp = true,
+	.supports_audio = true,
+	.supports_hdcp = true,
 
 	.regs = &tegra186_sor_regs,
 	.has_nvdisplay = true,
@@ -3785,10 +3512,11 @@ static const struct tegra_sor_regs tegra194_sor_regs = {
 };
 
 static const struct tegra_sor_soc tegra194_sor = {
-	.supports_edp = true,
 	.supports_lvds = false,
 	.supports_hdmi = true,
 	.supports_dp = true,
+	.supports_audio = true,
+	.supports_hdcp = true,
 
 	.regs = &tegra194_sor_regs,
 	.has_nvdisplay = true,
@@ -3836,7 +3564,7 @@ static int tegra_sor_parse_dt(struct tegra_sor *sor)
 		 */
 		sor->pad = TEGRA_IO_PAD_HDMI_DP0 + sor->index;
 	} else {
-		if (sor->soc->supports_edp)
+		if (!sor->soc->supports_audio)
 			sor->index = 0;
 		else
 			sor->index = 1;
@@ -3929,16 +3657,15 @@ static int tegra_sor_probe(struct platform_device *pdev)
 			return -ENODEV;
 		}
 	} else {
-		if (sor->soc->supports_edp) {
-			sor->ops = &tegra_sor_edp_ops;
-			sor->pad = TEGRA_IO_PAD_LVDS;
-		} else if (sor->soc->supports_dp) {
-			sor->ops = &tegra_sor_dp_ops;
-			sor->pad = TEGRA_IO_PAD_LVDS;
-		} else {
-			dev_err(&pdev->dev, "unknown (DP) support\n");
-			return -ENODEV;
-		}
+		np = of_parse_phandle(pdev->dev.of_node, "nvidia,panel", 0);
+		/*
+		 * No need to keep this around since we only use it as a check
+		 * to see if a panel is connected (eDP) or not (DP).
+		 */
+		of_node_put(np);
+
+		sor->ops = &tegra_sor_dp_ops;
+		sor->pad = TEGRA_IO_PAD_LVDS;
 	}
 
 	err = tegra_sor_parse_dt(sor);

From 68a2ebb54da4dfa2cc3a2239d2e2dfeec2c5e522 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 25 Jul 2019 15:41:36 +0200
Subject: [PATCH 52/68] drm/tegra: sor: Avoid timeouts on unplug events

When the SOR is disabled in DP mode as part of an unplug event, do not
attempt to power the DP link down. Powering down the link requires the
DPAUX to transmit AUX messages which only works if there's a connected
sink.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 63831c37ad35..b81e6d39aa32 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -2663,9 +2663,16 @@ static void tegra_sor_dp_disable(struct drm_encoder *encoder)
 	if (output->panel)
 		drm_panel_disable(output->panel);
 
-	err = drm_dp_link_power_down(sor->aux, &sor->link);
-	if (err < 0)
-		dev_err(sor->dev, "failed to power down link: %d\n", err);
+	/*
+	 * Do not attempt to power down a DP link if we're not connected since
+	 * the AUX transactions would just be timing out.
+	 */
+	if (output->connector.status != connector_status_disconnected) {
+		err = drm_dp_link_power_down(sor->aux, &sor->link);
+		if (err < 0)
+			dev_err(sor->dev, "failed to power down link: %d\n",
+				err);
+	}
 
 	err = tegra_sor_detach(sor);
 	if (err < 0)

From a9087cf2e1994edb58418b1fd3bb2026a7d85c88 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 24 Jun 2019 15:13:16 +0200
Subject: [PATCH 53/68] drm/tegra: sor: Extract common audio enabling code

The code to enable audio support is split into two parts, one being
generic for the SOR and another part that is specific whether the SOR is
in HDMI mode or in DP mode. Split out the common part in preparation for
reusing the code in DP mode.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 43 +++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index b81e6d39aa32..478c001f4453 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -2002,6 +2002,30 @@ static void tegra_sor_audio_unprepare(struct tegra_sor *sor)
 	tegra_sor_writel(sor, 0, SOR_INT_ENABLE);
 }
 
+static void tegra_sor_audio_enable(struct tegra_sor *sor)
+{
+	u32 value;
+
+	value = tegra_sor_readl(sor, SOR_AUDIO_CNTRL);
+
+	/* select HDA audio input */
+	value &= ~SOR_AUDIO_CNTRL_SOURCE_SELECT(SOURCE_SELECT_MASK);
+	value |= SOR_AUDIO_CNTRL_SOURCE_SELECT(SOURCE_SELECT_HDA);
+
+	/* inject null samples */
+	if (sor->format.channels != 2)
+		value &= ~SOR_AUDIO_CNTRL_INJECT_NULLSMPL;
+	else
+		value |= SOR_AUDIO_CNTRL_INJECT_NULLSMPL;
+
+	value |= SOR_AUDIO_CNTRL_AFIFO_FLUSH;
+
+	tegra_sor_writel(sor, value, SOR_AUDIO_CNTRL);
+
+	/* enable advertising HBR capability */
+	tegra_sor_writel(sor, SOR_AUDIO_SPARE_HBR_ENABLE, SOR_AUDIO_SPARE);
+}
+
 static int tegra_sor_hdmi_enable_audio_infoframe(struct tegra_sor *sor)
 {
 	u8 buffer[HDMI_INFOFRAME_SIZE(AUDIO)];
@@ -2037,24 +2061,7 @@ static void tegra_sor_hdmi_audio_enable(struct tegra_sor *sor)
 {
 	u32 value;
 
-	value = tegra_sor_readl(sor, SOR_AUDIO_CNTRL);
-
-	/* select HDA audio input */
-	value &= ~SOR_AUDIO_CNTRL_SOURCE_SELECT(SOURCE_SELECT_MASK);
-	value |= SOR_AUDIO_CNTRL_SOURCE_SELECT(SOURCE_SELECT_HDA);
-
-	/* inject null samples */
-	if (sor->format.channels != 2)
-		value &= ~SOR_AUDIO_CNTRL_INJECT_NULLSMPL;
-	else
-		value |= SOR_AUDIO_CNTRL_INJECT_NULLSMPL;
-
-	value |= SOR_AUDIO_CNTRL_AFIFO_FLUSH;
-
-	tegra_sor_writel(sor, value, SOR_AUDIO_CNTRL);
-
-	/* enable advertising HBR capability */
-	tegra_sor_writel(sor, SOR_AUDIO_SPARE_HBR_ENABLE, SOR_AUDIO_SPARE);
+	tegra_sor_audio_enable(sor);
 
 	tegra_sor_writel(sor, 0, SOR_HDMI_ACR_CTRL);
 

From 85d0c4b54fbd5383950ce364641883ea4ad45362 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 24 Jun 2019 15:15:39 +0200
Subject: [PATCH 54/68] drm/tegra: sor: Introduce audio enable/disable
 callbacks

In order to support different modes (DP in addition to HDMI), split out
the audio setup/teardown into callbacks.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/sor.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 478c001f4453..615cb319fa8b 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -398,6 +398,8 @@ struct tegra_sor_ops {
 	const char *name;
 	int (*probe)(struct tegra_sor *sor);
 	int (*remove)(struct tegra_sor *sor);
+	void (*audio_enable)(struct tegra_sor *sor);
+	void (*audio_disable)(struct tegra_sor *sor);
 };
 
 struct tegra_sor {
@@ -3008,6 +3010,8 @@ static const struct tegra_sor_ops tegra_sor_hdmi_ops = {
 	.name = "HDMI",
 	.probe = tegra_sor_hdmi_probe,
 	.remove = tegra_sor_hdmi_remove,
+	.audio_enable = tegra_sor_hdmi_audio_enable,
+	.audio_disable = tegra_sor_hdmi_audio_disable,
 };
 
 static int tegra_sor_dp_probe(struct tegra_sor *sor)
@@ -3616,9 +3620,11 @@ static irqreturn_t tegra_sor_irq(int irq, void *data)
 
 			tegra_hda_parse_format(format, &sor->format);
 
-			tegra_sor_hdmi_audio_enable(sor);
+			if (sor->ops->audio_enable)
+				sor->ops->audio_enable(sor);
 		} else {
-			tegra_sor_hdmi_audio_disable(sor);
+			if (sor->ops->audio_disable)
+				sor->ops->audio_disable(sor);
 		}
 	}
 

From a7303f7735717c95bf5f65c18c901c72e91dc55b Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 28 Oct 2019 13:16:10 +0100
Subject: [PATCH 55/68] drm/tegra: Do not use ->load() and ->unload() callbacks

The ->load() and ->unload() drivers are midlayers and should be avoided
in modern drivers. Fix this by moving the code into the driver ->probe()
and ->remove() implementations, respectively.

v2: kick out conflicting framebuffers before initializing fbdev
v3: rebase onto drm/tegra/for-next

Tested-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c | 325 +++++++++++++++++-------------------
 1 file changed, 156 insertions(+), 169 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index b74362cb63eb..7480f575188d 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -86,168 +86,6 @@ tegra_drm_mode_config_helpers = {
 	.atomic_commit_tail = tegra_atomic_commit_tail,
 };
 
-static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
-{
-	struct host1x_device *device = to_host1x_device(drm->dev);
-	struct tegra_drm *tegra;
-	int err;
-
-	tegra = kzalloc(sizeof(*tegra), GFP_KERNEL);
-	if (!tegra)
-		return -ENOMEM;
-
-	if (iommu_present(&platform_bus_type)) {
-		tegra->domain = iommu_domain_alloc(&platform_bus_type);
-		if (!tegra->domain) {
-			err = -ENOMEM;
-			goto free;
-		}
-
-		err = iova_cache_get();
-		if (err < 0)
-			goto domain;
-	}
-
-	mutex_init(&tegra->clients_lock);
-	INIT_LIST_HEAD(&tegra->clients);
-
-	drm->dev_private = tegra;
-	tegra->drm = drm;
-
-	drm_mode_config_init(drm);
-
-	drm->mode_config.min_width = 0;
-	drm->mode_config.min_height = 0;
-
-	drm->mode_config.max_width = 4096;
-	drm->mode_config.max_height = 4096;
-
-	drm->mode_config.allow_fb_modifiers = true;
-
-	drm->mode_config.normalize_zpos = true;
-
-	drm->mode_config.funcs = &tegra_drm_mode_config_funcs;
-	drm->mode_config.helper_private = &tegra_drm_mode_config_helpers;
-
-	err = tegra_drm_fb_prepare(drm);
-	if (err < 0)
-		goto config;
-
-	drm_kms_helper_poll_init(drm);
-
-	err = host1x_device_init(device);
-	if (err < 0)
-		goto fbdev;
-
-	if (tegra->domain) {
-		u64 carveout_start, carveout_end, gem_start, gem_end;
-		u64 dma_mask = dma_get_mask(&device->dev);
-		dma_addr_t start, end;
-		unsigned long order;
-
-		start = tegra->domain->geometry.aperture_start & dma_mask;
-		end = tegra->domain->geometry.aperture_end & dma_mask;
-
-		gem_start = start;
-		gem_end = end - CARVEOUT_SZ;
-		carveout_start = gem_end + 1;
-		carveout_end = end;
-
-		order = __ffs(tegra->domain->pgsize_bitmap);
-		init_iova_domain(&tegra->carveout.domain, 1UL << order,
-				 carveout_start >> order);
-
-		tegra->carveout.shift = iova_shift(&tegra->carveout.domain);
-		tegra->carveout.limit = carveout_end >> tegra->carveout.shift;
-
-		drm_mm_init(&tegra->mm, gem_start, gem_end - gem_start + 1);
-		mutex_init(&tegra->mm_lock);
-
-		DRM_DEBUG_DRIVER("IOMMU apertures:\n");
-		DRM_DEBUG_DRIVER("  GEM: %#llx-%#llx\n", gem_start, gem_end);
-		DRM_DEBUG_DRIVER("  Carveout: %#llx-%#llx\n", carveout_start,
-				 carveout_end);
-	}
-
-	if (tegra->hub) {
-		err = tegra_display_hub_prepare(tegra->hub);
-		if (err < 0)
-			goto device;
-	}
-
-	/*
-	 * We don't use the drm_irq_install() helpers provided by the DRM
-	 * core, so we need to set this manually in order to allow the
-	 * DRM_IOCTL_WAIT_VBLANK to operate correctly.
-	 */
-	drm->irq_enabled = true;
-
-	/* syncpoints are used for full 32-bit hardware VBLANK counters */
-	drm->max_vblank_count = 0xffffffff;
-
-	err = drm_vblank_init(drm, drm->mode_config.num_crtc);
-	if (err < 0)
-		goto hub;
-
-	drm_mode_config_reset(drm);
-
-	err = tegra_drm_fb_init(drm);
-	if (err < 0)
-		goto hub;
-
-	return 0;
-
-hub:
-	if (tegra->hub)
-		tegra_display_hub_cleanup(tegra->hub);
-device:
-	if (tegra->domain) {
-		mutex_destroy(&tegra->mm_lock);
-		drm_mm_takedown(&tegra->mm);
-		put_iova_domain(&tegra->carveout.domain);
-		iova_cache_put();
-	}
-
-	host1x_device_exit(device);
-fbdev:
-	drm_kms_helper_poll_fini(drm);
-	tegra_drm_fb_free(drm);
-config:
-	drm_mode_config_cleanup(drm);
-domain:
-	if (tegra->domain)
-		iommu_domain_free(tegra->domain);
-free:
-	kfree(tegra);
-	return err;
-}
-
-static void tegra_drm_unload(struct drm_device *drm)
-{
-	struct host1x_device *device = to_host1x_device(drm->dev);
-	struct tegra_drm *tegra = drm->dev_private;
-	int err;
-
-	drm_kms_helper_poll_fini(drm);
-	tegra_drm_fb_exit(drm);
-	drm_atomic_helper_shutdown(drm);
-	drm_mode_config_cleanup(drm);
-
-	err = host1x_device_exit(device);
-	if (err < 0)
-		return;
-
-	if (tegra->domain) {
-		mutex_destroy(&tegra->mm_lock);
-		drm_mm_takedown(&tegra->mm);
-		put_iova_domain(&tegra->carveout.domain);
-		iova_cache_put();
-		iommu_domain_free(tegra->domain);
-	}
-
-	kfree(tegra);
-}
-
 static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp)
 {
 	struct tegra_drm_file *fpriv;
@@ -1014,8 +852,6 @@ static int tegra_debugfs_init(struct drm_minor *minor)
 static struct drm_driver tegra_drm_driver = {
 	.driver_features = DRIVER_MODESET | DRIVER_GEM |
 			   DRIVER_ATOMIC | DRIVER_RENDER,
-	.load = tegra_drm_load,
-	.unload = tegra_drm_unload,
 	.open = tegra_drm_open,
 	.postclose = tegra_drm_postclose,
 	.lastclose = drm_fb_helper_lastclose,
@@ -1202,6 +1038,7 @@ void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt,
 static int host1x_drm_probe(struct host1x_device *dev)
 {
 	struct drm_driver *driver = &tegra_drm_driver;
+	struct tegra_drm *tegra;
 	struct drm_device *drm;
 	int err;
 
@@ -1209,18 +1046,147 @@ static int host1x_drm_probe(struct host1x_device *dev)
 	if (IS_ERR(drm))
 		return PTR_ERR(drm);
 
-	dev_set_drvdata(&dev->dev, drm);
-
-	err = drm_fb_helper_remove_conflicting_framebuffers(NULL, "tegradrmfb", false);
-	if (err < 0)
+	tegra = kzalloc(sizeof(*tegra), GFP_KERNEL);
+	if (!tegra) {
+		err = -ENOMEM;
 		goto put;
+	}
+
+	if (iommu_present(&platform_bus_type)) {
+		tegra->domain = iommu_domain_alloc(&platform_bus_type);
+		if (!tegra->domain) {
+			err = -ENOMEM;
+			goto free;
+		}
+
+		err = iova_cache_get();
+		if (err < 0)
+			goto domain;
+	}
+
+	mutex_init(&tegra->clients_lock);
+	INIT_LIST_HEAD(&tegra->clients);
+
+	dev_set_drvdata(&dev->dev, drm);
+	drm->dev_private = tegra;
+	tegra->drm = drm;
+
+	drm_mode_config_init(drm);
+
+	drm->mode_config.min_width = 0;
+	drm->mode_config.min_height = 0;
+
+	drm->mode_config.max_width = 4096;
+	drm->mode_config.max_height = 4096;
+
+	drm->mode_config.allow_fb_modifiers = true;
+
+	drm->mode_config.normalize_zpos = true;
+
+	drm->mode_config.funcs = &tegra_drm_mode_config_funcs;
+	drm->mode_config.helper_private = &tegra_drm_mode_config_helpers;
+
+	err = tegra_drm_fb_prepare(drm);
+	if (err < 0)
+		goto config;
+
+	drm_kms_helper_poll_init(drm);
+
+	err = host1x_device_init(dev);
+	if (err < 0)
+		goto fbdev;
+
+	if (tegra->domain) {
+		u64 carveout_start, carveout_end, gem_start, gem_end;
+		u64 dma_mask = dma_get_mask(&dev->dev);
+		dma_addr_t start, end;
+		unsigned long order;
+
+		start = tegra->domain->geometry.aperture_start & dma_mask;
+		end = tegra->domain->geometry.aperture_end & dma_mask;
+
+		gem_start = start;
+		gem_end = end - CARVEOUT_SZ;
+		carveout_start = gem_end + 1;
+		carveout_end = end;
+
+		order = __ffs(tegra->domain->pgsize_bitmap);
+		init_iova_domain(&tegra->carveout.domain, 1UL << order,
+				 carveout_start >> order);
+
+		tegra->carveout.shift = iova_shift(&tegra->carveout.domain);
+		tegra->carveout.limit = carveout_end >> tegra->carveout.shift;
+
+		drm_mm_init(&tegra->mm, gem_start, gem_end - gem_start + 1);
+		mutex_init(&tegra->mm_lock);
+
+		DRM_DEBUG_DRIVER("IOMMU apertures:\n");
+		DRM_DEBUG_DRIVER("  GEM: %#llx-%#llx\n", gem_start, gem_end);
+		DRM_DEBUG_DRIVER("  Carveout: %#llx-%#llx\n", carveout_start,
+				 carveout_end);
+	}
+
+	if (tegra->hub) {
+		err = tegra_display_hub_prepare(tegra->hub);
+		if (err < 0)
+			goto device;
+	}
+
+	/*
+	 * We don't use the drm_irq_install() helpers provided by the DRM
+	 * core, so we need to set this manually in order to allow the
+	 * DRM_IOCTL_WAIT_VBLANK to operate correctly.
+	 */
+	drm->irq_enabled = true;
+
+	/* syncpoints are used for full 32-bit hardware VBLANK counters */
+	drm->max_vblank_count = 0xffffffff;
+
+	err = drm_vblank_init(drm, drm->mode_config.num_crtc);
+	if (err < 0)
+		goto hub;
+
+	drm_mode_config_reset(drm);
+
+	err = drm_fb_helper_remove_conflicting_framebuffers(NULL, "tegradrmfb",
+							    false);
+	if (err < 0)
+		goto hub;
+
+	err = tegra_drm_fb_init(drm);
+	if (err < 0)
+		goto hub;
 
 	err = drm_dev_register(drm, 0);
 	if (err < 0)
-		goto put;
+		goto fb;
 
 	return 0;
 
+fb:
+	tegra_drm_fb_exit(drm);
+hub:
+	if (tegra->hub)
+		tegra_display_hub_cleanup(tegra->hub);
+device:
+	if (tegra->domain) {
+		mutex_destroy(&tegra->mm_lock);
+		drm_mm_takedown(&tegra->mm);
+		put_iova_domain(&tegra->carveout.domain);
+		iova_cache_put();
+	}
+
+	host1x_device_exit(dev);
+fbdev:
+	drm_kms_helper_poll_fini(drm);
+	tegra_drm_fb_free(drm);
+config:
+	drm_mode_config_cleanup(drm);
+domain:
+	if (tegra->domain)
+		iommu_domain_free(tegra->domain);
+free:
+	kfree(tegra);
 put:
 	drm_dev_put(drm);
 	return err;
@@ -1229,8 +1195,29 @@ put:
 static int host1x_drm_remove(struct host1x_device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(&dev->dev);
+	struct tegra_drm *tegra = drm->dev_private;
+	int err;
 
 	drm_dev_unregister(drm);
+
+	drm_kms_helper_poll_fini(drm);
+	tegra_drm_fb_exit(drm);
+	drm_atomic_helper_shutdown(drm);
+	drm_mode_config_cleanup(drm);
+
+	err = host1x_device_exit(dev);
+	if (err < 0)
+		dev_err(&dev->dev, "host1x device cleanup failed: %d\n", err);
+
+	if (tegra->domain) {
+		mutex_destroy(&tegra->mm_lock);
+		drm_mm_takedown(&tegra->mm);
+		put_iova_domain(&tegra->carveout.domain);
+		iova_cache_put();
+		iommu_domain_free(tegra->domain);
+	}
+
+	kfree(tegra);
 	drm_dev_put(drm);
 
 	return 0;

From 7edd7961e58d531d19758134919de13dac47bcbe Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 28 Oct 2019 13:37:08 +0100
Subject: [PATCH 56/68] drm/tegra: Simplify IOMMU group selection

All the devices that make up the DRM device are now part of the same
IOMMU group. This simplifies the handling of the IOMMU attachment and
also avoids exhausting the number of IOMMUs available on early Tegra
SoC generations.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dc.c   |  2 +-
 drivers/gpu/drm/tegra/drm.c  | 34 ++++++++++++++++++++--------------
 drivers/gpu/drm/tegra/drm.h  |  3 +--
 drivers/gpu/drm/tegra/gr2d.c |  2 +-
 drivers/gpu/drm/tegra/gr3d.c |  2 +-
 drivers/gpu/drm/tegra/vic.c  |  2 +-
 6 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 54966f538141..36c36b295ab1 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -2014,7 +2014,7 @@ static int tegra_dc_init(struct host1x_client *client)
 	if (!dc->syncpt)
 		dev_warn(dc->dev, "failed to allocate syncpoint\n");
 
-	err = host1x_client_iommu_attach(client, true);
+	err = host1x_client_iommu_attach(client);
 	if (err < 0) {
 		dev_err(client->dev, "failed to attach to domain: %d\n", err);
 		return err;
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 7480f575188d..9a1c1694604a 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -904,7 +904,7 @@ int tegra_drm_unregister_client(struct tegra_drm *tegra,
 	return 0;
 }
 
-int host1x_client_iommu_attach(struct host1x_client *client, bool shared)
+int host1x_client_iommu_attach(struct host1x_client *client)
 {
 	struct drm_device *drm = dev_get_drvdata(client->parent);
 	struct tegra_drm *tegra = drm->dev_private;
@@ -912,29 +912,30 @@ int host1x_client_iommu_attach(struct host1x_client *client, bool shared)
 	int err;
 
 	if (tegra->domain) {
+		struct iommu_domain *domain;
+
 		group = iommu_group_get(client->dev);
 		if (!group) {
 			dev_err(client->dev, "failed to get IOMMU group\n");
 			return -ENODEV;
 		}
 
-		if (!shared || (shared && (group != tegra->group))) {
 #if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
-			if (client->dev->archdata.mapping) {
-				struct dma_iommu_mapping *mapping =
-					to_dma_iommu_mapping(client->dev);
-				arm_iommu_detach_device(client->dev);
-				arm_iommu_release_mapping(mapping);
-			}
+		if (client->dev->archdata.mapping) {
+			struct dma_iommu_mapping *mapping =
+				to_dma_iommu_mapping(client->dev);
+			arm_iommu_detach_device(client->dev);
+			arm_iommu_release_mapping(mapping);
+		}
 #endif
+
+		domain = iommu_get_domain_for_dev(client->dev);
+		if (domain != tegra->domain) {
 			err = iommu_attach_group(tegra->domain, group);
 			if (err < 0) {
 				iommu_group_put(group);
 				return err;
 			}
-
-			if (shared && !tegra->group)
-				tegra->group = group;
 		}
 	}
 
@@ -947,12 +948,17 @@ void host1x_client_iommu_detach(struct host1x_client *client)
 {
 	struct drm_device *drm = dev_get_drvdata(client->parent);
 	struct tegra_drm *tegra = drm->dev_private;
+	struct iommu_domain *domain;
 
 	if (client->group) {
-		if (client->group == tegra->group) {
+		/*
+		 * Devices that are part of the same group may no longer be
+		 * attached to a domain at this point because their group may
+		 * have been detached by an earlier client.
+		 */
+		domain = iommu_get_domain_for_dev(client->dev);
+		if (domain)
 			iommu_detach_group(tegra->domain, client->group);
-			tegra->group = NULL;
-		}
 
 		iommu_group_put(client->group);
 	}
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 8b812bb52e5b..28f2820a7371 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -36,7 +36,6 @@ struct tegra_drm {
 	struct drm_device *drm;
 
 	struct iommu_domain *domain;
-	struct iommu_group *group;
 	struct mutex mm_lock;
 	struct drm_mm mm;
 
@@ -100,7 +99,7 @@ int tegra_drm_register_client(struct tegra_drm *tegra,
 			      struct tegra_drm_client *client);
 int tegra_drm_unregister_client(struct tegra_drm *tegra,
 				struct tegra_drm_client *client);
-int host1x_client_iommu_attach(struct host1x_client *client, bool shared);
+int host1x_client_iommu_attach(struct host1x_client *client);
 void host1x_client_iommu_detach(struct host1x_client *client);
 
 int tegra_drm_init(struct tegra_drm *tegra, struct drm_device *drm);
diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 5d5af9a05c18..1fc4e56c7cc5 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -50,7 +50,7 @@ static int gr2d_init(struct host1x_client *client)
 		goto put;
 	}
 
-	err = host1x_client_iommu_attach(client, false);
+	err = host1x_client_iommu_attach(client);
 	if (err < 0) {
 		dev_err(client->dev, "failed to attach to domain: %d\n", err);
 		goto free;
diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index c249a6bd8d51..24fae0f64032 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -59,7 +59,7 @@ static int gr3d_init(struct host1x_client *client)
 		goto put;
 	}
 
-	err = host1x_client_iommu_attach(client, false);
+	err = host1x_client_iommu_attach(client);
 	if (err < 0) {
 		dev_err(client->dev, "failed to attach to domain: %d\n", err);
 		goto free;
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index d34b1ada422c..603f41ed4b81 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -187,7 +187,7 @@ static int vic_init(struct host1x_client *client)
 	struct vic *vic = to_vic(drm);
 	int err;
 
-	err = host1x_client_iommu_attach(client, false);
+	err = host1x_client_iommu_attach(client);
 	if (err < 0) {
 		dev_err(vic->dev, "failed to attach to domain: %d\n", err);
 		return err;

From 80327ce3d4edaa9abde1c6e1a1785572c7de3750 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 28 Oct 2019 13:37:09 +0100
Subject: [PATCH 57/68] gpu: host1x: Overhaul host1x_bo_{pin,unpin}() API

The host1x_bo_pin() and host1x_bo_unpin() APIs are used to pin and unpin
buffers during host1x job submission. Pinning currently returns the SG
table and the DMA address (an IOVA if an IOMMU is used or a physical
address if no IOMMU is used) of the buffer. The DMA address is only used
for buffers that are relocated, whereas the host1x driver will map
gather buffers into its own IOVA space so that they can be processed by
the CDMA engine.

This approach has a couple of issues. On one hand it's not very useful
to return a DMA address for the buffer if host1x doesn't need it. On the
other hand, returning the SG table of the buffer is suboptimal because a
single SG table cannot be shared for multiple mappings, because the DMA
address is stored within the SG table, and the DMA address may be
different for different devices.

Subsequent patches will move the host1x driver over to the DMA API which
doesn't work with a single shared SG table. Fix this by returning a new
SG table each time a buffer is pinned. This allows the buffer to be
referenced by multiple jobs for different engines.

Change the prototypes of host1x_bo_pin() and host1x_bo_unpin() to take a
struct device *, specifying the device for which the buffer should be
pinned. This is required in order to be able to properly construct the
SG table. While at it, make host1x_bo_pin() return the SG table because
that allows us to return an ERR_PTR()-encoded error code if we need to,
or return NULL to signal that we don't need the SG table to be remapped
and can simply use the DMA address as-is. At the same time, returning
the DMA address is made optional because in the example of command
buffers, host1x doesn't need to know the DMA address since it will have
to create its own mapping anyway.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gem.c | 34 ++++++++++++++++++++++++++++++----
 drivers/gpu/host1x/job.c    | 15 ++++++++++++---
 include/linux/host1x.h      | 17 ++++++++++-------
 3 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index d2f88cc3134f..564ef60f67c2 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -27,17 +27,43 @@ static void tegra_bo_put(struct host1x_bo *bo)
 	drm_gem_object_put_unlocked(&obj->gem);
 }
 
-static dma_addr_t tegra_bo_pin(struct host1x_bo *bo, struct sg_table **sgt)
+static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo,
+				     dma_addr_t *phys)
 {
 	struct tegra_bo *obj = host1x_to_tegra_bo(bo);
+	struct sg_table *sgt;
+	int err;
 
-	*sgt = obj->sgt;
+	if (phys)
+		*phys = obj->iova;
 
-	return obj->iova;
+	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
+	if (!sgt)
+		return ERR_PTR(-ENOMEM);
+
+	if (obj->pages) {
+		err = sg_alloc_table_from_pages(sgt, obj->pages, obj->num_pages,
+						0, obj->gem.size, GFP_KERNEL);
+		if (err < 0)
+			goto free;
+	} else {
+		err = dma_get_sgtable(dev, sgt, obj->vaddr, obj->iova,
+				      obj->gem.size);
+		if (err < 0)
+			goto free;
+	}
+
+	return sgt;
+
+free:
+	kfree(sgt);
+	return ERR_PTR(err);
 }
 
-static void tegra_bo_unpin(struct host1x_bo *bo, struct sg_table *sgt)
+static void tegra_bo_unpin(struct device *dev, struct sg_table *sgt)
 {
+	sg_free_table(sgt);
+	kfree(sgt);
 }
 
 static void *tegra_bo_mmap(struct host1x_bo *bo)
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index eaa5c3352c13..90dd592fdfca 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -99,6 +99,7 @@ EXPORT_SYMBOL(host1x_job_add_gather);
 
 static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 {
+	struct device *dev = job->client->dev;
 	unsigned int i;
 	int err;
 
@@ -115,7 +116,11 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			goto unpin;
 		}
 
-		phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);
+		sgt = host1x_bo_pin(dev, reloc->target.bo, &phys_addr);
+		if (IS_ERR(sgt)) {
+			err = PTR_ERR(sgt);
+			goto unpin;
+		}
 
 		job->addr_phys[job->num_unpins] = phys_addr;
 		job->unpins[job->num_unpins].bo = reloc->target.bo;
@@ -139,7 +144,11 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			goto unpin;
 		}
 
-		phys_addr = host1x_bo_pin(g->bo, &sgt);
+		sgt = host1x_bo_pin(host->dev, g->bo, &phys_addr);
+		if (IS_ERR(sgt)) {
+			err = PTR_ERR(sgt);
+			goto unpin;
+		}
 
 		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
 			for_each_sg(sgt->sgl, sg, sgt->nents, j)
@@ -566,7 +575,7 @@ void host1x_job_unpin(struct host1x_job *job)
 				iova_pfn(&host->iova, job->addr_phys[i]));
 		}
 
-		host1x_bo_unpin(unpin->bo, unpin->sgt);
+		host1x_bo_unpin(host->dev, unpin->bo, unpin->sgt);
 		host1x_bo_put(unpin->bo);
 	}
 
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index df6e613ba715..1ba23a6a2021 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -67,8 +67,9 @@ struct sg_table;
 struct host1x_bo_ops {
 	struct host1x_bo *(*get)(struct host1x_bo *bo);
 	void (*put)(struct host1x_bo *bo);
-	dma_addr_t (*pin)(struct host1x_bo *bo, struct sg_table **sgt);
-	void (*unpin)(struct host1x_bo *bo, struct sg_table *sgt);
+	struct sg_table *(*pin)(struct device *dev, struct host1x_bo *bo,
+				dma_addr_t *phys);
+	void (*unpin)(struct device *dev, struct sg_table *sgt);
 	void *(*mmap)(struct host1x_bo *bo);
 	void (*munmap)(struct host1x_bo *bo, void *addr);
 	void *(*kmap)(struct host1x_bo *bo, unsigned int pagenum);
@@ -95,15 +96,17 @@ static inline void host1x_bo_put(struct host1x_bo *bo)
 	bo->ops->put(bo);
 }
 
-static inline dma_addr_t host1x_bo_pin(struct host1x_bo *bo,
-				       struct sg_table **sgt)
+static inline struct sg_table *host1x_bo_pin(struct device *dev,
+					     struct host1x_bo *bo,
+					     dma_addr_t *phys)
 {
-	return bo->ops->pin(bo, sgt);
+	return bo->ops->pin(dev, bo, phys);
 }
 
-static inline void host1x_bo_unpin(struct host1x_bo *bo, struct sg_table *sgt)
+static inline void host1x_bo_unpin(struct device *dev, struct host1x_bo *bo,
+				   struct sg_table *sgt)
 {
-	bo->ops->unpin(bo, sgt);
+	bo->ops->unpin(dev, sgt);
 }
 
 static inline void *host1x_bo_mmap(struct host1x_bo *bo)

From 44156eee91ba6f027afbfd6a39016c0e7e31c8e9 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 28 Oct 2019 13:37:10 +0100
Subject: [PATCH 58/68] gpu: host1x: Clean up debugfs on removal

The debugfs files created for host1x are never removed, causing these
files to be left dangling in debugfs. This results in a crash when any
of these files are accessed after the host1x driver has been removed,
as well as a failure to create the debugfs entries when they are added
again on driver probe.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/dev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 452ee5d64021..f30b8447a319 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -402,6 +402,7 @@ static int host1x_remove(struct platform_device *pdev)
 	struct host1x *host = platform_get_drvdata(pdev);
 
 	host1x_unregister(host);
+	host1x_debug_deinit(host);
 	host1x_intr_deinit(host);
 	host1x_syncpt_deinit(host);
 	reset_control_assert(host->rst);

From ab4f81bfc2a8d429130182f8ea3f29a8b1754931 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 28 Oct 2019 13:37:11 +0100
Subject: [PATCH 59/68] gpu: host1x: Add direction flags to relocations

Add direction flags to host1x relocations performed during job pinning.
These flags indicate the kinds of accesses that hardware is allowed to
perform on the relocated buffers.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c | 2 ++
 include/linux/host1x.h      | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 9a1c1694604a..efc8a27b9e6a 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -149,6 +149,8 @@ static int host1x_reloc_copy_from_user(struct host1x_reloc *dest,
 	if (err < 0)
 		return err;
 
+	dest->flags = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;
+
 	dest->cmdbuf.bo = host1x_bo_lookup(file, cmdbuf);
 	if (!dest->cmdbuf.bo)
 		return -ENOENT;
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index 1ba23a6a2021..6f8d772591ba 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -173,6 +173,9 @@ int host1x_job_submit(struct host1x_job *job);
  * host1x job
  */
 
+#define HOST1X_RELOC_READ	(1 << 0)
+#define HOST1X_RELOC_WRITE	(1 << 1)
+
 struct host1x_reloc {
 	struct {
 		struct host1x_bo *bo;
@@ -183,6 +186,7 @@ struct host1x_reloc {
 		unsigned long offset;
 	} target;
 	unsigned long shift;
+	unsigned long flags;
 };
 
 struct host1x_job {

From b78e70c04c149299bd210759d7c7af7c86b89ca8 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 28 Oct 2019 13:37:12 +0100
Subject: [PATCH 60/68] gpu: host1x: Allocate gather copy for host1x

Currently when the gather buffers are copied, they are copied to a
buffer that is allocated for the host1x client that wants to execute the
command streams in the buffers. However, the gather buffers will be read
by the host1x device, which causes SMMU faults if the DMA API is backed
by an IOMMU.

Fix this by allocating the gather buffer copy for the host1x device,
which makes sure that it will be mapped into the host1x's IOVA space if
the DMA API is backed by an IOMMU.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/job.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 90dd592fdfca..2e0c3e0ca1fa 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -445,7 +445,8 @@ out:
 	return err;
 }
 
-static inline int copy_gathers(struct host1x_job *job, struct device *dev)
+static inline int copy_gathers(struct device *host, struct host1x_job *job,
+			       struct device *dev)
 {
 	struct host1x_firewall fw;
 	size_t size = 0;
@@ -468,12 +469,12 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 	 * Try a non-blocking allocation from a higher priority pools first,
 	 * as awaiting for the allocation here is a major performance hit.
 	 */
-	job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy,
+	job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
 					       GFP_NOWAIT);
 
 	/* the higher priority allocation failed, try the generic-blocking */
 	if (!job->gather_copy_mapped)
-		job->gather_copy_mapped = dma_alloc_wc(dev, size,
+		job->gather_copy_mapped = dma_alloc_wc(host, size,
 						       &job->gather_copy,
 						       GFP_KERNEL);
 	if (!job->gather_copy_mapped)
@@ -521,7 +522,7 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
 		goto out;
 
 	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
-		err = copy_gathers(job, dev);
+		err = copy_gathers(host->dev, job, dev);
 		if (err)
 			goto out;
 	}
@@ -582,7 +583,7 @@ void host1x_job_unpin(struct host1x_job *job)
 	job->num_unpins = 0;
 
 	if (job->gather_copy_size)
-		dma_free_wc(job->channel->dev, job->gather_copy_size,
+		dma_free_wc(host->dev, job->gather_copy_size,
 			    job->gather_copy_mapped, job->gather_copy);
 }
 EXPORT_SYMBOL(host1x_job_unpin);

From af1cbfb9bf0fe079ca328231451fd4db8b3eafec Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 28 Oct 2019 13:37:13 +0100
Subject: [PATCH 61/68] gpu: host1x: Support DMA mapping of buffers

If host1x_bo_pin() returns an SG table, create a DMA mapping for the
buffer. For buffers that the host1x client has already mapped itself,
host1x_bo_pin() returns NULL and the existing DMA address is used.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/gem.c | 18 +++++++--
 drivers/gpu/host1x/dev.c    | 16 ++------
 drivers/gpu/host1x/job.c    | 73 ++++++++++++++++++++++++++++++++-----
 drivers/gpu/host1x/job.h    |  4 ++
 4 files changed, 87 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 564ef60f67c2..746dae32c484 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -34,9 +34,19 @@ static struct sg_table *tegra_bo_pin(struct device *dev, struct host1x_bo *bo,
 	struct sg_table *sgt;
 	int err;
 
-	if (phys)
+	/*
+	 * If we've manually mapped the buffer object through the IOMMU, make
+	 * sure to return the IOVA address of our mapping.
+	 */
+	if (phys && obj->mm) {
 		*phys = obj->iova;
+		return NULL;
+	}
 
+	/*
+	 * If we don't have a mapping for this buffer yet, return an SG table
+	 * so that host1x can do the mapping for us via the DMA API.
+	 */
 	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
 	if (!sgt)
 		return ERR_PTR(-ENOMEM);
@@ -62,8 +72,10 @@ free:
 
 static void tegra_bo_unpin(struct device *dev, struct sg_table *sgt)
 {
-	sg_free_table(sgt);
-	kfree(sgt);
+	if (sgt) {
+		sg_free_table(sgt);
+		kfree(sgt);
+	}
 }
 
 static void *tegra_bo_mmap(struct host1x_bo *bo)
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index f30b8447a319..5bdc484398f4 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -18,10 +18,6 @@
 #include <trace/events/host1x.h>
 #undef CREATE_TRACE_POINTS
 
-#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
-#include <asm/dma-iommu.h>
-#endif
-
 #include "bus.h"
 #include "channel.h"
 #include "debug.h"
@@ -276,17 +272,13 @@ static int host1x_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "failed to get reset: %d\n", err);
 		return err;
 	}
-#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
-	if (host->dev->archdata.mapping) {
-		struct dma_iommu_mapping *mapping =
-				to_dma_iommu_mapping(host->dev);
-		arm_iommu_detach_device(host->dev);
-		arm_iommu_release_mapping(mapping);
-	}
-#endif
+
 	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
 		goto skip_iommu;
 
+	if (iommu_get_domain_for_dev(&pdev->dev))
+		goto skip_iommu;
+
 	host->group = iommu_group_get(&pdev->dev);
 	if (host->group) {
 		struct iommu_domain_geometry *geometry;
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 2e0c3e0ca1fa..25ca54de8fc5 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -99,7 +99,8 @@ EXPORT_SYMBOL(host1x_job_add_gather);
 
 static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 {
-	struct device *dev = job->client->dev;
+	struct host1x_client *client = job->client;
+	struct device *dev = client->dev;
 	unsigned int i;
 	int err;
 
@@ -107,8 +108,8 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 
 	for (i = 0; i < job->num_relocs; i++) {
 		struct host1x_reloc *reloc = &job->relocs[i];
+		dma_addr_t phys_addr, *phys;
 		struct sg_table *sgt;
-		dma_addr_t phys_addr;
 
 		reloc->target.bo = host1x_bo_get(reloc->target.bo);
 		if (!reloc->target.bo) {
@@ -116,12 +117,51 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			goto unpin;
 		}
 
-		sgt = host1x_bo_pin(dev, reloc->target.bo, &phys_addr);
+		if (client->group)
+			phys = &phys_addr;
+		else
+			phys = NULL;
+
+		sgt = host1x_bo_pin(dev, reloc->target.bo, phys);
 		if (IS_ERR(sgt)) {
 			err = PTR_ERR(sgt);
 			goto unpin;
 		}
 
+		if (sgt) {
+			unsigned long mask = HOST1X_RELOC_READ |
+					     HOST1X_RELOC_WRITE;
+			enum dma_data_direction dir;
+
+			switch (reloc->flags & mask) {
+			case HOST1X_RELOC_READ:
+				dir = DMA_TO_DEVICE;
+				break;
+
+			case HOST1X_RELOC_WRITE:
+				dir = DMA_FROM_DEVICE;
+				break;
+
+			case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
+				dir = DMA_BIDIRECTIONAL;
+				break;
+
+			default:
+				err = -EINVAL;
+				goto unpin;
+			}
+
+			err = dma_map_sg(dev, sgt->sgl, sgt->nents, dir);
+			if (!err) {
+				err = -ENOMEM;
+				goto unpin;
+			}
+
+			job->unpins[job->num_unpins].dev = dev;
+			job->unpins[job->num_unpins].dir = dir;
+			phys_addr = sg_dma_address(sgt->sgl);
+		}
+
 		job->addr_phys[job->num_unpins] = phys_addr;
 		job->unpins[job->num_unpins].bo = reloc->target.bo;
 		job->unpins[job->num_unpins].sgt = sgt;
@@ -144,7 +184,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 			goto unpin;
 		}
 
-		sgt = host1x_bo_pin(host->dev, g->bo, &phys_addr);
+		sgt = host1x_bo_pin(host->dev, g->bo, NULL);
 		if (IS_ERR(sgt)) {
 			err = PTR_ERR(sgt);
 			goto unpin;
@@ -172,15 +212,24 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
 				goto unpin;
 			}
 
-			job->addr_phys[job->num_unpins] =
-				iova_dma_addr(&host->iova, alloc);
 			job->unpins[job->num_unpins].size = gather_size;
+			phys_addr = iova_dma_addr(&host->iova, alloc);
 		} else {
-			job->addr_phys[job->num_unpins] = phys_addr;
+			err = dma_map_sg(host->dev, sgt->sgl, sgt->nents,
+					 DMA_TO_DEVICE);
+			if (!err) {
+				err = -ENOMEM;
+				goto unpin;
+			}
+
+			job->unpins[job->num_unpins].dev = host->dev;
+			phys_addr = sg_dma_address(sgt->sgl);
 		}
 
-		job->gather_addr_phys[i] = job->addr_phys[job->num_unpins];
+		job->addr_phys[job->num_unpins] = phys_addr;
+		job->gather_addr_phys[i] = phys_addr;
 
+		job->unpins[job->num_unpins].dir = DMA_TO_DEVICE;
 		job->unpins[job->num_unpins].bo = g->bo;
 		job->unpins[job->num_unpins].sgt = sgt;
 		job->num_unpins++;
@@ -567,6 +616,8 @@ void host1x_job_unpin(struct host1x_job *job)
 
 	for (i = 0; i < job->num_unpins; i++) {
 		struct host1x_job_unpin_data *unpin = &job->unpins[i];
+		struct device *dev = unpin->dev ?: host->dev;
+		struct sg_table *sgt = unpin->sgt;
 
 		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
 		    unpin->size && host->domain) {
@@ -576,7 +627,11 @@ void host1x_job_unpin(struct host1x_job *job)
 				iova_pfn(&host->iova, job->addr_phys[i]));
 		}
 
-		host1x_bo_unpin(host->dev, unpin->bo, unpin->sgt);
+		if (unpin->dev && sgt)
+			dma_unmap_sg(unpin->dev, sgt->sgl, sgt->nents,
+				     unpin->dir);
+
+		host1x_bo_unpin(dev, unpin->bo, sgt);
 		host1x_bo_put(unpin->bo);
 	}
 
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index 62b8805e6b35..94bc2e4ae241 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -8,6 +8,8 @@
 #ifndef __HOST1X_JOB_H
 #define __HOST1X_JOB_H
 
+#include <linux/dma-direction.h>
+
 struct host1x_job_gather {
 	unsigned int words;
 	dma_addr_t base;
@@ -19,7 +21,9 @@ struct host1x_job_gather {
 struct host1x_job_unpin_data {
 	struct host1x_bo *bo;
 	struct sg_table *sgt;
+	struct device *dev;
 	size_t size;
+	enum dma_data_direction dir;
 };
 
 /*

From 06867a362de08ff94fb573d84fd213795fbb3922 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 28 Oct 2019 13:37:14 +0100
Subject: [PATCH 62/68] gpu: host1x: Set DMA mask based on IOMMU setup

If the Tegra DRM clients are backed by an IOMMU, push buffers are likely
to be allocated beyond the 32-bit boundary if sufficient system memory
is available. This is problematic on earlier generations of Tegra where
host1x supports a maximum of 32 address bits for the GATHER opcode. More
recent versions of Tegra (Tegra186 and later) have a wide variant of the
GATHER opcode, which allows addressing up to 64 bits of memory.

If host1x itself is behind an IOMMU as well this doesn't matter because
the IOMMU's input address space is restricted to 32 bits on generations
without support for wide GATHER opcodes.

However, if host1x is not behind an IOMMU, it won't be able to process
push buffers beyond the 32-bit boundary on Tegra generations that don't
support wide GATHER opcodes. Restrict the DMA mask to 32 bits on these
generations prevents buffers from being allocated from beyond the 32-bit
boundary.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/dev.c | 214 ++++++++++++++++++++++++---------------
 drivers/gpu/host1x/dev.h |   1 +
 2 files changed, 136 insertions(+), 79 deletions(-)

diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 5bdc484398f4..a738ea55e407 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -73,6 +73,7 @@ static const struct host1x_info host1x01_info = {
 	.init = host1x01_init,
 	.sync_offset = 0x3000,
 	.dma_mask = DMA_BIT_MASK(32),
+	.has_wide_gather = false,
 	.has_hypervisor = false,
 	.num_sid_entries = 0,
 	.sid_table = NULL,
@@ -86,6 +87,7 @@ static const struct host1x_info host1x02_info = {
 	.init = host1x02_init,
 	.sync_offset = 0x3000,
 	.dma_mask = DMA_BIT_MASK(32),
+	.has_wide_gather = false,
 	.has_hypervisor = false,
 	.num_sid_entries = 0,
 	.sid_table = NULL,
@@ -99,6 +101,7 @@ static const struct host1x_info host1x04_info = {
 	.init = host1x04_init,
 	.sync_offset = 0x2100,
 	.dma_mask = DMA_BIT_MASK(34),
+	.has_wide_gather = false,
 	.has_hypervisor = false,
 	.num_sid_entries = 0,
 	.sid_table = NULL,
@@ -112,6 +115,7 @@ static const struct host1x_info host1x05_info = {
 	.init = host1x05_init,
 	.sync_offset = 0x2100,
 	.dma_mask = DMA_BIT_MASK(34),
+	.has_wide_gather = false,
 	.has_hypervisor = false,
 	.num_sid_entries = 0,
 	.sid_table = NULL,
@@ -134,6 +138,7 @@ static const struct host1x_info host1x06_info = {
 	.init = host1x06_init,
 	.sync_offset = 0x0,
 	.dma_mask = DMA_BIT_MASK(40),
+	.has_wide_gather = true,
 	.has_hypervisor = true,
 	.num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
 	.sid_table = tegra186_sid_table,
@@ -156,6 +161,7 @@ static const struct host1x_info host1x07_info = {
 	.init = host1x07_init,
 	.sync_offset = 0x0,
 	.dma_mask = DMA_BIT_MASK(40),
+	.has_wide_gather = true,
 	.has_hypervisor = true,
 	.num_sid_entries = ARRAY_SIZE(tegra194_sid_table),
 	.sid_table = tegra194_sid_table,
@@ -186,6 +192,117 @@ static void host1x_setup_sid_table(struct host1x *host)
 	}
 }
 
+static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
+{
+	struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
+	int err;
+
+	/*
+	 * If the host1x firewall is enabled, there's no need to enable IOMMU
+	 * support. Similarly, if host1x is already attached to an IOMMU (via
+	 * the DMA API), don't try to attach again.
+	 */
+	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) || domain)
+		return domain;
+
+	host->group = iommu_group_get(host->dev);
+	if (host->group) {
+		struct iommu_domain_geometry *geometry;
+		dma_addr_t start, end;
+		unsigned long order;
+
+		err = iova_cache_get();
+		if (err < 0)
+			goto put_group;
+
+		host->domain = iommu_domain_alloc(&platform_bus_type);
+		if (!host->domain) {
+			err = -ENOMEM;
+			goto put_cache;
+		}
+
+		err = iommu_attach_group(host->domain, host->group);
+		if (err) {
+			if (err == -ENODEV)
+				err = 0;
+
+			goto free_domain;
+		}
+
+		geometry = &host->domain->geometry;
+		start = geometry->aperture_start & host->info->dma_mask;
+		end = geometry->aperture_end & host->info->dma_mask;
+
+		order = __ffs(host->domain->pgsize_bitmap);
+		init_iova_domain(&host->iova, 1UL << order, start >> order);
+		host->iova_end = end;
+
+		domain = host->domain;
+	}
+
+	return domain;
+
+free_domain:
+	iommu_domain_free(host->domain);
+	host->domain = NULL;
+put_cache:
+	iova_cache_put();
+put_group:
+	iommu_group_put(host->group);
+	host->group = NULL;
+
+	return ERR_PTR(err);
+}
+
+static int host1x_iommu_init(struct host1x *host)
+{
+	u64 mask = host->info->dma_mask;
+	struct iommu_domain *domain;
+	int err;
+
+	domain = host1x_iommu_attach(host);
+	if (IS_ERR(domain)) {
+		err = PTR_ERR(domain);
+		dev_err(host->dev, "failed to attach to IOMMU: %d\n", err);
+		return err;
+	}
+
+	/*
+	 * If we're not behind an IOMMU make sure we don't get push buffers
+	 * that are allocated outside of the range addressable by the GATHER
+	 * opcode.
+	 *
+	 * Newer generations of Tegra (Tegra186 and later) support a wide
+	 * variant of the GATHER opcode that allows addressing more bits.
+	 */
+	if (!domain && !host->info->has_wide_gather)
+		mask = DMA_BIT_MASK(32);
+
+	err = dma_coerce_mask_and_coherent(host->dev, mask);
+	if (err < 0) {
+		dev_err(host->dev, "failed to set DMA mask: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+static void host1x_iommu_exit(struct host1x *host)
+{
+	if (host->domain) {
+		put_iova_domain(&host->iova);
+		iommu_detach_group(host->domain, host->group);
+
+		iommu_domain_free(host->domain);
+		host->domain = NULL;
+
+		iova_cache_put();
+
+		iommu_group_put(host->group);
+		host->group = NULL;
+	}
+}
+
 static int host1x_probe(struct platform_device *pdev)
 {
 	struct host1x *host;
@@ -248,8 +365,6 @@ static int host1x_probe(struct platform_device *pdev)
 	host->dev->dma_parms = &host->dma_parms;
 	dma_set_max_seg_size(host->dev, UINT_MAX);
 
-	dma_set_mask_and_coherent(host->dev, host->info->dma_mask);
-
 	if (host->info->init) {
 		err = host->info->init(host);
 		if (err)
@@ -273,82 +388,41 @@ static int host1x_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
-		goto skip_iommu;
-
-	if (iommu_get_domain_for_dev(&pdev->dev))
-		goto skip_iommu;
-
-	host->group = iommu_group_get(&pdev->dev);
-	if (host->group) {
-		struct iommu_domain_geometry *geometry;
-		u64 mask = dma_get_mask(host->dev);
-		dma_addr_t start, end;
-		unsigned long order;
-
-		err = iova_cache_get();
-		if (err < 0)
-			goto put_group;
-
-		host->domain = iommu_domain_alloc(&platform_bus_type);
-		if (!host->domain) {
-			err = -ENOMEM;
-			goto put_cache;
-		}
-
-		err = iommu_attach_group(host->domain, host->group);
-		if (err) {
-			if (err == -ENODEV) {
-				iommu_domain_free(host->domain);
-				host->domain = NULL;
-				iova_cache_put();
-				iommu_group_put(host->group);
-				host->group = NULL;
-				goto skip_iommu;
-			}
-
-			goto fail_free_domain;
-		}
-
-		geometry = &host->domain->geometry;
-		start = geometry->aperture_start & mask;
-		end = geometry->aperture_end & mask;
-
-		order = __ffs(host->domain->pgsize_bitmap);
-		init_iova_domain(&host->iova, 1UL << order, start >> order);
-		host->iova_end = end;
+	err = host1x_iommu_init(host);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err);
+		return err;
 	}
 
-skip_iommu:
 	err = host1x_channel_list_init(&host->channel_list,
 				       host->info->nb_channels);
 	if (err) {
 		dev_err(&pdev->dev, "failed to initialize channel list\n");
-		goto fail_detach_device;
+		goto iommu_exit;
 	}
 
 	err = clk_prepare_enable(host->clk);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to enable clock\n");
-		goto fail_free_channels;
+		goto free_channels;
 	}
 
 	err = reset_control_deassert(host->rst);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to deassert reset: %d\n", err);
-		goto fail_unprepare_disable;
+		goto unprepare_disable;
 	}
 
 	err = host1x_syncpt_init(host);
 	if (err) {
 		dev_err(&pdev->dev, "failed to initialize syncpts\n");
-		goto fail_reset_assert;
+		goto reset_assert;
 	}
 
 	err = host1x_intr_init(host, syncpt_irq);
 	if (err) {
 		dev_err(&pdev->dev, "failed to initialize interrupts\n");
-		goto fail_deinit_syncpt;
+		goto deinit_syncpt;
 	}
 
 	host1x_debug_init(host);
@@ -358,33 +432,22 @@ skip_iommu:
 
 	err = host1x_register(host);
 	if (err < 0)
-		goto fail_deinit_intr;
+		goto deinit_intr;
 
 	return 0;
 
-fail_deinit_intr:
+deinit_intr:
 	host1x_intr_deinit(host);
-fail_deinit_syncpt:
+deinit_syncpt:
 	host1x_syncpt_deinit(host);
-fail_reset_assert:
+reset_assert:
 	reset_control_assert(host->rst);
-fail_unprepare_disable:
+unprepare_disable:
 	clk_disable_unprepare(host->clk);
-fail_free_channels:
+free_channels:
 	host1x_channel_list_free(&host->channel_list);
-fail_detach_device:
-	if (host->group && host->domain) {
-		put_iova_domain(&host->iova);
-		iommu_detach_group(host->domain, host->group);
-	}
-fail_free_domain:
-	if (host->domain)
-		iommu_domain_free(host->domain);
-put_cache:
-	if (host->group)
-		iova_cache_put();
-put_group:
-	iommu_group_put(host->group);
+iommu_exit:
+	host1x_iommu_exit(host);
 
 	return err;
 }
@@ -399,14 +462,7 @@ static int host1x_remove(struct platform_device *pdev)
 	host1x_syncpt_deinit(host);
 	reset_control_assert(host->rst);
 	clk_disable_unprepare(host->clk);
-
-	if (host->domain) {
-		put_iova_domain(&host->iova);
-		iommu_detach_group(host->domain, host->group);
-		iommu_domain_free(host->domain);
-		iova_cache_put();
-		iommu_group_put(host->group);
-	}
+	host1x_iommu_exit(host);
 
 	return 0;
 }
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index abafde7c665e..f781a9b0f39d 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -97,6 +97,7 @@ struct host1x_info {
 	int (*init)(struct host1x *host1x); /* initialize per SoC ops */
 	unsigned int sync_offset; /* offset of syncpoint registers */
 	u64 dma_mask; /* mask of addressable memory */
+	bool has_wide_gather; /* supports GATHER_W opcode */
 	bool has_hypervisor; /* has hypervisor registers */
 	unsigned int num_sid_entries;
 	const struct host1x_sid_entry *sid_table;

From 20e7dce255e96a4d58168cf48e20210146dacf23 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 28 Oct 2019 13:37:15 +0100
Subject: [PATCH 63/68] drm/tegra: Remove memory allocation from Falcon library

Having to provide allocator hooks to the Falcon library is somewhat
cumbersome and it doesn't give the users of the library a lot of
flexibility to deal with allocations. Instead, remove the notion of
Falcon "operations" and let drivers deal with the memory allocations
themselves.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/falcon.c | 50 ++-----------------
 drivers/gpu/drm/tegra/falcon.h | 11 ----
 drivers/gpu/drm/tegra/vic.c    | 91 ++++++++++++++++++++++++----------
 3 files changed, 68 insertions(+), 84 deletions(-)

diff --git a/drivers/gpu/drm/tegra/falcon.c b/drivers/gpu/drm/tegra/falcon.c
index f49ad36e24db..a5b25e4ecbd2 100644
--- a/drivers/gpu/drm/tegra/falcon.c
+++ b/drivers/gpu/drm/tegra/falcon.c
@@ -59,26 +59,11 @@ static void falcon_copy_firmware_image(struct falcon *falcon,
 				       const struct firmware *firmware)
 {
 	u32 *firmware_vaddr = falcon->firmware.vaddr;
-	dma_addr_t daddr;
 	size_t i;
-	int err;
 
 	/* copy the whole thing taking into account endianness */
 	for (i = 0; i < firmware->size / sizeof(u32); i++)
 		firmware_vaddr[i] = le32_to_cpu(((u32 *)firmware->data)[i]);
-
-	/* ensure that caches are flushed and falcon can see the firmware */
-	daddr = dma_map_single(falcon->dev, firmware_vaddr,
-			       falcon->firmware.size, DMA_TO_DEVICE);
-	err = dma_mapping_error(falcon->dev, daddr);
-	if (err) {
-		dev_err(falcon->dev, "failed to map firmware: %d\n", err);
-		return;
-	}
-	dma_sync_single_for_device(falcon->dev, daddr,
-				   falcon->firmware.size, DMA_TO_DEVICE);
-	dma_unmap_single(falcon->dev, daddr, falcon->firmware.size,
-			 DMA_TO_DEVICE);
 }
 
 static int falcon_parse_firmware_image(struct falcon *falcon)
@@ -125,6 +110,8 @@ int falcon_read_firmware(struct falcon *falcon, const char *name)
 	if (err < 0)
 		return err;
 
+	falcon->firmware.size = falcon->firmware.firmware->size;
+
 	return 0;
 }
 
@@ -133,16 +120,6 @@ int falcon_load_firmware(struct falcon *falcon)
 	const struct firmware *firmware = falcon->firmware.firmware;
 	int err;
 
-	falcon->firmware.size = firmware->size;
-
-	/* allocate iova space for the firmware */
-	falcon->firmware.vaddr = falcon->ops->alloc(falcon, firmware->size,
-						    &falcon->firmware.paddr);
-	if (IS_ERR(falcon->firmware.vaddr)) {
-		dev_err(falcon->dev, "DMA memory mapping failed\n");
-		return PTR_ERR(falcon->firmware.vaddr);
-	}
-
 	/* copy firmware image into local area. this also ensures endianness */
 	falcon_copy_firmware_image(falcon, firmware);
 
@@ -150,27 +127,17 @@ int falcon_load_firmware(struct falcon *falcon)
 	err = falcon_parse_firmware_image(falcon);
 	if (err < 0) {
 		dev_err(falcon->dev, "failed to parse firmware image\n");
-		goto err_setup_firmware_image;
+		return err;
 	}
 
 	release_firmware(firmware);
 	falcon->firmware.firmware = NULL;
 
 	return 0;
-
-err_setup_firmware_image:
-	falcon->ops->free(falcon, falcon->firmware.size,
-			  falcon->firmware.paddr, falcon->firmware.vaddr);
-
-	return err;
 }
 
 int falcon_init(struct falcon *falcon)
 {
-	/* check mandatory ops */
-	if (!falcon->ops || !falcon->ops->alloc || !falcon->ops->free)
-		return -EINVAL;
-
 	falcon->firmware.vaddr = NULL;
 
 	return 0;
@@ -178,17 +145,8 @@ int falcon_init(struct falcon *falcon)
 
 void falcon_exit(struct falcon *falcon)
 {
-	if (falcon->firmware.firmware) {
+	if (falcon->firmware.firmware)
 		release_firmware(falcon->firmware.firmware);
-		falcon->firmware.firmware = NULL;
-	}
-
-	if (falcon->firmware.vaddr) {
-		falcon->ops->free(falcon, falcon->firmware.size,
-				  falcon->firmware.paddr,
-				  falcon->firmware.vaddr);
-		falcon->firmware.vaddr = NULL;
-	}
 }
 
 int falcon_boot(struct falcon *falcon)
diff --git a/drivers/gpu/drm/tegra/falcon.h b/drivers/gpu/drm/tegra/falcon.h
index 3d1243217410..92491a1e90df 100644
--- a/drivers/gpu/drm/tegra/falcon.h
+++ b/drivers/gpu/drm/tegra/falcon.h
@@ -74,15 +74,6 @@ struct falcon_fw_os_header_v1 {
 	u32 data_size;
 };
 
-struct falcon;
-
-struct falcon_ops {
-	void *(*alloc)(struct falcon *falcon, size_t size,
-		       dma_addr_t *paddr);
-	void (*free)(struct falcon *falcon, size_t size,
-		     dma_addr_t paddr, void *vaddr);
-};
-
 struct falcon_firmware_section {
 	unsigned long offset;
 	size_t size;
@@ -107,8 +98,6 @@ struct falcon {
 	/* Set by falcon client */
 	struct device *dev;
 	void __iomem *regs;
-	const struct falcon_ops *ops;
-	void *data;
 
 	struct falcon_firmware firmware;
 };
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index 603f41ed4b81..4345b8054617 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -158,27 +158,6 @@ static int vic_boot(struct vic *vic)
 	return 0;
 }
 
-static void *vic_falcon_alloc(struct falcon *falcon, size_t size,
-			      dma_addr_t *iova)
-{
-	struct tegra_drm *tegra = falcon->data;
-
-	return tegra_drm_alloc(tegra, size, iova);
-}
-
-static void vic_falcon_free(struct falcon *falcon, size_t size,
-			    dma_addr_t iova, void *va)
-{
-	struct tegra_drm *tegra = falcon->data;
-
-	return tegra_drm_free(tegra, size, va, iova);
-}
-
-static const struct falcon_ops vic_falcon_ops = {
-	.alloc = vic_falcon_alloc,
-	.free = vic_falcon_free
-};
-
 static int vic_init(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
@@ -246,6 +225,15 @@ static int vic_exit(struct host1x_client *client)
 	host1x_channel_put(vic->channel);
 	host1x_client_iommu_detach(client);
 
+	if (client->group)
+		tegra_drm_free(tegra, vic->falcon.firmware.size,
+			       vic->falcon.firmware.vaddr,
+			       vic->falcon.firmware.paddr);
+	else
+		dma_free_coherent(vic->dev, vic->falcon.firmware.size,
+				  vic->falcon.firmware.vaddr,
+				  vic->falcon.firmware.paddr);
+
 	return 0;
 }
 
@@ -256,25 +244,75 @@ static const struct host1x_client_ops vic_client_ops = {
 
 static int vic_load_firmware(struct vic *vic)
 {
+	struct host1x_client *client = &vic->client.base;
+	struct tegra_drm *tegra = vic->client.drm;
+	dma_addr_t phys;
+	size_t size;
+	void *virt;
 	int err;
 
-	if (vic->falcon.data)
+	if (vic->falcon.firmware.vaddr)
 		return 0;
 
-	vic->falcon.data = vic->client.drm;
-
 	err = falcon_read_firmware(&vic->falcon, vic->config->firmware);
 	if (err < 0)
-		goto cleanup;
+		return err;
+
+	size = vic->falcon.firmware.size;
+
+	if (!client->group) {
+		virt = dma_alloc_coherent(vic->dev, size, &phys, GFP_KERNEL);
+
+		err = dma_mapping_error(vic->dev, phys);
+		if (err < 0)
+			return err;
+	} else {
+		virt = tegra_drm_alloc(tegra, size, &phys);
+	}
+
+	vic->falcon.firmware.vaddr = virt;
+	vic->falcon.firmware.paddr = phys;
 
 	err = falcon_load_firmware(&vic->falcon);
 	if (err < 0)
 		goto cleanup;
 
+	/*
+	 * In this case we have received an IOVA from the shared domain, so we
+	 * need to make sure to get the physical address so that the DMA API
+	 * knows what memory pages to flush the cache for.
+	 */
+	if (client->group) {
+		phys = dma_map_single(vic->dev, virt, size, DMA_TO_DEVICE);
+
+		err = dma_mapping_error(vic->dev, phys);
+		if (err < 0)
+			goto cleanup;
+
+		/*
+		 * If the DMA API mapped this through a bounce buffer, the
+		 * dma_sync_single_for_device() call below will not be able
+		 * to flush the caches for the right memory pages. Output a
+		 * big warning in that case so that the DMA mask can be set
+		 * properly and the bounce buffer avoided.
+		 */
+		WARN(phys != vic->falcon.firmware.paddr,
+		     "check DMA mask setting for %s\n", dev_name(vic->dev));
+	}
+
+	dma_sync_single_for_device(vic->dev, phys, size, DMA_TO_DEVICE);
+
+	if (client->group)
+		dma_unmap_single(vic->dev, phys, size, DMA_TO_DEVICE);
+
 	return 0;
 
 cleanup:
-	vic->falcon.data = NULL;
+	if (!client->group)
+		dma_free_coherent(vic->dev, size, virt, phys);
+	else
+		tegra_drm_free(tegra, size, virt, phys);
+
 	return err;
 }
 
@@ -415,7 +453,6 @@ static int vic_probe(struct platform_device *pdev)
 
 	vic->falcon.dev = dev;
 	vic->falcon.regs = vic->regs;
-	vic->falcon.ops = &vic_falcon_ops;
 
 	err = falcon_init(&vic->falcon);
 	if (err < 0)

From d972d6247628054f4a9f05c084a1f52de9df209d Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 28 Oct 2019 13:37:16 +0100
Subject: [PATCH 64/68] drm/tegra: falcon: Clarify address usage

Rename paddr -> iova and vaddr -> virt to make it clearer how these
addresses are used. This is important for a subsequent patch that makes
a distinction between the physical address (physical address of the
system memory from the CPU's point of view) and the IOVA (physical
address of the system memory from the device's point of view).

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/falcon.c | 14 ++++-----
 drivers/gpu/drm/tegra/falcon.h |  5 +--
 drivers/gpu/drm/tegra/vic.c    | 56 +++++++++++++++-------------------
 3 files changed, 34 insertions(+), 41 deletions(-)

diff --git a/drivers/gpu/drm/tegra/falcon.c b/drivers/gpu/drm/tegra/falcon.c
index a5b25e4ecbd2..56edef06c48e 100644
--- a/drivers/gpu/drm/tegra/falcon.c
+++ b/drivers/gpu/drm/tegra/falcon.c
@@ -58,17 +58,17 @@ static int falcon_copy_chunk(struct falcon *falcon,
 static void falcon_copy_firmware_image(struct falcon *falcon,
 				       const struct firmware *firmware)
 {
-	u32 *firmware_vaddr = falcon->firmware.vaddr;
+	u32 *virt = falcon->firmware.virt;
 	size_t i;
 
 	/* copy the whole thing taking into account endianness */
 	for (i = 0; i < firmware->size / sizeof(u32); i++)
-		firmware_vaddr[i] = le32_to_cpu(((u32 *)firmware->data)[i]);
+		virt[i] = le32_to_cpu(((u32 *)firmware->data)[i]);
 }
 
 static int falcon_parse_firmware_image(struct falcon *falcon)
 {
-	struct falcon_fw_bin_header_v1 *bin = (void *)falcon->firmware.vaddr;
+	struct falcon_fw_bin_header_v1 *bin = (void *)falcon->firmware.virt;
 	struct falcon_fw_os_header_v1 *os;
 
 	/* endian problems would show up right here */
@@ -89,7 +89,7 @@ static int falcon_parse_firmware_image(struct falcon *falcon)
 		return -EINVAL;
 	}
 
-	os = falcon->firmware.vaddr + bin->os_header_offset;
+	os = falcon->firmware.virt + bin->os_header_offset;
 
 	falcon->firmware.bin_data.size = bin->os_size;
 	falcon->firmware.bin_data.offset = bin->os_data_offset;
@@ -138,7 +138,7 @@ int falcon_load_firmware(struct falcon *falcon)
 
 int falcon_init(struct falcon *falcon)
 {
-	falcon->firmware.vaddr = NULL;
+	falcon->firmware.virt = NULL;
 
 	return 0;
 }
@@ -155,7 +155,7 @@ int falcon_boot(struct falcon *falcon)
 	u32 value;
 	int err;
 
-	if (!falcon->firmware.vaddr)
+	if (!falcon->firmware.virt)
 		return -EINVAL;
 
 	err = readl_poll_timeout(falcon->regs + FALCON_DMACTL, value,
@@ -168,7 +168,7 @@ int falcon_boot(struct falcon *falcon)
 	falcon_writel(falcon, 0, FALCON_DMACTL);
 
 	/* setup the address of the binary data so Falcon can access it later */
-	falcon_writel(falcon, (falcon->firmware.paddr +
+	falcon_writel(falcon, (falcon->firmware.iova +
 			       falcon->firmware.bin_data.offset) >> 8,
 		      FALCON_DMATRFBASE);
 
diff --git a/drivers/gpu/drm/tegra/falcon.h b/drivers/gpu/drm/tegra/falcon.h
index 92491a1e90df..c56ee32d92ee 100644
--- a/drivers/gpu/drm/tegra/falcon.h
+++ b/drivers/gpu/drm/tegra/falcon.h
@@ -84,8 +84,9 @@ struct falcon_firmware {
 	const struct firmware *firmware;
 
 	/* Raw firmware data */
-	dma_addr_t paddr;
-	void *vaddr;
+	dma_addr_t iova;
+	dma_addr_t phys;
+	void *virt;
 	size_t size;
 
 	/* Parsed firmware information */
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index 4345b8054617..9444ba183990 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -133,9 +133,9 @@ static int vic_boot(struct vic *vic)
 	if (err < 0)
 		return err;
 
-	hdr = vic->falcon.firmware.vaddr;
+	hdr = vic->falcon.firmware.virt;
 	fce_bin_data_offset = *(u32 *)(hdr + VIC_UCODE_FCE_DATA_OFFSET);
-	hdr = vic->falcon.firmware.vaddr +
+	hdr = vic->falcon.firmware.virt +
 		*(u32 *)(hdr + VIC_UCODE_FCE_HEADER_OFFSET);
 	fce_ucode_size = *(u32 *)(hdr + FCE_UCODE_SIZE_OFFSET);
 
@@ -143,7 +143,7 @@ static int vic_boot(struct vic *vic)
 	falcon_execute_method(&vic->falcon, VIC_SET_FCE_UCODE_SIZE,
 			      fce_ucode_size);
 	falcon_execute_method(&vic->falcon, VIC_SET_FCE_UCODE_OFFSET,
-			      (vic->falcon.firmware.paddr + fce_bin_data_offset)
+			      (vic->falcon.firmware.iova + fce_bin_data_offset)
 				>> 8);
 
 	err = falcon_wait_idle(&vic->falcon);
@@ -225,14 +225,17 @@ static int vic_exit(struct host1x_client *client)
 	host1x_channel_put(vic->channel);
 	host1x_client_iommu_detach(client);
 
-	if (client->group)
+	if (client->group) {
+		dma_unmap_single(vic->dev, vic->falcon.firmware.phys,
+				 vic->falcon.firmware.size, DMA_TO_DEVICE);
 		tegra_drm_free(tegra, vic->falcon.firmware.size,
-			       vic->falcon.firmware.vaddr,
-			       vic->falcon.firmware.paddr);
-	else
+			       vic->falcon.firmware.virt,
+			       vic->falcon.firmware.iova);
+	} else {
 		dma_free_coherent(vic->dev, vic->falcon.firmware.size,
-				  vic->falcon.firmware.vaddr,
-				  vic->falcon.firmware.paddr);
+				  vic->falcon.firmware.virt,
+				  vic->falcon.firmware.iova);
+	}
 
 	return 0;
 }
@@ -246,12 +249,12 @@ static int vic_load_firmware(struct vic *vic)
 {
 	struct host1x_client *client = &vic->client.base;
 	struct tegra_drm *tegra = vic->client.drm;
-	dma_addr_t phys;
+	dma_addr_t iova;
 	size_t size;
 	void *virt;
 	int err;
 
-	if (vic->falcon.firmware.vaddr)
+	if (vic->falcon.firmware.virt)
 		return 0;
 
 	err = falcon_read_firmware(&vic->falcon, vic->config->firmware);
@@ -261,17 +264,17 @@ static int vic_load_firmware(struct vic *vic)
 	size = vic->falcon.firmware.size;
 
 	if (!client->group) {
-		virt = dma_alloc_coherent(vic->dev, size, &phys, GFP_KERNEL);
+		virt = dma_alloc_coherent(vic->dev, size, &iova, GFP_KERNEL);
 
-		err = dma_mapping_error(vic->dev, phys);
+		err = dma_mapping_error(vic->dev, iova);
 		if (err < 0)
 			return err;
 	} else {
-		virt = tegra_drm_alloc(tegra, size, &phys);
+		virt = tegra_drm_alloc(tegra, size, &iova);
 	}
 
-	vic->falcon.firmware.vaddr = virt;
-	vic->falcon.firmware.paddr = phys;
+	vic->falcon.firmware.virt = virt;
+	vic->falcon.firmware.iova = iova;
 
 	err = falcon_load_firmware(&vic->falcon);
 	if (err < 0)
@@ -283,35 +286,24 @@ static int vic_load_firmware(struct vic *vic)
 	 * knows what memory pages to flush the cache for.
 	 */
 	if (client->group) {
+		dma_addr_t phys;
+
 		phys = dma_map_single(vic->dev, virt, size, DMA_TO_DEVICE);
 
 		err = dma_mapping_error(vic->dev, phys);
 		if (err < 0)
 			goto cleanup;
 
-		/*
-		 * If the DMA API mapped this through a bounce buffer, the
-		 * dma_sync_single_for_device() call below will not be able
-		 * to flush the caches for the right memory pages. Output a
-		 * big warning in that case so that the DMA mask can be set
-		 * properly and the bounce buffer avoided.
-		 */
-		WARN(phys != vic->falcon.firmware.paddr,
-		     "check DMA mask setting for %s\n", dev_name(vic->dev));
+		vic->falcon.firmware.phys = phys;
 	}
 
-	dma_sync_single_for_device(vic->dev, phys, size, DMA_TO_DEVICE);
-
-	if (client->group)
-		dma_unmap_single(vic->dev, phys, size, DMA_TO_DEVICE);
-
 	return 0;
 
 cleanup:
 	if (!client->group)
-		dma_free_coherent(vic->dev, size, virt, phys);
+		dma_free_coherent(vic->dev, size, virt, iova);
 	else
-		tegra_drm_free(tegra, size, virt, phys);
+		tegra_drm_free(tegra, size, virt, iova);
 
 	return err;
 }

From 2e8d8749f6f9bb35b947228271dc9ec31be93335 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 28 Oct 2019 13:37:17 +0100
Subject: [PATCH 65/68] drm/tegra: Support DMA API for display controllers

If a display controller is not attached to an explicit IOMMU domain,
which usually means that it's connected to an IOMMU domain controlled by
the DMA API, make sure to map the framebuffer to the display controller
address space. This allows us to transparently handle setups where the
display controller is attached to an IOMMU or setups where it isn't. It
also allows the driver to work with a DMA API that is backed by an
IOMMU.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/dc.c    |   8 ++-
 drivers/gpu/drm/tegra/hub.c   |   6 +-
 drivers/gpu/drm/tegra/plane.c | 104 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/tegra/plane.h |   8 +++
 4 files changed, 120 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 36c36b295ab1..5b1f9ff97576 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -715,9 +715,7 @@ static void tegra_plane_atomic_update(struct drm_plane *plane,
 	window.swap = state->swap;
 
 	for (i = 0; i < fb->format->num_planes; i++) {
-		struct tegra_bo *bo = tegra_fb_get_plane(fb, i);
-
-		window.base[i] = bo->iova + fb->offsets[i];
+		window.base[i] = state->iova[i] + fb->offsets[i];
 
 		/*
 		 * Tegra uses a shared stride for UV planes. Framebuffers are
@@ -732,6 +730,8 @@ static void tegra_plane_atomic_update(struct drm_plane *plane,
 }
 
 static const struct drm_plane_helper_funcs tegra_plane_helper_funcs = {
+	.prepare_fb = tegra_plane_prepare_fb,
+	.cleanup_fb = tegra_plane_cleanup_fb,
 	.atomic_check = tegra_plane_atomic_check,
 	.atomic_disable = tegra_plane_atomic_disable,
 	.atomic_update = tegra_plane_atomic_update,
@@ -914,6 +914,8 @@ static void tegra_cursor_atomic_disable(struct drm_plane *plane,
 }
 
 static const struct drm_plane_helper_funcs tegra_cursor_plane_helper_funcs = {
+	.prepare_fb = tegra_plane_prepare_fb,
+	.cleanup_fb = tegra_plane_cleanup_fb,
 	.atomic_check = tegra_cursor_atomic_check,
 	.atomic_update = tegra_cursor_atomic_update,
 	.atomic_disable = tegra_cursor_atomic_disable,
diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c
index 104115e42190..2b4082d0bc9e 100644
--- a/drivers/gpu/drm/tegra/hub.c
+++ b/drivers/gpu/drm/tegra/hub.c
@@ -413,7 +413,6 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane,
 	unsigned int zpos = plane->state->normalized_zpos;
 	struct drm_framebuffer *fb = plane->state->fb;
 	struct tegra_plane *p = to_tegra_plane(plane);
-	struct tegra_bo *bo;
 	dma_addr_t base;
 	u32 value;
 
@@ -456,8 +455,7 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane,
 	/* disable compression */
 	tegra_plane_writel(p, 0, DC_WINBUF_CDE_CONTROL);
 
-	bo = tegra_fb_get_plane(fb, 0);
-	base = bo->iova;
+	base = state->iova[0] + fb->offsets[0];
 
 	tegra_plane_writel(p, state->format, DC_WIN_COLOR_DEPTH);
 	tegra_plane_writel(p, 0, DC_WIN_PRECOMP_WGRP_PARAMS);
@@ -521,6 +519,8 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane,
 }
 
 static const struct drm_plane_helper_funcs tegra_shared_plane_helper_funcs = {
+	.prepare_fb = tegra_plane_prepare_fb,
+	.cleanup_fb = tegra_plane_cleanup_fb,
 	.atomic_check = tegra_shared_plane_atomic_check,
 	.atomic_update = tegra_shared_plane_atomic_update,
 	.atomic_disable = tegra_shared_plane_atomic_disable,
diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c
index 6bab71d6e81d..163b590be224 100644
--- a/drivers/gpu/drm/tegra/plane.c
+++ b/drivers/gpu/drm/tegra/plane.c
@@ -6,6 +6,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_fourcc.h>
+#include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_plane_helper.h>
 
 #include "dc.h"
@@ -23,6 +24,7 @@ static void tegra_plane_reset(struct drm_plane *plane)
 {
 	struct tegra_plane *p = to_tegra_plane(plane);
 	struct tegra_plane_state *state;
+	unsigned int i;
 
 	if (plane->state)
 		__drm_atomic_helper_plane_destroy_state(plane->state);
@@ -36,6 +38,9 @@ static void tegra_plane_reset(struct drm_plane *plane)
 		plane->state->plane = plane;
 		plane->state->zpos = p->index;
 		plane->state->normalized_zpos = p->index;
+
+		for (i = 0; i < 3; i++)
+			state->iova[i] = DMA_MAPPING_ERROR;
 	}
 }
 
@@ -60,6 +65,11 @@ tegra_plane_atomic_duplicate_state(struct drm_plane *plane)
 	for (i = 0; i < 2; i++)
 		copy->blending[i] = state->blending[i];
 
+	for (i = 0; i < 3; i++) {
+		copy->iova[i] = DMA_MAPPING_ERROR;
+		copy->sgt[i] = NULL;
+	}
+
 	return &copy->base;
 }
 
@@ -95,6 +105,100 @@ const struct drm_plane_funcs tegra_plane_funcs = {
 	.format_mod_supported = tegra_plane_format_mod_supported,
 };
 
+static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state)
+{
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < state->base.fb->format->num_planes; i++) {
+		struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
+
+		if (!dc->client.group) {
+			struct sg_table *sgt;
+
+			sgt = host1x_bo_pin(dc->dev, &bo->base, NULL);
+			if (IS_ERR(sgt)) {
+				err = PTR_ERR(sgt);
+				goto unpin;
+			}
+
+			err = dma_map_sg(dc->dev, sgt->sgl, sgt->nents,
+					 DMA_TO_DEVICE);
+			if (err == 0) {
+				err = -ENOMEM;
+				goto unpin;
+			}
+
+			state->iova[i] = sg_dma_address(sgt->sgl);
+			state->sgt[i] = sgt;
+		} else {
+			state->iova[i] = bo->iova;
+		}
+	}
+
+	return 0;
+
+unpin:
+	dev_err(dc->dev, "failed to map plane %u: %d\n", i, err);
+
+	while (i--) {
+		struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
+		struct sg_table *sgt = state->sgt[i];
+
+		dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE);
+		host1x_bo_unpin(dc->dev, &bo->base, sgt);
+
+		state->iova[i] = DMA_MAPPING_ERROR;
+		state->sgt[i] = NULL;
+	}
+
+	return err;
+}
+
+static void tegra_dc_unpin(struct tegra_dc *dc, struct tegra_plane_state *state)
+{
+	unsigned int i;
+
+	for (i = 0; i < state->base.fb->format->num_planes; i++) {
+		struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
+
+		if (!dc->client.group) {
+			struct sg_table *sgt = state->sgt[i];
+
+			if (sgt) {
+				dma_unmap_sg(dc->dev, sgt->sgl, sgt->nents,
+					     DMA_TO_DEVICE);
+				host1x_bo_unpin(dc->dev, &bo->base, sgt);
+			}
+		}
+
+		state->iova[i] = DMA_MAPPING_ERROR;
+		state->sgt[i] = NULL;
+	}
+}
+
+int tegra_plane_prepare_fb(struct drm_plane *plane,
+			   struct drm_plane_state *state)
+{
+	struct tegra_dc *dc = to_tegra_dc(state->crtc);
+
+	if (!state->fb)
+		return 0;
+
+	drm_gem_fb_prepare_fb(plane, state);
+
+	return tegra_dc_pin(dc, to_tegra_plane_state(state));
+}
+
+void tegra_plane_cleanup_fb(struct drm_plane *plane,
+			    struct drm_plane_state *state)
+{
+	struct tegra_dc *dc = to_tegra_dc(state->crtc);
+
+	if (dc)
+		tegra_dc_unpin(dc, to_tegra_plane_state(state));
+}
+
 int tegra_plane_state_add(struct tegra_plane *plane,
 			  struct drm_plane_state *state)
 {
diff --git a/drivers/gpu/drm/tegra/plane.h b/drivers/gpu/drm/tegra/plane.h
index 510c394e6d9a..a158a915109a 100644
--- a/drivers/gpu/drm/tegra/plane.h
+++ b/drivers/gpu/drm/tegra/plane.h
@@ -39,6 +39,9 @@ struct tegra_plane_legacy_blending_state {
 struct tegra_plane_state {
 	struct drm_plane_state base;
 
+	struct sg_table *sgt[3];
+	dma_addr_t iova[3];
+
 	struct tegra_bo_tiling tiling;
 	u32 format;
 	u32 swap;
@@ -61,6 +64,11 @@ to_tegra_plane_state(struct drm_plane_state *state)
 
 extern const struct drm_plane_funcs tegra_plane_funcs;
 
+int tegra_plane_prepare_fb(struct drm_plane *plane,
+			   struct drm_plane_state *state);
+void tegra_plane_cleanup_fb(struct drm_plane *plane,
+			    struct drm_plane_state *state);
+
 int tegra_plane_state_add(struct tegra_plane *plane,
 			  struct drm_plane_state *state);
 

From fa6661b7aa0b52073681b0d26742650c8cbd30f3 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 28 Oct 2019 13:37:18 +0100
Subject: [PATCH 66/68] drm/tegra: Optionally attach clients to the IOMMU

If a client is already attached to an IOMMU domain that is not the
shared domain, don't try to attach it again. This allows using the
IOMMU-backed DMA API.

Since the IOMMU-backed DMA API is now supported and there's no way
to detach from it on 64-bit ARM, don't bother to detach from it on
32-bit ARM either.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/drm.c | 66 +++++++++++++++++++++++++++----------
 drivers/gpu/drm/tegra/drm.h |  1 +
 2 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index efc8a27b9e6a..56e5e7a5c108 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -20,10 +20,6 @@
 #include <drm/drm_prime.h>
 #include <drm/drm_vblank.h>
 
-#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
-#include <asm/dma-iommu.h>
-#endif
-
 #include "drm.h"
 #include "gem.h"
 
@@ -908,30 +904,27 @@ int tegra_drm_unregister_client(struct tegra_drm *tegra,
 
 int host1x_client_iommu_attach(struct host1x_client *client)
 {
+	struct iommu_domain *domain = iommu_get_domain_for_dev(client->dev);
 	struct drm_device *drm = dev_get_drvdata(client->parent);
 	struct tegra_drm *tegra = drm->dev_private;
 	struct iommu_group *group = NULL;
 	int err;
 
-	if (tegra->domain) {
-		struct iommu_domain *domain;
+	/*
+	 * If the host1x client is already attached to an IOMMU domain that is
+	 * not the shared IOMMU domain, don't try to attach it to a different
+	 * domain. This allows using the IOMMU-backed DMA API.
+	 */
+	if (domain && domain != tegra->domain)
+		return 0;
 
+	if (tegra->domain) {
 		group = iommu_group_get(client->dev);
 		if (!group) {
 			dev_err(client->dev, "failed to get IOMMU group\n");
 			return -ENODEV;
 		}
 
-#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
-		if (client->dev->archdata.mapping) {
-			struct dma_iommu_mapping *mapping =
-				to_dma_iommu_mapping(client->dev);
-			arm_iommu_detach_device(client->dev);
-			arm_iommu_release_mapping(mapping);
-		}
-#endif
-
-		domain = iommu_get_domain_for_dev(client->dev);
 		if (domain != tegra->domain) {
 			err = iommu_attach_group(tegra->domain, group);
 			if (err < 0) {
@@ -939,6 +932,8 @@ int host1x_client_iommu_attach(struct host1x_client *client)
 				return err;
 			}
 		}
+
+		tegra->use_explicit_iommu = true;
 	}
 
 	client->group = group;
@@ -963,6 +958,7 @@ void host1x_client_iommu_detach(struct host1x_client *client)
 			iommu_detach_group(tegra->domain, client->group);
 
 		iommu_group_put(client->group);
+		client->group = NULL;
 	}
 }
 
@@ -1046,6 +1042,7 @@ void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt,
 static int host1x_drm_probe(struct host1x_device *dev)
 {
 	struct drm_driver *driver = &tegra_drm_driver;
+	struct iommu_domain *domain;
 	struct tegra_drm *tegra;
 	struct drm_device *drm;
 	int err;
@@ -1060,7 +1057,36 @@ static int host1x_drm_probe(struct host1x_device *dev)
 		goto put;
 	}
 
-	if (iommu_present(&platform_bus_type)) {
+	/*
+	 * If the Tegra DRM clients are backed by an IOMMU, push buffers are
+	 * likely to be allocated beyond the 32-bit boundary if sufficient
+	 * system memory is available. This is problematic on earlier Tegra
+	 * generations where host1x supports a maximum of 32 address bits in
+	 * the GATHER opcode. In this case, unless host1x is behind an IOMMU
+	 * as well it won't be able to process buffers allocated beyond the
+	 * 32-bit boundary.
+	 *
+	 * The DMA API will use bounce buffers in this case, so that could
+	 * perhaps still be made to work, even if less efficient, but there
+	 * is another catch: in order to perform cache maintenance on pages
+	 * allocated for discontiguous buffers we need to map and unmap the
+	 * SG table representing these buffers. This is fine for something
+	 * small like a push buffer, but it exhausts the bounce buffer pool
+	 * (typically on the order of a few MiB) for framebuffers (many MiB
+	 * for any modern resolution).
+	 *
+	 * Work around this by making sure that Tegra DRM clients only use
+	 * an IOMMU if the parent host1x also uses an IOMMU.
+	 *
+	 * Note that there's still a small gap here that we don't cover: if
+	 * the DMA API is backed by an IOMMU there's no way to control which
+	 * device is attached to an IOMMU and which isn't, except via wiring
+	 * up the device tree appropriately. This is considered an problem
+	 * of integration, so care must be taken for the DT to be consistent.
+	 */
+	domain = iommu_get_domain_for_dev(drm->dev->parent);
+
+	if (domain && iommu_present(&platform_bus_type)) {
 		tegra->domain = iommu_domain_alloc(&platform_bus_type);
 		if (!tegra->domain) {
 			err = -ENOMEM;
@@ -1104,7 +1130,7 @@ static int host1x_drm_probe(struct host1x_device *dev)
 	if (err < 0)
 		goto fbdev;
 
-	if (tegra->domain) {
+	if (tegra->use_explicit_iommu) {
 		u64 carveout_start, carveout_end, gem_start, gem_end;
 		u64 dma_mask = dma_get_mask(&dev->dev);
 		dma_addr_t start, end;
@@ -1132,6 +1158,10 @@ static int host1x_drm_probe(struct host1x_device *dev)
 		DRM_DEBUG_DRIVER("  GEM: %#llx-%#llx\n", gem_start, gem_end);
 		DRM_DEBUG_DRIVER("  Carveout: %#llx-%#llx\n", carveout_start,
 				 carveout_end);
+	} else if (tegra->domain) {
+		iommu_domain_free(tegra->domain);
+		tegra->domain = NULL;
+		iova_cache_put();
 	}
 
 	if (tegra->hub) {
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 28f2820a7371..d941553f7a3d 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -36,6 +36,7 @@ struct tegra_drm {
 	struct drm_device *drm;
 
 	struct iommu_domain *domain;
+	bool use_explicit_iommu;
 	struct mutex mm_lock;
 	struct drm_mm mm;
 

From c8a2036474882f478845c70c496c3e49635e34c2 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 29 Aug 2019 17:56:53 +0200
Subject: [PATCH 67/68] gpu: host1x: Unconditionally select IOMMU_IOVA

Currently configurations can be generated where IOMMU_SUPPORT is
disabled but IOMMU_IOVA is built as a module and HOST1X as built-in. In
such a case, the symbols guarded by IOMMU_IOVA will not be available
when linking the host1x driver and cause a linking failure.

Simplify this by unconditionally selecting IOMMU_IOVA, which makes sure
that it will be forced to =y if HOST1X=y. Technically we can now get
IOMMU_IOVA code built-in even if we don't use it (host1x only uses it
when IOMMU_SUPPORT is also enabled), but such configuration are of a
mostly academic nature. In all practical configurations we want IOMMU
support anyway.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/host1x/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig
index cf987a317a55..6dab94adf25e 100644
--- a/drivers/gpu/host1x/Kconfig
+++ b/drivers/gpu/host1x/Kconfig
@@ -2,7 +2,7 @@
 config TEGRA_HOST1X
 	tristate "NVIDIA Tegra host1x driver"
 	depends on ARCH_TEGRA || (ARM && COMPILE_TEST)
-	select IOMMU_IOVA if IOMMU_SUPPORT
+	select IOMMU_IOVA
 	help
 	  Driver for the NVIDIA Tegra host1x hardware.
 

From 84db889e6d827eefc3dde130fec8382d2dcb23ac Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 29 Aug 2019 17:52:42 +0200
Subject: [PATCH 68/68] drm/tegra: Unconditionally select IOMMU_IOVA

Currently configurations can be generated where IOMMU_SUPPORT is
disabled but IOMMU_IOVA is built as a module and DRM_TEGRA as built-in.
In such a case, the symbols guarded by IOMMU_IOVA will not be available
when linking the Tegra DRM driver and cause a linking failure.

Simplify this by unconditionally selecting IOMMU_IOVA, which makes sure
that it will be forced to =y if DRM_TEGRA=y. Technically we can now get
IOMMU_IOVA code built-in even if we don't use it (Tegra DRM only uses it
when IOMMU_SUPPORT is also enabled), but such configuration are of a
mostly academic nature. In all practical configurations we want IOMMU
support anyway.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/gpu/drm/tegra/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig
index 1d1269fde3c1..5043dcaf1cf9 100644
--- a/drivers/gpu/drm/tegra/Kconfig
+++ b/drivers/gpu/drm/tegra/Kconfig
@@ -9,7 +9,7 @@ config DRM_TEGRA
 	select DRM_MIPI_DSI
 	select DRM_PANEL
 	select TEGRA_HOST1X
-	select IOMMU_IOVA if IOMMU_SUPPORT
+	select IOMMU_IOVA
 	select CEC_CORE if CEC_NOTIFIER
 	help
 	  Choose this option if you have an NVIDIA Tegra SoC.