From 34bd69668a7307b0be0cdd3319b5e822b3014302 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= <marko.makela@mariadb.com>
Date: Mon, 20 Nov 2017 09:49:21 +0200
Subject: [PATCH] Follow-up fix to MDEV-13328

As pointed out by Shaohua Wang, the merge of MDEV-13328 from
10.1 (5.6) to 10.2 (5.7) was performed incorrectly.
Let us always pass a non-NULL FlushObserver* when writing to
data files is desired.

FlushObserver::is_partial_flush(): Check if this is a bulk-load
(partial flush of the tablespace).

FlushObserver::is_interrupted(): Check for interrupt status.

buf_LRU_flush_or_remove_pages(): Instead of trx_t*, take
FlushObserver* as a parameter.

buf_flush_or_remove_pages(): Remove the parameters flush, trx.
If observer!=NULL, write out the data pages. Use the new predicate
observer->is_partial() to distinguish a partial tablespace flush
(after bulk-loading) from a full tablespace flush (export).
Return a bool (whether all pages were removed from the flush_list).

buf_flush_dirty_pages(): Remove the parameter trx.
---
 storage/innobase/buf/buf0flu.cc     |   6 +-
 storage/innobase/buf/buf0lru.cc     | 119 +++++++++++++++---------------------
 storage/innobase/fil/fil0fil.cc     |   5 +-
 storage/innobase/include/buf0flu.h  |  11 +++-
 storage/innobase/include/buf0lru.h  |   7 ++-
 storage/innobase/row/row0import.cc  |  15 +++--
 storage/innobase/row/row0quiesce.cc |   5 +-
 storage/innobase/srv/srv0start.cc   |  11 ++--
 8 files changed, 85 insertions(+), 94 deletions(-)

diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index edeca87cbf1..7b67852e1cc 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -3809,11 +3809,9 @@ FlushObserver::notify_remove(
 void
 FlushObserver::flush()
 {
-	trx_t* trx = m_trx;
-	ut_ad(trx);
+	ut_ad(m_trx);
 
 	if (m_interrupted) {
-		trx = NULL;
 	} else {
 		if (m_stage != NULL) {
 			ulint	pages_to_flush =
@@ -3825,7 +3823,7 @@ FlushObserver::flush()
 	}
 
 	/* Flush or remove dirty pages. */
-	buf_LRU_flush_or_remove_pages(m_space_id, trx);
+	buf_LRU_flush_or_remove_pages(m_space_id, this);
 
 	/* Wait for all dirty pages were flushed. */
 	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index 785fce68fef..62cae53bc9f 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -550,24 +550,15 @@ tablespace. The pages still remain a part of LRU and are evicted from
 the list as they age towards the tail of the LRU.
 @param[in,out]	buf_pool	buffer pool
 @param[in]	id		tablespace identifier
-@param[in]	trx		transaction (to check for interrupt),
+@param[in]	observer	flush observer (to check for interrupt),
 				or NULL if the files should not be written to
-@retval DB_SUCCESS if all freed
-@retval DB_FAIL if not all freed
-@retval DB_INTERRUPTED if the transaction was interrupted */
+@return	whether all dirty pages were freed */
 static	MY_ATTRIBUTE((warn_unused_result))
-dberr_t
+bool
 buf_flush_or_remove_pages(
-/*======================*/
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
-	ulint		id,		/*!< in: target space id for which
-					to remove or flush pages */
-	FlushObserver*	observer,	/*!< in: flush observer */
-	bool		flush,		/*!< in: flush to disk if true but
-					don't remove else remove without
-					flushing to disk */
-	const trx_t*	trx)		/*!< to check if the operation must
-					be interrupted, can be 0 */
+	buf_pool_t*	buf_pool,
+	ulint		id,
+	FlushObserver*	observer)
 {
 	buf_page_t*	prev;
 	buf_page_t*	bpage;
@@ -589,15 +580,27 @@ rescan:
 
 		prev = UT_LIST_GET_PREV(list, bpage);
 
-		/* If flush observer is NULL, flush page for space id,
-		or flush page for flush observer. */
-		if ((observer != NULL && observer != bpage->flush_observer)
-		    || (observer == NULL && id != bpage->id.space())) {
-
-			/* Skip this block, as it does not belong to
-			the target space. */
-
-		} else if (!buf_flush_or_remove_page(buf_pool, bpage, trx)) {
+		/* Flush the pages matching space id,
+		or pages matching the flush observer. */
+		if (observer && observer->is_partial_flush()) {
+			if (observer != bpage->flush_observer) {
+				/* Skip this block. */
+			} else if (!buf_flush_or_remove_page(
+					   buf_pool, bpage,
+					   !observer->is_interrupted())) {
+				all_freed = false;
+			} else if (!observer->is_interrupted()) {
+				/* The processing was successful. And during the
+				processing we have released the buf_pool mutex
+				when calling buf_page_flush(). We cannot trust
+				prev pointer. */
+				goto rescan;
+			}
+		} else if (id != bpage->id.space()) {
+			/* Skip this block, because it is for a
+			different tablespace. */
+		} else if (!buf_flush_or_remove_page(
+				   buf_pool, bpage, observer != NULL)) {
 
 			/* Remove was unsuccessful, we have to try again
 			by scanning the entire list from the end.
@@ -620,7 +623,7 @@ rescan:
 			iteration. */
 
 			all_freed = false;
-		} else if (trx) {
+		} else if (observer) {
 
 			/* The processing was successful. And during the
 			processing we have released the buf_pool mutex
@@ -639,7 +642,7 @@ rescan:
 			processed = 0;
 		}
 
-		if (trx) {
+		if (observer) {
 			DBUG_EXECUTE_IF("ib_export_flush_crash",
 					static ulint	n_pages;
 					if (++n_pages == 4) {DBUG_SUICIDE();});
@@ -647,25 +650,14 @@ rescan:
 
 		/* The check for trx is interrupted is expensive, we want
 		to check every N iterations. */
-		if (!processed && trx && trx_is_interrupted(trx)) {
-			if (trx->flush_observer != NULL) {
-				if (flush) {
-					trx->flush_observer->interrupted();
-				} else {
-					/* We should remove all pages with the
-					the flush observer. */
-					continue;
-				}
-			}
-
-			buf_flush_list_mutex_exit(buf_pool);
-			return(DB_INTERRUPTED);
+		if (!processed && observer) {
+			observer->check_interrupted();
 		}
 	}
 
 	buf_flush_list_mutex_exit(buf_pool);
 
-	return(all_freed ? DB_SUCCESS : DB_FAIL);
+	return(all_freed);
 }
 
 /** Remove or flush all the dirty pages that belong to a given tablespace
@@ -676,73 +668,58 @@ the tail of the LRU list.
 @param[in]	id		tablespace identifier
 @param[in]	observer	flush observer,
 				or NULL if the files should not be written to
-@param[in]	trx		transaction (to check for interrupt),
-				or NULL if the files should not be written to
 */
 static
 void
 buf_flush_dirty_pages(
 	buf_pool_t*	buf_pool,
 	ulint		id,
-	FlushObserver*	observer,
-	const trx_t*	trx)
+	FlushObserver*	observer)
 {
-	dberr_t		err;
-	bool		flush = trx != NULL;
-
-	do {
+	for (;;) {
 		buf_pool_mutex_enter(buf_pool);
 
-		err = buf_flush_or_remove_pages(
-			buf_pool, id, observer, flush, trx);
+		bool freed = buf_flush_or_remove_pages(buf_pool, id, observer);
 
 		buf_pool_mutex_exit(buf_pool);
 
 		ut_ad(buf_flush_validate(buf_pool));
 
-		if (err == DB_FAIL) {
-			os_thread_sleep(2000);
-		}
-
-		if (err == DB_INTERRUPTED && observer != NULL) {
-			ut_a(flush);
-
-			flush = false;
-			err = DB_FAIL;
+		if (freed) {
+			break;
 		}
 
-		/* DB_FAIL is a soft error, it means that the task wasn't
-		completed, needs to be retried. */
-
+		os_thread_sleep(2000);
 		ut_ad(buf_flush_validate(buf_pool));
+	}
 
-	} while (err == DB_FAIL);
-
-	ut_ad(err == DB_INTERRUPTED
+	ut_ad((observer && observer->is_interrupted())
 	      || buf_pool_get_dirty_pages_count(buf_pool, id, observer) == 0);
 }
 
 /** Empty the flush list for all pages belonging to a tablespace.
 @param[in]	id		tablespace identifier
-@param[in]	trx		transaction, for checking for user interrupt;
+@param[in]	observer	flush observer,
 				or NULL if nothing is to be written
 @param[in]	drop_ahi	whether to drop the adaptive hash index */
 void
-buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx, bool drop_ahi)
+buf_LRU_flush_or_remove_pages(
+	ulint		id,
+	FlushObserver*	observer,
+	bool		drop_ahi)
 {
-	FlushObserver*	observer = (trx == NULL) ? NULL : trx->flush_observer;
 	/* Pages in the system tablespace must never be discarded. */
-	ut_ad(id || trx);
+	ut_ad(id || observer);
 
 	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
 		buf_pool_t* buf_pool = buf_pool_from_array(i);
 		if (drop_ahi) {
 			buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
 		}
-		buf_flush_dirty_pages(buf_pool, id, observer, trx);
+		buf_flush_dirty_pages(buf_pool, id, observer);
 	}
 
-	if (trx && !observer && !trx_is_interrupted(trx)) {
+	if (observer && !observer->is_interrupted()) {
 		/* Ensure that all asynchronous IO is completed. */
 		os_aio_wait_until_no_pending_writes();
 		fil_flush(id);
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index d7096096582..ef4c3c5c4fa 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -2711,7 +2711,10 @@ fil_close_tablespace(
 	completely and permanently. The flag stop_new_ops also prevents
 	fil_flush() from being applied to this tablespace. */
 
-	buf_LRU_flush_or_remove_pages(id, trx);
+	{
+		FlushObserver observer(id, trx, NULL);
+		buf_LRU_flush_or_remove_pages(id, &observer);
+	}
 
 	/* If the free is successful, the X lock will be released before
 	the space memory data structure is freed. */
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index 76bd62b7ed9..19d9446aa9c 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -378,6 +378,12 @@ public:
 		       || m_interrupted);
 	}
 
+	/** @return whether to flush only some pages of the tablespace */
+	bool is_partial_flush() const { return m_stage != NULL; }
+
+	/** @return whether the operation was interrupted */
+	bool is_interrupted() const { return m_interrupted; }
+
 	/** Interrupt observer not to wait. */
 	void interrupted()
 	{
@@ -390,7 +396,6 @@ public:
 
 	/** Flush dirty pages. */
 	void flush();
-
 	/** Notify observer of flushing a page
 	@param[in]	buf_pool	buffer pool instance
 	@param[in]	bpage		buffer page to flush */
@@ -406,10 +411,10 @@ public:
 		buf_page_t*	bpage);
 private:
 	/** Table space id */
-	ulint			m_space_id;
+	const ulint		m_space_id;
 
 	/** Trx instance */
-	trx_t*			m_trx;
+	trx_t* const		m_trx;
 
 	/** Performance schema accounting object, used by ALTER TABLE.
 	If not NULL, then stage->begin_phase_flush() will be called initially,
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index 733ca727996..de627c19d2a 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -53,11 +53,14 @@ These are low-level functions
 
 /** Empty the flush list for all pages belonging to a tablespace.
 @param[in]	id		tablespace identifier
-@param[in]	trx		transaction, for checking for user interrupt;
+@param[in,out]	observer	flush observer,
 				or NULL if nothing is to be written
 @param[in]	drop_ahi	whether to drop the adaptive hash index */
 void
-buf_LRU_flush_or_remove_pages(ulint id, const trx_t* trx, bool drop_ahi=false);
+buf_LRU_flush_or_remove_pages(
+	ulint		id,
+	FlushObserver*	observer,
+	bool		drop_ahi = false);
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 /********************************************************************//**
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
index a8a0ee4a27e..a49af25cfc4 100644
--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -3929,11 +3929,16 @@ row_import_for_mysql(
 	The only dirty pages generated should be from the pessimistic purge
 	of delete marked records that couldn't be purged in Phase I. */
 
-	buf_LRU_flush_or_remove_pages(prebuilt->table->space, trx);
-
-	if (trx_is_interrupted(trx)) {
-		ib::info() << "Phase III - Flush interrupted";
-		return(row_import_error(prebuilt, trx, DB_INTERRUPTED));
+	{
+		FlushObserver observer(prebuilt->table->space, trx, NULL);
+		buf_LRU_flush_or_remove_pages(prebuilt->table->space,
+					      &observer);
+
+		if (observer.is_interrupted()) {
+			ib::info() << "Phase III - Flush interrupted";
+			return(row_import_error(prebuilt, trx,
+						DB_INTERRUPTED));
+		}
 	}
 
 	ib::info() << "Phase IV - Flush complete";
diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc
index 0d07c5d54d4..c32c79ade95 100644
--- a/storage/innobase/row/row0quiesce.cc
+++ b/storage/innobase/row/row0quiesce.cc
@@ -748,7 +748,10 @@ row_quiesce_table_start(
 			mutex_enter(&master_key_id_mutex);
 		}
 
-		buf_LRU_flush_or_remove_pages(table->space, trx);
+		{
+			FlushObserver observer(table->space, trx, NULL);
+			buf_LRU_flush_or_remove_pages(table->space, &observer);
+		}
 
 		if (dict_table_is_encrypted(table)) {
 			mutex_exit(&master_key_id_mutex);
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index 0a5f1b0b494..6ce0714e7b1 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -1072,22 +1072,19 @@ srv_undo_tablespaces_init(
 		mtr_commit(&mtr);
 
 		/* Step-2: Flush the dirty pages from the buffer pool. */
-		trx_t* trx = trx_allocate_for_background();
-
 		for (undo::undo_spaces_t::const_iterator it
 			     = undo::Truncate::s_fix_up_spaces.begin();
 		     it != undo::Truncate::s_fix_up_spaces.end();
 		     ++it) {
-
-			buf_LRU_flush_or_remove_pages(TRX_SYS_SPACE, trx);
-
-			buf_LRU_flush_or_remove_pages(*it, trx);
+			FlushObserver dummy(TRX_SYS_SPACE, NULL, NULL);
+			buf_LRU_flush_or_remove_pages(TRX_SYS_SPACE, &dummy);
+			FlushObserver dummy2(*it, NULL, NULL);
+			buf_LRU_flush_or_remove_pages(*it, &dummy2);
 
 			/* Remove the truncate redo log file. */
 			undo::Truncate	undo_trunc;
 			undo_trunc.done_logging(*it);
 		}
-		trx_free_for_background(trx);
 	}
 
 	return(DB_SUCCESS);
-- 
2.15.0

