diff --git a/mm/swapfile.c b/mm/swapfile.c
index a78fe4d6f4c6..a41632e74787 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -55,12 +55,14 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
 static void free_swap_count_continuations(struct swap_info_struct *);
 static void swap_entries_free(struct swap_info_struct *si,
 			      struct swap_cluster_info *ci,
-			      swp_entry_t entry, unsigned int nr_pages);
+			      unsigned long start, unsigned int nr_pages);
 static void swap_range_alloc(struct swap_info_struct *si,
 			     unsigned int nr_entries);
 static int __swap_duplicate(swp_entry_t entry, unsigned char usage, int nr);
-static bool swap_entries_put_map(struct swap_info_struct *si,
-				 swp_entry_t entry, int nr);
+static void swap_put_entry_locked(struct swap_info_struct *si,
+				  struct swap_cluster_info *ci,
+				  unsigned long offset,
+				  unsigned char usage);
 static bool folio_swapcache_freeable(struct folio *folio);
 static void move_cluster(struct swap_info_struct *si,
 			 struct swap_cluster_info *ci, struct list_head *list,
@@ -197,25 +199,6 @@ static bool swap_only_has_cache(struct swap_info_struct *si,
 	return true;
 }
 
-static bool swap_is_last_map(struct swap_info_struct *si,
-		unsigned long offset, int nr_pages, bool *has_cache)
-{
-	unsigned char *map = si->swap_map + offset;
-	unsigned char *map_end = map + nr_pages;
-	unsigned char count = *map;
-
-	if (swap_count(count) != 1)
-		return false;
-
-	while (++map < map_end) {
-		if (*map != count)
-			return false;
-	}
-
-	*has_cache = !!(count & SWAP_HAS_CACHE);
-	return true;
-}
-
 /*
  * returns number of pages in the folio that backs the swap entry. If positive,
  * the folio was reclaimed. If negative, the folio was not reclaimed. If 0, no
@@ -1439,6 +1422,76 @@ start_over:
 	return false;
 }
 
+/**
+ * swap_put_entries_cluster - Decrease the swap count of a set of slots.
+ * @si: The swap device.
+ * @start: start offset of slots.
+ * @nr: number of slots.
+ * @reclaim_cache: if true, also reclaim the swap cache.
+ *
+ * This helper decreases the swap count of a set of slots and tries to
+ * batch-free them. It also reclaims the swap cache if @reclaim_cache is true.
+ * Context: The caller must ensure that all slots belong to the same
+ * cluster and that their swap counts do not underflow.
+ */
+static void swap_put_entries_cluster(struct swap_info_struct *si,
+				     unsigned long start, int nr,
+				     bool reclaim_cache)
+{
+	unsigned long offset = start, end = start + nr;
+	unsigned long batch_start = SWAP_ENTRY_INVALID;
+	struct swap_cluster_info *ci;
+	bool need_reclaim = false;
+	int nr_reclaimed;
+	unsigned long swp_tb;
+	unsigned int count;
+
+	ci = swap_cluster_lock(si, offset);
+	do {
+		swp_tb = __swap_table_get(ci, offset % SWAPFILE_CLUSTER);
+		count = si->swap_map[offset];
+		VM_WARN_ON(swap_count(count) < 1 || count == SWAP_MAP_BAD);
+		if (swap_count(count) == 1) {
+			/* count == 1 and non-cached slots will be batch-freed. */
+			if (!swp_tb_is_folio(swp_tb)) {
+				if (!batch_start)
+					batch_start = offset;
+				continue;
+			}
+			/* count will be 0 after put, slot can be reclaimed */
+			VM_WARN_ON(!(count & SWAP_HAS_CACHE));
+			need_reclaim = true;
+		}
+		/*
+		 * A slot with count != 1 or with a cache can't be freed
+		 * here. Put its swap count and free the pending batch it
+		 * interrupted. Cached slots are freed when the folio is
+		 * removed from the swap cache (__swap_cache_del_folio).
+		 */
+		swap_put_entry_locked(si, ci, offset, 1);
+		if (batch_start) {
+			swap_entries_free(si, ci, batch_start, offset - batch_start);
+			batch_start = SWAP_ENTRY_INVALID;
+		}
+	} while (++offset < end);
+
+	if (batch_start)
+		swap_entries_free(si, ci, batch_start, offset - batch_start);
+	swap_cluster_unlock(ci);
+
+	if (!need_reclaim || !reclaim_cache)
+		return;
+
+	offset = start;
+	do {
+		nr_reclaimed = __try_to_reclaim_swap(si, offset,
+						     TTRS_UNMAPPED | TTRS_FULL);
+		offset++;
+		if (nr_reclaimed)
+			offset = round_up(offset, abs(nr_reclaimed));
+	} while (offset < end);
+}
+
 /**
  * folio_alloc_swap - allocate swap space for a folio
  * @folio: folio we want to move to swap
@@ -1544,6 +1597,7 @@ void folio_put_swap(struct folio *folio, struct page *subpage)
 {
 	swp_entry_t entry = folio->swap;
 	unsigned long nr_pages = folio_nr_pages(folio);
+	struct swap_info_struct *si = __swap_entry_to_info(entry);
 
 	VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio);
 	VM_WARN_ON_FOLIO(!folio_test_swapcache(folio), folio);
@@ -1553,7 +1607,7 @@ void folio_put_swap(struct folio *folio, struct page *subpage)
 		nr_pages = 1;
 	}
 
-	swap_entries_put_map(__swap_entry_to_info(entry), entry, nr_pages);
+	swap_put_entries_cluster(si, swp_offset(entry), nr_pages, false);
 }
 
 static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
@@ -1590,12 +1644,11 @@ out:
 	return NULL;
 }
 
-static unsigned char swap_entry_put_locked(struct swap_info_struct *si,
-					   struct swap_cluster_info *ci,
-					   swp_entry_t entry,
-					   unsigned char usage)
+static void swap_put_entry_locked(struct swap_info_struct *si,
+				  struct swap_cluster_info *ci,
+				  unsigned long offset,
+				  unsigned char usage)
 {
-	unsigned long offset = swp_offset(entry);
 	unsigned char count;
 	unsigned char has_cache;
 
@@ -1621,9 +1674,7 @@ static unsigned char swap_entry_put_locked(struct swap_info_struct *si,
 	if (usage)
 		WRITE_ONCE(si->swap_map[offset], usage);
 	else
-		swap_entries_free(si, ci, entry, 1);
-
-	return usage;
+		swap_entries_free(si, ci, offset, 1);
 }
 
 /*
@@ -1691,70 +1742,6 @@ put_out:
 	return NULL;
 }
 
-static bool swap_entries_put_map(struct swap_info_struct *si,
-				 swp_entry_t entry, int nr)
-{
-	unsigned long offset = swp_offset(entry);
-	struct swap_cluster_info *ci;
-	bool has_cache = false;
-	unsigned char count;
-	int i;
-
-	if (nr <= 1)
-		goto fallback;
-	count = swap_count(data_race(si->swap_map[offset]));
-	if (count != 1)
-		goto fallback;
-
-	ci = swap_cluster_lock(si, offset);
-	if (!swap_is_last_map(si, offset, nr, &has_cache)) {
-		goto locked_fallback;
-	}
-	if (!has_cache)
-		swap_entries_free(si, ci, entry, nr);
-	else
-		for (i = 0; i < nr; i++)
-			WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);
-	swap_cluster_unlock(ci);
-
-	return has_cache;
-
-fallback:
-	ci = swap_cluster_lock(si, offset);
-locked_fallback:
-	for (i = 0; i < nr; i++, entry.val++) {
-		count = swap_entry_put_locked(si, ci, entry, 1);
-		if (count == SWAP_HAS_CACHE)
-			has_cache = true;
-	}
-	swap_cluster_unlock(ci);
-	return has_cache;
-}
-
-/*
- * Only functions with "_nr" suffix are able to free entries spanning
- * cross multi clusters, so ensure the range is within a single cluster
- * when freeing entries with functions without "_nr" suffix.
- */
-static bool swap_entries_put_map_nr(struct swap_info_struct *si,
-				    swp_entry_t entry, int nr)
-{
-	int cluster_nr, cluster_rest;
-	unsigned long offset = swp_offset(entry);
-	bool has_cache = false;
-
-	cluster_rest = SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER;
-	while (nr) {
-		cluster_nr = min(nr, cluster_rest);
-		has_cache |= swap_entries_put_map(si, entry, cluster_nr);
-		cluster_rest = SWAPFILE_CLUSTER;
-		nr -= cluster_nr;
-		entry.val += cluster_nr;
-	}
-
-	return has_cache;
-}
-
 /*
  * Check if it's the last ref of swap entry in the freeing path.
  */
@@ -1769,9 +1756,9 @@ static inline bool __maybe_unused swap_is_last_ref(unsigned char count)
  */
 static void swap_entries_free(struct swap_info_struct *si,
 			      struct swap_cluster_info *ci,
-			      swp_entry_t entry, unsigned int nr_pages)
+			      unsigned long offset, unsigned int nr_pages)
 {
-	unsigned long offset = swp_offset(entry);
+	swp_entry_t entry = swp_entry(si->type, offset);
 	unsigned char *map = si->swap_map + offset;
 	unsigned char *map_end = map + nr_pages;
 
@@ -1978,10 +1965,8 @@ void swap_put_entries_direct(swp_entry_t entry, int nr)
 {
 	const unsigned long start_offset = swp_offset(entry);
 	const unsigned long end_offset = start_offset + nr;
+	unsigned long offset, cluster_end;
 	struct swap_info_struct *si;
-	bool any_only_cache = false;
-	unsigned long offset;
-	unsigned long swp_tb;
 
 	si = get_swap_device(entry);
 	if (WARN_ON_ONCE(!si))
@@ -1989,44 +1974,13 @@ void swap_put_entries_direct(swp_entry_t entry, int nr)
 	if (WARN_ON_ONCE(end_offset > si->max))
 		goto out;
 
-	/*
-	 * First free all entries in the range.
-	 */
-	any_only_cache = swap_entries_put_map_nr(si, entry, nr);
-
-	/*
-	 * Short-circuit the below loop if none of the entries had their
-	 * reference drop to zero.
-	 */
-	if (!any_only_cache)
-		goto out;
-
-	/*
-	 * Now go back over the range trying to reclaim the swap cache.
-	 */
-	for (offset = start_offset; offset < end_offset; offset += nr) {
-		nr = 1;
-		swp_tb = swap_table_get(__swap_offset_to_cluster(si, offset),
-					offset % SWAPFILE_CLUSTER);
-		if (!swap_count(READ_ONCE(si->swap_map[offset])) && swp_tb_is_folio(swp_tb)) {
-			/*
-			 * Folios are always naturally aligned in swap so
-			 * advance forward to the next boundary. Zero means no
-			 * folio was found for the swap entry, so advance by 1
-			 * in this case. Negative value means folio was found
-			 * but could not be reclaimed. Here we can still advance
-			 * to the next boundary.
-			 */
-			nr = __try_to_reclaim_swap(si, offset,
-						   TTRS_UNMAPPED | TTRS_FULL);
-			if (nr == 0)
-				nr = 1;
-			else if (nr < 0)
-				nr = -nr;
-			nr = ALIGN(offset + 1, nr) - offset;
-		}
-	}
-
+	/* Put entries and reclaim cache in each cluster */
+	offset = start_offset;
+	do {
+		cluster_end = min(round_up(offset + 1, SWAPFILE_CLUSTER), end_offset);
+		swap_put_entries_cluster(si, offset, cluster_end - offset, true);
+		offset = cluster_end;
+	} while (offset < end_offset);
 out:
 	put_swap_device(si);
 }
@@ -2073,7 +2027,7 @@
 		return;
 
 	ci = swap_cluster_lock(si, offset);
-	swap_entry_put_locked(si, ci, entry, 1);
+	swap_put_entry_locked(si, ci, offset, 1);
 	swap_cluster_unlock(ci);
 
 	/* In theory readahead might add it to the swap cache by accident */
@@ -3827,10 +3781,10 @@ void __swapcache_clear_cached(struct swap_info_struct *si,
 			      swp_entry_t entry, unsigned int nr)
 {
 	if (swap_only_has_cache(si, swp_offset(entry), nr)) {
-		swap_entries_free(si, ci, entry, nr);
+		swap_entries_free(si, ci, swp_offset(entry), nr);
 	} else {
 		for (int i = 0; i < nr; i++, entry.val++)
-			swap_entry_put_locked(si, ci, entry, SWAP_HAS_CACHE);
+			swap_put_entry_locked(si, ci, swp_offset(entry), SWAP_HAS_CACHE);
 	}
 }
 
@@ -3951,7 +3905,7 @@ outer:
  * into, carry if so, or else fail until a new continuation page is allocated;
 * when the original swap_map count is decremented from 0 with continuation,
 * borrow from the continuation and report whether it still holds more.
- * Called while __swap_duplicate() or caller of swap_entry_put_locked()
+ * Called while __swap_duplicate() or caller of swap_put_entry_locked()
 * holds cluster lock.
 */
 static bool swap_count_continued(struct swap_info_struct *si,
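
A note on the batching scheme for reviewers: swap_put_entries_cluster() coalesces contiguous slots whose last reference is being dropped (count == 1, no swap cache) into [batch_start, offset) runs and frees each run with a single swap_entries_free() call. Below is a standalone userspace sketch of just that run-batching shape; it is an illustration, not kernel code, and free_run(), the per-slot states and the offsets are all made up for the example:

#include <stdio.h>
#include <stdbool.h>

#define INVALID 0UL	/* stand-in for SWAP_ENTRY_INVALID */

/* stand-in for swap_entries_free(si, ci, start, nr) */
static void free_run(unsigned long start, unsigned long nr)
{
	printf("free %lu slot(s) starting at %lu\n", nr, start);
}

int main(void)
{
	/* hypothetical per-slot state: true = last ref and not cached */
	bool freeable[] = { true, true, false, true, false, true, true };
	unsigned long offset = 100, end = 107;	/* made-up offsets */
	unsigned long batch_start = INVALID;

	do {
		if (freeable[offset - 100]) {
			/* open or extend the pending batch */
			if (batch_start == INVALID)
				batch_start = offset;
			continue;	/* jumps to the ++offset test */
		}
		/* an unfreeable slot interrupts the pending batch */
		if (batch_start != INVALID) {
			free_run(batch_start, offset - batch_start);
			batch_start = INVALID;
		}
	} while (++offset < end);

	if (batch_start != INVALID)	/* flush the trailing run */
		free_run(batch_start, offset - batch_start);
	return 0;
}

With these inputs the sketch prints runs of 2, 1 and 2 slots starting at offsets 100, 103 and 105, which is the interrupted-batch behaviour the comment in swap_put_entries_cluster() describes.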
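The reclaim loop at the tail of swap_put_entries_cluster() keeps the alignment trick of the loop it replaces: per the comment above __try_to_reclaim_swap(), its return value is the folio size in pages (positive if reclaimed, negative if a folio was found but not reclaimed, 0 if no folio), and since folios are naturally aligned in swap, the scan can jump to the next folio-size boundary instead of probing every slot. A userspace sketch with a hypothetical mock_try_to_reclaim() standing in for __try_to_reclaim_swap() (the folio placement is invented, and round_up_pow2() is a local helper mirroring the kernel's power-of-two round_up()):

#include <stdio.h>
#include <stdlib.h>

static unsigned long round_up_pow2(unsigned long x, unsigned long a)
{
	return (x + a - 1) & ~(a - 1);
}

/* pretend a 4-page folio occupies slots 512..515 and resists reclaim */
static int mock_try_to_reclaim(unsigned long offset)
{
	if (offset >= 512 && offset < 516)
		return -4;
	return 0;
}

int main(void)
{
	unsigned long offset = 510, end = 520;
	int nr_reclaimed;

	do {
		nr_reclaimed = mock_try_to_reclaim(offset);
		printf("probe slot %lu -> %d\n", offset, nr_reclaimed);
		offset++;
		if (nr_reclaimed)
			/* folios are naturally aligned in swap, so jump to
			 * the next abs(nr_reclaimed) boundary instead of
			 * probing every remaining slot of the same folio */
			offset = round_up_pow2(offset, abs(nr_reclaimed));
	} while (offset < end);
	return 0;
}

Here slots 513 to 515 are never probed: the first probe at 512 already answered for the whole 4-page folio. This is also why nr_reclaimed wants a signed type, so abs() sees the negative "found but not reclaimed" values.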
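Finally, swap_put_entries_direct() may be handed a range spanning multiple clusters, while swap_put_entries_cluster() requires all slots to sit in one cluster, so the range is cut at cluster boundaries first. A standalone sketch of that splitting arithmetic (note round_up(offset + 1, ...) rather than round_up(offset, ...), so an offset already sitting on a boundary still advances a full cluster); the offsets are invented, and min_ul()/round_up_pow2() are local helpers standing in for the kernel's min() and round_up():

#include <stdio.h>

#define SWAPFILE_CLUSTER 512UL	/* matches the kernel's constant */

static unsigned long round_up_pow2(unsigned long x, unsigned long a)
{
	return (x + a - 1) & ~(a - 1);
}

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	/* hypothetical range crossing two cluster boundaries */
	unsigned long start_offset = 1000, end_offset = 2000;
	unsigned long offset = start_offset, cluster_end;

	do {
		cluster_end = min_ul(round_up_pow2(offset + 1, SWAPFILE_CLUSTER),
				     end_offset);
		/* each chunk lies entirely inside one cluster */
		printf("swap_put_entries_cluster(si, %lu, %lu, true)\n",
		       offset, cluster_end - offset);
		offset = cluster_end;
	} while (offset < end_offset);
	return 0;
}

For the range [1000, 2000) with 512-slot clusters this yields the chunks [1000, 1024), [1024, 1536) and [1536, 2000), each fully inside one cluster, satisfying the helper's documented context requirement.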