HEX
Server: LiteSpeed
System: Linux eko108.isimtescil.net 4.18.0-477.21.1.lve.1.el8.x86_64 #1 SMP Tue Sep 5 23:08:35 UTC 2023 x86_64
User: uyarreklamcomtr (11202)
PHP: 7.4.33
Disabled: opcache_get_status
Upload Files
File: /var/www/vhosts/uyarreklam.com.tr/httpdocs/Links.tar
Data.php000064400000026501151537762310006144 0ustar00<?php
namespace AIOSEO\BrokenLinkChecker\Links;

// Exit if accessed directly.
if ( ! defined( 'ABSPATH' ) ) {
	exit;
}

use AIOSEO\BrokenLinkChecker\Models;

/**
 * Handles the extraction, parsing and storage of links for the links scan.
 *
 * @since 1.0.0
 */
class Data {
	/**
	 * The ignored extensions.
	 *
	 * @since 1.0.0
	 *
	 * @var array
	 */
	private $ignoredExtensions = [];

	/**
	 * Class constructor.
	 *
	 * @since 1.0.0
	 */
	public function __construct() {
		$this->setIgnoredExtensions();
	}

	/**
	 * Indexes the links in the given post.
	 *
	 * @since 1.0.0
	 *
	 * @param  int  $postId The post ID.
	 * @return void
	 */
	public function indexLinks( $postId ) {
		$post = get_post( $postId );
		if ( ! is_a( $post, 'WP_Post' ) ) {
			return;
		}

		// Delete all links first. We have to do this in order to remove old links that no longer exist.
		Models\Link::deleteLinks( $postId );

		$links = $this->extractLinks( $postId, $post->post_content );
		if ( empty( $links ) ) {
			return;
		}

		$this->storeLinks( $links );
	}

	/**
	 * Stores the given links to the DB.
	 *
	 * @since 1.0.0
	 *
	 * @param  array $links The links.
	 * @return void
	 */
	private function storeLinks( $links ) {
		$urls         = [];
		$insertValues = [];
		$currentDate  = gmdate( 'Y-m-d H:i:s' );
		foreach ( $links as $linkData ) {
			$data = Models\Link::sanitizeLink( $linkData );
			if ( empty( $data ) ) {
				continue;
			}

			if ( ! Models\Link::validateLink( $data ) ) {
				continue;
			}

			$urls[ $data['url_hash'] ] = $data['url'];

			$blcLinkStatusId = '%d';
			if ( empty( $data['blc_link_status_id'] ) ) {
				$blcLinkStatusId           = '%s';
				$data['blc_link_status_id'] = 'null';
			}

			$insertValues[] = vsprintf(
				"(%d, $blcLinkStatusId, '%s', '%s', '%s', '%s', %d, '%s', '%s', '%s', '%s', '%s', '$currentDate', '$currentDate')",
				$data
			);
		}

		$implodedInsertValues = implode( ',', $insertValues );

		$tableName = aioseoBrokenLinkChecker()->core->db->prefix . 'aioseo_blc_links';
		aioseoBrokenLinkChecker()->core->db->execute(
			"INSERT INTO $tableName
			(`post_id`, `blc_link_status_id`, `url`, `url_hash`, `hostname`, `hostname_url`, `external`, `anchor`, `phrase`, `phrase_html`, `paragraph`, `paragraph_html`, `created`, `updated`)
			VALUES $implodedInsertValues"
		);

		$existing = aioseoBrokenLinkChecker()->core->db->start( 'aioseo_blc_link_status' )
			->select( 'url_hash' )
			->whereIn( 'url_hash', array_keys( $urls ) )
			->run()
			->result();

		foreach ( $existing as $row ) {
			unset( $urls[ $row->url_hash ] );
		}

		if ( empty( $urls ) ) {
			return;
		}

		foreach ( $urls as $hash => $url ) {
			$statusId = aioseoBrokenLinkChecker()->core->db->insert( 'aioseo_blc_link_status' )
				->set( [
					'url'      => $url,
					'url_hash' => $hash,
					'created'  => aioseoBrokenLinkChecker()->helpers->timeToMysql( time() ),
					'updated'  => aioseoBrokenLinkChecker()->helpers->timeToMysql( time() )
				] )
				->run()
				->insertId();

			aioseoBrokenLinkChecker()->core->db->update( 'aioseo_blc_links' )
				->where( 'url', $url )
				->set( [
					'blc_link_status_id' => $statusId
				] )
				->run();
		}
	}

	/**
	 * Returns the links that are in the post content.
	 *
	 * @since 1.0.0
	 *
	 * @param  int    $postId      The post ID.
	 * @param  string $postContent The post content.
	 * @return array               The links.
	 */
	private function extractLinks( $postId, $postContent ) {
		$postContent = aioseoBrokenLinkChecker()->helpers->decodeHtmlEntities( $postContent );

		// Strip data URIs to prevent catastrophic backtracking.
		$postContent = preg_replace( '/data:[^;]+;base64,[^"]+/', '', (string) $postContent );

		/**
		 * Regex pattern divided into groups:
		 * 0  - Full phrase with link tag.
		 * 2  - Start of the phrase, before the anchor.
		 * 4  - The URL.
		 * 6  - The anchor.
		 * 9  - The end of the phrase, after the anchor.
		 * 10 - The ending punctuation mark.
		 */
		preg_match_all(
			'/(([^\r\n.?!]*)<t?a[^>]*?href=(\"|\')(?!tel:|mailto:)([^\"\']*?)(\"|\')[^>]*?>([\s\w\W]*?)<\/t?a>|<!-- wp:core-embed\/wordpress {"url":"([^"]*?)"[^}]*?"} -->|(?:>|&nbsp;|\s)((?:(?:http|ftp|https)\:\/\/)(?:[\w_-]+(?:(?:\.[\w_-]+)+))(?:[\w.,@?^=%&:\/~+#-]*[\w@?^=%&\/~+#-]))(?:<|&nbsp;|\s))([^<>.?!\r\n]*)([.?!]?)/i', // phpcs:disable Generic.Files.LineLength.MaxExceeded
			(string) $postContent,
			$matches
		);

		if ( empty( $matches[0] ) ) {
			return [];
		}

		$links = [];
		foreach ( $matches[0] as $k => $v ) {
			if ( empty( $matches[4][ $k ] ) || empty( $matches[6][ $k ] ) ) {
				continue;
			}

			$parsedUrl = $this->parseUrl( $matches[4][ $k ] );
			if ( empty( $parsedUrl['host'] ) ) {
				continue;
			}

			if (
				! empty( $parsedUrl['path'] ) &&
				preg_match( '/\.(.*?)$/i', $parsedUrl['path'], $extension ) &&
				! empty( $extension[1] ) &&
				in_array( $extension[1], $this->ignoredExtensions, true )
			) {
				continue;
			}

			// NOTE: We need to check this here before we strip off the "www" part.
			// Otherwise we will not be able to detect internal links on sites running on "www".
			$isInternal = $parsedUrl['host'] === $this->getHostname();

			$hostname = aioseoBrokenLinkChecker()->helpers->pregReplace( '/www\./i', '', $parsedUrl['host'] );
			$anchor   = wp_strip_all_tags( $matches[6][ $k ] );
			// Remove trailing URL tags. The regex isn't sufficient for this.
			$phrase = wp_strip_all_tags( $matches[0][ $k ] );
			$phrase = trim( preg_replace( '/(.*)(<t?a[^<>].*$)/', '', (string) $phrase ) );

			// Don't continue if the anchor or phrase are empty, e.g. blank link tag.
			if ( ! $anchor || ! $phrase ) {
				continue;
			}

			$phraseHtml = aioseoBrokenLinkChecker()->helpers->stripIncompleteHtmlTags( $matches[0][ $k ] );
			$phraseHtml = aioseoBrokenLinkChecker()->helpers->stripScriptTags( $phraseHtml );
			$phraseHtml = aioseoBrokenLinkChecker()->helpers->trimParagraphTags( $phraseHtml );

			if ( empty( $phraseHtml ) ) {
				continue;
			}

			$paragraph     = aioseoBrokenLinkChecker()->main->paragraph->get( $postId, $postContent, $phrase );
			$paragraphHtml = aioseoBrokenLinkChecker()->main->paragraph->getHtml( $anchor, $paragraph, $postContent );

			// Reformat the URL to get rid of params and fragments.
			$url = $this->geturlWithoutParamsAndFragment( $parsedUrl );

			// We need to sanitize the URL here so the hash is calculated based on the escaped version.
			$url = trim( sanitize_url( $url ) );

			$linkData = [
				'post_id'            => (int) $postId,
				'blc_link_status_id' => $this->getLinkStatusId( $url ),
				'url'                => $url,
				'url_hash'           => sha1( $url ),
				'hostname'           => $hostname,
				'hostname_url'       => sha1( $hostname ),
				'external'           => ! $isInternal,
				'anchor'             => $anchor,
				'phrase'             => $phrase,
				'phrase_html'        => $phraseHtml,
				'paragraph'          => $paragraph,
				'paragraph_html'     => $paragraphHtml
			];

			$links[] = $linkData;
		}

		return $links;
	}

	/**
	 * Return the link status ID.
	 *
	 * @since 1.0.0
	 *
	 * @param  string   $url The URL to look up.
	 * @return int|null      The link status ID.
	 */
	private function getLinkStatusId( $url ) {
		static $linkStatusId = [];

		$hash = sha1( $url );
		if ( isset( $linkStatusId[ $hash ] ) ) {
			return $linkStatusId[ $hash ];
		}

		$possibleLinkStatusId = aioseoBrokenLinkChecker()->core->db->start( 'aioseo_blc_link_status' )
			->where( 'url_hash', $hash )
			->run()
			->result();

		$linkStatusId[ $hash ] = ! empty( $possibleLinkStatusId ) ? $possibleLinkStatusId[0]->id : null;

		return $linkStatusId[ $hash ];
	}

	/**
	 * Returns the site's hostname.
	 *
	 * @since 1.0.0
	 *
	 * @return string The hostname.
	 */
	private function getHostname() {
		static $siteUrl = null;
		if ( null === $siteUrl ) {
			$siteUrl = wp_parse_url( get_site_url(), PHP_URL_HOST );
		}

		return $siteUrl;
	}

	/**
	 * Returns the parsed URL.
	 *
	 * @since 1.0.0
	 * @since 1.1.1 Renamed method.
	 *
	 * @param  string $url The URL.
	 * @return array       The parsed URL.
	 */
	private function parseUrl( $url ) {
		$parsedUrl = wp_parse_url( $url );
		if ( empty( $parsedUrl ) ) {
			return [];
		}

		// If the URL is relative, add the hostname of the site.
		if ( empty( $parsedUrl['host'] ) ) {
			$parsedUrl['host']   = $this->getHostname();
			$parsedUrl['scheme'] = wp_parse_url( get_site_url(), PHP_URL_SCHEME );
		}

		return $parsedUrl;
	}

	/**
	 * Returns the URL without params and fragments.
	 *
	 * @since 1.1.1
	 *
	 * @param  array  $parsedUrl The parsed URL.
	 * @return string            The URL without params and fragments.
	 */
	private function geturlWithoutParamsAndFragment( $parsedUrl ) {
		$url = '';
		if ( ! empty( $parsedUrl['scheme'] ) ) {
			$url .= $parsedUrl['scheme'] . '://';
		}

		$url .= $parsedUrl['host'];

		if ( ! empty( $parsedUrl['path'] ) ) {
			$url .= $parsedUrl['path'];
		}

		return $url;
	}

	/**
	 * Returns the posts to scan.
	 *
	 * @since 1.0.0
	 *
	 * @param  bool      $countOnly Whether to return only the count.
	 * @return array|int            The posts to scan or a count.
	 */
	public function getPostsToScan( $countOnly = false ) {
		$postsPerScan        = apply_filters( 'aioseo_blc_links_posts_per_scan', 50 );
		$postTypes           = aioseoBrokenLinkChecker()->helpers->getScannablePostTypes();
		$postStatuses        = aioseoBrokenLinkChecker()->helpers->getPublicPostStatuses( true );
		$minimumLinkScanDate = aioseoBrokenLinkChecker()->internalOptions->internal->minimumLinkScanDate ?: date( 'Y-m-d H:i:s' );

		$query = aioseoBrokenLinkChecker()->core->db->start( 'posts as p' )
			->leftJoin( 'aioseo_blc_posts as abp', 'p.ID = abp.post_id' )
			->whereIn( 'p.post_status', $postStatuses )
			->whereIn( 'p.post_type', $postTypes )
			->whereRaw( "(
				abp.post_id IS NULL OR
				abp.link_scan_date < p.post_modified_gmt OR
				abp.link_scan_date IS NULL OR
				abp.link_scan_date < '$minimumLinkScanDate'
			)" );

		if ( $countOnly ) {
			return $query->count();
		}

		$postsToScan = $query
			->select( 'DISTINCT p.ID, p.post_content, p.post_type, p.post_status' )
			->limit( $postsPerScan )
			->run()
			->result();

		return $postsToScan;
	}

	/**
	 * Returns the total number of scannable posts.
	 *
	 * @since 1.0.0
	 *
	 * @return int The total number of scannable posts.
	 */
	private function getTotalScannablePosts() {
		$postTypes    = aioseoBrokenLinkChecker()->helpers->getScannablePostTypes();
		$postStatuses = aioseoBrokenLinkChecker()->helpers->getPublicPostStatuses( true );

		$query = aioseoBrokenLinkChecker()->core->db->start( 'posts as p' )
			->whereIn( 'p.post_status', $postStatuses )
			->whereIn( 'p.post_type', $postTypes );

		return $query->count();
	}

	/**
	 * Returns the scan percentage.
	 *
	 * @since 1.0.0
	 *
	 * @return int The scan percentage.
	 */
	public function getScanPercentage() {
		$postsToScan         = $this->getPostsToScan( true );
		$totalScannablePosts = $this->getTotalScannablePosts();
		if ( 0 === $postsToScan || 0 === $totalScannablePosts ) {
			return 100;
		}

		return ceil( 100 - ( ( $postsToScan / $totalScannablePosts ) * 100 ) );
	}

	/**
	 * Sets the ignored extensions.
	 *
	 * @since 1.0.0
	 *
	 * @return void
	 */
	private function setIgnoredExtensions() {
		$this->ignoredExtensions = apply_filters( 'aioseo_blc_ignored_extensions', [
			// Executable files
			'apk',
			'bat',
			'bin',
			'cgi',
			'com',
			'exe',
			'gadget',
			'jar',
			'py',
			'wsf',
		] );
	}
}Links.php000064400000007723151537762320006361 0ustar00<?php
namespace AIOSEO\BrokenLinkChecker\Links;

// Exit if accessed directly.
if ( ! defined( 'ABSPATH' ) ) {
	exit;
}

use AIOSEO\BrokenLinkChecker\Models;

/**
 * Handles the Links scan.
 *
 * @since 1.0.0
 */
class Links {
	/**
	 * The action name of the scan.
	 *
	 * @since 1.0.0
	 *
	 * @var string
	 */
	private $scanActionName = 'aioseo_blc_links_scan';

	/**
	 * Data class instance.
	 *
	 * @since 1.1.0
	 *
	 * @var Data
	 */
	public $data = null;

	/**
	 * Holds the IDs of posts that need to be rescanned.
	 * We have to rescan these on shutdown instead of through the "save_post" hook since that hook is triggered right after a post is updated.
	 * That in turn can cause subsequent link updatss/deletions during REST API requests to fail because all links are deleted in the callback.
	 *
	 * @since 1.1.0
	 *
	 * @var array
	 */
	public $postsToRescan = [];

	/**
	 * Class constructor.
	 *
	 * @since 1.0.0
	 */
	public function __construct() {
		$this->data = new Data();

		add_action( $this->scanActionName, [ $this, 'scanPosts' ], 11, 1 );
		add_action( 'save_post', [ $this, 'scanPost' ], 21, 1 );
		add_action( 'shutdown', [ $this, 'rescanPosts' ] );

		if ( ! is_admin() ) {
			return;
		}

		add_action( 'init', [ $this, 'scheduleScan' ], 3003 );
	}

	/**
	 * Schedules the initial links scan.
	 *
	 * @since 1.0.0
	 *
	 * @return void
	 */
	public function scheduleScan() {
		// If there is no action at all, schedule one.
		if ( ! aioseoBrokenLinkChecker()->actionScheduler->isScheduled( $this->scanActionName ) ) {
			aioseoBrokenLinkChecker()->actionScheduler->scheduleAsync( $this->scanActionName );
		}
	}

	/**
	 * Scans posts for links and stores them in the DB.
	 *
	 * @since 1.0.0
	 *
	 * @param  bool $scheduleNewAction Whether to schedule a new action.
	 * @return void
	 */
	public function scanPosts( $scheduleNewAction = true ) {
		static $iterations = 0;
		$iterations++;

		aioseoBrokenLinkChecker()->helpers->timeElapsed();

		$postsToScan = $this->data->getPostsToScan();

		if ( empty( $postsToScan ) ) {
			if ( $scheduleNewAction ) {
				aioseoBrokenLinkChecker()->actionScheduler->scheduleSingle( $this->scanActionName, 15 * MINUTE_IN_SECONDS );
			}

			return;
		}

		foreach ( $postsToScan as $postToScan ) {
			$this->scanPost( $postToScan );
		}

		$timeElapsed = aioseoBrokenLinkChecker()->helpers->timeElapsed();
		if ( 10 > $timeElapsed && 200 > $iterations ) {
			// If we still have time, do another scan.
			$this->scanPosts();

			return;
		}

		if ( $scheduleNewAction ) {
			aioseoBrokenLinkChecker()->actionScheduler->scheduleSingle( $this->scanActionName, MINUTE_IN_SECONDS );
		}
	}

	/**
	 * Scans the given individual post for links.
	 *
	 * @since 1.0.0
	 *
	 * @param  Object|int $post The post object or ID (if called on "save_post").
	 * @return void
	 */
	public function scanPost( $post ) {
		if ( doing_action( 'save_post' ) && ! empty( $this->postsToRescan ) ) {
			// If posts need to be reindexed manually, bail.
			return;
		}

		if ( ! is_object( $post ) ) {
			$post = get_post( $post );
		}

		// Check if we didn't scan this post in the last 3 seconds. This is to prevent a second, subsequent request from scanning the same post.
		if ( aioseoBrokenLinkChecker()->core->cache->get( 'aioseo_blc_scan_post_' . $post->ID ) ) {
			return;
		}

		if ( ! aioseoBrokenLinkChecker()->helpers->isScannablePost( $post ) ) {
			return;
		}

		$this->data->indexLinks( $post->ID );

		$aioseoPost                 = Models\Post::getPost( $post->ID );
		$aioseoPost->link_scan_date = gmdate( 'Y-m-d H:i:s' );
		$aioseoPost->save();

		// Set a transient to prevent scanning the same post again in the next 3 seconds.
		aioseoBrokenLinkChecker()->core->cache->update( 'aioseo_blc_scan_post_' . $post->ID, true, 3 );
	}

	/**
	 * Reindexes posts on shutdown.
	 *
	 * @since 1.1.0
	 *
	 * @return void
	 */
	public function rescanPosts() {
		if ( empty( $this->postsToRescan ) ) {
			return;
		}

		foreach ( $this->postsToRescan as $postId ) {
			$this->scanPost( $postId );
		}
	}
}