collaborative-filtering

A Simple Slope One User Item Recommender In PHP/MySQL

Big fan of user/item recommenders, which is also known as collaborative filtering. Slope one is a very basic but reasonably accurate recommender algorithm which makes it fairly simple to implement in PHP/MySQL. I attempted to make a reusable algo you could use in your application. This implementation of slope one is basically a simplistic netflix algorithm that takes users, items, and ratings and predicts what the user might rate on known items they have not yet rated. Slope one was first proposed by Daniel Lemire and Anna Maclachlan.

Something interesting to note about recommenders though, I recall from a netflix paper I read that implicit user ratings produce better recommendations than explicit. Meaning the movies people watch, their actual behavior, is more useful in prediction than what they self report as a rating.

Create a pdo connection to pass in
$dsn = "mysql:host=127.0.0.1;dbname=test;charset=utf8";
$opt = [
	PDO::ATTR_ERRMODE            => PDO::ERRMODE_EXCEPTION,
	PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
	PDO::ATTR_EMULATE_PREPARES   => false,
	];
$pdo = new PDO($dsn, 'root', 'your_db_password', $opt);
Create instance of SlopeOne
$slope = new SlopeOne($pdo);
Create the 2 tables we need to store data, caution, only run once because it will drop and recreate the tables.
$slope->initializeTables();
Add some users, items, and their ratings. Note that I purposely left out Sara’s rating for Salads, the item we’re trying to predict for her.
$slope->addRating('Mike', 'Tacos', 7);
$slope->addRating('Mike', 'Pizza', 9);
$slope->addRating('Mike', 'Hamburgers', 3);
$slope->addRating('Mike', 'Salads', 10);
$slope->addRating('Mike', 'Italian', 2);

$slope->addRating('Pete', 'Tacos', 5);
$slope->addRating('Pete', 'Pizza', 5);
$slope->addRating('Pete', 'Hamburgers', 8);
$slope->addRating('Pete', 'Salads', 5);
$slope->addRating('Pete', 'Italian', 5);

$slope->addRating('Sara', 'Tacos', 4);
$slope->addRating('Sara', 'Pizza', 4);
$slope->addRating('Sara', 'Hamburgers', 10);
$slope->addRating('Sara', 'Italian', 9);
Clear out advice table (all predictions)
$slope->clearPredictions();
Process all ratings into advice (predictions)
$slope->processAllItems();
Show all predictions for user Sara
print_r($slope->predictAllForItemName('Sara'));
Sara’s salad prediction
Array
(
    [0] => Array
        (
            [item] => Salads
            [rating] => 8.710042
        )

)
SlopeOne class
ini_set('memory_limit', '512M');

class SlopeOne {
	public static $pdo = null;

	public function __construct($pdo) {
		$this->pdo = $pdo;
	}

	// log a rating
	public function addRating($itemName, $subItemName, $rating){
		$this->pdo->prepare("
		insert into rating values(?, ?, ?, now());
		")->execute(array($itemName, $subItemName, $rating));
	}

	// process all rating items into advice for predictions
	public function processAllItems(){
		$stmt = $this->pdo->prepare('select itemName, subItemName from rating');
		$stmt->execute();
		$rows = $stmt->fetchAll();
		foreach($rows as $row){
			$this->processItem($row['itemName'], $row['subItemName']);
		}
	}

	// process a single item for predictions, typically the one you just added with addRating
	public function processItem($itemName, $subItemName){
		$stmt = $this->pdo->prepare('
		SELECT DISTINCT
			r.subItemName,
			(r2.ratingValue - r.ratingValue) as rating_difference
		FROM rating r, rating r2
		WHERE r.itemName=? and
			r2.subItemName=? and
			r2.itemName=?;
		');
		$stmt->execute(array($itemName, $subItemName, $itemName));
		$rows = $stmt->fetchAll();

		//For every one of the user's rating pairs, update the advice table
		foreach($rows as $row){

			$other_subItemName = $row["subItemName"];

			$rating_difference = $row["rating_difference"];

			//if the pair ($subItemName, $other_subItemName) is already in the advice table
			//then we want to update 2 rows.
			$stmt = $this->pdo->prepare('
			select count(*) FROM advice
			WHERE subItemName1=? and subItemName2=?
			');
			$stmt->execute(array($subItemName, $other_subItemName));
			$count = $stmt->fetchColumn();

			if($count > 0){
				$this->pdo->prepare("
					UPDATE advice
						SET count=count+1, sum=sum+1
					WHERE subItemName1=?
						AND subItemName2=?
				")->execute(array($subItemName, $other_subItemName));

				//We only want to update if the items are different
				if ($subItemName != $other_subItemName) {
					$this->pdo->prepare("
						UPDATE advice
							SET count = count+1, sum = sum-?
						WHERE subItemName1 = ? AND subItemName2 = ?
					")->execute(array($rating_difference, $other_subItemName, $subItemName));

				} else {
					$this->pdo->prepare("
						UPDATE advice
							SET count = count+1, sum = sum-?
						WHERE subItemName1 = ? AND subItemName2 = ?
					")->execute(array($rating_difference, $other_subItemName, $subItemName));
				}

			} else {
				//we want to insert 2 rows into the advice table
				$this->pdo->prepare("
					INSERT INTO advice VALUES (?,?,1,?)
				")->execute(array($subItemName, $other_subItemName, $rating_difference));

				//We only want to insert if the items are different
				if ($subItemName != $other_subItemName) {
					$this->pdo->prepare("
						INSERT INTO advice VALUES (?,?,1,?)
					")->execute(array($other_subItemName, $subItemName, (0-$rating_difference)));
				}
			}
		}
	}

	// return an array of every predictions for a user (item)
	public function predictAllForItemName($itemName, $ignoreKnownItems = true, $stdDeviation = true) {
		$arr = array();
		$stdDev = 0;

		/* here we calculate the std deviation if it's set to true, 
		and then add the inverse to the row (can tweak this value in the future) so that as std dev decreases, the rating should increase gradually*/

		$stmt = $this->pdo->prepare("
			SELECT
				d.subItemName1 as item,
				sum(d.count) as denom,
				".( $stdDeviation ? "std(r.ratingValue) as stdd, " : "") . "
				sum(d.sum + d.count*r.ratingValue) as numer
			FROM rating r, advice d
			WHERE r.itemName=?
				and d.subItemName1 <> r.subItemName
				and d.subItemName2 = r.subItemName ".
			($ignoreKnownItems ? " and d.subItemName1 not in (select subItemName from rating where itemName = '$itemName') " : "") . "
		GROUP BY d.subItemName1
		");
		$stmt->execute(array($itemName));
		$rows = $stmt->fetchAll();
		foreach($rows as $row){
			if($row['denom']==0){
				$rating = 0;
			}else{
				if($row['stdd']!=0 && $stdDeviation){
					$rating = number_format(((float)($row['numer']+(1/$row['stdd']))/(float)$row['denom']),6, '.','');
				}else{
					$rating = number_format(((float)$row['numer']/(float)$row['denom']), 6, '.', '');
				}
			}
			$arr[] = array('item'=>$row['item'],'rating'=>$rating);
		}
		return $arr;
	}


	public function clearPredictions(){
		$this->pdo->prepare("
		truncate table advice;
		")->execute();
	}


	public function initializeTables(){
		$this->pdo->prepare("
		drop table IF EXISTS rating;
		")->execute();

		$this->pdo->prepare("
		CREATE TABLE rating (
			itemName varchar(255) NOT NULL,
			subItemName varchar(255) NOT NULL,
			ratingValue INT NOT NULL,
			ratingTime TIMESTAMP NOT NULL
		);")->execute();

		$this->pdo->prepare("
		CREATE INDEX itemName ON rating (itemName);
		")->execute();

		$this->pdo->prepare("
		CREATE INDEX subItemName ON rating (subItemName);
		")->execute();

		$this->pdo->prepare("
		drop table IF EXISTS advice;
		")->execute();

		$this->pdo->prepare("
		CREATE TABLE advice (
		subItemName1 varchar(100) NOT NULL default '',
		subItemName2 varchar(100) NOT NULL default '',
		count int(11) NOT NULL default '0',
		sum int(11) NOT NULL default '0',
		PRIMARY KEY  (subItemName1,subItemName2)
		);
		")->execute();

	}

}

Leave a Reply

Your email address will not be published.