\contentsline {figure}{\numberline {1.1}{\ignorespaces The Author.}}{8}{figure.1.1}
\contentsline {figure}{\numberline {1.2}{\ignorespaces Users as low-dimensional manifolds.}}{13}{figure.1.2}
\contentsline {figure}{\numberline {2.1}{\ignorespaces Supervised, unsupervised, and semi-supervised learning.}}{21}{figure.2.1}
\contentsline {figure}{\numberline {2.2}{\ignorespaces Ratings versus review length (\emph {Goodreads}).}}{22}{figure.2.2}
\contentsline {figure}{\numberline {2.3}{\ignorespaces Why is there a column of `1's in the feature matrix?}}{23}{figure.2.3}
\contentsline {figure}{\numberline {2.4}{\ignorespaces Line of best fit between ratings and review length (\emph {Goodreads}). }}{24}{figure.2.4}
\contentsline {figure}{\numberline {2.5}{\ignorespaces Gaussian error density. }}{28}{figure.2.5}
\contentsline {figure}{\numberline {2.6}{\ignorespaces The MSE and the MLE.}}{29}{figure.2.6}
\contentsline {figure}{\numberline {2.7}{\ignorespaces Residuals versus theoretical quantiles under a normal distribution.}}{31}{figure.2.7}
\contentsline {figure}{\numberline {2.8}{\ignorespaces Quadratic and cubic polynomials of best fit. }}{34}{figure.2.8}
\contentsline {figure}{\numberline {2.9}{\ignorespaces Gender versus review length (beer data).}}{35}{figure.2.9}
\contentsline {figure}{\numberline {2.10}{\ignorespaces Sequential versus one-hot encodings.}}{35}{figure.2.10}
\contentsline {figure}{\numberline {2.11}{\ignorespaces Ratings as a function of the weekday, and line of best fit. }}{38}{figure.2.11}
\contentsline {figure}{\numberline {2.12}{\ignorespaces Attempt to fit periodic data with a linear model.}}{39}{figure.2.12}
\contentsline {figure}{\numberline {2.13}{\ignorespaces Upvotes versus submission number on \emph {reddit}.}}{39}{figure.2.13}
\contentsline {figure}{\numberline {2.14}{\ignorespaces Interpreting the parameters of linear models. }}{41}{figure.2.14}
\contentsline {figure}{\numberline {2.15}{\ignorespaces Gradient descent demonstration. }}{42}{figure.2.15}
\contentsline {figure}{\numberline {3.1}{\ignorespaces The sigmoid function.}}{50}{figure.3.1}
\contentsline {figure}{\numberline {3.2}{\ignorespaces Receiver-Operating Characteristic and Precision-Recall curves.}}{60}{figure.3.2}
\contentsline {figure}{\numberline {3.3}{\ignorespaces Overfitting demonstration.}}{63}{figure.3.3}
\contentsline {figure}{\numberline {3.4}{\ignorespaces Basic roles of training, validation, and test sets.}}{66}{figure.3.4}
\contentsline {figure}{\numberline {3.5}{\ignorespaces Guidelines for building training, validation, and test sets.}}{66}{figure.3.5}
\contentsline {figure}{\numberline {3.6}{\ignorespaces The regularization effect of $\ell _1$ versus $\ell _2$ norms.}}{67}{figure.3.6}
\contentsline {figure}{\numberline {3.7}{\ignorespaces Example train, validation, and test curves.}}{68}{figure.3.7}
\contentsline {figure}{\numberline {3.8}{\ignorespaces Training, validation, and test error on a real pipeline. }}{73}{figure.3.8}
\contentsline {figure}{\numberline {4.1}{\ignorespaces Recommender systems compared to other types of machine learning.}}{82}{figure.4.1}
\contentsline {figure}{\numberline {4.2}{\ignorespaces Memory-based and model-based recommender systems. }}{84}{figure.4.2}
\contentsline {figure}{\numberline {4.3}{\ignorespaces Similarity as intersection over union.}}{86}{figure.4.3}
\contentsline {figure}{\numberline {4.4}{\ignorespaces The Cosine Similarity between two vectors.}}{88}{figure.4.4}
\contentsline {figure}{\numberline {4.5}{\ignorespaces Demonstration of the Cosine Similarity.}}{89}{figure.4.5}
\contentsline {figure}{\numberline {4.6}{\ignorespaces Demonstration of the Pearson Similarity.}}{91}{figure.4.6}
\contentsline {figure}{\numberline {4.7}{\ignorespaces Summary of similarity measures. }}{92}{figure.4.7}
\contentsline {figure}{\numberline {4.8}{\ignorespaces User interactions as bipartite graphs.}}{96}{figure.4.8}
\contentsline {figure}{\numberline {5.1}{\ignorespaces Memory-based versus model-based approaches. }}{103}{figure.5.1}
\contentsline {figure}{\numberline {5.2}{\ignorespaces Representation of a user and item in a latent factor model.}}{106}{figure.5.2}
\contentsline {figure}{\numberline {5.3}{\ignorespaces Pointwise versus pairwise recommendation. }}{113}{figure.5.3}
\contentsline {figure}{\numberline {5.4}{\ignorespaces Highly compatible items in latent space.}}{123}{figure.5.4}
\contentsline {figure}{\numberline {5.5}{\ignorespaces Representation of a multilayer perceptron with $L$ layers.}}{125}{figure.5.5}
\contentsline {figure}{\numberline {5.6}{\ignorespaces Autoencoder representation.}}{126}{figure.5.6}
\contentsline {figure}{\numberline {6.1}{\ignorespaces Reciprocal interest in bartering settings.}}{150}{figure.6.1}
\contentsline {figure}{\numberline {6.2}{\ignorespaces Shared parameters in socially-aware recommendation.}}{153}{figure.6.2}
\contentsline {figure}{\numberline {6.3}{\ignorespaces When is side-information useful for recommendation? }}{158}{figure.6.3}
\contentsline {figure}{\numberline {6.4}{\ignorespaces Ad recommendation as bipartite matching.}}{167}{figure.6.4}
\contentsline {figure}{\numberline {6.5}{\ignorespaces Performance demonstration in cold-start settings.}}{170}{figure.6.5}
\contentsline {figure}{\numberline {7.1}{\ignorespaces Moving-average plots of \emph {Goodreads} Fantasy novel ratings.}}{174}{figure.7.1}
\contentsline {figure}{\numberline {7.2}{\ignorespaces Temporal dynamics on Netflix.}}{178}{figure.7.2}
\contentsline {figure}{\numberline {7.3}{\ignorespaces Expressive deviation term from \cite {koren2009collaborative}. }}{180}{figure.7.3}
\contentsline {figure}{\numberline {7.4}{\ignorespaces Spline interpolation of temporally evolving user bias. }}{181}{figure.7.4}
\contentsline {figure}{\numberline {7.5}{\ignorespaces Session-based Temporal Graph \citep {xiang2010temporal}. }}{185}{figure.7.5}
\contentsline {figure}{\numberline {7.6}{\ignorespaces Translation-based sequential models.}}{192}{figure.7.6}
\contentsline {figure}{\numberline {7.7}{\ignorespaces Visualization of an LSTM cell.}}{198}{figure.7.7}
\contentsline {figure}{\numberline {8.1}{\ignorespaces What's the point of sentiment analysis? }}{214}{figure.8.1}
\contentsline {figure}{\numberline {8.2}{\ignorespaces Bag-of-Words models.}}{215}{figure.8.2}
\contentsline {figure}{\numberline {8.3}{\ignorespaces Arguments for and against N-grams. }}{220}{figure.8.3}
\contentsline {figure}{\numberline {8.4}{\ignorespaces Term frequency and \emph {tf-idf} comparison.}}{222}{figure.8.4}
\contentsline {figure}{\numberline {8.5}{\ignorespaces Item representations under the \emph {item2vec} model.}}{227}{figure.8.5}
\contentsline {figure}{\numberline {8.6}{\ignorespaces Shared parameters in personalized text models.}}{230}{figure.8.6}
\contentsline {figure}{\numberline {8.7}{\ignorespaces Recurrent neural network for text generation.}}{232}{figure.8.7}
\contentsline {figure}{\numberline {8.8}{\ignorespaces Personalized recurrent network architectures.}}{234}{figure.8.8}
\contentsline {figure}{\numberline {8.9}{\ignorespaces Real and synthetically generated user reviews.}}{235}{figure.8.9}
\contentsline {figure}{\numberline {8.10}{\ignorespaces Example of a personalized recipe.}}{236}{figure.8.10}
\contentsline {figure}{\numberline {8.11}{\ignorespaces Examples of generated justifications for a recommendation.}}{238}{figure.8.11}
\contentsline {figure}{\numberline {9.1}{\ignorespaces Basic Siamese setup for item-to-item compatibility. }}{254}{figure.9.1}
\contentsline {figure}{\numberline {9.2}{\ignorespaces Basic setup of a personalized generative adversarial network.}}{262}{figure.9.2}
\contentsline {figure}{\numberline {9.3}{\ignorespaces Item representations embedded via \emph {t-SNE}.}}{264}{figure.9.3}
\contentsline {figure}{\numberline {10.1}{\ignorespaces Comparison of inner product and nearest neighbor recommendations.}}{268}{figure.10.1}
\contentsline {figure}{\numberline {10.2}{\ignorespaces Distribution of interactions compared to recommendations.}}{270}{figure.10.2}