I am still a proponent of the pages/posts distinction. A page belongs to a Site, a post belongs to a page. This is the schema I have in mind:
CREATE TABLE codidact.Users (
Id BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT,
Username VARCHAR(255) NOT NULL,
Email VARCHAR(255) NOT NULL, -- https://stackoverflow.com/questions/7717573
PasswordHash VARCHAR(255) NOT NULL,
PasswordSalt VARCHAR(255) NOT NULL,
Reputation INT(11) NOT NULL DEFAULT 1,
CreatedOnUTC TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
ModifiedOnUTC TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
CREATE TABLE codidact.Sites (
Id BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT,
Title VARCHAR(255) NOT NULL,
-- can have other things here like banner, description, help page text, etc.
CreatedOnUTC TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
ModifiedOnUTC TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
CREATE TABLE codidact.Tags (
Id BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT,
Deleted BIT NOT NULL, -- deleted tags cannot be recreated
SiteId BIGINT NOT NULL,
Title VARCHAR(255) NOT NULL,
Body TEXT NOT NULL,
CreatedOnUTC TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
ModifiedOnUTC TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
INDEX TagsOnSiteId (SiteId),
INDEX TagsOnTitle (Title),
CONSTRAINT FKTagsSites FOREIGN KEY (SiteId) REFERENCES Sites (Id)
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
CREATE TABLE codidact.Pages (
Id BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT,
Title VARCHAR(255) NOT NULL,
Views BIGINT NOT NULL DEFAULT 0,
Deleted BIT NOT NULL,
SiteId BIGINT NOT NULL,
Tag1Id BIGINT NOT NULL, -- must have at least one tag
Tag2Id BIGINT,
Tag3Id BIGINT,
Tag4Id BIGINT,
Tag5Id BIGINT,
CreatedOnUTC TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
ModifiedOnUTC TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
INDEX PagesOnSiteId (SiteId),
INDEX PagesOnTag1Id (Tag1Id),
INDEX PagesOnTag2Id (Tag2Id),
INDEX PagesOnTag3Id (Tag3Id),
INDEX PagesOnTag4Id (Tag4Id),
INDEX PagesOnTag5Id (Tag5Id),
CONSTRAINT FKPagesSites FOREIGN KEY (SiteId) REFERENCES Sites (Id),
CONSTRAINT FKPagesTag1s FOREIGN KEY (Tag1Id) REFERENCES Tags (Id),
CONSTRAINT FKPagesTag2s FOREIGN KEY (Tag2Id) REFERENCES Tags (Id),
CONSTRAINT FKPagesTag3s FOREIGN KEY (Tag3Id) REFERENCES Tags (Id),
CONSTRAINT FKPagesTag4s FOREIGN KEY (Tag4Id) REFERENCES Tags (Id),
CONSTRAINT FKPagesTag5s FOREIGN KEY (Tag5Id) REFERENCES Tags (Id)
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
CREATE TABLE codidact.Posts (
Id BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT,
TypeId TINYINT NOT NULL, -- ENUM (Question, Answer, Comment)
Deleted BIT NOT NULL,
Body TEXT NOT NULL,
Score INT(11) NOT NULL DEFAULT 0, -- cached upvotes - downvotes, updated with pgsql atomic increment
ParentId BIGINT NOT NULL,
PageId BIGINT NOT NULL,
UserId BIGINT NOT NULL,
CreatedOnUTC TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
ModifiedOnUTC TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
INDEX PostsOnTypeId (Score),
INDEX PostsOnTypeId (TypeId),
INDEX PostsOnPageId (PageId),
INDEX PostsOnUserId (UserId),
CONSTRAINT FKPostsParents FOREIGN KEY (ParentId) REFERENCES Posts (Id),
CONSTRAINT FKPostsPages FOREIGN KEY (PageId) REFERENCES Pages (Id),
CONSTRAINT FKPostsUsers FOREIGN KEY (UserId) REFERENCES Users (Id)
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
CREATE TABLE codidact.Events (
Id BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT,
TypeId TINYINT NOT NULL, -- ENUM (HitHNQ, Rename, Migrated, Edit, Upvote, Downvote, Star, Deleted, FlagInappropriate, FlagSpam, FlagNotAnswer, etc)
Body TEXT, -- stores post body for edits, new title for renames, or source and destination for migrations
Reason TEXT, -- for edits, flags, renames, and migrations
PostId BIGINT NOT NULL,
PageId BIGINT NOT NULL,
UserId BIGINT NOT NULL,
CreatedOnUTC TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
INDEX EventsOnType (TypeId),
INDEX EventsOnPost (PostId),
INDEX EventsOnPage (PageId),
INDEX EventsOnUser (UserId),
CONSTRAINT FKEventsPosts FOREIGN KEY (PostId) REFERENCES Posts (Id),
CONSTRAINT FKEventsPages FOREIGN KEY (PageId) REFERENCES Pages (Id),
CONSTRAINT FKEventsUsers FOREIGN KEY (UserId) REFERENCES Users (Id)
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
CREATE TABLE codidact.Alerts (
Id BIGINT NOT NULL PRIMARY KEY AUTO_INCREMENT,
TypeId TINYINT NOT NULL, -- ENUM (Upvoted, Downvoted, RepliedTo, Edit, Deleted, Mentioned, etc)
PostId BIGINT, -- sometimes there's no relevant post (NULL)
UserId BIGINT NOT NULL,
Seen BIT NOT NULL,
CreatedOnUTC TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP
INDEX AlertsOnUser (UserId),
CONSTRAINT FKAlertsPosts FOREIGN KEY (PostId) REFERENCES Posts (Id),
CONSTRAINT FKAlertsUsers FOREIGN KEY (UserId) REFERENCES Users (Id)
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
example queries:
-- View @PageId
SELECT post.Id,post.TypeId,post.Body,post.Score,post.ParentId,post.UserId,user.Username,post.CreatedOnUTC,post.ModifiedOnUTC
FROM Posts post
JOIN Users user ON user.Id = post.UserId
WHERE post.PageId = @PageId AND NOT post.Deleted
SELECT page.Title,page.Views,page.Tag1Id,page.Tag2Id,page.Tag3Id,page.Tag4Id,page.Tag5Id
FROM Pages page
WHERE page.Id = @PageId
-- List newest 50 questions
SELECT page.Title,page.Views,page.CreatedOnUTC,post.Score,post.UserId,user.Username,user.Reputation
FROM Pages page
JOIN Posts post ON post.PageId = page.Id
JOIN Users user ON user.Id = post.UserId
WHERE post.TypeId = @Question
ORDER BY page.CreatedOnUTC
LIMIT 50
-- List 50 top voted questions in @TagId
SELECT page.Title,page.Views,page.CreatedOnUTC,post.Score,post.UserId,user.Username,user.Reputation
FROM Pages page
JOIN Posts post ON post.PageId = page.Id
JOIN Users user ON user.Id = post.UserId
WHERE post.TypeId = @Question AND (
page.Tag1Id = @TagId OR
page.Tag2Id = @TagId OR
page.Tag3Id = @TagId OR
page.Tag4Id = @TagId OR
page.Tag5Id = @TagId )
ORDER BY post.Score
LIMIT 50
Sorry for the code dump, it’s just more productive and efficient here for me to show, not tell. Is there anything that this doesn’t cover?
(Thanks @manassehkatz @ArtOfCode and @MasonWheeler for discord discussion that lead to this.)