Understanding Attention Mechanisms in Transformers
A deep dive into how self-attention works and why it's revolutionized natural language processing...
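Before looking at a full fine-tuning example, here is a minimal sketch of the core operation the article discusses: scaled dot-product self-attention. The function and tensor names below are illustrative assumptions, not code from the article.

import torch
import torch.nn.functional as F

def self_attention(x, w_q, w_k, w_v):
    """Scaled dot-product self-attention over a sequence x of shape (seq_len, d_model)."""
    q = x @ w_q  # queries
    k = x @ w_k  # keys
    v = x @ w_v  # values
    d_k = q.size(-1)
    # Score every token's query against every token's key, scaled by sqrt(d_k)
    scores = q @ k.transpose(-2, -1) / d_k ** 0.5
    weights = F.softmax(scores, dim=-1)
    # Each output position is a weighted mix of all value vectors
    return weights @ v

# Toy usage: 5 tokens, model width 8 (hypothetical sizes)
torch.manual_seed(0)
x = torch.randn(5, 8)
w_q, w_k, w_v = (torch.randn(8, 8) for _ in range(3))
out = self_attention(x, w_q, w_k, w_v)
print(out.shape)  # torch.Size([5, 8])

The full example below fine-tunes a pretrained BERT model, which uses this same attention machinery internally, for sequence classification.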
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, Dataset


class TextClassifier:
    def __init__(self, model_name='bert-base-uncased', num_labels=2):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.tokenizer = BertTokenizer.from_pretrained(model_name)
        self.model = BertForSequenceClassification.from_pretrained(
            model_name,
            num_labels=num_labels
        ).to(self.device)

    def train(self, train_texts, train_labels, batch_size=16, epochs=3):
        # Prepare dataset
        dataset = TextDataset(train_texts, train_labels, self.tokenizer)
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

        # Optimizer
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=2e-5)

        # Training loop
        self.model.train()
        for epoch in range(epochs):
            total_loss = 0
            for batch in dataloader:
                optimizer.zero_grad()

                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                labels = batch['labels'].to(self.device)

                outputs = self.model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    labels=labels
                )
                loss = outputs.loss
                total_loss += loss.item()

                loss.backward()
                optimizer.step()

            print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss/len(dataloader):.4f}")

    def predict(self, texts):
        # Run inference without gradient tracking and return class probabilities
        self.model.eval()
        predictions = []
        with torch.no_grad():
            for text in texts:
                inputs = self.tokenizer(
                    text,
                    return_tensors="pt",
                    truncation=True,
                    padding=True
                ).to(self.device)
                outputs = self.model(**inputs)
                probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
                predictions.append(probs.cpu().numpy())
        return predictions


# Example dataset class
class TextDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=128):
        # Tokenize all texts up front, padding/truncating to max_length
        self.encodings = tokenizer(
            texts,
            truncation=True,
            padding=True,
            max_length=max_length
        )
        self.labels = labels

    def __getitem__(self, idx):
        item = {
            key: torch.tensor(val[idx])
            for key, val in self.encodings.items()
        }
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)
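To show how the two classes fit together, here is a hypothetical usage sketch; the example texts, labels, and hyperparameters are made up for illustration.

# Toy data for illustration only
train_texts = ["great movie, loved it", "terrible plot and acting",
               "would watch again", "a complete waste of time"]
train_labels = [1, 0, 1, 0]  # 1 = positive, 0 = negative

clf = TextClassifier(model_name='bert-base-uncased', num_labels=2)
clf.train(train_texts, train_labels, batch_size=2, epochs=1)

probs = clf.predict(["an enjoyable and well-paced film"])
print(probs[0])  # e.g. something like [[0.2, 0.8]] -> class 1 more likely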