• About Us
  • Privacy Policy
  • Disclaimer
  • Contact Us
AimactGrow
  • Home
  • Technology
  • AI
  • SEO
  • Coding
  • Gaming
  • Cybersecurity
  • Digital marketing
No Result
View All Result
  • Home
  • Technology
  • AI
  • SEO
  • Coding
  • Gaming
  • Cybersecurity
  • Digital marketing
No Result
View All Result
AimactGrow
No Result
View All Result

Fine-Tuning a BERT Model – MachineLearningMastery.com

Admin by Admin
December 25, 2025
Home AI
Share on FacebookShare on Twitter


import collections

import dataclasses

import functools

 

import torch

import torch.nn as nn

import torch.optim as optim

import tqdm

from datasets import load_dataset

from tokenizers import Tokenizer

from torch import Tensor

 

 

# BERT config and model defined previously

@dataclasses.dataclass
class BertConfig:
    """Configuration for BERT model."""

    vocab_size: int = 30522    # rows in the word-embedding table
    num_layers: int = 12       # number of stacked transformer blocks
    hidden_size: int = 768     # width of embeddings and hidden states
    num_heads: int = 12        # attention heads per transformer block
    dropout_prob: float = 0.1  # dropout probability used throughout the model
    pad_id: int = 0            # token id treated as padding by the word embedding
    max_seq_len: int = 512     # rows in the position-embedding table
    num_types: int = 2         # rows in the token-type embedding table

 

class BertBlock(nn.Module):
    """One transformer block in BERT.

    Self-attention followed by a feed-forward network. Each sub-layer is
    wrapped in a residual connection and followed by LayerNorm (post-norm).
    """

    def __init__(self, hidden_size: int, num_heads: int, dropout_prob: float):
        """Build the attention, normalization and feed-forward sub-layers.

        Args:
            hidden_size: Width of the input and output hidden states.
            num_heads: Number of attention heads.
            dropout_prob: Dropout probability, passed to the attention module
                and applied to the feed-forward output.
        """
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, seq_len, hidden_size)
        self.attention = nn.MultiheadAttention(hidden_size, num_heads,
                                               dropout=dropout_prob, batch_first=True)
        self.attn_norm = nn.LayerNorm(hidden_size)
        self.ff_norm = nn.LayerNorm(hidden_size)
        self.dropout = nn.Dropout(dropout_prob)
        self.feed_forward = nn.Sequential(
            nn.Linear(hidden_size, 4 * hidden_size),
            nn.GELU(),
            nn.Linear(4 * hidden_size, hidden_size),
        )

    def forward(self, x: Tensor, pad_mask: Tensor) -> Tensor:
        """Apply the block to a batch of sequences.

        Args:
            x: Hidden states, (batch, seq_len, hidden_size).
            pad_mask: Mask handed to the attention module as
                ``key_padding_mask``; positions marked True are ignored as keys.

        Returns:
            Hidden states with the same shape as ``x``.
        """
        # self-attention with padding mask and post-norm
        attn_output, _ = self.attention(x, x, x, key_padding_mask=pad_mask)
        x = self.attn_norm(x + attn_output)
        # feed-forward with GeLU activation and post-norm
        ff_output = self.feed_forward(x)
        x = self.ff_norm(x + self.dropout(ff_output))
        return x

 

class BertPooler(nn.Module):
    """Pooler layer for BERT to process the [CLS] token output."""

    def __init__(self, hidden_size: int):
        """Create the dense projection and tanh activation.

        Args:
            hidden_size: Width of both the input and the pooled output.
        """
        super().__init__()
        self.dense = nn.Linear(hidden_size, hidden_size)
        self.activation = nn.Tanh()

    def forward(self, x: Tensor) -> Tensor:
        """Return ``tanh(dense(x))``; the trailing dimension is preserved."""
        x = self.dense(x)
        x = self.activation(x)
        return x

 

class BertModel(nn.Module):
    """Backbone of BERT model.

    Sums word, token-type and position embeddings, normalizes them, runs the
    result through a stack of ``BertBlock`` layers and pools the first token.
    """

    def __init__(self, config: BertConfig):
        """Build embeddings, transformer blocks and the pooler from ``config``."""
        super().__init__()
        # embedding layers
        self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size,
                                            padding_idx=config.pad_id)
        self.type_embeddings = nn.Embedding(config.num_types, config.hidden_size)
        self.position_embeddings = nn.Embedding(config.max_seq_len, config.hidden_size)
        self.embeddings_norm = nn.LayerNorm(config.hidden_size)
        self.embeddings_dropout = nn.Dropout(config.dropout_prob)
        # transformer blocks
        self.blocks = nn.ModuleList([
            BertBlock(config.hidden_size, config.num_heads, config.dropout_prob)
            for _ in range(config.num_layers)
        ])
        # [CLS] pooler layer
        self.pooler = BertPooler(config.hidden_size)

    def forward(self, input_ids: Tensor, token_type_ids: Tensor, pad_id: int = 0,
                ) -> tuple[Tensor, Tensor]:
        """Encode a batch of token sequences.

        Args:
            input_ids: Integer token ids, (batch_size, seq_len).
            token_type_ids: Integer segment ids, same shape as ``input_ids``.
            pad_id: Token id to mask out as padding in self-attention.

        Returns:
            ``(sequence_output, pooled_output)``: the per-token hidden states
            (batch_size, seq_len, hidden_size) and the pooled hidden state of
            the first token (batch_size, hidden_size).
        """
        # create attention mask for padding tokens (True where padded)
        pad_mask = input_ids == pad_id
        # convert integer tokens to embedding vectors
        _, seq_len = input_ids.shape  # also enforces a 2-D input
        position_ids = torch.arange(seq_len, device=input_ids.device).unsqueeze(0)
        position_embeddings = self.position_embeddings(position_ids)
        type_embeddings = self.type_embeddings(token_type_ids)
        token_embeddings = self.word_embeddings(input_ids)
        x = token_embeddings + type_embeddings + position_embeddings
        x = self.embeddings_norm(x)
        x = self.embeddings_dropout(x)
        # process the sequence with transformer blocks
        for block in self.blocks:
            x = block(x, pad_mask)
        # pool the hidden state of the `[CLS]` token
        pooled_output = self.pooler(x[:, 0, :])
        return x, pooled_output

 

# Define new BERT model for question answering

class BertForQuestionAnswering(nn.Module):
    """BERT model for SQuAD question answering.

    Wraps a ``BertModel`` backbone with a linear head that scores every token
    position as a candidate answer start and answer end.
    """

    def __init__(self, config: BertConfig):
        """Create the BERT backbone and the two-output span head."""
        super().__init__()
        self.bert = BertModel(config)
        # Two outputs: start and end position logits
        self.qa_outputs = nn.Linear(config.hidden_size, 2)

    def forward(self,
        input_ids: Tensor,
        token_type_ids: Tensor,
        pad_id: int = 0,
    ) -> tuple[Tensor, Tensor]:
        """Compute start and end logits for each token position.

        Args:
            input_ids: Integer token ids, (batch_size, seq_len).
            token_type_ids: Integer segment ids, same shape as ``input_ids``.
            pad_id: Token id treated as padding; forwarded to the backbone.

        Returns:
            ``(start_logits, end_logits)``, each (batch_size, seq_len).
        """
        # Get sequence output from BERT (batch_size, seq_len, hidden_size);
        # the pooled [CLS] output is not needed for span prediction
        seq_output, _ = self.bert(input_ids, token_type_ids, pad_id=pad_id)
        # Project to start and end logits
        logits = self.qa_outputs(seq_output)  # (batch_size, seq_len, 2)
        start_logits = logits[:, :, 0]  # (batch_size, seq_len)
        end_logits = logits[:, :, 1]    # (batch_size, seq_len)
        return start_logits, end_logits

 

# Load SQuAD dataset for question answering
dataset = load_dataset("squad")

# Load the pretrained BERT tokenizer from its serialized JSON file
TOKENIZER_PATH = "wikitext-2_wordpiece.json"
tokenizer = Tokenizer.from_file(TOKENIZER_PATH)

 

# Setup collate function to tokenize question-context pairs for the model

def collate(batch: list[dict], tokenizer: Tokenizer, max_len: int,
            ) -> tuple[Tensor, Tensor, Tensor, Tensor]:
    """Collate question-context pairs for the model.

    Each sample is encoded as ``[CLS] question [SEP] context [SEP]`` and then
    truncated or padded to exactly ``max_len`` tokens. The answer span is
    located by matching the tokenized first answer text against the tokenized
    context.

    Args:
        batch: Samples with ``"question"``, ``"context"`` and
            ``"answers"`` (a dict holding a ``"text"`` list) entries.
        tokenizer: Tokenizer providing ``encode(...).ids`` and
            ``token_to_id`` for ``[CLS]``, ``[SEP]`` and ``[PAD]``.
        max_len: Fixed sequence length of every returned row.

    Returns:
        ``(input_ids, token_type_ids, start_positions, end_positions)``.
        The first two are (len(batch), max_len); the last two are
        (len(batch),). A span of ``(0, 0)`` means no usable answer: none was
        given, it was not found in the context, or truncation clipped it.
    """
    cls_id = tokenizer.token_to_id("[CLS]")
    sep_id = tokenizer.token_to_id("[SEP]")
    pad_id = tokenizer.token_to_id("[PAD]")

    input_ids_list = []
    token_type_ids_list = []
    start_positions = []
    end_positions = []

    for item in batch:
        # Tokenize question and context
        question, context = item["question"], item["context"]
        question_ids = tokenizer.encode(question).ids
        context_ids = tokenizer.encode(context).ids

        # Build input: [CLS] question [SEP] context [SEP]
        input_ids = [cls_id, *question_ids, sep_id, *context_ids, sep_id]
        token_type_ids = [0] * (len(question_ids) + 2) + [1] * (len(context_ids) + 1)

        # Truncate or pad to max length
        if len(input_ids) > max_len:
            input_ids = input_ids[:max_len]
            token_type_ids = token_type_ids[:max_len]
        else:
            input_ids.extend([pad_id] * (max_len - len(input_ids)))
            token_type_ids.extend([1] * (max_len - len(token_type_ids)))

        # Find answer position in tokens: answer may not be in the context
        start_pos = end_pos = 0
        if item["answers"]["text"]:
            answer_ids = tokenizer.encode(item["answers"]["text"][0]).ids
            # An answer that tokenizes to nothing would "match" at offset 0 and
            # yield end_pos < start_pos, so treat it as no answer instead.
            if answer_ids:
                # find the offset of the answer inside context_ids
                for i in range(len(context_ids) - len(answer_ids) + 1):
                    if context_ids[i:i + len(answer_ids)] == answer_ids:
                        # shift by [CLS] + question + [SEP]
                        start_pos = i + len(question_ids) + 2
                        end_pos = start_pos + len(answer_ids) - 1
                        break
            if end_pos >= max_len:
                start_pos = end_pos = 0  # answer is clipped, hence no answer

        input_ids_list.append(input_ids)
        token_type_ids_list.append(token_type_ids)
        start_positions.append(start_pos)
        end_positions.append(end_pos)

    input_ids_list = torch.tensor(input_ids_list)
    token_type_ids_list = torch.tensor(token_type_ids_list)
    start_positions = torch.tensor(start_positions)
    end_positions = torch.tensor(end_positions)
    return (input_ids_list, token_type_ids_list, start_positions, end_positions)

 

batch_size = 16
max_len = 384  # Longer for Q&A to accommodate context
collate_fn = functools.partial(collate, tokenizer=tokenizer, max_len=max_len)
train_loader = torch.utils.data.DataLoader(dataset["train"], batch_size=batch_size,
                                           shuffle=True, collate_fn=collate_fn)
val_loader = torch.utils.data.DataLoader(dataset["validation"], batch_size=batch_size,
                                         shuffle=False, collate_fn=collate_fn)

# Create Q&A model with a pretrained foundation BERT model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
config = BertConfig()
model = BertForQuestionAnswering(config)
model.to(device)
# Only the backbone is restored from the checkpoint; the QA head keeps its
# fresh initialization.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True where the torch version allows).
model.bert.load_state_dict(torch.load("bert_model.pth", map_location=device))

# Training setup
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=2e-5)
num_epochs = 3

for epoch in range(num_epochs):
    model.train()
    # Training
    with tqdm.tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}") as pbar:
        for batch in pbar:
            # get batched data
            input_ids, token_type_ids, start_positions, end_positions = batch
            input_ids = input_ids.to(device)
            token_type_ids = token_type_ids.to(device)
            start_positions = start_positions.to(device)
            end_positions = end_positions.to(device)
            # forward pass
            start_logits, end_logits = model(input_ids, token_type_ids)
            # backward pass: the loss is the sum of the start- and end-position
            # classification losses over the sequence positions
            optimizer.zero_grad()
            start_loss = loss_fn(start_logits, start_positions)
            end_loss = loss_fn(end_logits, end_positions)
            loss = start_loss + end_loss
            loss.backward()
            optimizer.step()
            # update progress bar; iterating over `pbar` already advances it,
            # so no manual pbar.update() call (that would double-count)
            pbar.set_postfix(loss=float(loss))

    # Validation: keep track of the average loss and accuracy
    model.eval()
    val_loss, num_matches, num_batches, num_samples = 0, 0, 0, 0
    with torch.no_grad():
        for batch in val_loader:
            # get batched data
            input_ids, token_type_ids, start_positions, end_positions = batch
            input_ids = input_ids.to(device)
            token_type_ids = token_type_ids.to(device)
            start_positions = start_positions.to(device)
            end_positions = end_positions.to(device)
            # forward pass on validation data
            start_logits, end_logits = model(input_ids, token_type_ids)
            # compute loss
            start_loss = loss_fn(start_logits, start_positions)
            end_loss = loss_fn(end_logits, end_positions)
            loss = start_loss + end_loss
            val_loss += loss.item()
            num_batches += 1
            # compute accuracy: a sample counts only if both the predicted
            # start and the predicted end match exactly
            pred_start = start_logits.argmax(dim=-1)
            pred_end = end_logits.argmax(dim=-1)
            match = (pred_start == start_positions) & (pred_end == end_positions)
            num_matches += match.sum().item()
            num_samples += len(start_positions)

    avg_loss = val_loss / num_batches
    acc = num_matches / num_samples
    print(f"Validation {epoch+1}/{num_epochs}: acc {acc:.4f}, avg loss {avg_loss:.4f}")

# Save the fine-tuned model
torch.save(model.state_dict(), "bert_model_squad.pth")

Tags: BERT, FineTuning, MachineLearningMastery.com, model
Admin

Admin

Next Post
Palantir signs a deal with The Nuclear Company under which the startup will pay Palantir $100M over five years to develop AI software for the nuclear industry (Miquela Thornton/Bloomberg)

Crypto M&A hit $8.6B across 267 deals in 2025 vs. $2.17B in 2024; 11 crypto IPOs raised $14.6B worldwide, up from $310M from 4 IPOs in 2024 (Nikou Asgari/Financial Times)

Leave a Reply Cancel reply

Your email address will not be published. Required fields are marked *

Recommended.

All New Xbox Game Pass Games For October, From Baldur’s Gate To Ninja Gaiden 4

All New Xbox Game Pass Games For October, From Baldur’s Gate To Ninja Gaiden 4

October 9, 2025
Optimize Your Product Pages for AI Visibility

Optimize Your Product Pages for AI Visibility

March 8, 2026

Trending.

AI-Assisted Threat Actor Compromises 600+ FortiGate Devices in 55 Countries

AI-Assisted Threat Actor Compromises 600+ FortiGate Devices in 55 Countries

February 23, 2026
10 tips to start preparing! • Yoast

10 tips to start preparing! • Yoast

July 21, 2025
Exporting a Cloth Simulation from Blender to an Interactive Three.js Scene

Exporting a Cloth Simulation from Blender to an Interactive Three.js Scene

August 20, 2025
Moonshot AI Releases Attention Residuals to Replace Fixed Residual Mixing with Depth-Wise Attention for Better Scaling in Transformers

Moonshot AI Releases Attention Residuals to Replace Fixed Residual Mixing with Depth-Wise Attention for Better Scaling in Transformers

March 16, 2026
Design Has Never Been More Important: Inside Shopify’s Acquisition of Molly

Design Has Never Been More Important: Inside Shopify’s Acquisition of Molly

September 8, 2025

AimactGrow

Welcome to AimactGrow, your ultimate source for all things technology! Our mission is to provide insightful, up-to-date content on the latest advancements in technology, coding, gaming, digital marketing, SEO, cybersecurity, and artificial intelligence (AI).

Categories

  • AI
  • Coding
  • Cybersecurity
  • Digital marketing
  • Gaming
  • SEO
  • Technology

Recent News

WaterPlum Unleashes “StoatWaffle” Malware in VSCode Supply Chain Attack

WaterPlum Unleashes “StoatWaffle” Malware in VSCode Supply Chain Attack

March 19, 2026
What It Is, Why It Matters, and What to Do Now

Do Keyword Research in 2026 (6 Methods + Framework)

March 19, 2026
  • About Us
  • Privacy Policy
  • Disclaimer
  • Contact Us

© 2025 https://blog.aimactgrow.com/ - All Rights Reserved

No Result
View All Result
  • Home
  • Technology
  • AI
  • SEO
  • Coding
  • Gaming
  • Cybersecurity
  • Digital marketing

© 2025 https://blog.aimactgrow.com/ - All Rights Reserved