[Typing][A-48,A-49,A-50][debug] Add type annotations for 3 optimizers (`RAdam`, `RMSProp`, `Rprop`) (PaddlePaddle#65085)

---------

Co-authored-by: SigureMo <[email protected]>
2 people authored and co63oc committed Jun 25, 2024
1 parent 7d9b558 commit 5c4f0aa
Showing 13 changed files with 241 additions and 129 deletions.
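A quick orientation before the per-file diffs: the hunks below repeatedly apply the same two changes, wrapping multi-line PEP 604 union annotations in parentheses and declaring an explicit class-level `type: str` attribute on each optimizer. The sketch below is illustrative only; `ToyOptimizer` and `_ToyParameterConfig` are placeholder names, not Paddle APIs, and the real signatures are the ones shown in the diffs.

# Minimal, self-contained sketch of the annotation pattern used in this commit.
# Placeholder names only -- not part of the Paddle codebase.
from __future__ import annotations

from typing import Any, Sequence, TypedDict


class _ToyParameterConfig(TypedDict, total=False):
    params: Sequence[Any]
    learning_rate: float


class ToyOptimizer:
    type: str  # explicit class-level annotation, mirroring the `type: str` lines added below

    def __init__(
        self,
        learning_rate: float = 0.001,
        # Parenthesized union type, so continuation lines need no backslashes.
        parameters: (
            Sequence[Any] | Sequence[_ToyParameterConfig] | None
        ) = None,
        name: str | None = None,
    ) -> None:
        self.type = "toy"
        self._learning_rate = learning_rate
        self._parameters = list(parameters) if parameters is not None else None
        self._name = name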
21 changes: 10 additions & 11 deletions python/paddle/hapi/model_summary.py
@@ -22,7 +22,7 @@
from typing_extensions import TypedDict

import paddle
from paddle import nn
from paddle import Tensor, nn
from paddle.autograd import no_grad
from paddle.static import InputSpec

@@ -35,17 +35,16 @@ class ModelSummary(TypedDict):


def summary(
net: paddle.nn.Layer,
input_size: int
| tuple[int, ...]
| InputSpec
| list[tuple[int, ...] | InputSpec]
| None = None,
net: nn.Layer,
input_size: (
int
| tuple[int, ...]
| InputSpec
| list[tuple[int, ...] | InputSpec]
| None
) = None,
dtypes: str | Sequence[str] | None = None,
input: paddle.Tensor
| Sequence[paddle.Tensor]
| dict[str, paddle.Tensor]
| None = None,
input: Tensor | Sequence[Tensor] | dict[str, Tensor] | None = None,
) -> ModelSummary:
"""Prints a string summary of the network.
44 changes: 27 additions & 17 deletions python/paddle/optimizer/adam.py
@@ -129,8 +129,10 @@ class Adam(Optimizer):
>>> inp = paddle.rand([10,10], dtype="float32")
>>> out = linear(inp)
>>> loss = paddle.mean(out)
>>> adam = paddle.optimizer.Adam(learning_rate=0.1,
... parameters=linear.parameters())
>>> adam = paddle.optimizer.Adam(
... learning_rate=0.1,
... parameters=linear.parameters()
... )
>>> loss.backward()
>>> adam.step()
>>> adam.clear_grad()
@@ -147,11 +149,13 @@ class Adam(Optimizer):
>>> loss = paddle.mean(out)
>>> beta1 = paddle.to_tensor([0.9], dtype="float32")
>>> beta2 = paddle.to_tensor([0.99], dtype="float32")
>>> adam = paddle.optimizer.Adam(learning_rate=0.1,
... parameters=linear.parameters(),
... beta1=beta1,
... beta2=beta2,
... weight_decay=0.01)
>>> adam = paddle.optimizer.Adam(
... learning_rate=0.1,
... parameters=linear.parameters(),
... beta1=beta1,
... beta2=beta2,
... weight_decay=0.01
... )
>>> loss.backward()
>>> adam.step()
>>> adam.clear_grad()
@@ -174,12 +178,14 @@ class Adam(Optimizer):
... 'beta1': 0.8
... }],
... weight_decay=0.01,
... beta1=0.9)
... beta1=0.9
... )
>>> loss.backward()
>>> adam.step()
>>> adam.clear_grad()
"""

type: str
_moment1_acc_str = "moment1"
_moment2_acc_str = "moment2"
@@ -192,9 +198,9 @@ def __init__(
beta1: float | Tensor = 0.9,
beta2: float | Tensor = 0.999,
epsilon: float | Tensor = 1e-8,
parameters: Sequence[Tensor]
| Sequence[_AdamParameterConfig]
| None = None,
parameters: (
Sequence[Tensor] | Sequence[_AdamParameterConfig] | None
) = None,
weight_decay: float | WeightDecayRegularizer | None = None,
grad_clip: GradientClipBase | None = None,
lazy_mode: bool = False,
@@ -265,9 +271,9 @@ def _add_moments_pows(self, p):
name=self._beta1_pow_acc_str,
param=p,
dtype=acc_dtype,
fill_value=0.9
if isinstance(self._beta1, (Variable, Value))
else self._beta1,
fill_value=(
0.9
if isinstance(self._beta1, (Variable, Value))
else self._beta1
),
shape=[1],
type=core.VarDesc.VarType.LOD_TENSOR,
device='cpu',
@@ -276,9 +284,11 @@ def _add_moments_pows(self, p):
name=self._beta2_pow_acc_str,
param=p,
dtype=acc_dtype,
fill_value=0.999
if isinstance(self._beta2, (Variable, Value))
else self._beta2,
fill_value=(
0.999
if isinstance(self._beta2, (Variable, Value))
else self._beta2
),
shape=[1],
type=core.VarDesc.VarType.LOD_TENSOR,
device='cpu',
28 changes: 15 additions & 13 deletions python/paddle/optimizer/adamax.py
@@ -117,15 +117,16 @@ class Adamax(Optimizer):
>>> beta1 = paddle.to_tensor([0.9], dtype="float32")
>>> beta2 = paddle.to_tensor([0.99], dtype="float32")
>>> adam = paddle.optimizer.Adamax(learning_rate=0.1,
... parameters=linear.parameters(),
... beta1=beta1,
... beta2=beta2,
... weight_decay=0.01
>>> adamax = paddle.optimizer.Adamax(
... learning_rate=0.1,
... parameters=linear.parameters(),
... beta1=beta1,
... beta2=beta2,
... weight_decay=0.01
... )
>>> out.backward()
>>> adam.step()
>>> adam.clear_grad()
>>> adamax.step()
>>> adamax.clear_grad()
>>> # Note that the learning_rate of linear_2 is 0.01.
@@ -135,7 +136,7 @@ class Adamax(Optimizer):
>>> out = linear_1(inp)
>>> out = linear_2(out)
>>> loss = paddle.mean(out)
>>> adam = paddle.optimizer.Adamax(
>>> adamax = paddle.optimizer.Adamax(
... learning_rate=0.1,
... parameters=[{ # type: ignore
... 'params': linear_1.parameters()
@@ -149,9 +150,10 @@ class Adamax(Optimizer):
... beta1=0.9
... )
>>> out.backward()
>>> adam.step()
>>> adam.clear_grad()
>>> adamax.step()
>>> adamax.clear_grad()
"""

type: str
_moment_acc_str = "moment"
_inf_norm_acc_str = "inf_norm"
@@ -163,9 +165,9 @@ def __init__(
beta1: float | Tensor = 0.9,
beta2: float | Tensor = 0.999,
epsilon: float | Tensor = 1e-8,
parameters: Sequence[Tensor]
| Sequence[_AdamaxParameterConfig]
| None = None,
parameters: (
Sequence[Tensor] | Sequence[_AdamaxParameterConfig] | None
) = None,
weight_decay: float | WeightDecayRegularizer | None = None,
grad_clip: GradientClipBase | None = None,
name: str | None = None,
41 changes: 24 additions & 17 deletions python/paddle/optimizer/adamw.py
@@ -121,11 +121,12 @@ class AdamW(Optimizer):
>>> beta1 = paddle.to_tensor([0.9], dtype="float32")
>>> beta2 = paddle.to_tensor([0.99], dtype="float32")
>>> opt = paddle.optimizer.AdamW(learning_rate=0.1,
... parameters=linear.parameters(),
... beta1=beta1,
... beta2=beta2,
... weight_decay=0.01
>>> opt = paddle.optimizer.AdamW(
... learning_rate=0.1,
... parameters=linear.parameters(),
... beta1=beta1,
... beta2=beta2,
... weight_decay=0.01
... )
>>> loss.backward()
>>> opt.step()
@@ -171,9 +172,9 @@ def __init__(
beta1: float | Tensor = 0.9,
beta2: float | Tensor = 0.999,
epsilon: float | Tensor = 1e-8,
parameters: Sequence[Tensor]
| Sequence[_AdamParameterConfig]
| None = None,
parameters: (
Sequence[Tensor] | Sequence[_AdamParameterConfig] | None
) = None,
weight_decay: float | Tensor = 0.01,
lr_ratio: Callable[[Tensor], float] | None = None,
apply_decay_param_fun: Callable[[str], bool] | None = None,
@@ -383,9 +384,11 @@ def _add_moments_pows(self, p):
name=self._beta1_pow_acc_str,
param=p,
dtype=acc_dtype,
fill_value=0.9
if isinstance(self._beta1, (Variable, Value))
else self._beta1,
fill_value=(
0.9
if isinstance(self._beta1, (Variable, Value))
else self._beta1
),
shape=[1],
type=core.VarDesc.VarType.LOD_TENSOR,
device='cpu',
@@ -394,9 +397,11 @@ def _add_moments_pows(self, p):
name=self._beta2_pow_acc_str,
param=p,
dtype=acc_dtype,
fill_value=0.999
if isinstance(self._beta2, (Variable, Value))
else self._beta2,
fill_value=(
0.999
if isinstance(self._beta2, (Variable, Value))
else self._beta2
),
shape=[1],
type=core.VarDesc.VarType.LOD_TENSOR,
device='cpu',
@@ -538,9 +543,11 @@ def _append_optimize_op(self, block, param_and_grad):
"multi_precision": find_master,
"with_decay": with_decay,
"coeff": self._weight_decay,
"lr_ratio": 1.0
if self._lr_ratio is None
else self._lr_ratio(param_and_grad[0]),
"lr_ratio": (
1.0
if self._lr_ratio is None
else self._lr_ratio(param_and_grad[0])
),
}

if isinstance(self._beta1, Variable):
7 changes: 6 additions & 1 deletion python/paddle/optimizer/asgd.py
@@ -94,7 +94,12 @@ class ASGD(Optimizer):
>>> inp = paddle.to_tensor(inp)
>>> out = linear(inp)
>>> loss = paddle.mean(out)
>>> asgd = paddle.optimizer.ASGD(learning_rate=0.001, batch_num=10, parameters=linear.parameters(), weight_decay=0.01)
>>> asgd = paddle.optimizer.ASGD(
... learning_rate=0.001,
... batch_num=10,
... parameters=linear.parameters(),
... weight_decay=0.01
... )
>>> out.backward()
>>> asgd.step()
>>> asgd.clear_grad()
26 changes: 16 additions & 10 deletions python/paddle/optimizer/lamb.py
@@ -111,12 +111,18 @@ class Lamb(Optimizer):
>>> beta1 = paddle.to_tensor([0.9], dtype="float32")
>>> beta2 = paddle.to_tensor([0.85], dtype="float32")
>>> lamb = paddle.optimizer.Lamb(
... learning_rate=0.002, beta1=beta1, beta2=beta2, parameters=linear.parameters(), lamb_weight_decay=0.01)
... learning_rate=0.002,
... beta1=beta1,
... beta2=beta2,
... parameters=linear.parameters(),
... lamb_weight_decay=0.01
... )
>>> back = out.backward()
>>> lamb.step()
>>> lamb.clear_grad()
"""

_moment1_acc_str = "moment1"
_moment2_acc_str = "moment2"
_beta1_pow_acc_str = "beta1_pow_acc"
@@ -129,9 +135,9 @@ def __init__(
beta1: float | Tensor = 0.9,
beta2: float | Tensor = 0.999,
epsilon: float | Tensor = 1e-6,
parameters: Sequence[Tensor]
| Sequence[_LambParameterConfig]
| None = None,
parameters: (
Sequence[Tensor] | Sequence[_LambParameterConfig] | None
) = None,
grad_clip: GradientClipBase | None = None,
exclude_from_weight_decay_fn: Callable[[Tensor], bool] | None = None,
multi_precision: bool = False,
@@ -211,9 +217,9 @@ def _add_moments_pows(self, p):
name=self._beta1_pow_acc_str,
param=p,
dtype=acc_dtype,
fill_value=0.9
if isinstance(self._beta1, Variable)
else self._beta1,
fill_value=(
0.9 if isinstance(self._beta1, Variable) else self._beta1
),
shape=[1],
type=core.VarDesc.VarType.LOD_TENSOR,
device='cpu',
@@ -222,9 +228,9 @@ def _add_moments_pows(self, p):
name=self._beta2_pow_acc_str,
param=p,
dtype=acc_dtype,
fill_value=0.999
if isinstance(self._beta2, Variable)
else self._beta2,
fill_value=(
0.999 if isinstance(self._beta2, Variable) else self._beta2
),
shape=[1],
type=core.VarDesc.VarType.LOD_TENSOR,
device='cpu',
7 changes: 6 additions & 1 deletion python/paddle/optimizer/momentum.py
@@ -88,7 +88,11 @@ class Momentum(Optimizer):
>>> loss = paddle.mean(out)
>>> beta1 = paddle.to_tensor([0.9], dtype="float32")
>>> beta2 = paddle.to_tensor([0.99], dtype="float32")
>>> momentum = paddle.optimizer.Momentum(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01)
>>> momentum = paddle.optimizer.Momentum(
... learning_rate=0.1,
... parameters=linear.parameters(),
... weight_decay=0.01
... )
>>> back = out.backward()
>>> momentum.step()
>>> momentum.clear_grad()
@@ -117,6 +121,7 @@ class Momentum(Optimizer):
>>> momentum.clear_grad()
"""

_velocity_acc_str = "velocity"

def __init__(
14 changes: 8 additions & 6 deletions python/paddle/optimizer/nadam.py
@@ -109,8 +109,10 @@ class NAdam(Optimizer):
>>> out = linear(inp)
>>> loss = paddle.mean(out)
>>> nadam = paddle.optimizer.NAdam(learning_rate=0.1,
... parameters=linear.parameters())
>>> nadam = paddle.optimizer.NAdam(
... learning_rate=0.1,
... parameters=linear.parameters()
... )
>>> out.backward()
>>> nadam.step()
>>> nadam.clear_grad()
@@ -124,7 +126,7 @@ class NAdam(Optimizer):
>>> loss = paddle.mean(out)
>>> opt = paddle.optimizer.NAdam(
... learning_rate=0.1,
... parameters=[{ # type: ignore
... parameters=[{ # type: ignore
... 'params': linear_1.parameters()
... }, {
... 'params': linear_2.parameters(),
@@ -154,9 +156,9 @@ def __init__(
beta2: float | Tensor = 0.999,
epsilon: float = 1.0e-8,
momentum_decay: float = 0.004,
parameters: Sequence[Tensor]
| Sequence[_NAdamParameterConfig]
| None = None,
parameters: (
Sequence[Tensor] | Sequence[_NAdamParameterConfig] | None
) = None,
weight_decay: float | Tensor | None = None,
grad_clip: GradientClipBase | None = None,
name: str | None = None,
(Diffs for the remaining 5 changed files did not load.)