<?xml version="1.0" encoding="US-ASCII"?>
<dblp>
<article key="journals/corr/abs-2310-00212" publtype="informal" mdate="2024-03-18">
<author>Tianhao Wu 0002</author>
<author>Banghua Zhu</author>
<author>Ruoyu Zhang</author>
<author>Zhaojin Wen</author>
<author>Kannan Ramchandran</author>
<author>Jiantao Jiao</author>
<title>Pairwise Proximal Policy Optimization: Harnessing Relative Feedback for LLM Alignment.</title>
<year>2023</year>
<volume>abs/2310.00212</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2310.00212</ee>
<url>db/journals/corr/corr2310.html#abs-2310-00212</url>
</article>
</dblp>
