CS144 2023 简要记录

前言： 因为大学时候学计网的无聊体验，相关的八股不怎么愿意看，面试的时候一直被问计网相关的问题都不怎么能回答上来，虽然现在秋招快结束了，但还是打算趁着国庆假期以及后面几天，把之前就听说过的CS144计网课程实验做完，补齐计网这一环。由于只是为了熟悉计网，故不怎么会详细记录实验过程，只是写一些印象深刻的点，由于这是个人网站，没做什么seo优化，搜索引擎不怎么能搜到，故直接把相关代码贴上去。

如有任何问题，欢迎联系我，个人邮箱：r1523646952@163.com

环境搭建：

安装高版本的ubuntu保证g++版本符合要求

清华ubuntu镜像：ubuntu-23.04-desktop-amd64

实验pdf备份：https://github.com/1020xyr/cs144-pdf

lab0 wget与内存字节流实现

lab0地址：check0.pdf

string_view不涉及实际的内存管理

刚开始ByteStream类中使用string_view存储字节流，虽然知道string_view和leveldb中的Slice类差不多，但总感觉它会维持一份它所指的内存（特别是使用了移动构造函数），但实际测试却发现报错，指向栈内内存，故改成string存储

string_view测试：

#include <string>
#include <iostream>

using namespace std;

string_view GetView() {
  string test = "123456789";
  string_view view = test;
  cout << view << endl;
  cout << view.size() << " " << sizeof(view) << endl;
  return view;
}

int main() {
  auto view = GetView();
  cout << view << endl;
  cout << view.size() << " " << sizeof(view) << endl;
}

/*
string_view成员:
      size_t	    _M_len;
      const _CharT* _M_str;
*/

Writer::push 数据为空时处理

  {
    ByteStreamTestHarness test { "peeks", 2 };
    test.execute( Push { "" } );
    test.execute( Push { "" } );
    test.execute( Push { "" } );
    test.execute( Push { "" } );
    test.execute( Push { "" } );
    test.execute( Push { "cat" } );
    test.execute( Push { "" } );
    test.execute( Push { "" } );
    test.execute( Push { "" } );
    test.execute( Push { "" } );
    test.execute( Push { "" } );
    test.execute( Peek { "ca" } );
    test.execute( Peek { "ca" } );
    test.execute( BytesBuffered { 2 } );
    test.execute( Peek { "ca" } );
    test.execute( Peek { "ca" } );
    test.execute( Pop { 1 } );
    test.execute( Push { "" } );
    test.execute( Push { "" } );
    test.execute( Push { "" } );
    test.execute( Peek { "a" } );
    test.execute( Peek { "a" } );
    test.execute( BytesBuffered { 1 } );
  }

} catch ( const exception& e ) {
  cerr << "Exception: " << e.what() << endl;
  return EXIT_FAILURE;
}

测试时以上测试段一直过不去，后面发现即使数据为空也会向字节流中加入一个””，可以在peek时进行特殊处理，返回第一个非空的string，但更简单的是直接在push时不加入空数据

1
2
3

if ( data.empty() || avil == 0 ) { // 数据为空或缓冲区为空，直接返回
  return;
}

无符号整数while循环 > 0

刚开始我的Reader::pop实现如左边所示，这是基于byte_stream_helpers中read函数的基本用法，使用peek后选择性进行裁剪后调用pop，故pop的len一定小于等于第一个string的长度，当然以上代码是可以通过全部测试用例的。写完之后，看了看网上其他实现，发现CS144 2023 完全指南比我考虑多一点，故对其他情况进行处理。

实现过程中忽略了len为无符号整数的特点，想通过使len小于0结束循环，然后导致循环出错，发现了这一点。也就是说使用无符号整数当作while循环条件是很危险的，只有==0的时候才能跳出循环，len>0实际上等同于len!=0，并且len变成“负数”时值更加无法预测，导致程序行为非常诡异，尽量少使用无符号整数当作循环条件

通过截图：

参考代码：

void get_URL( const string& host, const string& path )
{
  Address addr( host, "http" ); // 传递主机名与服务名
  TCPSocket tcp_socket;         // 初始化socket
  tcp_socket.connect( addr );   // 连接服务端
  string str = "GET " + path + " HTTP/1.1\r\nHost: cs144.keithw.org\r\nConnection: close\r\n\r\n";
  tcp_socket.write( str );      // 发送HTTP请求报文
  while ( !tcp_socket.eof() ) { // 接受HTTP响应报文
    string content;
    tcp_socket.read( content );
    cout << content;
  }
  tcp_socket.close(); // 关闭socket连接
}

class ByteStream
{
protected:
  uint64_t capacity_;
  // Please add any additional state to the ByteStream here, and not to the Writer and Reader interfaces.
  uint64_t cur_len_ { 0 };
  uint64_t push_len_ { 0 };
  uint64_t pop_len_ { 0 };
  bool close_flag_ { false };
  bool error_flag_ { false };
  std::queue<std::string> byte_stream_ {};

public:
  explicit ByteStream( uint64_t capacity );

  // Helper functions (provided) to access the ByteStream's Reader and Writer interfaces
  Reader& reader();
  const Reader& reader() const;
  Writer& writer();
  const Writer& writer() const;
};

#include <stdexcept>

#include "byte_stream.hh"

using namespace std;

ByteStream::ByteStream( uint64_t capacity ) : capacity_( capacity ) {}

void Writer::push( string data )
{
  const uint64_t avil = available_capacity();
  if ( data.empty() || avil == 0 ) { // 数据为空或缓冲区为空，直接返回
    return;
  }

  if ( data.size() > avil ) { // 数据长度大于现存容量，删除尾部数据
    data.erase( avil );
  }

  cur_len_ += data.size();
  push_len_ += data.size();
  byte_stream_.emplace( move( data ) ); // 调用移动构造，减少拷贝
}

void Writer::close()
{
  close_flag_ = true;
}

void Writer::set_error()
{
  error_flag_ = true;
}

bool Writer::is_closed() const
{
  return close_flag_;
}

uint64_t Writer::available_capacity() const
{
  return capacity_ - cur_len_;
}

uint64_t Writer::bytes_pushed() const
{
  return push_len_;
}

string_view Reader::peek() const
{
  return byte_stream_.front(); // 返回队列首部数据包string_view
}

bool Reader::is_finished() const
{
  return close_flag_ && ( cur_len_ == 0 );
}

bool Reader::has_error() const
{
  return error_flag_;
}

void Reader::pop( uint64_t len )
{
  len = min( len, cur_len_ );
  cur_len_ -= len;
  pop_len_ += len;
  while ( len > 0 ) {
    auto sz = byte_stream_.front().size();
    if ( len >= sz ) { // 弹出整个数据包
      byte_stream_.pop();
      len -= sz;
    } else { // 删除队首数据包前部数据
      byte_stream_.front().erase( 0, len );
      len = 0;
    }
  }
}

uint64_t Reader::bytes_buffered() const
{
  return cur_len_;
}

uint64_t Reader::bytes_popped() const
{
  return pop_len_;
}

lab1 重组器

重组器的数据结构选择为list，每一个链表节点保存子串的起始索引，终止索引，string数据，链表节点顺序与区间范围顺序一致。insert操作主要分成两种情况：（1）到达数据在小于等于预期索引，则将该数据写入字节流，并写入后面连续的子串数据（2）到达数据在大于预期索引，则考虑是否需要暂存数据，而后检查list中子串位置关系，确保各子串存储数据范围不重叠

ps: 实现过程中我想要使得情况2的处理更加简洁，但一直想不到非常好的方法，最后直接遍历链表，不同位置关系分类讨论，懒得想更好的方法了，而且先写出来一个能过测试的demo也是便于进一步优化的，一昧空想意义不大。

问题

终止条件：

出现末尾子串标记且末尾子串前所有数据均写入字节流

1
2
3

if ( unassembler_string_.empty() && occur_last_string_ ) {
  output.close();
}

语法风格检查：

minnow/src/reassembler.cc:5:19: error: function ‘insert’ has cognitive complexity of 32 (threshold 25) [readability-function-cognitive-complexity,-warnings-as-errors]

将insert函数拆分为insert_bytestream函数与insert_tmp_buffer函数

测试通过截图

参考代码

#pragma once

#include "byte_stream.hh"

#include <list>
#include <string>

struct Substring
{
  // 左开右闭区间
  uint64_t start_index {};
  uint64_t end_index {};
  std::string data;
  Substring( uint64_t start, uint64_t end, std::string&& s )
    : start_index( start ), end_index( end ), data( std::move( s ) )
  {}
  Substring( Substring&& substr ) noexcept
    : start_index( substr.start_index ), end_index( substr.end_index ), data( std::move( substr.data ) )
  {}
  Substring& operator=( Substring&& substr ) noexcept
  {
    start_index = substr.start_index;
    end_index = substr.start_index;
    data = std::move( substr.data );
    return *this;
  }

  Substring() noexcept = default;
  ~Substring() noexcept = default;
  Substring( const Substring& ) noexcept = default;
  Substring& operator=( const Substring& ) noexcept = default;
};

class Reassembler
{
public:
  /*
   * Insert a new substring to be reassembled into a ByteStream.
   *   `first_index`: the index of the first byte of the substring
   *   `data`: the substring itself
   *   `is_last_substring`: this substring represents the end of the stream
   *   `output`: a mutable reference to the Writer
   *
   * The Reassembler's job is to reassemble the indexed substrings (possibly out-of-order
   * and possibly overlapping) back into the original ByteStream. As soon as the Reassembler
   * learns the next byte in the stream, it should write it to the output.
   *
   * If the Reassembler learns about bytes that fit within the stream's available capacity
   * but can't yet be written (because earlier bytes remain unknown), it should store them
   * internally until the gaps are filled in.
   *
   * The Reassembler should discard any bytes that lie beyond the stream's available capacity
   * (i.e., bytes that couldn't be written even if earlier gaps get filled in).
   *
   * The Reassembler should close the stream after writing the last byte.
   */
  void insert( uint64_t first_index, std::string data, bool is_last_substring, Writer& output );

  // How many bytes are stored in the Reassembler itself?
  uint64_t bytes_pending() const;

private:
  void insert_bytestream( uint64_t first_index, std::string& data, Writer& output );

  void insert_tmp_buffer( uint64_t first_index, std::string& data, Writer& output );

  std::list<Substring> unassembler_string_ {}; // 暂存区链表
  uint64_t next_stream_index_ { 0 };           // 下次写入的位置
  uint64_t bytes_pending_num_ { 0 };           // 暂存区字节数
  bool occur_last_string_ { false };           // 是否出现末尾标记
};

#include "reassembler.hh"

using namespace std;

void Reassembler::insert( uint64_t first_index, string data, bool is_last_substring, Writer& output )
{
  if ( is_last_substring ) { // 出现末尾数据
    occur_last_string_ = true;
  }
  if ( first_index <= next_stream_index_ ) { // 将数据直接写入缓冲区
    insert_bytestream( first_index, data, output );
  } else { // 将数据保存至暂存区
    insert_tmp_buffer( first_index, data, output );
  }
}

void Reassembler::insert_bytestream( uint64_t first_index, std::string& data, Writer& output )
{
  data.erase( 0, next_stream_index_ - first_index ); // 删除已经接受的部分数据
  output.push( move( data ) );
  next_stream_index_ = output.bytes_pushed(); // 更新下一次字节流写入位置

  // 将之后连续的数据写入缓冲区
  while ( !unassembler_string_.empty() && unassembler_string_.front().start_index <= next_stream_index_ ) {
    auto& first_substring = unassembler_string_.front();
    first_substring.data.erase( 0, next_stream_index_ - first_substring.start_index ); // 删除已经接受的部分数据
    output.push( move( first_substring.data ) );

    next_stream_index_ = output.bytes_pushed(); // 更新下一次字节流写入位置
    bytes_pending_num_ -= first_substring.end_index - first_substring.start_index; // 更新暂存区字节数
    unassembler_string_.pop_front();                                               // 弹出对应项
  }

  if ( unassembler_string_.empty() && occur_last_string_ ) { // 出现末尾标记且当前暂存区为空，关闭写入
    output.close();
  }
}

void Reassembler::insert_tmp_buffer( uint64_t first_index, std::string& data, Writer& output )
{
  const uint64_t max_stream_index = next_stream_index_ + output.available_capacity();
  uint64_t string_end_index = first_index + data.length();
  if ( first_index >= max_stream_index ) { // 超出暂存区范围，直接返回
    return;
  }

  if ( string_end_index > max_stream_index ) { // 只保存max_stream_index之前的数据
    data.erase( max_stream_index - first_index );
    string_end_index = max_stream_index;
  }

  // 处理不同相对位置的子串
  auto iter = unassembler_string_.begin();
  while ( iter != unassembler_string_.end() && iter->start_index < string_end_index ) {
    if ( iter->start_index <= first_index
         && iter->end_index >= string_end_index ) { // 输入子串被其他子串所覆盖，直接返回
      return;
    }

    if ( iter->start_index >= first_index
         && iter->end_index <= string_end_index ) { // 其他子串被输入子层包含，删除该子串
      bytes_pending_num_ -= iter->end_index - iter->start_index; // 更新暂存区字节数
      iter = unassembler_string_.erase( iter );
    } else { // 其他位置关系
      if ( iter->start_index <= first_index
           && first_index < iter->end_index ) { // 输入子串首部重叠，删除重叠部分数据
        data.erase( 0, iter->end_index - first_index );
        first_index = iter->end_index;
      } else if ( iter->start_index < string_end_index
                  && string_end_index <= iter->end_index ) { // 输入子串尾部重叠，删除重叠部分数据
        data.erase( iter->start_index - first_index );
        string_end_index = iter->start_index;
        // 直接在其前插入输入子串并返回
        unassembler_string_.emplace( iter, first_index, string_end_index, move( data ) );
        bytes_pending_num_ += string_end_index - first_index;
        return;
      }
      ++iter;
    }
  }
  // 在最后位置前插入输入子串
  unassembler_string_.emplace( iter, first_index, string_end_index, move( data ) );
  bytes_pending_num_ += string_end_index - first_index;
}
uint64_t Reassembler::bytes_pending() const
{
  return bytes_pending_num_; // 返回暂存区字节数
}

lab2 TCP接收端

我的Wrap32::unwrap实现比较复杂，但思想比较简单，设checkpoint / 2^32 = x，遍历x-1 x x+1三个点，选择与checkpoint差绝对值最小的系数k 2^32 * k + seq_no

CS144 2023 Lab Checkpoint 2: the TCP receiver中的实现更简洁一点，但懒得改了

TCPReceiver中的代码更多是根据测试报错然后修改，主要的点就在于SYN标志位 FIN标志位与序号流索引的关系，以及窗口最大值限制。

对于确认号中FIN标志是否占据一个序列号，我刚开始是通过TCPReceiver::receive中判断是否出现FIN标志位且重组器为空来判断是否接收全部数据，而后以这个变量计算确认号

if ( occur_fin_ && reassembler.bytes_pending() == 0 ) { // 数据已全部收到，确认号需加上FIN
    recv_all_data_ = true;
}

if ( occur_syn_ ) { // 仅在syn出现时返回确认号
    // SYN占一个序列号，若数据全部收到，则FIN占一个序列号
    msg.ackno = Wrap32::wrap( inbound_stream.bytes_pushed() + 1 + recv_all_data_, zero_point_ ); // 下一个序列号
}

这样的实现比较复杂，在看了CS144 2023 Lab Checkpoint 2: the TCP receiver实现后，发现inbound_stream.is_closed()方法效果一致，故简化代码，以is_closed代替以上方法

测试通过截图

参考代码

#pragma once

#include <cstdint>

/*
 * The Wrap32 type represents a 32-bit unsigned integer that:
 *    - starts at an arbitrary "zero point" (initial value), and
 *    - wraps back to zero when it reaches 2^32 - 1.
 */

class Wrap32
{
protected:
  uint32_t raw_value_ {};

public:
  explicit Wrap32( uint32_t raw_value ) : raw_value_( raw_value ) {}

  /* Construct a Wrap32 given an absolute sequence number n and the zero point. */
  static Wrap32 wrap( uint64_t n, Wrap32 zero_point );

  /*
   * The unwrap method returns an absolute sequence number that wraps to this Wrap32, given the zero point
   * and a "checkpoint": another absolute sequence number near the desired answer.
   *
   * There are many possible absolute sequence numbers that all wrap to the same Wrap32.
   * The unwrap method should return the one that is closest to the checkpoint.
   */
  uint64_t unwrap( Wrap32 zero_point, uint64_t checkpoint ) const;

  Wrap32 operator+( uint32_t n ) const { return Wrap32 { raw_value_ + n }; }
  bool operator==( const Wrap32& other ) const { return raw_value_ == other.raw_value_; }

  static uint64_t unsigned_abs( uint64_t a, uint64_t b ) { return ( a > b ) ? a - b : b - a; }
};

#include "wrapping_integers.hh"
#include <algorithm>
using namespace std;

Wrap32 Wrap32::wrap( uint64_t n, Wrap32 zero_point )
{
  return Wrap32 { zero_point + n };
}

uint64_t Wrap32::unwrap( Wrap32 zero_point, uint64_t checkpoint ) const
{
  // 设checkpoint / 2^32 = x，遍历x-1 x x+1三个点，选择与checkpoint差绝对值最小的系数k  2^32 * k + seq_no
  const uint64_t pow32 = 1ULL << 32;
  const uint32_t seq_no = raw_value_ - zero_point.raw_value_;
  uint64_t min_abs = pow32 + 1;
  uint64_t k = 0;
  const auto multiple = static_cast<int64_t>( checkpoint / pow32 );
  const int64_t low_bound = max<int64_t>( 0, multiple - 1 );          // x-1需大于等于0
  const int64_t high_bound = min<int64_t>( multiple + 1, pow32 - 1 ); // x+1需小于等于2^32 -1
  for ( int64_t i = low_bound; i <= high_bound; i++ ) {
    // 2^32 * k + seq_no 与 checkpoint的差绝对值
    const uint64_t sub = unsigned_abs( i * pow32 + seq_no, checkpoint );
    if ( min_abs > sub ) {
      min_abs = sub;
      k = i;
    }
  }
  return k * pow32 + seq_no;
}

#pragma once

#include "reassembler.hh"
#include "tcp_receiver_message.hh"
#include "tcp_sender_message.hh"

class TCPReceiver
{
public:
  /*
   * The TCPReceiver receives TCPSenderMessages, inserting their payload into the Reassembler
   * at the correct stream index.
   */
  void receive( TCPSenderMessage message, Reassembler& reassembler, Writer& inbound_stream );

  /* The TCPReceiver sends TCPReceiverMessages back to the TCPSender. */
  TCPReceiverMessage send( const Writer& inbound_stream ) const;

private:
  bool occur_syn_ { false }; // 出现SYN标志位
  Wrap32 zero_point_ { 0 };  // ISN
};

#include "tcp_receiver.hh"

using namespace std;

void TCPReceiver::receive( TCPSenderMessage message, Reassembler& reassembler, Writer& inbound_stream )
{
  if ( message.SYN ) {
    occur_syn_ = true;
    zero_point_ = message.seqno;
  }
  if ( occur_syn_ ) {
    reassembler.insert( message.seqno.unwrap( zero_point_, inbound_stream.bytes_pushed() ) + message.SYN - 1,
                        message.payload,
                        message.FIN,
                        inbound_stream );
  }
}

TCPReceiverMessage TCPReceiver::send( const Writer& inbound_stream ) const
{
  TCPReceiverMessage msg;
  if ( occur_syn_ ) { // 仅在syn出现时返回确认号,FIN在全部数据发送完成后占一位序列号
    msg.ackno = Wrap32::wrap( inbound_stream.bytes_pushed() + 1 + inbound_stream.is_closed(), zero_point_ );
  }
  msg.window_size = min<uint64_t>( UINT16_MAX, inbound_stream.available_capacity() ); // 最大值限定
  return msg;
}

未完待续（等搞完毕设再写）

国外课程实验

#计算机网络 cs144 c++

spdk学习记录上一篇

面试问题记录下一篇