DRF Serializers

Views: 1064
Wrote on April 20, 2020, 7:17 p.m.

In this chapter we are going to compare DRF (Django Rest Framework) with two old school data serializing methods.

# models.py

class Book(models.Model):
    title = models.CharField(max_length=32)
    CHOICES = ((1, "Python"), (2, "Linux"), (3, "GoLang"))
    category = models.IntegerField(choices=CHOICES)
    pub_time = models.DateTimeField()
    publisher = models.ForeignKey(to="Publisher", on_delete=models.CASCADE)
    authors = models.ManyToManyField(to="Author")

    class Meta:
        verbose_name = "Book Info"
        verbose_name_plural = verbose_name

    def __str__(self):
        return self.title


class Publisher(models.Model):
    title = models.CharField(max_length=32)

    class Meta:
        verbose_name = "Publisher Info"
        verbose_name_plural = verbose_name

    def __str__(self):
        return self.title


class Author(models.Model):
    name = models.CharField(max_length=32)

    class Meta:
        verbose_name = "书籍作者名"
        verbose_name_plural = verbose_name

    def __str__(self):
        return self.name

As you can see from the models, Book links Publisher and Author tables by foreign keys.

Old school methods of serializing data

Method 1

# views.py

class BookView(View):
    def get(self, request):
        book_queryset = Book.objects.values("id", "title","pub_time", "publisher")
        book_list = list(book_queryset)
        ret = []
        for book in book_list:
            book["publisher"] = {
                "id": book["publisher"],
                "title": Publisher.objects.filter(id=book["publisher"]).first().title,
            }
            ret.append(book)
        # ret = json.dumps(book_list, ensure_ascii=False)
        # return HttpResponse(ret)
        return JsonResponse(ret, safe=False, json_dumps_params={"ensure_ascii": False})

First, json.dumps() doesn't work with queryset, so you'll have to convert it into a list; second, foreign key fields will only show id so you'll have to loop retrieve foreign key related fields and append. You can either use json.dumps or Django's JsonResponse. What's the difference? By checking the raw code of JsonResponse we can get the answer:

class JsonResponse(HttpResponse):

    def __init__(self, data, encoder=DjangoJSONEncoder, safe=True,
                 json_dumps_params=None, **kwargs):
        if safe and not isinstance(data, dict):
            raise TypeError(
                'In order to allow non-dict objects to be serialized set the '
                'safe parameter to False.'
            )
        if json_dumps_params is None:
            json_dumps_params = {}
        kwargs.setdefault('content_type', 'application/json')
        data = json.dumps(data, cls=encoder, **json_dumps_params)
        super().__init__(content=data, **kwargs)

The normal json.dumps extends class JSONEncoder, but JsonResponse extends class DjangoJSONEncoder, which handles additional types like datetime, date, time etc, but still not enough to support foreign keys.

class DjangoJSONEncoder(json.JSONEncoder):
    """
    JSONEncoder subclass that knows how to encode date/time, decimal types, and
    UUIDs.
    """
    def default(self, o):
        # See "Date Time String Format" in the ECMA-262 specification.
        if isinstance(o, datetime.datetime):
            r = o.isoformat()
            if o.microsecond:
                r = r[:23] + r[26:]
            if r.endswith('+00:00'):
                r = r[:-6] + 'Z'
            return r
        elif isinstance(o, datetime.date):
            return o.isoformat()
        elif isinstance(o, datetime.time):
            if is_aware(o):
                raise ValueError("JSON can't represent timezone-aware times.")
            r = o.isoformat()
            if o.microsecond:
                r = r[:12]
            return r
        elif isinstance(o, datetime.timedelta):
            return duration_iso_string(o)
        elif isinstance(o, (decimal.Decimal, uuid.UUID, Promise)):
            return str(o)
        else:
            return super().default(o)

Method 2

# views.py

from django.core import serializers


class BookView(View):
    def get(self, request, *args, **kwargs):
        book_queryset = Book.objects.all()
        data = serializers.serialize("json", book_queryset, ensure_ascii=False)
        return HttpResponse(data)

By checking the raw code of serializers, we found out that it works with query set, cool! Django serializers serialize a queryset or any iterator that returns database objects using a certain serializer.

def serialize(format, queryset, **options):
    s = get_serializer(format)()
    s.serialize(queryset, **options)
    return s.getvalue()

However, foreign key fields are still show id instead of actual value we need.

Serialize data with DRF

DRF version 1

Please read the comments below.

# urls.py

path('book/', book.BookView.as_view()),
re_path(r'^book/(?P<pk>\d+)$', book.BookEditView.as_view()),
# views.py

class BookView(APIView):
    def get(self, request, *args, **kwargs):
        book_queryset = Book.objects.all()
        ser_obj = BookSerializer(book_queryset, many=True)
        return Response(ser_obj.data)

# aim to the create() in serializers which need to be overide
    def post(self, request, *args, **kwargs):
        book_obj = request.data
        print(book_obj)
        ser_obj = BookSerializer(data=book_obj)
        if ser_obj.is_valid():
            ser_obj.save()
            return Response(ser_obj.validated_data)
        return Response(ser_obj.errors)


class BookEditView(APIView):
    def get(self, request, id):
        book_obj = Book.objects.filter(id=id).first()
        ser_obj = BookSerializer(book_obj)
        return Response(ser_obj.data)

# aim to the update() in serializers which need to be overide
    def put(self, request, id):
        book_obj = Book.objects.filter(id=id).first()
        ser_obj = BookSerializer(instance=book_obj, data=request.data, partial=True)
        if ser_obj.is_valid():
            ser_obj.save()
            return Response(ser_obj.validated_data)
        return Response(ser_obj.errors)

PUT vs POST

Let’s compare them for better understanding.



Let’s say we are designing a network application. Let’s list down few URIs and their purpose to get better understanding when to use POST and when to use PUT operations.

  • GET /device-management/devices : Get all devices
  • POST /device-management/devices : Create a new device
  • GET /device-management/devices/{id} : Get the device information identified by "id"
  • PUT /device-management/devices/{id} : Update the device information identified by "id"
  • DELETE /device-management/devices/{id} : Delete device by "id"

Follow the similar URI design practices for other resources as well.

POST is not always create and PUT is not always update. This is somewhat minor, but the document says that HTTP POST is a Create. That is not correct. There is no one-to-one mapping between CRUD and HTTP verbs. It boils down to whether the operation is idempotent or not.

HTTP POST is non-idempotent, HTTP PUT is idempotent.

As for CRUD: Create can be both idempotent (if you provide ID of the record that ought to be created, e.g. you are creating a user with specific user_id) or non-idempotent (if you provide data but not the ID). So there're many cases where PUT is absolutely fine for a Create operation.

Furthermore, many services do not support anything but GET or POST. This is because browsers do not support any other verb so lots of proxies and most CDNs also only implement GET and POST with zero support for anything else. We have had some conversations with major CDN providers to address this very issue and I know some of them are actively working on it. Point is: a lot of pragmatic APIs implement POST for delete and update with so-called Method Override.Speaking of which, this document could probably use a word or two about method overrides.

# serializers.py

from rest_framework import serializers


class PublisherSerializer(serializers.Serializer):
    id = serializers.IntegerField()
    title = serializers.CharField(max_length=32)

class AuthorSerializer(serializers.Serializer):
    id = serializers.IntegerField()
    name = serializers.CharField(max_length=32)

class BookSerializer(serializers.Serializer):
    id = serializers.IntegerField(required=False)
    title = serializers.CharField(max_length=32, validators=[my_validate,]) # customized validate, check info below
    pub_time = serializers.DateField()
    category = serializers.CharField(source="get_category_display", read_only=True)
    # Code after `source` will be treated as ORM
    post_category = serializers.IntegerField(write_only=True)

    publisher = PublisherSerializer(read_only=True)
    # Using foreign key relationship to process publisher_obj
    # PublisherSerializer(publisher_obj)
    authors = AuthorSerializer(many=True, read_only=True)
    # Using foreign key relationship to process authors_obj
    # AuthorSerializer(authors_obj)

    publisher_id = serializers.IntegerField(write_only=True)
    author_list = serializers.ListField(write_only=True)
  • Note: If your field_name is declared on your serializers with the parameter required=False then this validation step will not take place if the field is not included.
  • With read_only=True, any extra_kwargs will no effect to them, it is used for GET method only
  • Use write_only if want to let user POST/PUT/PATCH with publisher and authors using id but not expose to the public with GET method

Looks nice, foreign key linked information is now brought in and displayed in the way we want. By the way, you are able to validate the input data with the DRF serializers too:

    def post(self, request, *args, **kwargs):
        book_obj = request.data
        ser_obj = BookSerializer(data=book_obj)     
        is ser_obj.is_valid(raise_exception=True)
        # your task 
        return Response("OK")
Create, Retrieve, Update and Delete (CRUD)
# serializers.py

......

    def create(self, validated_data):
        # validated_data is the validated book_obj
        # ORM Operation
        book_obj = Book.objects.create(title=validated_data["title"],\
        pub_time=validated_data["pub_time"],\
        category=validated_data["post_category"],\
        publisher_id=validated_data["publisher_id"])
        book_obj.authors.add(*validated_data["author_list"])
        # * operator is used to call a function by unpacking an iterable
        # * for a list and ** for a dict
        return book_obj

    def update(self, instance, validated_data):
        # instance of the updated book_obj item
        instance.title = validated_data.get("title", instance.title)
        instance.pub_time = validated_data.get("pub_time", instance.pub_time)
        instance.category = validated_data.get("post_category", instance.category)
        instance.publisher_id = validated_data.get("publisher_id", instance.publisher_id)
        if validated_data.get("author_list"):
            instance.authors.set(validated_data["author_list"])
        instance.save()
        return instance
Validate Hook

A very handy method, or a hook, the DRF provides is the validate_ function. This is very popular for projects in Chinese companies because there are so-called sensitive strings you'll always deal with.

# serializers.py

......

    def validate_title(self, value):
        # value is the value of title field
        if "Donald Trump" in value.lower():
            raise serializers.ValidationError("This sex addict is prohibited")
        return value

    def validate(self, attrs):
        # attrs includes all the fields
        print(attrs)
        if "Donald Trump" not in attrs["Johns_list"].lower() or "Donald Trump" not in attrs["prostitutes_list"]:
            return attrs
        else:
            raise serializers.ValidationError("Ready for a HIV/AIDS test?")

    def my_validate(value):
        if "Trump" in value.lower():
            raise serializers.ValidationError("You make me sick")
        return value

The priority/weights: my_validate(value) > validate_title(self, value) > validate(self, attrs)

DRF version 2

Don't you think DRF version 1 is too complicated? Do you think the framework will make you waste your time on doing such redundant work? Of course not. All of them have been packed up by DRF already.

# urls.py

path('book/', book.BookView.as_view()),
# re_path(r'^book/(?P<pk>\d+)$', book.BookEditView.as_view()),
re_path(r'^book/(?P<pk>\d+)$', book.BookDetailView.as_view()),
# views.py

ret = {'code': 1000, 'data': None}

class BookView(APIView):

    def get(self, request, *args, **kwargs):
        book_queryset = Book.objects.all()
        ser_obj = BookSerializer(book_queryset, many=True)
        ret['data'] = ser_obj.data
        return Response(ret)

    def post(self, request, *args, **kwargs):
        book_obj = request.data
        print(book_obj)
        ser_obj = BookSerializer(data=book_obj)
        if ser_obj.is_valid():
            ser_obj.save()
            return Response(ser_obj.validated_data)
        return Response(ser_obj.errors)


class BookDetailView(APIView):

    def get(self, request, *args, **kwargs):
        pk = kwargs.get('pk')
        book_obj = Book.objects.filter(id=pk).first()
        ser_obj = BookSerializer(book_obj)
        ret['data'] = ser_obj.data
        return Response(ret)
# serializers.py
from rest_framework import serializers
from api.models import Book


class PublisherSerializer(serializers.Serializer):
    id = serializers.IntegerField()
    title = serializers.CharField(max_length=32)


class AuthorSerializer(serializers.Serializer):
    id = serializers.IntegerField()
    name = serializers.CharField(max_length=32)


def my_validate(value):
        if "Trump" in value.lower():
            raise serializers.ValidationError("You make me sick")
        return value


class BookSerializer(serializers.ModelSerializer):
    publisher_info = serializers.SerializerMethodField(read_only=True)
    authors_info = serializers.SerializerMethodField(read_only=True)

    def get_authors_info(self, obj):
        authors_querset = obj.authors.all()
        return [{"id": author.id, "name": author.name} for author in authors_querset]

    def get_publisher_info(self, obj):
        publisher_obj = obj.publisher
        return {"id": publisher_obj.id, "title": publisher_obj.title}

    class Meta:
        model = Book
        fields = "__all__"
        # exclude=["id"]
        # depth = 1 # level of foreign key
                    # depth will turn all foreign key related
                    # field to be `read_only = True`
        extra_kwargs = {"publisher": {"write_only": True}, "authors": {"write_only": True}}

A ModelSerializer is just a regular Serializer, except that:

  • A set of default fields are automatically populated;
  • A set of default validators are automatically populated;
  • Default .create() and .update() implementations are provided.

The process of automatically determining a set of serializer fields based on the model fields is reasonably complex, but you almost certainly don't need to dig into the implementation.

If the ModelSerializer class doesn't generate the set of fields that you need you should either declare the extra/differing fields explicitly on the serializer class, or simply use a Serializer class.